├── tests
├── __init__.py
├── test_utils
│ ├── __init__.py
│ ├── test_accessor.py
│ ├── test_progress.py
│ ├── test_deprecated.py
│ ├── test_downloader.py
│ ├── test_display.py
│ ├── test_models.py
│ ├── test_xml_utils.py
│ ├── test_transformatter.py
│ ├── test_mapping_utils.py
│ ├── test_doc_utils.py
│ └── test_jit.py
├── test_compat.py
├── test_transform
│ ├── test_transformations.py
│ ├── test_fft.py
│ └── test_spectro_transform.py
├── test_viz
│ ├── conftest.py
│ └── test_wiggle.py
├── test_proc
│ └── test_detrend.py
├── test_integrations
│ └── test_doc_examples.py
├── test_io
│ ├── test_sentek
│ │ └── test_sentek.py
│ ├── test_optodas
│ │ └── test_optodas.py
│ ├── test_gdr
│ │ └── test_gdr.py
│ ├── test_sintela_binary
│ │ └── test_sintela_binary.py
│ ├── test_h5simple
│ │ └── test_h5simple.py
│ ├── test_febus
│ │ └── test_febus.py
│ ├── test_tdms
│ │ └── test_tdms_utils.py
│ ├── test_wav
│ │ └── test_wav.py
│ └── test_rsf
│ │ └── test_rsf.py
└── test_clients
│ └── test_filespool.py
├── .gitattributes
├── scripts
├── _templates
│ ├── notes.md
│ ├── parent_source_block.html
│ ├── signature.html
│ └── table.html
├── paper
│ └── make_viz_figure.py
├── build_api_docs.py
├── find_futures.py
├── _validate_links.py
└── test_render_api.py
├── docs
├── .gitignore
├── _static
│ ├── logo.png
│ ├── diataxis.png
│ └── patch_n_spool.png
├── _publish.yml
├── changelog.qmd
├── contributors.qmd
├── notes
│ ├── notes.qmd
│ └── doc_strategy.qmd
├── acknowledgements.qmd
├── recipes
│ ├── overview.qmd
│ ├── edge_effects.qmd
│ ├── docker_basic.qmd
│ ├── plotting_channel_number.qmd
│ ├── contributing_to_documentation.qmd
│ ├── correlate.qmd
│ ├── external_conversion.qmd
│ ├── add_spatial_coordinates_to_patch.qmd
│ └── parallelization.qmd
├── supported_formats.qmd
├── contributing
│ ├── style_and_linting.qmd
│ ├── general_guidelines.qmd
│ ├── publish_a_new_release.qmd
│ ├── profiling_benchmarks.qmd
│ ├── dev_install.qmd
│ ├── testing.qmd
│ └── adding_test_data.qmd
├── styles.css
├── tutorial
│ ├── visualization.qmd
│ ├── transformations.qmd
│ └── file_io.qmd
└── references.bib
├── .github
├── test_condarc.yml
├── min_deps_environment.yml
├── actions
│ ├── build-docs
│ │ └── action.yml
│ └── prep_doc_build
│ │ └── action.yml
├── doc_environment.yml
├── ISSUE_TEMPLATE
│ ├── config.yml
│ └── bug_report.md
├── test_code.sh
├── workflows
│ ├── lint.yml
│ ├── profile.yml
│ ├── get_coverage.yml
│ ├── upload_pypi.yml
│ ├── build_deploy_master_docs.yaml
│ ├── build_deploy_stable_docs.yaml
│ ├── run_min_dep_tests.yml
│ ├── runtests.yml
│ └── test_doc_build.yml
└── pull_request_template.md
├── dascore
├── io
│ ├── sintela_binary
│ │ ├── __init__.py
│ │ └── core.py
│ ├── wav
│ │ └── __init__.py
│ ├── silixah5
│ │ ├── __init__.py
│ │ └── core.py
│ ├── pickle
│ │ ├── __init__.py
│ │ └── core.py
│ ├── ap_sensing
│ │ ├── __init__.py
│ │ └── core.py
│ ├── tdms
│ │ ├── __init__.py
│ │ └── core.py
│ ├── dasdae
│ │ └── __init__.py
│ ├── prodml
│ │ ├── __init__.py
│ │ └── core.py
│ ├── febus
│ │ ├── __init__.py
│ │ └── core.py
│ ├── optodas
│ │ ├── __init__.py
│ │ ├── core.py
│ │ └── utils.py
│ ├── h5simple
│ │ ├── __init__.py
│ │ └── core.py
│ ├── dashdf5
│ │ ├── __init__.py
│ │ ├── core.py
│ │ └── utils.py
│ ├── sentek
│ │ ├── __init__.py
│ │ └── core.py
│ ├── rsf
│ │ └── __init__.py
│ ├── gdr
│ │ ├── __init__.py
│ │ └── core.py
│ ├── xml_binary
│ │ ├── __init__.py
│ │ └── core.py
│ ├── neubrex
│ │ ├── __init__.py
│ │ └── utils_rfs.py
│ ├── segy
│ │ └── __init__.py
│ ├── __init__.py
│ └── terra15
│ │ ├── __init__.py
│ │ └── core.py
├── clients
│ ├── __init__.py
│ └── filespool.py
├── core
│ └── __init__.py
├── utils
│ ├── __init__.py
│ ├── signal.py
│ ├── xml.py
│ ├── transformatter.py
│ ├── downloader.py
│ ├── mapping.py
│ ├── deprecate.py
│ └── progress.py
├── version.py
├── viz
│ └── __init__.py
├── transform
│ ├── __init__.py
│ └── fft.py
├── proc
│ ├── __init__.py
│ ├── detrend.py
│ └── wiener.py
├── __init__.py
└── compat.py
├── benchmarks
├── notebooks
│ ├── readme.md
│ └── patch_v_xarray.ipynb
├── readme.md
└── test_io_benchmarks.py
├── environment.yml
├── Dockerfile
├── MANIFEST.in
├── .pre-commit-config.yaml
├── readme.md
└── .gitignore
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/test_utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 |
--------------------------------------------------------------------------------
/tests/test_compat.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for compatibility module.
3 | """
4 |
--------------------------------------------------------------------------------
/scripts/_templates/notes.md:
--------------------------------------------------------------------------------
1 |
2 | :::{.callout-note}
3 | {{ note_text }}
4 | :::
5 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | .quarto/
2 | _site/*
3 |
4 | /.quarto/
5 |
6 | **/*.quarto_ipynb
7 |
--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DASDAE/dascore/HEAD/docs/_static/logo.png
--------------------------------------------------------------------------------
/docs/_static/diataxis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DASDAE/dascore/HEAD/docs/_static/diataxis.png
--------------------------------------------------------------------------------
/.github/test_condarc.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | - defaults
4 | auto_activate_base: false
5 |
--------------------------------------------------------------------------------
/docs/_static/patch_n_spool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DASDAE/dascore/HEAD/docs/_static/patch_n_spool.png
--------------------------------------------------------------------------------
/dascore/io/sintela_binary/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Sintela binary reader.
3 | """
4 | from .core import SintelaBinaryV3
5 |
--------------------------------------------------------------------------------
/dascore/clients/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | DAS Core module for accessing remote resources.
3 | """
4 | from __future__ import annotations
5 |
--------------------------------------------------------------------------------
/dascore/io/wav/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for writing wave files to disk.
3 | """
4 | from __future__ import annotations
5 | from .core import WavIO
6 |
--------------------------------------------------------------------------------
/docs/_publish.yml:
--------------------------------------------------------------------------------
1 | - source: project
2 | netlify:
3 | - id: da79b12f-cb25-4fcc-aeb6-f19705848130
4 | url: 'https://dascore.netlify.app'
5 |
--------------------------------------------------------------------------------
/docs/changelog.qmd:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | The [releases page](https://github.com/DASDAE/dascore/releases) tracks changes from one version to another.
4 |
--------------------------------------------------------------------------------
/docs/contributors.qmd:
--------------------------------------------------------------------------------
1 | # Contributors
2 |
3 | A huge thanks to [all the DASCore contributors](https://github.com/DASDAE/dascore/graphs/contributors)!
4 |
--------------------------------------------------------------------------------
/dascore/io/silixah5/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | IO support for Silixa HDF5 files.
3 |
4 | Website: https://silixa.com/
5 | """
6 |
7 | from .core import SilixaH5V1
8 |
--------------------------------------------------------------------------------
/dascore/io/pickle/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | A module for reading and writing pickle format.
3 | """
4 | from __future__ import annotations
5 | from .core import PickleIO
6 |
--------------------------------------------------------------------------------
/.github/min_deps_environment.yml:
--------------------------------------------------------------------------------
1 | name: dascore
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - pytables
6 | - h5py
7 | - pooch
8 | - tk # needed for plotting
9 |
--------------------------------------------------------------------------------
/dascore/io/ap_sensing/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | IO support for AP sensing interrogators.
3 |
4 | Website: https://www.apsensing.com/
5 | """
6 |
7 | from .core import APSensingV10
8 |
--------------------------------------------------------------------------------
/docs/notes/notes.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Notes
3 | ---
4 |
5 | This section of the documentation provides understanding-oriented explanations of DASCore's implementation and design decisions.
6 |
--------------------------------------------------------------------------------
/dascore/io/tdms/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for reading and writing TDMS fiber data recorded by Silixa.
3 | """
4 | from __future__ import annotations
5 | from .core import TDMSFormatterV4713
6 |
--------------------------------------------------------------------------------
/benchmarks/notebooks/readme.md:
--------------------------------------------------------------------------------
1 | # Notebooks
2 | These are simple notebooks that are run manually to gauge relative
3 | performance. Pre-commit (nbstripout) should strip out notebook outputs to
4 | ensure the files don't get too large.
5 |
--------------------------------------------------------------------------------
/dascore/io/dasdae/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for the DASDAE format.
3 |
4 | Note
5 | ----
6 | This is an experimental format and is subject to change.
7 | """
8 | from __future__ import annotations
9 | from .core import DASDAEV1
10 |
--------------------------------------------------------------------------------
/dascore/io/prodml/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for prodML format.
3 |
4 | More info about ProdML can be found here:
5 | https://www.energistics.org/prodml-developers-users
6 | """
7 | from __future__ import annotations
8 | from .core import ProdMLV2_0, ProdMLV2_1
9 |
--------------------------------------------------------------------------------
/dascore/io/febus/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for Febus format.
3 |
4 | This is used by the Febus DAS interrogator.
5 |
6 | More info about febus can be found here: https://www.febus-optics.com/en/
7 | """
8 | from __future__ import annotations
9 | from .core import Febus1, Febus2
10 |
--------------------------------------------------------------------------------
/scripts/_templates/parent_source_block.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | | {{ origin_txt }} |
4 | [source]({{ source_url }}) |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/dascore/core/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Core routines and functionality for processing distributed fiber data.
3 | """
4 | from __future__ import annotations
5 | from .patch import Patch # noqa
6 | from .coords import CoordSummary, get_coord
7 | from .coordmanager import get_coord_manager, CoordManager
8 |
--------------------------------------------------------------------------------
/dascore/io/optodas/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for OptoDAS format.
3 |
4 | This is used by the OptoDAS interrogator made by Alcatel Submarine Networks.
5 |
6 | More info here: https://web.asn.com/
7 | """
8 | from __future__ import annotations
9 | from .core import OptoDASV8, OptoDASV10
10 |
--------------------------------------------------------------------------------
/tests/test_utils/test_accessor.py:
--------------------------------------------------------------------------------
1 | """Tests for creating/registering accessors."""
2 |
3 | from __future__ import annotations
4 |
5 |
6 | # class TestDFSBasics:
7 | # """Test the basic dascore namespace functions."""
8 | #
9 | # def test_namespace_exists(self, terra15_das_array):
10 |
--------------------------------------------------------------------------------
/dascore/io/h5simple/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for generic H5 files.
3 |
4 | This attempts to use some common names to read in some very simple h5 files.
5 |
6 | The FORESEE data in PubDAS motivated this module.
7 | """
8 | from __future__ import annotations
9 |
10 | from .core import H5Simple
11 |
--------------------------------------------------------------------------------
/docs/acknowledgements.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | ---
3 |
4 |
7 |
8 | # Acknowledgments
9 |
10 | - DASDAE is supported in part by the NSF Geoinformatics Program, under grant [#2148614](https://nsf.gov/awardsearch/showAward?AWD_ID=2148614)
11 |
--------------------------------------------------------------------------------
/dascore/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Utilities for dascore."""
2 | from __future__ import annotations
3 | from .time import to_datetime64, to_timedelta64
4 | from .moving import (
5 | move_max,
6 | move_mean,
7 | move_median,
8 | move_min,
9 | move_std,
10 | move_sum,
11 | moving_window,
12 | )
13 |
--------------------------------------------------------------------------------
/docs/recipes/overview.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Overview"
3 | ---
4 |
5 | Welcome to DASCore's cookbook! This is a collection of examples (recipes) to demonstrate how to perform various tasks with DASCore. Use the collapsible menu in the sidebar to browse the recipes and consider [contributing](../contributing/contributing.qmd) some examples of your own.
6 |
--------------------------------------------------------------------------------
/.github/actions/build-docs/action.yml:
--------------------------------------------------------------------------------
1 | name: "Build DASCore Docs"
2 | description: "Builds DASCore's Documentation."
3 |
4 | runs:
5 | using: "composite"
6 | steps:
7 |
8 | - uses: ./.github/actions/prep_doc_build
9 |
10 | - name: build quarto project
11 | shell: bash -l {0}
12 | run: |
13 | quarto render docs
14 |
--------------------------------------------------------------------------------
/tests/test_transform/test_transformations.py:
--------------------------------------------------------------------------------
1 | """General tests for transformations."""
2 |
3 | from __future__ import annotations
4 |
5 | import pytest
6 |
7 |
8 | def test_deprecated(random_patch):
9 | """Ensure the tran patch namespace is deprecated."""
10 | with pytest.warns(DeprecationWarning):
11 | _ = random_patch.tran
12 |
--------------------------------------------------------------------------------
/tests/test_viz/conftest.py:
--------------------------------------------------------------------------------
1 | """Configuration for all vizualization tests."""
2 |
3 | from __future__ import annotations
4 |
5 | import matplotlib.pyplot as plt
6 | import pytest
7 |
8 |
9 | @pytest.fixture(scope="function", autouse=True)
10 | def close_figures():
11 | """Close all figures after each test."""
12 | yield
13 | plt.close("all")
14 |
--------------------------------------------------------------------------------
/dascore/io/dashdf5/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Basic support for DAS-HDF5, a subset of CF (climate and forecasting).
3 |
4 | This was created mainly for reading PoroTomo data from Brady Hotsprings.
5 |
6 | More info on PoroTomo here:
7 | https://github.com/openEDI/documentation/tree/main/PoroTomo
8 | """
9 | from __future__ import annotations
10 |
11 | from .core import DASHDF5
12 |
--------------------------------------------------------------------------------
/scripts/_templates/signature.html:
--------------------------------------------------------------------------------
1 | :::{.padded_bottom_10pt}
2 |
3 | :::{.def_block}
4 | {% if params %}
5 |
6 | {{ name }}(
7 | {%- for param in params %}
8 | {{ param }},
9 | {%- endfor %}
10 | {{ return_line }}
11 |
12 | {% else %}
13 |
14 | {{ name }}({{ return_line }}
15 |
16 | {% endif %}
17 | :::
18 |
19 | :::
20 |
--------------------------------------------------------------------------------
/dascore/io/sentek/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for reading DAS data recorded by the Sentek interrogator.
3 |
4 | Examples
5 | --------
6 |
7 | import dascore as dc
8 | from dascore.utils.downloader import fetch
9 |
10 | path_to_sentek_file = fetch("DASDMSShot00_20230328155653619.das")
11 | sentek_patch = dc.spool(path_to_sentek_file)[0]
12 | """
13 | from .core import SentekV5
14 |
--------------------------------------------------------------------------------
/.github/doc_environment.yml:
--------------------------------------------------------------------------------
1 | name: dascore
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.12
6 | - pytest
7 | - pydantic>2.0
8 | - pip
9 | - pandas
10 | - pooch>=1.2
11 | - xarray
12 | - pre-commit
13 | - pytables
14 | - h5py
15 | - matplotlib
16 | - scipy>=1.10.0
17 | - findiff
18 | - jupyter
19 | - nbformat
20 | - tk # needed for plotting
21 |
--------------------------------------------------------------------------------
/dascore/io/rsf/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | RSF format support module.
3 |
4 | Notes
5 | -----
6 | - output has been tested on Madagascar version 3.1-git
7 |
8 |
9 | Examples
10 | --------
11 | import dascore as dc
12 |
13 | # get the path to a random DAS file.
14 | patch = dc.get_example_patch()
15 | patch.io.write("test_out.rsf","rsf",data_path="test_out.rsf")
16 |
17 | """
18 |
19 | from .core import RSFV1
20 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: dascore
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - pytest
6 | - numpy>=1.24
7 | - pydantic>=2.1
8 | - pip
9 | - pandas>=2.0
10 | - pooch>=1.2
11 | - xarray
12 | - pre-commit
13 | - pytables
14 | - h5py
15 | - matplotlib>=3.5
16 | - scipy>=1.15.0
17 | - findiff
18 | - jupyter
19 | - nbformat
20 | - pint
21 | - typing_extensions>=4.12
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 | - name: Ask a question
4 | url: https://github.com/DASDAE/dascore/discussions/categories/q-a
5 | about: Please ask and answer questions in the discussion board
6 | - name: Share an idea, a missing feature or anything else
7 | url: https://github.com/DASDAE/dascore/discussions/
8 | about: Please give us any feedback in the discussion board
9 |
--------------------------------------------------------------------------------
/dascore/io/gdr/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for the Geothermal Data Repository (gdr) h5 format.
3 |
4 | The GDR format is a combination of ProdML and the EarthScope DMC's metadata
5 | spec. It houses many data sets, not just DFOS.
6 |
7 | Find more information here: https://gdr.openei.org/. Information regarding
8 | the DAS format can be found here: https://gdr.openei.org/das_data_standard
9 | """
10 |
11 | from .core import GDR_V1
12 |
--------------------------------------------------------------------------------
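A short usage sketch for the GDR reader above, mirroring tests/test_io/test_gdr/test_gdr.py found later in this listing (`fetch` downloads and caches the bundled example file):

```python
from dascore.io.gdr import GDR_V1
from dascore.utils.downloader import fetch

path = fetch("gdr_1.h5")        # download/cache the GDR example file
patch = GDR_V1().read(path)[0]  # read() returns a spool; take the first patch
print(patch.dims)
```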
/docs/supported_formats.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Supported File Formats
3 | execute:
4 | warning: false
5 | ---
6 |
7 | ```{python}
8 | #| echo: false
9 | #| output: asis
10 | import pandas as pd
11 | from dascore.io.core import FiberIO
12 | out_str = FiberIO.get_supported_io_table().replace(True, value='✅').replace(False, value='❌').to_markdown(index=False, stralign="center")
13 |
14 | out_str += '\n: {.striped}'
15 | print(out_str)
16 | ```
17 |
--------------------------------------------------------------------------------
/dascore/version.py:
--------------------------------------------------------------------------------
1 | """Module for reporting the version of dascore."""
2 |
3 | from __future__ import annotations
4 |
5 | from contextlib import suppress
6 | from importlib.metadata import PackageNotFoundError, version
7 |
8 | __version__ = "0.0.0"
9 |
10 | # try to get version from installed metadata
11 | with suppress(PackageNotFoundError):
12 | __version__ = version("dascore")
13 |
14 | __last_version__ = ".".join(__version__.split(".")[:3])
15 |
--------------------------------------------------------------------------------
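An illustration, with a hypothetical development version string, of what the `__last_version__` line above produces:

```python
# Hypothetical full version as reported by package metadata during development.
version_str = "0.1.2.dev3+g1a2b3c4"
last_version = ".".join(version_str.split(".")[:3])
print(last_version)  # -> "0.1.2"
```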
/Dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 |
3 | FROM ubuntu:22.04
4 | RUN apt update && apt install -y wget
5 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
6 | RUN chmod +x Miniconda3-latest-Linux-x86_64.sh
7 | RUN ./Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda3
8 | RUN apt autoremove && apt autoclean
9 | RUN rm Miniconda3-latest-Linux-x86_64.sh
10 | ENV PATH="/opt/miniconda3/bin:$PATH"
11 | RUN conda install dascore -c conda-forge
12 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # This file controls the contents of the created distribution
2 | # https://packaging.python.org/en/latest/guides/using-manifest-in/
3 |
4 | # by default exclude everything
5 | exclude *
6 | prune *
7 |
8 | # Then add only the needed things to distribution (including tests)
9 | include pyproject.toml
10 | include README.md
11 | include docs/LICENSE
12 | graft src
13 | graft dascore
14 | graft tests
15 |
16 | # now re-remove compiled files
17 | global-exclude __pycache__ *pyc
18 |
--------------------------------------------------------------------------------
/dascore/io/xml_binary/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for reading data stored in xml_binary format.
3 |
4 | This format is a directory which contains metadata as a single xml file
5 | as well as any number of binary files (raw numeric buffers) which contain
6 | information about their start time in the file name. An example directory
7 | might look like this:
8 |
9 | data_folder
10 | metadata.xml
11 | DAS_20240530T011500_000000Z.raw
12 | DAS_20240530T011501_000000Z.raw
13 | """
14 | from __future__ import annotations
15 | from .core import XMLBinaryV1
16 |
--------------------------------------------------------------------------------
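A minimal sketch of reading such a directory; the directory name is illustrative, and it is assumed here that `dc.spool` can index an xml_binary directory:

```python
import dascore as dc

# "data_folder" is the directory holding metadata.xml and the *.raw files.
spool = dc.spool("data_folder")
patch = spool[0]
```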
/.github/test_code.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Script to run tests while accounting for sporadic macOS failures.
4 | args="tests -s --cov dascore --cov-append --cov-report=xml"
5 | if [[ "$1" == "doctest" ]]; then
6 | args="dascore --doctest-modules"
7 | fi
8 | if [[ "$1" == "profile" ]]; then
9 | args="benchmarks --codspeed"
10 | fi
11 |
12 | exit_code=0
13 |
14 | python -m pytest $args || exit_code=$?
15 |
16 | # Check the exit code is related to sporadic failures on mac, see #312
17 | if [ $exit_code -ne 132 ] && [ $exit_code -ne 0 ]; then
18 | exit $exit_code
19 | fi
20 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Report a bug or unexpected behavior
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 | ---
8 |
9 | ## Description
10 |
13 |
14 | ## Example
15 |
18 |
19 | ## Expected behavior
20 |
23 |
24 | ## Versions
25 | - OS [e.g. Ubuntu 20.04]:
26 | - DASCore Version [e.g. 0.0.5]:
27 | - Python Version [e.g. 3.10]:
28 |
--------------------------------------------------------------------------------
/dascore/viz/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for static, matplotlib-based visualizations and figure generation.
3 | """
4 | from __future__ import annotations
5 | from dascore.utils.misc import MethodNameSpace
6 |
7 | from .spectrogram import spectrogram
8 | from .waterfall import waterfall
9 | from .wiggle import wiggle
10 | from .map_fiber import map_fiber
11 |
12 |
13 | class VizPatchNameSpace(MethodNameSpace):
14 | """A class for storing visualization namespace."""
15 |
16 | waterfall = waterfall
17 | spectrogram = spectrogram
18 | wiggle = wiggle
19 | map_fiber = map_fiber
20 |
--------------------------------------------------------------------------------
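A usage sketch for the namespace defined above, mirroring scripts/paper/make_viz_figure.py later in this listing; patches expose these functions through `patch.viz`:

```python
import dascore as dc

patch = dc.get_example_patch("example_event_2")
patch.viz.waterfall(scale=0.5)                           # waterfall plot
patch.select(distance=(650, 750)).viz.wiggle(scale=0.5)  # wiggle plot of a subset
```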
/.github/actions/prep_doc_build/action.yml:
--------------------------------------------------------------------------------
1 | name: "prepare for doc build"
2 | description: "Installs quarto, renders api docs, prints quarto version"
3 |
4 | runs:
5 | using: "composite"
6 | steps:
7 | - name: Install quarto
8 | uses: quarto-dev/quarto-actions/setup@v2
9 | with:
10 | version: 1.3.450
11 | tinytex: true
12 |
13 | - name: print quarto version
14 | shell: bash -l {0}
15 | run: |
16 | quarto --version
17 |
18 | - name: render API docs
19 | shell: bash -l {0}
20 | run: |
21 | python scripts/build_api_docs.py
22 |
--------------------------------------------------------------------------------
/dascore/transform/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | A module for applying transformation to Patches.
3 |
4 | Transforms are defined as
5 | """
6 | from __future__ import annotations
7 |
8 | from .differentiate import differentiate
9 | from .fft import rfft
10 | from .fourier import dft, idft, stft, istft
11 | from .integrate import integrate
12 | from .hilbert import hilbert, envelope, phase_weighted_stack
13 | from .spectro import spectrogram
14 | from .strain import velocity_to_strain_rate, velocity_to_strain_rate_edgeless, radians_to_strain
15 | from .dispersion import dispersion_phase_shift
16 | from .taup import tau_p
17 |
--------------------------------------------------------------------------------
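A minimal sketch using one of the transforms above; the exact call signature (`dim="time"`) is an assumption based on how other patch functions in this listing accept dimension arguments:

```python
import dascore as dc
from dascore.transform import dft

patch = dc.get_example_patch()
# Discrete Fourier transform along the time dimension (assumed signature).
freq_patch = dft(patch, dim="time")
```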
/scripts/_templates/table.html:
--------------------------------------------------------------------------------
1 | {# A template for making tables. #}
2 |
3 | {%- if caption %}
4 |
5 | {{ caption }}
6 |
7 | {%- endif %}
8 |
9 |
10 | {%- for col in columns %}
11 | | {{ col }} |
12 | {%- endfor %}
13 |
14 |
15 |
16 | {%- for row in rows %}
17 |
18 | {%- for val in row %}
19 | | {{ val }} |
20 | {%- endfor %}
21 |
22 | {%- endfor %}
23 |
24 |
25 |
--------------------------------------------------------------------------------
/tests/test_proc/test_detrend.py:
--------------------------------------------------------------------------------
1 | """Tests for detrending functions."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 |
7 |
8 | class TestDetrend:
9 | """Tests for detrending data."""
10 |
11 | def test_detrend(self, random_patch):
12 | """Ensure detrending removes mean."""
13 | new = random_patch.new(data=random_patch.data + 10)
14 | # perform detrend, ensure all mean values are close to zero
15 | det = new.detrend(dim="time", type="linear")
16 | means = np.mean(det.data, axis=det.get_axis("time"))
17 | assert np.allclose(means, 0)
18 |
--------------------------------------------------------------------------------
/dascore/io/neubrex/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Support for Neubrex H5 DSS/DTS files.
3 |
4 | This module was written to read the DSS/DTS files created by Neubrex for
5 | the Forge dataset: https://gdr.openei.org/submissions/1565
6 |
7 | The citation for the dataset is:
8 |
9 | Energy and Geoscience Institute at the University of Utah. (2023).
10 | Utah FORGE: Well 16B(78)-32 2023 Neubrex Energy Services Circulation
11 | Test Period with Fiber Optics Monitoring [data set].
12 | Retrieved from https://dx.doi.org/10.15121/2222469.
13 | """
14 | from __future__ import annotations
15 |
16 | from .core import NeubrexRFSV1, NeubrexDASV1
17 |
--------------------------------------------------------------------------------
/tests/test_integrations/test_doc_examples.py:
--------------------------------------------------------------------------------
1 | """Tests for some doc examples which had problems at one point."""
2 |
3 | from __future__ import annotations
4 |
5 | import matplotlib.pyplot as plt
6 |
7 | import dascore as dc
8 |
9 |
10 | class TestQuickStart:
11 | """A few examples from the quickstart."""
12 |
13 | def test_filter_plot(self):
14 | """Test get, taper, filter, plot."""
15 | patch = (
16 | dc.get_example_patch("example_event_1")
17 | .taper(time=0.05)
18 | .pass_filter(time=(None, 300))
19 | )
20 | ax = patch.viz.waterfall(scale=0.2)
21 | assert isinstance(ax, plt.Axes)
22 |
--------------------------------------------------------------------------------
/tests/test_io/test_sentek/test_sentek.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests specific to the Sentek format.
3 | """
4 |
5 | import numpy as np
6 |
7 | from dascore.compat import random_state
8 | from dascore.io.sentek import SentekV5
9 |
10 |
11 | class TestSentekV5:
12 | """Tests for Sentek format that aren;t covered by common tests."""
13 |
14 | def test_das_extension_not_sentek(self, tmp_path_factory):
15 | """Ensure a non-sentek file with a das extension isn't id as sentek."""
16 | path = tmp_path_factory.mktemp("sentek_test") / "not_sentek.das"
17 | ar = random_state.random(10)
18 | with path.open("wb") as fi:
19 | np.save(fi, ar)
20 | sentek = SentekV5()
21 | assert not sentek.get_format(path)
22 |
--------------------------------------------------------------------------------
/tests/test_utils/test_progress.py:
--------------------------------------------------------------------------------
1 | """Test the progress bar."""
2 |
3 | from __future__ import annotations
4 |
5 | from rich.progress import Progress
6 |
7 | import dascore as dc
8 | from dascore.utils.progress import get_progress_instance, track
9 |
10 |
11 | class TestProgressBar:
12 | """Tests for the rich progress bar."""
13 |
14 | def test_progressbar_shows(self, monkeypatch):
15 | """Undo debug patch to progress bar shows."""
16 | monkeypatch.setattr(dc, "_debug", False)
17 | for _ in track([1, 2, 3], "testing_tracker"):
18 | pass
19 |
20 | def test_get_basic_progress(self):
21 | """Ensure we can return a basic progress bar."""
22 | pbar = get_progress_instance("basic")
23 | assert isinstance(pbar, Progress)
24 |
--------------------------------------------------------------------------------
/dascore/io/segy/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | SEGY format support module.
3 |
4 | Notes
5 | -----
6 | - Distance information is not found in most SEGY DAS files so returned
7 | dimensions are "channel" and "time" rather than "distance" and "time".
8 | - Segy standards found at: https://library.seg.org/pb-assets/technical-standards
9 |
10 | segy v1 spec: seg_y_rev1-1686080991247.pdf
11 |
12 | segy v2 spec: seg_y_rev2_0-mar2017-1686080998003.pdf
13 |
14 | segy v2.1 spec: seg_y_rev2_1-oct2023-1701361639333.pdf
15 |
16 | Examples
17 | --------
18 | import dascore as dc
19 | from dascore.utils.downloader import fetch
20 |
21 | # get the path to a segy file.
22 | path = fetch("conoco_segy_1.sgy")
23 |
24 | segy_patch = dc.spool(path)[0]
25 | """
26 |
27 | from .core import SegyV1_0, SegyV2_0, SegyV2_1
28 |
--------------------------------------------------------------------------------
/tests/test_io/test_optodas/test_optodas.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for optoDAS files.
3 | """
4 |
5 | import dascore as dc
6 | from dascore.io.optodas import OptoDASV8
7 | from dascore.utils.downloader import fetch
8 |
9 |
10 | class TestOptoDASIssues:
11 | """Test case related to issues in OptoDAS parser."""
12 |
13 | def test_read_decimated_patch(self):
14 | """Tests for reading spatially decimated patch (#419)"""
15 | path = fetch("decimated_optodas.hdf5")
16 | fiber_io = OptoDASV8()
17 |
18 | fmt_str, version_str = fiber_io.get_format(path)
19 | assert (fmt_str, version_str) == (fiber_io.name, fiber_io.version)
20 |
21 | spool = fiber_io.read(path)
22 | patch = spool[0]
23 | assert isinstance(patch, dc.Patch)
24 | assert patch.data.shape
25 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | # Lint the code using the defined pre-commits
2 | name: LintCode
3 | on: [push]
4 |
5 | jobs:
6 | lint_code:
7 | runs-on: ubuntu-latest
8 |
9 | # only run if CI isn't turned off
10 | if: github.event_name == 'push' || !contains(github.event.pull_request.labels.*.name, 'no_ci')
11 |
12 | steps:
13 | - uses: actions/checkout@v4
14 | with:
15 | fetch-tags: "true"
16 | fetch-depth: '0'
17 |
18 | - name: Install uv
19 | uses: astral-sh/setup-uv@v3
20 |
21 | - uses: actions/setup-python@v5
22 | with:
23 | python-version: '3.12'
24 |
25 | - name: install linting packages
26 | run: uv tool install pre-commit
27 |
28 | - name: run all precommits
29 | run: uv tool run pre-commit run --all
30 |
--------------------------------------------------------------------------------
/docs/recipes/edge_effects.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Taper Edge Effects
3 | execute:
4 | warning: false
5 | ---
6 |
7 | [`taper`](`dascore.proc.taper.taper`) is often used to manage "edge effects" associated with several other types of processing such as filtering. For example:
8 |
9 | ### Bandpass filtering without tapering
10 |
11 | ```{python}
12 | import dascore as dc
13 |
14 | patch = (
15 | dc.get_example_patch('example_event_1')
16 | .pass_filter(time=(None, 300))
17 | )
18 |
19 | patch.viz.waterfall(show=True);
20 | ```
21 |
22 | ### Bandpass filtering with tapering
23 |
24 | ```{python}
25 | import dascore as dc
26 |
27 | patch = (
28 | dc.get_example_patch('example_event_1')
29 | .taper(time=0.05)
30 | .pass_filter(time=(None, 300))
31 | )
32 |
33 | patch.viz.waterfall(show=True);
34 | ```
35 |
--------------------------------------------------------------------------------
/dascore/proc/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing patch processing routines.
3 | """
4 | from __future__ import annotations
5 |
6 | import dascore.proc.aggregate as agg
7 | from .basic import * # noqa
8 | from .coords import * # noqa
9 | from .correlate import correlate, correlate_shift
10 | from .detrend import detrend
11 | from .filter import median_filter, pass_filter, sobel_filter, savgol_filter, gaussian_filter, slope_filter, notch_filter
12 | from .resample import decimate, interpolate, resample
13 | from .rolling import rolling
14 | from .taper import taper, taper_range
15 | from .mute import line_mute, slope_mute
16 | from .units import convert_units, set_units, simplify_units
17 | from .whiten import whiten
18 | from .hampel import hampel_filter
19 | from .wiener import wiener_filter
20 | from .align import align_to_coord
21 |
--------------------------------------------------------------------------------
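A usage sketch chaining a few of the processing functions above as patch methods; the `taper`/`pass_filter` calls mirror docs/recipes/edge_effects.qmd and `detrend` mirrors its own docstring:

```python
import dascore as dc

patch = dc.get_example_patch("example_event_1")
processed = (
    patch.taper(time=0.05)           # taper edges to reduce edge effects
    .pass_filter(time=(None, 300))   # keep frequencies below 300
    .detrend("time")                 # remove a linear trend along time
)
```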
/docs/recipes/docker_basic.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Using DASCore with Docker
3 | execute:
4 | warning: false
5 | ---
6 |
7 | If you have [Docker](https://www.docker.com/) installed, you may also run the stable build of DASCore in a Docker container.
8 |
9 | First, you need to clone the DASCore main repository:
10 | ```bash
11 | git clone https://github.com/DASDAE/dascore
12 | ```
13 |
14 | Then go to the DASCore directory and run docker build:
15 | ```bash
16 | cd dascore
17 | docker build -t dascore .
18 | ```
19 |
20 | To run a Python script with DASCore, mount the directory containing your scripts to the `/app` directory in the image using the `-v` flag:
21 |
22 | ```bash
23 | docker run -it -v $PWD:/app dascore python /app/test.py
24 | ```
25 |
26 | In the example above, we assume `test.py` is in the current working directory (`$PWD`).
27 |
--------------------------------------------------------------------------------
/scripts/paper/make_viz_figure.py:
--------------------------------------------------------------------------------
1 | """A script to make the vizualization figure of the DASCore paper."""
2 |
3 | import matplotlib.pyplot as plt
4 |
5 | import dascore as dc
6 |
7 | # setup matplotlib figure/axis
8 | _, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4.5))
9 |
10 | # load example patch
11 | patch = dc.get_example_patch("example_event_2")
12 |
13 | # sub-select only center channels
14 | sub_patch = patch.select(distance=(650, 750))
15 |
16 | # plot waterfall
17 | patch.viz.waterfall(ax=ax1, scale=0.5)
18 | # plot wiggle
19 | sub_patch.viz.wiggle(ax=ax2, scale=0.5)
20 |
21 | # Add subplot labels
22 | ax1.text(0.01, 0.99, "A", ha="left", va="top", transform=ax1.transAxes, size=24)
23 | ax2.text(0.01, 0.99, "B", ha="left", va="top", transform=ax2.transAxes, size=24)
24 |
25 | plt.tight_layout()
26 | plt.savefig("waterfalls_and_wiggles.pdf")
27 | plt.show()
28 |
--------------------------------------------------------------------------------
/dascore/io/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Modules for reading and writing fiber data.
3 | """
4 | from __future__ import annotations
5 |
6 | import dascore.utils.pd
7 | from dascore.io.core import FiberIO, read, scan, scan_to_df, write, PatchFileSummary
8 | from dascore.utils.io import BinaryReader, BinaryWriter
9 | from dascore.utils.hdf5 import (
10 | HDF5Writer,
11 | HDF5Reader,
12 | PyTablesWriter,
13 | PyTablesReader,
14 | H5Reader,
15 | H5Writer,
16 | )
17 | from dascore.utils.misc import MethodNameSpace
18 | from dascore.utils.pd import dataframe_to_patch, patch_to_dataframe
19 | from dascore.utils.io import (
20 | xarray_to_patch,
21 | patch_to_xarray,
22 | patch_to_obspy,
23 | obspy_to_patch,
24 | )
25 |
26 |
27 | class PatchIO(MethodNameSpace):
28 | write = write
29 | to_dataframe = patch_to_dataframe
30 | to_xarray = patch_to_xarray
31 | to_obspy = patch_to_obspy
32 |
--------------------------------------------------------------------------------
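A sketch of the `PatchIO` namespace defined above; `patch.io.write` appears in the RSF module docstring, and `to_dataframe`/`to_xarray` are assumed to take no extra arguments:

```python
import dascore as dc

patch = dc.get_example_patch()
df = patch.io.to_dataframe()   # Patch -> pandas DataFrame
da = patch.io.to_xarray()      # Patch -> xarray DataArray (requires xarray)
```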
/tests/test_io/test_gdr/test_gdr.py:
--------------------------------------------------------------------------------
1 | """Tests for the GDR file format."""
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from dascore.io.gdr import GDR_V1
7 | from dascore.utils.downloader import fetch
8 |
9 |
10 | @pytest.fixture(scope="module")
11 | def gpr_path():
12 | """Return the file path to a GDR file."""
13 | return fetch("gdr_1.h5")
14 |
15 |
16 | class TestGDR:
17 | """Misc. tests not covered by common tests."""
18 |
19 | def test_no_snap(self, gpr_path):
20 | """Ensure snap or no snap produces the same coord for this file."""
21 | fiber_io = GDR_V1()
22 | patch1 = fiber_io.read(gpr_path, snap=False)[0]
23 | patch2 = fiber_io.read(gpr_path, snap=True)[0]
24 | time_1 = patch1.get_coord("time")
25 | time_2 = patch2.get_coord("time")
26 | assert len(time_1) == len(time_2)
27 | assert np.all(time_1.values == time_2.values)
28 |
--------------------------------------------------------------------------------
/.github/workflows/profile.yml:
--------------------------------------------------------------------------------
1 | # An action for running DASCore's performance benchmarks.
2 | name: CodSpeed Benchmarks
3 |
4 | on:
5 | push:
6 | branches:
7 | - master
8 | pull_request:
9 | workflow_dispatch:
10 |
11 | env:
12 | python_version: "3.13"
13 |
14 | jobs:
15 | benchmarks:
16 | name: Run benchmarks
17 | runs-on: ubuntu-latest
18 | steps:
19 | - uses: actions/checkout@v4
20 | with:
21 | fetch-tags: "true"
22 | fetch-depth: '0'
23 |
24 | - uses: actions/setup-python@v6
25 | with:
26 | python-version: ${{ env.python_version }}
27 |
28 | - name: Install dependencies
29 | run: pip install ".[profile]"
30 |
31 | - name: Run benchmarks
32 | uses: CodSpeedHQ/action@v4
33 | with:
34 | mode: instrumentation
35 | run: ./.github/test_code.sh profile
36 | token: ${{ secrets.CODSPEED_TOKEN }} # Optional for public repos
37 |
--------------------------------------------------------------------------------
/.github/workflows/get_coverage.yml:
--------------------------------------------------------------------------------
1 | # Calculates new coverage for the base branch and uploads to codecov
2 | name: Coverage
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | calc_coverage:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - uses: actions/checkout@v4
14 | with:
15 | fetch-tags: "true"
16 | fetch-depth: '0'
17 |
18 |
19 | - uses: ./.github/actions/mamba-install-dascore
20 | with:
21 | python-version: "3.12"
22 | cache-number: 1
23 |
24 | - name: run test suite
25 | shell: bash -l {0}
26 | run: |
27 | pytest -s --cov dascore --cov-report=xml
28 |
29 | - uses: codecov/codecov-action@v4
30 | with:
31 | fail_ci_if_error: true
32 | files: ./coverage.xml
33 | flags: unittests # optional
34 | name: master_tests # optional
35 | token: ${{ secrets.CODECOV_TOKEN }} # required
36 |
--------------------------------------------------------------------------------
/.github/workflows/upload_pypi.yml:
--------------------------------------------------------------------------------
1 | # Upload to PyPI when a new release tag is made.
2 | name: PublishPackage
3 | on:
4 | release:
5 | types:
6 | - published
7 |
8 | jobs:
9 | upload:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 | with:
14 | fetch-tags: "true"
15 | fetch-depth: '0'
16 |
17 | - name: setup conda
18 | uses: conda-incubator/setup-miniconda@v3
19 | with:
20 | mamba-version: "*"
21 | channels: conda-forge,defaults
22 | channel-priority: true
23 | environment-file: environment.yml
24 | python-version: 3.11
25 |
26 | - name: create dists
27 | shell: bash -l {0}
28 | run: |
29 | python -m pip install build
30 | python -m build
31 |
32 | - name: publish package
33 | uses: pypa/gh-action-pypi-publish@release/v1
34 | with:
35 | password: ${{ secrets.PYPI_TOKEN }}
36 |
--------------------------------------------------------------------------------
/tests/test_utils/test_deprecated.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for the deprecate decorator.
3 | """
4 |
5 | import pytest
6 |
7 | from dascore.utils.deprecate import deprecate
8 |
9 |
10 | class TestDeprecated:
11 | """Tests for the deprecated decorator."""
12 |
13 | @pytest.fixture(scope="class")
14 | def deprecated_func(self):
15 | """Create a deprecated function."""
16 |
17 | @deprecate("Too old, use new()", since="0.1.0", removed_in="0.2.0")
18 | def old_func(*args, **kwargs):
19 | """Just a dummy function."""
20 | return 42
21 |
22 | return old_func
23 |
24 | def test_warning_issued(self, deprecated_func):
25 | """Ensure a warning is issued."""
26 | msg = "Too old"
27 | with pytest.warns(DeprecationWarning, match=msg):
28 | deprecated_func()
29 |
30 | def test_docstring(self, deprecated_func):
31 | """Ensure the docstring was updated."""
32 | assert "deprecated" in deprecated_func.__doc__
33 |
--------------------------------------------------------------------------------
/docs/recipes/plotting_channel_number.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Plot Channel Number
3 | execute:
4 | warning: false
5 | ---
6 |
7 | Rather than plotting distance or depth, sometimes it is desirable to plot channel number (an index of the non-time dimension).
8 |
9 | This involves three steps:
10 |
11 | 1. Adding a new coordinate called channel number
12 | 2. Setting the channel number as the dimension
13 | 3. Calling the plot function
14 |
15 |
16 | ```{python}
17 | import numpy as np
18 |
19 | import dascore as dc
20 |
21 | patch = dc.get_example_patch()
22 |
23 | # get length of distance axis
24 | dist_len = patch.coord_shapes["distance"][0]
25 |
26 | # create range from 0 to dist_len
27 | channel_number = np.arange(dist_len)
28 |
29 | out = (
30 | # first add channel number to coords.
31 | patch.update_coords(channel_number=("distance", channel_number))
32 | # then make channel number a dimensional coordinate
33 | .set_dims(distance="channel_number")
34 | )
35 |
36 | # then plot
37 | out.viz.waterfall();
38 | ```
39 |
--------------------------------------------------------------------------------
/dascore/__init__.py:
--------------------------------------------------------------------------------
1 | """DASCore - A library for fiber optic sensing."""
2 | from __future__ import annotations
3 |
4 | import warnings
5 |
6 | from rich import print # noqa
7 |
8 | from dascore.core.patch import Patch
9 | from dascore.core.attrs import PatchAttrs
10 | from dascore.core.spool import BaseSpool, spool
11 | from dascore.core.coordmanager import get_coord_manager, CoordManager
12 | from dascore.core.coords import get_coord
13 | from dascore.examples import get_example_patch, get_example_spool
14 | from dascore.io.core import get_format, read, scan, scan_to_df, write
15 | from dascore.units import get_quantity, get_unit
16 | from dascore.utils.patch import patch_function
17 | from dascore.utils.time import to_datetime64, to_timedelta64, to_float
18 | from dascore.version import __last_version__, __version__
19 |
20 | # flag for disabling progress bar when debugging
21 | _debug = False
22 |
23 | # Ensure warnings are issued only once (per warning/line)
24 | warnings.filterwarnings("once", category=UserWarning, module=r"dascore\..*")
25 |
--------------------------------------------------------------------------------
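A quickstart sketch using the top-level names imported above; `get_example_spool` is assumed to return a bundled example spool when called with no arguments:

```python
import dascore as dc

# Get a bundled example spool; dc.spool(path) behaves similarly for files or
# directories of supported files.
spool = dc.get_example_spool()
patch = spool[0]
print(patch)  # patch summary
```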
/scripts/build_api_docs.py:
--------------------------------------------------------------------------------
1 | """Script to build the API docs for dascore."""
2 |
3 | from __future__ import annotations
4 |
5 | import sys
6 | from contextlib import suppress
7 |
8 | from _index_api import get_alias_mapping, parse_project
9 | from _qmd_builder import create_quarto_qmd
10 | from _render_api import render_project
11 | from _validate_links import validate_all_links
12 |
13 | import dascore as dc
14 |
15 | with suppress(AttributeError):
16 | sys.stdout.encoding = "utf-8"
17 |
18 |
19 | if __name__ == "__main__":
20 | print("Building documentation") # noqa
21 | print(f"Parsing project {dc.__name__}") # noqa
22 | data_dict = parse_project(dc)
23 | obj_dict = get_alias_mapping(dc)
24 | print("Generating qmd files") # noqa
25 | render_project(data_dict, obj_dict, debug=False)
26 | # create the quarto info file (needs templating)
27 | print("creating quarto config") # noqa
28 | create_quarto_qmd()
29 | # validate links
30 | print("Validating links") # noqa
31 | validate_all_links()
32 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: scripts/_templates
2 | repos:
3 | - repo: https://github.com/pre-commit/pre-commit-hooks
4 | rev: v2.3.0
5 | hooks:
6 | - id: check-yaml
7 | - id: end-of-file-fixer
8 | - id: check-merge-conflict
9 | - id: mixed-line-ending
10 | args: ['--fix=lf']
11 |
12 | # Ruff is a replacement for flake8 and many other linters (much faster too)
13 | - repo: https://github.com/astral-sh/ruff-pre-commit
14 | # Ruff version.
15 | rev: v0.4.8
16 | hooks:
17 | - id: ruff
18 | args: ["--fix"]
19 | # Run the formatter.
20 | - id: ruff-format
21 |
22 | # ensures __future__ import annotations at top of files which require it
23 | # for the typing features they are using.
24 | - repo: https://github.com/frostming/fix-future-annotations
25 | rev: 0.5.0
26 | hooks:
27 | - id: fix-future-annotations
28 |
29 | # strips out all outputs from notebooks.
30 | - repo: https://github.com/kynan/nbstripout
31 | rev: 0.6.1
32 | hooks:
33 | - id: nbstripout
34 |
--------------------------------------------------------------------------------
/docs/contributing/style_and_linting.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Style and Linting"
3 | ---
4 |
5 | # Linting
6 |
7 | DASCore uses [ruff](https://github.com/astral-sh/ruff) (run via pre-commit) for code linting and formatting.
8 | If you have [properly installed DASCore's pre-commit hooks](dev_install.qmd#setup-pre-commit-hooks) they will be
9 | invoked automatically when you make a git commit. If any complaints are raised, simply address them and try again.
10 |
11 | Alternatively, before making a commit, you can run all the hooks on all the code like so:
12 |
13 | ```shell
14 | pre-commit run --all
15 | ```
16 |
17 | It is often useful to run this command twice before committing because many of the hooks
18 | will automatically fix the issues they raise on the first run.
19 |
20 | # Type Hints
21 |
22 | DASCore makes extensive use of Python 3's [type hints](https://docs.python.org/3/library/typing.html).
23 | Use them to annotate any public functions/methods. See the docstring section of the [documentation page](documentation.qmd)
24 | for more information and some examples.
25 |
--------------------------------------------------------------------------------
/docs/notes/doc_strategy.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: DASCore Documentation Strategy
3 | ---
4 |
5 | DASCore's documentation strives to follow the [diataxis](https://diataxis.fr/) approach. In this system (developed by Daniele Procida) there are four types of documentation, each with different goals:
6 |
7 | 
8 |
9 | The sections of DASCore's documentation can be classified as follows:
10 |
11 | **Tutorials**: Tutorial, Contributing
12 |
13 | **How-to-Guides**: Recipes
14 |
15 | **Explanation**: Notes
16 |
17 | **Reference**: API
18 |
19 | We should try to not mix these types of documentation, but rather provide links in relevant locations. A few implications:
20 |
21 | * Long explanations of theory don't belong in the tutorial section; put a link to a page in the notes section if a long explanation is needed.
22 |
23 | * Tutorial pages should only show basic usage of functions/methods/classes, then [cross reference](../contributing/documentation.qmd#cross-references) the API page.
24 |
25 | * Notes shouldn't spend much time teaching how to use the code; they should be theory-focused.
26 |
--------------------------------------------------------------------------------
/dascore/utils/signal.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities for signal processing.
3 | """
4 |
5 | from scipy.signal import windows
6 |
7 | from dascore.exceptions import ParameterError
8 |
9 | WINDOW_FUNCTIONS = dict(
10 | barthann=windows.barthann,
11 | bartlett=windows.bartlett,
12 | blackman=windows.blackman,
13 | blackmanharris=windows.blackmanharris,
14 | bohman=windows.bohman,
15 | hamming=windows.hamming,
16 | hann=windows.hann,
17 | cos=windows.hann,
18 | nuttall=windows.nuttall,
19 | parzen=windows.parzen,
20 | triang=windows.triang,
21 | ramp=windows.triang,
22 | boxcar=windows.boxcar,
23 | )
24 |
25 |
26 | def _get_window_function(window_type):
27 | """Get the window function to use for taper."""
28 | # get taper function or raise if it isn't known.
29 | if window_type not in WINDOW_FUNCTIONS:
30 | msg = (
31 | f"'{window_type}' is not a known window type. "
32 | f"Options are: {sorted(WINDOW_FUNCTIONS)}"
33 | )
34 | raise ParameterError(msg)
35 | func = WINDOW_FUNCTIONS[window_type]
36 | return func
37 |
--------------------------------------------------------------------------------
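A small sketch exercising the (private) helper above; unknown window names raise `ParameterError`:

```python
from dascore.exceptions import ParameterError
from dascore.utils.signal import _get_window_function

window = _get_window_function("hann")   # returns scipy.signal.windows.hann
taper = window(32)                      # a 32-sample Hann window
print(taper.shape)                      # (32,)

try:
    _get_window_function("not_a_window")
except ParameterError as exc:
    print(exc)                          # lists the known window types
```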
/tests/test_utils/test_downloader.py:
--------------------------------------------------------------------------------
1 | """Tests for dascore's downloader."""
2 |
3 | from __future__ import annotations
4 |
5 | import pandas as pd
6 | import pytest
7 |
8 | from dascore.utils.downloader import fetch, get_registry_df
9 |
10 |
11 | @pytest.fixture()
12 | def registry_df():
13 | """Load the registry df."""
14 | df = get_registry_df()
15 | return df
16 |
17 |
18 | class TestRegistryDF:
19 | """Tests for getting the data registry."""
20 |
21 | def test_dataframe(self, registry_df):
22 | """Ensure a non-empty df was returned."""
23 | assert len(registry_df)
24 | assert isinstance(registry_df, pd.DataFrame)
25 |
26 |
27 | class TestFetch:
28 | """Tests for fetching filepaths of test files."""
29 |
30 | def test_multiple_fetch(self, registry_df):
31 | """Ensure multiple fetch calls return same path."""
32 | path = registry_df["name"].iloc[0]
33 | assert fetch(path) == fetch(path)
34 |
35 | def test_existing_file(self, registry_df):
36 | """Ensure an existing file just returns."""
37 | path = fetch(registry_df["name"].iloc[0])
38 | assert fetch(path) == path
39 |
--------------------------------------------------------------------------------
/dascore/utils/xml.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities for working with xml files.
3 | """
4 |
5 | from xml.etree import ElementTree
6 |
7 |
8 | def xml_to_dict(xml_string):
9 | """Convert a simple xml string to a dict."""
10 | root = ElementTree.fromstring(xml_string)
11 | return _element_to_dict(root)
12 |
13 |
14 | def _element_to_dict(element):
15 | """
16 | Recursively convert an element tree into a dict.
17 |
18 | Note: This function is probably not general enough to handle complicated
19 | xml, use with caution.
20 | """
21 | # Base case: If the element has no children, return its text content
22 | if len(element) == 0:
23 | return element.text
24 |
25 | # Recursive case: Convert children to dictionary
26 | result = {}
27 | for child in element:
28 | child_value = _element_to_dict(child)
29 | if child.tag in result:
30 | if isinstance(result[child.tag], list):
31 | result[child.tag].append(child_value)
32 | else:
33 | result[child.tag] = [result[child.tag], child_value]
34 | else:
35 | result[child.tag] = child_value
36 | return result
37 |
--------------------------------------------------------------------------------
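A small usage sketch for `xml_to_dict` above; repeated tags are collected into a list:

```python
from dascore.utils.xml import xml_to_dict

xml_string = """
<metadata>
  <sensor>fiber_1</sensor>
  <channel>1</channel>
  <channel>2</channel>
</metadata>
"""
out = xml_to_dict(xml_string)
print(out)  # {'sensor': 'fiber_1', 'channel': ['1', '2']}
```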
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 |
9 |
10 | ## Description
11 |
12 |
18 |
19 | ## Checklist
20 |
21 | I have (if applicable):
22 |
23 | - [ ] referenced the GitHub issue this PR closes.
24 | - [ ] documented the new feature with docstrings and/or appropriate doc page.
25 | - [ ] included tests. See [testing guidelines](https://dascore.org/contributing/testing.html).
26 | - [ ] added the "ready_for_review" tag once the PR is ready to be reviewed.
27 |
--------------------------------------------------------------------------------
/tests/test_io/test_sintela_binary/test_sintela_binary.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for sintela binary format.
3 | """
4 |
5 | import shutil
6 | from pathlib import Path
7 |
8 | import pytest
9 |
10 | from dascore.exceptions import InvalidFiberFileError
11 | from dascore.io.sintela_binary import SintelaBinaryV3
12 | from dascore.utils.downloader import fetch
13 |
14 |
15 | class TestScanSintelaBinary:
16 | """Tests for scanning a binary file."""
17 |
18 | @pytest.fixture(scope="class")
19 | def extra_bytes_file(self, tmp_path_factory):
20 | """Create a sintela binary file with extra bytes."""
21 | tmp = tmp_path_factory.mktemp("sintela_binary")
22 | binary_path = Path(fetch("sintela_binary_v3_test_1.raw"))
23 | new = tmp / "extra_bytes.raw"
24 | shutil.copy(binary_path, new)
25 |
26 | with open(new, "ab") as fi:
27 | fi.write(b"some_bytes_des_is")
28 |
29 | return new
30 |
31 | def test_extra_bytes_raises(self, extra_bytes_file):
32 | """Ensure a file with extra bytes raises an exception."""
33 | fiber_io = SintelaBinaryV3()
34 | with pytest.raises(InvalidFiberFileError):
35 | fiber_io.scan(extra_bytes_file)
36 |
--------------------------------------------------------------------------------
/dascore/proc/detrend.py:
--------------------------------------------------------------------------------
1 | """Module for detrending."""
2 |
3 | from __future__ import annotations
4 |
5 | from scipy.signal import detrend as scipy_detrend
6 |
7 | from dascore.constants import PatchType
8 | from dascore.utils.patch import patch_function
9 |
10 |
11 | @patch_function()
12 | def detrend(patch: PatchType, dim, type="linear") -> PatchType:
13 | """
14 | Perform detrending along a given dimension (distance or time) of a patch.
15 |
16 | Parameters
17 | ----------
18 | dim
19 | The dimension ("distance" or "time") along which detrending is applied.
20 | type
21 | Specifies least-squares fit type for detrend,
22 | with "linear" (default) or "constant" as options.
23 |
24 | Returns
25 | -------
26 | The Patch instance after applying the detrend function.
27 |
28 | Examples
29 | --------
30 | >>> import dascore # import dascore library
31 | >>> pa = dascore.get_example_patch() # generate example patch
32 | >>> out = pa.detrend("time") # detrend along the time dimension
33 | """
34 | assert dim in patch.dims
35 | axis = patch.get_axis(dim)
36 | out = scipy_detrend(patch.data, axis=axis, type=type)
37 | return patch.new(data=out)
38 |
--------------------------------------------------------------------------------
/scripts/find_futures.py:
--------------------------------------------------------------------------------
1 | """Find all python files which dont have "from __future__ import annotation"."""
2 |
3 | from __future__ import annotations
4 |
5 | from pathlib import Path
6 |
7 |
8 | def has_search_str(path, search_string):
9 | """Return True if search string in path."""
10 | for line in path.read_text().splitlines():
11 | if search_string in line:
12 | return True
13 | return False
14 |
15 |
16 | def rewrite_file(path, import_str):
17 | """Rewrite file to have import statement."""
18 | out = path.read_text().splitlines()
19 | try:
20 | index = out[1:].index('"""')
21 | except ValueError:
22 | try:
23 | index = out[1:].index("'''")
24 | except ValueError:
25 | index = -1
26 | out.insert(index + 2, import_str)
27 | new_str = "\n".join(out)
28 | with open(path, "w") as fi:
29 | fi.write(new_str)
30 |
31 |
32 | if __name__ == "__main__":
33 | search_str = "from __future__ import annotations"
34 | base = Path(__file__).parent.parent
35 | missing = []
36 | for path in base.rglob("*.py"):
37 | if not has_search_str(path, search_str):
38 | missing.append(path)
39 | rewrite_file(path, search_str)
40 | for path in missing:
41 | print(path) # noqa
42 |
--------------------------------------------------------------------------------
/docs/contributing/general_guidelines.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Guidelines
3 | ---
4 | This page highlights a few guidelines for DASCore development.
5 |
6 | # Branching and versioning
7 |
8 | We create new features or bug fixes in their own branches and merge them into master via pull requests. We may switch
9 | to a more complex branching model if the need arises.
10 |
11 | If substantial new features have been added since the last release we will bump the minor version. If only bug
12 | fixes/minor changes have been made, only the patch version will be bumped. Like most Python projects, we loosely
13 | follow [semantic versioning](https://semver.org/), meaning we will not bump the major version until DASCore
14 | is more stable.
15 |
16 |
17 | # Paths
18 |
19 | Prefer `pathlib.Path` to strings when working with paths. However, when dealing with a very large number of files (e.g., indexers),
20 | strings may be preferred for efficiency.
21 |
22 | # Working with dataframes
23 |
24 | Column names should be snake_cased whenever possible.
25 |
26 | Always access columns with getitem and not getattr (i.e., use `df['column_name']` not `df.column_name`).
27 |
28 | Prefer creating a new `DataFrame`/`Series` to modifying them inplace. Inplace modifications should require opting in
29 | (usually through an `inplace` keyword argument).
30 |
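A minimal sketch of these conventions (the dataframe and column names are made up for illustration):

```python
import pandas as pd

df = pd.DataFrame({"station_name": ["a", "b"], "max_value": [1.0, 2.0]})

# Preferred: snake_case column names, getitem access, and new objects.
values = df["max_value"]
renamed = df.rename(columns={"max_value": "peak_value"})

# Discouraged: attribute access and implicit in-place modification.
# values = df.max_value
# df.rename(columns={"max_value": "peak_value"}, inplace=True)
```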
--------------------------------------------------------------------------------
/tests/test_utils/test_display.py:
--------------------------------------------------------------------------------
1 | """Tests for displaying dascore objects."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 | import dascore as dc
9 | from dascore.utils.display import get_nice_text
10 |
11 |
12 | class TestGetNiceText:
13 | """Tests for converting coordinate to nice looking rich Text."""
14 |
15 | def test_simple_datetime(self):
16 | """Ensure the process works for datetime objects."""
17 | dt = dc.to_datetime64("2023-10-01")
18 | # YMD should just show YMD
19 | txt1 = get_nice_text(dt)
20 | assert str(txt1) == "2023-10-01"
21 | # Unless YMD is 1970-01-01
22 | txt2 = get_nice_text(dc.to_datetime64(0))
23 | assert str(txt2) == "00:00:00"
24 | # Decimals are displayed if present
25 | txt3 = get_nice_text(dc.to_datetime64(1.111111111))
26 | assert str(txt3).endswith(".111111111")
27 |
28 | def test_nat(self):
29 | """Tests for NaT."""
30 | dt = np.datetime64("NaT")
31 | txt = get_nice_text(dt)
32 | assert str(txt) == "NaT"
33 |
34 | def test_timestamp(self):
35 | """Tests for pandas timestamps."""
36 | ts = pd.Timestamp("2012-01-10")
37 | txt = get_nice_text(ts)
38 | assert str(txt) == "2012-01-10"
39 |
--------------------------------------------------------------------------------
/docs/recipes/contributing_to_documentation.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Contributing to Documentation
3 | ---
4 |
5 | Here, we elaborate on how you can build new documentation or make changes to existing DASCore documentation.
6 |
7 | ## Building/Editing documentation
8 |
9 | First, install [Quarto](https://quarto.org/docs/get-started/).
10 |
11 | To verify Quarto is installed properly and check its version:
12 |
13 | ```bash
14 | quarto --version
15 | ```
16 |
17 | If you have not already installed jupyter in your environment, install it using conda:
18 |
19 | ```bash
20 | conda install jupyter
21 | ```
22 |
23 | Go to the DASCore directory on your machine:
24 |
25 | ```bash
26 | cd dascore
27 | ```
28 |
29 | Then generate DASCore's API markdown files by running the following script:
30 |
31 | ```bash
32 | python scripts/build_api_docs.py
33 | ```
34 | Then preview the documentation site:
35 | ```bash
36 | quarto preview docs
37 | ```
38 |
39 | :::{.callout-note}
40 | This will take a few minutes the first time you run it. After that, the results are cached and only the changed files are re-rendered.
41 | :::
42 |
43 | Now you can add new documentation or edit existing files (e.g., "index.qmd") in the /dascore/docs/ directory.
44 | However, if you change any of DASCore's docstrings, you need to re-run the build_api_docs.py script for the changes to appear.
45 |
--------------------------------------------------------------------------------
/dascore/compat.py:
--------------------------------------------------------------------------------
1 | """
2 | Compatibility module for DASCore.
3 |
4 | All components/functions that may be exchanged for other numpy/scipy
5 | compatible libraries should go in this module.
6 | """
7 |
8 | from __future__ import annotations
9 |
10 | from contextlib import suppress
11 |
12 | import numpy as np
13 | from numpy import floor, interp, ndarray # NOQA
14 | from numpy.random import RandomState
15 | from rich.progress import Progress # NOQA
16 | from scipy.interpolate import interp1d # NOQA
17 | from scipy.ndimage import zoom # NOQA
18 | from scipy.signal import decimate, resample, resample_poly # NOQA
19 |
20 | random_state = RandomState(42)
21 |
22 |
23 | class DataArray:
24 | """A dummy class for when xarray isn't installed."""
25 |
26 |
27 | with suppress(ImportError):
28 | from xarray import DataArray # NOQA
29 |
30 |
31 | def array(array):
32 | """Wrapper function for creating 'immutable' arrays."""
33 | out = np.asarray(array)
34 | # Setting the write flag to false makes the array immutable unless
35 | # the flag is switched back.
36 | out.setflags(write=False)
37 | return out
38 |
39 |
40 | def is_array(maybe_array):
41 | """Determine if an object is array like."""
42 | # This is here so that we can support other array types in the future.
43 | return isinstance(maybe_array, np.ndarray)
44 |
--------------------------------------------------------------------------------
/dascore/utils/transformatter.py:
--------------------------------------------------------------------------------
1 | """Implements logic to apply formatting changes to patches from transformations."""
2 |
3 | from __future__ import annotations
4 |
5 | import abc
6 |
7 | from dascore.utils.misc import iterate
8 |
9 |
10 | class BaseTransformatter(abc.ABC):
11 | """Base model for helping to apply transformation format changes."""
12 |
13 | forward_prefix: str = ""
14 | inverse_prefix: str = ""
15 |
16 | def _forward_rename(self, name):
17 | """Rename the dimension for forward transform."""
18 | if name.startswith(self.inverse_prefix):
19 | return name[len(self.inverse_prefix) :]
20 | return f"{self.forward_prefix}{name}"
21 |
22 | def _inverse_rename(self, name):
23 | """Rename the dimension for backward transform."""
24 | if name.startswith(self.forward_prefix):
25 | return name[len(self.forward_prefix) :]
26 | return f"{self.inverse_prefix}{name}"
27 |
28 | def rename_dims(self, dims, index=None, forward=True):
29 | """Rename the dimensions."""
30 | func = self._forward_rename if forward else self._inverse_rename
31 | new = list(iterate(dims))
32 | index_list = iterate(index) if index is not None else range(len(new))
33 | for index in index_list:
34 | new[index] = func(new[index])
35 | return tuple(new)
36 |
37 |
38 | class FourierTransformatter(BaseTransformatter):
39 | """Formatters."""
40 |
41 | forward_prefix: str = "ft_"
42 | inverse_prefix: str = "ift_"
43 |
--------------------------------------------------------------------------------
/.github/workflows/build_deploy_master_docs.yaml:
--------------------------------------------------------------------------------
1 | # This action renders and publishes the development docs whenever
2 | # new commits are added to the master branch.
3 |
4 | name: BuildDeployDevDocs
5 |
6 | on:
7 | # Runs on pushes targeting the default branch
8 | push:
9 | branches: ["master"]
10 |
11 | # Allows you to run this workflow manually from the Actions tab
12 | workflow_dispatch:
13 |
14 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
15 | permissions:
16 | contents: read
17 | pages: write
18 | deployments: write
19 | id-token: write
20 |
21 | # Allow one concurrent deployment
22 | concurrency:
23 | group: "pages"
24 | cancel-in-progress: true
25 |
26 | jobs:
27 | # Build job
28 | build:
29 | runs-on: ubuntu-latest
30 | environment:
31 | name: github-pages
32 | url: ${{ steps.deployment.outputs.page_url }}
33 |
34 | steps:
35 | - uses: actions/checkout@v4
36 | with:
37 | fetch-tags: "true"
38 | fetch-depth: '0'
39 |
40 | - uses: ./.github/actions/mamba-install-dascore
41 | with:
42 | python-version: "3.12"
43 | environment-file: './.github/doc_environment.yml'
44 |
45 | - uses: ./.github/actions/prep_doc_build
46 |
47 | - name: publish docs to netlify
48 | shell: bash -l {0}
49 | env:
50 | QUARTO_PRINT_STACK: true
51 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
52 | run: |
53 | quarto publish docs --no-prompt --no-browser
54 |
--------------------------------------------------------------------------------
/tests/test_io/test_h5simple/test_h5simple.py:
--------------------------------------------------------------------------------
1 | """Tests for simple h5 format."""
2 |
3 | from __future__ import annotations
4 |
5 | import shutil
6 |
7 | import pytest
8 | import tables
9 |
10 | import dascore as dc
11 | from dascore.utils.downloader import fetch
12 |
13 |
14 | class TestH5Simple:
15 | """Tests for h5simple that aren't covered in common tests."""
16 |
17 | @pytest.fixture(scope="class")
18 | def h5simple_path(self):
19 | """Get the path to a h5 simple file."""
20 | return fetch("h5_simple_1.h5")
21 |
22 | @pytest.fixture(scope="class")
23 | def h5simple_with_dim_attrs_path(self, tmp_path_factory):
24 | """Create a h5_simpl which has dimensions specified."""
25 | basic_path = fetch("h5_simple_2.h5")
26 | new_path = tmp_path_factory.mktemp("h5simple_dim_attrs") / "simple.h5"
27 |
28 | shutil.copy2(basic_path, new_path)
29 | with tables.open_file(new_path, "a") as h5:
30 | h5.root._v_attrs["dims"] = "distance,time"
31 | return new_path
32 |
33 | def test_no_snap(self, h5simple_path):
34 | """Ensure when snap is not used it still reads patch."""
35 | patch = dc.read(h5simple_path, file_format="h5simple", snap=False)[0]
36 | assert isinstance(patch, dc.Patch)
37 |
38 | def test_dims_in_attrs(self, h5simple_with_dim_attrs_path):
39 | """Ensure if 'dims' is in attrs it gets used."""
40 | patch = dc.spool(h5simple_with_dim_attrs_path, file_format="h5simple")[0]
41 | assert isinstance(patch, dc.Patch)
42 |
--------------------------------------------------------------------------------
/dascore/io/terra15/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for reading and writing data recorded by Terra15 DAS interrogators.
3 |
4 | Terra15's website can be found [here](https://terra15.com.au/)
5 |
6 | Notes
7 | -----
8 | There are a few implementation details to note. The tricky part of the
9 | implementation is how time is handled. The Terra15 files contain two arrays
10 | corresponding to GPS_time and posix_time. We elected to simply use GPS time.
11 |
12 | However, there are a few issues. First, there can be significant jitter in the
13 | GPS time vector, and occasionally, sample n+1 has a smaller timestamp than n.
14 | This wreaks havoc on the pandas indexes used by xarray. Second, the spacing is
15 | often different from the dT attributes. Our implementation does the following:
16 |
17 | 1. dt = (max(gps_time) - min(gps_time)) / (len(gps_time) - 1). This ensures
18 | min(gps_time) + len(gps_time) * dt ≈ max(gps_time).
19 |
20 | 2. The time array returned by the parser is calculated by min(gps_time) +
21 | dt * np.arange(len(gps_time)) which ensures it is monotonically increasing.
22 | The time is then cast to datetime64 with
23 | [to_datetime64](dascore.utils.time.to_datetime64).
24 |
25 | 3. The start/end time returned by the scan function are gps_time[0] and
26 | gps_time[-1], cast to datetime64 objects.
27 |
28 | It is very important that the scan method returns exactly the same time_min
29 | and time_max as contained in the patch when loaded into memory.
30 | """
31 | from __future__ import annotations
32 | from .core import Terra15FormatterV4, Terra15FormatterV5, Terra15FormatterV6
33 |
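The time handling described in the notes above can be sketched in a few lines of numpy (illustrative only, with made-up GPS times; this is not part of the module itself):

```python
import numpy as np

import dascore as dc

# Slightly jittery GPS times in seconds (hypothetical values).
gps_time = np.array([0.0, 0.0011, 0.0019, 0.0032])
dt = (gps_time.max() - gps_time.min()) / (len(gps_time) - 1)
time = gps_time.min() + dt * np.arange(len(gps_time))  # monotonically increasing
time64 = dc.to_datetime64(time)  # cast to datetime64
```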
--------------------------------------------------------------------------------
/docs/recipes/correlate.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Correlate
3 | execute:
4 | warning: false
5 | ---
6 |
7 |
8 | Here, we demonstrate how to use the `correlate` patch function to cross-correlate a specific channel/time sample with the other channels/time samples in a patch. The result is a new "correlate patch" that holds the cross-correlation for each channel.
9 | We use the Ricker wavelet as an example signal and compute its cross-correlation using DASCore.
10 |
11 |
12 | ## Load and visualize the Ricker wavelet
13 |
14 | ```{python}
15 | import dascore as dc
16 |
17 | patch = dc.get_example_patch(
18 | "ricker_moveout",
19 | velocity=100,
20 | duration=2,
21 | )
22 |
23 | patch.viz.waterfall();
24 | ```
25 |
26 | ## Compute Ricker wavelet's cross-correlation
27 |
28 | As an example, we cross-correlate all channels of the Ricker patch with channel number 4 as the master channel (or virtual source). In the resulting correlate patch, the first three traces have negative time lags, the fourth trace (the auto-correlation) appears at 0.0 s lag, and the remaining traces have positive time lags. As expected, both the Ricker patch and the correlate patch show the same 100 m/s moveout.
29 |
30 | ```{python}
31 | import dascore as dc
32 |
33 | patch = dc.get_example_patch(
34 | "ricker_moveout",
35 | velocity=100,
36 | duration=2,
37 | )
38 |
39 | corr = patch.correlate(distance=3, samples=True)
40 |
41 | # Note we squeeze the last dimension to get a 2D patch
42 |
43 | corr.squeeze().viz.waterfall();
44 | ```
45 |
--------------------------------------------------------------------------------
/benchmarks/readme.md:
--------------------------------------------------------------------------------
1 | # Benchmarks
2 |
3 | DASCore's benchmark suite uses [CodSpeed](https://codspeed.io/) for continuous performance monitoring.
4 |
5 | ## Running Benchmarks Locally
6 |
7 | To run benchmarks locally:
8 |
9 | ```bash
10 | # Install test dependencies (includes pytest-codspeed)
11 | pip install -e ".[test]"
12 |
13 | # Run all benchmarks
14 | pytest benchmarks/ --codspeed
15 |
16 | # Run specific benchmark files
17 | pytest benchmarks/test_patch_benchmarks.py --codspeed
18 | pytest benchmarks/test_io_benchmarks.py --codspeed
19 | pytest benchmarks/test_spool_benchmarks.py --codspeed
20 | ```
21 |
22 | ## Benchmark Structure
23 |
24 | Benchmarks are now organized as pytest tests in the `benchmarks/` directory:
25 |
26 | - `test_patch_benchmarks.py` - Core Patch processing, transform, and visualization benchmarks
27 | - `test_io_benchmarks.py` - File I/O operations benchmarks
28 | - `test_spool_benchmarks.py` - Spool chunking and selection benchmarks
29 |
30 | Each benchmark uses the `@pytest.mark.benchmark` decorator to automatically measure performance.
31 |
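As a sketch (not one of the actual benchmark files; the test name and patch operation are hypothetical), a marked benchmark looks like:

```python
import pytest

import dascore as dc


@pytest.mark.benchmark
def test_detrend_benchmark():
    """The whole test body is measured as a single benchmark."""
    patch = dc.get_example_patch()
    patch.detrend("time")
```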
32 | ## Continuous Performance Monitoring
33 |
34 | Benchmarks automatically run on:
35 | - Push to main/master branch
36 | - Pull requests
37 |
38 | Performance results are tracked by CodSpeed and reported in pull requests, helping identify performance regressions before they're merged.
39 |
40 | ## Migration Notes
41 |
42 | The legacy ASV benchmarks in the `benchmarks/` directory have been converted to pytest format. The new benchmarks maintain the same functionality while providing better integration with the existing test suite.
43 |
--------------------------------------------------------------------------------
/dascore/utils/downloader.py:
--------------------------------------------------------------------------------
1 | """Simple script for downloading external files."""
2 |
3 | from __future__ import annotations
4 |
5 | from functools import cache
6 | from importlib.resources import files
7 | from pathlib import Path
8 |
9 | import pandas as pd
10 | import pooch
11 |
12 | from dascore.constants import DATA_VERSION
13 |
14 | REGISTRY_PATH = Path(files("dascore").joinpath("data_registry.txt"))
15 |
16 | # Create a pooch for fetching data files
17 | fetcher = pooch.create(
18 | path=pooch.os_cache("dascore"),
19 | base_url="https://github.com/d-chambers/dascore",
20 | version=DATA_VERSION,
21 | version_dev="master",
22 | env="DFS_DATA_DIR",
23 | )
24 | fetcher.load_registry(REGISTRY_PATH)
25 |
26 |
27 | @cache
28 | def get_registry_df() -> pd.DataFrame:
29 | """Returns a dataframe of all files in the data registry."""
30 | names = (
31 | "name",
32 | "hash",
33 | "url",
34 | )
35 | df = pd.read_csv(REGISTRY_PATH, sep=r"\s+", skiprows=1, names=names)
36 | return df
37 |
38 |
39 | @cache
40 | def fetch(name: Path | str, **kwargs) -> Path:
41 | """
42 | Fetch a data file from the registry.
43 |
44 | Parameters
45 | ----------
46 | name
47 | The name of the file to fetch. Must be in the data registry or a
48 | path which exists.
49 | kwargs
50 | Left for compatibility reasons.
51 |
52 | Returns
53 | -------
54 | A path to the downloaded file.
55 | """
56 | if (existing_path := Path(name)).exists():
57 | return existing_path
58 | return Path(fetcher.fetch(name, **kwargs))
59 |
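A usage sketch of `fetch` (illustrative only; the file name below is one entry from DASCore's data registry):

```python
from dascore.utils.downloader import fetch

# The first call downloads and caches the file; later calls return the
# same local path without re-downloading.
path = fetch("febus_1.h5")
print(path)
```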
--------------------------------------------------------------------------------
/tests/test_io/test_febus/test_febus.py:
--------------------------------------------------------------------------------
1 | """
2 | Febus specific tests.
3 | """
4 |
5 | import h5py
6 | import numpy as np
7 | import pytest
8 |
9 | from dascore.io.febus import Febus2
10 | from dascore.io.febus.utils import _flatten_febus_info
11 | from dascore.utils.downloader import fetch
12 | from dascore.utils.time import to_float
13 |
14 |
15 | class TestFebus:
16 | """Special test cases for febus."""
17 |
18 | @pytest.fixture(scope="class")
19 | def febus_path(self):
20 | """Return the path to a test febus file."""
21 | return fetch("febus_1.h5")
22 |
23 | def test_time_coords_consistent_with_metadata(self, febus_path):
24 | """
25 | Ensure the time coords returned have the same length as
26 | metadata indicates.
27 | """
28 | patch = Febus2().read(febus_path)[0]
29 | coords = patch.coords
30 | time = coords.get_coord("time")
31 | time_span = to_float((time.max() - time.min()) + time.step)
32 |
33 | with h5py.File(febus_path, "r") as f:
34 | feb = _flatten_febus_info(f)[0]
35 | # First check total time extent
36 | n_blocks = feb.zone[feb.data_name].shape[0]
37 | block_time = 1 / (feb.zone.attrs["BlockRate"] / 1_000)
38 | expected_time_span = block_time * n_blocks
39 | assert np.isclose(expected_time_span, time_span)
40 | # Then check absolute time
41 | time_offset = feb.zone.attrs["Origin"][1] / 1_000
42 | time_start = feb.source["time"][0] + time_offset
43 | assert np.isclose(to_float(time.min()), time_start)
44 |
--------------------------------------------------------------------------------
/tests/test_utils/test_models.py:
--------------------------------------------------------------------------------
1 | """Tests for DASCore models and related functionality."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 |
7 | from dascore.utils.models import DascoreBaseModel, sensible_model_equals
8 |
9 |
10 | class _TestModel(DascoreBaseModel):
11 | array: np.ndarray | None = None
12 | _private: int = 0
13 | some_str: str = "10"
14 |
15 |
16 | class TestModelEquals:
17 | """Tests for seeing if models/dicts are equal."""
18 |
19 | def test_empty(self):
20 | """Empty dicts should be equal."""
21 | assert sensible_model_equals({}, {})
22 |
23 | def test_arrays_not_equal(self):
24 | """Ensure when arrays aren't equal models arent."""
25 | mod1 = _TestModel(array=np.arange(10))
26 | mod2 = _TestModel(array=np.arange(10) + 10)
27 | assert not sensible_model_equals(mod1, mod2)
28 |
29 | def test_private(self):
30 | """When private attrs arent equal the models should still be."""
31 | mod1 = _TestModel(_private=1)
32 | mod2 = _TestModel(_private=2)
33 | assert sensible_model_equals(mod1, mod2)
34 |
35 | def test_private_disjoint(self):
36 | """Private attrs not shared should not affect equality."""
37 | mod1 = _TestModel(_private_1=1)
38 | mod2 = _TestModel(_private_2=2)
39 | assert sensible_model_equals(mod1, mod2)
40 |
41 | def test_new(self):
42 | """Ensure a new model can b e created."""
43 | mod = _TestModel(some_str="test")
44 | new = mod.new(some_str="bob")
45 | assert new.some_str == "bob"
46 | assert new is not mod
47 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # DASCore
2 |
3 | A Python library for distributed fiber optic sensing.
4 |
5 | [](https://codecov.io/gh/dasdae/dascore)
6 | [](https://codspeed.io/DASDAE/dascore)
7 | [](https://pypi.python.org/pypi/dascore)
8 | [](https://pypi.python.org/pypi/dascore)
9 | [](https://pypi.org/project/dascore/)
10 | [](https://github.com/conda-forge/dascore-feedstock)
11 | [](https://zenodo.org/badge/latestdoi/422627477)
12 | [](https://www.gnu.org/licenses/lgpl.html)
13 |
14 | [Code](https://github.com/DASDAE/dascore)
15 |
16 | Documentation [[stable](https://dascore.org), [development](https://dascore.netlify.app/)]
17 |
18 | [Citation](https://seismica.library.mcgill.ca/article/view/1184)
19 |
20 | > Chambers, D., Jin, G., Tourei, A., Issah, A. H. S., Lellouch, A., Martin, E., Zhu, D., Girard, A., Yuan, S., Cullison, T., Snyder, T., Kim, S., Danes, N., Pnithan, N., Boltz, M. S. & Mendoza, M. M. (2024). DASCore: a Python Library for Distributed Fiber Optic Sensing. Seismica, 3(2).
21 |
22 | [](https://github.com/DASDAE/dascore/graphs/contributors)
23 |
--------------------------------------------------------------------------------
/docs/styles.css:
--------------------------------------------------------------------------------
1 | /* css styles */
2 |
3 | body {
4 | font-size: 18px;
5 | }
6 |
7 | table {
8 | border-collapse: collapse;
9 | border: 2px solid rgb(200, 200, 200);
10 | letter-spacing: 1px;
11 | font-size: 0.8rem;
12 | display: table;
13 | width: 100%;
14 | padding-bottom: 4px;
15 | }
16 |
17 | caption {
18 | padding: 10px;
19 | caption-side: top;
20 | color: #666;
21 | text-align: center;
22 | letter-spacing: 1px;
23 | font-size: small;
24 | }
25 |
26 | td {
27 | border: 0.5px solid rgb(190, 190, 190);
28 | padding: 10px 20px;
29 | text-align: left;
30 | vertical-align: middle;
31 | /*white-space: pre;*/
32 | }
33 |
34 | /*Try not to let first column wrap.*/
35 | td:first-child {
36 | white-space: nowrap;
37 | }
38 |
39 | th {
40 | border: 0.5px solid rgb(190, 190, 190);
41 | padding: 10px 20px;
42 | text-align: center;
43 | }
44 |
45 |
46 | /*This makes every other row striped. */
47 | tbody tr:nth-child(odd) {
48 | background-color: rgba(210, 209, 209, 0.35);
49 | }
50 |
51 |
52 | /* Custom classes */
53 |
54 | .padded_bottom_10pt {
55 | padding-bottom: 10pt;
56 | }
57 |
58 | .def_block {
59 | background-color: rgba(210, 209, 209, 0.3);
60 | margin: 1em;
61 | box-sizing: content-box;
62 | padding-left: 10px;
63 | padding-right: 10px;
64 | border-radius: 25px;
65 | padding-bottom: 0px;
66 | padding-top: 2px;
67 | }
68 |
69 | .origin_table {
70 | border: 0em;
71 | width: 100%;
72 | margin: 0;
73 | font-size: 1em;
74 | border-style: hidden !important;
75 | font-family: 'Bree Serif', serif;
76 | display: block;
77 | margin-bottom: 10px;
78 | }
79 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # logs:
53 | *.log
54 |
55 | # Sphinx documentation
56 | docs/_build/
57 |
58 | # PyBuilder
59 | target/
60 |
61 | # pyenv python configuration file
62 | .python-version
63 |
64 | # pycharm
65 | .idea
66 |
67 | # Ipython stuff
68 | .ipynb_checkpoints
69 |
70 | # linux/osx garbage
71 | *~
72 | *.swp
73 | .DS_store*
74 |
75 | # docs
76 | docs/api/*
77 | _autosummary
78 | .quarto/
79 | docs/site_libs
80 | docs/_quarto.yml
81 | docs/**/*.ipynb
82 | .cross_ref.json
83 |
84 | # profile stuff from asv
85 | .asv
86 | .codspeed
87 |
88 | # misc
89 | scratch/**
90 | .ruff_cache
91 | uv.lock
92 |
93 | docs/index_files
94 | docs/index.quarto_ipynb
95 |
96 | # Claude stuff
97 | .claude
98 | CLAUDE.md
99 |
100 | # profile stuff
101 | prof/
102 |
--------------------------------------------------------------------------------
/docs/contributing/publish_a_new_release.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Publish a new release"
3 | ---
4 |
5 | On this page, we provide step-by-step guidance on how to publish a new release for DASCore.
6 |
7 | ## Step 1: Draft a new release and publish
8 |
9 | On the DASCore GitHub page, go to [Releases](https://github.com/DASDAE/dascore/releases) and draft a new release. On the draft, choose a new tag and a release title (e.g., 0.0.14). Finally, generate the release notes and publish the release.
10 |
11 | ## Step 2: Check the release status
12 |
13 | On [Actions](https://github.com/DASDAE/dascore/actions), check the release status for both "PublishPackage" and "BuildDeployStableDocs". Also, make sure [PyPI](https://pypi.org/project/dascore/) is updated.
14 |
15 | ## Step 3: Commit required changes for Conda
16 |
17 | For [conda](https://github.com/conda-forge/dascore-feedstock), you need to wait a few hours for the bot to open a pull request, and then verify/edit the dependencies, extras, etc. in [meta.yaml](https://github.com/conda-forge/dascore-feedstock/blob/main/recipe/meta.yaml) (in the /dascore-feedstock/recipe directory) so they match [pyproject.toml](https://github.com/DASDAE/dascore/blob/master/pyproject.toml) in the dascore repository. If they do not match, clone the dascore-feedstock repository, add the bot's fork as a remote (`git remote add conda_bot git@github.com:regro-cf-autotick-bot/dascore-feedstock`), fetch the new branch (`git fetch conda_bot`), and check out the branch the bot wants to merge into master. Then modify the branch and push it back. After the pull request is merged, you can verify the latest DASCore version at [conda-forge](https://anaconda.org/conda-forge/dascore).
18 |
--------------------------------------------------------------------------------
/docs/contributing/profiling_benchmarks.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Profiling and Benchmarks
3 | ---
4 | # Benchmarks
5 |
6 | DASCore uses [codspeed](https://codspeed.io/) to create and run a simple benchmark suite. The benchmarks are found in the benchmarks folder at the top level of the repository.
7 |
8 | To run the benchmarks:
9 | ```bash
10 | python -m pytest benchmarks/ --codspeed
11 | ```
12 |
13 | However, when you create a pull request, the benchmarks will be run in the CI/CD and a report displayed. If there are significant performance regressions, more discussion is needed before merging the pull request.
14 |
15 | If you add significant new functionality, you should probably add a benchmark.
16 |
17 | # Profiling
18 |
19 | If you find a significant issue, you can profile the problematic benchmark(s) to see *why* their performance degraded. This can be done with the [pytest profile plugin](https://pypi.org/project/pytest-profiling/).
20 |
21 | For example, suppose you notice a large increase in runtime for the pass_filter benchmark in the TestProcessingBenchmarks class of test_patch_benchmarks.py. Run the benchmark again under profiling:
22 |
23 | ```bash
24 | pytest benchmarks/test_patch_benchmarks.py::TestProcessingBenchmarks::test_pass_filter --profile
25 | ```
26 | This will create a new `prof` folder with `test_pass_filter_performance.prof` as one of the files. You can view these with a variety of tools, such as [snakeviz](https://jiffyclub.github.io/snakeviz/) (assuming you installed snakeviz with `pip install snakeviz`).
27 |
28 | ```bash
29 | snakeviz prof/test_pass_filter_performance.prof
30 | ```
31 |
32 | You can then click through the call stack and see which functions can be improved. After tweaking them, re-run the profiling and see if the overall time improves.
33 |
--------------------------------------------------------------------------------
/docs/contributing/dev_install.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: DASCore Development Installation
3 | ---
4 |
5 | The following steps are needed to set up `DASCore` for development:
6 |
7 | ## 1. Clone DASCore
8 |
9 | ```bash
10 | git clone https://github.com/dasdae/dascore
11 | cd dascore
12 | ```
13 |
14 | ## 2. Pull tags
15 |
16 | Make sure to pull all of the latest git tags.
17 |
18 |
19 | ```bash
20 | git pull origin master --tags
21 | ```
22 |
23 | ## 3. Create a virtual environment (optional)
24 |
25 | Create and activate a virtual environment so DASCore will not mess with the base (or system) python installation.
26 |
27 | If you are using [Anaconda](https://www.anaconda.com/), simply use the environment provided:
28 |
29 | ```bash
30 | conda env create -f environment.yml
31 | conda activate dascore
32 | ```
33 |
34 | ## 4. Install DASCore in development mode
35 |
36 | ```bash
37 | pip install -e ".[dev]"
38 | ```
39 |
40 | ## 5. Setup pre-commit hooks
41 |
42 | dascore uses several [pre-commit](https://pre-commit.com/) hooks to ensure the code stays tidy. Please install and use them!
43 |
44 | ```bash
45 | pre-commit install -f
46 | ```
47 |
48 | # Refresh
49 |
50 | If you have already installed dascore but it has been a while, please do the following before creating a new branch:
51 |
52 | ```bash
53 | conda activate dascore
54 | ```
55 | ```bash
56 | git checkout master
57 | ```
58 | ```bash
59 | git pull origin master --tags
60 | ```
61 | ```bash
62 | pip install -e ".[dev]"
63 | ```
64 |
65 | These commands activate the dascore environment, switch to the master branch, pull the latest changes and tags, and
66 | reinstall in development mode. The reinstall is only required if DASCore's entry points or requirements have changed, but
67 | won't hurt anything if not.
68 |
--------------------------------------------------------------------------------
/docs/recipes/external_conversion.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Patch Conversions
3 | execute:
4 | warning: false
5 | ---
6 |
7 | Although DASCore provides a lot of the functionality needed for DFOS processing, it is not intended to do absolutely everything, and some other libraries might be better suited for a particular task. In addition to making it simple to access the underlying data as shown in the [patch tutorial](../tutorial/patch.qmd), DASCore provides convenience functions to convert data to formats used by other libraries. Here are a few examples:
8 |
9 |
10 | ## [Pandas](https://pandas.pydata.org/)
11 |
12 | ```python
13 | import dascore as dc
14 |
15 | patch = dc.get_example_patch()
16 |
17 | # Convert to a dataframe
18 | df = patch.io.to_dataframe()
19 |
20 | # Convert a dataframe to a patch
21 | patch_from_df = dc.io.dataframe_to_patch(df)
22 | ```
23 |
24 | ## [Xarray](https://docs.xarray.dev/)
25 |
26 | ```python
27 | import dascore as dc
28 |
29 | patch = dc.get_example_patch()
30 |
31 | # Convert to xarray's DataArray
32 | dar = patch.io.to_xarray()
33 |
34 | # Convert a DataArray to a patch
35 | patch_from_dar = dc.io.xarray_to_patch(dar)
36 | ```
37 |
38 | ## [ObsPy](https://docs.obspy.org/)
39 |
40 | ```python
41 | import dascore as dc
42 |
43 | patch = dc.get_example_patch()
44 |
45 | # Convert the patch to an ObsPy Stream
46 | stream = patch.io.to_obspy()
47 |
48 | # Convert an ObsPy Stream to a patch
49 | patch_from_stream = dc.io.obspy_to_patch(stream)
50 | ```
51 |
52 | :::{.callout-note}
53 | As explained in the [`obspy_to_patch` docs](`dascore.utils.io.obspy_to_patch`), there must be a value in the stats dict which indicates values for a non-time dimension. For example, each trace might have a 'distance' key in its stats dict which DASCore uses to construct the distance dimensional coordinate.
54 | :::
55 |
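A minimal sketch of that requirement (the spacing values are hypothetical; if the stream came from `patch.io.to_obspy()`, the values should already be present and the loop below only illustrates the idea):

```python
import dascore as dc

patch = dc.get_example_patch()
stream = patch.io.to_obspy()

# Each trace carries a value for the non-time dimension ("distance" here).
for i, tr in enumerate(stream):
    tr.stats["distance"] = i * 10.0  # e.g., 10 m channel spacing

patch_from_stream = dc.io.obspy_to_patch(stream)
```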
--------------------------------------------------------------------------------
/tests/test_utils/test_xml_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for xml utilities.
3 | """
4 |
5 | from dascore.utils.xml import xml_to_dict
6 |
7 |
8 | class TestXMLtoDict:
9 | """Tests for converting XML to dictionaries."""
10 |
11 | def test_single_element(self):
12 | """Test conversion of XML with a single element."""
13 | xml_string = "<root>Hello</root>"
14 | assert xml_to_dict(xml_string) == "Hello"
15 |
16 | def test_multiple_elements(self):
17 | """Test conversion of XML with multiple elements."""
18 | xml_string = "<root><a>1</a><b>2</b></root>"
19 | expected_dict = {"a": "1", "b": "2"}
20 | assert xml_to_dict(xml_string) == expected_dict
21 |
22 | def test_nested_elements(self):
23 | """Test conversion of XML with nested elements."""
24 | xml_string = "<root><a><b>1</b></a></root>"
25 | expected_dict = {"a": {"b": "1"}}
26 | assert xml_to_dict(xml_string) == expected_dict
27 |
28 | def test_multiple_nested_elements(self):
29 | """Test conversion of XML with multiple nested elements."""
30 | xml_string = "<root><a><b>1</b></a><c><d>2</d></c></root>"
31 | expected_dict = {"a": {"b": "1"}, "c": {"d": "2"}}
32 | assert xml_to_dict(xml_string) == expected_dict
33 |
34 | def test_elements_repeated_twice(self):
35 | """Test conversion of XML with repeated elements."""
36 | xml_string = "<root><a>1</a><a>2</a></root>"
37 | expected_dict = {"a": ["1", "2"]}
38 | assert xml_to_dict(xml_string) == expected_dict
39 |
40 | def test_repeated_elements(self):
41 | """Test conversion of XML with repeated elements."""
42 | xml_string = "<root><a>1</a><a>2</a><a>3</a></root>"
43 | expected_dict = {"a": ["1", "2", "3"]}
44 | assert xml_to_dict(xml_string) == expected_dict
45 |
--------------------------------------------------------------------------------
/docs/contributing/testing.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Testing"
3 | ---
4 |
5 | # Testing
6 |
7 | DASCore's test suite is run with [pytest](https://docs.pytest.org/en/stable/). While in the base dascore repo
8 | (and after [installing DASCore for development](dev_install.qmd)) invoke pytest from the command line:
9 |
10 |
11 | ```bash
12 | pytest tests
13 | ```
14 |
15 | You can also use the cov flags to check coverage. Please make sure you don't
16 | introduce large blocks of dead code.
17 |
18 |
19 | ```bash
20 | pytest tests --cov dascore --cov-report term-missing
21 | ```
22 |
23 | If you would like to test the IO modules it can be done like so:
24 |
25 |
26 | ```bash
27 | pytest tests/test_io
28 | ```
29 |
30 | Or a particular IO module:
31 |
32 |
33 | ```bash
34 | pytest tests/test_io/test_dasdae.py
35 | ```
36 |
37 | Pytest is highly configurable and has some rather useful flags such as
38 | -s, -x, and --pdb (especially with [pdbpp](https://github.com/pdbpp/pdbpp)).
39 |
40 |
41 | To run the docstring tests use the following:
42 |
43 |
44 | ```bash
45 | pytest dascore --doctest-modules
46 | ```
47 |
48 | ## Writing Tests
49 |
50 | Tests should go into the `tests/` folder, which mirrors the structure of the main package.
51 | For example, if you are writing tests for `dascore.Patch`, whose class definition is
52 | located in `dascore/core/patch` it should go in `tests/test_core/test_patch.py`.
53 |
54 | In general, tests should be grouped together in classes. Fixtures go as close as
55 | possible to the test(s) that need them, moving from class, module, and then to conftest.
56 | Check out the pytest documentation for a [review on fixtures](https://docs.pytest.org/en/6.2.x/fixture.html)
57 | (and why to use them).
58 |
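For example, a hypothetical test module following these conventions might look like:

```python
import pytest

import dascore as dc


@pytest.fixture()
def example_patch():
    """A small patch shared by the tests below."""
    return dc.get_example_patch()


class TestDetrend:
    """Group related tests together in a class."""

    def test_returns_patch(self, example_patch):
        """Ensure detrending returns a new Patch."""
        out = example_patch.detrend("time")
        assert isinstance(out, dc.Patch)
```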
--------------------------------------------------------------------------------
/tests/test_io/test_tdms/test_tdms_utils.py:
--------------------------------------------------------------------------------
1 | """Tests for TDMS utilities to improve coverage."""
2 |
3 | from __future__ import annotations
4 |
5 | import pytest
6 |
7 | from dascore.io.tdms.utils import parse_time_stamp, type_not_supported
8 |
9 |
10 | class TestTDMSUtils:
11 | """Tests for TDMS utility functions."""
12 |
13 | def test_type_not_supported(self):
14 | """Test that type_not_supported raises NotImplementedError."""
15 | with pytest.raises(
16 | NotImplementedError, match="Reading of this tdsDataType is not implemented"
17 | ):
18 | type_not_supported("any_input")
19 |
20 | def test_parse_time_stamp_none(self):
21 | """Test that parse_time_stamp returns None for invalid input."""
22 | # Test with invalid timestamp values that should return None
23 | result = parse_time_stamp(0, 0) # epoch start should return None
24 | assert result is None
25 |
26 | # Test with None values
27 | result = parse_time_stamp(None, 100)
28 | assert result is None
29 |
30 | result = parse_time_stamp(100, None)
31 | assert result is None
32 |
33 | result = parse_time_stamp(None, None)
34 | assert result is None
35 |
36 | def test_parse_time_stamp_valid(self):
37 | """Test that parse_time_stamp works with valid input."""
38 | # Test with valid timestamp - using a reasonable epoch timestamp
39 | # LabVIEW epoch starts at 1904-01-01, so we need a positive value
40 | import datetime
41 |
42 | # Use a timestamp that represents a valid date after 1904
43 | seconds = 365 * 24 * 3600 * 100 # 100 years after 1904
44 | fractions = 0.5 # Some fractional seconds
45 |
46 | result = parse_time_stamp(fractions, seconds)
47 | assert isinstance(result, datetime.datetime)
48 | assert result.year >= 1904
49 |
--------------------------------------------------------------------------------
/docs/recipes/add_spatial_coordinates_to_patch.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Add Spatial Coordinates
3 | execute:
4 | warning: false
5 | ---
6 |
7 | Sometimes spatial coordinates are stored in a table and associated with a patch dimension. For example, the results of a tap-test may associate X, Y, and Z values with the "distance" dimension. DASCore provides the patch function [`coords_from_df`](`dascore.proc.coords.coords_from_df`) for just this purpose. The functionality is not limited to spatial coordinates; it can be used to add any coordinates to any existing dimension.
8 |
9 | Here, we:
10 | - Make a patch and coordinates from example data
11 | - Call `coords_from_df`, which extrapolates and adds the coordinates
12 | - Plot coordinates for comparison
13 |
14 | ```{python}
15 | #| code-fold: true
16 | import pandas as pd
17 |
18 | import dascore as dc
19 | from dascore.utils.downloader import fetch
20 |
21 | # Get path for example coordinates.
22 | path = fetch("brady_hs_DAS_DTS_coords.csv")
23 |
24 | # Read coordinates data from csv file.
25 | coord_table = pd.read_csv(path).rename(columns={"Channel": "distance"})
26 |
27 | # Make patch from example.
28 | patch = dc.get_example_patch()
29 |
30 | # Get length of distance axis.
31 | dist_len = patch.coord_shapes["distance"][0]
32 |
33 | # Make a dictionary of units.
34 | units = {a:"m" for a in coord_table.columns[1:]}
35 |
36 | # Select portion of new coordinates to map to existing coordinate.
37 | new_coordinates = coord_table.iloc[51:]
38 |
39 | ```
40 |
41 |
42 | ```{python}
43 | import matplotlib.pyplot as plt
44 |
45 | import dascore as dc
46 |
47 | patch = dc.get_example_patch()
48 |
49 | # Add coordinates to patch.
50 | patch_with_coords = patch.coords_from_df(
51 | new_coordinates, units=units, extrapolate=True,
52 | )
53 |
54 | # Plot coordinates.
55 | coords = patch_with_coords.coords
56 | X = coords.get_array('X')
57 | Y = coords.get_array('Y')
58 | plt.scatter(X,Y);
59 | ```
60 |
--------------------------------------------------------------------------------
/tests/test_transform/test_fft.py:
--------------------------------------------------------------------------------
1 | """Tests for Fourier transforms."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 | import pytest
7 |
8 | from dascore.transform.fft import rfft
9 | from dascore.units import get_quantity
10 | from dascore.utils.misc import suppress_warnings
11 |
12 |
13 | class TestRfft:
14 | """Tests for the real fourier transform."""
15 |
16 | @pytest.fixture(scope="class")
17 | def rfft_patch(self, random_patch):
18 | """Return the random patched transformed along time w/ rrft."""
19 | with suppress_warnings(DeprecationWarning):
20 | out = rfft(random_patch, dim="time")
21 | return out
22 |
23 | def test_dims(self, rfft_patch):
24 | """Ensure ft of original axis shows up in dimensions."""
25 | dims = rfft_patch.dims
26 | start_freq = [x.startswith("ft_") for x in dims]
27 | assert any(start_freq)
28 |
29 | def test_abs_rrft(self, rfft_patch):
30 | """Ensure abs works with rfft to get amplitude spectra."""
31 | out = rfft_patch.abs()
32 | assert np.allclose(out.data, np.abs(rfft_patch.data))
33 |
34 | def test_time_coord_units(self, random_patch, rfft_patch):
35 | """Ensure time label units have been correctly set."""
36 | units1 = random_patch.coords.coord_map["time"].units
37 | units2 = rfft_patch.coords.coord_map["ft_time"].units
38 | assert get_quantity(units1) == 1 / get_quantity(units2)
39 |
40 | def test_data_units(self, random_patch):
41 | """Ensure data units have been updated."""
42 | patch = random_patch.update_attrs(data_units="m/s")
43 | with suppress_warnings(DeprecationWarning):
44 | fft_patch = patch.rfft("time")
45 | dunits1 = get_quantity(patch.attrs.data_units)
46 | dunits2 = get_quantity(fft_patch.attrs.data_units)
47 | assert dunits2 == dunits1 * get_quantity("second")
48 |
--------------------------------------------------------------------------------
/dascore/io/sentek/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading Sentek's DAS data format."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 |
7 | import dascore as dc
8 | from dascore.io import BinaryReader
9 | from dascore.io.core import FiberIO
10 |
11 | from .utils import _get_patch_attrs, _get_version
12 |
13 |
14 | class SentekV5(FiberIO):
15 | """Support for Sentek Instrument data format."""
16 |
17 | name = "sentek"
18 | version = "5"
19 | preferred_extensions = ("das",)
20 |
21 | def read(
22 | self,
23 | resource: BinaryReader,
24 | time=None,
25 | distance=None,
26 | **kwargs,
27 | ) -> dc.BaseSpool:
28 | """Read a Sentek das file, return a DataArray."""
29 | attrs, coords, offsets = _get_patch_attrs(resource)
30 | resource.seek(offsets[0])
31 | array = np.fromfile(resource, dtype=np.float32, count=offsets[1] * offsets[2])
32 | array = np.reshape(array, (offsets[1], offsets[2])).T
33 | patch = dc.Patch(data=array, attrs=attrs, coords=coords, dims=coords.dims)
34 | # Note: we are being a bit sloppy here in that selecting on time/distance
35 | # doesn't actually affect how much data is read from the binary file. This
36 | # is probably ok though since Sentek files tend to be quite small.
37 | return dc.spool(patch).select(time=time, distance=distance)
38 |
39 | def get_format(self, resource: BinaryReader, **kwargs) -> tuple[str, str] | bool:
40 | """Auto detect sentek format."""
41 | return _get_version(resource)
42 |
43 | def scan(self, resource: BinaryReader, **kwargs):
44 | """Extract metadata from sentek file."""
45 | extras = {
46 | "file_format": self.name,
47 | "file_version": self.version,
48 | "path": resource.name,
49 | }
50 |
51 | return [_get_patch_attrs(resource, extras=extras)[0]]
52 |
--------------------------------------------------------------------------------
/dascore/io/h5simple/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading simple h5 data."""
2 |
3 | from __future__ import annotations
4 |
5 | import dascore as dc
6 | from dascore.constants import SpoolType
7 | from dascore.io import FiberIO
8 | from dascore.utils.hdf5 import H5Reader, PyTablesReader
9 |
10 | from .utils import _get_attrs_coords_and_data, _is_h5simple, _maybe_trim_data
11 |
12 |
13 | class H5Simple(FiberIO):
14 | """Support for bare-bones h5 format."""
15 |
16 | name = "H5Simple"
17 | preferred_extensions = ("hdf5", "h5")
18 | version = "1"
19 |
20 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
21 | """Determine if is simple h5 format."""
22 | if _is_h5simple(resource):
23 | return self.name, self.version
24 | return False
25 |
26 | def read(self, resource: PyTablesReader, snap=True, **kwargs) -> SpoolType:
27 | """
28 | Read a simple h5 file.
29 |
30 | Parameters
31 | ----------
32 | resource
33 | The open h5 object.
34 | snap
35 | If True, snap each coordinate to be evenly sampled.
36 | **kwargs
37 | Passed to filtering coordinates.
38 | """
39 | attrs, cm, data = _get_attrs_coords_and_data(resource, snap, self)
40 | new_cm, new_data = _maybe_trim_data(cm, data, kwargs)
41 | if not new_cm.size:
42 | return dc.spool([])
43 | patch = dc.Patch(coords=new_cm, data=new_data[:], attrs=attrs)
44 | return dc.spool([patch])
45 |
46 | def scan(
47 | self, resource: PyTablesReader, snap=True, **kwargs
48 | ) -> list[dc.PatchAttrs]:
49 | """Get the attributes of a h5simple file."""
50 | attrs, cm, data = _get_attrs_coords_and_data(resource, snap, self)
51 | attrs["coords"] = cm.to_summary_dict()
52 | attrs["path"] = resource.filename
53 | return [dc.PatchAttrs(**attrs)]
54 |
--------------------------------------------------------------------------------
/.github/workflows/build_deploy_stable_docs.yaml:
--------------------------------------------------------------------------------
1 | # Create documentation for stable (latest version) and deploy
2 | name: BuildDeployStableDocs
3 |
4 | on:
5 | # Runs when creating a new release
6 | release:
7 | types: [ created ]
8 |
9 | # Allows you to run this workflow manually from the Actions tab
10 | workflow_dispatch:
11 |
12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13 | permissions:
14 | contents: write
15 | pages: write
16 | deployments: write
17 | id-token: write
18 |
19 | # Allow one concurrent deployment
20 | concurrency:
21 | group: "pages"
22 | cancel-in-progress: true
23 |
24 | jobs:
25 | # Build job
26 | build:
27 | runs-on: ubuntu-latest
28 | steps:
29 | - uses: actions/checkout@v4
30 | with:
31 | fetch-tags: "true"
32 | fetch-depth: '0'
33 |
34 |
35 | - uses: ./.github/actions/mamba-install-dascore
36 | with:
37 | python-version: "3.12"
38 | environment-file: './.github/doc_environment.yml'
39 |
40 | - uses: ./.github/actions/build-docs
41 |
42 | # - name: Setup Pages
43 | # uses: actions/configure-pages@v5
44 |
45 | - name: Upload artifact
46 | uses: actions/upload-pages-artifact@v3
47 | with:
48 | path: 'docs/_site'
49 |
50 | - name: Zip doc build
51 | if: startsWith(github.ref, 'refs/tags/')
52 | run: zip docs.zip docs/_site -r
53 |
54 | - name: Upload release docs
55 | uses: softprops/action-gh-release@v2
56 | if: startsWith(github.ref, 'refs/tags/')
57 | with:
58 | files: docs.zip
59 |
60 | deploy:
61 | runs-on: ubuntu-latest
62 | needs: build
63 | environment:
64 | name: github-pages
65 | url: ${{ steps.deployment.outputs.page_url }}
66 |
67 | steps:
68 | - name: Deploy to GitHub Pages
69 | id: deployment
70 | uses: actions/deploy-pages@v4
71 |
--------------------------------------------------------------------------------
/dascore/utils/mapping.py:
--------------------------------------------------------------------------------
1 | """
2 | A few mappings that might be useful.
3 |
4 | We can't simply use types.MappingProxyType because it can't be pickled.
5 | """
6 |
7 | from __future__ import annotations
8 |
9 | from collections.abc import Mapping as ABCMap
10 |
11 |
12 | class FrozenDict(ABCMap):
13 | """
14 | An immutable wrapper around dictionaries that implements the complete
15 | :py:class:`collections.Mapping` interface. It can be used as a drop-in
16 | replacement for dictionaries where immutability is desired.
17 |
18 | Notes
19 | -----
20 | This implementation was inspired by the no-longer-maintained package
21 | frozen-dict (https://github.com/slezica/python-frozendict)
22 |
23 | By design, changes in the original dict are not reflected in the frozen
24 | dict so that the hash doesn't break.
25 | """
26 |
27 | def __init__(self, *args, **kwargs):
28 | self._dict = dict(*args, **kwargs)
29 | self._hash = None
30 |
31 | def __getitem__(self, key):
32 | return self._dict[key]
33 |
34 | def __contains__(self, key):
35 | return key in self._dict
36 |
37 | def new(self, **kwargs):
38 | """Copy the contents and update with new values."""
39 | contents = dict(self._dict)
40 | contents.update(kwargs)
41 | return self.__class__(**contents)
42 |
43 | def __iter__(self):
44 | return iter(self._dict)
45 |
46 | def __len__(self):
47 | return len(self._dict)
48 |
49 | def __repr__(self):
50 | return f"<{self.__class__.__name__} {self._dict!r}>"
51 |
52 | def _hash_contents(self):
53 | """Returns a hash of the dictionary."""
54 | out = 0
55 | for key, value in self._dict.items():
56 | out ^= hash((key, value))
57 | return out
58 |
59 | def __hash__(self):
60 | if self._hash is None:
61 | self._hash = self._hash_contents()
62 | return self._hash
63 |
--------------------------------------------------------------------------------
/.github/workflows/run_min_dep_tests.yml:
--------------------------------------------------------------------------------
1 | # Run minimum dependency tests without conda
2 | name: TestCodeMinDeps
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | branches:
9 | - master
10 | paths:
11 | - 'pyproject.toml'
12 | - '**.py'
13 | - '.github/workflows/run_min_dep_tests.yml'
14 |
15 | env:
16 | # Ensure matplotlib doesn't try to show figures in CI
17 | MPLBACKEND: Agg
18 | QT_QPA_PLATFORM: offscreen
19 |
20 | # Cancel previous runs when this one starts.
21 | concurrency:
22 | group: TestCodeMinDeps-${{ github.event.pull_request.number || github.run_id }}
23 | cancel-in-progress: true
24 |
25 | jobs:
26 | # Runs the tests on combinations of the supported python/os matrix.
27 | test_code_min_deps:
28 |
29 | timeout-minutes: 25
30 | runs-on: ${{ matrix.os }}
31 | strategy:
32 | matrix:
33 | os: [ ubuntu-latest, macos-latest, windows-latest ]
34 | python-version: [ '3.12', '3.13' ]
35 |
36 |
37 | # only run if CI isn't turned off
38 | if: github.event_name == 'push' || !contains(github.event.pull_request.labels.*.name, 'no_ci')
39 |
40 | steps:
41 | - uses: actions/checkout@v4
42 | with:
43 | fetch-tags: "true"
44 | fetch-depth: '0'
45 |
46 | - uses: ./.github/actions/mamba-install-dascore
47 | with:
48 | python-version: ${{ matrix.python-version }}
49 | install-package: false
50 | environment-file: './.github/min_deps_environment.yml'
51 | cache-number: 1
52 |
53 | - name: Install dascore (min deps)
54 | shell: bash -l {0}
55 | run: pip install -e .[test]
56 |
57 | # Runs test suite and calculates coverage
58 | - name: run test suite
59 | shell: bash -el {0}
60 | run: ./.github/test_code.sh
61 |
62 | # Runs examples in docstrings
63 | - name: test docstrings
64 | shell: bash -el {0}
65 | run: ./.github/test_code.sh doctest
66 |
--------------------------------------------------------------------------------
/dascore/io/pickle/core.py:
--------------------------------------------------------------------------------
1 | """Core module for reading and writing pickle format."""
2 |
3 | from __future__ import annotations
4 |
5 | import pickle
6 |
7 | import dascore
8 | from dascore.io import BinaryReader, BinaryWriter, FiberIO
9 |
10 |
11 | class PickleIO(FiberIO):
12 | """
13 | Provides IO support for the pickle format.
14 |
15 | Warning
16 | -------
17 | The pickle format is discouraged due to potential security and
18 | compatibility issues.
19 | """
20 |
21 | name = "PICKLE"
22 | preferred_extensions = ("pkl", "pickle")
23 |
24 | def _header_is_dascore(self, byte_stream):
25 | """Return True if the first few bytes mention dascore classes."""
26 | has_dascore = b"dascore.core" in byte_stream
27 | spool_or_patch = b"Spool" in byte_stream or b"Patch" in byte_stream
28 | return has_dascore and spool_or_patch
29 |
30 | def get_format(self, resource: BinaryReader, **kwargs) -> tuple[str, str] | bool:
31 | """
32 | Return True if file contains a pickled Patch or Spool.
33 |
34 | Parameters
35 | ----------
36 | resource
37 |             A path to the file which may contain a pickled dascore object.
38 | """
39 | try:
40 | start = resource.read(100) # read first 100 bytes, look for class names
41 | if self._header_is_dascore(start):
42 | getattr(resource, "seek", lambda x: None)(0)
43 | pickle.load(resource)
44 | return ("PICKLE", self.version) # TODO add pickle protocol
45 | else:
46 | return False
47 | except (pickle.UnpicklingError, FileNotFoundError, IndexError):
48 | return False
49 |
50 | def read(self, resource: BinaryReader, **kwargs):
51 | """Read a Patch/Spool from disk."""
52 | out = pickle.load(resource)
53 | return dascore.spool(out)
54 |
55 | def write(self, patch, resource: BinaryWriter, **kwargs):
56 | """Write a Patch/Spool to disk."""
57 | pickle.dump(patch, resource)
58 |
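
A round-trip sketch (not part of the repository) using dascore's generic write/spool entry points with the format name registered by `PickleIO`; the temporary path and example patch are illustrative only.

```python
import tempfile
from pathlib import Path

import dascore as dc

patch = dc.get_example_patch()
path = Path(tempfile.mkdtemp()) / "patch.pkl"

# Write with the registered format name from PickleIO above.
dc.write(patch, path, "PICKLE")

# Load it back through the generic spool interface.
spool = dc.spool(path)
assert isinstance(spool[0], dc.Patch)
```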
--------------------------------------------------------------------------------
/tests/test_utils/test_transformatter.py:
--------------------------------------------------------------------------------
1 | """Tests for transformatter."""
2 |
3 | from __future__ import annotations
4 |
5 | import pytest
6 |
7 | from dascore.utils.transformatter import FourierTransformatter
8 |
9 |
10 | @pytest.fixture()
11 | def ft_reformatter():
12 | """Simple fourier transform formatter."""
13 | return FourierTransformatter()
14 |
15 |
16 | class TestFTDimensionRename:
17 | """Tests for renaming dimensions using FT transformer."""
18 |
19 | dims = ("distance", "time")
20 |
21 | def test_forward_rename_one_index(self, ft_reformatter):
22 | """Ensure the name can be reassigned."""
23 | out = ft_reformatter.rename_dims(self.dims, 1)
24 | assert out == ("distance", "ft_time")
25 |
26 | def test_forward_rename_all_index(self, ft_reformatter):
27 | """Ensure all indices can be renamed."""
28 | out = ft_reformatter.rename_dims(self.dims)
29 | assert out == tuple(f"{ft_reformatter.forward_prefix}{x}" for x in self.dims)
30 |
31 | def test_forward_undo_inverse(self, ft_reformatter):
32 | """Ensure the inverse is correctly undone."""
33 | dims = tuple([f"{ft_reformatter.inverse_prefix}{x}" for x in self.dims])
34 | out = ft_reformatter.rename_dims(dims)
35 | assert out == self.dims
36 |
37 | def test_inverse_rename_one_index(self, ft_reformatter):
38 | """Ensure the name can be reassigned."""
39 | out = ft_reformatter.rename_dims(self.dims, 1, forward=False)
40 | assert out == ("distance", "ift_time")
41 |
42 | def test_undo_forward_index(self, ft_reformatter):
43 | """Ensure forward prefex is undone by inverse."""
44 | dims = tuple(f"{ft_reformatter.forward_prefix}{x}" for x in self.dims)
45 | out = ft_reformatter.rename_dims(dims, forward=False)
46 | assert out == self.dims
47 |
48 | def test_double_forward(self, ft_reformatter):
49 | """Prefixes should stack."""
50 | pre = ft_reformatter.forward_prefix
51 | dims1 = ft_reformatter.rename_dims(self.dims)
52 | dims2 = ft_reformatter.rename_dims(dims1)
53 | assert dims2 == tuple(f"{pre}{pre}{x}" for x in self.dims)
54 |
--------------------------------------------------------------------------------
/scripts/_validate_links.py:
--------------------------------------------------------------------------------
1 | """Script to validate links in qmd files."""
2 |
3 | from __future__ import annotations
4 |
5 | import json
6 | import re
7 | from functools import cache
8 | from pathlib import Path
9 |
10 |
11 | def _get_docs_path():
12 | """Find the documentation path."""
13 | path = Path(__file__).parent.parent / "docs"
14 | return path
15 |
16 |
17 | def get_qmd_files(
18 | path=None,
19 | ):
20 | """Yield all QMD files."""
21 | path = _get_docs_path() if path is None else path
22 | yield from path.rglob("*qmd")
23 |
24 |
25 | def yield_links(text, pattern=r"(?<=\]\(`).*?(?=`\))"):
26 | """Yield links found in documentation."""
27 | matches = re.findall(pattern, text)
28 | yield from matches
29 |
30 |
31 | @cache
32 | def load_index(path=None):
33 | """Load the index with the linked locations."""
34 | if path is None:
35 | path = _get_docs_path() / ".cross_ref.json"
36 | with open(path) as fi:
37 | out = json.load(fi)
38 | return out
39 |
40 |
41 | def validate_all_links():
42 | """Scan all documentation files and ensure the links are valid."""
43 | index = load_index()
44 | good_links, bad_links, file_count = 0, 0, 0
45 | bad = []
46 | for path in get_qmd_files():
47 | file_count += 1
48 | text = path.read_text()
49 | for link in yield_links(text):
50 | if link not in index:
51 | bad.append((str(path), link))
52 | bad_links += 1
53 | else:
54 | good_links += 1
55 | print( # noqa
56 | f"Validated links in documentation. Scanned {file_count} files, "
57 | f"found {good_links} good links and {bad_links} bad links"
58 | )
59 | if bad_links:
60 | msg = "Please fix the following (path/link)\n"
61 | max_len = max(len(x[0]) for x in bad)
62 | out = []
63 | for path, link in bad:
64 | path_str = path.ljust(max_len + 3)
65 | out.append(f"{path_str} {link}")
66 | new_str = msg + "\n".join(out)
67 | raise ValueError(new_str)
68 |
69 |
70 | if __name__ == "__main__":
71 | validate_all_links()
73 |
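
A quick illustration (not part of the repository) of the look-around pattern used by `yield_links`: it captures only cross-reference targets whose URL part is wrapped in backticks and ignores ordinary markdown links.

```python
import re

pattern = r"(?<=\]\(`).*?(?=`\))"
text = (
    "See the [waterfall](`dascore.viz.waterfall`) docs and the "
    "[project site](https://dascore.org) for more, plus "
    "[spool](`dascore.core.spool`)."
)
print(re.findall(pattern, text))
# ['dascore.viz.waterfall', 'dascore.core.spool']
```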
--------------------------------------------------------------------------------
/docs/tutorial/visualization.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Visualization
3 | execute:
4 | warning: false
5 | ---
6 |
7 | # Viz
8 | The following provides some examples of patch visualization.
9 | See the [viz module documentation](`dascore.viz`) for a list of visualization functions.
10 |
11 | ## Waterfall
12 | The [`waterfall patch function`](`dascore.viz.waterfall`) creates a waterfall plot of the patch data.
13 |
14 | ```{python}
15 | import dascore as dc
16 |
17 | patch = dc.get_example_patch('example_event_2')
18 |
19 | # Default scaling uses IQR-based fence to handle outliers
20 | patch.viz.waterfall(show=True)
21 | ```
22 |
23 | ### Controlling color scaling
24 |
25 | The `scale` parameter controls the colorbar saturation. By default, waterfall uses a statistical fence (1.5×IQR) to exclude outliers and show the majority of the data clearly.
26 |
27 | ```{python}
28 | import matplotlib.pyplot as plt
29 | import dascore as dc
30 |
31 | patch = dc.get_example_patch('example_event_2')
32 |
33 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
34 |
35 | # Relative scaling: 0.2 means ±20% of dynamic range around mean
36 | patch.viz.waterfall(scale=0.2, scale_type="relative", ax=ax1)
37 | ax1.set_title("Relative scaling (scale=0.2)")
38 |
39 | # Absolute scaling: directly set colorbar limits
40 | patch.viz.waterfall(scale=(-50, 50), scale_type="absolute", ax=ax2)
41 | ax2.set_title("Absolute scaling (scale=(-50, 50))")
42 |
43 | plt.tight_layout()
44 | plt.show()
45 | ```
46 |
47 | ## Wiggle
48 | The [`wiggle patch function`](`dascore.viz.wiggle`) creates a wiggle plot of the patch data. We'll use a similar example event patch, lightly processed, to demonstrate this function.
49 |
50 | ```{python}
51 | import dascore as dc
52 |
53 | patch = (
54 | dc.get_example_patch('example_event_1')
55 | .set_units("mm/(m*s)", distance='m', time='s')
56 | .taper(time=0.05)
57 | .pass_filter(time=(None, 300))
58 | )
59 | patch.viz.wiggle(scale=0.5)
60 | ```
61 |
62 | Another example, using wiggle to plot a sine wave, is shown below.
63 |
64 | ```{python}
65 | import dascore as dc
66 |
67 | patch = dc.examples.get_example_patch(
68 | "sin_wav",
69 | sample_rate=60,
70 | frequency=[60, 10],
71 | channel_count=1,
72 | )
73 | patch.viz.wiggle(show=True);
74 | ```
75 |
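
Both functions draw onto a matplotlib axes object, so plots can be customized further before display. The sketch below assumes that `waterfall`, like `wiggle`, returns the axes it drew on.

```{python}
import matplotlib.pyplot as plt
import dascore as dc

patch = dc.get_example_patch('example_event_2')

# Grab the axes to tweak labels and titles before displaying.
ax = patch.viz.waterfall(show=False)
ax.set_title("Customized waterfall")
plt.show()
```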
--------------------------------------------------------------------------------
/dascore/io/ap_sensing/core.py:
--------------------------------------------------------------------------------
1 | """
2 | Core modules for AP sensing support.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | import numpy as np
8 |
9 | import dascore as dc
10 | from dascore.constants import opt_timeable_types
11 | from dascore.io import FiberIO
12 | from dascore.utils.hdf5 import H5Reader
13 |
14 | from .utils import _get_attrs_dict, _get_patches, _get_version_string
15 |
16 |
17 | class APSensingPatchAttrs(dc.PatchAttrs):
18 | """Patch Attributes for AP sensing."""
19 |
20 | gauge_length: float = np.nan
21 | radians_to_nano_strain: float = np.nan
22 |
23 |
24 | class APSensingV10(FiberIO):
25 | """Support for APSensing V 10."""
26 |
27 | name = "APSensing"
28 | preferred_extensions = ("hdf5", "h5")
29 | version = "10"
30 |
31 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
32 | """
33 | Return format name and version string if AP sensing, else False.
34 |
35 | Parameters
36 | ----------
37 | resource
38 |             A path to the file which may contain AP Sensing data.
39 | """
40 | version_str = _get_version_string(resource)
41 | if version_str:
42 | return self.name, version_str
43 |
44 | def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
45 | """Scan an AP sensing file, return summary info about the contents."""
46 | file_version = _get_version_string(resource)
47 | extras = {
48 | "path": resource.filename,
49 | "file_format": self.name,
50 | "file_version": str(file_version),
51 | }
52 | attrs = _get_attrs_dict(resource)
53 | attrs.update(extras)
54 | return [APSensingPatchAttrs(**attrs)]
55 |
56 | def read(
57 | self,
58 | resource: H5Reader,
59 | time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
60 | distance: tuple[float | None, float | None] | None = None,
61 | **kwargs,
62 | ) -> dc.BaseSpool:
63 | """Read a single file with APSensing data inside."""
64 | patches = _get_patches(
65 | resource, time=time, distance=distance, attr_cls=APSensingPatchAttrs
66 | )
67 | return dc.spool(patches)
68 |
--------------------------------------------------------------------------------
/dascore/io/silixah5/core.py:
--------------------------------------------------------------------------------
1 | """
2 | Core modules for Silixa H5 support.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | import numpy as np
8 |
9 | import dascore as dc
10 | from dascore.constants import opt_timeable_types
11 | from dascore.io import FiberIO
12 | from dascore.utils.hdf5 import H5Reader
13 |
14 | from .utils import _get_attr, _get_patches, _get_version_string
15 |
16 |
17 | class SilixaPatchAttrs(dc.PatchAttrs):
18 | """Patch Attributes for Silixa hdf5 format."""
19 |
20 | gauge_length: float = np.nan
21 | gauge_length_units: str = "m"
22 | pulse_width: float = np.nan
23 | pulse_width_units: str = "ns"
24 |
25 |
26 | class SilixaH5V1(FiberIO):
27 | """Support for Silixa hdf5 format."""
28 |
29 | name = "Silixa_H5"
30 | preferred_extensions = ("hdf5", "h5")
31 | version = "1"
32 |
33 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
34 | """
35 | Return name and version string if Silixa hdf5 else False.
36 |
37 | Parameters
38 | ----------
39 | resource
40 |             A path to the file which may contain Silixa hdf5 data.
41 | """
42 | version_str = _get_version_string(resource, self.version)
43 | if version_str:
44 | return self.name, version_str
45 |
46 | def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
47 | """Scan a Silixa HDF5 file, return summary information on the contents."""
48 | file_version = _get_version_string(resource, self.version)
49 | extras = {
50 | "path": resource.filename,
51 | "file_format": self.name,
52 | "file_version": str(file_version),
53 | }
54 | attrs = _get_attr(resource, SilixaPatchAttrs, extras=extras)
55 | return [attrs]
56 |
57 | def read(
58 | self,
59 | resource: H5Reader,
60 | time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
61 | distance: tuple[float | None, float | None] | None = None,
62 | **kwargs,
63 | ) -> dc.BaseSpool:
64 | """Read a single file with Silixa H5 data inside."""
65 | patches = _get_patches(
66 | resource, time=time, distance=distance, attr_cls=SilixaPatchAttrs
67 | )
68 | return dc.spool(patches)
69 |
--------------------------------------------------------------------------------
/dascore/transform/fft.py:
--------------------------------------------------------------------------------
1 | """
2 | Deprecated module for Fourier transforms. Use
3 | [fourier](`dascore.transform.fourier`) instead.
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | from operator import mul
9 |
10 | import numpy as np
11 |
12 | from dascore.constants import PatchType
13 | from dascore.core.coords import get_coord
14 | from dascore.units import get_quantity
15 | from dascore.utils.deprecate import deprecate
16 | from dascore.utils.patch import _get_data_units_from_dims, patch_function
17 | from dascore.utils.time import to_float
18 | from dascore.utils.transformatter import FourierTransformatter
19 |
20 |
21 | @patch_function()
22 | @deprecate(
23 | info="The Patch transform rfft is deprecated. Use dft instead.",
24 | removed_in="0.2.0",
25 | )
26 | def rfft(patch: PatchType, dim="time") -> PatchType:
27 | """
28 | Perform a real fourier transform along the specified dimension.
29 |
30 | Examples
31 | --------
32 | >>> import dascore as dc
33 | >>> patch = dc.get_example_patch()
34 | >>>
35 | >>> # Transform along time dimension
36 | >>> ft_patch = patch.rfft(dim='time')
37 | >>> assert 'ft_time' in ft_patch.dims
38 |
39 | Notes
40 | -----
41 | - Use [dft](`dascore.transform.fourier.dft`) instead.
42 | - This function is not scaled as detailed in the dascore documentation.
43 | """
44 | assert dim in patch.dims
45 | axis = patch.get_axis(dim)
46 |
47 | ft = FourierTransformatter()
48 | data = patch.data
49 | sr = 1 / to_float(patch.attrs[f"{dim}_step"])
50 | freqs = np.fft.rfftfreq(data.shape[axis], sr)
51 | new_data = np.fft.rfft(data, axis=axis)
52 | # get new dims and data units
53 | new_dims = ft.rename_dims(dim)
54 | new_data_units = _get_data_units_from_dims(patch, dim, mul)
55 | attrs = patch.attrs.update(data_units=new_data_units)
56 | dims = [x if i != axis else new_dims[0] for i, x in enumerate(patch.dims)]
57 | # get new coord
58 | units = get_quantity(patch.coords.coord_map[dim].units)
59 | coord = get_coord(data=freqs, units=None if units is None else 1 / units)
60 | new_coords = {new_dims[0]: coord}
61 | new_coords.update({x: patch.coords.get_array(x) for x in patch.dims if x != dim})
62 | return patch.__class__(data=new_data, coords=new_coords, dims=dims, attrs=attrs)
63 |
--------------------------------------------------------------------------------
/benchmarks/test_io_benchmarks.py:
--------------------------------------------------------------------------------
1 | """Benchmarks for generic IO operations using pytest-codspeed."""
2 |
3 | from __future__ import annotations
4 |
5 | from contextlib import suppress
6 | from functools import cache
7 |
8 | import pytest
9 |
10 | import dascore as dc
11 | from dascore.exceptions import MissingOptionalDependencyError
12 | from dascore.utils.downloader import fetch, get_registry_df
13 |
14 |
15 | @cache
16 | def get_test_file_paths():
17 | """Get a dict of name: path for all files in data registry."""
18 | df = get_registry_df().loc[lambda x: ~x["name"].str.endswith(".csv")]
19 | out = {row["name"]: fetch(row["name"]) for _, row in df.iterrows()}
20 | return out
21 |
22 |
23 | @pytest.fixture(scope="session")
24 | def test_file_paths():
25 | """Get paths of test files."""
26 | return get_test_file_paths()
27 |
28 |
29 | class TestIOBenchmarks:
30 | """Benchmarks for IO operations."""
31 |
32 | @pytest.mark.benchmark
33 | def test_scan(self, test_file_paths):
34 | """Time for basic scanning of all datafiles."""
35 | for path in test_file_paths.values():
36 | with suppress(MissingOptionalDependencyError):
37 | dc.scan(path)
38 |
39 | @pytest.mark.benchmark
40 | def test_scan_df(self, test_file_paths):
41 | """Time for basic scanning of all datafiles to DataFrame."""
42 | for path in test_file_paths.values():
43 | with suppress(MissingOptionalDependencyError):
44 | dc.scan_to_df(path)
45 |
46 | @pytest.mark.benchmark
47 | def test_get_format(self, test_file_paths):
48 | """Time for format detection of all datafiles."""
49 | for path in test_file_paths.values():
50 | with suppress(MissingOptionalDependencyError):
51 | dc.get_format(path)
52 |
53 | @pytest.mark.benchmark
54 | def test_read(self, test_file_paths):
55 | """Time for basic reading of all datafiles."""
56 | for path in test_file_paths.values():
57 | with suppress(MissingOptionalDependencyError):
58 | dc.read(path)[0]
59 |
60 | @pytest.mark.benchmark
61 | def test_spool(self, test_file_paths):
62 | """Time for creating spools from all datafiles."""
63 | for path in test_file_paths.values():
64 | with suppress(MissingOptionalDependencyError):
65 | dc.spool(path)[0]
66 |
--------------------------------------------------------------------------------
/.github/workflows/runtests.yml:
--------------------------------------------------------------------------------
1 | # Run full test suite using conda env and all optional deps.
2 | name: TestCode
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | branches:
9 | - master
10 | paths:
11 | - 'pyproject.toml'
12 | - '**.py'
13 | - '.github/workflows/*.yml'
14 |
15 | env:
16 | # used to manually trigger cache reset. Just increment if needed.
17 | CACHE_NUMBER: 1
18 | # Ensure matplotlib doesn't try to show figures in CI
19 | MPLBACKEND: Agg
20 | QT_QPA_PLATFORM: offscreen
21 |
22 | # Cancel previous runs when this one starts.
23 | concurrency:
24 | group: TestCode-${{ github.event.pull_request.number || github.run_id }}
25 | cancel-in-progress: true
26 |
27 | jobs:
28 | # Runs the tests on combinations of the supported python/os matrix.
29 | test_code:
30 |
31 | timeout-minutes: 25
32 | runs-on: ${{ matrix.os }}
33 | strategy:
34 | matrix:
35 | os: [ubuntu-latest, macos-latest, windows-latest]
36 | python-version: ['3.10', '3.11', "3.12", "3.13"]
37 |
38 | # only run if CI isn't turned off
39 | if: github.event_name == 'push' || !contains(github.event.pull_request.labels.*.name, 'no_ci')
40 |
41 | env:
42 | # set conda environment file with dependencies
43 | env_file: "environment.yml"
44 |
45 | steps:
46 | - uses: actions/checkout@v4
47 | with:
48 | fetch-tags: "true"
49 | fetch-depth: '0'
50 |
51 | - uses: ./.github/actions/mamba-install-dascore
52 | with:
53 | python-version: ${{ matrix.python-version }}
54 | cache-number: ${{ env.CACHE_NUMBER }}
55 |
56 | # Runs test suite and calculates coverage
57 | - name: run test suite
58 | shell: bash -el {0}
59 | run: ./.github/test_code.sh
60 |
61 | # Runs examples in docstrings
62 | - name: test docstrings
63 | shell: bash -el {0}
64 | run: ./.github/test_code.sh doctest
65 |
66 | # Upload coverage files
67 | - uses: codecov/codecov-action@v4
68 | with:
69 | fail_ci_if_error: false
70 | files: ./coverage.xml
71 | flags: unittests
72 | name: PR_tests
73 | token: ${{ secrets.CODECOV_TOKEN }}
74 |
75 |
76 | # This is a very useful step for debugging, it allows you to ssh into the CI
77 | # machine (https://github.com/marketplace/actions/debugging-with-tmate).
78 | #
79 | #- name: Setup tmate session
80 | # uses: mxschmitt/action-tmate@v3
81 |
--------------------------------------------------------------------------------
/dascore/io/tdms/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading Silixa's TDMS DAS data format."""
2 |
3 | from __future__ import annotations
4 |
5 | import dascore as dc
6 | from dascore.constants import timeable_types
7 | from dascore.core import Patch
8 | from dascore.io import BinaryReader, FiberIO
9 |
10 | from .utils import _get_data, _get_default_attrs, _get_version_str
11 |
12 |
13 | class TDMSFormatterV4713(FiberIO):
14 | """Support for Silixa data format (tdms)."""
15 |
16 | name = "TDMS"
17 | version = "4713"
18 | preferred_extensions = ("tdms",)
19 | lead_in_length = 28
20 |
21 | def get_format(self, stream: BinaryReader, **kwargs) -> tuple[str, str] | bool:
22 | """
23 | Return a tuple of (TDMS, version) if TDMS else False.
24 |
25 | Parameters
26 | ----------
27 | stream
28 | A path to the file which may contain silixa data.
29 | """
30 | try:
31 | version_str = _get_version_str(stream)
32 | if version_str:
33 | return "TDMS", version_str
34 | else:
35 | return False
36 | except Exception:
37 | return False
38 |
39 | def scan(self, resource: BinaryReader, **kwargs) -> list[dc.PatchAttrs]:
40 | """Scan a tdms file, return summary information about the file's contents."""
41 | out = _get_default_attrs(resource)
42 | out["path"] = getattr(resource, "name", "")
43 | out["file_format"] = self.name
44 | out["file_version"] = self.version
45 | return [dc.PatchAttrs(**out)]
46 |
47 | def read(
48 | self,
49 | resource: BinaryReader,
50 | time: tuple[timeable_types, timeable_types] | None = None,
51 | distance: tuple[float, float] | None = None,
52 | **kwargs,
53 | ) -> dc.BaseSpool:
54 | """Read a silixa tdms file, return a DataArray."""
55 | # get all data, total amount of samples and associated attributes
56 | data, channel_length, attrs_full = _get_data(resource, lead_in_length=28)
57 | attrs = _get_default_attrs(resource, attrs_full)
58 | coords = dc.core.get_coord_manager(attrs.pop("coords"))
59 | # trim data if required
60 | if time is not None or distance is not None:
61 | coords, data = coords.select(data, time=time, distance=distance)
62 | if not data.size:
63 | return dc.spool([])
64 | patch = Patch(data=data, coords=coords, attrs=attrs)
65 | return dc.spool(patch)
66 |
--------------------------------------------------------------------------------
/tests/test_clients/test_filespool.py:
--------------------------------------------------------------------------------
1 | """Tests for the file spool."""
2 |
3 | from __future__ import annotations
4 |
5 | import pytest
6 |
7 | import dascore as dc
8 | from dascore.clients.filespool import FileSpool
9 | from dascore.utils.hdf5 import HDFPatchIndexManager
10 |
11 |
12 | class TestBasic:
13 | """Basic tests for the filespool."""
14 |
15 | def test_type(self, terra15_file_spool, terra15_v5_path):
16 | """Ensure a file spool was returned."""
17 | assert isinstance(terra15_file_spool, FileSpool)
18 | assert len(terra15_file_spool) == len(dc.scan_to_df(terra15_v5_path))
19 |
20 | def test_get_patch(self, terra15_file_spool):
21 | """Ensure the patch is returned."""
22 | patch = terra15_file_spool[0]
23 | assert isinstance(patch, dc.Patch)
24 |
25 | def test_init_from_filespool(self, terra15_file_spool):
26 | """Ensure FileSpool can init from FileSPool."""
27 | new = FileSpool(terra15_file_spool)
28 | assert isinstance(new, FileSpool)
29 |
30 | def test_str(self, terra15_file_spool):
31 | """Ensure file spool works."""
32 | out = str(terra15_file_spool)
33 | assert "FileSpool" in out
34 |
35 | def test_update(self, tmp_path_factory, random_patch):
36 | """Update should trigger indexing on formats that support it."""
37 | path = tmp_path_factory.mktemp("update_test") / "random.h5"
38 | dc.write(random_patch, path, "dasdae", "1")
39 | # pre-update
40 | spool = dc.spool(path)
41 | contents = spool.get_contents()
42 | assert not HDFPatchIndexManager(path).has_index
43 | new_spool = spool.update()
44 | assert HDFPatchIndexManager(path).has_index
45 | new_contents = new_spool.get_contents()
46 | assert contents.equals(new_contents)
47 |
48 | def test_raises_bad_file(self):
49 | """Simply ensures a bad file will raise."""
50 | with pytest.raises(FileNotFoundError, match="does not exist"):
51 | FileSpool("/not/a/directory")
52 |
53 | def test_chunk(self, terra15_file_spool):
54 | """Ensure chunking along time axis works with FileSpool."""
55 | spool = terra15_file_spool
56 | attrs = spool[0].attrs
57 | duration = attrs.time_max - attrs.time_min
58 | dt = duration / 3
59 | spool = terra15_file_spool.chunk(time=dt, keep_partial=True)
60 | assert len(spool) == 3
61 | for patch in spool:
62 | assert isinstance(patch, dc.Patch)
63 |
--------------------------------------------------------------------------------
/dascore/io/gdr/core.py:
--------------------------------------------------------------------------------
1 | """
2 | Core modules for reading GDR data.
3 |
4 | GDR files do not specify the GDR version directly. Instead, they use versions
5 | from other standards for the metadata and raw data. These can be found in the
6 | overview attributes MetadataStandard and RawDataStandard.
7 | """
8 |
9 | from __future__ import annotations
10 |
11 | import dascore as dc
12 | from dascore.constants import SpoolType
13 | from dascore.io import FiberIO
14 | from dascore.io.gdr.utils_das import (
15 | _get_attrs_coords_and_data,
16 | _get_version,
17 | _maybe_trim_data,
18 | )
19 | from dascore.utils.hdf5 import H5Reader
20 |
21 |
22 | class GDRPatchAttrs(dc.PatchAttrs):
23 | """Patch attrs for GDR files."""
24 |
25 | gauge_length: float
26 | gauge_length_units: str
27 | project_number: str = ""
28 |
29 |
30 | class GDR_V1(FiberIO): # noqa
31 | """
32 | Support for GDR version 1.
33 | """
34 |
35 | name = "GDR_DAS"
36 | preferred_extensions = ("hdf5", "h5")
37 | version = "1"
38 |
39 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
40 | """Determine if the resource belongs to this format."""
41 | return _get_version(resource)
42 |
43 | def read(self, resource: H5Reader, snap=True, **kwargs) -> SpoolType:
44 | """
45 | Read a resource belonging to this format.
46 |
47 | Parameters
48 | ----------
49 | resource
50 | The open h5 object.
51 | snap
52 | If True, snap each coordinate to be evenly sampled.
53 | **kwargs
54 | Passed to filtering coordinates.
55 | """
56 | attr_dict, cm, data = _get_attrs_coords_and_data(resource, snap=snap)
57 | if kwargs:
58 | cm, data = _maybe_trim_data(cm, data, **kwargs)
59 | if not data.size: # skip empty patches.
60 | return dc.spool([])
61 | attrs = GDRPatchAttrs(**attr_dict)
62 | patch = dc.Patch(coords=cm, data=data[:], attrs=attrs)
63 | return dc.spool([patch])
64 |
65 | def scan(self, resource: H5Reader, snap=True, **kwargs) -> list[dc.PatchAttrs]:
66 | """Get the attributes of a resource belong to this type."""
67 | attrs, cm, data = _get_attrs_coords_and_data(resource, snap)
68 | attrs["coords"] = cm.to_summary_dict()
69 | attrs["path"] = resource.filename
70 | attrs["file_format"] = self.name
71 | attrs["file_version"] = self.version
72 | return [dc.PatchAttrs(**attrs)]
73 |
--------------------------------------------------------------------------------
/dascore/io/prodml/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading prodML data."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 |
7 | import dascore as dc
8 | from dascore.constants import opt_timeable_types
9 | from dascore.io import FiberIO
10 | from dascore.utils.models import UnitQuantity, UTF8Str
11 |
12 | from ...utils.hdf5 import H5Reader
13 | from .utils import _get_prodml_version_str, _read_prodml, _yield_prodml_attrs_coords
14 |
15 |
16 | class ProdMLPatchAttrs(dc.PatchAttrs):
17 | """Patch attrs for ProdML."""
18 |
19 | pulse_width: float = np.nan
20 | pulse_width_units: UnitQuantity | None = None
21 | gauge_length: float = np.nan
22 | gauge_length_units: UnitQuantity | None = None
23 | schema_version: UTF8Str = ""
24 |
25 |
26 | class ProdMLV2_0(FiberIO): # noqa
27 | """Support for ProdML V 2.0."""
28 |
29 | name = "PRODML"
30 | preferred_extensions = ("hdf5", "h5")
31 | version = "2.0"
32 |
33 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
34 | """
35 | Return True if file contains prodML version 2 data else False.
36 |
37 | Parameters
38 | ----------
39 | resource
40 | A path to the file which may contain prodML data.
41 | """
42 | version_str = _get_prodml_version_str(resource)
43 | if version_str:
44 | return (self.name, version_str)
45 |
46 | def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
47 | """Scan a prodml file, return summary information about the file's contents."""
48 | file_version = _get_prodml_version_str(resource)
49 | extras = {
50 | "path": resource.filename,
51 | "file_format": self.name,
52 | "file_version": str(file_version),
53 | }
54 | out = []
55 | for attr, coords in _yield_prodml_attrs_coords(resource, extras=extras):
56 | out.append(attr.update(coords=coords))
57 | return out
58 |
59 | def read(
60 | self,
61 | resource: H5Reader,
62 | time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
63 | distance: tuple[float | None, float | None] | None = None,
64 | **kwargs,
65 | ) -> dc.BaseSpool:
66 | """Read a ProdML file."""
67 | patches = _read_prodml(resource, time=time, distance=distance)
68 | return dc.spool(patches)
69 |
70 |
71 | class ProdMLV2_1(ProdMLV2_0): # noqa
72 | """Support for ProdML V 2.1."""
73 |
74 | version = "2.1"
75 |
--------------------------------------------------------------------------------
/docs/references.bib:
--------------------------------------------------------------------------------
1 | @article{daley2013field,
2 | title={Field testing of fiber-optic distributed acoustic sensing (DAS) for subsurface seismic monitoring},
3 | author={Daley, Thomas M and Freifeld, Barry M and Ajo-Franklin, Jonathan and Dou, Shan and Pevzner, Roman and Shulakova, Valeriya and Kashikar, Sudhendu and Miller, Douglas E and Goetz, Julia and Henninges, Jan and others},
4 | journal={The Leading Edge},
5 | volume={32},
6 | number={6},
7 | pages={699--706},
8 | year={2013},
9 | publisher={Society of Exploration Geophysicists}
10 | }
11 |
12 | @article{lindsey2021fiber,
13 | title={Fiber-optic seismology},
14 | author={Lindsey, Nathaniel J and Martin, Eileen R},
15 | journal={Annual Review of Earth and Planetary Sciences},
16 | volume={49},
17 | pages={309--336},
18 | year={2021},
19 | publisher={Annual Reviews}
20 | }
21 |
22 | @incollection{park1998imaging,
23 | title={Imaging dispersion curves of surface waves on multi-channel record},
24 | author={Park, Choon Byong and Miller, Richard D and Xia, Jianghai},
25 | booktitle={SEG technical program expanded abstracts 1998},
26 | pages={1377--1380},
27 | year={1998},
28 | publisher={Society of Exploration Geophysicists}
29 | }
30 |
31 | @article{stanvek2022fracture,
32 | title={Fracture Imaging Using DAS-Recorded Microseismic Events},
33 | author={Stan{\v{e}}k, Franti{\v{s}}ek and Jin, Ge and Simmons, James},
34 | journal={Frontiers in Earth Science},
35 | volume={10},
36 | year={2022}
37 | }
38 |
39 | @article{yang2022filtering,
40 | title={Filtering strategies for deformation-rate distributed acoustic sensing},
41 | author={Yang, Jihyun and Shragge, Jeffrey and Jin, Ge},
42 | journal={Sensors},
43 | volume={22},
44 | number={22},
45 | pages={8777},
46 | year={2022},
47 | publisher={MDPI}
48 | }
49 |
50 | @article{lindsey2020broadband,
51 | title={On the broadband instrument response of fiber-optic DAS arrays},
52 | author={Lindsey, Nathaniel J and Rademacher, Horst and Ajo-Franklin, Jonathan B},
53 | journal={Journal of Geophysical Research: Solid Earth},
54 | volume={125},
55 | number={2},
56 | pages={e2019JB018145},
57 | year={2020},
58 | publisher={Wiley Online Library}
59 | }
60 | @article{schimmel1997noise,
61 | title={Noise reduction and detection of weak, coherent signals through phase-weighted stacks},
62 | author={Schimmel, Martin and Paulssen, Hanneke},
63 | journal={Geophysical Journal International},
64 | volume={130},
65 | number={2},
66 | pages={497--505},
67 | year={1997},
68 | publisher={Blackwell Publishing Ltd Oxford, UK}
69 | }
70 |
--------------------------------------------------------------------------------
/dascore/io/optodas/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading OptoDAS data."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 |
7 | import dascore as dc
8 | from dascore.constants import opt_timeable_types
9 | from dascore.io import FiberIO
10 | from dascore.utils.hdf5 import H5Reader
11 | from dascore.utils.models import UnitQuantity, UTF8Str
12 |
13 | from .utils import _get_opto_das_attrs, _get_opto_das_version_str, _read_opto_das
14 |
15 |
16 | class OptoDASPatchAttrs(dc.PatchAttrs):
17 | """Patch attrs for OptoDAS."""
18 |
19 | gauge_length: float = np.nan
20 | gauge_length_units: UnitQuantity | None = None
21 | schema_version: UTF8Str = ""
22 |
23 |
24 | class OptoDASV8(FiberIO):
25 | """Support for OptoDAS V 8."""
26 |
27 | name = "OptoDAS"
28 | preferred_extensions = ("hdf5", "h5")
29 | version = "8"
30 |
31 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
32 | """
33 | Return True if file contains OptoDAS version 8 data else False.
34 |
35 | Parameters
36 | ----------
37 | resource
38 |             A path to the file which may contain OptoDAS data.
39 | """
40 | version_str = _get_opto_das_version_str(resource)
41 | if version_str:
42 | return self.name, version_str
43 |
44 | def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
45 | """Scan a OptoDAS file, return summary information about the file's contents."""
46 | file_version = _get_opto_das_version_str(resource)
47 | extras = {
48 | "path": resource.filename,
49 | "file_format": self.name,
50 | "file_version": str(file_version),
51 | }
52 | attrs = _get_opto_das_attrs(resource)
53 | attrs.update(extras)
54 | return [OptoDASPatchAttrs(**attrs)]
55 |
56 | def read(
57 | self,
58 | resource: H5Reader,
59 | time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
60 | distance: tuple[float | None, float | None] | None = None,
61 | **kwargs,
62 | ) -> dc.BaseSpool:
63 | """Read a OptoDAS spool of patches."""
64 | patches = _read_opto_das(
65 | resource, time=time, distance=distance, attr_cls=OptoDASPatchAttrs
66 | )
67 | return dc.spool(patches)
68 |
69 |
70 | class OptoDASV9(OptoDASV8):
71 | """Support for OptoDAS V 9."""
72 |
73 | version = "9"
74 |
75 |
76 | class OptoDASV10(OptoDASV8):
77 | """Support for OptoDAS V 10."""
78 |
79 | version = "10"
80 |
--------------------------------------------------------------------------------
/dascore/io/sintela_binary/core.py:
--------------------------------------------------------------------------------
1 | """
2 | Core module for reading Sintela binary format.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | import numpy as np
8 |
9 | import dascore as dc
10 | from dascore.constants import opt_timeable_types
11 | from dascore.io import FiberIO
12 | from dascore.utils.io import BinaryReader
13 |
14 | from .utils import (
15 | _HEADER_SIZES,
16 | SYNC_WORD,
17 | _get_attrs_coords_header,
18 | _get_patches,
19 | _read_base_header,
20 | )
21 |
22 |
23 | class SintelaPatchAttrs(dc.PatchAttrs):
24 | """Patch Attributes for Sintela binary format."""
25 |
26 | gauge_length: float = np.nan
27 | gauge_length_units: str = "m"
28 |
29 |
30 | class SintelaBinaryV3(FiberIO):
31 | """Version 3 of Sintela's binary format."""
32 |
33 | name = "Sintela_Binary"
34 | preferred_extensions = ("raw",)
35 | version = "3"
36 |
37 | def get_format(self, resource: BinaryReader, **kwargs) -> tuple[str, str] | bool:
38 | """
39 | Return name and version string or False.
40 |
41 | Parameters
42 | ----------
43 | resource
44 |             A path to the file which may contain Sintela binary data.
45 | """
46 | resource.seek(0)
47 | base = _read_base_header(resource)
48 | sync = base["sync_word"]
49 | version = str(base["version"])
50 | size = base["header_size"]
51 | expected_size = _HEADER_SIZES.get(version, 0)
52 | if sync == SYNC_WORD and version == self.version and size == expected_size:
53 | return self.name, version
54 | return False
55 |
56 | def scan(self, resource: BinaryReader, **kwargs) -> list[dc.PatchAttrs]:
57 | """Scan a file, return summary information on the contents."""
58 | extras = {
59 | "path": resource.name,
60 | "file_format": self.name,
61 | "file_version": self.version,
62 | }
63 | attrs, _, _ = _get_attrs_coords_header(
64 | resource, SintelaPatchAttrs, extras=extras
65 | )
66 |
67 | return [attrs]
68 |
69 | def read(
70 | self,
71 | resource: BinaryReader,
72 | time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
73 | distance: tuple[float | None, float | None] | None = None,
74 | **kwargs,
75 | ) -> dc.BaseSpool:
76 | """Read a single Sintela binary file."""
77 | patch = _get_patches(
78 | resource, time=time, distance=distance, attr_class=SintelaPatchAttrs
79 | )
80 |
81 | return dc.spool(patch)
82 |
--------------------------------------------------------------------------------
/scripts/test_render_api.py:
--------------------------------------------------------------------------------
1 | """Tests for rendering api stuff."""
2 |
3 | from __future__ import annotations
4 |
5 | import pytest
6 |
7 | # These tests only work if doc deps are installed.
8 | pytest.importorskip("jinja2")
9 |
10 | from _render_api import to_quarto_code # noqa
11 |
12 |
13 | class TestToQuartoCode:
14 | """Tests for code parsing to quarto-style code strings."""
15 |
16 | def test_basic(self):
17 | """Ensure a simple example works."""
18 | code = """
19 | print("hey")
20 | """
21 | out = to_quarto_code(code)
22 | assert '```{python}\nprint("hey")\n```' == out
23 |
24 | def test_docstring(self):
25 | """Ensure docstring works."""
26 | code = """
27 | >>> print("bob")
28 | >>> for a in range(10):
29 | ... print(a)
30 | """
31 | out = to_quarto_code(code)
32 | assert " print(a)" in out.splitlines()
33 |
34 | def test_output_handled(self):
35 | """Docstrings can have outputs in them, we need to strip them out."""
36 | code = """
37 | >>> print("bob")
38 | bob
39 | """
40 | out = to_quarto_code(code)
41 | assert '```{python}\nprint("bob")\n```' == out
42 |
43 | def test_titles(self):
44 | """Ensure titles are carried forward."""
45 | code1 = """
46 | >>> ### Simple example
47 | >>> print("a")
48 | >>>
49 | >>> ### More complex example
50 | >>> print(1 + 2)
51 | """
52 | out1 = to_quarto_code(code1)
53 | code2 = """
54 |
55 | ### Simple example
56 | print("a")
57 | ### More complex example
58 | print(1 + 2)
59 |
60 | """
61 | out2 = to_quarto_code(code2)
62 | assert out1 == out2
63 |
64 | def test_options(self):
65 | """Ensure quarto options cary forward."""
66 | code1 = """
67 | >>> #| fold: true
68 | >>> print("bob")
69 | >>>
70 | >>> ### Another example
71 | >>> print("bill")
72 | """
73 | out = to_quarto_code(code1)
74 | expected_str = "#| fold: true"
75 | assert expected_str in out
76 | assert out.count(expected_str) == 2
77 |
78 | def test_combination(self):
79 | """A combination of stuff."""
80 | code1 = """
81 | >>> #| code-fold: true
82 | >>> # This is a base example
83 | >>> print(1 + 2)
84 | >>> ### This is a sub-section
85 | >>> print("cool beans")
86 | """
87 | out = to_quarto_code(code1)
88 | assert out
89 |
--------------------------------------------------------------------------------
/dascore/io/dashdf5/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading prodML data."""
2 |
3 | from __future__ import annotations
4 |
5 | import numpy as np
6 |
7 | import dascore as dc
8 | from dascore.constants import opt_timeable_types
9 | from dascore.io import FiberIO
10 | from dascore.utils.hdf5 import H5Reader
11 | from dascore.utils.models import UnitQuantity, UTF8Str
12 |
13 | from .utils import _get_cf_attrs, _get_cf_coords, _get_cf_version_str
14 |
15 |
16 | class ProdMLPatchAttrs(dc.PatchAttrs):
17 | """Patch attrs for ProdML."""
18 |
19 | pulse_width: float = np.nan
20 | pulse_width_units: UnitQuantity | None = None
21 | gauge_length: float = np.nan
22 | gauge_length_units: UnitQuantity | None = None
23 | schema_version: UTF8Str = ""
24 |
25 |
26 | class DASHDF5(FiberIO):
27 | """IO Support for DASHDF5 which uses CF version 1.7."""
28 |
29 | name = "DASHDF5"
30 | preferred_extensions = ("hdf5", "h5")
31 | version = "1.0"
32 |
33 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
34 | """
35 |         Return name and version string if the file contains DASHDF5 data, else False.
36 |
37 | Parameters
38 | ----------
39 | resource
40 |             A path to the file which may contain DASHDF5 data.
41 | """
42 | version_str = _get_cf_version_str(resource)
43 | if version_str:
44 | return self.name, version_str
45 |
46 | def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
47 | """Get metadata from file."""
48 | coords = _get_cf_coords(resource)
49 | extras = {
50 | "path": resource.filename,
51 | "file_format": self.name,
52 | "file_version": str(self.version),
53 | }
54 | attrs = _get_cf_attrs(resource, coords, extras=extras)
55 | return [attrs]
56 |
57 | def read(
58 | self,
59 | resource: H5Reader,
60 | time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
61 | channel: tuple[float | None, float | None] | None = None,
62 | **kwargs,
63 | ):
64 | """Read a CF file and return a Patch."""
65 | coords = _get_cf_coords(resource)
66 | coords_new, data = coords.select(
67 | array=resource["das"],
68 | time=time,
69 | channel=channel,
70 | )
71 | if not data.size:
72 | return dc.spool([])
73 | attrs = _get_cf_attrs(resource, coords_new)
74 | patch = dc.Patch(
75 | data=data, attrs=attrs, coords=coords_new, dims=coords_new.dims
76 | )
77 | return dc.spool(patch)
78 |
--------------------------------------------------------------------------------
/dascore/io/neubrex/utils_rfs.py:
--------------------------------------------------------------------------------
1 | """Utilities functions for Neubrex IO support"""
2 |
3 | import dascore as dc
4 | from dascore.utils.misc import maybe_get_items
5 |
6 |
7 | def _is_neubrex(h5fi):
8 | """Determine if the file is of Neubrex origin."""
9 | expected_keys = {"data", "depth", "stamps"}
10 | keys = set(h5fi.keys())
11 | if not expected_keys.issubset(keys):
12 | return False
13 | expected_attrs = {"DataUnitLabel", "StartDateTime", "EndDateTime"}
14 | data_attrs = set(h5fi["data"].attrs)
15 | if expected_attrs.issubset(data_attrs):
16 | return True
17 |
18 |
19 | def _get_coord_manager(h5fi, snap=True):
20 | """Get a coordinate manager from the file."""
21 |
22 | def _get_time_coord(h5fi, snap):
23 | """Get the time coordinate."""
24 | # Unix stamps are in us for test files, not sure if always true.
25 | unix_stamps = dc.to_datetime64(h5fi["stamps_unix"][:] / 1_000_000)
26 | time_coord = dc.get_coord(values=unix_stamps)
27 | if snap:
28 | time_coord = time_coord.snap()
29 | return time_coord
30 |
31 | def _get_dist_coord(h5fi):
32 | """Get the distance (depth) coordinate."""
33 | depth = h5fi["depth"][:]
34 | return dc.get_coord(values=depth)
35 |
36 | coords = {
37 | "time": _get_time_coord(h5fi, snap=snap),
38 | "distance": _get_dist_coord(h5fi),
39 | }
40 | return dc.get_coord_manager(coords=coords, dims=("time", "distance"))
41 |
42 |
43 | def _get_data_units_and_type(data_unit_label):
44 | """Get the units from contained string."""
45 | quantity = dc.get_quantity(data_unit_label.replace("-", ""))
46 | return quantity
47 |
48 |
49 | def _get_attr_dict(h5fi):
50 | """Get a dict of neubrex attributes."""
51 | mapping = {
52 | "API": "api",
53 | # "DataUnitLabel": "data_unit_label",
54 | "FieldName": "field_name",
55 | "WellID": "well_id",
56 | "WellName": "well_name",
57 | "WellBoreID": "well_bore_id",
58 | }
59 | data_attrs = dict(h5fi["data"].attrs)
60 | out = maybe_get_items(data_attrs, mapping)
61 | out["data_units"] = _get_data_units_and_type(data_attrs["DataUnitLabel"])
62 | return out
63 |
64 |
65 | def _maybe_trim_data(cm, data, time=None, distance=None, **kwargs):
66 | """Maybe trim the data."""
67 | if time is not None or distance is not None:
68 | cm, data = cm.select(time=time, distance=distance, array=data)
69 | return cm, data
70 |
71 |
72 | def _get_attrs_coords_and_data(h5fi, snap=True):
73 | """Return the attributes, coordinates, and data array."""
74 | cm = _get_coord_manager(h5fi, snap)
75 | attrs = _get_attr_dict(h5fi)
76 | data = h5fi["data"]
77 | return attrs, cm, data
78 |
--------------------------------------------------------------------------------
/dascore/utils/deprecate.py:
--------------------------------------------------------------------------------
1 | """
2 | A module for handling deprecations.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | import functools
8 | from collections.abc import Callable
9 | from typing import Any, TypeVar
10 |
11 | from typing_extensions import deprecated as dep
12 |
13 | F = TypeVar("F", bound=Callable[..., Any])
14 |
15 |
16 | def deprecate(
17 | info: str = "",
18 | *,
19 | since: str | None = None,
20 | removed_in: str | None = None,
21 | ) -> Callable[[F], F]:
22 | """
23 | Mark a function as deprecated.
24 |
25 | - Raises a runtime warning when called.
26 | - Annotates the function so editors/type checkers show deprecation.
27 | - Augments the docstring.
28 |
29 | Parameters
30 | ----------
31 | info
32 | Short message shown in warnings and editor hints. It is useful to specify
33 | what should be used in place of the deprecated function.
34 | since
35 | Version/date when deprecation started (for the message only).
36 | removed_in
37 | Version/date when the function will be removed (for the message only).
38 |
39 | Examples
40 | --------
41 | >>> from dascore.utils.deprecate import deprecate
42 | >>>
43 | >>> # Deprecate function so it issues a warning when used.
44 | >>> @deprecate(info="This function is deprecated.")
45 | ... def foo():
46 | ... pass
47 | """
48 |
49 | def _build_msg(func):
50 | """Build the message to emmit."""
51 | # Build a clear message for both runtime and typing hint
52 | qual = f"{func.__module__}.{getattr(func, '__qualname__', func.__name__)}"
53 | since_str = f" since {since}" if since else ""
54 | removed_str = f" and will be removed in {removed_in}" if removed_in else ""
55 | info_str = f" {info}" if info else ""
56 | msg = f"{qual} is deprecated{since_str}{removed_str}.{info_str}"
57 | return msg
58 |
59 | def _decorate(func: F) -> F:
60 |         # Wrapper preserving metadata; the runtime warning is attached by dep() below.
61 | @functools.wraps(func)
62 | def wrapper(*args: Any, **kwargs: Any):
63 | return func(*args, **kwargs)
64 |
65 | # Add a simple marker attribute some tools may inspect
66 | setattr(wrapper, "__deprecated__", True)
67 |
68 | # Prepend/augment the docstring so it shows in help() / tooltips
69 | dep_header = f"\n\n.. deprecated:: {since or ''}\n {info}"
70 | if removed_in:
71 | dep_header += f" (removal in {removed_in})"
72 | wrapper.__doc__ = (func.__doc__ or "").rstrip() + dep_header
73 |
74 | # Apply typing-level deprecation *to the wrapper* so editors see it
75 | msg = _build_msg(func)
76 | return dep(msg)(wrapper) # type: ignore[return-value]
77 |
78 | return _decorate
79 |
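
A small sketch (not part of the repository) of the decorator in action; it assumes the runtime behavior of `typing_extensions.deprecated`, which emits a `DeprecationWarning` when the wrapped callable is invoked.

```python
import warnings

from dascore.utils.deprecate import deprecate


@deprecate(info="Use new_func instead.", since="0.1.0", removed_in="0.2.0")
def old_func(x):
    """Return x unchanged."""
    return x


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert old_func(3) == 3  # still works, but should warn

print([str(w.message) for w in caught])
```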
--------------------------------------------------------------------------------
/benchmarks/notebooks/patch_v_xarray.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Patch VS DataArray\n",
8 | "\n",
9 | "A few simple profiling tests to compare dascore patches to xarray DataArrays"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import dascore as dc\n",
19 | "\n",
20 | "patch = dc.get_example_patch(\"random_das\")\n",
21 | "dar = patch.to_xarray()\n",
22 | "t1, t2 = patch.attrs['time_min'], patch.attrs['time_max']\n",
23 | "duration = t2 - t1"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "%%timeit\n",
33 | "patch.select(time=(t1, t2))"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "%%timeit \n",
43 | "dar.sel(time=slice(t1, t2))"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "%%timeit\n",
53 | "patch.select(time=(t1 + duration/2, t2))"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "%%timeit\n",
63 | "patch.select(time=(t1, t2-duration/2))"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "%%timeit \n",
73 | "dar.sel(time=slice(t1+ duration/2, t2))"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "%%timeit \n",
83 | "dar.sel(time=slice(t1, t2 - duration/2))"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": []
92 | }
93 | ],
94 | "metadata": {
95 | "kernelspec": {
96 | "display_name": "Python 3 (ipykernel)",
97 | "language": "python",
98 | "name": "python3"
99 | },
100 | "language_info": {
101 | "codemirror_mode": {
102 | "name": "ipython",
103 | "version": 3
104 | },
105 | "file_extension": ".py",
106 | "mimetype": "text/x-python",
107 | "name": "python",
108 | "nbconvert_exporter": "python",
109 | "pygments_lexer": "ipython3",
110 | "version": "3.10.11"
111 | }
112 | },
113 | "nbformat": 4,
114 | "nbformat_minor": 4
115 | }
116 |
--------------------------------------------------------------------------------
/dascore/io/terra15/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading Terra15 DAS data."""
2 |
3 | from __future__ import annotations
4 |
5 | import dascore as dc
6 | from dascore.constants import timeable_types
7 | from dascore.io import FiberIO
8 | from dascore.utils.hdf5 import H5Reader
9 |
10 | from .utils import (
11 | _get_terra15_version_str,
12 | _get_version_data_node,
13 | _read_terra15,
14 | _scan_terra15,
15 | )
16 |
17 |
18 | class Terra15FormatterV4(FiberIO):
19 | """Support for Terra15 data format, version 4."""
20 |
21 | name = "TERRA15"
22 | preferred_extensions = ("hdf5", "h5")
23 | version = "4"
24 |
25 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
26 | """
27 |         Return name and version string if the file contains terra15 data, else False.
28 |
29 | Parameters
30 | ----------
31 | resource
32 | A path to the file which may contain terra15 data.
33 | """
34 | version_str = _get_terra15_version_str(resource)
35 | if version_str:
36 | return (self.name, version_str)
37 |
38 | def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
39 | """Scan a terra15 v2 file, return summary information."""
40 | version, data_node = _get_version_data_node(resource)
41 | extras = {
42 | "path": resource.filename,
43 | "file_format": self.name,
44 | "file_version": str(version),
45 | }
46 | return _scan_terra15(resource, data_node, extras)
47 |
48 | def read(
49 | self,
50 | resource: H5Reader,
51 | time: tuple[timeable_types, timeable_types] | None = None,
52 | distance: tuple[float, float] | None = None,
53 | snap_dims: bool = True,
54 | **kwargs,
55 | ) -> dc.BaseSpool:
56 | """
57 | Read a terra15 file.
58 |
59 | Parameters
60 | ----------
61 | resource
62 | The path to the file.
63 | time
64 | A tuple for filtering time.
65 | distance
66 | A tuple for filtering distance.
67 | snap_dims
68 | If True, ensure the coordinates are evenly sampled monotonic.
69 | This will cause some loss in precision but it is usually
70 | negligible.
71 | """
72 | patch = _read_terra15(resource, time, distance, snap_dims=snap_dims)
73 | if not patch.data.size:
74 | return dc.spool([])
75 | return dc.spool(patch)
76 |
77 |
78 | class Terra15FormatterV5(Terra15FormatterV4):
79 | """Support for Terra15 data format, version 5."""
80 |
81 | version = "5"
82 |
83 |
84 | class Terra15FormatterV6(Terra15FormatterV4):
85 | """Support for Terra15 data format, version 5."""
86 |
87 | version = "6"
88 |
--------------------------------------------------------------------------------
/dascore/clients/filespool.py:
--------------------------------------------------------------------------------
1 | """A spool for working with a single file."""
2 |
3 | from __future__ import annotations
4 |
5 | import copy
6 | from pathlib import Path
7 |
8 | from rich.text import Text
9 | from typing_extensions import Self
10 |
11 | import dascore as dc
12 | from dascore.constants import PROGRESS_LEVELS, SpoolType
13 | from dascore.core.spool import BaseSpool, DataFrameSpool
14 | from dascore.io.core import FiberIO
15 | from dascore.utils.docs import compose_docstring
16 |
17 |
18 | class FileSpool(DataFrameSpool):
19 | """
20 | A spool for a single file.
21 |
22 | Parameters
23 | ----------
24 | path
25 | The path to the file.
26 | file_format
27 | The format name, optional.
28 | file_version
29 | The version string of the format, optional.
30 |
31 | Notes
32 | -----
33 |     Some file formats support storing multiple patches; this class is most
34 |     useful for those formats, but it should work with any dascore-supported format.
35 | """
36 |
37 | def __init__(
38 | self,
39 | path: str | Path,
40 | file_format: str | None = None,
41 | file_version: str | None = None,
42 | ):
43 | super().__init__()
44 | # Init file spool from another file spool
45 | if isinstance(path, self.__class__):
46 | self.__dict__.update(copy.deepcopy(path.__dict__))
47 | return
48 | self._path = Path(path)
49 | if not self._path.exists() or self._path.is_dir():
50 | msg = f"{path} does not exist or is a directory"
51 | raise FileNotFoundError(msg)
52 |
53 | _format, _version = dc.get_format(path, file_format, file_version)
54 | source_df = dc.scan_to_df(path, file_format=_format, file_version=_version)
55 | dfs = self._get_dummy_dataframes(source_df)
56 | self._df, self._source_df, self._instruction_df = dfs
57 | self._file_format = _format
58 | self._file_version = _version
59 |
60 | def __rich__(self):
61 | """Augment rich string with path."""
62 | base = super().__rich__()
63 | out = base + Text(f" Path: {self._path}")
64 | return out
65 |
66 | def _load_patch(self, kwargs) -> Self:
67 | """Given a row from the managed dataframe, return a patch."""
68 | return dc.read(**kwargs)[0]
69 |
70 | @compose_docstring(doc=BaseSpool.update.__doc__)
71 | def update(self: SpoolType, progress: PROGRESS_LEVELS = "standard") -> Self:
72 | """
73 | {doc}.
74 |
75 | Note: If the file format supports indexing (e.g. DASDAE) this will
76 | trigger an indexing of the file.
77 | """
78 | formater = FiberIO.manager.get_fiberio(
79 | format=self._file_format, version=self._file_version
80 | )
81 | getattr(formater, "index", lambda x: None)(self._path)
82 | return self
83 |
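
A usage sketch (not part of the repository) mirroring the test suite: spool a single DASDAE file and call `update()` so formats that support indexing build their index; the temporary path is illustrative only.

```python
import tempfile
from pathlib import Path

import dascore as dc

patch = dc.get_example_patch()
path = Path(tempfile.mkdtemp()) / "single_file.h5"
dc.write(patch, path, "dasdae")

# Spooling a single file path gives a file-backed spool.
spool = dc.spool(path)
print(spool[0])          # patches are loaded lazily from the file
spool = spool.update()   # DASDAE supports indexing, so this builds an index
```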
--------------------------------------------------------------------------------
/tests/test_io/test_wav/test_wav.py:
--------------------------------------------------------------------------------
1 | """Tests module for wave format."""
2 |
3 | from __future__ import annotations
4 |
5 | from pathlib import Path
6 |
7 | import pytest
8 | from scipy.io.wavfile import read as read_wav
9 |
10 | import dascore as dc
11 | from dascore.constants import ONE_SECOND
12 |
13 |
14 | class TestWriteWav:
15 | """Tests for writing wav format to disk."""
16 |
17 | @pytest.fixture(scope="class")
18 | def audio_patch(self):
19 | """Return the example sin wave patch."""
20 | return dc.get_example_patch("sin_wav", sample_rate=500)
21 |
22 | @pytest.fixture(scope="class")
23 | def wave_dir(self, audio_patch, tmp_path_factory):
24 | """Create a wave directory, return path."""
25 | new = Path(tmp_path_factory.mktemp("wavs"))
26 | dc.write(audio_patch, new, "wav")
27 | return new
28 |
29 | @pytest.fixture(scope="class")
30 | def audio_patch_non_distance_dim(self, audio_patch):
31 | """Create a patch that has a non-distance dimension in addition to time."""
32 | patch = audio_patch.rename_coords(distance="microphone")
33 | return patch
34 |
35 | def test_directory(self, wave_dir, audio_patch):
36 | """Sanity checks on wav directory."""
37 | assert wave_dir.exists()
38 | wavs = list(wave_dir.rglob("*.wav"))
39 | assert len(wavs) == len(audio_patch.coords.get_array("distance"))
40 |
41 | def test_write_single_file(self, audio_patch, tmp_path_factory):
42 | """Ensure a single file can be written."""
43 | path = tmp_path_factory.mktemp("wave_temp") / "temp.wav"
44 | dc.write(audio_patch, path, "wav")
45 | assert path.exists()
46 |
47 | def test_resample(self, audio_patch, tmp_path_factory):
48 | """Ensure resampling changes sampling rate in file."""
49 | path = tmp_path_factory.mktemp("wav_resample") / "resampled.wav"
50 | dc.write(audio_patch, path, "wav", resample_frequency=1000)
51 | (sr, ar) = read_wav(str(path))
52 | assert sr == 1000
53 |
54 | def test_write_non_distance_dims(
55 | self, audio_patch_non_distance_dim, tmp_path_factory
56 | ):
57 | """Ensure any non-time dimension still works."""
58 | path = tmp_path_factory.mktemp("wav_resample")
59 | patch = audio_patch_non_distance_dim
60 | patch.io.write(path, "wav")
61 | assert path.exists()
62 | # Verify number of WAV files
63 | wavs = list(path.rglob("*.wav"))
64 | assert len(wavs) == len(patch.coords.get_array("microphone"))
65 | # Verify file naming
66 | for mic_val in patch.coords.get_array("microphone"):
67 | assert path / f"microphone_{mic_val}.wav" in wavs
68 | # Verify content of first file
69 | sr, data = read_wav(str(wavs[0]))
70 | assert sr == int(ONE_SECOND / patch.get_coord("time").step)
71 |
--------------------------------------------------------------------------------
/tests/test_viz/test_wiggle.py:
--------------------------------------------------------------------------------
1 | """Tests for wiggle plots."""
2 |
3 | from __future__ import annotations
4 |
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import pytest
8 |
9 | import dascore as dc
10 |
11 |
12 | class TestWiggle:
13 | """Tests for wiggle plot."""
14 |
15 | @pytest.fixture()
16 | def small_patch(self, random_patch):
17 | """A small patch to cut back on plot time."""
18 | pa = random_patch.select(distance=(10, 15), samples=True)
19 | return pa
20 |
21 | def test_example(self):
22 | """Test the example from the docs."""
23 | patch = dc.examples.sin_wave_patch(
24 | sample_rate=1000,
25 | frequency=[200, 10],
26 | channel_count=2,
27 | )
28 | _ = patch.viz.wiggle()
29 |
30 | def test_returns_axes(self, random_patch):
31 | """Call waterfall plot, return."""
32 | data = np.array(random_patch.data)
33 | data[:100, :100] = 2.0 # create an origin block for testing axis line up
34 |         data[:100, -100:] = -2.0  # and an opposite-sign block at the other corner
35 | out = random_patch.new(data=data)
36 | ax = out.viz.wiggle()
37 | # check labels
38 | assert random_patch.dims[0].lower() in ax.get_ylabel().lower()
39 | assert random_patch.dims[1].lower() in ax.get_xlabel().lower()
40 | assert isinstance(ax, plt.Axes)
41 |
42 | def test_shading(self, small_patch):
43 | """Ensure shading parameter works."""
44 | _ = small_patch.viz.wiggle(shade=True)
45 |
46 | def test_non_time_axis(self, random_patch):
47 | """Ensure another dimension works."""
48 | sub_patch = random_patch.select(time=(10, 20), samples=True)
49 | ax = sub_patch.viz.wiggle(dim="distance")
50 | assert "distance" in str(ax.get_xlabel())
51 | assert "time" in str(ax.get_ylabel())
52 |
53 | def test_show(self, random_patch, monkeypatch):
54 | """Ensure show path is callable."""
55 | monkeypatch.setattr(plt, "show", lambda: None)
56 | random_patch.viz.wiggle(show=True)
57 |
58 | def test_1d_patch(self, random_patch):
59 | """Test that wiggle works with 1D patches (issue #462)."""
60 | # Create a 1D patch by reducing one dimension
61 | patch_1d = random_patch.mean("distance", dim_reduce="squeeze")
62 | # This should work without raising an assertion error
63 | ax = patch_1d.viz.wiggle()
64 | assert isinstance(ax, plt.Axes)
65 | # The remaining dimension should be on the x-axis
66 | assert patch_1d.dims[0].lower() in ax.get_xlabel().lower()
67 |
68 | def test_1d_patch_show(self, random_patch, monkeypatch):
69 | """Test that show works with 1D patches (issue #462)."""
70 | monkeypatch.setattr(plt, "show", lambda: None)
71 | patch_1d = random_patch.mean("distance", dim_reduce="squeeze")
72 | patch_1d.viz.wiggle(show=True)
73 |
--------------------------------------------------------------------------------
/docs/tutorial/transformations.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Transformations
3 | execute:
4 | warning: false
5 | ---
6 |
7 | In DASCore, transformations are operations which change the units (and often the dimensions) of a patch. Transforms can be found in the [transform module](`dascore.transform`) or accessed as `Patch` methods.
8 |
9 | # Discrete Fourier Transforms
10 |
11 | The [Discrete Fourier Transform](https://en.wikipedia.org/wiki/Discrete_Fourier_transform) (dft) is commonly used in many signal processing workflows. DASCore implements this as the [dft](`dascore.transform.fourier.dft`) patch method.
12 |
13 | ```{python}
14 | import numpy as np
15 | import dascore as dc
16 |
17 | # Get example patch, set unit to velocity (for demonstration)
18 | patch = dc.get_example_patch().set_units("m/s")
19 |
20 | transformed = patch.dft(dim="time")
21 |
22 | # Note how the dimension name has changed
23 | print(f"old_dims: {patch.dims} new dims: {transformed.dims}")
24 |
25 | # As have the units
26 | old_units = patch.attrs.data_units
27 | new_units = transformed.attrs.data_units
28 | print(f"old units: {old_units}, new units: {new_units}")
29 | ```
30 |
31 | :::{.callout-note}
32 | The transformed dimension names change; "time" becomes "ft_time" indicating the domain of the dimension has changed. The units are also updated. See the [note on Fourier transforms in DASCore](../notes/dft_notes.qmd) for more details.
33 | :::
34 |
35 | In many cases, it is advantageous to calculate only the Fourier transform coefficients corresponding to the non-negative frequencies (since the Fourier transform of a real signal is conjugate symmetric).
36 |
37 | ```{python}
38 | # Transform distance axis to Fourier domain using real fourier transform
39 | real_transform = patch.dft(dim='distance', real=True)
40 | print(real_transform.get_coord("ft_distance"))
41 | ```
42 |
43 | The Inverse Discrete Fourier Transform [idft](`dascore.transform.fourier.idft`) undoes the transformation.
44 |
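For example, a round trip might look like the following (a minimal sketch, assuming `idft` needs no arguments to invert the preceding `dft`):

```{python}
# Transform to the frequency domain and back again.
round_trip = patch.dft(dim="time").idft()
print(round_trip.dims)
```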
45 | # Short Time Fourier Transform
46 |
47 | Related to the Discrete Fourier Transform, the [Short-Time Fourier Transform](https://en.wikipedia.org/wiki/Short-time_Fourier_transform) is useful for analyzing the time-dependent frequency content of a signal. DASCore implements this as [stft](`dascore.transform.fourier.stft`) and the corresponding inverse [istft](`dascore.transform.fourier.istft`).
48 |
49 | ```{python}
50 | import numpy as np
51 | import dascore as dc
52 | from dascore.units import second, percent
53 |
54 | # Get example patch, set unit to velocity (for demonstration)
55 | patch = (
56 | dc.get_example_patch("chirp", channel_count=3)
57 | .set_units("m/s")
58 | )
59 |
60 | patch.viz.waterfall();
61 | ```
62 |
63 | ```{python}
64 | # Perform the transform and its inverse, then visualize the amplitude
65 | # spectrum of the first channel.
66 | transformed = (
67 | patch.stft(time=1*second, overlap=50*percent)
68 | )
69 | inverse = transformed.istft()
70 |
71 | transformed.abs().select(distance=0, samples=True).squeeze().viz.waterfall();
72 | ```
73 |
--------------------------------------------------------------------------------
/.github/workflows/test_doc_build.yml:
--------------------------------------------------------------------------------
1 | name: TestDocBuild
2 |
3 | on:
4 | pull_request:
5 | types: [labeled, synchronize]
6 |
7 | jobs:
8 | test_build_docs:
9 | if: |
10 | (github.event.action == 'labeled' && github.event.label.name == 'documentation')
11 | || (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'documentation'))
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v4
15 | with:
16 | fetch-tags: "true"
17 | fetch-depth: '0'
18 |
19 |
20 | - uses: ./.github/actions/mamba-install-dascore
21 | with:
22 | python-version: "3.12"
23 | environment-file: './.github/doc_environment.yml'
24 | cache-number: 1
25 |
26 | - uses: ./.github/actions/build-docs
27 |
28 | - uses: actions/upload-artifact@v4
29 | with:
30 | name: documentation_build_test
31 | path: ./docs/_site
32 | retention-days: 1
33 |
34 | - name: Generate documentation URL
35 | id: artifact_url
36 | env:
37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
38 | run: |
39 | run_id="${{ github.run_id }}"
40 | artifact_name="documentation_build_test"
41 | repo="${{ github.repository }}"
42 |
43 | artifact_id=$(
44 | curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
45 | -H "X-GitHub-Api-Version: 2022-11-28" \
46 | "https://api.github.com/repos/${repo}/actions/runs/${run_id}/artifacts" |
47 | jq --arg NAME "$artifact_name" -r '.artifacts[] | select(.name==$NAME) | .id'
48 | )
49 |
50 | if [[ -z "$artifact_id" ]]; then
51 | echo "❌ Unable to find artefact \"$artifact_name\" for run $run_id" >&2
52 | exit 1
53 | fi
54 | echo "found=true" >> "$GITHUB_OUTPUT"
55 | echo "url=https://github.com/$repo/actions/runs/$run_id/artifacts/$artifact_id" >> "$GITHUB_OUTPUT"
56 |
57 | # Determine if the automated documentation comment exists.
58 | - name: Find Comment
59 | uses: peter-evans/find-comment@v3
60 | id: fc
61 | with:
62 | issue-number: ${{ github.event.pull_request.number }}
63 | comment-author: 'github-actions[bot]'
64 | body-includes: "✅ Documentation built:"
65 |
66 | # Create a comment with a download link to build docs
67 | - name: Create or update comment
68 | uses: peter-evans/create-or-update-comment@v4
69 | with:
70 | comment-id: ${{ steps.fc.outputs.comment-id }}
71 | issue-number: ${{ github.event.pull_request.number }}
72 | body: |
73 | ✅ Documentation built:
74 | 👉 [Download](${{ steps.artifact_url.outputs.url }})
75 |             Note: You must be logged in to GitHub and be a DASDAE member to access the link.
76 | edit-mode: replace
77 |
--------------------------------------------------------------------------------
/tests/test_utils/test_mapping_utils.py:
--------------------------------------------------------------------------------
1 | """Simple tests for FrozenDict."""
2 |
3 | from __future__ import annotations
4 |
5 | from collections.abc import Mapping
6 |
7 | import pytest
8 |
9 | from dascore.utils.mapping import FrozenDict
10 |
11 |
12 | @pytest.fixture(scope="session")
13 | def frozen_dict():
14 | """Return an example frozen dict."""
15 | return FrozenDict({"bob": 1, "bill": 2})
16 |
17 |
18 | class TestFrozenDict:
19 | """Test chases for frozen dict."""
20 |
21 | def test_is_mapping(self, frozen_dict):
22 | """Frozen dict should follow mapping ABC."""
23 | assert isinstance(frozen_dict, Mapping)
24 |
25 | def test_init_on_dict(self):
26 | """Ensure a dict can be used to init frozendict."""
27 | out = FrozenDict({"bob": 1})
28 | assert isinstance(out, FrozenDict)
29 | assert "bob" in out
30 |
31 | def test_init_on_frozen_dict(self):
32 | """Ensure a frozen dict is valid input."""
33 | out = FrozenDict({"bob": 1})
34 | out2 = FrozenDict(out)
35 | assert out == out2
36 |
37 | def test_len(self, frozen_dict):
38 | """Ensure len works."""
39 | assert len(frozen_dict) == 2
40 |
41 | def test_contains(self, frozen_dict):
42 | """Ensure contains works."""
43 | assert "bob" in frozen_dict
44 | assert "bill" in frozen_dict
45 |
46 | def test_hash(self, frozen_dict):
47 | """A frozen dict should be a valid key in a dict/set."""
48 | out = {frozen_dict: 1}
49 | assert frozen_dict in out
50 |
51 | def test_init_on_keys(self):
52 | """Ensure dict can be inited with keys as well."""
53 | out = FrozenDict(bob=1, bill=2)
54 | assert isinstance(out, FrozenDict)
55 |
56 | def test_cant_add_keys(self, frozen_dict):
57 | """Ensure keys can't be added to the dict."""
58 | with pytest.raises(TypeError, match="not support item assignment"):
59 | frozen_dict["bob"] = 1
60 |
61 | with pytest.raises(TypeError, match="not support item assignment"):
62 | frozen_dict["new"] = 1
63 |
64 | def test_cant_mutate_original(self, frozen_dict):
65 | """
66 | Ensure the original dict can be changed and this does not affect frozen's
67 | contents.
68 | """
69 | original = {"one": 1, "two": 2}
70 | froz = FrozenDict(original)
71 | # test adding new key
72 | assert "three" not in froz
73 | original["three"] = 3
74 | assert "three" not in froz
75 | # test modifying existing key
76 | original["one"] = 11
77 | assert froz["one"] == 1
78 |
79 | def test_repr(self, frozen_dict):
80 | """Ensure repr returns a string"""
81 | repr_str = repr(frozen_dict)
82 | assert isinstance(repr_str, str)
83 |
84 | def test_new(self, frozen_dict):
85 | """Ensure new values can be added to the dict."""
86 | out = frozen_dict.new(bob=10)
87 | assert out["bob"] == 10
88 |
--------------------------------------------------------------------------------
/dascore/proc/wiener.py:
--------------------------------------------------------------------------------
1 | """
2 | Wiener filtering functionality for noise reduction.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | from scipy.signal import wiener
8 |
9 | from dascore.constants import PatchType
10 | from dascore.exceptions import ParameterError
11 | from dascore.utils.patch import get_patch_window_size, patch_function
12 |
13 |
14 | @patch_function()
15 | def wiener_filter(
16 | patch: PatchType,
17 | *,
18 | noise=None,
19 | samples=False,
20 | **kwargs,
21 | ):
22 | """
23 | Apply a Wiener filter to reduce noise in the patch data.
24 |
25 | The Wiener filter is an adaptive filter that reduces noise while preserving
26 | signal features. It estimates the local mean and variance within a sliding
27 | window and uses these statistics to determine the optimal filtering.
28 |
29 | Parameters
30 | ----------
31 | patch
32 | Input patch.
33 | noise
34 | The noise-power to use. If None, noise is estimated as the average
35 | of the local variance of the input.
36 | samples
37 | If True, values specified by kwargs are in samples not coordinate units.
38 | **kwargs
39 | Used to specify the window sizes for each dimension. Each selected
40 | dimension must be evenly sampled. It works best when the window samples
41 | are odd.
42 |
43 | Returns
44 | -------
45 | Patch with noise-reduced data.
46 |
47 | Examples
48 | --------
49 | >>> import numpy as np
50 | >>> import dascore as dc
51 | >>> # Get an example patch and add noise
52 | >>> patch = dc.get_example_patch()
53 | >>> noisy_data = patch.data + np.random.normal(0, 0.1, patch.data.shape)
54 | >>> noisy_patch = patch.update(data=noisy_data)
55 | >>>
56 | >>> # Apply Wiener filter along time dimension with 5-sample window
57 | >>> filtered = noisy_patch.wiener_filter(time=5, samples=True)
58 | >>> assert filtered.data.shape == patch.data.shape
59 | >>>
60 | >>> # Apply filter with custom noise parameter
61 | >>> filtered_custom = noisy_patch.wiener_filter(time=5, samples=True, noise=0.01)
62 | >>> assert isinstance(filtered_custom, dc.Patch)
63 | >>>
64 | >>> # Apply filter along multiple dimensions
65 | >>> filtered_2d = noisy_patch.wiener_filter(time=5, distance=3, samples=True)
66 | >>> assert filtered_2d.data.shape == patch.data.shape
67 |
68 | Notes
69 | -----
70 | This implementation uses scipy.signal.wiener which performs adaptive
71 | noise reduction based on local statistics within the specified window.
72 | """
73 | if not kwargs:
74 | msg = (
75 | "To use wiener_filter you must specify dimension-specific window "
76 | "sizes via kwargs (e.g., time=5, distance=3)"
77 | )
78 | raise ParameterError(msg)
79 |
80 | size = get_patch_window_size(patch, kwargs, samples, min_samples=1)
81 | filtered_data = wiener(patch.data, mysize=size, noise=noise)
82 | return patch.update(data=filtered_data)
83 |
--------------------------------------------------------------------------------
/dascore/utils/progress.py:
--------------------------------------------------------------------------------
1 | """Simple interface for progress markers."""
2 |
3 | from __future__ import annotations
4 |
5 | from collections.abc import Generator, Sized
6 | from contextlib import suppress
7 |
8 | import rich.progress as prog
9 |
10 | import dascore as dc
11 | from dascore.compat import Progress
12 | from dascore.constants import PROGRESS_LEVELS
13 |
14 |
15 | def get_progress_instance(progress: PROGRESS_LEVELS | Progress = "standard"):
16 | """
17 | Get the Rich progress bar instance based on complexity level.
18 | """
19 | # If a progress class is passed in, just use it.
20 | if isinstance(progress, Progress):
21 | return progress
22 | kwargs = {}
23 | progress_list = [
24 | prog.SpinnerColumn(),
25 | prog.TextColumn("[progress.description]{task.description}"),
26 | prog.BarColumn(bar_width=30),
27 | prog.TaskProgressColumn(),
28 | prog.TimeRemainingColumn(),
29 | prog.TimeElapsedColumn(),
30 | prog.MofNCompleteColumn(),
31 | ]
32 | if progress == "basic":
33 | # set the refresh rate very low and eliminate the spinner
34 | kwargs["refresh_per_second"] = 0.25
35 | progress_list = progress_list[1:]
36 | return Progress(*progress_list, **kwargs)
37 |
38 |
39 | def track(
40 | sequence: Sized | Generator,
41 | description: str,
42 | progress: PROGRESS_LEVELS | Progress = "standard",
43 | length: int | None = None,
44 | min_length: int = 1,
45 | ):
46 | """
47 |     A simple iterator for tracking progress while iterating over a sequence.
48 |
49 | Parameters
50 | ----------
51 | sequence
52 |         A sequence or generator to track the iteration over.
53 | description
54 |         A string describing the operation.
55 | progress
56 |         Options are:
57 |         None - disable the progress bar,
58 |         "basic" - reduced refresh rate,
59 |         "standard" - the normal progress bar.
60 |         Can also accept an instance of rich.progress.Progress.
61 | min_length
62 |         The minimum sequence length required to emit a progress bar.
63 | """
64 |     # Generators may not support len(), so fall back to the provided length.
65 | guess_len = length if length is not None else 0
66 | with suppress(TypeError, ValueError):
67 | length = len(sequence) if not guess_len else guess_len
68 | if length < min_length:
69 | length = 0
70 | # This is a dirty hack to allow debugging while running tests.
71 | # Otherwise, pdb doesn't work in any tracking scope.
72 | # See: https://github.com/Textualize/rich/issues/1053
73 | if dc._debug or not length or progress is None:
74 | yield from sequence
75 | return
76 | update = 1.0 if isinstance(progress, str) and progress == "standard" else 5.0
77 | progress = get_progress_instance(progress)
78 | with progress:
79 | yield from progress.track(
80 | sequence,
81 | total=length or len(sequence),
82 | description=description,
83 | update_period=update,
84 | )
85 |
--------------------------------------------------------------------------------
/tests/test_io/test_rsf/test_rsf.py:
--------------------------------------------------------------------------------
1 | """Tests for RSF format."""
2 |
3 | import os
4 | from pathlib import Path
5 |
6 | import numpy as np
7 | import pytest
8 |
9 | import dascore as dc
10 | from dascore.io.rsf import RSFV1
11 |
12 |
13 | class TestRsfWrite:
14 | """testing the rSF write out function."""
15 |
16 | def test_write_nopath(self, random_patch, tmp_path):
17 | """
18 | Test write function with no binary path specified.
19 | Data and header are combined.
20 | """
21 | spool = dc.spool(random_patch)
22 | path = tmp_path / "test_hdrdata.rsf"
23 | RSFV1().write(spool, path)
24 |
25 | assert path.exists()
26 | test_data = random_patch.data.astype(np.float32)
27 | dtype = np.dtype(test_data.dtype)
28 | file_esize = dtype.itemsize
29 | datasize = test_data.size * file_esize
30 | assert os.path.getsize(path) >= datasize
31 |
32 | def test_write_path(self, random_patch, tmp_path):
33 | """Test write function with different binary data path specified."""
34 | spool = dc.spool(random_patch)
35 | path = tmp_path / "test_hdr.rsf"
36 | datapath = tmp_path / "binary/test_data.rsf"
37 | RSFV1().write(spool, path, data_path=datapath)
38 |
39 | assert path.exists()
40 | newdatapath = Path(str(datapath) + "@")
41 | assert newdatapath.exists()
42 | test_data = random_patch.data.astype(np.float32)
43 | dtype = np.dtype(test_data.dtype)
44 | file_esize = dtype.itemsize
45 | datasize = test_data.size * file_esize
46 | assert os.path.getsize(newdatapath) == datasize
47 |
48 |     def test_write_cmplx(self, random_patch, tmp_path):
49 |         """Test write function for non-int and non-float values.
50 |         It should fail and raise:
51 |         ValueError("Data format is not integer or floating.").
52 |         """
53 | complex_patch = random_patch.dft("time")
54 | spool = dc.spool(complex_patch)
55 | path = tmp_path / "test_hdrcmplx.rsf"
56 | with pytest.raises(ValueError):
57 | RSFV1().write(spool, path)
58 |
59 |     def test_write_int(self, random_patch, tmp_path):
60 |         """Test write function for int values.
61 |         It should write the integer values out as floats.
62 |         """
63 | data = np.ones_like(random_patch.data, dtype=np.int32)
64 | int_patch = random_patch.new(data=data)
65 | spool = dc.spool(int_patch)
66 | path = tmp_path / "test_hdrint.rsf"
67 | datapath = tmp_path / "binary/test_int.rsf"
68 | RSFV1().write(spool, path, data_path=datapath)
69 |
70 | assert path.exists()
71 | newdatapath = Path(str(datapath) + "@")
72 | assert newdatapath.exists()
73 | test_data = data.astype(np.float32)
74 | dtype = np.dtype(test_data.dtype)
75 | file_esize = dtype.itemsize
76 | datasize = test_data.size * file_esize
77 | assert os.path.getsize(newdatapath) == datasize
78 |
--------------------------------------------------------------------------------
/dascore/io/dashdf5/utils.py:
--------------------------------------------------------------------------------
1 | """Utilities for terra15."""
2 |
3 | from __future__ import annotations
4 |
5 | import dascore as dc
6 | from dascore.core import get_coord
7 |
8 | # --- Getting format/version
9 |
10 | _REQUIRED_GROUPS = frozenset({"channel", "trace", "das", "t", "x", "y", "z"})
11 | _COORD_GROUPS = ("channel", "trace", "t", "x", "y", "z")
12 |
13 |
14 | # maps attributes on DAS group to attrs stored in patch.
15 | _ROOT_ATTR_MAPPING = {"project": "project"}
16 | _DAS_ATTR_MAPPING = {"long_name": "data_type"}
17 | _CRS_MAPPING = {"epsg_code": "epsg_code"}
18 |
19 |
20 | def _get_cf_version_str(hdf_fi) -> str | bool:
21 | """Return the version string for dashdf5 files."""
22 | conventions = hdf_fi.attrs.get("Conventions", [])
23 | cf_str = [x for x in conventions if x.startswith("CF-")]
24 | das_hdf_str = [x for x in conventions if x.startswith("DAS-HDF5")]
25 | has_req_groups = _REQUIRED_GROUPS.issubset(set(hdf_fi))
26 |     # Not a match if the conventions or required groups are missing.
27 | if len(cf_str) == 0 or len(das_hdf_str) == 0 or not has_req_groups:
28 | return False
29 | return das_hdf_str[0].replace("DAS-HDF5-", "")
30 |
31 |
32 | def _get_cf_coords(hdf_fi, minimal=False) -> dc.core.CoordManager:
33 | """
34 | Get a coordinate manager of full file range.
35 |
36 | Parameters
37 | ----------
38 | minimal
39 | If True, only return queryable parameters.
40 |
41 | """
42 |
43 | def _get_spatialcoord(hdf_fi, code):
44 | """Get spatial coord."""
45 | return get_coord(
46 | data=hdf_fi[code],
47 | units=hdf_fi[code].attrs["units"],
48 | )
49 |
50 | coords_map = {
51 | "channel": get_coord(data=hdf_fi["channel"][:]),
52 | "trace": get_coord(data=hdf_fi["trace"][:]),
53 | "time": get_coord(data=dc.to_datetime64(hdf_fi["t"][:])),
54 | "x": _get_spatialcoord(hdf_fi, "x"),
55 | "y": _get_spatialcoord(hdf_fi, "y"),
56 | "z": _get_spatialcoord(hdf_fi, "z"),
57 | }
58 | dim_map = {
59 | "time": ("time",),
60 | "trace": ("time",),
61 | "channel": ("channel",),
62 | "x": ("channel",),
63 | "y": ("channel",),
64 | "z": ("channel",),
65 | }
66 | dims = ("channel", "time")
67 | cm = dc.core.CoordManager(
68 | coord_map=coords_map,
69 | dim_map=dim_map,
70 | dims=dims,
71 | )
72 | # a bit of a hack to make sure data and coords align.
73 | if cm.shape != hdf_fi["das"].shape:
74 | cm = cm.transpose()
75 | return cm
76 |
77 |
78 | def _get_cf_attrs(hdf_fi, coords=None, extras=None):
79 | """Get attributes for CF file."""
80 | out = {"coords": coords or _get_cf_coords(hdf_fi)}
81 | out.update(extras or {})
82 | for n1, n2 in _ROOT_ATTR_MAPPING.items():
83 | out[n1] = hdf_fi.attrs.get(n2)
84 | for n1, n2 in _DAS_ATTR_MAPPING.items():
85 | out[n1] = getattr(hdf_fi.get("das", {}), "attrs", {}).get(n2)
86 | for n1, n2 in _CRS_MAPPING.items():
87 | out[n1] = getattr(hdf_fi.get("crs", {}), "attrs", {}).get(n2)
88 | return dc.PatchAttrs(**out)
89 |
--------------------------------------------------------------------------------
/dascore/io/febus/core.py:
--------------------------------------------------------------------------------
1 | """
2 | IO module for reading Febus data.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | import numpy as np
8 |
9 | import dascore as dc
10 | from dascore.constants import opt_timeable_types
11 | from dascore.io import FiberIO
12 | from dascore.utils.hdf5 import H5Reader
13 | from dascore.utils.models import UTF8Str
14 |
15 | from .utils import (
16 | _get_febus_version_str,
17 | _read_febus,
18 | _yield_attrs_coords,
19 | )
20 |
21 |
22 | class FebusPatchAttrs(dc.PatchAttrs):
23 | """
24 | Patch attrs for febus.
25 |
26 | Attributes
27 | ----------
28 | source
29 | The source designation
30 | zone
31 | The zone designations
32 | """
33 |
34 | gauge_length: float = np.nan
35 | gauge_length_units: str = "m"
36 | pulse_width: float = np.nan
37 | pulse_width_units: str = "m"
38 |
39 | group: str = ""
40 | source: str = ""
41 | zone: str = ""
42 |
43 | folog_a1_software_version: UTF8Str = ""
44 |
45 |
46 | class Febus2(FiberIO):
47 | """Support for Febus V 2.
48 |
49 | This should cover all versions 2.* of the format (maybe).
50 | """
51 |
52 | name = "febus"
53 | preferred_extensions = ("hdf5", "h5")
54 | version = "2"
55 |
56 | def get_format(self, resource: H5Reader, **kwargs) -> tuple[str, str] | bool:
57 | """
58 |         Return the format name and version if the file contains Febus version 2 data.
59 |
60 | Parameters
61 | ----------
62 | resource
63 |             A path to the file which may contain Febus data.
64 | """
65 | version_str = _get_febus_version_str(resource)
66 | if version_str:
67 | return self.name, version_str
68 |
69 | def scan(self, resource: H5Reader, **kwargs) -> list[dc.PatchAttrs]:
70 | """Scan a febus file, return summary information about the file's contents."""
71 | out = []
72 | file_version = _get_febus_version_str(resource)
73 | extras = {
74 | "path": resource.filename,
75 | "file_format": self.name,
76 | "file_version": str(file_version),
77 | }
78 | for attr, cm, _ in _yield_attrs_coords(resource):
79 | attr["coords"] = cm.to_summary_dict()
80 | attr.update(dict(extras))
81 | out.append(FebusPatchAttrs(**attr))
82 | return out
83 |
84 | def read(
85 | self,
86 | resource: H5Reader,
87 | time: tuple[opt_timeable_types, opt_timeable_types] | None = None,
88 | distance: tuple[float | None, float | None] | None = None,
89 | **kwargs,
90 | ) -> dc.BaseSpool:
91 | """Read a febus spool of patches."""
92 | patches = _read_febus(
93 | resource, time=time, distance=distance, attr_cls=FebusPatchAttrs
94 | )
95 | return dc.spool(patches)
96 |
97 |
98 | class Febus1(Febus2):
99 | """Support for Febus V 1.
100 |
101 |     This is here to support legacy Febus data (e.g., the PubDAS Valencia dataset).
102 | """
103 |
104 | version = "1"
105 |
--------------------------------------------------------------------------------
/docs/tutorial/file_io.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Working with Files
3 | execute:
4 | warning: false
5 | ---
6 |
7 | The following highlights some of DASCore's features for file input and output (IO).
8 |
9 |
10 | ## Writing Patches to Disk
11 |
12 | Patches can be written to disk using the `io` namespace. The following shows how to write a Patch to disk in the [DASDAE format](`dascore.io.dasdae`).
13 |
14 | ```{python}
15 | from pathlib import Path
16 | import dascore as dc
17 |
18 | write_path = Path("output_file.h5")
19 | patch = dc.get_example_patch()
20 |
21 | patch.io.write(write_path, "dasdae")
22 | ```
23 |
24 | ```{python}
25 | #| echo: false
26 | if write_path.exists():
27 | write_path.unlink()
28 | ```
29 |
30 | ## DirectorySpool
31 |
32 | The [DirectorySpool](`dascore.clients.dirspool.DirectorySpool`) is used to retrieve data from a directory of dascore-readable files. It has the same interface as other spools and is created with the [`dascore.spool`](`dascore.spool`) function.
33 |
34 | For example:
35 |
36 | ```{python}
37 | #| output: false
38 |
39 | import dascore
40 | from dascore import examples as ex
41 |
42 | # Get a directory with several files
43 | diverse_spool = dascore.get_example_spool('diverse_das')
44 | path = ex.spool_to_directory(diverse_spool)
45 |
46 | # Create a spool for interacting with the files in the directory.
47 | spool = (
48 | dascore.spool(path)
49 | .select(network='das2') # sub-select das2 network
50 | .select(time=(None, '2022-01-01')) # unselect anything after 2022
51 | .chunk(time=2, overlap=0.5) # change the chunking of the patches
52 | )
53 |
54 | # Iterate each patch and do something with it
55 | for patch in spool:
56 | ...
57 | ```
58 |
59 | ## Converting Patches to Other Library Formats
60 |
61 | The `Patch.io` namespace also includes functionality for converting `Patch` instances to datastructures used by other libraries including Pandas, Xarray, and ObsPy. See the [external conversion recipe](../recipes/external_conversion.qmd) for examples.
62 |
63 |
64 | ## Directory Indexer
65 | The [DirectoryIndexer](`dascore.io.indexer.DirectoryIndexer`) is used to track the contents of a directory which
66 | contains fiber data. It creates a small, hidden HDF index file at the top
67 | of the directory which can be efficiently queried for directory contents
68 | (it is used internally by the `DirectorySpool`).
69 |
70 |
71 | ```{python}
72 | #| output: false
73 | import dascore
74 | from dascore.io.indexer import DirectoryIndexer
75 | from dascore import examples as ex
76 |
77 | # Get a directory with several files
78 | diverse_spool = dascore.get_example_spool('diverse_das')
79 | path = ex.spool_to_directory(diverse_spool)
80 |
81 | # Create an indexer and update the index. This will include any new files
82 | # with timestamps newer than the last update, or create a new HDF index file
83 | # if one does not yet exist.
84 | indexer = DirectoryIndexer(path).update()
85 |
86 | # get the contents of the directory's files
87 | df = indexer.get_contents()
88 |
89 | # This dataframe can be used to ascertain data availability, detect gaps, etc.
90 | ```
91 |
92 | ```{python}
93 | #| echo: false
94 |
95 | from IPython.display import display
96 |
97 | display(df.head())
98 | ```
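As a rough sketch of the sort of query this enables (assuming the index contents use `time_min` and `time_max` columns, as suggested by the table above), one could look for time gaps between adjacent files:

```{python}
#| eval: false
# Sort files by start time and compute the gap to the next file's start.
sorted_df = df.sort_values("time_min")
gaps = sorted_df["time_min"].shift(-1) - sorted_df["time_max"]
print(gaps.describe())
```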
99 |
--------------------------------------------------------------------------------
/docs/recipes/parallelization.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Parallel Processing"
3 | execute:
4 | eval: false
5 | ---
6 |
7 | This recipe shows a few strategies to parallelize "embarrassingly parallel" spool processing workflows.
8 |
9 | # Processes and Threads
10 | [dascore.Spool.map](`dascore.BaseSpool.map`) is the easiest way to process patches in a spool in parallel. Here is an example using the Python standard library module [concurrent.futures](https://docs.python.org/3/library/concurrent.futures.html):
11 |
12 | ```{python}
13 | from concurrent.futures import ProcessPoolExecutor
14 |
15 | import dascore as dc
16 |
17 | def my_patch_processing_function(patch):
18 | """A custom function for processing patches."""
19 | ...
20 |
21 | spool = dc.get_example_spool("random_das")
22 |
23 | executor = ProcessPoolExecutor()
24 |
25 | spool.map(my_patch_processing_function, client=executor)
26 | ```
27 |
28 | The `ThreadPoolExecutor` from the same module will also work, but due to Python's global interpreter lock (GIL) it may not provide much of a speed-up for CPU-bound processing.
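For completeness, the same pattern with threads looks like this (a sketch that reuses the function and spool defined above):

```{python}
from concurrent.futures import ThreadPoolExecutor

# Threads avoid copying patch data between processes, but CPU-bound
# patch functions are still limited by the GIL.
thread_executor = ThreadPoolExecutor()
spool.map(my_patch_processing_function, client=thread_executor)
```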
29 |
30 | There are two downsides to this approach. First, if the patches aren't chunked adequately it may exhaust the available memory. Second, it will only work on a single machine. The next section presents a more scalable option.
31 |
32 | # MPI4Py
33 |
34 | This section shows how to use the "mpi4py" library to parallelize dascore code.
35 |
36 | ## Installation
37 |
38 | First, make sure you have installed DASCore on your machine. See [DASCore Installation](https://dascore.org/#:~:text=0.2%29%3B-,Installation). Second, you need to properly install the mpi4py library. After installing and loading the [Open MPI](https://docs.open-mpi.org/en/v5.0.x/installing-open-mpi/quickstart.html) module on your machine (e.g., on Linux: `module load path/to/mpi/openmpi/gcc/compiler`), install [mpi4py](https://pypi.org/project/mpi4py/). It may be easier to install both through conda-forge as shown below:
39 |
40 | ```bash
41 | conda install -c conda-forge mpi4py openmpi
42 | ```
43 |
44 | Please note that this procedure was tested with Python 3.11 and Open MPI (GCC) 3.1.3.
45 |
46 |
47 | ## Parallel script
48 |
49 | Here is an example that distributes the patches in a spool across MPI processes:
50 |
51 | ```{.python filename="mpi_spool.py"}
52 | #| execute: false
53 |
54 | import sys
55 |
56 | import dascore as dc
57 |
58 | from mpi4py import MPI
59 |
60 |
61 | # Load the spool
62 | spool = dc.get_example_spool("random_das")
63 |
64 | # Initiate MPI
65 | comm = MPI.COMM_WORLD
66 | rank = comm.Get_rank()
67 | size = comm.Get_size()
68 |
69 | # Make sure the spool is not empty
70 | if len(spool) < 1:
71 |     if rank == 0:
72 |         raise ValueError('No patches found within the spool.')
73 |     else:
74 |         sys.exit(1)
75 |
76 | for i in range(rank, len(spool), size):
77 | patch = spool[i]
78 | print(f"rank: {rank}, patch number: {i}, patch: {patch}")
79 | ...
80 |
81 | comm.barrier()
82 | sys.exit(0)
83 | ```
84 |
85 | ## Run the script
86 |
87 | If you would like to run the `mpi_spool.py` script using `n = 4` processes (each process runs the script and works on its own share of the patches), you can use:
88 |
89 | ```bash
90 | mpiexec -n 4 python mpi_spool.py
91 | ```
92 |
93 | or
94 |
95 | ```bash
96 | mpirun -n 4 python mpi_spool.py
97 | ```
98 |
--------------------------------------------------------------------------------
/tests/test_utils/test_doc_utils.py:
--------------------------------------------------------------------------------
1 | """Tests for docstring utils."""
2 |
3 | from __future__ import annotations
4 |
5 | import textwrap
6 |
7 | import pandas as pd
8 |
9 | from dascore.core.attrs import PatchAttrs
10 | from dascore.examples import EXAMPLE_PATCHES
11 | from dascore.utils.docs import compose_docstring, format_dtypes, objs_to_doc_df
12 |
13 |
14 | class TestFormatDtypes:
15 | """Tests for formatting datatypes to display in docstrings."""
16 |
17 | def test_formatting(self):
18 | """Test for formatting StationDtypes."""
19 | out = format_dtypes(PatchAttrs.__annotations__)
20 | assert isinstance(out, str)
21 |
22 |
23 | class TestDocstring:
24 |     """Tests for the simple docstring substitution function (adapted from ObsPlus)."""
25 |
26 | def count_white_space(self, some_str):
27 | """Count the number of whitespace chars in a str."""
28 | return len(some_str) - len(some_str.lstrip(" "))
29 |
30 | def test_docstring(self):
31 | """Ensure docstrings can be composed with the docstring decorator."""
32 | params = textwrap.dedent(
33 | """
34 | Parameters
35 | ----------
36 | a
37 | a
38 | b
39 | b
40 | """
41 | )
42 |
43 | @compose_docstring(params=params)
44 | def testfun1():
45 | """
46 | A simple test function.
47 |
48 | {params}
49 | """
50 |
51 | assert "Parameters" in testfun1.__doc__
52 | line = next(x for x in testfun1.__doc__.split("\n") if "Parameters" in x)
53 | base_spaces = line.split("Parameters")[0]
54 | # py3.13+ automatically strips white space from docstrings so 12
55 | # and 0 are valid lengths.
56 | assert len(base_spaces) in {12, 0}
57 |
58 | def test_list_indent(self):
59 | """Ensure lists are indented equally."""
60 | str_list = ["Hey", "who", "moved", "my", "cheese!?"]
61 |
62 | @compose_docstring(params=str_list)
63 | def dummy_func():
64 | """
65 | Some useful information indeed:
66 | {params}.
67 | """
68 |
69 | doc_str_list = dummy_func.__doc__.split("\n")
70 | # the number of spaces between each list element should be the same.
71 | list_lines = doc_str_list[2:-1]
72 | white_space_counts = [self.count_white_space(x) for x in list_lines]
73 | # all whitespace counts should be the same for the list lines.
74 | assert len(set(white_space_counts)) == 1
75 |
76 |
77 | class TestObjToDocDF:
78 | """Tests for generating documentation dataframes."""
79 |
80 | def test_examples_cross_ref(self):
81 | """Tests for documenting examples with cross references."""
82 | df = objs_to_doc_df(EXAMPLE_PATCHES, cross_reference=True)
83 | assert "(`dascore.examples" in df["Name"].iloc[0]
84 | assert isinstance(df, pd.DataFrame)
85 |
86 | def test_example_no_cross_ref(self):
87 | """Tests for documenting examples without cross references."""
88 | df = objs_to_doc_df(EXAMPLE_PATCHES, cross_reference=False)
89 | assert "(`dascore.examples" not in df["Name"].iloc[0]
90 | assert isinstance(df, pd.DataFrame)
91 |
--------------------------------------------------------------------------------
/tests/test_transform/test_spectro_transform.py:
--------------------------------------------------------------------------------
1 | """Tests for the spectrogram transformation."""
2 |
3 | from __future__ import annotations
4 |
5 | import pytest
6 |
7 | import dascore as dc
8 | from dascore.transform.spectro import spectrogram
9 |
10 |
11 | class TestSpectroTransform:
12 | """Tests for transforming regular patches into spectrograms."""
13 |
14 | @pytest.fixture()
15 | def spec_patch(self, random_patch):
16 | """Simple patch trasnformed to spectrogram."""
17 | patch = random_patch.set_units("m/s")
18 | return patch.spectrogram("time")
19 |
20 | def test_units(self, random_patch):
21 | """Ensure units were properly converted."""
22 | patch = random_patch.set_units("m/s")
23 | spec_patch = patch.spectrogram("time")
24 | # first check coord units
25 | coord1 = patch.get_coord("time")
26 | coord2 = spec_patch.get_coord("ft_time")
27 | units1 = dc.get_quantity(coord1.units)
28 | units2 = dc.get_quantity(coord2.units)
29 | assert units1 == 1 / units2
30 | # then check data units
31 | data_units1 = dc.get_quantity(patch.attrs.data_units)
32 | data_units2 = dc.get_quantity(spec_patch.attrs.data_units)
33 | assert data_units1 * units1 == data_units2
34 |
35 | def test_spec_patch_dimensions(self, spec_patch, random_patch):
36 | """Ensure expected dimensions now exist."""
37 | dims = spec_patch.dims
38 | # dims should have been added
39 | assert len(dims) > len(random_patch.dims)
40 | assert set(dims) == (set(random_patch.dims) | {"ft_time"})
41 |
42 | def test_transformed_coord(self, spec_patch, random_patch):
43 | """
44 | The start values of transformed dimension should be comparable and the
45 | units unchanged.
46 | """
47 | time_1 = random_patch.get_coord("time")
48 | time_2 = spec_patch.get_coord("time")
49 | assert time_1.units == time_2.units
50 |
51 | def test_time_first(self, random_patch):
52 | """Ensure the spectrogram still works when time dim is first."""
53 | transposed = random_patch.transpose(*("time", "distance"))
54 | out = spectrogram(transposed, dim="time")
55 | assert isinstance(out, dc.Patch)
56 |
57 | def test_transpose(self, random_patch):
58 | """Ensure when patch dim order is different it still works."""
59 | out = random_patch.transpose().spectrogram("time")
60 | assert set(out.dims) == (set(random_patch.dims) | {"ft_time"})
61 |
62 | def test_distance_dim(self, random_patch):
63 | """Ensure distance dimension works."""
64 | out = random_patch.transpose().spectrogram("distance")
65 | assert set(out.dims) == (set(random_patch.dims) | {"ft_distance"})
66 |
67 | def test_time_range_unchanged(self, dispersion_patch):
68 | """Ensure time axis isn't outside original bounds, see #286."""
69 | spectro = dispersion_patch.spectrogram(dim="time")
70 | # the new time on the spectrogram should be contained in the original
71 | original_time = dispersion_patch.get_coord("time")
72 | new_time = spectro.get_coord("time")
73 | assert new_time.min() >= original_time.min()
74 | assert new_time.max() <= original_time.max()
75 |
--------------------------------------------------------------------------------
/docs/contributing/adding_test_data.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Adding Test Data
3 | ---
4 |
5 | There are a few different ways to add test data to DASCore. The key, however, is to ensure test files and generated patches are small (a few MB at most) so the documentation and test suite still run quickly.
6 |
7 | # Adding functions which create example data
8 |
9 | The [examples module](`dascore.examples`) contains several functions for creating example `Patch` and `Spool` instances. You can add a new function in that module which creates a new patch or spool, then just register the function so it can be called from `dc.get_example_patch` or `dc.get_example_spool`. These should be simple objects which can be generated within python. If you need to download a file see
10 | [adding a data file](#adding_a_data_file).
11 |
12 | :::{.callout-note}
13 | All example functions should have either no arguments or keyword arguments with
14 | default values.
15 | :::
16 |
17 | ```{python, filename="dascore.examples.py"}
18 | #| eval: false
19 |
20 | # Register an example patch function
21 | @register_func(EXAMPLE_PATCHES, key="new_das_patch")
22 | def create_example_patch(argument_1='default_value'):
23 | ...
24 |
25 | # Register an example spool function
26 | @register_func(EXAMPLE_SPOOLS, key="new_das_spool")
27 | def create_example_spool(another_value=None):
28 | ...
29 | ```
30 |
31 | The new example patches/spools can then be created via
32 |
33 | ```{python}
34 | #| eval: false
35 | import dascore as dc
36 |
37 | patch_example = dc.get_example_patch("new_das_patch", argument_1="bob")
38 |
39 | spool_example = dc.get_example_spool("new_das_spool")
40 | ```
41 |
42 | If, in the test code, the example patch or spool is used only once, just call the get_example function in the test. If it is needed multiple times, consider putting it in a fixture. See [testing](./testing.qmd) for more on fixtures.
43 |
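For instance, a fixture wrapping the hypothetical example registered above might look like this:

```{python}
#| eval: false
import pytest

import dascore as dc


@pytest.fixture()
def new_das_patch():
    """An example patch shared by several tests."""
    return dc.get_example_patch("new_das_patch", argument_1="bob")
```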
44 | # Adding a data file
45 |
46 | Of course, not all data can easily be generated in python. For example, testing [support for new file formats](./new_format.qmd) typically requires a test file.
47 |
48 | If you have a small file that isn't already hosted on a permanent site, you can put it into [dasdae's data repo](https://github.com/DASDAE/test_data). Simply clone the repo, add your file, and push back to master, or open a PR on a separate branch and someone will merge it.
49 |
50 | Next, add your file to DASCore's data registry (dascore/data_registry.txt).
51 | You will need the sha256 hash of your test file; you can compute it with [Pooch's file_hash function](https://www.fatiando.org/pooch/latest/api/generated/pooch.file_hash.html) as shown below, and you can create the proper download url using the other entries as examples.
52 |
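For example, assuming the new file is saved locally as `jingle_test_file.jgl` (the hypothetical name used below), the hash can be computed with:

```{python}
#| eval: false
import pooch

# Compute the sha256 hash to paste into dascore/data_registry.txt.
print(pooch.file_hash("jingle_test_file.jgl", alg="sha256"))
```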
53 | The name, hash, and url might look something like this:
54 | ```
55 | jingle_test_file.jgl
56 | 12e087d2c1cd08c9afd18334e17e21787be0b646151b39802541ee11a516976a
57 | https://github.com/dasdae/test_data/raw/master/das/jingle_test_file.jgl
58 | ```
59 |
60 | ```{python}
61 | #| eval: false
62 | from dascore.utils.downloader import fetch
63 | path = fetch("jingle_test_file.jgl")
64 | ```
65 |
66 | If you need to create a smaller version of an existing hdf5 file you can use the `modify_h5_file.py` in DASCore's scripts directory. It will require some modifications, but shows how to copy and modify datasets and attributes.
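As a rough illustration of the idea (not the actual script; the file and dataset names are hypothetical), copying a truncated dataset with `h5py` looks roughly like this:

```{python}
#| eval: false
import h5py

with h5py.File("big_file.h5", "r") as src, h5py.File("small_file.h5", "w") as dst:
    # Copy root attributes, then a truncated version of one large dataset.
    dst.attrs.update(src.attrs)
    small = dst.create_dataset("das", data=src["das"][:100])
    small.attrs.update(src["das"].attrs)
```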
67 |
--------------------------------------------------------------------------------
/dascore/io/xml_binary/core.py:
--------------------------------------------------------------------------------
1 | """IO module for reading binary raw format DAS data."""
2 |
3 | from __future__ import annotations
4 |
5 | from pathlib import Path
6 | from xml.etree.ElementTree import ParseError
7 |
8 | import numpy as np
9 | from pydantic import ValidationError
10 |
11 | import dascore as dc
12 | from dascore.io import FiberIO
13 | from dascore.utils.models import UTF8Str
14 |
15 | from .utils import _load_patches, _paths_to_attrs, _read_xml_metadata
16 |
17 |
18 | class BinaryPatchAttrs(dc.PatchAttrs):
19 | """Patch attrs for Binary."""
20 |
21 | pulse_width_ns: float = np.nan
22 | gauge_length: float = np.nan
23 | instrument_id: UTF8Str = ""
24 | distance_units: UTF8Str = ""
25 | zone_name: UTF8Str = ""
26 |
27 |
28 | class XMLBinaryV1(FiberIO):
29 | """Support for binary data format with xml metadata."""
30 |
31 | name = "XMLBinary"
32 | version = "1"
33 | input_type = "directory"
34 |
35 | _metadata_name = "metadata.xml"
36 | # File extension for data files.
37 | _data_extension = ".raw"
38 |
39 | def scan(self, resource, timestamp=None, **kwargs) -> list[dc.PatchAttrs]:
40 | """Scan the contents of the directory."""
41 | path = Path(resource)
42 | metadata = _read_xml_metadata(path / self._metadata_name)
43 | data_files = list(path.glob(f"*{self._data_extension}"))
44 | extra_attrs = {
45 | "file_version": self.version,
46 | "file_format": self.name,
47 | }
48 | # Need to update time
49 | attrs = _paths_to_attrs(
50 | data_files,
51 | metadata,
52 | timestamp=timestamp,
53 | attr_cls=BinaryPatchAttrs,
54 | extra_attrs=extra_attrs,
55 | )
56 | return attrs
57 |
58 | def read(self, resource, time=None, distance=None, **kwargs) -> dc.BaseSpool:
59 | """
60 | Load data from the directory structure.
61 |
62 | Parameters
63 | ----------
64 | resource
65 | A directory, path to the index file, or path to a data file.
66 | time
67 | Parameters for filtering by time.
68 | distance
69 | Parameters for filtering by distance.
70 | **kwargs
71 | Extra keyword arguments are ignored.
72 | """
73 | path = Path(resource)
74 | base_path = path if path.is_dir() else path.parent
75 | meta_data = _read_xml_metadata(base_path / self._metadata_name)
76 | if path.is_dir():
77 | path = list(path.glob(f"*{self._data_extension}"))
78 | # Determine if this is a single file or all of them.
79 | patches = _load_patches(
80 | path,
81 | meta_data,
82 | time=time,
83 | distance=distance,
84 | attr_cls=BinaryPatchAttrs,
85 | )
86 | return dc.spool(patches)
87 |
88 | def get_format(self, resource, **kwargs) -> tuple[str, str] | bool:
89 | """Determine if directory is an XML Binary type."""
90 | path = Path(resource)
91 | index_path = path / self._metadata_name
92 | if not index_path.exists():
93 | return False
94 | try:
95 | _ = _read_xml_metadata(index_path)
96 | except (ParseError, TypeError, IndexError, ValidationError):
97 | return False
98 | return self.name, self.version
99 |
--------------------------------------------------------------------------------
/dascore/io/optodas/utils.py:
--------------------------------------------------------------------------------
1 | """Utilities for OptoDAS."""
2 |
3 | from __future__ import annotations
4 |
5 | import dascore as dc
6 | import dascore.core
7 | from dascore.core.coords import get_coord
8 | from dascore.utils.hdf5 import unpack_scalar_h5_dataset
9 | from dascore.utils.misc import unbyte
10 |
11 | # --- Getting format/version
12 |
13 |
14 | def _get_opto_das_version_str(hdf_fi) -> str:
15 | """Return the version string for OptoDAS file."""
16 | # define a few root attrs that act as a "fingerprint"
17 | expected_attrs = (
18 | "acqSpec",
19 | "header",
20 | "cableSpec",
21 | "data",
22 | "fileVersion",
23 | )
24 | if not all([x in hdf_fi for x in expected_attrs]):
25 | return ""
26 | version_str = str(unbyte(hdf_fi["fileVersion"][()]))
27 | return version_str
28 |
29 |
30 | def _get_coord_manager(fi):
31 | """Get the distance ranges and spacing."""
32 | header = fi["header"]
33 | dims = tuple(unbyte(x) for x in header["dimensionNames"])
34 | units = tuple(unbyte(x) for x in header["dimensionUnits"])
35 | coords = {}
36 | for index, (dim, unit) in enumerate(zip(dims, units)):
37 | crange = header["dimensionRanges"][f"dimension{index}"]
38 | step = unpack_scalar_h5_dataset(crange["unitScale"])
39 |
40 | # Special case for time.
41 | if dim == "time":
42 | step = dc.to_timedelta64(step)
43 | t1 = dc.to_datetime64(unpack_scalar_h5_dataset(header["time"]))
44 | start = t1 + unpack_scalar_h5_dataset(crange["min"]) * step
45 | stop = t1 + (unpack_scalar_h5_dataset(crange["max"]) + 1) * step
46 | coord = get_coord(min=start, max=stop, step=step, units=unit)
47 | else: # and distance
48 | # The channels are ints so we multiply by step to get distance.
49 | distance = fi["/header/channels"][:] * step
50 | coord = get_coord(values=distance)
51 | coords[dim] = coord
52 | out = dascore.core.get_coord_manager(coords=coords, dims=dims)
53 | return out
54 |
55 |
56 | def _get_attr_dict(header):
57 | """Map header info to DAS attrs."""
58 | attr_map = {
59 | "gaugeLength": "gauge_length",
60 | "unit": "data_units",
61 | "instrument": "instrument_id",
62 | "experiment": "acquisition_id",
63 | }
64 | out = {"data_category": "DAS"}
65 | for head_name, attr_name in attr_map.items():
66 | value = header[head_name]
67 | if hasattr(value, "shape"):
68 | value = unpack_scalar_h5_dataset(value)
69 | out[attr_name] = unbyte(value)
70 | return out
71 |
72 |
73 | def _get_opto_das_attrs(fi) -> dict:
74 | """Scan a OptoDAS file, return metadata."""
75 | cm = _get_coord_manager(fi)
76 | attrs = _get_attr_dict(fi["header"])
77 | attrs["coords"] = cm
78 | return attrs
79 |
80 |
81 | def _read_opto_das(fi, distance=None, time=None, attr_cls=dc.PatchAttrs):
82 | """Read the OptoDAS values into a patch."""
83 | attrs = _get_opto_das_attrs(fi)
84 | data_node = fi["data"]
85 | coords = attrs.pop("coords")
86 | cm, data = coords.select(array=data_node, distance=distance, time=time)
87 | if not data.size:
88 | return []
89 | attrs["coords"] = cm.to_summary_dict()
90 | attrs["dims"] = cm.dims
91 | return [dc.Patch(data=data, coords=cm, attrs=attr_cls(**attrs))]
92 |
--------------------------------------------------------------------------------
/tests/test_utils/test_jit.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for applying just in time compilations.
3 | """
4 |
5 | from functools import cache
6 |
7 | import numpy as np
8 | import pytest
9 |
10 | from dascore.utils.jit import maybe_numba_jit
11 | from dascore.utils.misc import optional_import, suppress_warnings
12 |
13 |
14 | class TestMaybeNumbaJit:
15 | """Tests for optional jit'ing."""
16 |
17 | def test_jit(self):
18 | """Ensure the jit works. Only test if numba installed."""
19 | pytest.importorskip("numba")
20 |
21 | @maybe_numba_jit
22 | def my_jit(ar):
23 | return ar
24 |
25 | ar = np.array([1, 2, 3])
26 | new_ar = my_jit(ar)
27 | assert np.all(new_ar == ar)
28 |
29 | def test_warning(self):
30 | """When numba is not installed ensure a warning is issued."""
31 |
32 | @maybe_numba_jit(_missing_numba=True)
33 | def _jit_test_func(ar):
34 | return ar
35 |
36 | match = "can be compiled to improve performance"
37 | with pytest.warns(UserWarning, match=match):
38 | _jit_test_func(np.array([1, 2, 3]))
39 |
40 | def test_raises(self):
41 | """Ensure an error is raised when specified by the decorator."""
42 |
43 | @maybe_numba_jit(required=True, _missing_numba=True)
44 | def _jit_test_func(ar):
45 | return ar
46 |
47 | match = "requires python module"
48 | with pytest.raises(ImportError, match=match):
49 | _jit_test_func(np.array([1, 2, 3]))
50 |
51 | def test_example(self):
52 | """Test docstring examples."""
53 | pytest.importorskip("numba")
54 |
55 | @maybe_numba_jit(nopython=True, nogil=True)
56 | def _jit_func(array):
57 | return array
58 |
59 | @cache
60 | def jit_wrapper():
61 | numba = optional_import("numba")
62 |
63 | @maybe_numba_jit
64 | def jit_func(array):
65 | for a in numba.prange(len(array)):
66 | pass
67 | return array
68 |
69 | return jit_func
70 |
71 | out = jit_wrapper()(np.array([1, 2, 3]))
72 | assert isinstance(out, np.ndarray)
73 |
74 | def test_numba_used_in_function(self):
75 | """Tests for numba used in function without being imported."""
76 | pytest.importorskip("numba")
77 |
78 | @maybe_numba_jit(nopython=True, nogil=True)
79 | def _my_jit(array):
80 | for sub in numba.prange(len(array)): # noqa
81 | pass
82 | return array
83 |
84 | array = np.array([1, 2, 3])
85 | out = _my_jit(array)
86 | assert np.all(out == array)
87 |
88 | def test_prange_no_numba(self):
89 | """
90 | In order to make the doctests work, we had to implement a dummy
91 |         numba module. It doesn't support everything (barely anything, really),
92 |         but it should be enough for now.
93 | """
94 |
95 | @maybe_numba_jit(_missing_numba=True)
96 | def _my_jit(array):
97 | for sub in numba.prange(len(array)): # noqa
98 | pass
99 | return array
100 |
101 | array = np.array([1, 2, 3])
102 |
103 | with suppress_warnings(UserWarning):
104 | out = _my_jit(array)
105 | assert np.all(out == array)
106 |
--------------------------------------------------------------------------------