├── .github
│   └── workflows
│       └── workflow.yml
├── .gitignore
├── CONTRIBUTING.md
├── CONTRIBUTORS.txt
├── LICENSE
├── README.md
├── daspy
│   ├── CONTRIBUTORS.txt
│   ├── __init__.py
│   ├── advanced_tools
│   │   ├── __init__.py
│   │   ├── channel.py
│   │   ├── decomposition.py
│   │   ├── denoising.py
│   │   ├── fdct.py
│   │   └── strain2vel.py
│   ├── basic_tools
│   │   ├── __init__.py
│   │   ├── filter.py
│   │   ├── freqattributes.py
│   │   ├── preprocessing.py
│   │   └── visualization.py
│   └── core
│       ├── __init__.py
│       ├── collection.py
│       ├── dasdatetime.py
│       ├── example.pkl
│       ├── read.py
│       ├── section.py
│       └── write.py
├── document
│   ├── Ridgecrest_traffic_noise.mat
│   └── example.ipynb
├── setup.py
└── website
    ├── logo.png
    └── waveform.png
/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
2 |
3 | on: push
4 |
5 | jobs:
6 | build:
7 | name: Build distribution 📦
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - uses: actions/checkout@v4
12 | with:
13 | persist-credentials: false
14 | - name: Set up Python
15 | uses: actions/setup-python@v5
16 | with:
17 | python-version: "3.x"
18 | - name: Install pypa/build
19 | run: >-
20 | python3 -m
21 | pip install
22 | build
23 | --user
24 | - name: Build a binary wheel and a source tarball
25 | run: python3 -m build
26 | - name: Store the distribution packages
27 | uses: actions/upload-artifact@v4
28 | with:
29 | name: python-package-distributions
30 | path: dist/
31 |
32 | publish-to-pypi:
33 | name: >-
34 | Publish Python 🐍 distribution 📦 to PyPI
35 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
36 | needs:
37 | - build
38 | runs-on: ubuntu-latest
39 | environment:
40 | name: pypi
41 | url: https://pypi.org/p/DASPy-toolbox
42 | permissions:
43 | id-token: write # IMPORTANT: mandatory for trusted publishing
44 |
45 | steps:
46 | - name: Download all the dists
47 | uses: actions/download-artifact@v4
48 | with:
49 | name: python-package-distributions
50 | path: dist/
51 | - name: Publish distribution 📦 to PyPI
52 | uses: pypa/gh-action-pypi-publish@release/v1
53 |
54 | github-release:
55 | name: >-
56 | Sign the Python 🐍 distribution 📦 with Sigstore
57 | and upload them to GitHub Release
58 | needs:
59 | - publish-to-pypi
60 | runs-on: ubuntu-latest
61 |
62 | permissions:
63 | contents: write # IMPORTANT: mandatory for making GitHub Releases
64 | id-token: write # IMPORTANT: mandatory for sigstore
65 |
66 | steps:
67 | - name: Download all the dists
68 | uses: actions/download-artifact@v4
69 | with:
70 | name: python-package-distributions
71 | path: dist/
72 | - name: Sign the dists with Sigstore
73 | uses: sigstore/gh-action-sigstore-python@v3.0.0
74 | with:
75 | inputs: >-
76 | ./dist/*.tar.gz
77 | ./dist/*.whl
78 | - name: Create GitHub Release
79 | env:
80 | GITHUB_TOKEN: ${{ github.token }}
81 | run: >-
82 | gh release create
83 | "$GITHUB_REF_NAME"
84 | --repo "$GITHUB_REPOSITORY"
85 | --notes ""
86 | - name: Upload artifact signatures to GitHub Release
87 | env:
88 | GITHUB_TOKEN: ${{ github.token }}
89 | # Upload to GitHub Release using the `gh` CLI.
90 | # `dist/` contains the built packages, and the
91 | # sigstore-produced signatures and certificates.
92 | run: >-
93 | gh release upload
94 | "$GITHUB_REF_NAME" dist/**
95 | --repo "$GITHUB_REPOSITORY"
96 |
97 | publish-to-testpypi:
98 | name: Publish Python 🐍 distribution 📦 to TestPyPI
99 | if: startsWith(github.ref, 'refs/tags/') # only publish to TestPyPI on tag pushes
100 | needs:
101 | - build
102 | runs-on: ubuntu-latest
103 |
104 | environment:
105 | name: testpypi
106 | url: https://test.pypi.org/p/DASPy-toolbox
107 |
108 | permissions:
109 | id-token: write # IMPORTANT: mandatory for trusted publishing
110 |
111 | steps:
112 | - name: Download all the dists
113 | uses: actions/download-artifact@v4
114 | with:
115 | name: python-package-distributions
116 | path: dist/
117 | - name: Publish distribution 📦 to TestPyPI
118 | uses: pypa/gh-action-pypi-publish@release/v1
119 | with:
120 | repository-url: https://test.pypi.org/legacy/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to DASPy
2 |
3 | We hope you will submit your changes through GitHub's pull request feature.
4 |
5 | ## Submit a Pull Request
6 |
7 | Here's a quick guide:
8 |
9 | 1. Fork the repo.
10 | 2. Make a new branch based on `main`.
11 | 3. Push to your fork and submit a pull request.
12 | 4. Wait for our review. We may suggest changes, improvements, or alternatives.
13 |
14 | ## DASPy Coding Style Guide
15 |
16 | Like most Python projects, we try to adhere to [PEP 8](https://peps.python.org/pep-0008/) (Style Guide for Python Code) and [PEP 257](https://peps.python.org/pep-0257/) (Docstring Conventions) with the modifications documented here. Be sure to read all documents if you intend to contribute code to DASPy.
17 |
18 | ## Naming
19 |
20 | ### Names to Avoid
21 |
22 | * single character names except for counters or iterators
23 | * dashes (-) in any package/module name
24 | * **__double_leading_and_trailing_underscore__** names (reserved by Python)
25 |
26 | ### Naming Convention
27 |
28 | * Use meaningful variable/function/method names; these will help other people a lot when reading your code.
29 | * Prepending a single underscore (_) marks an object as “internal”/“private”: it is not meant to be used by end users, and its API may change without notice (in contrast to public objects, whose API changes are handled with deprecation warnings for one release cycle).
30 | * Prepending a double underscore (__) to an instance variable or method effectively serves to make the variable or method private to its class (using name mangling).
31 | * Place related classes and top-level functions together in a module. Unlike Java, there is no need to limit yourself to one class per module.
32 | * Use CamelCase for class names, but snake_case for module names, variables and functions/methods.
33 |
--------------------------------------------------------------------------------
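As a small, made-up illustration of the naming rules above (the class and names below are invented for the example and are not part of DASPy):

```python
# Illustration only: a hypothetical module following the DASPy naming conventions.
class NoiseModel:                       # CamelCase for class names
    """Toy class used only to demonstrate naming style."""

    def __init__(self, window_length):  # snake_case for functions, methods and variables
        self.window_length = window_length
        self._cache = None              # single leading underscore: internal/private

    def estimate_level(self, data):
        """Return a simple mean-absolute-amplitude noise estimate."""
        return sum(abs(x) for x in data) / len(data)
```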
/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
1 | daspy/CONTRIBUTORS.txt
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 University of Science and Technology of China
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [](https://pypi.org/project/DASPy-toolbox/)
4 | [](https://opensource.org/license/mit)
5 | [](https://pypi.org/project/DASPy-toolbox/)
6 |
7 | [](https://doi.org/10.1785/0220240124)
8 | [](https://pypi.org/project/DASPy-toolbox/)
9 | [](https://anaconda.org/conda-forge/daspy-toolbox)
10 |
11 | DASPy is an open-source project dedicated to providing a Python package for DAS (Distributed Acoustic Sensing) data processing.
12 |
13 | The goal of the DASPy project is to lower the barrier to DAS data processing. DASPy includes:
14 | * Classic seismic data processing techniques, including preprocessing, filtering, spectrum analysis, and visualization.
15 | * Specialized algorithms for DAS applications, including denoising, waveform decomposition, channel attribute analysis, and strain-velocity conversion.
16 |
17 | DASPy is licensed under the MIT License. [An English version of the DASPy tutorial](https://daspy-tutorial.readthedocs.io/en/latest/), [a Chinese version of the DASPy tutorial](https://daspy-tutorial-cn.readthedocs.io/zh-cn/latest/) and [an example Jupyter notebook](document/example.ipynb) are available. If you have any questions, please contact me via hmz2018@mail.ustc.edu.cn.
18 |
19 | ## Installation
20 | DASPy runs on Linux, Windows and macOS, on Python 3.9 and up.
21 |
22 | ### Pip
23 | ```
24 | pip install daspy-toolbox
25 | ```
26 |
27 | Install the latest version from GitHub:
28 |
29 | ```
30 | pip install git+https://github.com/HMZ-03/DASPy.git
31 | ```
32 |
33 | ### Conda
34 |
35 | ```
36 | conda install daspy-toolbox
37 | ```
38 |
39 | or
40 |
41 | ```
42 | conda install conda-forge::daspy-toolbox
43 | ```
44 |
45 | ### Manual installation
46 | 1. Install dependent packages: numpy, scipy >=1.13, matplotlib, geographiclib, pyproj, h5py, segyio, nptdms, tqdm
47 |
48 | 2. Add DASPy into your Python path.
49 |
50 | ## Getting started
51 | ```
52 | from daspy import read
53 | sec = read() # load example waveform
54 | sec.bandpass(1, 15)
55 | sec.plot()
56 | ```
57 |
58 |
59 | ### Contributing
60 |
61 | Please see [CONTRIBUTING.md](CONTRIBUTING.md) and the [coding style guide](CodingStyleGuide.md) for details on how to contribute to the project.
62 |
63 | ### Reference
64 |
65 | * Minzhe Hu and Zefeng Li (2024), [DASPy: A Python Toolbox for DAS Seismology](https://pubs.geoscienceworld.org/ssa/srl/article/95/5/3055/645865/DASPy-A-Python-Toolbox-for-DAS-Seismology), *Seismological Research Letters*, 95(5), 3055–3066, doi: `https://doi.org/10.1785/0220240124`.
66 |
--------------------------------------------------------------------------------
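Beyond the minimal snippet above, a slightly fuller getting-started sketch (illustrative only; it combines `read`, `bandpass` and `plot` from the README with `channel_checking` from `daspy/advanced_tools/channel.py`):

```python
import numpy as np
from daspy import read
from daspy.advanced_tools.channel import channel_checking

sec = read()         # load the bundled example waveform (core/example.pkl)
sec.bandpass(1, 15)  # band-pass filter between 1 and 15 Hz, as in the README

# channel_checking works on a plain 2-D array (channels x time samples)
good_chn, bad_chn = channel_checking(np.asarray(sec.data), deg=10, thresh=5)
print(f'{len(bad_chn)} bad channels out of {len(good_chn) + len(bad_chn)}')

sec.plot()           # quick-look plot of the filtered section
```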
/daspy/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
1 | Hu, Minzhe
2 | Li, Zefeng
3 | Zhang, Ji
--------------------------------------------------------------------------------
/daspy/__init__.py:
--------------------------------------------------------------------------------
1 | from daspy.core.section import Section
2 | from daspy.core.collection import Collection
3 | from daspy.core.read import read
4 | from daspy.core.dasdatetime import DASDateTime, local_tz, utc
--------------------------------------------------------------------------------
/daspy/advanced_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/advanced_tools/__init__.py
--------------------------------------------------------------------------------
/daspy/advanced_tools/channel.py:
--------------------------------------------------------------------------------
1 | # Purpose: Several functions for analyzing data quality and geometry of channels
2 | # Author: Minzhe Hu, Zefeng Li
3 | # Date: 2025.3.31
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from copy import deepcopy
7 | from geographiclib.geodesic import Geodesic
8 | from pyproj import Proj
9 |
10 |
11 | def robust_polyfit(data, deg, thresh):
12 | """
13 | Fit a curve with a robust weighted polynomial.
14 |
15 | :param data: 1-dimensional array.
16 | :param deg: int. Degree of the fitting polynomial.
17 | :param thresh: int or float. MAD multiple used to define outliers.
18 | :return: Fitted data and the weights used in the final fit.
19 | """
20 | nch = len(data)
21 | channels = np.arange(nch)
22 | p_coef = np.polyfit(channels, data, deg)
23 | p_fit = np.poly1d(p_coef)
24 | old_data = p_fit(channels)
25 | mse = 1
26 |
27 | # robust fitting until the fitting curve changes < 0.1% at every point.
28 | while mse > 0.001:
29 | rsl = abs(data - old_data)
30 | mad = np.median(rsl)
31 | weights = np.zeros(nch)
32 | weights[rsl < thresh * mad] = 1
33 | p_coef = np.polyfit(channels, data, deg, w=weights)
34 | p_fit = np.poly1d(p_coef)
35 | new_data = p_fit(channels)
36 | mse = np.nanmax(np.abs((new_data - old_data) / old_data))
37 | old_data = new_data
38 |
39 | return new_data, weights
40 |
41 |
42 | def _continuity_checking(lst1, lst2, adjacent=2, toleration=2):
43 | lst1_raw = deepcopy(lst1)
44 | for chn in lst1_raw:
45 | discont = [a for a in lst2 if abs(a - chn) <= adjacent]
46 | if len(discont) >= adjacent * 2 + 1 - toleration:
47 | lst1.remove(chn)
48 | lst2.append(chn)
49 |
50 | return lst1, lst2
51 |
52 |
53 | def channel_checking(data, deg=10, thresh=5, continuity=True, adjacent=2,
54 | toleration=2, mode='low', verbose=False):
55 | """
56 | Use the energy of each channel to determine which channels are bad.
57 |
58 | :param data: 2-dimensional np.ndarray. Axis 0 is channel number and axis 1
59 | is time series.
60 | :param deg: int. Degree of the fitting polynomial
61 | :param thresh: int or float. The MAD multiple of bad channel energy lower
62 | than good channels.
63 | :param continuity: bool. Perform continuity checks on bad channels and good
64 | channels.
65 | :param adjacent: int. The number of nearby channels for continuity checks.
66 | :param toleration: int. The number of discontinuous channels allowed among
67 | the nearby channels (including itself) in the continuity check.
68 | :param mode: str. 'low' means bad channels have low amplitude, 'high' means
69 | bad channels have high amplitude, and 'both' means bad channels are
70 | likely to have low or high amplitude.
71 | :return: Good channels and bad channels.
72 | """
73 | nch = len(data)
74 | energy = np.log10(np.sum(data**2, axis=1))
75 | energy[energy == -np.inf] = -308
76 |
77 | # Remove abnormal value by robust polynomial fitting.
78 | fitted_energy, weights = robust_polyfit(energy, deg, thresh)
79 | deviation = energy - fitted_energy
80 |
81 | # Iteratively eliminate outliers.
82 | mad = np.median(abs(deviation[weights > 0]))
83 | if mode == 'low':
84 | bad_chn = np.argwhere(deviation < -thresh * mad).ravel().tolist()
85 | elif mode == 'high':
86 | bad_chn = np.argwhere(deviation > thresh * mad).ravel().tolist()
87 | elif mode == 'both':
88 | bad_chn = np.argwhere(deviation < -thresh * mad).ravel().tolist() + \
89 | np.argwhere(deviation > thresh * mad).ravel().tolist()
90 | good_chn = list(set(range(nch)) - set(bad_chn))
91 |
92 | if continuity:
93 | # Discontinuous normal values are treated as bad channels.
94 | good_chn, bad_chn = _continuity_checking(good_chn, bad_chn,
95 | adjacent=adjacent,
96 | toleration=toleration)
97 |
98 | # Discontinuous outliers are usually not bad channels.
99 | bad_chn, good_chn = _continuity_checking(bad_chn, good_chn,
100 | adjacent=adjacent,
101 | toleration=toleration)
102 |
103 | bad_chn = np.sort(np.array(bad_chn))
104 | good_chn = np.sort(np.array(good_chn))
105 | if verbose:
106 | return good_chn, bad_chn, energy, fitted_energy - thresh * mad
107 |
108 | return good_chn, bad_chn
109 |
110 |
111 | def _channel_location(track_pt):
112 | track, tn = track_pt[:, :-1], track_pt[:, -1]
113 | dim = track.shape[1]
114 | l_track = np.sqrt(np.sum(np.diff(track, axis=0) ** 2, axis=1))
115 | l_track_cum = np.hstack(([0], np.cumsum(l_track)))
116 | idx_kp = np.where(tn >= 0)[0]
117 |
118 | interp_ch = []
119 | chn = np.floor(tn[idx_kp[0]]).astype(int)
120 | interp_ch.append([*track[idx_kp[0]], chn])
121 | if abs(chn - tn[idx_kp[0]]) > 1e-6:
122 | chn += 1
123 |
124 | seg_interval = []
125 | for i in range(1, len(idx_kp)):
126 | # calculate actual interval between known-channel points
127 | istart, iend = idx_kp[i - 1], idx_kp[i]
128 | n_chn_kp = tn[iend] - tn[istart]
129 | d_interp = (l_track_cum[iend] - l_track_cum[istart]) / n_chn_kp
130 | seg_interval.append([tn[istart], tn[iend], d_interp])
131 |
132 | l_res = 0 # remaining fiber length before counting the next segment
133 | # consider if the given channel number is not an integer
134 | chn_res = tn[istart] - int(tn[istart])
135 | if d_interp == 0:
136 | while chn < int(tn[iend]):
137 | chn += 1
138 | interp_ch.append([*track[istart, :], chn])
139 | continue
140 | for j in range(istart, iend):
141 | l_start = l_track[j] + l_res
142 |
143 | # if the track-point segment length covers more than one interval, get
144 | # the channel locations
145 | if l_start >= d_interp * (1 - chn_res - 1e-6):
146 | # floor int, num of channel available
147 | n_chn_tp = int(l_start / d_interp + chn_res)
148 | l_new = (np.arange(n_chn_tp) + 1 - chn_res) * d_interp - \
149 | l_res # channel distance from segment start
150 |
151 | # interpolate the channel loc
152 | t_new = np.zeros((len(l_new), dim))
153 | for d in range(dim):
154 | t_new[:, d] = np.interp(l_new, [0, l_track[j]],
155 | [track[j, d], track[j + 1, d]])
156 |
157 | # remaining length to add to next segment
158 | l_res = l_start - n_chn_tp * d_interp
159 |
160 | # write interpolated channel loc
161 | for ti in t_new:
162 | chn += 1
163 | interp_ch.append([*ti, chn])
164 |
165 | # handle floor int problem when l_start/d_interp is near an
166 | # integer
167 | if (d_interp - l_res) / d_interp < 1e-6:
168 | chn += 1
169 | interp_ch.append([*track[j + 1, :], int(tn[j + 1])])
170 | l_res = 0
171 | chn_res = 0
172 | # if tp segment length is not enough for one interval, simply add
173 | # the length to next segment
174 | elif l_start < d_interp:
175 | l_res = l_start
176 |
177 | if abs(tn[iend] - int(tn[iend])) > 1e-6:
178 | chn += 1
179 | interp_ch.append([*track[iend, :], chn])
180 |
181 | return np.array(seg_interval), np.array(interp_ch)
182 |
183 |
184 | def location_interpolation(known_pt, track_pt=None, dx=2, data_type='lonlat',
185 | verbose=False):
186 | """
187 | Interpolate to obtain the positions of all channels.
188 |
189 | :param known_pt: np.ndarray. Points with known channel numbers. Each row
190 | includes 2 or 3 coordinates and a channel number.
191 | :param track_pt: np.ndarray. Optional fiber spatial track points without
192 | channel numbers. Each row includes 2 or 3 coordinates. Please ensure
193 | that the track points are arranged in increasing order of track number.
194 | If track points is not dense enough, please insert the coordinates of
195 | known points into track points in order.
196 | :param dx: Known points far from the track (> dx) will be excluded.
197 | Recommended setting is channel interval. The unit is m.
198 | :param data_type: str. Coordinate type. 'lonlat' ('lonlatheight') for
199 | longitude, latitude in degree (and height in meters), 'xy' ('xyz') for
200 | x, y (and z) in meters.
201 | :param verbose: bool. If True, return interpolated channel locations and
202 | segment intervals.
203 | :return: Interpolated channel locations if verbose is False.
204 | """
205 | known_pt = known_pt[known_pt[:,-1].argsort()]
206 | dim = known_pt.shape[1] - 1
207 | if 'lonlat' in data_type:
208 | zone = np.floor((max(known_pt[:,0]) + min(known_pt[:,0])) / 2 / 6)\
209 | .astype(int) + 31
210 | DASProj = Proj(proj='utm', zone=zone, ellps='WGS84',
211 | preserve_units=False)
212 | known_pt[:, 0], known_pt[:, 1] = DASProj(known_pt[:, 0], known_pt[:, 1])
213 | else:
214 | assert 'xy' in data_type, ('data_type should be \'lonlat\',\''
215 | 'lonlatheight\', \'xy\' or \'xyz\'')
216 |
217 | if track_pt is None:
218 | seg_interval, interp_ch = _channel_location(known_pt)
219 | else:
220 | K = len(known_pt)
221 | T = len(track_pt)
222 | track_pt = np.c_[track_pt, np.zeros(T) - 1]
223 | if 'lonlat' in data_type:
224 | track_pt[:, 0], track_pt[:, 1] = DASProj(track_pt[:, 0],
225 | track_pt[:, 1])
226 |
227 | # insert the known points into the fiber track data
228 | matrix = [np.tile(track_pt[:, d], (K, 1)) -
229 | np.tile(known_pt[:, d], (T, 1)).T for d in range(dim)]
230 |
231 | dist = np.sqrt(np.sum(np.array(matrix) ** 2, axis=0))
232 | for k in range(K):
233 | if min(dist[k]) < dx:
234 | t_list = np.sort(np.where(dist[k] == min(dist[k]))[0])
235 | for t in t_list:
236 | if track_pt[t, -1] == -1:
237 | track_pt[t, -1] = known_pt[k, -1]
238 | last_pt = t
239 | break
240 |
241 | # interpolation with regular spacing along the fiber track
242 | try:
243 | track_pt = track_pt[:last_pt + 1]
244 | except NameError:
245 | print('All known points are too far away from the track points. If '
246 | 'they are reliable, they can be merged in sequence as track '
247 | 'points to input')
248 | return None
249 |
250 | seg_interval, interp_ch = _channel_location(track_pt)
251 |
252 | if 'lonlat' in data_type:
253 | interp_ch[:, 0], interp_ch[:, 1] = \
254 | DASProj(interp_ch[:, 0], interp_ch[:, 1], inverse=True)
255 |
256 | if verbose:
257 | return interp_ch, seg_interval
258 | return interp_ch
259 |
260 |
261 | def _xcorr(x, y):
262 | N = len(x)
263 | meanx = np.mean(x)
264 | meany = np.mean(y)
265 | stdx = np.std(np.asarray(x))
266 | stdy = np.std(np.asarray(y))
267 | c = np.sum((y - meany) * (x - meanx)) / (N * stdx * stdy)
268 | return c
269 |
270 |
271 | def _horizontal_angle_change(geo, gap=10):
272 | nch = len(geo)
273 | angle = np.zeros(nch)
274 | for i in range(1, nch - 1):
275 | lon, lat = geo[i]
276 | lon_s, lat_s = geo[max(i - gap, 0)]
277 | lon_e, lat_e = geo[min(i + gap, nch - 1)]
278 | azi_s = Geodesic.WGS84.Inverse(lat_s, lon_s, lat, lon)['azi1']
279 | azi_e = Geodesic.WGS84.Inverse(lat, lon, lat_e, lon_e)['azi1']
280 | dazi = azi_e - azi_s
281 | if abs(dazi) > 180:
282 | dazi = -np.sign(dazi) * (360 - abs(dazi))
283 | angle[i] = dazi
284 |
285 | return angle
286 |
287 |
288 | def _vertical_angle_change(geo, gap=10):
289 | nch = len(geo)
290 | angle = np.zeros(nch)
291 | for i in range(1, nch - 1):
292 | lon, lat, dep = geo[i]
293 | lon_s, lat_s, dep_s = geo[max(i - gap, 0)]
294 | lon_e, lat_e, dep_e = geo[min(i + gap, nch - 1)]
295 | s12_s = Geodesic.WGS84.Inverse(lat_s, lon_s, lat, lon)['s12']
296 | theta_s = np.arctan((dep - dep_s) / s12_s) / np.pi * 180
297 | s12_e = Geodesic.WGS84.Inverse(lat, lon, lat_e, lon_e)['s12']
298 | theta_e = np.arctan((dep_e - dep) / s12_e) / np.pi * 180
299 | angle[i] = theta_e - theta_s
300 |
301 | return angle
302 |
303 |
304 | def _local_maximum_indexes(data, thresh):
305 | idx = np.where(data > thresh)[0]
306 | if len(idx):
307 | i = list(np.where(np.diff(idx) > 1)[0] + 1)
308 | if len(idx) - 1 not in i:
309 | i.append(len(idx) - 1)
310 | b = 0
311 | max_idx = []
312 | for e in i:
313 | max_idx.append(idx[b] + np.argmax(data[idx[b]:idx[e]]))
314 | b = e
315 | return max_idx
316 | else:
317 | return []
318 |
319 |
320 | def turning_points(data, data_type='coordinate', thresh=5, depth_info=False,
321 | channel_gap=3):
322 | """
323 | Seek turning points in the DAS channel.
324 |
325 | :param data: numpy.ndarray. Data used to seek turning points.
326 | :param data_type: str. If data_type is 'coordinate', data should include
327 | longitude and latitude (first two columns), and can also include depth
328 | (last column). If data_type is 'waveform', data should be continuous
329 | waveform, preferably containing signal with strong coherence
330 | (earthquake, traffic signal, etc.).
331 | :param thresh: For coordinate data, when the angle of the optical cables on
332 | both sides centered on a certain point exceeds thresh, it is considered
333 | a turning point. For waveform, thresh means the MAD multiple of
334 | adjacent channel cross-correlation values lower than their median.
335 | :param depth_info: bool. Optional if data_type is 'coordinate'. Whether
336 | depth (in meters) is included in the coordinate data and need to be
337 | used.
338 | :param channel_gap: int. Optional if data_type is 'coordinate'. The smaller
339 | the value is, the finer the segmentation will be. It is recommended to
340 | set it to half the ratio of gauge length and channel interval.
341 | :return: list. Channel index of turning points.
342 | """
343 | if data_type == 'coordinate':
344 | angle = _horizontal_angle_change(data[:, :2], gap=channel_gap)
345 | turning_h = _local_maximum_indexes(abs(angle), thresh)
346 |
347 | if depth_info:
348 | angle = _vertical_angle_change(data, gap=channel_gap)
349 | turning_v = _local_maximum_indexes(abs(angle), thresh)
350 | return turning_h, turning_v
351 |
352 | return turning_h
353 |
354 | elif data_type == 'waveform':
355 | nch = len(data)
356 | cc = np.zeros(nch - 1)
357 | for i in range(nch - 1):
358 | cc[i] = _xcorr(data[i], data[i + 1])
359 | median = np.median(cc)
360 | mad = np.median(abs(cc - median))
361 |
362 | return np.argwhere(cc < median - thresh * mad).ravel()
363 |
364 | else:
365 | raise ValueError('Data_type should be \'coordinate\' or \'waveform\'.')
366 |
367 |
368 | def channel_spacing(geometry, depth_info=False):
369 | nch = len(geometry)
370 | dist = np.zeros(nch - 1)
371 | for i in range(nch - 1):
372 | lon0, lat0 = geometry[i, :2]
373 | lon1, lat1 = geometry[i+1, :2]
374 | d = Geodesic.WGS84.Inverse(lat0, lon0, lat1, lon1)['s12']
375 | if depth_info:
376 | dist[i] = np.sqrt(d**2 + (geometry[i+1, 2] - geometry[i, 2]) ** 2)
377 | else:
378 | dist[i] = d
379 |
380 | return dist
381 |
382 |
383 | def distance_to_channels(geometry, points):
384 | """
385 | Calculate the distance from a point to each channel.
386 |
387 | :param geometry: numpy.ndarray. It needs to consist of two columns
388 | (longitude, latitude) or three columns (longitude, latitude and depth).
389 | :param points: numpy.ndarray. An array consisting of longitude and
390 | latitude, or longitude, latitude and depth.
391 | :return: numpy.ndarray. The distance from the given point to each channel.
392 | """
393 | if geometry.shape[1] == 3:
394 | depth_info = True
395 | else:
396 | depth_info = False
397 |
398 | nch = len(geometry)
399 | points = np.array(points)
400 | if points.ndim == 1:
401 | points = points.reshape(1, -1)
402 | npt = len(points)
403 | dist = np.zeros((npt, nch))
404 | for i, pt in enumerate(points):
405 | for j, geo in enumerate(geometry):
406 | d = Geodesic.WGS84.Inverse(pt[1], pt[0], geo[1], geo[0])['s12']
407 | if depth_info:
408 | dist[i, j] = np.sqrt(d**2 + (pt[2] - geo[2]) ** 2)
409 | else:
410 | dist[i, j] = d
411 | return dist
412 |
413 |
414 | def closest_channel_to_point(geometry, points, verbose=False):
415 | """
416 | Find the channel number closest to a given point.
417 |
418 | :param geometry: numpy.ndarray. It needs to consist of longitude, latitude
419 | (and depth) or channel number, longitude, latitude (and depth).
420 | :param points: numpy.ndarray. A tuple consisting of longitude and
421 | latitude (and depth).
422 | :param verbose: bool. Return the channel and the distance to the closest
423 | channel if True.
424 | :return: int. The channel number closest to the given point.
425 | """
426 | nch = len(geometry)
427 | if points.shape[1] == geometry.shape[1]:
428 | channels = np.arange(nch).astype(int)
429 | else:
430 | geometry = geometry[geometry[:, 0].argsort()]
431 | channels = geometry[:, 0].astype(int)
432 | geometry = geometry[:, 1:]
433 |
434 | dist = distance_to_channels(geometry, points)
435 | closest_index = np.argmin(dist, axis=1)
436 | if verbose:
437 | return channels[closest_index], np.min(dist, axis=1)
438 | return channels[closest_index]
439 |
440 |
441 | def _equally_spacing(dist, dx):
442 | index = [[], []]
443 | residual = [0, abs(dist[0]-dx)]
444 | for i in range(2, len(dist)+1):
445 | res = []
446 | for j in range(i):
447 | res.append(residual[j] + abs(dx - sum(dist[j:i])))
448 | residual.append(min(res))
449 | k = np.argmin(res)
450 | if k > 0:
451 | index.append(index[k] + [k])
452 | else:
453 | index.append(index[k])
454 |
455 | return index[-1]
456 |
457 |
458 | def equally_spaced_channels(geometry, dx, depth_info=False, verbose=False):
459 | """
460 | Find equally spaced channel numbers based on known DAS latitude and
461 | longitude.
462 |
463 | :param geometry: numpy.ndarray. DAS geometry used to filter equally spaced
464 | channels. It needs to consist of longitude, latitude (and depth) or
465 | channel number, longitude, latitude (and depth).
466 | :param dx: Channel interval.
467 | :param depth_info: bool. Whether depth (in meters) is included in the
468 | geometry and needed to be used.
469 | :param verbose: bool. If True, return channel numbers for equally spaced
470 | channels and channel intervals.
471 | :return: Channel numbers for equally spaced channels if verbose is False.
472 | """
473 | nch = len(geometry)
474 | if geometry.shape[1] == 2 + int(depth_info):
475 | channels = np.arange(nch).astype(int)
476 | else:
477 | geometry = geometry[geometry[:, 0].argsort()]
478 | channels = geometry[:, 0].astype(int)
479 | geometry = geometry[:, 1:]
480 |
481 | dist = channel_spacing(geometry, depth_info=depth_info)
482 |
483 | s = 0
484 | idx_equal = [0]
485 | for i in range(nch-2):
486 | if dist[i] > dx * 2:
487 | e = i
488 | if e == s + 1:
489 | idx_equal.append(e)
490 | elif e >= s + 2:
491 | idx_equal.extend([idx + s for idx in
492 | _equally_spacing(dist[s:e], dx)])
493 | idx_equal.append(e)
494 | s = e + 1
495 | idx_equal.append(s)
496 | elif dist[i] + dist[i+1] > dx * 1.5:
497 | e = i + 1
498 | if e == s + 1:
499 | idx_equal.append(e)
500 | elif e >= s + 2:
501 | idx_equal.extend([idx + s for idx in
502 | _equally_spacing(dist[s:e], dx)])
503 | idx_equal.append(e)
504 | s = e
505 | e = nch - 1
506 | if e == s + 1:
507 | idx_equal.append(e)
508 | elif e >= s + 2:
509 | idx_equal.extend([idx + s for idx in
510 | _equally_spacing(dist[s:e], dx)])
511 | idx_equal.append(e)
512 |
513 | return channels[idx_equal]
--------------------------------------------------------------------------------
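A minimal usage sketch for `location_interpolation`, based only on the docstring above; the coordinates and channel numbers are invented example values:

```python
import numpy as np
from daspy.advanced_tools.channel import location_interpolation

# Points with known channel numbers: longitude, latitude, channel number (made-up values).
known_pt = np.array([
    [-117.600, 35.600, 0],
    [-117.595, 35.605, 250],
    [-117.590, 35.610, 500],
])

# Interpolate positions for every channel along the segments between the known
# points; data_type='lonlat' converts to UTM internally and back for the output.
interp_ch = location_interpolation(known_pt, dx=2, data_type='lonlat')
print(interp_ch.shape)  # one row per channel: longitude, latitude, channel number
```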
/daspy/advanced_tools/decomposition.py:
--------------------------------------------------------------------------------
1 | # Purpose: Waveform decomposition
2 | # Author: Minzhe Hu
3 | # Date: 2024.5.13
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from numpy.fft import irfft2, ifftshift
7 | from daspy.basic_tools.preprocessing import padding, cosine_taper
8 | from daspy.basic_tools.freqattributes import next_pow_2, fk_transform
9 | from daspy.advanced_tools.denoising import curvelet_denoising
10 |
11 |
12 | def fk_fan_mask(f, k, fmin=None, fmax=None, kmin=None, kmax=None, vmin=None,
13 | vmax=None, edge=0.1, flag=None):
14 | """
15 | Make a fan mask in f-k domain for f-k filter.
16 |
17 | :param f: Frequency sequence.
18 | :param k: Wavenumber sequence.
19 | :param fmin, fmax, kmin, kmax, vmin, vmax: float or sequence of 2 floats.
20 | Sequence of 2 floats represents the start and end of taper.
21 | :param edge: float. The width of fan mask taper edge.
22 | :param flag: -1 keep only negative apparent velocities, 0 keep both positive
23 | and negative apparent velocities, 1 keep only positive apparent
24 | velocities.
25 | :return: Fan mask.
26 | """
27 | ff = np.tile(f, (len(k), 1))
28 | kk = np.tile(k, (len(f), 1)).T
29 | vv = - np.divide(ff, kk, out=np.ones_like(ff) * 1e10, where=kk != 0)
30 | mask = np.ones(vv.shape)
31 | for phy_quan in ['f', 'k', 'v']:
32 | p = eval(phy_quan * 2)
33 | pmin = eval(phy_quan + 'min')
34 | if pmin:
35 | if isinstance(pmin, (tuple, list, np.ndarray)):
36 | tp_b, tp_e = min(pmin), max(pmin)
37 | else:
38 | tp_b, tp_e = pmin * max(1 - edge / 2, 0), pmin * (1 + edge / 2)
39 | tp_wid = tp_e - tp_b
40 | mask[(abs(p) <= tp_b)] = 0
41 | area = (abs(p) > tp_b) & (abs(p) < tp_e)
42 | mask[area] *= 0.5 - 0.5 * \
43 | np.cos(((abs(p[area]) - tp_b) / tp_wid) * np.pi)
44 |
45 | pmax = eval(phy_quan + 'max')
46 | if pmax:
47 | if isinstance(pmax, (tuple, list, np.ndarray)):
48 | tp_b, tp_e = max(pmax), min(pmax)
49 | else:
50 | tp_b, tp_e = pmax * (1 + edge / 2), pmax * (1 - edge / 2)
51 | tp_wid = tp_b - tp_e
52 | mask[(abs(p) >= tp_b)] = 0
53 | area = (abs(p) > tp_e) & (abs(p) < tp_b)
54 | mask[area] *= 0.5 - 0.5 * \
55 | np.cos(((tp_b - abs(p[area])) / tp_wid) * np.pi)
56 |
57 | if flag:
58 | mask[np.sign(vv) == flag] = 0
59 | return mask
60 |
61 |
62 | def fk_filter(data, dx, fs, taper=(0.02, 0.05), pad='default', mode='decompose',
63 | fmin=None, fmax=None, kmin=None, kmax=None, vmin=None, vmax=None,
64 | edge=0.1, flag=None, verbose=False):
65 | """
66 | Transform the data to the f-k domain using 2-D Fourier transform method, and
67 | transform back to the x-t domain after filtering.
68 |
69 | :param data: numpy.ndarray. Data to do fk filter.
70 | :param dx: Channel interval in m.
71 | :param fs: Sampling rate in Hz.
72 | :param taper: float or sequence of floats. Each float means decimal
73 | percentage of Tukey taper for the corresponding dimension (ranging from
74 | 0 to 1). The default (0.02, 0.05) tapers 1% of channels and 2.5% of time
75 | samples from each end.
76 | :param pad: Pad the data or not. It can be float or sequence of floats. Each
77 | float means padding percentage before FFT for corresponding dimension.
78 | If set to 0.1 will pad 5% before the beginning and after the end.
79 | 'default' means pad both dimensions to next power of 2. None or False
80 | means don't pad data before or during Fast Fourier Transform.
81 | :param mode: str. 'remove' for denoising, 'retain' for extraction, and
82 | 'decompose' for decomposition.
83 | :param fmin, fmax, kmin, kmax, vmin, vmax: float or sequence of 2 floats.
84 | Sequence of 2 floats represents the start and end of taper.
85 | :param edge: float. The width of fan mask taper edge.
86 | :param flag: -1 keep only negative apparent velocities, 0 keep both positive
87 | and negative apparent velocities, 1 keep only positive apparent
88 | velocities.
89 | :param verbose: If True, return filtered data, f-k spectrum, frequency
90 | sequence, wavenumber sequence and f-k mask.
91 | :return: Filtered data and some variables in the process if verbose==True.
92 | """
93 | data_tp = cosine_taper(data, taper)
94 | if pad == 'default':
95 | nch, nt = data.shape
96 | dn = (next_pow_2(nch) - nch, next_pow_2(nt) - nt)
97 | nfft = None
98 | elif pad is None or pad is False:
99 | dn = 0
100 | nfft = None
101 | else:
102 | dn = np.round(np.array(pad) * data.shape).astype(int)
103 | nfft = 'default'
104 |
105 | data_pd = padding(data_tp, dn)
106 | nch, nt = data_pd.shape
107 |
108 | fk, f, k = fk_transform(data_pd, dx, fs, taper=0, nfft=nfft)
109 |
110 | mask = fk_fan_mask(f, k, fmin, fmax, kmin, kmax, vmin, vmax, edge=edge,
111 | flag=flag)
112 |
113 | if mode == 'remove':
114 | mask = 1 - mask
115 |
116 | if mode == 'decompose':
117 | data_flt1 = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt]
118 | data_flt1 = padding(data_flt1, dn, reverse=True)
119 | data_flt2 = irfft2(ifftshift(fk * (1 - mask), axes=0)).real[:nch, :nt]
120 | data_flt2 = padding(data_flt2, dn, reverse=True)
121 | if verbose:
122 | return data_flt1, data_flt2, fk, f, k, mask
123 | else:
124 | return data_flt1, data_flt2
125 | else:
126 | data_flt = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt]
127 | data_flt = padding(data_flt, dn, reverse=True)
128 | if verbose:
129 | return data_flt, fk, f, k, mask
130 | else:
131 | return data_flt
132 |
133 |
134 | def curvelet_windowing(data, dx, fs, mode='decompose', vmin=0, vmax=np.inf,
135 | flag=None, pad=0.3, scale_begin=3, nbscales=None,
136 | nbangles=16, finest=1):
137 | """
138 | Use curvelet transform to keep coherent signal within a certain velocity
139 | range. {Atterholt et al., 2022, Geophys. J. Int.}
140 |
141 | :param data: numpy.ndarray. Data to decompose.
142 | :param dx: Channel interval in m.
143 | :param fs: Sampling rate in Hz.
144 | :param mode: str. 'remove' for denoising, 'retain' for extraction, and
145 | 'decompose' for decomposition.
146 | :param vmin, vmax: float. Velocity range in m/s.
147 | :param flag: -1 keep only negative apparent velocities, 0 keep both positive
148 | and negative apparent velocities, 1 keep only positive apparent
149 | velocities.
150 | :param pad: float or sequence of floats. Each float means padding percentage
151 | before FFT for corresponding dimension. If set to 0.1 will pad 5% before
152 | the beginning and after the end.
153 | :param scale_begin: int. The beginning scale to do coherent denoising.
154 | :param nbscales: int. Number of scales including the coarsest wavelet level.
155 | Default set to ceil(log2(min(M,N)) - 3).
156 | :param nbangles: int. Number of angles at the 2nd coarsest level,
157 | minimum 8, must be a multiple of 4.
158 | :param finest: int. Objects at the finest scale. 1 for curvelets, 2 for
159 | wavelets. Curvelets are more precise while wavelets are more efficient.
160 | :return: numpy.ndarray. Decomposed data.
161 | """
162 | return curvelet_denoising(data, choice=1, pad=pad, vmin=vmin, vmax=vmax,
163 | flag=flag, dx=dx, fs=fs, mode=mode,
164 | scale_begin=scale_begin, nbscales=nbscales,
165 | nbangles=nbangles, finest=finest)
166 |
--------------------------------------------------------------------------------
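A short sketch of calling `fk_filter` on a synthetic section, following the signature above; `dx`, `fs` and the velocity band are arbitrary example values:

```python
import numpy as np
from daspy.advanced_tools.decomposition import fk_filter

rng = np.random.default_rng(0)
data = rng.standard_normal((128, 1024))  # synthetic section: 128 channels x 1024 samples
dx, fs = 10, 100                         # 10 m channel interval, 100 Hz sampling

# 'decompose' splits the wavefield into the part inside the velocity fan
# (here 1-6 km/s apparent velocity) and everything else.
data_in, data_out = fk_filter(data, dx, fs, mode='decompose', vmin=1000, vmax=6000)

# 'retain' keeps only the masked part and returns a single array.
data_flt = fk_filter(data, dx, fs, mode='retain', vmin=1000, vmax=6000)
```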
/daspy/advanced_tools/denoising.py:
--------------------------------------------------------------------------------
1 | # Purpose: Remove noise from data
2 | # Author: Minzhe Hu, Zefeng Li
3 | # Date: 2024.5.13
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from copy import deepcopy
7 | from scipy.ndimage import median_filter
8 | from scipy.interpolate import interp1d
9 | from daspy.basic_tools.preprocessing import padding
10 | from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping
11 |
12 |
13 | def spike_removal(data, nch=50, nsp=5, thresh=10):
14 | """
15 | Use a median filter to remove high-strain spikes in the data. Modified from
16 | https://github.com/atterholt/curvelet-denoising/blob/main/MedianFilter.m
17 |
18 | :param data: numpy.ndarray. Data to remove spikes from.
19 | :param nch: int. Number of channels over which to compute the median.
20 | :param nsp: int. Number of sampling points over which to compute the median.
21 | :param thresh: Ratio threshold over the median over which a number is
22 | considered to be an outlier.
23 | :return: numpy.ndarray. Data with spikes removed.
24 | """
25 | absdata = np.abs(data)
26 |
27 | medians1 = median_filter(absdata, (nch, 1))
28 | medians = median_filter(medians1, (1, nsp))
29 | ratio = absdata / medians # comparisons matrix
30 |
31 | # find the bad values and interpolate with their neighbors
32 | data_dn = data.copy()
33 | out_i, out_j = np.where(ratio > thresh)
34 | for j in set(out_j):
35 | bch = out_i[out_j == j]
36 | gch = list(set(range(len(data))) - set(bch))
37 | f = interp1d(gch, data[gch, j], bounds_error=False,
38 | fill_value=(data[gch[0], j], data[gch[-1], j]))
39 | data_dn[bch, j] = f(bch)
40 |
41 | return data_dn
42 |
43 |
44 | def common_mode_noise_removal(data, method='median'):
45 | """
46 | Remove common mode noise (sometimes called horizontal noise) from data.
47 |
48 | :param data: numpy.ndarray. Data to remove common mode noise.
49 | :param method: str. Method for extracting common mode noise: 'median' or
50 | 'mean'.
51 | :return: numpy.ndarray. Denoised data.
52 | """
53 | nch, nt = data.shape
54 | if method == 'median':
55 | common = np.median(data, 0)
56 | elif method == 'mean':
57 | common = np.mean(data, 0)
58 |
59 | xx = np.sum(common ** 2)
60 | data_dn = np.zeros((nch, nt))
61 | for i in range(nch):
62 | xc = np.sum(common * data[i])
63 | data_dn[i] = data[i] - xc / xx * common
64 |
65 | return data_dn
66 |
67 |
68 | def _noise_level(data, finest=2, nbscales=None, nbangles=16, percentile=95):
69 | """
70 | Find threshold for curvelet denoising with noise record.
71 |
72 | :param data: numpy.ndarray. Noise data.
73 | :param nbscales: int. Number of scales including the coarsest wavelet level.
74 | Default set to ceil(log2(min(M,N)) - 3).
75 | :param nbangles: int. Number of angles at the 2nd coarsest level,
76 | minimum 8, must be a multiple of 4.
77 | :param percentile: number. The threshold is taken as this percentile of the
78 | curvelet coefficient of the noise record
79 | :return: 2-D list. Threshold for curvelet coefficients.
80 | """
81 | C = fdct_wrapping(data, is_real=True, finest=finest, nbscales=nbscales,
82 | nbangles_coarse=nbangles)
83 |
84 | E_noise = []
85 | for s in range(len(C)):
86 | E_noise.append([])
87 | for w in range(len(C[s])):
88 | threshold = np.percentile(abs(C[s][w]), percentile)
89 | E_noise[s].append(threshold)
90 |
91 | return E_noise
92 |
93 |
94 | def _knee_points(C, factor=0.2):
95 | """
96 | Find threshold for curvelet denoising without noise record.
97 |
98 | :param C: 2-D list of np.ndarray. Array of curvelet coefficients.
99 | :param factor: float. Multiplication factor from 0 to 1. Small factor
100 | corresponds to conservative strategy.
101 | :return: 2-D list. Threshold for curvelet coefficients.
102 | """
103 | E_knee = []
104 | for s in range(len(C)):
105 | E_knee.append([])
106 | for w in range(len(C[s])):
107 | F, x = np.histogram(abs(C[s][w]), density=True)
108 | x = (x[1:] + x[:-1]) / 2
109 | F = np.cumsum(F) / np.sum(F)
110 | slope = (x[-1] - x[0]) / (F[-1] - F[0])
111 | tiltedplot = x - (slope * F)
112 | idx = np.argmin(tiltedplot)
113 | E_knee[s].append(x[idx] * factor)
114 |
115 | return E_knee
116 |
117 |
118 | def _velocity_bin(nbangles, fs, dx):
119 | v_bounds = np.zeros(nbangles // 4 + 1)
120 | half = nbangles // 8
121 | v_bounds[half] = fs * dx
122 | np.seterr(divide='ignore')
123 | for i in range(half):
124 | v_bounds[i] = i / half * fs * dx
125 | v_bounds[half + i + 1] = np.divide(fs * dx, 1 - (i + 1) / half)
126 |
127 | np.seterr(divide='warn')
128 | v_lows = list(range(half - 1, -1, -1)) + list(range(half * 2)) + \
129 | list(range(2 * half - 1, half - 1, -1))
130 | velocity = []
131 | for i in range(nbangles // 2):
132 | v_low = v_bounds[v_lows[i]]
133 | v_high = v_bounds[v_lows[i] + 1]
134 | velocity.append([v_low, v_high])
135 | velocity = np.array(velocity * 2)
136 | for i in range(half):
137 | velocity[i] = -1 * velocity[i][::-1]
138 | velocity[3 * half + i] = -1 * velocity[3 * half + i][::-1]
139 | velocity[4 * half + i] = -1 * velocity[4 * half + i][::-1]
140 | velocity[7 * half + i] = -1 * velocity[7 * half + i][::-1]
141 | return velocity
142 |
143 |
144 | def _mask_factor(velocity, vmin, vmax, flag=0):
145 | if flag:
146 | if flag == -1:
147 | # negate and swap the velocity bounds in one step so vmin is not overwritten first
148 | vmin, vmax = -vmax, -vmin
149 | else:
150 | half = len(velocity) // 8
151 | for i in range(half):
152 | velocity[i] = -1 * velocity[i][::-1]
153 | velocity[3 * half + i] = -1 * velocity[3 * half + i][::-1]
154 | velocity[4 * half + i] = -1 * velocity[4 * half + i][::-1]
155 | velocity[7 * half + i] = -1 * velocity[7 * half + i][::-1]
156 |
157 | factors = np.zeros(len(velocity))
158 | for i, (v_low, v_high) in enumerate(velocity):
159 | v1 = max(v_low, vmin)
160 | v2 = min(v_high, vmax)
161 | if v1 < v2:
162 | if v_high == np.inf or v_low == -np.inf:
163 | factors[i] = 1
164 | else:
165 | factors[i] = np.divide(v2 - v1, v_high - v_low)
166 |
167 | return factors
168 |
169 |
170 | def curvelet_denoising(data, choice=0, pad=0.3, noise=None, noise_perc=95,
171 | knee_fac=0.2, soft_thresh=True, vmin=0, vmax=np.inf,
172 | flag=0, dx=None, fs=None, mode='remove',
173 | scale_begin=3, nbscales=None, nbangles=16, finest=2):
174 | """
175 | Use curvelet transform to filter stochastic and/or coherent noise.
176 | Modified from
177 | https://github.com/atterholt/curvelet-denoising/blob/main/CurveletDenoising.m
178 | {Atterholt et al., 2022 , Geophys. J. Int.}
179 |
180 | :param data: numpy.ndarray. Data to denoise.
181 | :param choice: int. 0 for Gaussian denoising using soft thresholding, 1 for
182 | velocity filtering using the standard FK methodology and 2 for both.
183 | :param pad: float or sequence of floats. Each float means padding percentage
184 | before FFT for corresponding dimension. If set to 0.1 will pad 5% before
185 | the beginning and after the end.
186 | :param noise: numpy.ndarray or daspy.Section. Noise record as reference.
187 | :param noise_perc: number. The threshold is taken as this percentile of the
188 | curvelet coefficient of the noise record. (only used when noise is
189 | specified)
190 | :param knee_fac: float. Multiplication factor from 0 to 1. Small factor
191 | corresponds to conservative strategy. (only used when noise is not
192 | specified)
193 | :param soft_thresh: bool. True for soft thresholding and False for hard
194 | thresholding.
195 | :param vmin, vmax: float. Velocity range in m/s.
196 | :param flag: -1 choose only negative apparent velocities, 0 choose both
197 | positive and negative apparent velocities, 1 choose only positive
198 | apparent velocities.
199 | :param dx: Channel interval in m.
200 | :param fs: Sampling rate in Hz.
201 | :param mode: str. Only available when choice in (1,2). 'remove' for
202 | denoising, 'retain' for extraction, and 'decompose' for decomposition.
203 | :param scale_begin: int. The beginning scale to do coherent denoising.
204 | :param nbscales: int. Number of scales including the coarsest wavelet level.
205 | Default set to ceil(log2(min(M,N)) - 3).
206 | :param nbangles: int. Number of angles at the 2nd coarsest level,
207 | minimum 8, must be a multiple of 4.
208 | :param finest: int. Objects at the finest scale. 1 for curvelets, 2 for
209 | wavelets. Curvelets are more precise while wavelets are more efficient.
210 | :return: numpy.ndarray. Denoised data.
211 | """
212 | if pad is None or pad is False:
213 | pad = 0
214 | dn = np.round(np.array(pad) * data.shape).astype(int)
215 | data_pd = padding(data, dn)
216 |
217 | C = fdct_wrapping(data_pd, is_real=True, finest=finest, nbscales=nbscales,
218 | nbangles_coarse=nbangles)
219 |
220 | # apply Gaussian denoising
221 | if choice in (0, 2):
222 | # define threshold
223 | if noise is None:
224 | E = _knee_points(C, factor=knee_fac)
225 | else:
226 | if not isinstance(noise, np.ndarray):
227 | noise = noise.data
228 | noise_pd = padding(noise,
229 | np.array(data_pd.shape) - np.array(noise.shape))
230 | E = _noise_level(noise_pd, finest=finest, nbscales=nbscales,
231 | nbangles=nbangles, percentile=noise_perc)
232 | for s in range(1, len(C)):
233 | for w in range(len(C[s])):
234 | # first do a hard threshold
235 | C[s][w] = C[s][w] * (abs(C[s][w]) > abs(E[s][w]))
236 | if soft_thresh:
237 | # soften the existing coefficients
238 | C[s][w] = np.sign(C[s][w]) * (abs(C[s][w]) - abs(E[s][w]))
239 |
240 | # apply velocity filtering
241 | if choice in (1, 2):
242 | if dx is None or fs is None:
243 | raise ValueError('Please set both dx and fs.')
244 |
245 | if mode == 'decompose':
246 | lst = list(range(scale_begin - 1))
247 | if finest == 2:
248 | lst.append(len(C) - 1)
249 | for s in lst:
250 | for w in range(len(C[s])):
251 | C[s][w] /= 2
252 | C_rt = deepcopy(C)
253 |
254 | for s in range(scale_begin - 1, len(C) - finest + 1):
255 | nbangles = len(C[s])
256 | velocity = _velocity_bin(nbangles, fs, dx)
257 | factors = _mask_factor(velocity, vmin, vmax, flag=flag)
258 | for w in range(nbangles):
259 | if mode == 'retain':
260 | C[s][w] *= factors[w]
261 | elif mode == 'remove':
262 | C[s][w] *= 1 - factors[w]
263 | elif mode == 'decompose':
264 | C[s][w] *= factors[w]
265 | C_rt[s][w] *= 1 - factors[w]
266 |
267 | # perform the inverse curvelet transform
268 | data_dn = padding(ifdct_wrapping(C, is_real=True, size=data_pd.shape), dn,
269 | reverse=True)
270 |
271 | if mode == 'decompose':
272 | data_n = padding(ifdct_wrapping(C_rt, is_real=True, size=data_pd.shape),
273 | dn, reverse=True)
274 | return data_dn, data_n
275 | else:
276 | return data_dn
--------------------------------------------------------------------------------
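An illustrative sketch combining the two simpler denoisers above on synthetic data (array size, the planted spike and thresholds are arbitrary; `curvelet_denoising` is omitted since it additionally needs `dx`/`fs` or a noise record):

```python
import numpy as np
from daspy.advanced_tools.denoising import spike_removal, common_mode_noise_removal

rng = np.random.default_rng(1)
data = rng.standard_normal((200, 2000))  # 200 channels x 2000 samples
data[50, 1000] += 500                    # plant an artificial high-strain spike

# Median-filter based despiking: values more than `thresh` times the local
# median are replaced by interpolation from neighbouring channels.
data_despiked = spike_removal(data, nch=50, nsp=5, thresh=10)

# Remove common mode ("horizontal") noise by subtracting the scaled
# channel-wise median trace from every channel.
data_clean = common_mode_noise_removal(data_despiked, method='median')
```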
/daspy/advanced_tools/fdct.py:
--------------------------------------------------------------------------------
1 | # Purpose: Fast Discrete Curvelet Transform
2 | # Author: Minzhe Hu
3 | # Date: 2024.4.11
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | # Modified from
6 | # http://www.curvelet.org/download-secure.php?file=CurveLab-2.1.3.tar.gz
7 | # (matlab version)
8 | import numpy as np
9 | from numpy.fft import fftshift, ifftshift, fft2, ifft2
10 |
11 |
12 | def _round(x):
13 | return np.round(x).astype(int)
14 |
15 |
16 | def _floor(x):
17 | return np.floor(x).astype(int)
18 |
19 |
20 | def _ceil(x):
21 | return np.ceil(x).astype(int)
22 |
23 |
24 | def fdct_wrapping_window(x):
25 | """
26 | Creates the two halves of a C**inf compactly supported window.
27 |
28 | :param x: vector or matrix of abscissae, the relevant ones from 0 to 1.
29 | :return: vector or matrix containing samples of the left, resp. right half
30 | of the window.
31 | """
32 |
33 | # Initialize the variables
34 | wr = np.zeros_like(x)
35 | wl = np.zeros_like(x)
36 |
37 | # Set values close to zero to zero
38 | x[np.abs(x) < 2**-52] = 0
39 |
40 | # Calculate wr and wl
41 | wr[(x > 0) & (x < 1)] = np.exp(
42 | 1 - 1. / (1 - np.exp(1 - 1. / x[(x > 0) & (x < 1)])))
43 | wr[x <= 0] = 1
44 | wl[(x > 0) & (x < 1)] = np.exp(
45 | 1 - 1. / (1 - np.exp(1 - 1. / (1 - x[(x > 0) & (x < 1)]))))
46 | wl[x >= 1] = 1
47 |
48 | # Normalize wr and wl
49 | normalization = np.sqrt(wl**2 + wr**2)
50 | wr = wr / normalization
51 | wl = wl / normalization
52 |
53 | return wl, wr
54 |
55 |
56 | def fdct_wrapping(x, is_real=False, finest=2,
57 | nbscales=None, nbangles_coarse=16):
58 | """
59 | Fast Discrete Curvelet Transform via wedge wrapping.
60 |
61 | :param x: np.array. M-by-N matrix.
62 | :param is_real: bool. Type of the transform, False for complex-valued
63 | curvelets and True for real-valued curvelets.
64 | :param finest: int. Chooses one of two possibilities for the coefficients at
65 | the finest level: 1 for curvelets and 2 for wavelets.
66 | :param nbscales: int. Number of scales including the coarsest wavelet
67 | level. Default set to ceil(log2(min(M,N)) - 3).
68 | :param nbangles_coarse: int. Number of angles at the 2nd coarsest level,
69 | minimum 8, must be a multiple of 4.
70 | :return: 2-D list of np.ndarray. Array of curvelet coefficients.
71 | C[j][l][k1,k2] is the coefficient at scale j(from finest to coarsest
72 | scale), angle l(starts at the top-left corner and increases clockwise),
73 | position k1, k2(size varies with j and l). If is_real is 1, there are
74 | two types of curvelets, 'cosine' and 'sine'. For a given scale j, the
75 | 'cosine' coefficients are stored in the first two quadrants (low values
76 | of l), the 'sine' coefficients in the last two quadrants (high values of
77 | l).
78 | """
79 | X = fftshift(fft2(ifftshift(x))) / np.sqrt(x.size)
80 | N1, N2 = X.shape
81 | if nbscales is None:
82 | nbscales = _ceil(np.log2(min(N1, N2)) - 3)
83 |
84 | # Initialization: data structure
85 | nbangles = [1] + [nbangles_coarse * 2 ** ((nbscales - i) // 2)
86 | for i in range(nbscales, 1, -1)]
87 | if finest == 2:
88 | nbangles[-1] = 1
89 |
90 | C = []
91 | for j in range(nbscales):
92 | C.append([None] * nbangles[j])
93 |
94 | # Loop: pyramidal scale decomposition
95 | M1 = N1 / 3
96 | M2 = N2 / 3
97 |
98 | if finest == 1:
99 | # Initialization: smooth periodic extension of high frequencies
100 | bigN1 = 2 * _floor(2 * M1) + 1
101 | bigN2 = 2 * _floor(2 * M2) + 1
102 | equiv_index_1 = (_floor(N1 / 2) - _floor(2 * M1) +
103 | np.arange(bigN1)) % N1
104 | equiv_index_2 = (_floor(N2 / 2) - _floor(2 * M2) +
105 | np.arange(bigN2)) % N2
106 | X = X[np.ix_(equiv_index_1, equiv_index_2)]
107 |
108 | window_length_1 = _floor(2 * M1) - _floor(M1) - (N1 % 3 == 0)
109 | window_length_2 = _floor(2 * M2) - _floor(M2) - (N2 % 3 == 0)
110 | coord_1 = np.linspace(0, 1, window_length_1)
111 | coord_2 = np.linspace(0, 1, window_length_2)
112 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
113 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
114 |
115 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
116 | if N1 % 3 == 0:
117 | lowpass_1 = np.concatenate(([0], lowpass_1, [0]))
118 |
119 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
120 | if N2 % 3 == 0:
121 | lowpass_2 = np.concatenate(([0], lowpass_2, [0]))
122 |
123 | lowpass = np.outer(lowpass_1, lowpass_2)
124 | Xlow = X * lowpass
125 | scales = np.arange(nbscales, 1, -1)
126 |
127 | else:
128 | M1 /= 2
129 | M2 /= 2
130 |
131 | window_length_1 = _floor(2 * M1) - _floor(M1)
132 | window_length_2 = _floor(2 * M2) - _floor(M2)
133 | coord_1 = np.linspace(0, 1, window_length_1)
134 | coord_2 = np.linspace(0, 1, window_length_2)
135 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
136 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
137 |
138 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
139 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
140 | lowpass = np.outer(lowpass_1, lowpass_2)
141 | hipass = np.sqrt(1 - lowpass ** 2)
142 |
143 | Xlow_index_1 = np.arange(-_floor(2 * M1),
144 | _floor(2 * M1) + 1) + _ceil((N1 + 1) / 2) - 1
145 | Xlow_index_2 = np.arange(-_floor(2 * M2),
146 | _floor(2 * M2) + 1) + _ceil((N2 + 1) / 2) - 1
147 | Xlow = X[np.ix_(Xlow_index_1, Xlow_index_2)] * lowpass
148 | Xhi = X.copy()
149 | Xhi[np.ix_(Xlow_index_1, Xlow_index_2)] *= hipass
150 |
151 | C[nbscales - 1][0] = fftshift(ifft2(ifftshift(Xhi))
152 | ) * np.sqrt(Xhi.size)
153 | if is_real:
154 | C[nbscales - 1][0] = C[nbscales - 1][0].real
155 |
156 | scales = np.arange(nbscales - 1, 1, -1)
157 | for j in scales - 1:
158 | M1 /= 2
159 | M2 /= 2
160 | window_length_1 = _floor(2 * M1) - _floor(M1)
161 | window_length_2 = _floor(2 * M2) - _floor(M2)
162 | coord_1 = np.linspace(0, 1, window_length_1)
163 | coord_2 = np.linspace(0, 1, window_length_2)
164 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
165 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
166 |
167 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
168 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
169 | lowpass = np.outer(lowpass_1, lowpass_2)
170 | hipass = np.sqrt(1 - lowpass ** 2)
171 |
172 | Xhi = Xlow.copy()
173 | Xlow_index_1 = np.arange(-_floor(2 * M1),
174 | _floor(2 * M1) + 1) + _floor(4 * M1)
175 | Xlow_index_2 = np.arange(-_floor(2 * M2),
176 | _floor(2 * M2) + 1) + _floor(4 * M2)
177 | Xlow = Xlow[np.ix_(Xlow_index_1, Xlow_index_2)]
178 | Xhi[np.ix_(Xlow_index_1, Xlow_index_2)] = Xlow * hipass
179 | Xlow *= lowpass
180 |
181 | # Loop: angular decomposition
182 | l = -1
183 | nbquadrants = 2 + 2 * (not is_real)
184 | nbangles_perquad = nbangles[j] // 4
185 | for quadrant in range(1, nbquadrants + 1):
186 | M_horiz = (M1, M2)[quadrant % 2]
187 | M_vert = (M2, M1)[quadrant % 2]
188 | wedge_ticks_left = _round(
189 | np.linspace(
190 | 0,
191 | 1,
192 | nbangles_perquad +
193 | 1) *
194 | _floor(
195 | 4 *
196 | M_horiz) +
197 | 1)
198 | wedge_ticks_right = 2 * _floor(4 * M_horiz) + 2 - wedge_ticks_left
199 | if nbangles_perquad % 2:
200 | wedge_ticks = np.concatenate(
201 | (wedge_ticks_left, wedge_ticks_right[::-1]))
202 | else:
203 | wedge_ticks = np.concatenate(
204 | (wedge_ticks_left, wedge_ticks_right[-2::-1]))
205 |
206 | wedge_endpoints = wedge_ticks[1:-1:2]
207 | wedge_midpoints = (wedge_endpoints[:-1] + wedge_endpoints[1:]) / 2
208 | # Left corner wedge
209 | l += 1
210 | first_wedge_endpoint_vert = _round(
211 | _floor(4 * M_vert) / nbangles_perquad + 1)
212 | length_corner_wedge = _floor(4 * M_vert) - _floor(M_vert) + \
213 | _ceil(first_wedge_endpoint_vert / 4)
214 | Y_corner = np.arange(length_corner_wedge) + 1
215 | XX, YY = np.meshgrid(
216 | np.arange(2 * _floor(4 * M_horiz) + 1) + 1, Y_corner)
217 | width_wedge = wedge_endpoints[1] + wedge_endpoints[0] - 1
218 | slope_wedge = (_floor(4 * M_horiz) + 1 -
219 | wedge_endpoints[0]) / _floor(4 * M_vert)
220 | left_line = _round(
221 | 2 - wedge_endpoints[0] + slope_wedge * (Y_corner - 1))
222 | wrapped_data = np.zeros(
223 | (length_corner_wedge, width_wedge), dtype=complex)
224 | wrapped_XX = np.zeros(
225 | (length_corner_wedge, width_wedge), dtype=int)
226 | wrapped_YY = np.zeros(
227 | (length_corner_wedge, width_wedge), dtype=int)
228 | first_row = _floor(4 * M_vert) + 2 - \
229 | _ceil((length_corner_wedge + 1) / 2) + \
230 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
231 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \
232 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
233 | for row in Y_corner - 1:
234 | cols = left_line[row] + \
235 | (np.arange(width_wedge) - (left_line[row] - first_col)) \
236 | % width_wedge
237 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1
238 | new_row = (row - first_row + 1) % length_corner_wedge
239 | wrapped_data[new_row, :] = Xhi[row,
240 | admissible_cols] * (cols > 0)
241 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
242 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
243 | slope_wedge_right = (_floor(4 * M_horiz) + 1 -
244 | wedge_midpoints[0]) / _floor(4 * M_vert)
245 | mid_line_right = wedge_midpoints[0] + \
246 | slope_wedge_right * (wrapped_YY - 1)
247 | coord_right = 0.5 + _floor(4 * M_vert) / \
248 | (wedge_endpoints[1] - wedge_endpoints[0]) * \
249 | (wrapped_XX - mid_line_right) / \
250 | (_floor(4 * M_vert) + 1 - wrapped_YY)
251 | C2 = 1 / (1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[0] -
252 | 1) - 1) + 1 / (2 * (_floor(4 * M_vert)) / (
253 | first_wedge_endpoint_vert - 1) - 1))
254 | C1 = C2 / (2 * (_floor(4 * M_vert)) /
255 | (first_wedge_endpoint_vert - 1) - 1)
256 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) +
257 | (wrapped_YY - 1) / _floor(4 * M_vert) == 2] += 1
258 | coord_corner = C1 + C2 * ((wrapped_XX - 1) / _floor(4 * M_horiz) -
259 | (wrapped_YY - 1) / _floor(4 * M_vert)) / (2 -
260 | ((wrapped_XX - 1) / _floor(4 * M_horiz) + (wrapped_YY - 1) /
261 | _floor(4 * M_vert)))
262 | wl_left, _ = fdct_wrapping_window(coord_corner)
263 | _, wr_right = fdct_wrapping_window(coord_right)
264 | wrapped_data = wrapped_data * wl_left * wr_right
265 | if not is_real:
266 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
267 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))) * \
268 | np.sqrt(wrapped_data.size)
269 | else:
270 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
271 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \
272 | np.sqrt(wrapped_data.size)
273 | C[j][l] = np.sqrt(2) * x.real
274 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag
275 |
276 | # Regular wedges
277 | length_wedge = _floor(4 * M_vert) - _floor(M_vert)
278 | Y = np.arange(length_wedge) + 1
279 | first_row = _floor(4 * M_vert) + 2 - _ceil((length_wedge + 1) / 2) \
280 | + (length_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
281 | for subl in range(1, nbangles_perquad - 1):
282 | l += 1
283 | width_wedge = wedge_endpoints[subl +
284 | 1] - wedge_endpoints[subl - 1] + 1
285 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
286 | wedge_endpoints[subl]) / _floor(4 * M_vert)
287 | left_line = _round(
288 | wedge_endpoints[subl - 1] + slope_wedge * (Y - 1))
289 | wrapped_data = np.zeros(
290 | (length_wedge, width_wedge), dtype=complex)
291 | wrapped_XX = np.zeros((length_wedge, width_wedge), dtype=int)
292 | wrapped_YY = np.zeros((length_wedge, width_wedge), dtype=int)
293 | first_col = _floor(4 * M_horiz) + 2 - \
294 | _ceil((width_wedge + 1) / 2) + \
295 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
296 | for row in Y - 1:
297 | cols = left_line[row] + (np.arange(width_wedge) -
298 | (left_line[row] - first_col)) % width_wedge - 1
299 | new_row = (row - first_row + 1) % length_wedge
300 | wrapped_data[new_row, :] = Xhi[row, cols]
301 | wrapped_XX[new_row, :] = XX[row, cols]
302 | wrapped_YY[new_row, :] = YY[row, cols]
303 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
304 | wedge_midpoints[subl - 1]) / _floor(4 * M_vert)
305 | mid_line_left = wedge_midpoints[subl - 1] + \
306 | slope_wedge_left * (wrapped_YY - 1)
307 | coord_left = 0.5 + _floor(4 * M_vert) / \
308 | (wedge_endpoints[subl] - wedge_endpoints[subl - 1]) * \
309 | (wrapped_XX - mid_line_left) / \
310 | (_floor(4 * M_vert) + 1 - wrapped_YY)
311 | slope_wedge_right = ((_floor(4 * M_horiz) + 1) -
312 | wedge_midpoints[subl]) / _floor(4 * M_vert)
313 | mid_line_right = wedge_midpoints[subl] + \
314 | slope_wedge_right * (wrapped_YY - 1)
315 | coord_right = 0.5 + _floor(4 * M_vert) / \
316 | (wedge_endpoints[subl + 1] - wedge_endpoints[subl]) * \
317 | (wrapped_XX - mid_line_right) / \
318 | (_floor(4 * M_vert) + 1 - wrapped_YY)
319 |
320 | wl_left, _ = fdct_wrapping_window(coord_left)
321 | _, wr_right = fdct_wrapping_window(coord_right)
322 | wrapped_data = wrapped_data * wl_left * wr_right
323 | if not is_real:
324 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
325 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))) * \
326 | np.sqrt(wrapped_data.size)
327 | else:
328 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
329 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \
330 | np.sqrt(wrapped_data.size)
331 | C[j][l] = np.sqrt(2) * x.real
332 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag
333 |
334 | # Right corner wedge
335 | l += 1
336 | width_wedge = 4 * _floor(4 * M_horiz) + 3 - \
337 | wedge_endpoints[-1] - wedge_endpoints[-2]
338 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
339 | wedge_endpoints[-1]) / _floor(4 * M_vert)
340 | left_line = _round(
341 | wedge_endpoints[-2] + slope_wedge * (Y_corner - 1))
342 | wrapped_data = np.zeros((length_corner_wedge, width_wedge),
343 | dtype=complex)
344 | wrapped_XX = np.zeros((length_corner_wedge, width_wedge), dtype=int)
345 | wrapped_YY = np.zeros((length_corner_wedge, width_wedge), dtype=int)
346 | first_row = _floor(4 * M_vert) + 2 - \
347 | _ceil((length_corner_wedge + 1) / 2) + \
348 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
349 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) + \
350 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
351 | for row in Y_corner - 1:
352 | cols = left_line[row] + (np.arange(width_wedge) -
353 | (left_line[row] - first_col)) % width_wedge
354 | admissible_cols = _round(0.5 * (cols + 2 * _floor(4 * M_horiz)
355 | + 1 - np.abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1
356 | new_row = (row - first_row + 1) % length_corner_wedge
357 | wrapped_data[new_row, :] = Xhi[row, admissible_cols] * \
358 | (cols <= (2 * _floor(4 * M_horiz) + 1))
359 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
360 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
361 |
362 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
363 | wedge_midpoints[-1]) / _floor(4 * M_vert)
364 | mid_line_left = wedge_midpoints[-1] + \
365 | slope_wedge_left * (wrapped_YY - 1)
366 | coord_left = 0.5 + _floor(4 * M_vert) / \
367 | (wedge_endpoints[-1] - wedge_endpoints[-2]) * \
368 | (wrapped_XX - mid_line_left) / \
369 | (_floor(4 * M_vert) + 1 - wrapped_YY)
370 | C2 = -1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[-1] - 1) -
371 | 1 + 1 / (2 * (_floor(4 * M_vert)) /
372 | (first_wedge_endpoint_vert - 1) - 1))
373 | C1 = -C2 * (2 * (_floor(4 * M_horiz)) /
374 | (wedge_endpoints[-1] - 1) - 1)
375 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) ==
376 | (wrapped_YY - 1) / _floor(4 * M_vert)] -= 1
377 | coord_corner = C1 + C2 * (2 - ((wrapped_XX - 1) /
378 | _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert))) \
379 | / ((wrapped_XX - 1) / _floor(4 * M_horiz) - (wrapped_YY - 1) /
380 | _floor(4 * M_vert))
381 | wl_left, _ = fdct_wrapping_window(coord_left)
382 | _, wr_right = fdct_wrapping_window(coord_corner)
383 | wrapped_data = wrapped_data * wl_left * wr_right
384 | if not is_real:
385 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
386 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))
387 | ) * np.sqrt(wrapped_data.size)
388 | else:
389 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
390 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \
391 | np.sqrt(wrapped_data.size)
392 | C[j][l] = np.sqrt(2) * x.real
393 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag
394 |
395 | if quadrant < nbquadrants:
396 | Xhi = np.rot90(Xhi)
397 | # Coarsest wavelet level
398 | C[0][0] = fftshift(ifft2(ifftshift(Xlow))) * np.sqrt(Xlow.size)
399 | if is_real:
400 | C[0][0] = C[0][0].real
401 |
402 | return C
403 |
404 |
405 | def ifdct_wrapping(C, is_real=False, size=None):
406 | """
407 |     Inverse Fast Discrete Curvelet Transform via wedge wrapping. This is in
408 |     fact the adjoint and also the pseudo-inverse.
409 |
410 | :param C: 2-D list of np.ndarray. Array of curvelet coefficients.
411 | :param is_real: bool. Type of the transform, False for complex-valued
412 | curvelets and True for real-valued curvelets.
413 |     :param size: tuple of ints. Size of the image to be recovered (not necessary
414 |         if finest == 2).
415 |     :return: 2-D np.ndarray. Reconstructed image.
416 | """
417 | nbscales = len(C)
418 | nbangles_coarse = len(C[1])
419 | nbangles = [1] + [nbangles_coarse * 2 ** ((nbscales - i) // 2)
420 | for i in range(nbscales, 1, -1)]
421 | if len(C[-1]) == 1:
422 | finest = 2
423 | nbangles[nbscales - 1] = 1
424 | else:
425 | finest = 1
426 |
427 | if size is None:
428 | if finest == 1:
429 |             raise ValueError("Output size is required when finest == 1.")
430 | else:
431 | N1, N2 = C[-1][0].shape
432 | else:
433 | N1, N2 = size
434 |
435 | M1 = N1 / 3
436 | M2 = N2 / 3
437 |
438 | if finest == 1:
439 | # Initialization: preparing the lowpass filter at finest scale
440 | window_length_1 = _floor(2 * M1) - _floor(M1) - (N1 % 3 == 0)
441 | window_length_2 = _floor(2 * M2) - _floor(M2) - (N2 % 3 == 0)
442 | coord_1 = np.linspace(0, 1, window_length_1)
443 | coord_2 = np.linspace(0, 1, window_length_2)
444 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
445 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
446 |
447 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
448 | if N1 % 3 == 0:
449 | lowpass_1 = np.concatenate(([0], lowpass_1, [0]))
450 |
451 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
452 | if N2 % 3 == 0:
453 | lowpass_2 = np.concatenate(([0], lowpass_2, [0]))
454 |
455 | lowpass = np.outer(lowpass_1, lowpass_2)
456 | scales = np.arange(nbscales, 1, -1)
457 | else:
458 | M1 /= 2
459 | M2 /= 2
460 |
461 | window_length_1 = _floor(2 * M1) - _floor(M1)
462 | window_length_2 = _floor(2 * M2) - _floor(M2)
463 | coord_1 = np.linspace(0, 1, window_length_1)
464 | coord_2 = np.linspace(0, 1, window_length_2)
465 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
466 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
467 |
468 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
469 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
470 | lowpass = np.outer(lowpass_1, lowpass_2)
471 | hipass_finest = np.sqrt(1 - lowpass ** 2)
472 |
473 | scales = np.arange(nbscales - 1, 1, -1)
474 |
475 | bigN1 = 2 * _floor(2 * M1) + 1
476 | bigN2 = 2 * _floor(2 * M2) + 1
477 | X = np.zeros((bigN1, bigN2), dtype=complex)
478 |
479 | # Loop: pyramidal reconstruction
480 |
481 | Xj_topleft_1 = 1
482 | Xj_topleft_2 = 1
483 | for j in scales - 1:
484 | M1 /= 2
485 | M2 /= 2
486 |
487 | window_length_1 = _floor(2 * M1) - _floor(M1)
488 | window_length_2 = _floor(2 * M2) - _floor(M2)
489 | coord_1 = np.linspace(0, 1, window_length_1)
490 | coord_2 = np.linspace(0, 1, window_length_2)
491 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
492 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
493 |
494 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
495 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
496 | lowpass_next = np.outer(lowpass_1, lowpass_2)
497 | hipass = np.sqrt(1 - lowpass_next ** 2)
498 | Xj = np.zeros((2 * _floor(4 * M1) + 1, 2 * _floor(4 * M2) + 1),
499 | dtype=complex)
500 |
501 | # Loop: angles
502 | l = -1
503 | nbquadrants = 2 + 2 * (not is_real)
504 | nbangles_perquad = nbangles[j] // 4
505 | for quadrant in range(1, nbquadrants + 1):
506 | M_horiz = (M1, M2)[quadrant % 2]
507 | M_vert = (M2, M1)[quadrant % 2]
508 | wedge_ticks_left = _round(np.linspace(0, 1, nbangles_perquad + 1) *
509 | _floor(4 * M_horiz) + 1)
510 | wedge_ticks_right = 2 * _floor(4 * M_horiz) + 2 - wedge_ticks_left
511 | if nbangles_perquad % 2:
512 | wedge_ticks = np.concatenate(
513 | (wedge_ticks_left, wedge_ticks_right[::-1]))
514 | else:
515 | wedge_ticks = np.concatenate(
516 | (wedge_ticks_left, wedge_ticks_right[-2::-1]))
517 | wedge_endpoints = wedge_ticks[1:-1:2]
518 | wedge_midpoints = (wedge_endpoints[:-1] + wedge_endpoints[1:]) / 2
519 |
520 | # Left corner wedge
521 | l += 1
522 | first_wedge_endpoint_vert = _round(_floor(4 * M_vert) /
523 | nbangles_perquad + 1)
524 | length_corner_wedge = _floor(4 * M_vert) - _floor(M_vert) + \
525 | _ceil(first_wedge_endpoint_vert / 4)
526 | Y_corner = np.arange(length_corner_wedge) + 1
527 | [XX, YY] = np.meshgrid(np.arange(1, 2 * _floor(4 * M_horiz) + 2),
528 | Y_corner)
529 | width_wedge = wedge_endpoints[1] + wedge_endpoints[0] - 1
530 | slope_wedge = (_floor(4 * M_horiz) + 1 -
531 | wedge_endpoints[0]) / _floor(4 * M_vert)
532 | left_line = _round(2 - wedge_endpoints[0] +
533 | slope_wedge * (Y_corner - 1))
534 | wrapped_XX = np.zeros((length_corner_wedge, width_wedge), dtype=int)
535 | wrapped_YY = np.zeros((length_corner_wedge, width_wedge), dtype=int)
536 | first_row = _floor(4 * M_vert) + \
537 | 2 - _ceil((length_corner_wedge + 1) / 2) + \
538 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
539 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \
540 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
541 | for row in Y_corner - 1:
542 | cols = left_line[row] + (np.arange(width_wedge) -
543 | (left_line[row] - first_col)) % width_wedge
544 | new_row = (row - first_row + 1) % length_corner_wedge
545 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1
546 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
547 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
548 |
549 | slope_wedge_right = (_floor(4 * M_horiz) + 1 - wedge_midpoints[0]) \
550 | / _floor(4 * M_vert)
551 | mid_line_right = wedge_midpoints[0] + \
552 | slope_wedge_right * (wrapped_YY - 1)
553 | coord_right = 0.5 + _floor(4 * M_vert) / (wedge_endpoints[1] -
554 | wedge_endpoints[0]) * (wrapped_XX - mid_line_right) / \
555 | (_floor(4 * M_vert) + 1 - wrapped_YY)
556 | C2 = 1 / (1 / (2 * (_floor(4 * M_horiz)) /
557 | (wedge_endpoints[0] - 1) - 1) + 1 / (2 * (_floor(4 * M_vert))
558 | / (first_wedge_endpoint_vert - 1) - 1))
559 | C1 = C2 / (2 * (_floor(4 * M_vert)) /
560 | (first_wedge_endpoint_vert - 1) - 1)
561 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) +
562 | (wrapped_YY - 1) / _floor(4 * M_vert) == 2] += 1
563 | coord_corner = C1 + C2 * ((wrapped_XX - 1) / _floor(4 * M_horiz) -
564 | (wrapped_YY - 1) / _floor(4 * M_vert)) / (2 - ((wrapped_XX - 1)
565 | / _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert)))
566 | wl_left, _ = fdct_wrapping_window(coord_corner)
567 | _, wr_right = fdct_wrapping_window(coord_right)
568 |
569 | if not is_real:
570 | wrapped_data = fftshift(fft2(ifftshift(C[j][l]))) / \
571 | np.sqrt(C[j][l].size)
572 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
573 | else:
574 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2]
575 | wrapped_data = fftshift(fft2(ifftshift(x))) / \
576 | np.sqrt(x.size * 2)
577 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
578 |
579 | wrapped_data = wrapped_data * wl_left * wr_right
580 | # Unwrapping data
581 | for row in Y_corner - 1:
582 | cols = left_line[row] + (np.arange(width_wedge) -
583 | (left_line[row] - first_col)) % width_wedge
584 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1
585 | new_row = (row - first_row + 1) % length_corner_wedge
586 | Xj[row, admissible_cols] += wrapped_data[new_row, :]
587 |                 # We use the following property: in an assignment A[B] = C where
588 | # B and C are vectors, if some value x repeats in B, then the
589 | # last occurrence of x is the one corresponding to the eventual
590 | # assignment.
591 |
592 | # Regular wedges
593 | length_wedge = _floor(4 * M_vert) - _floor(M_vert)
594 | Y = np.arange(length_wedge) + 1
595 | first_row = _floor(4 * M_vert) + 2 - _ceil((length_wedge + 1) / 2) \
596 | + (length_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
597 | for subl in range(1, nbangles_perquad - 1):
598 | l += 1
599 | width_wedge = wedge_endpoints[subl + 1] - \
600 | wedge_endpoints[subl - 1] + 1
601 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
602 | wedge_endpoints[subl]) / _floor(4 * M_vert)
603 | left_line = _round(wedge_endpoints[subl - 1] +
604 | slope_wedge * (Y - 1))
605 | wrapped_XX = np.zeros((length_wedge, width_wedge), dtype=int)
606 | wrapped_YY = np.zeros((length_wedge, width_wedge), dtype=int)
607 | first_col = _floor(4 * M_horiz) + 2 - \
608 | _ceil((width_wedge + 1) / 2) + \
609 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
610 | for row in Y - 1:
611 | cols = left_line[row] + (np.arange(width_wedge) -
612 | (left_line[row] - first_col)) % width_wedge - 1
613 | new_row = (row - first_row + 1) % length_wedge
614 | wrapped_XX[new_row, :] = XX[row, cols]
615 | wrapped_YY[new_row, :] = YY[row, cols]
616 |
617 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
618 | wedge_midpoints[subl - 1]) / _floor(4 * M_vert)
619 | mid_line_left = wedge_midpoints[subl - 1] + \
620 | slope_wedge_left * (wrapped_YY - 1)
621 | coord_left = 0.5 + _floor(4 * M_vert) / (wedge_endpoints[subl]
622 | - wedge_endpoints[subl - 1]) * \
623 | (wrapped_XX - mid_line_left) / \
624 | (_floor(4 * M_vert) + 1 - wrapped_YY)
625 | slope_wedge_right = ((_floor(4 * M_horiz) + 1) -
626 | wedge_midpoints[subl]) / _floor(4 * M_vert)
627 | mid_line_right = wedge_midpoints[subl] + \
628 | slope_wedge_right * (wrapped_YY - 1)
629 | coord_right = 0.5 + _floor(4 * M_vert) / \
630 | (wedge_endpoints[subl + 1] - wedge_endpoints[subl]) * \
631 | (wrapped_XX - mid_line_right) / \
632 | (_floor(4 * M_vert) + 1 - wrapped_YY)
633 | wl_left, _ = fdct_wrapping_window(coord_left)
634 | _, wr_right = fdct_wrapping_window(coord_right)
635 | if not is_real:
636 | wrapped_data = fftshift(fft2(ifftshift(C[j][l]))) / \
637 | np.sqrt(C[j][l].size)
638 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
639 | else:
640 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2]
641 | wrapped_data = fftshift(
642 | fft2(ifftshift(x))) / np.sqrt(x.size * 2)
643 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
644 |
645 | wrapped_data = wrapped_data * wl_left * wr_right
646 |
647 | # Unwrapping data
648 | for row in Y - 1:
649 | cols = left_line[row] + (np.arange(width_wedge) -
650 | (left_line[row] - first_col)) % width_wedge - 1
651 | new_row = (row + 1 - first_row) % length_wedge
652 | Xj[row, cols] += wrapped_data[new_row, :]
653 |
654 | # Right corner wedge
655 | l += 1
656 | width_wedge = 4 * _floor(4 * M_horiz) + 3 - \
657 | wedge_endpoints[-1] - wedge_endpoints[-2]
658 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
659 | wedge_endpoints[-1]) / _floor(4 * M_vert)
660 | left_line = _round(
661 | wedge_endpoints[-2] + slope_wedge * (Y_corner - 1))
662 | wrapped_XX = np.zeros(
663 | (length_corner_wedge, width_wedge), dtype=int)
664 | wrapped_YY = np.zeros(
665 | (length_corner_wedge, width_wedge), dtype=int)
666 | first_row = _floor(4 * M_vert) + 2 - \
667 | _ceil((length_corner_wedge + 1) / 2) + \
668 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
669 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \
670 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
671 | for row in Y_corner - 1:
672 | cols = left_line[row] + (np.arange(width_wedge) -
673 | (left_line[row] - first_col)) % width_wedge
674 | admissible_cols = _round(0.5 * (cols + 2 * _floor(4 * M_horiz)
675 | + 1 - np.abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1
676 | new_row = (row - first_row + 1) % length_corner_wedge
677 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
678 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
679 |
680 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
681 | wedge_midpoints[-1]) / _floor(4 * M_vert)
682 | mid_line_left = wedge_midpoints[-1] + \
683 | slope_wedge_left * (wrapped_YY - 1)
684 | coord_left = 0.5 + _floor(4 * M_vert) / \
685 | (wedge_endpoints[-1] - wedge_endpoints[-2]) * \
686 | (wrapped_XX - mid_line_left) / \
687 | (_floor(4 * M_vert) + 1 - wrapped_YY)
688 | C2 = -1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[-1] - 1)
689 | - 1 + 1 / (2 * (_floor(4 * M_vert)) /
690 | (first_wedge_endpoint_vert - 1) - 1))
691 | C1 = -C2 * (2 * (_floor(4 * M_horiz)) /
692 | (wedge_endpoints[-1] - 1) - 1)
693 |
694 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) ==
695 | (wrapped_YY - 1) / _floor(4 * M_vert)] -= 1
696 | coord_corner = C1 + C2 * (2 - ((wrapped_XX - 1) /
697 | _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert))) \
698 | / ((wrapped_XX - 1) / _floor(4 * M_horiz) - (wrapped_YY - 1) /
699 | _floor(4 * M_vert))
700 | wl_left, _ = fdct_wrapping_window(coord_left)
701 | _, wr_right = fdct_wrapping_window(coord_corner)
702 |
703 | if not is_real:
704 | wrapped_data = fftshift(
705 | fft2(ifftshift(C[j][l]))) / np.sqrt(C[j][l].size)
706 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
707 | else:
708 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2]
709 | wrapped_data = fftshift(
710 | fft2(ifftshift(x))) / np.sqrt(x.size * 2)
711 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
712 |
713 | wrapped_data = wrapped_data * wl_left * wr_right
714 |
715 | # Unwrapping data
716 | for row in Y_corner - 1:
717 | cols = left_line[row] + (np.arange(width_wedge) -
718 | (left_line[row] - first_col)) % width_wedge
719 | admissible_cols = _round(1 / 2 * (cols + 2 * _floor(4 * M_horiz)
720 | + 1 - abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1
721 | new_row = (row + 1 - first_row) % length_corner_wedge
722 | Xj[row, np.flip(admissible_cols)] += wrapped_data[new_row, ::-1]
723 | # We use the following property: in an assignment A[B] = C where
724 | # B and C are vectors, if some value x repeats in B, then the
725 | # last occurrence of x is the one corresponding to the eventual
726 | # assignment.
727 |
728 | Xj = np.rot90(Xj)
729 |
730 | Xj *= lowpass
731 | Xj_index1 = np.arange(-_floor(2 * M1),
732 | _floor(2 * M1) + 1) + _floor(4 * M1)
733 | Xj_index2 = np.arange(-_floor(2 * M2),
734 | _floor(2 * M2) + 1) + _floor(4 * M2)
735 |
736 | Xj[np.ix_(Xj_index1, Xj_index2)] *= hipass
737 |
738 | loc_1 = Xj_topleft_1 + np.arange(2 * _floor(4 * M1) + 1) - 1
739 | loc_2 = Xj_topleft_2 + np.arange(2 * _floor(4 * M2) + 1) - 1
740 | X[np.ix_(loc_1, loc_2)] += Xj
741 |
742 | # Preparing for loop reentry or exit
743 | Xj_topleft_1 += _floor(4 * M1) - _floor(2 * M1)
744 | Xj_topleft_2 += _floor(4 * M2) - _floor(2 * M2)
745 |
746 | lowpass = lowpass_next
747 |
748 | if is_real:
749 | Y = X
750 | X = np.rot90(X, 2)
751 | X = X + np.conj(Y)
752 |
753 | # Coarsest wavelet level
754 | M1 = M1 / 2
755 | M2 = M2 / 2
756 | Xj = fftshift(fft2(ifftshift(C[0][0]))) / np.sqrt(C[0][0].size)
757 | loc_1 = Xj_topleft_1 + np.arange(2 * _floor(4 * M1) + 1) - 1
758 | loc_2 = Xj_topleft_2 + np.arange(2 * _floor(4 * M2) + 1) - 1
759 | X[np.ix_(loc_1, loc_2)] += Xj * lowpass
760 |
761 | # Finest level
762 | M1 = N1 / 3
763 | M2 = N2 / 3
764 | if finest == 1:
765 | # Folding back onto N1-by-N2 matrix
766 | shift_1 = _floor(2 * M1) - _floor(N1 / 2)
767 | shift_2 = _floor(2 * M2) - _floor(N2 / 2)
768 | Y = X[:, np.arange(N2) + shift_2]
769 | Y[:, np.arange(N2 - shift_2, N2)] += X[:, :shift_2]
770 | Y[:, :shift_2] += X[:, N2 + shift_2:N2 + 2 * shift_2]
771 | X = Y[np.arange(N1) + shift_1, :]
772 | X[np.arange(N1 - shift_1, N1), :] += Y[:shift_1, :]
773 | X[:shift_1, :] += Y[N1 + shift_1:N1 + 2 * shift_1, :]
774 | else:
775 | # Extension to a N1-by-N2 matrix
776 | Y = fftshift(fft2(ifftshift(C[nbscales - 1][0]))) / \
777 | np.sqrt(C[nbscales - 1][0].size)
778 | X_topleft_1 = _ceil((N1 + 1) / 2) - _floor(M1)
779 | X_topleft_2 = _ceil((N2 + 1) / 2) - _floor(M2)
780 | loc_1 = X_topleft_1 + np.arange(2 * _floor(M1) + 1) - 1
781 | loc_2 = X_topleft_2 + np.arange(2 * _floor(M2) + 1) - 1
782 | Y[np.ix_(loc_1, loc_2)] = Y[np.ix_(loc_1, loc_2)] * hipass_finest + X
783 | X = Y
784 |
785 | x = fftshift(ifft2(ifftshift(X))) * np.sqrt(X.size)
786 | if is_real:
787 | x = np.real(x)
788 |
789 | return x
790 |
--------------------------------------------------------------------------------
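The two functions above form a forward/inverse pair: fdct_wrapping decomposes a 2-D array into curvelet coefficients and ifdct_wrapping (the adjoint and pseudo-inverse) reconstructs it. Below is a minimal round-trip sketch on synthetic data, using the same call pattern that curvelet_conversion in strain2vel.py relies on; the array size, number of scales and tolerance are illustrative assumptions, not values taken from the module.

import numpy as np
from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping

# synthetic 2-D section: 128 channels x 256 time samples
x = np.random.randn(128, 256)

# forward transform: real-valued curvelets, curvelets at the finest scale
C = fdct_wrapping(x, is_real=True, finest=1, nbscales=4, nbangles_coarse=16)

# inverse transform; 'size' is required because finest == 1
x_rec = ifdct_wrapping(C, is_real=True, size=x.shape)

# the wrapping transform behaves as a tight frame, so the round trip
# should agree with the input to within numerical precision
print(np.max(np.abs(x - x_rec)))
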
/daspy/advanced_tools/strain2vel.py:
--------------------------------------------------------------------------------
1 | # Purpose: Convert strain rate data to velocity
2 | # Author: Minzhe Hu
3 | # Date: 2024.3.10
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from numpy.fft import irfft2, ifftshift
7 | from scipy.signal import hilbert
8 | from daspy.basic_tools.freqattributes import next_pow_2, fk_transform
9 | from daspy.basic_tools.preprocessing import padding, cosine_taper
10 | from daspy.basic_tools.filter import bandpass
11 | from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping
12 | from daspy.advanced_tools.denoising import _velocity_bin
13 | from daspy.advanced_tools.decomposition import fk_fan_mask
14 |
15 |
16 | def fk_rescaling(data, dx, fs, taper=(0.02, 0.05), pad='default', fmax=None,
17 | kmin=(1 / 2000, 1 / 3000), vmax=(15000, 30000), edge=0.2,
18 | turning=None, verbose=False):
19 | """
20 | Convert strain/strain rate to velocity/acceleration by fk rescaling.
21 |
22 | :param data: numpy.ndarray. Data to do fk rescaling.
23 | :param dx: Channel interval in m.
24 | :param fs: Sampling rate in Hz.
25 |     :param taper: float or sequence of floats. Each float means decimal
26 |         percentage of Tukey taper for corresponding dimension (ranging from 0 to
27 |         1). The default (0.02, 0.05) applies a light taper in the spatial and
28 |         time dimensions respectively.
29 |     :param pad: Pad the data or not. It can be float or sequence of floats. Each
30 |         float means padding percentage before FFT for corresponding dimension.
31 |         If set to 0.1, 5% is padded before the beginning and 5% after the end.
32 |         'default' means pad both dimensions to the next power of 2. None or False
33 |         means do not pad the data before or during the Fast Fourier Transform.
34 |     :param fmax, kmin, vmax: float or sequence of 2 floats. A sequence of 2
35 |         floats represents the start and end of the taper. Setting these parameters
36 |         can reduce artifacts.
37 |     :param edge: float. The width of the fan-mask taper edge.
38 |     :param turning: Sequence of int. Channel numbers of turning points.
39 | :param verbose: If True, return converted data, f-k spectrum, frequency
40 | sequence, wavenumber sequence and f-k mask.
41 | :return: Converted data and some variables in the process if verbose==True.
42 | """
43 | if turning is not None:
44 | data_vel = np.zeros_like(data)
45 | start_ch = [0, *turning]
46 | end_ch = [*turning, len(data)]
47 | for (s, e) in zip(start_ch, end_ch):
48 | data_vel[s:e] = fk_rescaling(data[s:e], dx, fs, taper=taper,
49 | pad=pad, fmax=fmax, kmin=kmin,
50 | vmax=vmax, edge=edge, verbose=False)
51 | else:
52 | data_tp = cosine_taper(data, taper)
53 |
54 | if pad == 'default':
55 | nch, nt = data.shape
56 | dn = (next_pow_2(nch) - nch, next_pow_2(nt) - nt)
57 | nfft = None
58 | elif pad is None or pad is False:
59 | dn = 0
60 | nfft = None
61 | else:
62 | dn = np.round(np.array(pad) * data.shape).astype(int)
63 | nfft = 'default'
64 |
65 | data_pd = padding(data_tp, dn)
66 | nch, nt = data_pd.shape
67 |
68 | fk, f, k = fk_transform(data_pd, dx, fs, taper=taper, nfft=nfft)
69 |
70 | ff = np.tile(f, (len(k), 1))
71 | kk = np.tile(k, (len(f), 1)).T
72 | vv = - np.divide(ff, kk, out=np.ones_like(ff) * 1e10, where=kk != 0)
73 |
74 | mask = fk_fan_mask(f, k, fmax=fmax, kmin=kmin, vmax=vmax, edge=edge) * vv
75 | mask[kk == 0] = 0
76 |
77 | data_vel = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt]
78 | data_vel = padding(data_vel, dn, reverse=True)
79 |
80 | if verbose:
81 | return data_vel, fk, f, k, mask
82 | return data_vel
83 |
84 |
85 | def curvelet_conversion(data, dx, fs, pad=0.3, scale_begin=2, nbscales=None,
86 | nbangles=16, turning=None):
87 | """
88 |     Use curvelet transform to convert strain/strain rate to
89 |     velocity/acceleration. {Yang et al., 2023, Geophys. Res. Lett.}
90 |
91 | :param data: numpy.ndarray. Data to convert.
92 | :param dx: Channel interval in m.
93 | :param fs: Sampling rate in Hz.
94 |     :param pad: float or sequence of floats. Each float means padding percentage
95 |         before FFT for corresponding dimension. If set to 0.1, 5% is padded before
96 |         the beginning and 5% after the end.
97 | :param scale_begin: int. The beginning scale to do conversion.
98 | :param nbscales: int. Number of scales including the coarsest wavelet level.
99 | Default set to ceil(log2(min(M,N)) - 3).
100 | :param nbangles: int. Number of angles at the 2nd coarsest level,
101 | minimum 8, must be a multiple of 4.
102 |     :param turning: Sequence of int. Channel numbers of turning points.
103 | :return: numpy.ndarray. Converted data.
104 | """
105 | if turning is not None:
106 | data_vel = np.zeros_like(data)
107 | start_ch = [0, *turning]
108 | end_ch = [*turning, len(data)]
109 | for (s, e) in zip(start_ch, end_ch):
110 | data_vel[s:e] = curvelet_conversion(data[s:e], dx, fs, pad=pad,
111 | scale_begin=scale_begin,
112 | nbscales=nbscales,
113 | nbangles=nbangles, turning=None)
114 | else:
115 | if pad is None or pad is False:
116 | pad = 0
117 | dn = np.round(np.array(pad) * data.shape).astype(int)
118 | data_pd = padding(data, dn)
119 |
120 | C = fdct_wrapping(data_pd, is_real=True, finest=1, nbscales=nbscales,
121 | nbangles_coarse=nbangles)
122 |
123 | # rescale with velocity
124 | np.seterr(divide='ignore')
125 | for s in range(0, scale_begin - 1):
126 | for w in range(len(C[s])):
127 | C[s][w] *= 0
128 |
129 | for s in range(scale_begin - 1, len(C)):
130 | nbangles = len(C[s])
131 | velocity = _velocity_bin(nbangles, fs, dx)
132 | factors = np.mean(velocity, axis=1)
133 | for w in range(nbangles):
134 | if abs(factors[w]) == np.inf:
135 | factors[w] = abs(velocity[w]).min() * \
136 | np.sign(velocity[w, 0]) * 2
137 | C[s][w] *= factors[w]
138 |
139 | data_vel = ifdct_wrapping(C, is_real=True, size=data_pd.shape)
140 | data_vel = padding(data_vel, dn, reverse=True)
141 |
142 | return data_vel
143 |
144 |
145 | def slowness(g, dx, fs, slm, sls, swin=2):
146 | """
147 |     Estimate the slowness time series by calculating semblance.
148 | {Lior et al., 2021, Solid Earth}
149 |
150 |     :param g: 2-dimensional array. Time series of adjacent channels used for
151 |         estimating slowness.
152 |     :param dx: float. Channel interval (in m).
153 |     :param fs: float. Sampling rate of records in Hz.
154 |     :param slm: float. Maximum slowness.
155 |     :param sls: float. Slowness step.
156 |     :param swin: int. Half-length of the slowness smoothing window in samples.
157 |     :return: Sequences of slowness and semblance.
158 | """
159 | L = (len(g) - 1) // 2
160 | nt = len(g[0])
161 | h = np.imag(hilbert(g))
162 | grdpnt = round(slm / sls)
163 | sem = np.zeros((2 * grdpnt + 1, nt))
164 | gap = round(slm * dx * L * fs)
165 |
166 | h_ex = np.zeros((len(g), nt + 2 * gap))
167 | h_ex[:, gap:-gap] = h
168 | g_ex = np.zeros((len(g), nt + 2 * gap))
169 | g_ex[:, gap:-gap] = g
170 |
171 | for i in range(2 * grdpnt + 1):
172 | px = (i - grdpnt) * sls
173 | if abs(px) < 1e-5:
174 | continue
175 | gt = np.zeros(g.shape)
176 | ht = np.zeros(h.shape)
177 |         for j in range(-L, L + 1):
178 | shift = round(px * j * dx * fs)
179 | gt[j + L] = g_ex[j + L, gap + shift:gap + shift + nt]
180 | ht[j + L] = h_ex[j + L, gap + shift:gap + shift + nt]
181 | sem[i] = (np.sum(gt, axis=0)**2 + np.sum(ht, axis=0)**2) / \
182 | np.sum(gt**2 + ht**2, axis=0) / (2 * L + 1)
183 | p = (np.argmax(sem, axis=0) - grdpnt) * sls
184 | # smooth P
185 | for i in range(swin, nt - swin):
186 | win = p[i - swin:i + swin + 1]
187 | sign = np.sign(sum(np.sign(win)))
188 | win = [px for px in win if np.sign(px) == sign]
189 | p[i] = np.mean(win)
190 |
191 | return p, sem
192 |
193 |
194 | def slant_stacking(data, dx, fs, L=None, slm=0.01,
195 | sls=0.000125, frqlow=0.1, frqhigh=15, turning=None,
196 | channel='all'):
197 | """
198 | Convert strain to velocity based on slant-stack.
199 |
200 |     :param data: 2-dimensional array. Axis 0 is channel number and axis 1 is
201 |         time series.
202 |     :param dx, fs: float. Channel interval (in m) and sampling rate (in Hz).
203 |     :param L: int. The number of adjacent channels over which slowness is
204 |         estimated.
205 |     :param slm: float. Maximum slowness.
206 |     :param sls: float. Slowness step.
207 |     :param frqlow: Pass band low corner frequency.
208 |     :param frqhigh: Pass band high corner frequency.
209 |     :param turning: Sequence of int. Channel numbers of turning points.
210 |     :param channel: int or list or 'all'. Convert a certain channel number /
211 |         certain channel range / all channels.
212 |     :return: Converted velocity data.
213 | """
214 | if L is None:
215 | L = round(50 / dx)
216 |
217 | nch, nt = data.shape
218 | if isinstance(channel, str) and channel == 'all':
219 | channel = list(range(nch))
220 | elif isinstance(channel, int):
221 | channel = [channel]
222 |
223 | if turning is not None:
224 | data_vel = np.zeros((0, len(data[0])))
225 | start_ch = [0, *turning]
226 | end_ch = [*turning, len(data)]
227 | for (s, e) in zip(start_ch, end_ch):
228 | channel_seg = [ch-s for ch in range(s,e) if ch in channel]
229 | if len(channel_seg):
230 | d_vel = slant_stacking(data[s:e], dx, fs, L=L, slm=slm, sls=sls,
231 | frqlow=frqlow, frqhigh=frqhigh,
232 | turning=None, channel=channel_seg)
233 | data_vel = np.vstack((data_vel, d_vel))
234 | else:
235 | data_ex = padding(data, (2 * L, 0))
236 | swin = int(max((1 / frqhigh * fs) // 2, 1))
237 | data_vel = np.zeros((len(channel), nt))
238 | for i, ch in enumerate(channel):
239 | p, _ = slowness(data_ex[ch:ch + 2 * L + 1], dx, fs, slm, sls,
240 | swin=swin)
241 | data_vel[i] = bandpass(data[ch] / p, fs=fs, freqmin=frqlow,
242 | freqmax=frqhigh)
243 |
244 | return data_vel
245 |
--------------------------------------------------------------------------------
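A minimal usage sketch for the conversion routines above, run on synthetic noise; the channel spacing, sampling rate and array size are illustrative assumptions, not values from the module.

import numpy as np
from daspy.advanced_tools.strain2vel import fk_rescaling, curvelet_conversion

dx, fs = 10, 50                      # 10 m channel spacing, 50 Hz sampling
data = np.random.randn(200, 1500)    # hypothetical strain-rate section, 30 s

vel_fk = fk_rescaling(data, dx, fs)          # f-k domain rescaling
vel_cv = curvelet_conversion(data, dx, fs)   # curvelet-domain rescaling
print(vel_fk.shape, vel_cv.shape)            # both keep the input shape
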
/daspy/basic_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/basic_tools/__init__.py
--------------------------------------------------------------------------------
/daspy/basic_tools/filter.py:
--------------------------------------------------------------------------------
1 | # Purpose: Filter the waveform
2 | # Author: Minzhe Hu
3 | # Date: 2024.10.16
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | # Modified from https://docs.obspy.org/_modules/obspy/signal/filter.html
6 | import warnings
7 | import numpy as np
8 | from scipy.signal import cheb2ord, cheby2, hilbert, iirfilter, zpk2sos, sosfilt
9 |
10 |
11 | def bandpass(data, fs, freqmin, freqmax, corners=4, zi=None, zerophase=True):
12 | """
13 | Filter data from 'freqmin' to 'freqmax' using Butterworth bandpass filter of
14 | 'corners' corners.
15 |
16 | :param data: numpy.ndarray. Data to filter.
17 | :param fs: Sampling rate in Hz.
18 | :param freqmin: Pass band low corner frequency.
19 | :param freqmax: Pass band high corner frequency.
20 | :param corners: Filter corners / order.
21 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
22 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
23 |         trigger an output of the final filter delay values.
24 | :param zerophase: If True, apply filter once forwards and once backwards.
25 | This results in twice the number of corners but zero phase shift in
26 | the resulting filtered data. Only valid when zi is None.
27 | :return: Filtered data and the final filter delay values (if zi is not
28 | None).
29 | """
30 | if len(data.shape) == 1:
31 | data = data[np.newaxis, :]
32 | fe = 0.5 * fs
33 | low = freqmin / fe
34 | high = freqmax / fe
35 | # raise for some bad scenarios
36 | if high - 1.0 > -1e-6:
37 | msg = ('Selected high corner frequency ({}) of bandpass is at or ' +
38 | 'above Nyquist ({}). Applying a high-pass instead.').format(
39 | freqmax, fe)
40 | warnings.warn(msg)
41 | return highpass(data, freq=freqmin, fs=fs, corners=corners,
42 | zerophase=zerophase)
43 | if low > 1:
44 | msg = 'Selected low corner frequency is above Nyquist.'
45 | raise ValueError(msg)
46 | z, p, k = iirfilter(corners, [low, high], btype='band', ftype='butter',
47 | output='zpk')
48 | sos = zpk2sos(z, p, k)
49 | if zi is None:
50 | data_flt = sosfilt(sos, data)
51 | if zerophase:
52 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
53 | return data_flt
54 | elif isinstance(zi, (int, float)):
55 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
56 |
57 | data_flt, zf = sosfilt(sos, data, zi=zi)
58 | return data_flt, zf
59 |
60 |
61 | def bandstop(data, fs, freqmin, freqmax, corners=4, zi=None, zerophase=False):
62 | """
63 | Filter data removing data between frequencies 'freqmin' and 'freqmax' using
64 | Butterworth bandstop filter of 'corners' corners.
65 |
66 | :param data: numpy.ndarray. Data to filter.
67 | :param fs: Sampling rate in Hz.
68 | :param freqmin: Stop band low corner frequency.
69 | :param freqmax: Stop band high corner frequency.
70 | :param corners: Filter corners / order.
71 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
72 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
73 |         trigger an output of the final filter delay values.
74 | :param zerophase: If True, apply filter once forwards and once backwards.
75 | This results in twice the number of corners but zero phase shift in
76 | the resulting filtered data. Only valid when zi is None.
77 | :return: Filtered data and the final filter delay values (if zi is not
78 | None).
79 | """
80 | if len(data.shape) == 1:
81 | data = data[np.newaxis, :]
82 | fe = 0.5 * fs
83 | low = freqmin / fe
84 | high = freqmax / fe
85 | # raise for some bad scenarios
86 | if high > 1:
87 | high = 1.0
88 | msg = 'Selected high corner frequency is above Nyquist. Setting ' + \
89 | 'Nyquist as high corner.'
90 | warnings.warn(msg)
91 | if low > 1:
92 | msg = 'Selected low corner frequency is above Nyquist.'
93 | raise ValueError(msg)
94 | z, p, k = iirfilter(corners, [low, high],
95 | btype='bandstop', ftype='butter', output='zpk')
96 | sos = zpk2sos(z, p, k)
97 | if zi is None:
98 | data_flt = sosfilt(sos, data)
99 | if zerophase:
100 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
101 | return data_flt
102 | elif isinstance(zi, (int, float)):
103 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
104 |
105 | data_flt, zf = sosfilt(sos, data, zi=zi)
106 | return data_flt, zf
107 |
108 |
109 | def lowpass(data, fs, freq, corners=4, zi=None, zerophase=False):
110 | """
111 | Filter data removing data over certain frequency 'freq' using Butterworth
112 | lowpass filter of 'corners' corners.
113 |
114 | :param data: numpy.ndarray. Data to filter.
115 | :param fs: Sampling rate in Hz.
116 | :param freq: Filter corner frequency.
117 | :param corners: Filter corners / order.
118 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
119 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
120 |         trigger an output of the final filter delay values.
121 | :param zerophase: If True, apply filter once forwards and once backwards.
122 | This results in twice the number of corners but zero phase shift in
123 | the resulting filtered data. Only valid when zi is None.
124 | :return: Filtered data and the final filter delay values (if zi is not
125 | None).
126 | """
127 | if len(data.shape) == 1:
128 | data = data[np.newaxis, :]
129 | fe = 0.5 * fs
130 | f = freq / fe
131 | # raise for some bad scenarios
132 | if f > 1:
133 | f = 1.0
134 | msg = 'Selected corner frequency is above Nyquist. Setting Nyquist ' + \
135 | 'as high corner.'
136 | warnings.warn(msg)
137 | z, p, k = iirfilter(corners, f, btype='lowpass', ftype='butter',
138 | output='zpk')
139 | sos = zpk2sos(z, p, k)
140 | if zi is None:
141 | data_flt = sosfilt(sos, data)
142 | if zerophase:
143 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
144 | return data_flt
145 | elif isinstance(zi, (int, float)):
146 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
147 |
148 | data_flt, zf = sosfilt(sos, data, zi=zi)
149 | return data_flt, zf
150 |
151 |
152 | def lowpass_cheby_2(data, fs, freq, maxorder=12, zi=None, ba=False,
153 | freq_passband=False):
154 | """
155 | Filter data by passing data only below a certain frequency. The main purpose
156 | of this cheby2 filter is downsampling. This method will iteratively design a
157 | filter, whose pass band frequency is determined dynamically, such that the
158 | values above the stop band frequency are lower than -96dB.
159 |
160 | :param data: numpy.ndarray. Data to filter.
161 | :param fs: Sampling rate in Hz.
162 |     :param freq: The frequency above which signals are attenuated by 96 dB.
163 | :param maxorder: Maximal order of the designed cheby2 filter.
164 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
165 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
166 |         trigger an output of the final filter delay values.
167 | :param ba: If True return only the filter coefficients (b, a) instead of
168 | filtering.
169 | :param freq_passband: If True return additionally to the filtered data, the
170 | iteratively determined pass band frequency.
171 | :return: Filtered data, the final filter delay values (if zi is not None)
172 | and the determined pass band frequency (if freq_passband is True).
173 | """
174 | if data.ndim == 1:
175 | data = data[np.newaxis, :]
176 |
177 | nyquist = fs * 0.5
178 | # rp - maximum ripple of passband, rs - attenuation of stopband
179 | rp, rs, order = 1, 96, 1e99
180 | ws = freq / nyquist # stop band frequency
181 | wp = ws # pass band frequency
182 | # raise for some bad scenarios
183 | if ws > 1:
184 | ws = 1.0
185 | warnings.warn('Selected corner frequency is above Nyquist. Setting '
186 | 'Nyquist as high corner.')
187 | while True:
188 | if order <= maxorder:
189 | break
190 | wp = wp * 0.99
191 | order, wn = cheb2ord(wp, ws, rp, rs, analog=0)
192 | if ba:
193 | return cheby2(order, rs, wn, btype='low', analog=0, output='ba')
194 | z, p, k = cheby2(order, rs, wn, btype='low', analog=0, output='zpk')
195 | sos = zpk2sos(z, p, k)
196 | if zi is None:
197 | data_flt = sosfilt(sos, data)
198 | if freq_passband:
199 | return data_flt, wp * nyquist
200 | return data_flt
201 | elif isinstance(zi, (int, float)):
202 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
203 |
204 | data_flt, zf = sosfilt(sos, data, zi=zi)
205 | if freq_passband:
206 | return data_flt, zf, wp * nyquist
207 | return data_flt, zf
208 |
209 |
210 | def highpass(data, fs, freq, corners=4, zi=None, zerophase=False):
211 | """
212 | Filter data removing data below certain frequency 'freq' using Butterworth
213 | highpass filter of 'corners' corners.
214 |
215 | :param data: numpy.ndarray. Data to filter.
216 | :param fs: Sampling rate in Hz.
217 | :param freq: Filter corner frequency.
218 | :param corners: Filter corners / order.
219 | :param zerophase: If True, apply filter once forwards and once backwards.
220 | This results in twice the number of corners but zero phase shift in
221 | the resulting filtered data. Only valid when zi is None.
222 | :return: Filtered data and the final filter delay values (if zi is not
223 | None).
224 | """
225 | if len(data.shape) == 1:
226 | data = data[np.newaxis, :]
227 | fe = 0.5 * fs
228 | f = freq / fe
229 | # raise for some bad scenarios
230 | if f > 1:
231 | msg = 'Selected corner frequency is above Nyquist.'
232 | raise ValueError(msg)
233 | z, p, k = iirfilter(corners, f, btype='highpass', ftype='butter',
234 | output='zpk')
235 | sos = zpk2sos(z, p, k)
236 | if zi is None:
237 | data_flt = sosfilt(sos, data)
238 | if zerophase:
239 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
240 | return data_flt
241 | elif isinstance(zi, (int, float)):
242 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
243 |
244 | data_flt, zf = sosfilt(sos, data, zi=zi)
245 | return data_flt, zf
246 |
247 | def envelope(data):
248 | """
249 |     Computes the envelope of the given data. The envelope is determined by
250 |     adding the squared amplitudes of the data and its Hilbert transform and
251 |     then taking the square root. The envelope at the start/end should not be
252 |     taken too seriously.
253 |
254 | :param data: numpy.ndarray. Data to make envelope of.
255 | :return: Envelope of input data.
256 | """
257 | return abs(hilbert(data, axis=-1))
258 |
--------------------------------------------------------------------------------
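All filters above share the same calling convention: 2-D input (channels x samples), an optional zero-phase mode, and an optional zi state for block-wise processing. Below is a short sketch; the sampling rate, band edges and array size are arbitrary example values.

import numpy as np
from daspy.basic_tools.filter import bandpass, envelope

fs = 100
data = np.random.randn(50, 3000)     # 50 channels, 30 s of synthetic noise

# one-shot, zero-phase 1-10 Hz bandpass and its channel-wise envelope
flt = bandpass(data, fs, freqmin=1, freqmax=10)
env = envelope(flt)

# block-wise filtering: zi=0 returns the final filter state, which seeds
# the next block so there is no discontinuity at the block boundary
blk1, zf = bandpass(data[:, :1500], fs, 1, 10, zi=0)
blk2, _ = bandpass(data[:, 1500:], fs, 1, 10, zi=zf)
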
/daspy/basic_tools/freqattributes.py:
--------------------------------------------------------------------------------
1 | # Purpose: Analyze frequency attribute and transform in frequency domain
2 | # Author: Minzhe Hu
3 | # Date: 2024.6.8
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from numpy.fft import rfft, rfft2, fftshift, fftfreq, rfftfreq
7 | from scipy.signal import stft
8 | from daspy.basic_tools.preprocessing import demeaning, detrending, cosine_taper
9 |
10 |
11 | def next_pow_2(i):
12 | """
13 | Find the next power of two.
14 |
15 | :param i: float or int.
16 | :return: int. The next power of two for i.
17 | """
18 | buf = np.ceil(np.log2(i))
19 | return np.power(2, buf).astype(int)
20 |
21 |
22 | def spectrum(data, fs, taper=0.05, nfft='default'):
23 | """
24 | Computes the spectrum of the given data.
25 |
26 | :param data: numpy.ndarray. Data to make spectrum of.
27 | :param fs: Sampling rate in Hz.
28 | :param taper: Decimal percentage of Tukey taper.
29 | :param nfft: Number of points for FFT. None = sampling points, 'default' =
30 | next power of 2 of sampling points.
31 | :return: Spectrum and frequency sequence.
32 | """
33 | if len(data.shape) == 1:
34 | data = data.reshape(1, len(data))
35 | elif len(data.shape) != 2:
36 | raise ValueError("Data should be 1-D or 2-D array")
37 | data = cosine_taper(data, (0, taper))
38 |
39 | if nfft == 'default':
40 | nfft = next_pow_2(len(data[0]))
41 | elif nfft is None:
42 | nfft = len(data[0])
43 |
44 | spec = rfft(data, n=nfft, axis=1)
45 | f = rfftfreq(nfft, d=1 / fs)
46 |
47 | return spec, f
48 |
49 |
50 | def spectrogram(data, fs, nperseg=256, noverlap=None, nfft=None, detrend=False,
51 | boundary='zeros'):
52 | """
53 | Computes the spectrogram of the given data.
54 |
55 | :param data: 1-D or 2-D numpy.ndarray. Data to make spectrogram of.
56 | :param fs: Sampling rate in Hz.
57 | :param nperseg: int. Length of each segment.
58 | :param noverlap: int. Number of points to overlap between segments. If None,
59 | noverlap = nperseg // 2.
60 | :param nfft: int. Length of the FFT used. None = nperseg.
61 |     :param detrend: str or bool. Specifies whether and how to detrend each
62 | segment. 'linear' or 'detrend' or True = detrend, 'constant' or
63 | 'demean' = demean.
64 | :param boundary: str or None. Specifies whether the input signal is extended
65 | at both ends, and how to generate the new values, in order to center the
66 | first windowed segment on the first input point. This has the benefit of
67 | enabling reconstruction of the first input point when the employed
68 | window function starts at zero. Valid options are ['even', 'odd',
69 | 'constant', 'zeros', None].
70 | :return: Spectrogram, frequency sequence and time sequence.
71 | """
72 | if detrend in [True, 'linear', 'detrend']:
73 | detrend = detrending
74 | elif detrend in ['constant', 'demean']:
75 | detrend = demeaning
76 | if data.ndim == 1:
77 | f, t, Zxx = stft(data, fs=fs, nperseg=nperseg, noverlap=noverlap,
78 | nfft=nfft, detrend=detrend, boundary=boundary)
79 | elif len(data) == 1:
80 | f, t, Zxx = stft(data[0], fs=fs, nperseg=nperseg, noverlap=noverlap,
81 | nfft=nfft, detrend=detrend, boundary=boundary)
82 | else:
83 | Zxx = []
84 | for d in data:
85 | f, t, Zxxi = stft(d, fs=fs, nperseg=nperseg, noverlap=noverlap,
86 | nfft=nfft, detrend=detrend, boundary=boundary)
87 | Zxx.append(abs(Zxxi))
88 | Zxx = np.mean(np.array(Zxx), axis=0)
89 |
90 | return Zxx, f, t
91 |
92 |
93 | def fk_transform(data, dx, fs, taper=(0, 0.05), nfft='default'):
94 | """
95 | Transform the data to the fk domain using 2-D Fourier transform method.
96 |
97 | :param data: numpy.ndarray. Data to do fk transform.
98 | :param dx: Channel interval in m.
99 | :param fs: Sampling rate in Hz.
100 |     :param taper: float or sequence of floats. Each float means decimal
101 |         percentage of Tukey taper for corresponding dimension (ranging from 0 to
102 |         1). The default (0, 0.05) leaves the spatial dimension untapered and
103 |         applies a 5% Tukey taper to the time dimension.
104 | :param nfft: Number of points for FFT. None means sampling points; 'default'
105 | means next power of 2 of sampling points, which makes result smoother.
106 | """
107 | nch, nt = data.shape
108 | data = cosine_taper(data, taper)
109 | if nfft == 'default':
110 | nfft = (next_pow_2(nch), next_pow_2(nt))
111 | elif not nfft:
112 | nfft = (nch, nt)
113 |
114 | fk = fftshift(rfft2(data, s=nfft), axes=0)
115 | f = rfftfreq(nfft[1], d=1. / fs)
116 | k = fftshift(fftfreq(nfft[0], d=dx))
117 | return fk, f, k
118 |
--------------------------------------------------------------------------------
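A quick sketch of how these spectral helpers fit together; the channel spacing, sampling rate and segment length below are example values only.

import numpy as np
from daspy.basic_tools.freqattributes import spectrum, spectrogram, fk_transform

dx, fs = 5, 100
data = np.random.randn(64, 2048)     # synthetic 64-channel record

spec, f = spectrum(data, fs)                       # per-channel spectra
Zxx, f_st, t = spectrogram(data[0], fs, nperseg=128)
fk, f_fk, k = fk_transform(data, dx, fs)           # 2-D f-k spectrum
print(spec.shape, Zxx.shape, fk.shape)
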
/daspy/basic_tools/preprocessing.py:
--------------------------------------------------------------------------------
1 | # Purpose: Some preprocess methods
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.21
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from scipy.signal import detrend
7 | from scipy.signal.windows import tukey
8 | from daspy.basic_tools.filter import lowpass_cheby_2
9 |
10 |
11 | def phase2strain(data, lam, e, n, gl):
12 | """
13 | Convert the optical phase shift in radians to strain.
14 |
15 | :param data: numpy.ndarray. Data to convert.
16 | :param lam: float. Operational optical wavelength in vacuum.
17 |     :param e: float. Photo-elastic scaling factor for longitudinal strain in
18 |         isotropic material.
19 | :param n: float. Refractive index of the sensing fiber.
20 |     :param gl: float. Gauge length.
21 | :return: Strain data.
22 | """
23 | return data * (lam * 1e-9) / (e * 4 * np.pi * n * gl)
24 |
25 |
26 | def normalization(data, method='z-score'):
27 | """
28 |     Normalize each individual channel with the specified method.
29 |
30 | :param data: numpy.ndarray. Data to normalize.
31 | :param method: str. Method for normalization, should be one of 'max',
32 | 'z-score', 'MAD' or 'one-bit'.
33 | :return: Normalized data.
34 | """
35 | if data.ndim == 1:
36 | data = data.reshape(1, len(data))
37 | elif data.ndim != 2:
38 | raise ValueError("Data should be 1-D or 2-D array")
39 |
40 | if method.lower() == 'max':
41 | amp = np.max(abs(data), 1, keepdims=True)
42 | amp[amp == 0] = amp[amp > 0].min()
43 | return data / amp
44 | elif method.lower() == 'z-score':
45 | mean = np.mean(data, axis=1, keepdims=True)
46 | std = np.std(data, axis=1, keepdims=True)
47 | std[std == 0] = std[std > 0].min()
48 | return (data - mean) / std
49 | elif method.lower() == 'mad':
50 | median = np.median(data, axis=1, keepdims=True)
51 | mad = np.median(abs(data - median), axis=1, keepdims=True)
52 | mad[mad == 0] = mad[mad > 0].min()
53 | return (data - median) / mad
54 | elif method.lower() == 'one-bit':
55 | return np.sign(data)
56 |
57 |
58 | def demeaning(data):
59 | """
60 |     Demean signal by subtracting the mean of each channel.
61 | 
62 |     :param data: numpy.ndarray. Data to demean.
63 |     :return: Demeaned data.
64 | """
65 | return detrend(data, type='constant')
66 |
67 |
68 | def detrending(data):
69 | """
70 |     Detrend signal by subtracting a linear least-squares fit from the data.
71 |
72 | :param data: numpy.ndarray. Data to detrend.
73 | :return: Detrended data.
74 | """
75 | return detrend(data, type='linear')
76 |
77 |
78 | def stacking(data: np.ndarray, N: int, step: int = None, average: bool = True):
79 | """
80 |     Stack several channels to increase the signal-to-noise ratio (SNR).
81 |
82 | :param data: numpy.ndarray. Data to stack.
83 | :param N: int. N adjacent channels stacked into 1.
84 | :param step: int. Interval of data stacking.
85 | :param average: bool. True for calculating the average.
86 | :return: Stacked data.
87 | """
88 | if N == 1:
89 | return data
90 | if step is None:
91 | step = N
92 | nch, nt = data.shape
93 | begin = np.arange(0, nch - N + 1, step)
94 | end = begin + N
95 | nx1 = len(begin)
96 | data_stacked = np.zeros((nx1, nt))
97 | for i in range(nx1):
98 | data_stacked[i, :] = np.sum(data[begin[i]:end[i], :], axis=0)
99 | if average:
100 | data_stacked /= N
101 | return data_stacked
102 |
103 |
104 | def cosine_taper(data, p=0.1, side='both'):
105 | """
106 | Taper using Tukey window.
107 |
108 | :param data: numpy.ndarray. Data to taper.
109 | :param p: float or sequence of floats. Each float means decimal percentage
110 | of Tukey taper for corresponding dimension (ranging from 0 to 1).
111 | Default is 0.1 which tapers 5% from the beginning and 5% from the end.
112 |         If only one float is given, only the time dimension is tapered.
113 | :param side: str. 'both', 'left', or 'right'.
114 | :return: Tapered data.
115 | """
116 | if data.ndim == 1:
117 | data = data.reshape(1, -1)
118 | nch, nt = data.shape
119 | if not isinstance(p, (tuple, list, np.ndarray)):
120 | win = tukey(nt, p)
121 | if side == 'left':
122 |             win[round(nt/2):] = 1
123 | elif side == 'right':
124 | win[:round(len(win)/2)] = 1
125 | return data * np.tile(win, (nch, 1))
126 | else:
127 | if p[0] > 0:
128 | data = data * np.tile(tukey(nch, p[0]), (nt, 1)).T
129 | return cosine_taper(data, p[1], side=side)
130 |
131 |
132 | def downsampling(data, xint=None, tint=None, stack=True, lowpass_filter=True):
133 | """
134 | Downsample DAS data.
135 |
136 | :param data: numpy.ndarray. Data to downsample can be 1-D or 2-D.
137 | :param xint: int. Spatial downsampling factor.
138 |     :param tint: int. Time downsampling factor.
139 |     :param stack: bool. Stack adjacent channels (True) or decimate them (False).
140 |     :param lowpass_filter: bool. Lowpass cheby2 filter before time downsampling.
141 | :return: Downsampled data.
142 | """
143 | data_ds = data.copy()
144 | if xint and xint > 1:
145 | if stack:
146 | data_ds = stacking(data, xint)
147 | else:
148 | data_ds = data_ds[::xint].copy()
149 | if tint and tint > 1:
150 | if lowpass_filter:
151 | data_ds = lowpass_cheby_2(data_ds, 1, 1 / 2 / tint)
152 | if len(data_ds.shape) == 1:
153 | data_ds = data_ds[::tint].copy()
154 | else:
155 | data_ds = data_ds[:, ::tint].copy()
156 | return data_ds
157 |
158 |
159 | def trimming(data, dx=None, fs=None, xmin=0, xmax=None, tmin=0, tmax=None,
160 | mode=0):
161 | """
162 | Cut data to given start and end distance/channel or time/sampling points.
163 |
164 | :param data: numpy.ndarray. Data to trim can be 1-D or 2-D.
165 | :param dx: Channel interval in m.
166 | :param fs: Sampling rate in Hz.
167 | :param xmin, xmax, tmin, tmax: Boundary for trimming.
168 | :param mode: 0 means the unit of boundary is channel number and sampling
169 | points; 1 means the unit of boundary is meters and seconds.
170 | :return: Trimmed data.
171 | """
172 | nch, nt = data.shape
173 | if mode == 0:
174 | if xmax is None:
175 | xmax = nch
176 | if tmax is None:
177 | tmax = nt
178 |     elif mode == 1:
179 |         xmin = round(xmin / dx)
180 |         xmax = nch if xmax is None else round(xmax / dx)
181 |         tmin = round(tmin * fs)
182 |         tmax = nt if tmax is None else round(tmax * fs)
183 |
184 | return data[xmin:xmax, tmin:tmax].copy()
185 |
186 |
187 | def padding(data, dn, reverse=False):
188 | """
189 | Pad DAS data with 0.
190 |
191 | :param data: numpy.ndarray. 2D DAS data to pad.
192 | :param dn: int or sequence of ints. Number of points to pad for both
193 | dimensions.
194 | :param reverse: bool. Set True to reverse the operation.
195 | :return: Padded data.
196 | """
197 | nch, nt = data.shape
198 | if isinstance(dn, int):
199 | dn = (dn, dn)
200 |
201 | pad = (dn[0] // 2, dn[0] - dn[0] // 2, dn[1] // 2, dn[1] - dn[1] // 2)
202 | if reverse:
203 | return data[pad[0]:nch - pad[1], pad[2]:nt - pad[3]]
204 | else:
205 | data_pd = np.zeros((nch + dn[0], nt + dn[1]))
206 | data_pd[pad[0]:nch + pad[0], pad[2]:nt + pad[2]] = data
207 | return data_pd
208 |
209 |
210 | def time_integration(data, fs, c=0):
211 | """
212 | Integrate DAS data in time.
213 |
214 | :param data: numpy.ndarray. 2D DAS data.
215 | :param fs: Sampling rate in Hz.
216 | :param c: float. A constant added to the result.
217 | :return: Integrated data.
218 | """
219 | return np.cumsum(data, axis=1) / fs + c
220 |
221 |
222 | def time_differential(data, fs, prepend=0):
223 | """
224 | Differentiate DAS data in time.
225 |
226 | :param data: numpy.ndarray. 2D DAS data.
227 | :param fs: Sampling rate in Hz.
228 | :param prepend: 'mean' or values to prepend to `data` along axis prior to
229 | performing the difference.
230 | :return: Differentiated data.
231 | """
232 | if prepend == 'mean':
233 | prepend = np.mean(data, axis=1).reshape((-1, 1))
234 | return np.diff(data, axis=1, prepend=prepend) * fs
235 |
236 |
237 | def distance_integration(data, dx, c=0):
238 | """
239 | Integrate DAS data in distance.
240 |
241 | :param data: numpy.ndarray. 2D DAS data.
242 | :param dx: Channel interval in m.
243 | :param c: float. A constant added to the result.
244 | :return: Integrated data.
245 | """
246 |     return np.cumsum(data, axis=0) * dx + c
--------------------------------------------------------------------------------
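A minimal usage sketch for the preprocessing helpers above (the array shape, dx,
fs and the factors are illustrative values, not taken from the package docs):

    import numpy as np
    from daspy.basic_tools.preprocessing import (downsampling, trimming,
                                                 padding, time_integration)

    data = np.random.randn(100, 2000)            # 100 channels x 2000 samples
    ds = downsampling(data, xint=2, tint=5)      # stack channel pairs, decimate time by 5
    cut = trimming(data, dx=5, fs=100, xmin=50, xmax=250, tmin=1, tmax=10, mode=1)
    pad = padding(data, (10, 20))                # zero-pad 10 channels / 20 samples
    back = padding(pad, (10, 20), reverse=True)  # undo the padding
    integ = time_integration(data, fs=100)       # cumulative sum along time / fs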
/daspy/basic_tools/visualization.py:
--------------------------------------------------------------------------------
1 | # Purpose: Plot data
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.20
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | from collections.abc import Sequence
8 |
9 |
10 | def plot(data: np.ndarray, dx=None, fs=None, ax=None, obj='waveform', dpi=300,
11 | title=None, transpose=False, t0=0, x0=0, pick=None, f=None, k=None,
12 | t=None, c=None, cmap=None, vmin=None, vmin_per=None, vmax=None,
13 | vmax_per=None, dB=False, xmode='distance', tmode='time', xlim=None,
14 | ylim=None, xlog=False, ylog=False, xinv=False, yinv=False, xlabel=True,
15 | ylabel=True, xticklabels=True, yticklabels=True, colorbar=True,
16 | colorbar_label=None, savefig=None):
17 | """
18 | Plot several types of 2-D seismological data.
19 |
20 | :param data: numpy.ndarray. Data to plot.
21 | :param dx: Channel interval in m.
22 | :param fs: Sampling rate in Hz.
23 | :param ax: Matplotlib.axes.Axes or tuple. Axes to plot. A tuple for new
24 | figsize. If not specified, the function will directly display the image
25 | using matplotlib.pyplot.show().
26 | :param obj: str. Type of data to plot. It should be one of 'waveform',
27 | 'phasepick', 'spectrum', 'spectrogram', 'fk', or 'dispersion'.
28 | :param dpi: int. The resolution of the figure in dots-per-inch.
29 | :param title: str. The title of this axes.
30 | :param transpose: bool. Transpose the figure or not.
31 | :param t0, x0: The beginning of time and space.
32 | :param pick: dictionary of sequence of picked phases. Key should be 'P' for
33 | P phase, 'S' for S phase and 'N' for unknown phase type. Required if
34 | obj=='phasepick'.
35 | :param f: Sequence of frequency. Required if obj is one of 'spectrum',
36 | 'spectrogram', 'fk' or 'dispersion'.
37 | :param k: Wavenumber sequence. Required if obj=='fk'.
38 | :param t: Time sequence. Required if obj=='spectrogram'.
39 | :param c: Phase velocity sequence. Required if obj=='dispersion'.
40 | :param cmap: str or Colormap. The Colormap instance or registered colormap
41 | name used to map scalar data to colors.
42 | :param vmin, vmax: Define the data range that the colormap covers.
43 | :param vmin_per, vmax_per: float. Define the data range that the colormap
44 | covers by percentile.
45 | :param dB: bool. Transfer data unit to dB and take 1 as the reference value.
46 | :param xmode: str. 'distance' or 'channel'.
47 | :param tmode: str. 'time' or 'sampling'.
48 | :param xlim, ylim: Set the x-axis and y-axis view limits.
49 | :param xlog, ylog: bool. If True, set the x-axis' or y-axis' scale as log.
50 |     :param xinv, yinv: bool. If True, invert x-axis or y-axis.
51 | :param xlabel, ylabel: bool or str. Whether to plot a label or what label to
52 | plot for x-axis or y-axis.
53 | :param xticklabels, yticklabels: bool or sequence of str. Whether to plot
54 | ticklabels or what ticklabels to plot for x-axis or y-axis.
55 |     :param colorbar: bool, str or Matplotlib.axes.Axes. Bool means whether to
56 |         plot a colorbar, str means its location, Axes means the Axes to draw in.
57 |     :param colorbar_label: str. Label for the colorbar.
58 |     :param savefig: str or bool. Figure name to save if needed. If True, the
59 |         file name defaults to the value of obj plus '.png'.
60 | """
61 | nch, nt = data.shape
62 | if ax is None:
63 | ax = (6, 5)
64 | if isinstance(ax, tuple):
65 | fig, ax = plt.subplots(1, figsize=ax, dpi=dpi)
66 | show = True
67 | else:
68 | show = False
69 |
70 | if obj in ['waveform', 'phasepick']:
71 | cmap = 'RdBu' if cmap is None else cmap
72 | if vmax is None:
73 | vmax_per = 80 if vmax_per is None else vmax_per
74 | vmax = np.percentile(data, vmax_per)
75 | vmin = -vmax if vmin is None else vmin
76 | origin = 'upper'
77 | if fs is None or tmode == 'sampling':
78 | ylabel_default = 'Sampling points'
79 | fs = 1
80 | elif tmode == 'time':
81 | ylabel_default = 'Time (s)'
82 |
83 | if dx is None or xmode.lower() == 'channel':
84 | xlabel_default = 'Channel'
85 | extent = [x0, x0 + nch, t0 + nt / fs, t0]
86 | elif xmode.lower() == 'distance':
87 |             xlabel_default = 'Distance (km)'
88 | extent = [x0 * 1e-3, (x0 + nch * dx) * 1e-3, t0 + nt / fs, t0]
89 |
90 | if obj == 'phasepick' and len(pick):
91 | pick_color = {'P': 'r', 'S': 'b', 'N': 'k'}
92 | for phase, pck in pick.items():
93 | if len(pck):
94 | pck = np.array(pck).astype(float)
95 | if xmode.lower() == 'distance':
96 | pck[:, 0] = (x0 + pck[:, 0] * dx) * 1e-3
97 | elif xmode.lower() == 'channel':
98 | pck[:, 0] = x0 + pck[:, 0]
99 | if tmode.lower() == 'sampling':
100 | pck[:, 1] = pck[:, 1] / fs
101 | ax.scatter(pck[:,0], t0 + pck[:,1], marker=',', s=0.1,
102 | c=pick_color[phase])
103 |
104 | elif obj in ['spectrum', 'spectrogram', 'fk', 'dispersion']:
105 | if np.iscomplex(data).any():
106 | data = abs(data)
107 | if dB:
108 | data = 20 * np.log10(data)
109 | cmap = 'jet' if cmap is None else cmap
110 |
111 | if vmax is None:
112 | vmax_per = 80 if vmax_per is None else vmax_per
113 | vmax = np.percentile(data, vmax_per)
114 | if vmin is None:
115 | vmin_per = 20 if vmin_per is None else vmin_per
116 | vmin = np.percentile(data, vmin_per)
117 |
118 | if obj == 'spectrum':
119 | origin = 'lower'
120 | if dx is None or xmode.lower() == 'channel':
121 | xlabel_default = 'Channel'
122 | extent = [x0, x0 + nch, min(f), max(f)]
123 | elif xmode.lower() == 'distance':
124 |                 xlabel_default = 'Distance (km)'
125 | extent = [x0 * 1e-3, (x0 + nch * dx) * 1e-3, min(f), max(f)]
126 | ylabel_default = 'Frequency (Hz)'
127 | elif obj == 'spectrogram':
128 | data = data.T
129 | origin = 'lower'
130 | xlabel_default = 'Time (s)'
131 | ylabel_default = 'Frequency (Hz)'
132 | extent = [t0 + min(t), t0 + max(t), min(f), max(f)]
133 | elif obj == 'fk':
134 | origin = 'lower'
135 | xlabel_default = 'Wavenumber (m$^{-1}$)'
136 | ylabel_default = 'Frequency (Hz)'
137 | extent = [min(k), max(k), min(f), max(f)]
138 | elif obj == 'dispersion':
139 | data = data.T
140 | origin = 'lower'
141 | xlabel_default = 'Frequency (Hz)'
142 | ylabel_default = 'Velocity (m/s)'
143 | extent = [min(f), max(f), min(c), max(c)]
144 |
145 | if transpose:
146 | if origin == 'lower':
147 | extent = [extent[2], extent[3], extent[0], extent[1]]
148 | else:
149 | origin = 'lower'
150 | extent = [extent[3], extent[2], extent[0], extent[1]]
151 | (xlabel_default, ylabel_default) = (ylabel_default, xlabel_default)
152 | data = data.T
153 |
154 | xlabel = xlabel if isinstance(xlabel, str) else \
155 | xlabel_default if xlabel else None
156 | ylabel = ylabel if isinstance(ylabel, str) else \
157 | ylabel_default if ylabel else None
158 |
159 | bar = ax.imshow(data.T, vmin=vmin, vmax=vmax, extent=extent, aspect='auto',
160 | origin=origin, cmap=cmap)
161 | if title:
162 | ax.set_title(title)
163 | ax.set_xlabel(xlabel)
164 | ax.set_ylabel(ylabel)
165 | if isinstance(xticklabels, Sequence):
166 | ax.set_xticklabels(xticklabels)
167 | elif not xticklabels:
168 | ax.set_xticklabels([])
169 |
170 | if isinstance(yticklabels, Sequence):
171 | ax.set_yticklabels(yticklabels)
172 | elif not yticklabels:
173 | ax.set_yticklabels([])
174 | if xinv:
175 | ax.invert_xaxis()
176 | if yinv:
177 | ax.invert_yaxis()
178 | if ylim:
179 | ax.set_ylim(ylim)
180 | if xlim:
181 | ax.set_xlim(xlim)
182 | if xlog:
183 | ax.set_xscale('log')
184 | if ylog:
185 | ax.set_yscale('log')
186 | if colorbar:
187 | if colorbar is True:
188 | cbar = plt.colorbar(bar, ax=ax, location='right')
189 | elif isinstance(colorbar, str):
190 | cbar = plt.colorbar(bar, ax=ax, location=colorbar)
191 | else:
192 | cbar = plt.colorbar(bar, cax=colorbar)
193 | if colorbar_label is not None:
194 | cbar.set_label(colorbar_label)
195 |
196 | if savefig:
197 | if not isinstance(savefig, str):
198 | savefig = obj + '.png'
199 | plt.tight_layout()
200 | plt.savefig(savefig)
201 | plt.close()
202 | elif show:
203 | plt.show()
204 | else:
205 | return ax
206 |
--------------------------------------------------------------------------------
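A hedged example of calling plot() on a waveform matrix; the array, dx and fs
below are synthetic values chosen only for illustration:

    import numpy as np
    from daspy.basic_tools.visualization import plot

    data = np.random.randn(200, 5000)    # 200 channels x 5000 time samples
    plot(data, dx=4, fs=250, title='Synthetic DAS record',
         savefig='waveform_demo.png')    # writes the figure instead of showing it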
/daspy/core/__init__.py:
--------------------------------------------------------------------------------
1 | from daspy.core.section import Section
2 | from daspy.core.collection import Collection
3 | from daspy.core.read import read
4 | from daspy.core.dasdatetime import DASDateTime, local_tz, utc
--------------------------------------------------------------------------------
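Because of these re-exports, downstream code can import the core objects in a
single line (a usage note, not part of the source):

    from daspy.core import Section, Collection, read, DASDateTime, utc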
/daspy/core/collection.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for handling Collection objects.
2 | # Author: Minzhe Hu
3 | # Date: 2025.6.4
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import os
6 | import warnings
7 | import pickle
8 | import numpy as np
9 | from copy import deepcopy
10 | from tqdm import tqdm
11 | from glob import glob
12 | from datetime import datetime
13 | from daspy.core.read import read
14 | from daspy.core.dasdatetime import DASDateTime
15 |
16 |
17 | class Collection(object):
18 | def __init__(self, fpath, ftype=None, flength=None, meta_from_file=True,
19 | timeinfo_slice=slice(None), timeinfo_format=None,
20 | timeinfo_tz=None, timeinfo_from_basename=True, **kwargs):
21 | """
22 | :param fpath: str or Sequence of str. File path(s) containing data.
23 | :param ftype: None or str. None for automatic detection, or 'pkl',
24 | 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 'npy'.
25 |         :param flength: float. The duration of a single file in seconds.
26 |         :param meta_from_file: bool or 'all'. False to set nch, nt, dx, fs and
27 |             gauge_length manually. True to extract them from one of the first
28 |             two files. 'all' to extract and cross-check these metadata from
29 |             all files.
30 | :param timeinfo_slice: slice. Slice for extracting start time from file
31 | name.
32 | :param timeinfo_format: str. Format for extracting start time from file
33 | name.
34 | :param timeinfo_tz: datetime.timezone. Time zone for extracting start
35 | time from file name.
36 |         :param timeinfo_from_basename: bool. If True, parse the start time with
37 |             DASDateTime.strptime from the basename of each file path.
38 | :param nch: int. Channel number.
39 | :param nt: int. Sampling points of each file.
40 | :param dx: number. Channel interval in m.
41 | :param fs: number. Sampling rate in Hz.
42 | :param gauge_length: number. Gauge length in m.
43 | """
44 | if isinstance(fpath, (list, tuple)):
45 | self.flist = []
46 | for fp in fpath:
47 | self.flist.extend(glob(fp))
48 | else:
49 | self.flist = glob(fpath)
50 | if not len(self.flist):
51 | raise ValueError('No file input.')
52 | self.flist.sort()
53 | self.ftype = ftype
54 | for key in ['nch', 'nt', 'dx', 'fs', 'gauge_length']:
55 | if key in kwargs.keys():
56 | setattr(self, key, kwargs[key])
57 | if timeinfo_format is None and not meta_from_file:
58 | meta_from_file = True
59 |
60 | if meta_from_file == 'all':
61 | ftime = []
62 | metadata_list = []
63 | for f in self.flist:
64 | sec = read(f, ftype=ftype, headonly=True)
65 | if not hasattr(sec, 'gauge_length'):
66 | sec.gauge_length = None
67 | ftime.append(sec.start_time)
68 | metadata_list.append((sec.nch, sec.nt, sec.dx, sec.fs,
69 | sec.gauge_length, sec.duration))
70 |
71 | if len(set(metadata_list)) > 1:
72 | warnings.warn('More than one kind of setting detected.')
73 | metadata = max(metadata_list, key=metadata_list.count)
74 | for i, key in enumerate(['nch', 'nt', 'dx', 'fs', 'gauge_length']):
75 | if not hasattr(self, key):
76 | setattr(self, key, metadata[i])
77 | if flength is None:
78 | flength = metadata[-1]
79 | self.ftime = ftime
80 | elif meta_from_file:
81 | i = int(len(self.flist) > 1)
82 | sec = read(self.flist[i], ftype=ftype, headonly=True)
83 | if timeinfo_format is None:
84 | if flength is None:
85 | flength = sec.duration
86 | self.ftime = [sec.start_time + (j - i) * flength for j in
87 | range(len(self))]
88 | if not hasattr(sec, 'gauge_length'):
89 | sec.gauge_length = None
90 | metadata = (sec.nch, sec.nt, sec.dx, sec.fs, sec.gauge_length)
91 | for i, key in enumerate(['nch', 'nt', 'dx', 'fs', 'gauge_length']):
92 | if not hasattr(self, key):
93 | setattr(self, key, metadata[i])
94 |
95 | if not hasattr(self, 'ftime'):
96 | if timeinfo_from_basename:
97 | flist_use = [os.path.basename(f) for f in self.flist]
98 | else:
99 | flist_use = self.flist
100 | if timeinfo_tz is None:
101 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice],
102 | timeinfo_format) for f in flist_use]
103 | else:
104 | if '%z' in timeinfo_format.lower():
105 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice],
106 | timeinfo_format).astimezone(timeinfo_tz) for f in
107 | flist_use]
108 | else:
109 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice],
110 | timeinfo_format).replace(tzinfo=timeinfo_tz) for f in
111 | flist_use]
112 |
113 | self._sort()
114 | if flength is None:
115 | if len(self.flist) > 2:
116 | time_diff = np.round(np.diff(self.ftime[1:]).astype(float))
117 | flength_set, counts = np.unique(time_diff, return_counts=True)
118 | if len(flength_set) > 1:
119 | warnings.warn('File start times are unevenly spaced. Data '
120 | 'may not be continuous and self.flength may '
121 | 'be incorrectly detected.')
122 | flength = flength_set[counts.argmax()]
123 | elif len(self.flist) == 2:
124 | flength = self.ftime[1] - self.ftime[0]
125 | else:
126 | flength = read(self.flist[0], ftype=ftype,
127 | headonly=True).duration
128 | elif flength <= 0:
129 |             raise ValueError('flength must be > 0')
130 |
131 | self.flength = flength
132 |
133 | def __str__(self):
134 | if len(self) == 1:
135 | describe = f' flist: {self.flist}\n'
136 | elif len(self) <= 5:
137 | describe = f' flist: {len(self)} files\n' + \
138 | f' {self.flist}\n'
139 | else:
140 | describe = f' flist: {len(self)} files\n' + \
141 | f' [{self[0]},\n' + \
142 | f' {self[1]},\n' + \
143 | f' ...,\n' + \
144 | f' {self[-1]}]\n'
145 |
146 | describe += f' ftime: {self.start_time} to {self.end_time}\n' + \
147 | f' flength: {self.flength}\n'
148 | for key in ['nch', 'nt', 'dx', 'fs', 'gauge_length']:
149 | if hasattr(self, key):
150 | long_key = key.rjust(12)
151 | value = getattr(self, key)
152 | describe += f'{long_key}: {value}\n'
153 |
154 | return describe
155 |
156 | __repr__ = __str__
157 |
158 | def __getitem__(self, i):
159 | return self.flist[i]
160 |
161 | def __len__(self):
162 | return len(self.flist)
163 |
164 | def _sort(self):
165 | sort = np.argsort(self.ftime)
166 | self.ftime = [self.ftime[i] for i in sort]
167 | self.flist = [self.flist[i] for i in sort]
168 | return self
169 |
170 | @property
171 | def start_time(self):
172 | return self.ftime[0]
173 |
174 | @property
175 | def end_time(self):
176 | return self.ftime[-1] + self.flength
177 |
178 | @property
179 | def duration(self):
180 | return self.end_time - self.start_time
181 |
182 | @property
183 | def file_size(self):
184 |         return os.path.getsize(self[int(len(self) > 1)])
185 |
186 | def copy(self):
187 | return deepcopy(self)
188 |
189 | def file_interruption(self, tolerance=0.5):
190 | time_diff = np.diff(self.ftime)
191 | return np.where(abs(time_diff - self.flength) > tolerance)[0]
192 |
193 | def select(self, start=0, end=None, readsec=False, **kwargs):
194 | """
195 | Select a period of data.
196 |
197 |         :param start, end: DASDateTime or int. Start and end time or index of
198 |             the required data.
199 |         :param readsec: bool. If True, read and return an instance of
200 |             daspy.Section. If False, update self.flist.
201 | :param ch1: int. The first channel required. Only works when
202 | readsec=True.
203 | :param ch2: int. The last channel required (not included). Only works
204 | when readsec=True.
205 | :param dch: int. Channel step. Only works when readsec=True.
206 | """
207 | if end is None:
208 | end = len(self.flist)
209 | if 'stime' in kwargs.keys():
210 | start = kwargs.pop('stime')
211 | warnings.warn('In future versions, the parameter \'stime\' will be '
212 | 'replaced by \'start\'.')
213 | if 'etime' in kwargs.keys():
214 | end = kwargs.pop('etime')
215 | warnings.warn('In future versions, the parameter \'etime\' will be '
216 | 'replaced by \'end\'.')
217 |
218 | if isinstance(start, datetime):
219 | for i, ftime in enumerate(self.ftime):
220 | if ftime > start:
221 | s = i - 1
222 | break
223 | elif ftime == start:
224 | s = i
225 | break
226 | elif isinstance(start, int):
227 | s = start
228 |
229 | if isinstance(end, datetime):
230 | for i, ftime in enumerate(self.ftime[s:]):
231 | if ftime == end:
232 | e = s + i - 1
233 | break
234 | elif ftime > end:
235 | e = s + i
236 | break
237 |         elif isinstance(end, int):
238 | e = end
239 |
240 | flist = self.flist[s:e]
241 | if len(flist) == 0:
242 | warnings.warn('No valid data was selected.')
243 | return None
244 |
245 | if readsec:
246 | sec = read(flist[0], **kwargs)
247 | for f in flist[1:]:
248 | sec += read(f, **kwargs)
249 | sec.trimming(tmin=start if isinstance(start, datetime) else None,
250 | tmax=end if isinstance(end, datetime) else None)
251 | return sec
252 | else:
253 | self.flist = flist
254 | self.ftime = self.ftime[s:e]
255 | return self
256 |
257 | def _optimize_for_continuity(self, operations):
258 | method_list = []
259 | kwargs_list = []
260 | if not isinstance(operations[0], (list, tuple)):
261 | operations = [operations]
262 | for opera in operations:
263 | method, kwargs = opera
264 | if method == 'downsampling':
265 | if ('lowpass_filter' in kwargs.keys() and not\
266 | kwargs['lowpass_filter']) or 'tint' not in kwargs.keys():
267 | method_list.append('downsampling')
268 | kwargs_list.append(kwargs)
269 | else:
270 | method_list.extend(['lowpass_cheby_2', 'downsampling'])
271 | kwargs['lowpass_filter'] = False
272 | kwargs0 = dict(freq=self.fs/2/kwargs['tint'], zi=0)
273 | kwargs_list.extend([kwargs0, kwargs])
274 | else:
275 | if method in ['taper', 'cosine_taper']:
276 | kwargs.setdefault('side', 'both')
277 | elif method in ['bandpass', 'bandstop', 'lowpass', 'highpass',
278 | 'lowpass_cheby_2']:
279 | kwargs.setdefault('zi', 0)
280 |
281 | method_list.append(method)
282 | kwargs_list.append(kwargs)
283 | return method_list, kwargs_list
284 |
285 | def _kwargs_initialization(self, method_list, kwargs_list):
286 | for j, method in enumerate(method_list):
287 | if method == 'time_integration':
288 | kwargs_list[j]['c'] = 0
289 | elif method == 'time_differential':
290 | kwargs_list[j]['prepend'] = 0
291 | elif method in ['bandpass', 'bandstop', 'lowpass',
292 | 'highpass', 'lowpass_cheby_2']:
293 | kwargs_list[j]['zi'] = 0
294 |
295 | def process(self, operations, savepath='./processed', merge=1,
296 | suffix='_pro', ftype=None, dtype=None, save_operations=False,
297 | tolerance=0.5, **read_kwargs):
298 | """
299 |         :param operations: list or None. Each element of the operations list
300 |             should be [method name (str), kwargs (dict)]. None means resume
301 |             from the method_list.pkl and kwargs_list.pkl saved in savepath.
302 | :param savepath: str. Path to save processed files.
303 |         :param merge: int or str. An int merges that many processed files into
304 |             one output file; 'all' merges all files.
305 | :param suffix: str. Suffix for processed files.
306 | :param ftype: None or str. File format for saving. None for automatic
307 | detection, or 'pkl', 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy',
308 | 'npy'.
309 | :param dtype: str. The data type of the saved data.
310 |         :param save_operations: bool. If True, save the operations to
311 | method_list.pkl and kwargs_list.pkl in savepath.
312 | :param tolerance: float. Tolerance for checking continuity of data.
313 |         :param read_kwargs: dict. Parameters for the read function.
314 | """
315 | if not os.path.exists(savepath):
316 | os.makedirs(savepath)
317 | method_file = os.path.join(savepath, 'method_list.pkl')
318 | kwargs_file = os.path.join(savepath, 'kwargs_list.pkl')
319 | if operations is None:
320 | if (not os.path.exists(method_file)) or \
321 | (not os.path.exists(kwargs_file)):
322 | raise ValueError('No operations input and no method_list.pkl '
323 | 'and kwargs_list.pkl found in savepath.')
324 |             with open(method_file, 'rb') as f:
325 |                 method_list = pickle.load(f)
326 |             with open(kwargs_file, 'rb') as f:
327 |                 kwargs_list = pickle.load(f)
328 | else:
329 | method_list, kwargs_list = self._optimize_for_continuity(operations)
330 | if merge == 'all' or merge > len(self):
331 | merge = len(self)
332 | m = 0
333 | try:
334 | for i in tqdm(range(len(self))):
335 | f = self[i]
336 | if os.path.getsize(f) == 0:
337 | warnings.warn(f'{f} is an empty file. Continuous data is '
338 | 'interrupted here.')
339 | if m > 0:
340 | sec_merge.save(filepath, dtype=dtype)
341 | m = 0
342 | self._kwargs_initialization(method_list, kwargs_list)
343 | continue
344 | try:
345 | sec = read(f, ftype=self.ftype, **read_kwargs)
346 | if sec.data.size == 0:
347 | if m > 0:
348 | sec_merge.save(filepath, dtype=dtype)
349 | m = 0
350 | self._kwargs_initialization(method_list, kwargs_list)
351 | continue
352 | except Exception as e:
353 | warnings.warn(f'Error reading {f}: {e}. Continuous data is '
354 | 'interrupted here.')
355 | if m > 0:
356 | sec_merge.save(filepath, dtype=dtype)
357 | m = 0
358 | self._kwargs_initialization(method_list, kwargs_list)
359 | continue
360 | for j, method in enumerate(method_list):
361 | if method in ['taper', 'cosine_taper']:
362 | if not ((i==0 and kwargs_list[j]['side'] != 'right') or
363 | (i == len(self) - 1 and kwargs_list[j]['side'] !=
364 | 'left')):
365 | continue
366 | out = getattr(sec, method)(**kwargs_list[j])
367 | if method == 'time_integration':
368 | kwargs_list[j]['c'] = sec.data[:, -1].copy()
369 | elif method == 'time_differential':
370 | kwargs_list[j]['prepend'] = sec.data[:, -1].copy()
371 | elif method in ['bandpass', 'bandstop', 'lowpass', 'highpass',
372 | 'lowpass_cheby_2']:
373 | kwargs_list[j]['zi'] = out
374 |
375 | if m == 0:
376 | sec_merge = sec
377 | f0, f1 = os.path.splitext(os.path.basename(f))
378 | f1 = f1 if ftype is None else ftype
379 | filepath = os.path.join(savepath, f0+suffix+f1)
380 | elif abs(sec_merge.end_time - sec.start_time) <= tolerance:
381 | sec_merge += sec
382 | else:
383 | warnings.warn(f'The start time of {f} does not correspond '
384 | 'to the end time of the previous file. '
385 | 'Continuous data is interrupted here.')
386 | sec_merge.save(filepath, dtype=dtype)
387 | sec_merge = sec
388 | f0, f1 = os.path.splitext(os.path.basename(f))
389 | f1 = f1 if ftype is None else ftype
390 | filepath = os.path.join(savepath, f0+suffix+f1)
391 | m = 0
392 | m += 1
393 | if m == merge:
394 | sec_merge.save(filepath, dtype=dtype)
395 | m = 0
396 | if m > 0:
397 | sec_merge.save(filepath, dtype=dtype)
398 | except KeyboardInterrupt as e:
399 | with open(method_file, 'wb') as f:
400 | pickle.dump(method_list, f)
401 | with open(kwargs_file, 'wb') as f:
402 | pickle.dump(kwargs_list, f)
403 |             print('Process interrupted. Saving method_list and kwargs_list.')
404 | raise e
405 | else:
406 | if save_operations:
407 | with open(method_file, 'wb') as f:
408 | pickle.dump(method_list, f)
409 | with open(kwargs_file, 'wb') as f:
410 | pickle.dump(kwargs_list, f)
411 | print(f'Operations saved to {method_file} and {kwargs_file}.')
412 | else:
413 | if os.path.exists(method_file):
414 | os.remove(method_file)
415 | if os.path.exists(kwargs_file):
416 | os.remove(kwargs_file)
417 |
418 |
419 | # Dynamically add methods for cascade_methods
420 | def _create_cascade_method(method_name):
421 | def cascade_method(self, savepath='./processed', merge=1,
422 | suffix=f'_{method_name}', ftype=None, dtype=None,
423 | save_operations=False, **kwargs):
424 | """
425 | Automatically generated method for {method_name}.
426 | Applies the {method_name} operation to the data and saves the result.
427 |
428 | :param savepath: str. Path to save processed files.
429 |         :param merge: int or str. An int merges that many processed files into
430 |             one output file; 'all' merges all files.
431 | :param suffix: str. Suffix for processed files.
432 | :param ftype: None or str. None for automatic detection, or 'pkl',
433 | 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 'npy'.
434 | :param dtype: str. The data type of the saved data.
435 |         :param save_operations: bool. If True, save the operations to
436 | method_list.pkl and kwargs_list.pkl in savepath.
437 | :param kwargs: dict. Parameters for the {method_name} operation.
438 | """
439 | operations = [[method_name, kwargs]]
440 | self.process(operations, savepath=savepath, merge=merge, suffix=suffix,
441 | ftype=ftype, dtype=dtype, save_operations=save_operations)
442 | return cascade_method
443 |
444 |
445 | for method in ['time_integration', 'time_differential', 'downsampling',
446 | 'bandpass', 'bandstop', 'lowpass', 'highpass',
447 | 'lowpass_cheby_2']:
448 | setattr(Collection, method, _create_cascade_method(method))
--------------------------------------------------------------------------------
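A sketch of building a Collection and batch-processing it; the file pattern,
filter corner and decimation factor below are hypothetical, and the operation
names follow the cascade methods registered above:

    from daspy.core import Collection

    coll = Collection('/data/das/*.h5')    # hypothetical glob pattern
    print(coll)                            # file count, time span, nch/nt/dx/fs

    # Lowpass then decimate in time, merging 10 input files per output file.
    coll.process([['lowpass_cheby_2', {'freq': 10}],
                  ['downsampling', {'tint': 5}]],
                 savepath='./processed', merge=10)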
/daspy/core/dasdatetime.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for handling DASDateTime objects.
2 | # Author: Minzhe Hu
3 | # Date: 2025.3.29
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import time
6 | from typing import Iterable
7 | from datetime import datetime, timedelta, timezone
8 |
9 |
10 | utc = timezone.utc
11 | local_tz = timezone(timedelta(seconds=-time.altzone))
12 |
13 |
14 | class DASDateTime(datetime):
15 | def __add__(self, other):
16 | if isinstance(other, Iterable):
17 | out = []
18 | for t in other:
19 | out.append(self + t)
20 | return out
21 | elif not isinstance(other, timedelta):
22 | other = timedelta(seconds=float(other))
23 | return super().__add__(other)
24 |
25 | def __sub__(self, other):
26 | if isinstance(other, Iterable):
27 | out = []
28 | for t in other:
29 | out.append(self - t)
30 | return out
31 | elif isinstance(other, datetime):
32 | return datetime.__sub__(*self._unify_tz(other)).total_seconds()
33 | elif not isinstance(other, timedelta):
34 | other = timedelta(seconds=other)
35 | return super().__sub__(other)
36 |
37 | def __le__(self, other):
38 | return datetime.__le__(*self._unify_tz(other))
39 |
40 | def __lt__(self, other):
41 | return datetime.__lt__(*self._unify_tz(other))
42 |
43 | def __ge__(self, other):
44 | return datetime.__ge__(*self._unify_tz(other))
45 |
46 | def __gt__(self, other):
47 | return datetime.__gt__(*self._unify_tz(other))
48 |
49 | def _unify_tz(self, other: datetime):
50 | if self.tzinfo and (not other.tzinfo):
51 | return self, other.replace(tzinfo=self.tzinfo)
52 | elif (not self.tzinfo) and other.tzinfo:
53 | return self.replace(tzinfo=other.tzinfo), other
54 | return self, other
55 |
56 | def local(self):
57 | return self.astimezone(tz=local_tz)
58 |
59 | def utc(self):
60 | return self.astimezone(tz=utc)
61 |
62 | def remove_tz(self):
63 | return self.replace(tzinfo=None)
64 |
65 | @classmethod
66 | def from_datetime(cls, dt: datetime):
67 | return cls.fromtimestamp(dt.timestamp(), tz=dt.tzinfo)
68 |
69 | @classmethod
70 | def from_obspy_UTCDateTime(cls, dt):
71 | return cls.from_datetime(dt.datetime)
72 |
73 | def to_datetime(self):
74 | return datetime.fromtimestamp(self.timestamp(), tz=self.tzinfo)
75 |
76 | def to_obspy_UTCDateTime(self):
77 | from obspy import UTCDateTime
78 |         return UTCDateTime(self.to_datetime())
79 |
80 | @classmethod
81 | def strptime(cls, date_string, format):
82 | """
83 | string, format -> new datetime parsed from a string
84 | (like time.strptime()).
85 | """
86 | from _strptime import _strptime
87 | tt, fraction, gmtoff_fraction = _strptime(date_string, format)
88 | tzname, gmtoff = tt[-2:]
89 | args = tt[:6] + (fraction,)
90 | if gmtoff is not None:
91 | tzdelta = timedelta(seconds=gmtoff, microseconds=gmtoff_fraction)
92 | if tzname:
93 | tz = timezone(tzdelta, tzname)
94 | else:
95 | tz = timezone(tzdelta)
96 | args += (tz,)
97 | elif tt[-3] == 0:
98 | args += (utc,)
99 |
100 | return cls(*args)
--------------------------------------------------------------------------------
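A short sketch of DASDateTime arithmetic as implemented above (the timestamps
are arbitrary):

    from daspy.core.dasdatetime import DASDateTime, utc

    t0 = DASDateTime(2024, 1, 1, 12, 0, 0, tzinfo=utc)
    t1 = DASDateTime(2024, 1, 1, 12, 0, 30, 500000, tzinfo=utc)
    print(t1 - t0)             # 30.5 -- subtracting datetimes gives float seconds
    print(t0 + [0, 60, 120])   # adding an iterable returns a list of shifted times
    print(t0.local())          # convert to the machine's local time zone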
/daspy/core/example.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/core/example.pkl
--------------------------------------------------------------------------------
/daspy/core/read.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for reading DAS data.
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.21
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | # Partially modified from
6 | # https://github.com/RobbinLuo/das-toolkit/blob/main/DasTools/DasPrep.py
7 | import warnings
8 | import json
9 | import pickle
10 | import numpy as np
11 | import h5py
12 | import segyio
13 | from typing import Union
14 | from pathlib import Path
15 | from nptdms import TdmsFile
16 | from daspy.core.section import Section
17 | from daspy.core.dasdatetime import DASDateTime, utc
18 |
19 |
20 | def read(fname=None, output_type='section', ftype=None, headonly=False,
21 | dtype=None, **kwargs) -> Union[Section, tuple]:
22 | """
23 | Read a .pkl/.pickle, .tdms, .h5/.hdf5, .segy/.sgy file.
24 |
25 | :param fname: str or pathlib.PosixPath. Path of DAS data file.
26 | :param output_type: str. 'Section' means return an instance of
27 | daspy.Section, 'array' means return numpy.array for data and a
28 | dictionary for metadata.
29 | :param ftype: None, str or function. None for automatic detection, or str to
30 | specify a type of 'pkl', 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy',
31 |         or 'npy', or a function that reads data and metadata.
32 |     :param headonly: bool. If True, only metadata will be read; the returned
33 |         data will be an all-zero array of the same size as the original
34 |         data.
35 | :param ch1: int. The first channel required.
36 | :param ch2: int. The last channel required (not included).
37 | :param dch: int. Channel step.
38 | :param dtype: str. The data type of the returned data.
39 | :return: An instance of daspy.Section, or numpy.array for data and a
40 | dictionary for metadata.
41 | """
42 | fun_map = {'pkl': _read_pkl, 'tdms': _read_tdms, 'h5': _read_h5,
43 | 'sgy': _read_segy, 'npy': _read_npy}
44 | if fname is None:
45 | fname = Path(__file__).parent / 'example.pkl'
46 | ftype = 'pkl'
47 | if ftype is None:
48 | ftype = str(fname).split('.')[-1].lower()
49 |
50 | if callable(ftype):
51 | try:
52 | data, metadata = ftype(fname, headonly=headonly, **kwargs)
53 | except TypeError:
54 | data, metadata = ftype(fname)
55 | else:
56 | for rtp in [('pickle', 'pkl'), ('hdf5', 'h5'), ('segy', 'sgy')]:
57 | ftype = ftype.replace(*rtp)
58 | data, metadata = fun_map[ftype](fname, headonly=headonly, **kwargs)
59 |
60 | if dtype is not None:
61 | data = data.astype(dtype)
62 | if output_type.lower() == 'section':
63 | metadata['source'] = Path(fname)
64 | metadata['source_type'] = ftype
65 | data[np.isnan(data)] = 0
66 | return Section(data, **metadata)
67 | elif output_type.lower() == 'array':
68 | return data, metadata
69 |
70 |
71 | def _read_pkl(fname, headonly=False, **kwargs):
72 | dch = kwargs.pop('dch', 1)
73 | with open(fname, 'rb') as f:
74 | pkl_data = pickle.load(f)
75 | if isinstance(pkl_data, np.ndarray):
76 |             warnings.warn('This data format doesn\'t include channel interval '
77 |                           'and sampling rate. Please set them manually.')
78 | if headonly:
79 | return np.zeros_like(pkl_data), {'dx': None, 'fs': None}
80 | else:
81 | ch1 = kwargs.pop('ch1', 0)
82 | ch2 = kwargs.pop('ch2', len(pkl_data))
83 | return pkl_data[ch1:ch2:dch], {'dx': None, 'fs': None}
84 | elif isinstance(pkl_data, dict):
85 | data = pkl_data.pop('data')
86 | if headonly:
87 | data = np.zeros_like(data)
88 | else:
89 | if 'ch1' in kwargs.keys() or 'ch2' in kwargs.keys():
90 | if 'start_channel' in pkl_data.keys():
91 | s_chn = pkl_data['start_channel']
92 |                     print(f'Data starts with channel {s_chn}.')
93 | else:
94 | s_chn = 0
95 | ch1 = kwargs.pop('ch1', s_chn)
96 | ch2 = kwargs.pop('ch2', s_chn + len(data))
97 | data = data[ch1 - s_chn:ch2 - s_chn, :]
98 | pkl_data['start_channel'] = ch1
99 | return data, pkl_data
100 | else:
101 | raise TypeError('Unknown data type.')
102 |
103 |
104 | def _read_h5_headers(group):
105 | headers = {}
106 | if len(group.attrs) != 0:
107 | headers['attrs'] = dict(group.attrs)
108 | if isinstance(group, h5py._hl.dataset.Dataset):
109 | return headers
110 | for key, value in group.items():
111 | try:
112 | gp_headers = _read_h5_headers(value)
113 | except AttributeError:
114 | headers[key] = value
115 | if len(gp_headers):
116 | headers[key] = gp_headers
117 |
118 | return headers
119 |
120 |
121 | def _read_h5_starttime(h5_file):
122 | try:
123 | stime = h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime']
124 | except KeyError:
125 | try:
126 | stime = h5_file['Acquisition'].attrs['MeasurementStartTime']
127 | except KeyError:
128 | try:
129 | stime = h5_file['Acquisition/Raw[0]/RawDataTime/'][0]
130 | except KeyError:
131 | return 0
132 | if isinstance(stime, bytes):
133 | stime = stime.decode('ascii')
134 |
135 | if isinstance(stime, str):
136 | if len(stime) > 26:
137 | stime = DASDateTime.strptime(stime, '%Y-%m-%dT%H:%M:%S.%f%z')
138 | else:
139 | stime = DASDateTime.strptime(stime, '%Y-%m-%dT%H:%M:%S.%f').\
140 | astimezone(utc)
141 | else:
142 | stime = DASDateTime.fromtimestamp(stime / 1e6).astimezone(utc)
143 |
144 | return stime
145 |
146 |
147 | def _read_h5(fname, headonly=False, **kwargs):
148 | with h5py.File(fname, 'r') as h5_file:
149 | dch = kwargs.pop('dch', 1)
150 | group = list(h5_file.keys())[0]
151 | if len(h5_file.keys()) >= 10: # ASN/OptoDAS https://github.com/ASN-Norway/simpleDAS
152 | ch1 = kwargs.pop('ch1', 0)
153 | if h5_file['header/dimensionNames'][0] == b'time':
154 | nch = h5_file['data'].shape[1]
155 | if headonly:
156 | data = np.zeros_like(h5_file['data']).T
157 | else:
158 | ch2 = kwargs.pop('ch2', nch)
159 | data = h5_file['data'][:, ch1:ch2:dch].T
160 | elif h5_file['header/dimensionNames'][0] == b'distance':
161 |                 nch = h5_file['data'].shape[0]
162 | if headonly:
163 | data = np.zeros_like(h5_file['data'])
164 | else:
165 | ch2 = kwargs.pop('ch2', nch)
166 | data = h5_file['data'][ch1:ch2:dch, :]
167 | dx = h5_file['header/dx'][()]
168 | start_time = DASDateTime.fromtimestamp(
169 | h5_file['header/time'][()]).utc()
170 | metadata = {'dx': dx * dch, 'fs': 1 / h5_file['header/dt'][()],
171 | 'start_time': start_time, 'start_channel': ch1,
172 | 'start_distance': ch1 * dx,
173 | 'scale': h5_file['header/dataScale'][()]}
174 |             if not np.isnan(h5_file['header/gaugeLength'][()]):
175 |                 metadata['gauge_length'] = h5_file['header/gaugeLength'][()]
176 | elif len(h5_file.keys()) == 5: # AP Sensing
177 | # read data
178 | nch = h5_file['strain'].shape[1]
179 | ch1 = kwargs.pop('ch1', 0)
180 | ch2 = kwargs.pop('ch2', nch)
181 | if headonly:
182 | data = np.zeros_like(h5_file['strain']).T
183 | else:
184 | data = h5_file['strain'][:, ch1:ch2:dch].T
185 |
186 | # read metadata
187 | dx = h5_file['spatialsampling'][()]
188 | metadata = {'fs': h5_file['RepetitionFrequency'][()],
189 | 'dx': dx * dch, 'start_channel': ch1,
190 | 'start_distance': ch1 * dx,
191 | 'gauge_length': h5_file.get('GaugeLength')[()]}
192 |         elif len(h5_file.keys()) == 3: # OptaSense
193 | nch = h5_file['data'].shape[1]
194 | ch1 = kwargs.pop('ch1', 0)
195 | ch2 = kwargs.pop('ch2', nch)
196 | dch = kwargs.pop('dch', 1)
197 | if headonly:
198 | data = np.zeros_like(h5_file['data'])
199 | else:
200 | data = h5_file['data'][ch1:ch2:dch, :]
201 | dx = (h5_file['x_axis'][-1] - h5_file['x_axis'][0]) / \
202 | (len(h5_file['x_axis']) - 1)
203 | fs = (len(h5_file['t_axis']) - 1) / (h5_file['t_axis'][-1] -
204 | h5_file['t_axis'][0])
205 | metadata = {'dx': dx, 'fs': fs, 'start_channel': ch1,
206 | 'start_distance': h5_file['x_axis'][0] + dx * ch1,
207 | 'start_time': h5_file['t_axis'][0]}
208 | elif set(h5_file.keys()) == {'Mapping', 'Acquisition'}: # Silixa/iDAS
209 | nch = h5_file['Acquisition/Raw[0]'].attrs['NumberOfLoci']
210 | ch1 = kwargs.pop('ch1', 0)
211 | ch2 = kwargs.pop('ch2', nch)
212 | if h5_file['Acquisition/Raw[0]/RawData/'].shape[0] == nch:
213 | if headonly:
214 | data = np.zeros_like(h5_file['Acquisition/Raw[0]/RawData/'])
215 | else:
216 | data = h5_file['Acquisition/Raw[0]/RawData/']\
217 | [ch1:ch2:dch, :]
218 | else:
219 | if headonly:
220 | data = np.zeros_like(
221 | h5_file['Acquisition/Raw[0]/RawData/']).T
222 | else:
223 | data = h5_file['Acquisition/Raw[0]/RawData/']\
224 | [:, ch1:ch2:dch].T
225 |
226 | dx = np.mean(h5_file['Mapping/MeasuredSpatialResolution'])
227 | start_distance = h5_file['Acquisition/Custom/UserSettings'].\
228 | attrs['StartDistance'] + ch1 * dx
229 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime']
230 | fs = h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate']
231 | gauge_length = h5_file['Acquisition'].attrs['GaugeLength']
232 | scale = h5_file['Acquisition/Raw[0]'].attrs['AmpScaling']
233 | geometry = np.vstack((h5_file['Mapping/Lon'],
234 | h5_file['Mapping/Lat'])).T
235 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
236 | 'start_distance': ch1 * dx,
237 | 'gauge_length': gauge_length, 'geometry': geometry,
238 | 'scale': scale}
239 | metadata['start_time'] = _read_h5_starttime(h5_file)
240 | elif group == 'Acquisition':
241 |             # OptaSense/ODH, Silixa/iDAS, Sintela/Onyx, Smart Sensing/ZD DAS
242 | # read data
243 | try:
244 | nch = h5_file['Acquisition'].attrs['NumberOfLoci']
245 | except KeyError:
246 | nch = len(h5_file['Acquisition/Raw[0]/RawData/'])
247 | ch1 = kwargs.pop('ch1', 0)
248 | ch2 = kwargs.pop('ch2', nch)
249 | if h5_file['Acquisition/Raw[0]/RawData/'].shape[0] == nch:
250 | if headonly:
251 | data = np.zeros_like(h5_file['Acquisition/Raw[0]/RawData/'])
252 | else:
253 | data = h5_file['Acquisition/Raw[0]/RawData/']\
254 | [ch1:ch2:dch, :]
255 | else:
256 | if headonly:
257 | data = np.zeros_like(
258 | h5_file['Acquisition/Raw[0]/RawData/']).T
259 | else:
260 | data = h5_file['Acquisition/Raw[0]/RawData/']\
261 | [:, ch1:ch2:dch].T
262 |
263 | # read metadata
264 | try:
265 | fs = h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate']
266 | except KeyError:
267 | time_arr = h5_file['Acquisition/Raw[0]/RawDataTime/']
268 | fs = 1 / (np.diff(time_arr).mean() / 1e6)
269 |
270 | dx = h5_file['Acquisition'].attrs['SpatialSamplingInterval']
271 | gauge_length = h5_file['Acquisition'].attrs['GaugeLength']
272 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
273 | 'start_distance': ch1 * dx,
274 | 'gauge_length': gauge_length}
275 |
276 | metadata['start_time'] = _read_h5_starttime(h5_file)
277 | elif group == 'raw':
278 | nch = len(h5_file['raw'])
279 | ch1 = kwargs.pop('ch1', 0)
280 | ch2 = kwargs.pop('ch2', nch)
281 | if headonly:
282 | data = np.zeros_like(h5_file['raw'])
283 | else:
284 | data = h5_file['raw'][ch1:ch2:dch, :]
285 | fs = round(1 / np.diff(h5_file['timestamp']).mean())
286 | start_time = DASDateTime.fromtimestamp(
287 | h5_file['timestamp'][0]).astimezone(utc)
288 | warnings.warn('This data format doesn\'t include channel interval. '
289 | 'Please set manually')
290 | metadata = {'dx': None, 'fs': fs, 'start_channel': ch1,
291 | 'start_time': start_time}
292 | elif group == 'data': # https://ai4eps.github.io/homepage/ml4earth/seismic_event_format_das/
293 | nch = h5_file['data'].shape[1]
294 | ch1 = kwargs.pop('ch1', 0)
295 | ch2 = kwargs.pop('ch2', nch)
296 | dch = kwargs.pop('dch', 1)
297 | if headonly:
298 | data = np.zeros_like(h5_file['data'])
299 | else:
300 | data = h5_file['data'][ch1:ch2:dch, :]
301 | attr = h5_file['data'].attrs
302 | dx = attr['dx_m']
303 | metadata = {'dx': dx, 'fs': 1 / attr['dt_s'], 'start_channel': ch1,
304 | 'start_distance': ch1 * dx,
305 | 'start_time': DASDateTime.strptime(
306 | attr['begin_time'], '%Y-%m-%dT%H:%M:%S.%f%z'),
307 | 'data_type': attr['unit']}
308 | if 'event_time' in attr.keys():
309 | try:
310 | origin_time = DASDateTime.strptime(
311 | attr['event_time'], '%Y-%m-%dT%H:%M:%S.%f%z')
312 | except ValueError:
313 | origin_time = DASDateTime.strptime(
314 | attr['event_time'], '%Y-%m-%dT%H:%M:%S.%f')
315 | metadata['origin_time'] = origin_time
316 |
317 | elif group == 'data_product':
318 | # read data
319 | nch = h5_file.attrs['nx']
320 | ch1 = kwargs.pop('ch1', 0)
321 | ch2 = kwargs.pop('ch2', nch)
322 | array_shape = h5_file['data_product/data'].shape
323 | if array_shape[0] == nch:
324 | if headonly:
325 | data = np.zeros_like(h5_file['data_product/data'])
326 | else:
327 | data = h5_file['data_product/data'][ch1:ch2:dch, :]
328 | else:
329 | if headonly:
330 | data = np.zeros_like(h5_file['data_product/data']).T
331 | else:
332 | data = h5_file['data_product/data'][:, ch1:ch2:dch].T
333 |
334 | # read metadata
335 | fs = 1 / h5_file.attrs['dt_computer']
336 | dx = h5_file.attrs['dx']
337 | gauge_length = h5_file.attrs['gauge_length']
338 | if h5_file.attrs['saving_start_gps_time'] > 0:
339 | start_time = DASDateTime.fromtimestamp(
340 | h5_file.attrs['file_start_gps_time'])
341 | else:
342 | start_time = DASDateTime.fromtimestamp(
343 | h5_file.attrs['file_start_computer_time'])
344 | data_type = h5_file.attrs['data_product']
345 |
346 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
347 | 'start_distance': ch1 * dx,
348 | 'start_time': start_time.astimezone(utc),
349 | 'gauge_length': gauge_length, 'data_type': data_type}
350 | else: # Febus
351 | acquisition = list(h5_file[f'{group}/Source1/Zone1'].keys())[0]
352 | # read data
353 | start_channel = int(h5_file[f'{group}/Source1/Zone1'].
354 | attrs['Extent'][0])
355 | dataset = h5_file[f'{group}/Source1/Zone1/{acquisition}']
356 | nch = dataset.shape[-1]
357 | ch1 = kwargs.pop('ch1', start_channel)
358 | ch2 = kwargs.pop('ch2', start_channel + nch)
359 | if headonly:
360 | data = np.zeros_like(dataset).T.reshape((nch, -1))
361 | else:
362 | if len(dataset.shape) == 3: # Febus A1-R
363 | data = dataset[:, :, ch1 - start_channel:ch2 - start_channel
364 | :dch].reshape((-1, (ch2 - ch1) // dch)).T
365 | elif len(dataset.shape) == 2: # Febus A1
366 | data = dataset[:, ch1 - start_channel:ch2 - start_channel:
367 | dch].T
368 | # read metadata
369 | attrs = h5_file[f'{group}/Source1/Zone1'].attrs
370 | dx = attrs['Spacing'][0]
371 | try:
372 | fs = float(attrs['FreqRes'])
373 | except KeyError:
374 | try:
375 | fs = (attrs['PulseRateFreq'][0] /
376 | attrs['SamplingRes'][0]) / 1000
377 | except KeyError:
378 | fs = attrs['SamplingRate'][0]
379 | start_distance = attrs['Origin'][0]
380 | time = h5_file[f'{group}/Source1/time']
381 | if len(time.shape) == 2: # Febus A1-R
382 | start_time = DASDateTime.fromtimestamp(time[0, 0]).\
383 | astimezone(utc)
384 | elif len(time.shape) == 1: # Febus A1
385 | start_time = DASDateTime.fromtimestamp(time[0]).astimezone(utc)
386 | gauge_length = attrs['GaugeLength'][0]
387 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
388 | 'start_distance': start_distance +
389 | (ch1 - start_channel) * dx,
390 | 'start_time': start_time, 'gauge_length': gauge_length}
391 |
392 | metadata['headers'] = _read_h5_headers(h5_file)
393 |
394 | return data, metadata
395 |
396 |
397 | def _read_tdms(fname, headonly=False, **kwargs):
398 | # https://nptdms.readthedocs.io/en/stable/quickstart.html
399 | with TdmsFile.read(fname) as tdms_file:
400 | group_name = [group.name for group in tdms_file.groups()]
401 | if 'Measurement' in group_name:
402 | key = 'Measurement'
403 | elif 'DAS' in group_name:
404 | key = 'DAS'
405 | else:
406 | key = group_name[0]
407 |
408 | headers = {**tdms_file.properties, **tdms_file[key].properties}
409 | nch = len(tdms_file[key])
410 | dch = kwargs.pop('dch', 1)
411 | # read data
412 | if nch > 1:
413 | start_channel = min(int(channel.name) for channel in
414 | tdms_file[key].channels())
415 | ch1 = max(kwargs.pop('ch1', start_channel), start_channel)
416 | ch2 = min(kwargs.pop('ch2', start_channel + nch),
417 | start_channel + nch)
418 | if headonly:
419 | nt = len(tdms_file[key][str(start_channel)])
420 | data = np.zeros((nch, nt))
421 | else:
422 | data = np.asarray([tdms_file[key][str(ch)]
423 | for ch in range(ch1, ch2, dch)])
424 | elif nch == 1:
425 | try:
426 | start_channel = int(headers['Initial Channel'])
427 | except KeyError:
428 | start_channel = 0
429 |
430 | ch1 = max(kwargs.pop('ch1', start_channel), start_channel)
431 | nch = int(headers['Total Channels'])
432 | ch2 = min(kwargs.pop('ch2', start_channel + nch),
433 | start_channel + nch)
434 | if headonly:
435 | data = np.zeros(len(tdms_file[key].channels()[0])).\
436 | reshape((nch, -1))
437 | else:
438 | data = np.asarray(tdms_file[key].channels()[0]).\
439 | reshape((-1, nch)).T
440 | data = data[ch1 - start_channel:ch2 - start_channel:dch]
441 |
442 | # read metadata
443 | try:
444 | dx = headers['SpatialResolution[m]']
445 | except KeyError:
446 | try:
447 | dx = headers['Spatial Resolution']
448 | except KeyError:
449 | dx = None
450 |
451 | try:
452 | fs = headers['SamplingFrequency[Hz]']
453 | except KeyError:
454 | try:
455 | fs = 1 / headers['Time Base']
456 | except KeyError:
457 | fs = None
458 |
459 | try:
460 | start_distance = headers['Start Distance (m)'] + \
461 | dx * (ch1 - start_channel)
462 | except KeyError:
463 | start_distance = dx * ch1
464 |
465 | try:
466 | start_time = DASDateTime.strptime(headers['ISO8601 Timestamp'],
467 | '%Y-%m-%dT%H:%M:%S.%f%z')
468 | except ValueError:
469 | start_time = DASDateTime.strptime(headers['ISO8601 Timestamp'],
470 | '%Y-%m-%dT%H:%M:%S.%f')
471 | except KeyError:
472 | start_time = 0
473 | for key in ['GPSTimeStamp', 'CPUTimeStamp', 'Trigger Time']:
474 | if key in headers.keys():
475 | if headers[key]:
476 | start_time = DASDateTime.from_datetime(headers[key].
477 | item())
478 | break
479 |
480 | if dx is not None:
481 | dx *= dch
482 | metadata = {'dx': dx, 'fs': fs, 'start_channel': ch1,
483 | 'start_distance': start_distance, 'start_time': start_time,
484 | 'headers': headers}
485 |
486 | if 'GaugeLength' in headers.keys():
487 | metadata['gauge_length'] = headers['GaugeLength']
488 |
489 | return data, metadata
490 |
491 |
492 | def _read_segy(fname, headonly=False, **kwargs):
493 | # https://github.com/equinor/segyio-notebooks/blob/master/notebooks/basic/02_segy_quicklook.ipynb
494 | with segyio.open(fname, ignore_geometry=True) as segy_file:
495 | nch = segy_file.tracecount
496 | ch1 = kwargs.pop('ch1', 0)
497 | ch2 = kwargs.pop('ch2', nch)
498 | dch = kwargs.pop('dch', 1)
499 |
500 | # read data
501 | if headonly:
502 | data = np.zeros_like(segy_file.trace.raw[:])
503 | else:
504 | data = segy_file.trace.raw[ch1:ch2:dch]
505 |
506 | # read metadata:
507 | fs = 1 / (segyio.tools.dt(segy_file) / 1e6)
508 | metadata = {'dx': None, 'fs': fs, 'start_channel': ch1}
509 |         warnings.warn('This data format doesn\'t include channel interval. '
510 |                       'Please set it manually.')
511 |
512 | return data, metadata
513 |
514 |
515 | def _read_npy(fname, headonly=False, **kwargs):
516 | data = np.load(fname)
517 | if headonly:
518 | return np.zeros_like(data), {'dx': None, 'fs': None}
519 | else:
520 | ch1 = kwargs.pop('ch1', 0)
521 | ch2 = kwargs.pop('ch2', len(data))
522 | dch = kwargs.pop('dch', 1)
523 | warnings.warn('This data format doesn\'t include channel interval and '
524 |                       'sampling rate. Please set them manually.')
525 | return data[ch1:ch2:dch], {'dx': None, 'fs': None}
526 |
527 |
528 | def read_json(fname, output_type='dict'):
529 | """
530 |     Read .json metadata file. See Lai et al. (2024, Seismol. Res. Lett.).
531 |
532 | :param fname: str or pathlib.PosixPath. Path of json file.
533 | :param output_type: str. 'dict' means return a dictionary, and 'Section'
534 |         means return an empty daspy.Section instance with metadata.
535 | :return: A dictionary of metadata or an instance of daspy.Section without
536 | data.
537 | """
538 | with open(fname, 'r') as fcc_file:
539 | headers = json.load(fcc_file)
540 | if output_type.lower() == 'dict':
541 | return headers
542 | elif output_type.lower() in ['section', 'sec']:
543 | if len(headers['Overview']['Interrogator']) > 1:
544 | case_type = 'Multiple interrogators, single cable'
545 | sec_num = len(headers['Overview']['Interrogator'])
546 | sec = []
547 | for interrogator in headers['Overview']['Interrogator']:
548 | nch = interrogator['Acquisition'][0]['Attributes']['number_of_channels']
549 | data = np.zeros((nch, 0))
550 | dx = interrogator['Acquisition'][0]['Attributes']['spatial_sampling_interval']
551 | fs = interrogator['Acquisition'][0]['Attributes']['acquisition_sample_rate']
552 | gauge_length = interrogator['Acquisition'][0]['Attributes']['gauge_length']
553 | sec.append(Section(data, dx, fs, gauge_length=gauge_length,
554 | headers=headers))
555 | elif len(headers['Overview']['Interrogator'][0]['Acquisition']) > 1:
556 | case_type = 'Active survey'
557 | sec_num = len(
558 | headers['Overview']['Interrogator'][0]['Acquisition'])
559 | sec = []
560 | for acquisition in headers['Overview']['Interrogator'][0]['Acquisition']:
561 | nch = acquisition['Attributes']['number_of_channels']
562 | data = np.zeros((nch, 0))
563 | dx = acquisition['Attributes']['spatial_sampling_interval']
564 | fs = acquisition['Attributes']['acquisition_sample_rate']
565 | gauge_length = acquisition['Attributes']['gauge_length']
566 | sec.append(Section(data, dx, fs, gauge_length=gauge_length,
567 | headers=headers))
568 | else:
569 | sec_num = 1
570 | if len(headers['Overview']['Cable']) > 1:
571 |                 case_type = 'Single interrogator, multiple cables'
572 | else:
573 | env = headers['Overview']['Cable'][0]['Attributes']['cable_environment']
574 | if env == 'trench':
575 | case_type = 'Direct buried'
576 | elif env == 'conduit':
577 | case_type = 'Dark fiber'
578 | elif env in ['wireline', 'outside borehole casing']:
579 | case_type = 'Borehole cable'
580 | nch = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['number_of_channels']
581 | dx = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['spatial_sampling_interval']
582 | fs = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['acquisition_sample_rate']
583 | gauge_length = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['gauge_length']
584 | data = np.zeros((nch, 0))
585 | sec = Section(data, dx, fs, gauge_length=gauge_length,
586 | headers=headers)
587 |
588 | print(f'For case of {case_type}, create {sec_num} empty daspy.Section '
589 | 'instance(s)')
590 | return sec
591 |
--------------------------------------------------------------------------------
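A hedged example of read(); with no file name it loads the bundled example.pkl,
and the channel keywords shown are the ones documented above (the HDF5 file
name is hypothetical):

    from daspy.core import read

    sec = read()                  # the packaged example section
    print(sec)
    sub = read('record.h5', ch1=100, ch2=300, dch=2, dtype='float32')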
/daspy/core/write.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for writing DAS data.
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.21
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import os
6 | import warnings
7 | import pickle
8 | import numpy as np
9 | import h5py
10 | import segyio
11 | from shutil import copyfile
12 | from nptdms import TdmsFile, TdmsWriter, RootObject, GroupObject, ChannelObject
13 | from datetime import datetime
14 |
15 |
16 | def write(sec, fname, ftype=None, raw_fname=None, dtype=None):
17 | fun_map = {'tdms': _write_tdms, 'h5': _write_h5, 'sgy': _write_segy}
18 | if ftype is None:
19 | ftype = str(fname).lower().split('.')[-1]
20 |     ftype = ftype.replace('hdf5', 'h5')
21 |     ftype = ftype.replace('segy', 'sgy')
22 | if dtype is not None:
23 | sec = sec.copy()
24 | sec.data = sec.data.astype(dtype)
25 | if ftype == 'pkl':
26 | write_pkl(sec, fname)
27 | elif ftype == 'npy':
28 | np.save(fname, sec.data)
29 | else:
30 | fun_map[ftype](sec, fname, raw_fname=raw_fname)
31 | return None
32 |
33 |
34 | def write_pkl(sec, fname):
35 | with open(fname, 'wb') as f:
36 | pickle.dump(sec.__dict__, f)
37 | return None
38 |
39 |
40 | def _write_tdms(sec, fname, raw_fname=None):
41 | if raw_fname is None:
42 | key = 'Measurement'
43 | file_prop = {}
44 | group_prop = {}
45 | else:
46 | original_file = TdmsFile(raw_fname)
47 | group_name = [group.name for group in original_file.groups()]
48 | if 'Measurement' in group_name:
49 | key = 'Measurement'
50 | elif 'DAS' in group_name:
51 | key = 'DAS'
52 | else:
53 | key = group_name[0]
54 | file_prop = original_file.properties
55 | group_prop = original_file[key].properties
56 |
57 | if 'Spatial Resolution' in group_prop.keys():
58 | group_prop['Spatial Resolution'] = sec.dx
59 | else:
60 | file_prop['SpatialResolution[m]'] = sec.dx
61 |
62 | if 'Time Base' in group_prop.keys():
63 | group_prop['Time Base'] = 1. / sec.fs
64 | else:
65 | file_prop['SamplingFrequency[Hz]'] = sec.fs
66 |
67 | if 'Total Channels' in group_prop.keys():
68 | group_prop['Total Channels'] = sec.nch
69 |
70 | if 'Initial Channel' in group_prop.keys():
71 | group_prop['Initial Channel'] = sec.start_channel
72 |
73 | file_prop['Start Distance (m)'] = sec.start_distance
74 | if isinstance(sec.start_time, datetime):
75 | start_time = sec.start_time
76 | else:
77 | start_time = datetime.fromtimestamp(sec.start_time)
78 |
79 | if raw_fname is None:
80 | file_prop['ISO8601 Timestamp'] = start_time.strftime(
81 | '%Y-%m-%dT%H:%M:%S.%f%z')
82 |         group_prop['Trigger Time'] = np.datetime64(start_time.replace(tzinfo=None))
83 | else:
84 | if 'ISO8601 Timestamp' in file_prop.keys():
85 | file_prop['ISO8601 Timestamp'] = start_time.strftime(
86 | '%Y-%m-%dT%H:%M:%S.%f%z')
87 | else:
88 | for s in ['GPSTimeStamp', 'CPUTimeStamp', 'Trigger Time']:
89 | if s in group_prop.keys():
90 |                     group_prop[s] = np.datetime64(start_time.replace(tzinfo=None))
91 | break
92 |
93 | if hasattr(sec, 'gauge_length'):
94 | file_prop['GaugeLength'] = sec.gauge_length
95 |
96 | with TdmsWriter(fname) as tdms_file:
97 | root_object = RootObject(file_prop)
98 | group_object = GroupObject(key, properties=group_prop)
99 | if raw_fname and len(original_file[key]) == 1:
100 | channel = ChannelObject(key, original_file[key].channels()[0].name,
101 | sec.data.T.flatten(), properties={})
102 | tdms_file.write_segment([root_object, group_object, channel])
103 | else:
104 | channel_list = []
105 | for ch, d in enumerate(sec.data):
106 | channel_list.append(ChannelObject(key,
107 | str(ch + sec.start_channel),
108 | d, properties={}))
109 |
110 | tdms_file.write_segment([root_object, group_object] + channel_list)
111 | return None
112 |
113 |
114 | def _update_h5_dataset(h5_file, path, name, data):
115 | attrs = h5_file[path + name].attrs
116 | del h5_file[path + name]
117 | h5_file.get(path).create_dataset(name, data=data)
118 | for key, value in attrs.items():
119 | h5_file[path + name].attrs[key] = value
120 | return None
121 |
122 |
123 | def _write_h5(sec, fname, raw_fname=None):
124 | if raw_fname is None:
125 | with h5py.File(fname, 'w') as h5_file:
126 | h5_file.create_group('Acquisition/Raw[0]')
127 | h5_file.get('Acquisition/Raw[0]/').\
128 | create_dataset('RawData', data=sec.data)
129 | if isinstance(sec.start_time, datetime):
130 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] = \
131 | np.bytes_(
132 | sec.start_time.strftime('%Y-%m-%dT%H:%M:%S.%f%z'))
133 | stime = sec.start_time.timestamp() * 1e6
134 | DataTime = np.arange(
135 |                 stime, stime + sec.nt / sec.fs * 1e6, 1e6 / sec.fs)
136 | else:
137 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] = \
138 | np.bytes_(str(sec.start_time))
139 | DataTime = sec.start_time + np.arange(0, sec.nt / sec.fs,
140 | 1 / sec.fs)
141 |
142 | h5_file.get('Acquisition/Raw[0]/').\
143 | create_dataset('RawDataTime', data=DataTime)
144 | h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] = sec.fs
145 | h5_file['Acquisition'].attrs['SpatialSamplingInterval'] = sec.dx
146 | if hasattr(sec, 'gauge_length'):
147 | h5_file['Acquisition'].attrs['GaugeLength'] = sec.gauge_length
148 | else:
149 | h5_file['Acquisition'].attrs['GaugeLength'] = np.nan
150 | else:
151 | if not os.path.exists(fname) or not os.path.samefile(raw_fname, fname):
152 | copyfile(raw_fname, fname)
153 | with h5py.File(fname, 'r+') as h5_file:
154 | group = list(h5_file.keys())[0]
155 | if len(h5_file.keys()) == 10:
156 | if h5_file['header/dimensionNames'][0] == b'time':
157 | _update_h5_dataset(h5_file, '/', 'data', sec.data.T)
158 | elif h5_file['header/dimensionNames'][0] == b'distance':
159 | _update_h5_dataset(h5_file, '/', 'data', sec.data)
160 |
161 |                 _update_h5_dataset(h5_file, 'header/', 'dx', sec.dx)
162 |                 _update_h5_dataset(h5_file, 'header/', 'dt', 1 / sec.fs)
163 |                 if isinstance(sec.start_time, datetime):
164 |                     _update_h5_dataset(h5_file, 'header/', 'time',
165 |                                        sec.start_time.timestamp())
166 |                 else:
167 |                     _update_h5_dataset(h5_file, 'header/', 'time',
168 |                                        sec.start_time)
169 | if hasattr(sec, 'gauge_length'):
170 | _update_h5_dataset(h5_file, '/', 'gaugeLength',
171 | sec.gauge_length)
172 | if hasattr(sec, 'scale'):
173 | _update_h5_dataset(h5_file, '/', 'dataScale', sec.scale)
174 | elif len(h5_file.keys()) == 5:
175 | _update_h5_dataset(h5_file, '/', 'strain', sec.data.T)
176 | _update_h5_dataset(h5_file, '/', 'spatialsampling', sec.dx)
177 | _update_h5_dataset(h5_file, '/', 'RepetitionFrequency', sec.fs)
178 | if hasattr(sec, 'gauge_length'):
179 | _update_h5_dataset(h5_file, '/', 'GaugeLength',
180 | sec.gauge_length)
181 | elif len(h5_file.keys()) == 3:
182 | _update_h5_dataset(h5_file, '/', 'data', sec.data)
183 | _update_h5_dataset(h5_file, '/', 'x_axis',
184 | sec.start_distance + np.arange(sec.nch) * sec.dx)
185 | _update_h5_dataset(h5_file, '/', 't_axis',
186 | sec.start_time + np.arange(sec.nt) * sec.dt)
187 | elif group == 'Acquisition':
188 | h5_file['Acquisition'].attrs['NumberOfLoci'] = sec.nch
189 | _update_h5_dataset(h5_file, 'Acquisition/Raw[0]/', 'RawData',
190 | sec.data)
191 | if isinstance(sec.start_time, datetime):
192 | if isinstance(h5_file['Acquisition/Raw[0]/RawData'].
193 | attrs['PartStartTime'], bytes):
194 | h5_file['Acquisition/Raw[0]/RawData'].\
195 | attrs['PartStartTime'] = np.bytes_(
196 | sec.start_time.strftime('%Y-%m-%dT%H:%M:%S.%f%z'))
197 | else:
198 | h5_file['Acquisition/Raw[0]/RawData'].\
199 | attrs['PartStartTime'] = sec.start_time.strftime(
200 | '%Y-%m-%dT%H:%M:%S.%f%z')
201 |                     stime = sec.start_time.timestamp() * 1e6  # microseconds
202 |                     DataTime = np.arange(
203 |                         stime, stime + sec.nt / sec.fs * 1e6, 1e6 / sec.fs)
204 | else:
205 | h5_file['Acquisition/Raw[0]/RawData'].\
206 | attrs['PartStartTime'] = np.bytes_(str(sec.start_time))
207 | DataTime = sec.start_time + np.arange(0, sec.nt / sec.fs,
208 | 1 / sec.fs)
209 | _update_h5_dataset(h5_file, 'Acquisition/Raw[0]/',
210 | 'RawDataTime', DataTime)
211 | h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] = sec.fs
212 | h5_file['Acquisition'].attrs['SpatialSamplingInterval'] = sec.dx
213 | if hasattr(sec, 'gauge_length'):
214 | h5_file['Acquisition'].attrs['GaugeLength'] = \
215 | sec.gauge_length
216 | elif group == 'raw':
217 | _update_h5_dataset(h5_file, '/', 'raw', sec.data)
218 | DataTime = sec.start_time.timestamp() + \
219 | np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
220 | _update_h5_dataset(h5_file, '/', 'timestamp', DataTime)
221 | elif group == 'data': # https://ai4eps.github.io/homepage/ml4earth/seismic_event_format_das/
222 | _update_h5_dataset(h5_file, '/', 'data', sec.data)
223 | h5_file['data'].attrs['dx_m'] = sec.dx
224 | h5_file['data'].attrs['dt_s'] = 1 / sec.fs
225 | h5_file['data'].attrs['begin_time'] = \
226 | datetime.strftime(sec.start_time, '%Y-%m-%dT%H:%M:%S.%f%z')
227 | h5_file['data'].attrs['unit'] = sec.data_type
228 | elif group == 'data_product':
229 | _update_h5_dataset(h5_file, 'data_product/', 'data', sec.data)
230 | h5_file.attrs['dt_computer'] = 1 / sec.fs
231 | h5_file.attrs['dx'] = sec.dx
232 | h5_file.attrs['gauge_length'] = sec.gauge_length
233 | DataTime = sec.start_time.timestamp() + \
234 | np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
235 | if h5_file.attrs['saving_start_gps_time'] > 0:
236 | h5_file.attrs['file_start_gps_time'] = \
237 | sec.start_time.timestamp()
238 | _update_h5_dataset(h5_file, 'data_product/', 'gps_time',
239 | DataTime)
240 | del h5_file['data_product/posix_time']
241 | else:
242 | h5_file.attrs['file_start_computer_time'] = \
243 | sec.start_time.timestamp()
244 | _update_h5_dataset(h5_file, 'data_product/', 'posix_time',
245 | DataTime)
246 | del h5_file['data_product/gps_time']
247 | h5_file.attrs['data_product'] = sec.data_type
248 | else:
249 | acquisition = list(h5_file[f'{group}/Source1/Zone1'].keys())[0]
250 | data = sec.data
251 | fs = int(sec.fs)
252 | d = len(h5_file[f'{group}/Source1/Zone1/{acquisition}'].shape)
253 | if d == 3:
254 | mod = sec.nt % fs
255 |                     if mod:  # pad so the time axis splits into whole blocks
256 |                         data = np.hstack((data, np.zeros((sec.nch, fs - mod))))
257 |                     data = data.reshape((sec.nch, fs, -1)).T
258 | elif d == 2:
259 | data = data.T
260 | _update_h5_dataset(h5_file, f'{group}/Source1/Zone1/',
261 | acquisition, data)
262 |
263 |                 zone = h5_file[f'{group}/Source1/Zone1']
264 |                 # h5py returns attribute arrays by copy, so modify the copy
265 |                 # and write it back for the change to reach the file
266 |                 for attr, val in [('Spacing', sec.dx), ('SamplingRate', sec.fs),
267 |                                   ('Extent', sec.start_channel),
268 |                                   ('Origin', sec.start_distance),
269 |                                   ('GaugeLength', sec.gauge_length)]:
270 |                     arr = zone.attrs[attr]
271 |                     arr[0] = val
272 |                     zone.attrs[attr] = arr
273 |                 zone.attrs['FreqRes'] = np.bytes_(str(sec.fs))
274 | DataTime = sec.start_time.timestamp() + \
275 | np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
276 | _update_h5_dataset(h5_file, f'{group}/Source1/',
277 | 'time', DataTime.reshape((1, -1)))
278 |
279 | return None
280 |
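# === Editor's note: illustrative sketch, not part of the original write.py ===
# For reference, the layout produced by the raw_fname-is-None branch of
# _write_h5 can be read back like this ('section.h5' is a hypothetical name):
def _example_read_h5_section(fname='section.h5'):
    import h5py

    with h5py.File(fname, 'r') as f:
        data = f['Acquisition/Raw[0]/RawData'][()]        # (nch, nt) array
        times = f['Acquisition/Raw[0]/RawDataTime'][()]   # per-sample timestamps
        fs = f['Acquisition/Raw[0]'].attrs['OutputDataRate']
        dx = f['Acquisition'].attrs['SpatialSamplingInterval']
        t0 = f['Acquisition/Raw[0]/RawData'].attrs['PartStartTime']
    return data, times, fs, dx, t0
# ==============================================================================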
281 |
282 | def _write_segy(sec, fname, raw_fname=None):
283 | spec = segyio.spec()
284 | spec.samples = np.arange(sec.nt) / sec.fs * 1e3
285 | spec.tracecount = sec.nch
286 | if raw_fname is None:
287 | spec.format = 1
288 | with segyio.create(fname, spec) as new_file:
289 | new_file.header.length = sec.nch
290 | new_file.header.segy._filename = fname
291 | new_file.trace = sec.data # .astype(np.float32)
292 | else:
293 | with segyio.open(raw_fname, ignore_geometry=True) as raw_file:
294 | spec.sorting = raw_file.sorting
295 | spec.format = raw_file.format
296 | raw_file.header.length = sec.nch
297 | raw_file.header.segy._filename = fname
298 | with segyio.create(fname, spec) as new_file:
299 | new_file.text[0] = raw_file.text[0]
300 | new_file.header = raw_file.header
301 | new_file.trace = sec.data.astype(raw_file.trace.dtype)
302 |
303 |     warnings.warn('The SEG-Y format does not store the channel interval (dx).')
304 | return None
305 |
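# === Editor's note: illustrative sketch, not part of the original write.py ===
# A SEG-Y file produced by _write_segy can be read back with segyio; note that
# the channel spacing (dx) is not stored, as the warning above points out.
# 'section.segy' is a hypothetical file name.
def _example_read_segy_section(fname='section.segy'):
    import segyio

    with segyio.open(fname, ignore_geometry=True) as f:
        data = f.trace.raw[:]   # all traces as a (tracecount, nt) ndarray
        t = f.samples           # sample times as reconstructed by segyio (ms)
    return data, t
# ==============================================================================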
--------------------------------------------------------------------------------
/document/Ridgecrest_traffic_noise.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/document/Ridgecrest_traffic_noise.mat
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 |
4 | setup(
5 | name='DASPy-toolbox', version='1.1.6',
6 |     description=(
7 |         'DASPy is an open-source project dedicated to providing a Python '
8 |         'package for DAS (Distributed Acoustic Sensing) data processing, '
9 |         'which comprises classic seismic data processing techniques and '
10 |         'specialized algorithms for DAS applications.'
11 |     ),
12 | long_description=open('README.md').read(),
13 | author='Minzhe Hu, Zefeng Li',
14 | author_email='hmz2018@mail.ustc.edu.cn',
15 | maintainer='Minzhe Hu',
16 | maintainer_email='hmz2018@mail.ustc.edu.cn',
17 | license='MIT License',
18 | url='https://github.com/HMZ-03/DASPy',
19 | packages=find_packages(),
20 | entry_points={
21 | 'console_scripts': [
22 | 'daspy = daspy.main:main',
23 | ]
24 | },
25 | include_package_data=True,
26 | package_data={
27 | 'daspy': ['core/example.pkl']
28 | },
29 | classifiers=[
30 | 'Operating System :: OS Independent',
31 | 'License :: OSI Approved :: MIT License',
32 | 'Programming Language :: Python :: 3'
33 | ],
34 | python_requires='>=3.9',
35 | install_requires=[
36 | 'numpy',
37 | 'scipy>=1.13',
38 | 'matplotlib',
39 | 'geographiclib',
40 | 'pyproj',
41 | 'h5py',
42 | 'segyio',
43 | 'nptdms',
44 | 'tqdm'
45 | ]
46 | )
47 |
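# === Editor's note: illustrative quickstart, not part of the original setup.py ===
# After installing the distribution defined above (e.g. `pip install DASPy-toolbox`),
# the package is imported as `daspy`. A minimal sketch, assuming `daspy.read()`
# loads the bundled core/example.pkl when called without arguments (see
# package_data above):
#
#     from daspy import read
#     sec = read()     # Section object holding the bundled example data
#     print(sec)
# ==================================================================================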
--------------------------------------------------------------------------------
/website/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/website/logo.png
--------------------------------------------------------------------------------
/website/waveform.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/website/waveform.png
--------------------------------------------------------------------------------