├── .github
│   └── workflows
│       └── workflow.yml
├── .gitignore
├── CONTRIBUTING.md
├── CONTRIBUTORS.txt
├── LICENSE
├── README.md
├── daspy
│   ├── CONTRIBUTORS.txt
│   ├── __init__.py
│   ├── advanced_tools
│   │   ├── __init__.py
│   │   ├── channel.py
│   │   ├── decomposition.py
│   │   ├── denoising.py
│   │   ├── fdct.py
│   │   └── strain2vel.py
│   ├── basic_tools
│   │   ├── __init__.py
│   │   ├── filter.py
│   │   ├── freqattributes.py
│   │   ├── preprocessing.py
│   │   └── visualization.py
│   └── core
│       ├── __init__.py
│       ├── collection.py
│       ├── dasdatetime.py
│       ├── example.pkl
│       ├── read.py
│       ├── section.py
│       └── write.py
├── document
│   ├── Ridgecrest_traffic_noise.mat
│   └── example.ipynb
├── setup.py
└── website
    ├── logo.png
    └── waveform.png
/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
2 |
3 | on: push
4 |
5 | jobs:
6 | build:
7 | name: Build distribution 📦
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - uses: actions/checkout@v4
12 | with:
13 | persist-credentials: false
14 | - name: Set up Python
15 | uses: actions/setup-python@v5
16 | with:
17 | python-version: "3.x"
18 | - name: Install pypa/build
19 | run: >-
20 | python3 -m
21 | pip install
22 | build
23 | --user
24 | - name: Build a binary wheel and a source tarball
25 | run: python3 -m build
26 | - name: Store the distribution packages
27 | uses: actions/upload-artifact@v4
28 | with:
29 | name: python-package-distributions
30 | path: dist/
31 |
32 | publish-to-pypi:
33 | name: >-
34 | Publish Python 🐍 distribution 📦 to PyPI
35 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
36 | needs:
37 | - build
38 | runs-on: ubuntu-latest
39 | environment:
40 | name: pypi
41 | url: https://pypi.org/p/DASPy-toolbox
42 | permissions:
43 | id-token: write # IMPORTANT: mandatory for trusted publishing
44 |
45 | steps:
46 | - name: Download all the dists
47 | uses: actions/download-artifact@v4
48 | with:
49 | name: python-package-distributions
50 | path: dist/
51 | - name: Publish distribution 📦 to PyPI
52 | uses: pypa/gh-action-pypi-publish@release/v1
53 |
54 | github-release:
55 | name: >-
56 | Sign the Python 🐍 distribution 📦 with Sigstore
57 | and upload them to GitHub Release
58 | needs:
59 | - publish-to-pypi
60 | runs-on: ubuntu-latest
61 |
62 | permissions:
63 | contents: write # IMPORTANT: mandatory for making GitHub Releases
64 | id-token: write # IMPORTANT: mandatory for sigstore
65 |
66 | steps:
67 | - name: Download all the dists
68 | uses: actions/download-artifact@v4
69 | with:
70 | name: python-package-distributions
71 | path: dist/
72 | - name: Sign the dists with Sigstore
73 | uses: sigstore/gh-action-sigstore-python@v3.0.0
74 | with:
75 | inputs: >-
76 | ./dist/*.tar.gz
77 | ./dist/*.whl
78 | - name: Create GitHub Release
79 | env:
80 | GITHUB_TOKEN: ${{ github.token }}
81 | run: >-
82 | gh release create
83 | "$GITHUB_REF_NAME"
84 | --repo "$GITHUB_REPOSITORY"
85 | --notes ""
86 | - name: Upload artifact signatures to GitHub Release
87 | env:
88 | GITHUB_TOKEN: ${{ github.token }}
89 | # Upload to GitHub Release using the `gh` CLI.
90 | # `dist/` contains the built packages, and the
91 | # sigstore-produced signatures and certificates.
92 | run: >-
93 | gh release upload
94 | "$GITHUB_REF_NAME" dist/**
95 | --repo "$GITHUB_REPOSITORY"
96 |
97 | publish-to-testpypi:
98 | name: Publish Python 🐍 distribution 📦 to TestPyPI
99 | if: startsWith(github.ref, 'refs/tags/') # only publish to TestPyPI on tag pushes
100 | needs:
101 | - build
102 | runs-on: ubuntu-latest
103 |
104 | environment:
105 | name: testpypi
106 | url: https://test.pypi.org/p/DASPy-toolbox
107 |
108 | permissions:
109 | id-token: write # IMPORTANT: mandatory for trusted publishing
110 |
111 | steps:
112 | - name: Download all the dists
113 | uses: actions/download-artifact@v4
114 | with:
115 | name: python-package-distributions
116 | path: dist/
117 | - name: Publish distribution 📦 to TestPyPI
118 | uses: pypa/gh-action-pypi-publish@release/v1
119 | with:
120 | repository-url: https://test.pypi.org/legacy/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to DASPy
2 |
3 | We hope you will submit your changes through GitHub's pull request feature.
4 |
5 | ## Submit a Pull Request
6 |
7 | Here's a quick guide:
8 |
9 | 1. Fork the repo.
10 | 2. Make a new branch based on `main`.
11 | 3. Push to your fork and submit a pull request.
12 | 4. Wait for our review. We may suggest changes, improvements, or alternatives.
13 |
14 | ## DASPy Coding Style Guide
15 |
16 | Like most Python projects, we try to adhere to [PEP 8](https://peps.python.org/pep-0008/) (Style Guide for Python Code) and [PEP 257](https://peps.python.org/pep-0257/) (Docstring Conventions) with the modifications documented here. Be sure to read all documents if you intend to contribute code to DASPy.
17 |
18 | ## Naming
19 |
20 | ### Names to Avoid
21 |
22 | * single character names except for counters or iterators
23 | * dashes (-) in any package/module name
24 | * **__double_leading_and_trailing_underscore__** names (reserved by Python)
25 |
26 | ### Naming Convention
27 |
28 | * Use meaningful variable/function/method names; these will help other people a lot when reading your code.
29 | * Prepending a single underscore (_) marks an object as “internal”/“private”: it is not meant to be used by end users, and its API may change without notice (in contrast to public objects, whose API changes are handled with deprecation warnings for one release cycle).
30 | * Prepending a double underscore (__) to an instance variable or method effectively serves to make the variable or method private to its class (using name mangling).
31 | * Place related classes and top-level functions together in a module. Unlike Java, there is no need to limit yourself to one class per module.
32 | * Use CamelCase for class names, but snake_case for module names, variables and functions/methods.
33 |
--------------------------------------------------------------------------------
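As a small, made-up illustration of the naming rules above (the class and names below are invented for the example and are not part of DASPy):

```python
# Illustration only: a hypothetical module following the DASPy naming conventions.
class NoiseModel:                       # CamelCase for class names
    """Toy class used only to demonstrate naming style."""

    def __init__(self, window_length):  # snake_case for functions, methods and variables
        self.window_length = window_length
        self._cache = None              # single leading underscore: internal/private

    def estimate_level(self, data):
        """Return a simple mean-absolute-amplitude noise estimate."""
        return sum(abs(x) for x in data) / len(data)
```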
/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
1 | daspy/CONTRIBUTORS.txt
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 University of Science and Technology of China
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [](https://pypi.org/project/DASPy-toolbox/)
4 | [](https://opensource.org/license/mit)
5 | [](https://pypi.org/project/DASPy-toolbox/)
6 |
7 | [](https://doi.org/10.1785/0220240124)
8 | [](https://pypi.org/project/DASPy-toolbox/)
9 | [](https://anaconda.org/conda-forge/daspy-toolbox)
10 |
11 | DASPy is an open-source project dedicated to providing a Python package for DAS (Distributed Acoustic Sensing) data processing.
12 |
13 | The goal of the DASPy project is to lower the barrier to DAS data processing. DASPy includes:
14 | * Classic seismic data processing techniques, including preprocessing, filtering, spectrum analysis, and visualization.
15 | * Specialized algorithms for DAS applications, including denoising, waveform decomposition, channel attribute analysis, and strain-velocity conversion.
16 |
17 | DASPy is licensed under the MIT License. [An English version of the DASPy tutorial](https://daspy-tutorial.readthedocs.io/en/latest/), [a Chinese version of the DASPy tutorial](https://daspy-tutorial-cn.readthedocs.io/zh-cn/latest/) and [an example Jupyter notebook](document/example.ipynb) are available. If you have any questions, please contact me via hmz2018@mail.ustc.edu.cn.
18 |
19 | ## Installation
20 | DASPy runs on Linux, Windows and macOS, on Python 3.9 and up.
21 |
22 | ### Pip
23 | ```
24 | pip install daspy-toolbox
25 | ```
26 |
27 | Install the latest version from GitHub:
28 |
29 | ```
30 | pip install git+https://github.com/HMZ-03/DASPy.git
31 | ```
32 |
33 | ### Conda
34 |
35 | ```
36 | conda install daspy-toolbox
37 | ```
38 |
39 | or
40 |
41 | ```
42 | conda install conda-forge::daspy-toolbox
43 | ```
44 |
45 | ### Manual installation
46 | 1. Install dependent packages: numpy, scipy >=1.13, matplotlib, geographiclib, pyproj, h5py, segyio, nptdms, tqdm
47 |
48 | 2. Add DASPy into your Python path.
49 |
50 | ## Getting started
51 | ```
52 | from daspy import read
53 | sec = read() # load example waveform
54 | sec.bandpass(1, 15)
55 | sec.plot()
56 | ```
57 |
58 |
59 | ### Contributing
60 |
61 | Please see [CONTRIBUTING.md](CONTRIBUTING.md) and the [coding style guide](CodingStyleGuide.md) for details on how to contribute to the project.
62 |
63 | ### Reference
64 |
65 | * Minzhe Hu and Zefeng Li (2024), [DASPy: A Python Toolbox for DAS Seismology](https://pubs.geoscienceworld.org/ssa/srl/article/95/5/3055/645865/DASPy-A-Python-Toolbox-for-DAS-Seismology), *Seismological Research Letters*, 95(5), 3055–3066, doi: `https://doi.org/10.1785/0220240124`.
66 |
--------------------------------------------------------------------------------
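Beyond the minimal snippet above, a slightly fuller getting-started sketch (illustrative only; it combines `read`, `bandpass` and `plot` from the README with `channel_checking` from `daspy/advanced_tools/channel.py`):

```python
import numpy as np
from daspy import read
from daspy.advanced_tools.channel import channel_checking

sec = read()         # load the bundled example waveform (core/example.pkl)
sec.bandpass(1, 15)  # band-pass filter between 1 and 15 Hz, as in the README

# channel_checking works on a plain 2-D array (channels x time samples)
good_chn, bad_chn = channel_checking(np.asarray(sec.data), deg=10, thresh=5)
print(f'{len(bad_chn)} bad channels out of {len(good_chn) + len(bad_chn)}')

sec.plot()           # quick-look plot of the filtered section
```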
/daspy/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
1 | Hu, Minzhe
2 | Li, Zefeng
3 | Zhang, Ji
--------------------------------------------------------------------------------
/daspy/__init__.py:
--------------------------------------------------------------------------------
1 | from daspy.core.section import Section
2 | from daspy.core.collection import Collection
3 | from daspy.core.read import read
4 | from daspy.core.dasdatetime import DASDateTime, local_tz, utc
--------------------------------------------------------------------------------
/daspy/advanced_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/advanced_tools/__init__.py
--------------------------------------------------------------------------------
/daspy/advanced_tools/channel.py:
--------------------------------------------------------------------------------
1 | # Purpose: Several functions for analyzing data quality and geometry of channels
2 | # Author: Minzhe Hu, Zefeng Li
3 | # Date: 2025.3.31
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from copy import deepcopy
7 | from geographiclib.geodesic import Geodesic
8 | from pyproj import Proj
9 |
10 |
11 | def robust_polyfit(data, deg, thresh):
12 | """
13 | Fit a curve with a robust weighted polynomial.
14 |
15 | :param data: 1-dimensional array.
16 | :param deg: int. Degree of the fitting polynomial.
17 | :param thresh: int or float. MAD multiple used to define outliers.
18 | :return: Fitted data and the weights used in the final fit.
19 | """
20 | nch = len(data)
21 | channels = np.arange(nch)
22 | p_coef = np.polyfit(channels, data, deg)
23 | p_fit = np.poly1d(p_coef)
24 | old_data = p_fit(channels)
25 | mse = 1
26 |
27 | # robust fitting until the fitting curve changes < 0.1% at every point.
28 | while mse > 0.001:
29 | rsl = abs(data - old_data)
30 | mad = np.median(rsl)
31 | weights = np.zeros(nch)
32 | weights[rsl < thresh * mad] = 1
33 | p_coef = np.polyfit(channels, data, deg, w=weights)
34 | p_fit = np.poly1d(p_coef)
35 | new_data = p_fit(channels)
36 | mse = np.nanmax(np.abs((new_data - old_data) / old_data))
37 | old_data = new_data
38 |
39 | return new_data, weights
40 |
41 |
42 | def _continuity_checking(lst1, lst2, adjacent=2, toleration=2):
43 | lst1_raw = deepcopy(lst1)
44 | for chn in lst1_raw:
45 | discont = [a for a in lst2 if abs(a - chn) <= adjacent]
46 | if len(discont) >= adjacent * 2 + 1 - toleration:
47 | lst1.remove(chn)
48 | lst2.append(chn)
49 |
50 | return lst1, lst2
51 |
52 |
53 | def channel_checking(data, deg=10, thresh=5, continuity=True, adjacent=2,
54 | toleration=2, mode='low', verbose=False):
55 | """
56 | Use the energy of each channel to determine which channels are bad.
57 |
58 | :param data: 2-dimensional np.ndarray. Axis 0 is channel number and axis 1
59 | is time series.
60 | :param deg: int. Degree of the fitting polynomial
61 | :param thresh: int or float. The MAD multiple of bad channel energy lower
62 | than good channels.
63 | :param continuity: bool. Perform continuity checks on bad channels and good
64 | channels.
65 | :param adjacent: int. The number of nearby channels for continuity checks.
66 | :param toleration: int. The number of discontinuous channels allowed among
67 | the nearby channels (including itself) in the continuity check.
68 | :param mode: str. 'low' means bad channels have low amplitude, 'high' means
69 | bad channels have high amplitude, and 'both' means bad channels are
70 | likely to have low or high amplitude.
71 | :return: Good channels and bad channels.
72 | """
73 | nch = len(data)
74 | energy = np.log10(np.sum(data**2, axis=1))
75 | energy[energy == -np.inf] = -308
76 |
77 | # Remove abnormal value by robust polynomial fitting.
78 | fitted_energy, weights = robust_polyfit(energy, deg, thresh)
79 | deviation = energy - fitted_energy
80 |
81 | # Iteratively eliminate outliers.
82 | mad = np.median(abs(deviation[weights > 0]))
83 | if mode == 'low':
84 | bad_chn = np.argwhere(deviation < -thresh * mad).ravel().tolist()
85 | elif mode == 'high':
86 | bad_chn = np.argwhere(deviation > thresh * mad).ravel().tolist()
87 | elif mode == 'both':
88 | bad_chn = np.argwhere(deviation < -thresh * mad).ravel().tolist() + \
89 | np.argwhere(deviation > thresh * mad).ravel().tolist()
90 | good_chn = list(set(range(nch)) - set(bad_chn))
91 |
92 | if continuity:
93 | # Discontinuous normal values are treated as bad channels.
94 | good_chn, bad_chn = _continuity_checking(good_chn, bad_chn,
95 | adjacent=adjacent,
96 | toleration=toleration)
97 |
98 | # Discontinuous outliers are usually not bad channels.
99 | bad_chn, good_chn = _continuity_checking(bad_chn, good_chn,
100 | adjacent=adjacent,
101 | toleration=toleration)
102 |
103 | bad_chn = np.sort(np.array(bad_chn))
104 | good_chn = np.sort(np.array(good_chn))
105 | if verbose:
106 | return good_chn, bad_chn, energy, fitted_energy - thresh * mad
107 |
108 | return good_chn, bad_chn
109 |
110 |
111 | def _channel_location(track_pt):
112 | track, tn = track_pt[:, :-1], track_pt[:, -1]
113 | dim = track.shape[1]
114 | l_track = np.sqrt(np.sum(np.diff(track, axis=0) ** 2, axis=1))
115 | l_track_cum = np.hstack(([0], np.cumsum(l_track)))
116 | idx_kp = np.where(tn >= 0)[0]
117 |
118 | interp_ch = []
119 | chn = np.floor(tn[idx_kp[0]]).astype(int)
120 | interp_ch.append([*track[idx_kp[0]], chn])
121 | if abs(chn - tn[idx_kp[0]]) > 1e-6:
122 | chn += 1
123 |
124 | seg_interval = []
125 | for i in range(1, len(idx_kp)):
126 | # calculate actual interval between known-channel points
127 | istart, iend = idx_kp[i - 1], idx_kp[i]
128 | n_chn_kp = tn[iend] - tn[istart]
129 | d_interp = (l_track_cum[iend] - l_track_cum[istart]) / n_chn_kp
130 | seg_interval.append([tn[istart], tn[iend], d_interp])
131 |
132 | l_res = 0 # remaining fiber length before counting the next segment
133 | # consider if the given channel number is not an integer
134 | chn_res = tn[istart] - int(tn[istart])
135 | if d_interp == 0:
136 | while chn < int(tn[iend]):
137 | chn += 1
138 | interp_ch.append([*track[istart, :], chn])
139 | continue
140 | for j in range(istart, iend):
141 | l_start = l_track[j] + l_res
142 |
143 | # if the track-point segment length covers more than one interval, get
144 | # the channel locations
145 | if l_start >= d_interp * (1 - chn_res - 1e-6):
146 | # floor int, num of channel available
147 | n_chn_tp = int(l_start / d_interp + chn_res)
148 | l_new = (np.arange(n_chn_tp) + 1 - chn_res) * d_interp - \
149 | l_res # channel distance from segment start
150 |
151 | # interpolate the channel loc
152 | t_new = np.zeros((len(l_new), dim))
153 | for d in range(dim):
154 | t_new[:, d] = np.interp(l_new, [0, l_track[j]],
155 | [track[j, d], track[j + 1, d]])
156 |
157 | # remaining length to add to next segment
158 | l_res = l_start - n_chn_tp * d_interp
159 |
160 | # write interpolated channel loc
161 | for ti in t_new:
162 | chn += 1
163 | interp_ch.append([*ti, chn])
164 |
165 | # handle floor int problem when l_start/d_interp is near an
166 | # integer
167 | if (d_interp - l_res) / d_interp < 1e-6:
168 | chn += 1
169 | interp_ch.append([*track[j + 1, :], int(tn[j + 1])])
170 | l_res = 0
171 | chn_res = 0
172 | # if tp segment length is not enough for one interval, simply add
173 | # the length to next segment
174 | elif l_start < d_interp:
175 | l_res = l_start
176 |
177 | if abs(tn[iend] - int(tn[iend])) > 1e-6:
178 | chn += 1
179 | interp_ch.append([*track[iend, :], chn])
180 |
181 | return np.array(seg_interval), np.array(interp_ch)
182 |
183 |
184 | def location_interpolation(known_pt, track_pt=None, dx=2, data_type='lonlat',
185 | verbose=False):
186 | """
187 | Interpolate to obtain the positions of all channels.
188 |
189 | :param known_pt: np.ndarray. Points with known channel numbers. Each row
190 | includes 2 or 3 coordinates and a channel number.
191 | :param track_pt: np.ndarray. Optional fiber spatial track points without
192 | channel numbers. Each row includes 2 or 3 coordinates. Please ensure
193 | that the track points are arranged in increasing order of track number.
194 | If track points is not dense enough, please insert the coordinates of
195 | known points into track points in order.
196 | :param dx: Known points far from the track (> dx) will be excluded.
197 | Recommended setting is channel interval. The unit is m.
198 | :param data_type: str. Coordinate type. 'lonlat' ('lonlatheight') for
199 | longitude, latitude in degree (and height in meters), 'xy' ('xyz') for
200 | x, y (and z) in meters.
201 | :param verbose: bool. If True, return interpolated channel locations and
202 | segment intervals.
203 | :return: Interpolated channel locations if verbose is False.
204 | """
205 | known_pt = known_pt[known_pt[:,-1].argsort()]
206 | dim = known_pt.shape[1] - 1
207 | if 'lonlat' in data_type:
208 | zone = np.floor((max(known_pt[:,0]) + min(known_pt[:,0])) / 2 / 6)\
209 | .astype(int) + 31
210 | DASProj = Proj(proj='utm', zone=zone, ellps='WGS84',
211 | preserve_units=False)
212 | known_pt[:, 0], known_pt[:, 1] = DASProj(known_pt[:, 0], known_pt[:, 1])
213 | else:
214 | assert 'xy' in data_type, ('data_type should be \'lonlat\',\''
215 | 'lonlatheight\', \'xy\' or \'xyz\'')
216 |
217 | if track_pt is None:
218 | seg_interval, interp_ch = _channel_location(known_pt)
219 | else:
220 | K = len(known_pt)
221 | T = len(track_pt)
222 | track_pt = np.c_[track_pt, np.zeros(T) - 1]
223 | if 'lonlat' in data_type:
224 | track_pt[:, 0], track_pt[:, 1] = DASProj(track_pt[:, 0],
225 | track_pt[:, 1])
226 |
227 | # insert the known points into the fiber track data
228 | matrix = [np.tile(track_pt[:, d], (K, 1)) -
229 | np.tile(known_pt[:, d], (T, 1)).T for d in range(dim)]
230 |
231 | dist = np.sqrt(np.sum(np.array(matrix) ** 2, axis=0))
232 | for k in range(K):
233 | if min(dist[k]) < dx:
234 | t_list = np.sort(np.where(dist[k] == min(dist[k]))[0])
235 | for t in t_list:
236 | if track_pt[t, -1] == -1:
237 | track_pt[t, -1] = known_pt[k, -1]
238 | last_pt = t
239 | break
240 |
241 | # interpolation with regular spacing along the fiber track
242 | try:
243 | track_pt = track_pt[:last_pt + 1]
244 | except NameError:
245 | print('All known points are too far away from the track points. If '
246 | 'they are reliable, they can be merged in sequence as track '
247 | 'points to input')
248 | return None
249 |
250 | seg_interval, interp_ch = _channel_location(track_pt)
251 |
252 | if 'lonlat' in data_type:
253 | interp_ch[:, 0], interp_ch[:, 1] = \
254 | DASProj(interp_ch[:, 0], interp_ch[:, 1], inverse=True)
255 |
256 | if verbose:
257 | return interp_ch, seg_interval
258 | return interp_ch
259 |
260 |
261 | def _xcorr(x, y):
262 | N = len(x)
263 | meanx = np.mean(x)
264 | meany = np.mean(y)
265 | stdx = np.std(np.asarray(x))
266 | stdy = np.std(np.asarray(y))
267 | c = np.sum((y - meany) * (x - meanx)) / (N * stdx * stdy)
268 | return c
269 |
270 |
271 | def _horizontal_angle_change(geo, gap=10):
272 | nch = len(geo)
273 | angle = np.zeros(nch)
274 | for i in range(1, nch - 1):
275 | lon, lat = geo[i]
276 | lon_s, lat_s = geo[max(i - gap, 0)]
277 | lon_e, lat_e = geo[min(i + gap, nch - 1)]
278 | azi_s = Geodesic.WGS84.Inverse(lat_s, lon_s, lat, lon)['azi1']
279 | azi_e = Geodesic.WGS84.Inverse(lat, lon, lat_e, lon_e)['azi1']
280 | dazi = azi_e - azi_s
281 | if abs(dazi) > 180:
282 | dazi = -np.sign(dazi) * (360 - abs(dazi))
283 | angle[i] = dazi
284 |
285 | return angle
286 |
287 |
288 | def _vertical_angle_change(geo, gap=10):
289 | nch = len(geo)
290 | angle = np.zeros(nch)
291 | for i in range(1, nch - 1):
292 | lon, lat, dep = geo[i]
293 | lon_s, lat_s, dep_s = geo[max(i - gap, 0)]
294 | lon_e, lat_e, dep_e = geo[min(i + gap, nch - 1)]
295 | s12_s = Geodesic.WGS84.Inverse(lat_s, lon_s, lat, lon)['s12']
296 | theta_s = np.arctan((dep - dep_s) / s12_s) / np.pi * 180
297 | s12_e = Geodesic.WGS84.Inverse(lat, lon, lat_e, lon_e)['s12']
298 | theta_e = np.arctan((dep_e - dep) / s12_e) / np.pi * 180
299 | angle[i] = theta_e - theta_s
300 |
301 | return angle
302 |
303 |
304 | def _local_maximum_indexes(data, thresh):
305 | idx = np.where(data > thresh)[0]
306 | if len(idx):
307 | i = list(np.where(np.diff(idx) > 1)[0] + 1)
308 | if len(idx) - 1 not in i:
309 | i.append(len(idx) - 1)
310 | b = 0
311 | max_idx = []
312 | for e in i:
313 | max_idx.append(idx[b] + np.argmax(data[idx[b]:idx[e]]))
314 | b = e
315 | return max_idx
316 | else:
317 | return []
318 |
319 |
320 | def turning_points(data, data_type='coordinate', thresh=5, depth_info=False,
321 | channel_gap=3):
322 | """
323 | Seek turning points in the DAS channel.
324 |
325 | :param data: numpy.ndarray. Data used to seek turning points.
326 | :param data_type: str. If data_type is 'coordinate', data should include
327 | longitude and latitude (first two columns), and can also include depth
328 | (last column). If data_type is 'waveform', data should be continuous
329 | waveform, preferably containing signal with strong coherence
330 | (earthquake, traffic signal, etc.).
331 | :param thresh: For coordinate data, when the angle of the optical cables on
332 | both sides centered on a certain point exceeds thresh, it is considered
333 | a turning point. For waveform, thresh means the MAD multiple of
334 | adjacent channel cross-correlation values lower than their median.
335 | :param depth_info: bool. Optional if data_type is 'coordinate'. Whether
336 | depth (in meters) is included in the coordinate data and need to be
337 | used.
338 | :param channel_gap: int. Optional if data_type is 'coordinate'. The smaller
339 | the value is, the finer the segmentation will be. It is recommended to
340 | set it to half the ratio of gauge length and channel interval.
341 | :return: list. Channel index of turning points.
342 | """
343 | if data_type == 'coordinate':
344 | angle = _horizontal_angle_change(data[:, :2], gap=channel_gap)
345 | turning_h = _local_maximum_indexes(abs(angle), thresh)
346 |
347 | if depth_info:
348 | angle = _vertical_angle_change(data, gap=channel_gap)
349 | turning_v = _local_maximum_indexes(abs(angle), thresh)
350 | return turning_h, turning_v
351 |
352 | return turning_h
353 |
354 | elif data_type == 'waveform':
355 | nch = len(data)
356 | cc = np.zeros(nch - 1)
357 | for i in range(nch - 1):
358 | cc[i] = _xcorr(data[i], data[i + 1])
359 | median = np.median(cc)
360 | mad = np.median(abs(cc - median))
361 |
362 | return np.argwhere(cc < median - thresh * mad).ravel()
363 |
364 | else:
365 | raise ValueError('Data_type should be \'coordinate\' or \'waveform\'.')
366 |
367 |
368 | def channel_spacing(geometry, depth_info=False):
369 | nch = len(geometry)
370 | dist = np.zeros(nch - 1)
371 | for i in range(nch - 1):
372 | lon0, lat0 = geometry[i, :2]
373 | lon1, lat1 = geometry[i+1, :2]
374 | d = Geodesic.WGS84.Inverse(lat0, lon0, lat1, lon1)['s12']
375 | if depth_info:
376 | dist[i] = np.sqrt(d**2 + (geometry[i+1, 2] - geometry[i, 2]) ** 2)
377 | else:
378 | dist[i] = d
379 |
380 | return dist
381 |
382 |
383 | def distance_to_channels(geometry, points):
384 | """
385 | Calculate the distance from a point to each channel.
386 |
387 | :param geometry: numpy.ndarray. It needs to consist of two columns
388 | (longitude, latitude) or three columns (longitude, latitude and depth).
389 | :param points: numpy.ndarray. An array consisting of longitude and
390 | latitude, or longitude, latitude and depth.
391 | :return: numpy.ndarray. The distance from the given point to each channel.
392 | """
393 | if geometry.shape[1] == 3:
394 | depth_info = True
395 | else:
396 | depth_info = False
397 |
398 | nch = len(geometry)
399 | points = np.array(points)
400 | if points.ndim == 1:
401 | points = points.reshape(1, -1)
402 | npt = len(points)
403 | dist = np.zeros((npt, nch))
404 | for i, pt in enumerate(points):
405 | for j, geo in enumerate(geometry):
406 | d = Geodesic.WGS84.Inverse(pt[1], pt[0], geo[1], geo[0])['s12']
407 | if depth_info:
408 | dist[i, j] = np.sqrt(d**2 + (pt[2] - geo[2]) ** 2)
409 | else:
410 | dist[i, j] = d
411 | return dist
412 |
413 |
414 | def closest_channel_to_point(geometry, points, verbose=False):
415 | """
416 | Find the channel number closest to a given point.
417 |
418 | :param geometry: numpy.ndarray. It needs to consist of longitude, latitude
419 | (and depth) or channel number, longitude, latitude (and depth).
420 | :param points: numpy.ndarray. A tuple consisting of longitude and
421 | latitude (and depth).
422 | :param verbose: bool. Return the channel and the distance to the closest
423 | channel if True.
424 | :return: int. The channel number closest to the given point.
425 | """
426 | nch = len(geometry)
427 | if points.shape[1] == geometry.shape[1]:
428 | channels = np.arange(nch).astype(int)
429 | else:
430 | geometry = geometry[geometry[:, 0].argsort()]
431 | channels = geometry[:, 0].astype(int)
432 | geometry = geometry[:, 1:]
433 |
434 | dist = distance_to_channels(geometry, points)
435 | closest_index = np.argmin(dist, axis=1)
436 | if verbose:
437 | return channels[closest_index], np.min(dist, axis=1)
438 | return channels[closest_index]
439 |
440 |
441 | def _equally_spacing(dist, dx):
442 | index = [[], []]
443 | residual = [0, abs(dist[0]-dx)]
444 | for i in range(2, len(dist)+1):
445 | res = []
446 | for j in range(i):
447 | res.append(residual[j] + abs(dx - sum(dist[j:i])))
448 | residual.append(min(res))
449 | k = np.argmin(res)
450 | if k > 0:
451 | index.append(index[k] + [k])
452 | else:
453 | index.append(index[k])
454 |
455 | return index[-1]
456 |
457 |
458 | def equally_spaced_channels(geometry, dx, depth_info=False, verbose=False):
459 | """
460 | Find equally spaced channel numbers based on known DAS latitude and
461 | longitude.
462 |
463 | :param geometry: numpy.ndarray. DAS geometry used to filter equally spaced
464 | channels. It needs to consist of longitude, latitude (and depth) or
465 | channel number, longitude, latitude (and depth).
466 | :param dx: Channel interval.
467 | :param depth_info: bool. Whether depth (in meters) is included in the
468 | geometry and needed to be used.
469 | :param verbose: bool. If True, return channel numbers for equally spaced
470 | channels and channel intervals.
471 | :return: Channel numbers for equally spaced channels if verbose is False.
472 | """
473 | nch = len(geometry)
474 | if geometry.shape[1] == 2 + int(depth_info):
475 | channels = np.arange(nch).astype(int)
476 | else:
477 | geometry = geometry[geometry[:, 0].argsort()]
478 | channels = geometry[:, 0].astype(int)
479 | geometry = geometry[:, 1:]
480 |
481 | dist = channel_spacing(geometry, depth_info=depth_info)
482 |
483 | s = 0
484 | idx_equal = [0]
485 | for i in range(nch-2):
486 | if dist[i] > dx * 2:
487 | e = i
488 | if e == s + 1:
489 | idx_equal.append(e)
490 | elif e >= s + 2:
491 | idx_equal.extend([idx + s for idx in
492 | _equally_spacing(dist[s:e], dx)])
493 | idx_equal.append(e)
494 | s = e + 1
495 | idx_equal.append(s)
496 | elif dist[i] + dist[i+1] > dx * 1.5:
497 | e = i + 1
498 | if e == s + 1:
499 | idx_equal.append(e)
500 | elif e >= s + 2:
501 | idx_equal.extend([idx + s for idx in
502 | _equally_spacing(dist[s:e], dx)])
503 | idx_equal.append(e)
504 | s = e
505 | e = nch - 1
506 | if e == s + 1:
507 | idx_equal.append(e)
508 | elif e >= s + 2:
509 | idx_equal.extend([idx + s for idx in
510 | _equally_spacing(dist[s:e], dx)])
511 | idx_equal.append(e)
512 |
513 | return channels[idx_equal]
--------------------------------------------------------------------------------
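A minimal usage sketch for `location_interpolation`, based only on the docstring above; the coordinates and channel numbers are invented example values:

```python
import numpy as np
from daspy.advanced_tools.channel import location_interpolation

# Points with known channel numbers: longitude, latitude, channel number (made-up values).
known_pt = np.array([
    [-117.600, 35.600, 0],
    [-117.595, 35.605, 250],
    [-117.590, 35.610, 500],
])

# Interpolate positions for every channel along the segments between the known
# points; data_type='lonlat' converts to UTM internally and back for the output.
interp_ch = location_interpolation(known_pt, dx=2, data_type='lonlat')
print(interp_ch.shape)  # one row per channel: longitude, latitude, channel number
```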
/daspy/advanced_tools/decomposition.py:
--------------------------------------------------------------------------------
1 | # Purpose: Waveform decomposition
2 | # Author: Minzhe Hu
3 | # Date: 2024.5.13
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from numpy.fft import irfft2, ifftshift
7 | from daspy.basic_tools.preprocessing import padding, cosine_taper
8 | from daspy.basic_tools.freqattributes import next_pow_2, fk_transform
9 | from daspy.advanced_tools.denoising import curvelet_denoising
10 |
11 |
12 | def fk_fan_mask(f, k, fmin=None, fmax=None, kmin=None, kmax=None, vmin=None,
13 | vmax=None, edge=0.1, flag=None):
14 | """
15 | Make a fan mask in f-k domain for f-k filter.
16 |
17 | :param f: Frequency sequence.
18 | :param k: Wavenumber sequence.
19 | :param fmin, fmax, kmin, kmax, vmin, vmax: float or sequence of 2 floats.
20 | Sequence of 2 floats represents the start and end of taper.
21 | :param edge: float. The width of fan mask taper edge.
22 | :param flag: -1 keep only negative apparent velocities, 0 keep both positive
23 | and negative apparent velocities, 1 keep only positive apparent
24 | velocities.
25 | :return: Fan mask.
26 | """
27 | ff = np.tile(f, (len(k), 1))
28 | kk = np.tile(k, (len(f), 1)).T
29 | vv = - np.divide(ff, kk, out=np.ones_like(ff) * 1e10, where=kk != 0)
30 | mask = np.ones(vv.shape)
31 | for phy_quan in ['f', 'k', 'v']:
32 | p = eval(phy_quan * 2)
33 | pmin = eval(phy_quan + 'min')
34 | if pmin:
35 | if isinstance(pmin, (tuple, list, np.ndarray)):
36 | tp_b, tp_e = min(pmin), max(pmin)
37 | else:
38 | tp_b, tp_e = pmin * max(1 - edge / 2, 0), pmin * (1 + edge / 2)
39 | tp_wid = tp_e - tp_b
40 | mask[(abs(p) <= tp_b)] = 0
41 | area = (abs(p) > tp_b) & (abs(p) < tp_e)
42 | mask[area] *= 0.5 - 0.5 * \
43 | np.cos(((abs(p[area]) - tp_b) / tp_wid) * np.pi)
44 |
45 | pmax = eval(phy_quan + 'max')
46 | if pmax:
47 | if isinstance(pmax, (tuple, list, np.ndarray)):
48 | tp_b, tp_e = max(pmax), min(pmax)
49 | else:
50 | tp_b, tp_e = pmax * (1 + edge / 2), pmax * (1 - edge / 2)
51 | tp_wid = tp_b - tp_e
52 | mask[(abs(p) >= tp_b)] = 0
53 | area = (abs(p) > tp_e) & (abs(p) < tp_b)
54 | mask[area] *= 0.5 - 0.5 * \
55 | np.cos(((tp_b - abs(p[area])) / tp_wid) * np.pi)
56 |
57 | if flag:
58 | mask[np.sign(vv) == flag] = 0
59 | return mask
60 |
61 |
62 | def fk_filter(data, dx, fs, taper=(0.02, 0.05), pad='default', mode='decompose',
63 | fmin=None, fmax=None, kmin=None, kmax=None, vmin=None, vmax=None,
64 | edge=0.1, flag=None, verbose=False):
65 | """
66 | Transform the data to the f-k domain using 2-D Fourier transform method, and
67 | transform back to the x-t domain after filtering.
68 |
69 | :param data: numpy.ndarray. Data to do fk filter.
70 | :param dx: Channel interval in m.
71 | :param fs: Sampling rate in Hz.
72 | :param taper: float or sequence of floats. Each float means decimal
73 | percentage of Tukey taper for the corresponding dimension (ranging from
74 | 0 to 1). The default (0.02, 0.05) tapers 1% of channels and 2.5% of time
75 | samples from each end.
76 | :param pad: Pad the data or not. It can be float or sequence of floats. Each
77 | float means padding percentage before FFT for corresponding dimension.
78 | If set to 0.1 will pad 5% before the beginning and after the end.
79 | 'default' means pad both dimensions to next power of 2. None or False
80 | means don't pad data before or during Fast Fourier Transform.
81 | :param mode: str. 'remove' for denoising, 'retain' for extraction, and
82 | 'decompose' for decomposition.
83 | :param fmin, fmax, kmin, kmax, vmin, vmax: float or sequence of 2 floats.
84 | Sequence of 2 floats represents the start and end of taper.
85 | :param edge: float. The width of fan mask taper edge.
86 | :param flag: -1 keep only negative apparent velocities, 0 keep both positive
87 | and negative apparent velocities, 1 keep only positive apparent
88 | velocities.
89 | :param verbose: If True, return filtered data, f-k spectrum, frequency
90 | sequence, wavenumber sequence and f-k mask.
91 | :return: Filtered data and some variables in the process if verbose==True.
92 | """
93 | data_tp = cosine_taper(data, taper)
94 | if pad == 'default':
95 | nch, nt = data.shape
96 | dn = (next_pow_2(nch) - nch, next_pow_2(nt) - nt)
97 | nfft = None
98 | elif pad is None or pad is False:
99 | dn = 0
100 | nfft = None
101 | else:
102 | dn = np.round(np.array(pad) * data.shape).astype(int)
103 | nfft = 'default'
104 |
105 | data_pd = padding(data_tp, dn)
106 | nch, nt = data_pd.shape
107 |
108 | fk, f, k = fk_transform(data_pd, dx, fs, taper=0, nfft=nfft)
109 |
110 | mask = fk_fan_mask(f, k, fmin, fmax, kmin, kmax, vmin, vmax, edge=edge,
111 | flag=flag)
112 |
113 | if mode == 'remove':
114 | mask = 1 - mask
115 |
116 | if mode == 'decompose':
117 | data_flt1 = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt]
118 | data_flt1 = padding(data_flt1, dn, reverse=True)
119 | data_flt2 = irfft2(ifftshift(fk * (1 - mask), axes=0)).real[:nch, :nt]
120 | data_flt2 = padding(data_flt2, dn, reverse=True)
121 | if verbose:
122 | return data_flt1, data_flt2, fk, f, k, mask
123 | else:
124 | return data_flt1, data_flt2
125 | else:
126 | data_flt = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt]
127 | data_flt = padding(data_flt, dn, reverse=True)
128 | if verbose:
129 | return data_flt, fk, f, k, mask
130 | else:
131 | return data_flt
132 |
133 |
134 | def curvelet_windowing(data, dx, fs, mode='decompose', vmin=0, vmax=np.inf,
135 | flag=None, pad=0.3, scale_begin=3, nbscales=None,
136 | nbangles=16, finest=1):
137 | """
138 | Use curvelet transform to keep coherent signal within a certain velocity
139 | range. {Atterholt et al., 2022, Geophys. J. Int.}
140 |
141 | :param data: numpy.ndarray. Data to decompose.
142 | :param dx: Channel interval in m.
143 | :param fs: Sampling rate in Hz.
144 | :param mode: str. 'remove' for denoising, 'retain' for extraction, and
145 | 'decompose' for decomposition.
146 | :param vmin, vmax: float. Velocity range in m/s.
147 | :param flag: -1 keep only negative apparent velocities, 0 keep both positive
148 | and negative apparent velocities, 1 keep only positive apparent
149 | velocities.
150 | :param pad: float or sequence of floats. Each float means padding percentage
151 | before FFT for corresponding dimension. If set to 0.1 will pad 5% before
152 | the beginning and after the end.
153 | :param scale_begin: int. The beginning scale to do coherent denoising.
154 | :param nbscales: int. Number of scales including the coarsest wavelet level.
155 | Default set to ceil(log2(min(M,N)) - 3).
156 | :param nbangles: int. Number of angles at the 2nd coarsest level,
157 | minimum 8, must be a multiple of 4.
158 | :param finest: int. Objects at the finest scale. 1 for curvelets, 2 for
159 | wavelets. Curvelets are more precise while wavelets are more efficient.
160 | :return: numpy.ndarray. Decomposed data.
161 | """
162 | return curvelet_denoising(data, choice=1, pad=pad, vmin=vmin, vmax=vmax,
163 | flag=flag, dx=dx, fs=fs, mode=mode,
164 | scale_begin=scale_begin, nbscales=nbscales,
165 | nbangles=nbangles, finest=finest)
166 |
--------------------------------------------------------------------------------
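A short sketch of calling `fk_filter` on a synthetic section, following the signature above; `dx`, `fs` and the velocity band are arbitrary example values:

```python
import numpy as np
from daspy.advanced_tools.decomposition import fk_filter

rng = np.random.default_rng(0)
data = rng.standard_normal((128, 1024))  # synthetic section: 128 channels x 1024 samples
dx, fs = 10, 100                         # 10 m channel interval, 100 Hz sampling

# 'decompose' splits the wavefield into the part inside the velocity fan
# (here 1-6 km/s apparent velocity) and everything else.
data_in, data_out = fk_filter(data, dx, fs, mode='decompose', vmin=1000, vmax=6000)

# 'retain' keeps only the masked part and returns a single array.
data_flt = fk_filter(data, dx, fs, mode='retain', vmin=1000, vmax=6000)
```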
/daspy/advanced_tools/denoising.py:
--------------------------------------------------------------------------------
1 | # Purpose: Remove noise from data
2 | # Author: Minzhe Hu, Zefeng Li
3 | # Date: 2024.5.13
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from copy import deepcopy
7 | from scipy.ndimage import median_filter
8 | from scipy.interpolate import interp1d
9 | from daspy.basic_tools.preprocessing import padding
10 | from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping
11 |
12 |
13 | def spike_removal(data, nch=50, nsp=5, thresh=10):
14 | """
15 | Use a median filter to remove high-strain spikes in the data. Modified from
16 | https://github.com/atterholt/curvelet-denoising/blob/main/MedianFilter.m
17 |
18 | :param data: numpy.ndarray. Data to remove spikes from.
19 | :param nch: int. Number of channels over which to compute the median.
20 | :param nsp: int. Number of sampling points over which to compute the median.
21 | :param thresh: Ratio threshold over the median over which a number is
22 | considered to be an outlier.
23 | :return: numpy.ndarray. Data with spikes removed.
24 | """
25 | absdata = np.abs(data)
26 |
27 | medians1 = median_filter(absdata, (nch, 1))
28 | medians = median_filter(medians1, (1, nsp))
29 | ratio = absdata / medians # comparisons matrix
30 |
31 | # find the bad values and interpolate with their neighbors
32 | data_dn = data.copy()
33 | out_i, out_j = np.where(ratio > thresh)
34 | for j in set(out_j):
35 | bch = out_i[out_j == j]
36 | gch = list(set(range(len(data))) - set(bch))
37 | f = interp1d(gch, data[gch, j], bounds_error=False,
38 | fill_value=(data[gch[0], j], data[gch[-1], j]))
39 | data_dn[bch, j] = f(bch)
40 |
41 | return data_dn
42 |
43 |
44 | def common_mode_noise_removal(data, method='median'):
45 | """
46 | Remove common mode noise (sometimes called horizontal noise) from data.
47 |
48 | :param data: numpy.ndarray. Data to remove common mode noise.
49 | :param method: str. Method for extracting common mode noise: 'median' or
50 | 'mean'.
51 | :return: numpy.ndarray. Denoised data.
52 | """
53 | nch, nt = data.shape
54 | if method == 'median':
55 | common = np.median(data, 0)
56 | elif method == 'mean':
57 | common = np.mean(data, 0)
58 |
59 | xx = np.sum(common ** 2)
60 | data_dn = np.zeros((nch, nt))
61 | for i in range(nch):
62 | xc = np.sum(common * data[i])
63 | data_dn[i] = data[i] - xc / xx * common
64 |
65 | return data_dn
66 |
67 |
68 | def _noise_level(data, finest=2, nbscales=None, nbangles=16, percentile=95):
69 | """
70 | Find threshold for curvelet denoising with noise record.
71 |
72 | :param data: numpy.ndarray. Noise data.
73 | :param nbscales: int. Number of scales including the coarsest wavelet level.
74 | Default set to ceil(log2(min(M,N)) - 3).
75 | :param nbangles: int. Number of angles at the 2nd coarsest level,
76 | minimum 8, must be a multiple of 4.
77 | :param percentile: number. The threshold is taken as this percentile of the
78 | curvelet coefficient of the noise record
79 | :return: 2-D list. Threshold for curvelet coefficients.
80 | """
81 | C = fdct_wrapping(data, is_real=True, finest=finest, nbscales=nbscales,
82 | nbangles_coarse=nbangles)
83 |
84 | E_noise = []
85 | for s in range(len(C)):
86 | E_noise.append([])
87 | for w in range(len(C[s])):
88 | threshold = np.percentile(abs(C[s][w]), percentile)
89 | E_noise[s].append(threshold)
90 |
91 | return E_noise
92 |
93 |
94 | def _knee_points(C, factor=0.2):
95 | """
96 | Find threshold for curvelet denoising without noise record.
97 |
98 | :param C: 2-D list of np.ndarray. Array of curvelet coefficients.
99 | :param factor: float. Multiplication factor from 0 to 1. Small factor
100 | corresponds to conservative strategy.
101 | :return: 2-D list. Threshold for curvelet coefficients.
102 | """
103 | E_knee = []
104 | for s in range(len(C)):
105 | E_knee.append([])
106 | for w in range(len(C[s])):
107 | F, x = np.histogram(abs(C[s][w]), density=True)
108 | x = (x[1:] + x[:-1]) / 2
109 | F = np.cumsum(F) / np.sum(F)
110 | slope = (x[-1] - x[0]) / (F[-1] - F[0])
111 | tiltedplot = x - (slope * F)
112 | idx = np.argmin(tiltedplot)
113 | E_knee[s].append(x[idx] * factor)
114 |
115 | return E_knee
116 |
117 |
118 | def _velocity_bin(nbangles, fs, dx):
119 | v_bounds = np.zeros(nbangles // 4 + 1)
120 | half = nbangles // 8
121 | v_bounds[half] = fs * dx
122 | np.seterr(divide='ignore')
123 | for i in range(half):
124 | v_bounds[i] = i / half * fs * dx
125 | v_bounds[half + i + 1] = np.divide(fs * dx, 1 - (i + 1) / half)
126 |
127 | np.seterr(divide='warn')
128 | v_lows = list(range(half - 1, -1, -1)) + list(range(half * 2)) + \
129 | list(range(2 * half - 1, half - 1, -1))
130 | velocity = []
131 | for i in range(nbangles // 2):
132 | v_low = v_bounds[v_lows[i]]
133 | v_high = v_bounds[v_lows[i] + 1]
134 | velocity.append([v_low, v_high])
135 | velocity = np.array(velocity * 2)
136 | for i in range(half):
137 | velocity[i] = -1 * velocity[i][::-1]
138 | velocity[3 * half + i] = -1 * velocity[3 * half + i][::-1]
139 | velocity[4 * half + i] = -1 * velocity[4 * half + i][::-1]
140 | velocity[7 * half + i] = -1 * velocity[7 * half + i][::-1]
141 | return velocity
142 |
143 |
144 | def _mask_factor(velocity, vmin, vmax, flag=0):
145 | if flag:
146 | if flag == -1:
147 | # negate and swap the velocity bounds in one step so vmin is not overwritten first
148 | vmin, vmax = -vmax, -vmin
149 | else:
150 | half = len(velocity) // 8
151 | for i in range(half):
152 | velocity[i] = -1 * velocity[i][::-1]
153 | velocity[3 * half + i] = -1 * velocity[3 * half + i][::-1]
154 | velocity[4 * half + i] = -1 * velocity[4 * half + i][::-1]
155 | velocity[7 * half + i] = -1 * velocity[7 * half + i][::-1]
156 |
157 | factors = np.zeros(len(velocity))
158 | for i, (v_low, v_high) in enumerate(velocity):
159 | v1 = max(v_low, vmin)
160 | v2 = min(v_high, vmax)
161 | if v1 < v2:
162 | if v_high == np.inf or v_low == -np.inf:
163 | factors[i] = 1
164 | else:
165 | factors[i] = np.divide(v2 - v1, v_high - v_low)
166 |
167 | return factors
168 |
169 |
170 | def curvelet_denoising(data, choice=0, pad=0.3, noise=None, noise_perc=95,
171 | knee_fac=0.2, soft_thresh=True, vmin=0, vmax=np.inf,
172 | flag=0, dx=None, fs=None, mode='remove',
173 | scale_begin=3, nbscales=None, nbangles=16, finest=2):
174 | """
175 | Use curvelet transform to filter stochastic and/or coherent noise.
176 | Modified from
177 | https://github.com/atterholt/curvelet-denoising/blob/main/CurveletDenoising.m
178 | {Atterholt et al., 2022 , Geophys. J. Int.}
179 |
180 | :param data: numpy.ndarray. Data to denoise.
181 | :param choice: int. 0 for Gaussian denoising using soft thresholding, 1 for
182 | velocity filtering using the standard FK methodology and 2 for both.
183 | :param pad: float or sequence of floats. Each float means padding percentage
184 | before FFT for corresponding dimension. If set to 0.1 will pad 5% before
185 | the beginning and after the end.
186 | :param noise: numpy.ndarray or daspy.Section. Noise record as reference.
187 | :param noise_perc: number. The threshold is taken as this percentile of the
188 | curvelet coefficient of the noise record. (only used when noise is
189 | specified)
190 | :param knee_fac: float. Multiplication factor from 0 to 1. Small factor
191 | corresponds to conservative strategy. (only used when noise is not
192 | specified)
193 | :param soft_thresh: bool. True for soft thresholding and False for hard
194 | thresholding.
195 | :param vmin, vmax: float. Velocity range in m/s.
196 | :param flag: -1 choose only negative apparent velocities, 0 choose both
197 | positive and negative apparent velocities, 1 choose only positive
198 | apparent velocities.
199 | :param dx: Channel interval in m.
200 | :param fs: Sampling rate in Hz.
201 | :param mode: str. Only available when choice in (1,2). 'remove' for
202 | denoising, 'retain' for extraction, and 'decompose' for decomposition.
203 | :param scale_begin: int. The beginning scale to do coherent denoising.
204 | :param nbscales: int. Number of scales including the coarsest wavelet level.
205 | Default set to ceil(log2(min(M,N)) - 3).
206 | :param nbangles: int. Number of angles at the 2nd coarsest level,
207 | minimum 8, must be a multiple of 4.
208 | :param finest: int. Objects at the finest scale. 1 for curvelets, 2 for
209 | wavelets. Curvelets are more precise while wavelets are more efficient.
210 | :return: numpy.ndarray. Denoised data.
211 | """
212 | if pad is None or pad is False:
213 | pad = 0
214 | dn = np.round(np.array(pad) * data.shape).astype(int)
215 | data_pd = padding(data, dn)
216 |
217 | C = fdct_wrapping(data_pd, is_real=True, finest=finest, nbscales=nbscales,
218 | nbangles_coarse=nbangles)
219 |
220 | # apply Gaussian denoising
221 | if choice in (0, 2):
222 | # define threshold
223 | if noise is None:
224 | E = _knee_points(C, factor=knee_fac)
225 | else:
226 | if not isinstance(noise, np.ndarray):
227 | noise = noise.data
228 | noise_pd = padding(noise,
229 | np.array(data_pd.shape) - np.array(noise.shape))
230 | E = _noise_level(noise_pd, finest=finest, nbscales=nbscales,
231 | nbangles=nbangles, percentile=noise_perc)
232 | for s in range(1, len(C)):
233 | for w in range(len(C[s])):
234 | # first do a hard threshold
235 | C[s][w] = C[s][w] * (abs(C[s][w]) > abs(E[s][w]))
236 | if soft_thresh:
237 | # soften the existing coefficients
238 | C[s][w] = np.sign(C[s][w]) * (abs(C[s][w]) - abs(E[s][w]))
239 |
240 | # apply velocity filtering
241 | if choice in (1, 2):
242 | if dx is None or fs is None:
243 | raise ValueError('Please set both dx and fs.')
244 |
245 | if mode == 'decompose':
246 | lst = list(range(scale_begin - 1))
247 | if finest == 2:
248 | lst.append(len(C) - 1)
249 | for s in lst:
250 | for w in range(len(C[s])):
251 | C[s][w] /= 2
252 | C_rt = deepcopy(C)
253 |
254 | for s in range(scale_begin - 1, len(C) - finest + 1):
255 | nbangles = len(C[s])
256 | velocity = _velocity_bin(nbangles, fs, dx)
257 | factors = _mask_factor(velocity, vmin, vmax, flag=flag)
258 | for w in range(nbangles):
259 | if mode == 'retain':
260 | C[s][w] *= factors[w]
261 | elif mode == 'remove':
262 | C[s][w] *= 1 - factors[w]
263 | elif mode == 'decompose':
264 | C[s][w] *= factors[w]
265 | C_rt[s][w] *= 1 - factors[w]
266 |
267 | # perform the inverse curvelet transform
268 | data_dn = padding(ifdct_wrapping(C, is_real=True, size=data_pd.shape), dn,
269 | reverse=True)
270 |
271 | if mode == 'decompose':
272 | data_n = padding(ifdct_wrapping(C_rt, is_real=True, size=data_pd.shape),
273 | dn, reverse=True)
274 | return data_dn, data_n
275 | else:
276 | return data_dn
--------------------------------------------------------------------------------
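An illustrative sketch combining the two simpler denoisers above on synthetic data (array size, the planted spike and thresholds are arbitrary; `curvelet_denoising` is omitted since it additionally needs `dx`/`fs` or a noise record):

```python
import numpy as np
from daspy.advanced_tools.denoising import spike_removal, common_mode_noise_removal

rng = np.random.default_rng(1)
data = rng.standard_normal((200, 2000))  # 200 channels x 2000 samples
data[50, 1000] += 500                    # plant an artificial high-strain spike

# Median-filter based despiking: values more than `thresh` times the local
# median are replaced by interpolation from neighbouring channels.
data_despiked = spike_removal(data, nch=50, nsp=5, thresh=10)

# Remove common mode ("horizontal") noise by subtracting the scaled
# channel-wise median trace from every channel.
data_clean = common_mode_noise_removal(data_despiked, method='median')
```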
/daspy/advanced_tools/fdct.py:
--------------------------------------------------------------------------------
1 | # Purpose: Fast Discrete Curvelet Transform
2 | # Author: Minzhe Hu
3 | # Date: 2024.4.11
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | # Modified from
6 | # http://www.curvelet.org/download-secure.php?file=CurveLab-2.1.3.tar.gz
7 | # (matlab version)
8 | import numpy as np
9 | from numpy.fft import fftshift, ifftshift, fft2, ifft2
10 |
11 |
12 | def _round(x):
13 | return np.round(x).astype(int)
14 |
15 |
16 | def _floor(x):
17 | return np.floor(x).astype(int)
18 |
19 |
20 | def _ceil(x):
21 | return np.ceil(x).astype(int)
22 |
23 |
24 | def fdct_wrapping_window(x):
25 | """
26 | Creates the two halves of a C**inf compactly supported window.
27 |
28 | :param x: vector or matrix of abscissae, the relevant ones from 0 to 1.
29 | :return: vector or matrix containing samples of the left, resp. right half
30 | of the window.
31 | """
32 |
33 | # Initialize the variables
34 | wr = np.zeros_like(x)
35 | wl = np.zeros_like(x)
36 |
37 | # Set values close to zero to zero
38 | x[np.abs(x) < 2**-52] = 0
39 |
40 | # Calculate wr and wl
41 | wr[(x > 0) & (x < 1)] = np.exp(
42 | 1 - 1. / (1 - np.exp(1 - 1. / x[(x > 0) & (x < 1)])))
43 | wr[x <= 0] = 1
44 | wl[(x > 0) & (x < 1)] = np.exp(
45 | 1 - 1. / (1 - np.exp(1 - 1. / (1 - x[(x > 0) & (x < 1)]))))
46 | wl[x >= 1] = 1
47 |
48 | # Normalize wr and wl
49 | normalization = np.sqrt(wl**2 + wr**2)
50 | wr = wr / normalization
51 | wl = wl / normalization
52 |
53 | return wl, wr
54 |
55 |
56 | def fdct_wrapping(x, is_real=False, finest=2,
57 | nbscales=None, nbangles_coarse=16):
58 | """
59 | Fast Discrete Curvelet Transform via wedge wrapping.
60 |
61 | :param x: np.array. M-by-N matrix.
62 | :param is_real: bool. Type of the transform, False for complex-valued
63 | curvelets and True for real-valued curvelets.
64 | :param finest: int. Chooses one of two possibilities for the coefficients at
65 | the finest level: 1 for curvelets and 2 for wavelets.
66 | :param nbscales: int. Number of scales including the coarsest wavelet
67 | level. Default set to ceil(log2(min(M,N)) - 3).
68 | :param nbangles_coarse: int. Number of angles at the 2nd coarsest level,
69 | minimum 8, must be a multiple of 4.
70 | :return: 2-D list of np.ndarray. Array of curvelet coefficients.
71 | C[j][l][k1,k2] is the coefficient at scale j(from finest to coarsest
72 | scale), angle l(starts at the top-left corner and increases clockwise),
73 | position k1, k2(size varies with j and l). If is_real is 1, there are
74 | two types of curvelets, 'cosine' and 'sine'. For a given scale j, the
75 | 'cosine' coefficients are stored in the first two quadrants (low values
76 | of l), the 'sine' coefficients in the last two quadrants (high values of
77 | l).
78 | """
79 | X = fftshift(fft2(ifftshift(x))) / np.sqrt(x.size)
80 | N1, N2 = X.shape
81 | if nbscales is None:
82 | nbscales = _ceil(np.log2(min(N1, N2)) - 3)
83 |
84 | # Initialization: data structure
85 | nbangles = [1] + [nbangles_coarse * 2 ** ((nbscales - i) // 2)
86 | for i in range(nbscales, 1, -1)]
87 | if finest == 2:
88 | nbangles[-1] = 1
89 |
90 | C = []
91 | for j in range(nbscales):
92 | C.append([None] * nbangles[j])
93 |
94 | # Loop: pyramidal scale decomposition
95 | M1 = N1 / 3
96 | M2 = N2 / 3
97 |
98 | if finest == 1:
99 | # Initialization: smooth periodic extension of high frequencies
100 | bigN1 = 2 * _floor(2 * M1) + 1
101 | bigN2 = 2 * _floor(2 * M2) + 1
102 | equiv_index_1 = (_floor(N1 / 2) - _floor(2 * M1) +
103 | np.arange(bigN1)) % N1
104 | equiv_index_2 = (_floor(N2 / 2) - _floor(2 * M2) +
105 | np.arange(bigN2)) % N2
106 | X = X[np.ix_(equiv_index_1, equiv_index_2)]
107 |
108 | window_length_1 = _floor(2 * M1) - _floor(M1) - (N1 % 3 == 0)
109 | window_length_2 = _floor(2 * M2) - _floor(M2) - (N2 % 3 == 0)
110 | coord_1 = np.linspace(0, 1, window_length_1)
111 | coord_2 = np.linspace(0, 1, window_length_2)
112 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
113 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
114 |
115 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
116 | if N1 % 3 == 0:
117 | lowpass_1 = np.concatenate(([0], lowpass_1, [0]))
118 |
119 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
120 | if N2 % 3 == 0:
121 | lowpass_2 = np.concatenate(([0], lowpass_2, [0]))
122 |
123 | lowpass = np.outer(lowpass_1, lowpass_2)
124 | Xlow = X * lowpass
125 | scales = np.arange(nbscales, 1, -1)
126 |
127 | else:
128 | M1 /= 2
129 | M2 /= 2
130 |
131 | window_length_1 = _floor(2 * M1) - _floor(M1)
132 | window_length_2 = _floor(2 * M2) - _floor(M2)
133 | coord_1 = np.linspace(0, 1, window_length_1)
134 | coord_2 = np.linspace(0, 1, window_length_2)
135 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
136 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
137 |
138 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
139 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
140 | lowpass = np.outer(lowpass_1, lowpass_2)
141 | hipass = np.sqrt(1 - lowpass ** 2)
142 |
143 | Xlow_index_1 = np.arange(-_floor(2 * M1),
144 | _floor(2 * M1) + 1) + _ceil((N1 + 1) / 2) - 1
145 | Xlow_index_2 = np.arange(-_floor(2 * M2),
146 | _floor(2 * M2) + 1) + _ceil((N2 + 1) / 2) - 1
147 | Xlow = X[np.ix_(Xlow_index_1, Xlow_index_2)] * lowpass
148 | Xhi = X.copy()
149 | Xhi[np.ix_(Xlow_index_1, Xlow_index_2)] *= hipass
150 |
151 | C[nbscales - 1][0] = fftshift(ifft2(ifftshift(Xhi))
152 | ) * np.sqrt(Xhi.size)
153 | if is_real:
154 | C[nbscales - 1][0] = C[nbscales - 1][0].real
155 |
156 | scales = np.arange(nbscales - 1, 1, -1)
157 | for j in scales - 1:
158 | M1 /= 2
159 | M2 /= 2
160 | window_length_1 = _floor(2 * M1) - _floor(M1)
161 | window_length_2 = _floor(2 * M2) - _floor(M2)
162 | coord_1 = np.linspace(0, 1, window_length_1)
163 | coord_2 = np.linspace(0, 1, window_length_2)
164 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
165 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
166 |
167 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
168 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
169 | lowpass = np.outer(lowpass_1, lowpass_2)
170 | hipass = np.sqrt(1 - lowpass ** 2)
171 |
172 | Xhi = Xlow.copy()
173 | Xlow_index_1 = np.arange(-_floor(2 * M1),
174 | _floor(2 * M1) + 1) + _floor(4 * M1)
175 | Xlow_index_2 = np.arange(-_floor(2 * M2),
176 | _floor(2 * M2) + 1) + _floor(4 * M2)
177 | Xlow = Xlow[np.ix_(Xlow_index_1, Xlow_index_2)]
178 | Xhi[np.ix_(Xlow_index_1, Xlow_index_2)] = Xlow * hipass
179 | Xlow *= lowpass
180 |
181 | # Loop: angular decomposition
182 | l = -1
183 | nbquadrants = 2 + 2 * (not is_real)
184 | nbangles_perquad = nbangles[j] // 4
185 | for quadrant in range(1, nbquadrants + 1):
186 | M_horiz = (M1, M2)[quadrant % 2]
187 | M_vert = (M2, M1)[quadrant % 2]
188 | wedge_ticks_left = _round(
189 | np.linspace(
190 | 0,
191 | 1,
192 | nbangles_perquad +
193 | 1) *
194 | _floor(
195 | 4 *
196 | M_horiz) +
197 | 1)
198 | wedge_ticks_right = 2 * _floor(4 * M_horiz) + 2 - wedge_ticks_left
199 | if nbangles_perquad % 2:
200 | wedge_ticks = np.concatenate(
201 | (wedge_ticks_left, wedge_ticks_right[::-1]))
202 | else:
203 | wedge_ticks = np.concatenate(
204 | (wedge_ticks_left, wedge_ticks_right[-2::-1]))
205 |
206 | wedge_endpoints = wedge_ticks[1:-1:2]
207 | wedge_midpoints = (wedge_endpoints[:-1] + wedge_endpoints[1:]) / 2
208 | # Left corner wedge
209 | l += 1
210 | first_wedge_endpoint_vert = _round(
211 | _floor(4 * M_vert) / nbangles_perquad + 1)
212 | length_corner_wedge = _floor(4 * M_vert) - _floor(M_vert) + \
213 | _ceil(first_wedge_endpoint_vert / 4)
214 | Y_corner = np.arange(length_corner_wedge) + 1
215 | XX, YY = np.meshgrid(
216 | np.arange(2 * _floor(4 * M_horiz) + 1) + 1, Y_corner)
217 | width_wedge = wedge_endpoints[1] + wedge_endpoints[0] - 1
218 | slope_wedge = (_floor(4 * M_horiz) + 1 -
219 | wedge_endpoints[0]) / _floor(4 * M_vert)
220 | left_line = _round(
221 | 2 - wedge_endpoints[0] + slope_wedge * (Y_corner - 1))
222 | wrapped_data = np.zeros(
223 | (length_corner_wedge, width_wedge), dtype=complex)
224 | wrapped_XX = np.zeros(
225 | (length_corner_wedge, width_wedge), dtype=int)
226 | wrapped_YY = np.zeros(
227 | (length_corner_wedge, width_wedge), dtype=int)
228 | first_row = _floor(4 * M_vert) + 2 - \
229 | _ceil((length_corner_wedge + 1) / 2) + \
230 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
231 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \
232 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
233 | for row in Y_corner - 1:
234 | cols = left_line[row] + \
235 | (np.arange(width_wedge) - (left_line[row] - first_col)) \
236 | % width_wedge
237 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1
238 | new_row = (row - first_row + 1) % length_corner_wedge
239 | wrapped_data[new_row, :] = Xhi[row,
240 | admissible_cols] * (cols > 0)
241 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
242 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
243 | slope_wedge_right = (_floor(4 * M_horiz) + 1 -
244 | wedge_midpoints[0]) / _floor(4 * M_vert)
245 | mid_line_right = wedge_midpoints[0] + \
246 | slope_wedge_right * (wrapped_YY - 1)
247 | coord_right = 0.5 + _floor(4 * M_vert) / \
248 | (wedge_endpoints[1] - wedge_endpoints[0]) * \
249 | (wrapped_XX - mid_line_right) / \
250 | (_floor(4 * M_vert) + 1 - wrapped_YY)
251 | C2 = 1 / (1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[0] -
252 | 1) - 1) + 1 / (2 * (_floor(4 * M_vert)) / (
253 | first_wedge_endpoint_vert - 1) - 1))
254 | C1 = C2 / (2 * (_floor(4 * M_vert)) /
255 | (first_wedge_endpoint_vert - 1) - 1)
256 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) +
257 | (wrapped_YY - 1) / _floor(4 * M_vert) == 2] += 1
258 | coord_corner = C1 + C2 * ((wrapped_XX - 1) / _floor(4 * M_horiz) -
259 | (wrapped_YY - 1) / _floor(4 * M_vert)) / (2 -
260 | ((wrapped_XX - 1) / _floor(4 * M_horiz) + (wrapped_YY - 1) /
261 | _floor(4 * M_vert)))
262 | wl_left, _ = fdct_wrapping_window(coord_corner)
263 | _, wr_right = fdct_wrapping_window(coord_right)
264 | wrapped_data = wrapped_data * wl_left * wr_right
265 | if not is_real:
266 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
267 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))) * \
268 | np.sqrt(wrapped_data.size)
269 | else:
270 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
271 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \
272 | np.sqrt(wrapped_data.size)
273 | C[j][l] = np.sqrt(2) * x.real
274 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag
275 |
276 | # Regular wedges
277 | length_wedge = _floor(4 * M_vert) - _floor(M_vert)
278 | Y = np.arange(length_wedge) + 1
279 | first_row = _floor(4 * M_vert) + 2 - _ceil((length_wedge + 1) / 2) \
280 | + (length_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
281 | for subl in range(1, nbangles_perquad - 1):
282 | l += 1
283 | width_wedge = wedge_endpoints[subl +
284 | 1] - wedge_endpoints[subl - 1] + 1
285 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
286 | wedge_endpoints[subl]) / _floor(4 * M_vert)
287 | left_line = _round(
288 | wedge_endpoints[subl - 1] + slope_wedge * (Y - 1))
289 | wrapped_data = np.zeros(
290 | (length_wedge, width_wedge), dtype=complex)
291 | wrapped_XX = np.zeros((length_wedge, width_wedge), dtype=int)
292 | wrapped_YY = np.zeros((length_wedge, width_wedge), dtype=int)
293 | first_col = _floor(4 * M_horiz) + 2 - \
294 | _ceil((width_wedge + 1) / 2) + \
295 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
296 | for row in Y - 1:
297 | cols = left_line[row] + (np.arange(width_wedge) -
298 | (left_line[row] - first_col)) % width_wedge - 1
299 | new_row = (row - first_row + 1) % length_wedge
300 | wrapped_data[new_row, :] = Xhi[row, cols]
301 | wrapped_XX[new_row, :] = XX[row, cols]
302 | wrapped_YY[new_row, :] = YY[row, cols]
303 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
304 | wedge_midpoints[subl - 1]) / _floor(4 * M_vert)
305 | mid_line_left = wedge_midpoints[subl - 1] + \
306 | slope_wedge_left * (wrapped_YY - 1)
307 | coord_left = 0.5 + _floor(4 * M_vert) / \
308 | (wedge_endpoints[subl] - wedge_endpoints[subl - 1]) * \
309 | (wrapped_XX - mid_line_left) / \
310 | (_floor(4 * M_vert) + 1 - wrapped_YY)
311 | slope_wedge_right = ((_floor(4 * M_horiz) + 1) -
312 | wedge_midpoints[subl]) / _floor(4 * M_vert)
313 | mid_line_right = wedge_midpoints[subl] + \
314 | slope_wedge_right * (wrapped_YY - 1)
315 | coord_right = 0.5 + _floor(4 * M_vert) / \
316 | (wedge_endpoints[subl + 1] - wedge_endpoints[subl]) * \
317 | (wrapped_XX - mid_line_right) / \
318 | (_floor(4 * M_vert) + 1 - wrapped_YY)
319 |
320 | wl_left, _ = fdct_wrapping_window(coord_left)
321 | _, wr_right = fdct_wrapping_window(coord_right)
322 | wrapped_data = wrapped_data * wl_left * wr_right
323 | if not is_real:
324 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
325 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))) * \
326 | np.sqrt(wrapped_data.size)
327 | else:
328 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
329 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \
330 | np.sqrt(wrapped_data.size)
331 | C[j][l] = np.sqrt(2) * x.real
332 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag
333 |
334 | # Right corner wedge
335 | l += 1
336 | width_wedge = 4 * _floor(4 * M_horiz) + 3 - \
337 | wedge_endpoints[-1] - wedge_endpoints[-2]
338 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
339 | wedge_endpoints[-1]) / _floor(4 * M_vert)
340 | left_line = _round(
341 | wedge_endpoints[-2] + slope_wedge * (Y_corner - 1))
342 | wrapped_data = np.zeros((length_corner_wedge, width_wedge),
343 | dtype=complex)
344 | wrapped_XX = np.zeros((length_corner_wedge, width_wedge), dtype=int)
345 | wrapped_YY = np.zeros((length_corner_wedge, width_wedge), dtype=int)
346 | first_row = _floor(4 * M_vert) + 2 - \
347 | _ceil((length_corner_wedge + 1) / 2) + \
348 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
349 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) + \
350 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
351 | for row in Y_corner - 1:
352 | cols = left_line[row] + (np.arange(width_wedge) -
353 | (left_line[row] - first_col)) % width_wedge
354 | admissible_cols = _round(0.5 * (cols + 2 * _floor(4 * M_horiz)
355 | + 1 - np.abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1
356 | new_row = (row - first_row + 1) % length_corner_wedge
357 | wrapped_data[new_row, :] = Xhi[row, admissible_cols] * \
358 | (cols <= (2 * _floor(4 * M_horiz) + 1))
359 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
360 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
361 |
362 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
363 | wedge_midpoints[-1]) / _floor(4 * M_vert)
364 | mid_line_left = wedge_midpoints[-1] + \
365 | slope_wedge_left * (wrapped_YY - 1)
366 | coord_left = 0.5 + _floor(4 * M_vert) / \
367 | (wedge_endpoints[-1] - wedge_endpoints[-2]) * \
368 | (wrapped_XX - mid_line_left) / \
369 | (_floor(4 * M_vert) + 1 - wrapped_YY)
370 | C2 = -1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[-1] - 1) -
371 | 1 + 1 / (2 * (_floor(4 * M_vert)) /
372 | (first_wedge_endpoint_vert - 1) - 1))
373 | C1 = -C2 * (2 * (_floor(4 * M_horiz)) /
374 | (wedge_endpoints[-1] - 1) - 1)
375 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) ==
376 | (wrapped_YY - 1) / _floor(4 * M_vert)] -= 1
377 | coord_corner = C1 + C2 * (2 - ((wrapped_XX - 1) /
378 | _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert))) \
379 | / ((wrapped_XX - 1) / _floor(4 * M_horiz) - (wrapped_YY - 1) /
380 | _floor(4 * M_vert))
381 | wl_left, _ = fdct_wrapping_window(coord_left)
382 | _, wr_right = fdct_wrapping_window(coord_corner)
383 | wrapped_data = wrapped_data * wl_left * wr_right
384 | if not is_real:
385 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
386 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))
387 | ) * np.sqrt(wrapped_data.size)
388 | else:
389 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1))
390 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \
391 | np.sqrt(wrapped_data.size)
392 | C[j][l] = np.sqrt(2) * x.real
393 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag
394 |
395 | if quadrant < nbquadrants:
396 | Xhi = np.rot90(Xhi)
397 | # Coarsest wavelet level
398 | C[0][0] = fftshift(ifft2(ifftshift(Xlow))) * np.sqrt(Xlow.size)
399 | if is_real:
400 | C[0][0] = C[0][0].real
401 |
402 | return C
403 |
404 |
405 | def ifdct_wrapping(C, is_real=False, size=None):
406 | """
407 |     Inverse Fast Discrete Curvelet Transform via wedge wrapping. This is in
408 |     fact the adjoint and also the pseudo-inverse.
409 |
410 | :param C: 2-D list of np.ndarray. Array of curvelet coefficients.
411 | :param is_real: bool. Type of the transform, False for complex-valued
412 | curvelets and True for real-valued curvelets.
413 |     :param size: tuple of ints. Size of the image to be recovered (not necessary
414 |         if finest == 2).
415 |     :return: 2-D np.ndarray. Reconstructed image.
416 | """
417 | nbscales = len(C)
418 | nbangles_coarse = len(C[1])
419 | nbangles = [1] + [nbangles_coarse * 2 ** ((nbscales - i) // 2)
420 | for i in range(nbscales, 1, -1)]
421 | if len(C[-1]) == 1:
422 | finest = 2
423 | nbangles[nbscales - 1] = 1
424 | else:
425 | finest = 1
426 |
427 | if size is None:
428 | if finest == 1:
429 |             raise ValueError("Output size is required when finest == 1.")
430 | else:
431 | N1, N2 = C[-1][0].shape
432 | else:
433 | N1, N2 = size
434 |
435 | M1 = N1 / 3
436 | M2 = N2 / 3
437 |
438 | if finest == 1:
439 | # Initialization: preparing the lowpass filter at finest scale
440 | window_length_1 = _floor(2 * M1) - _floor(M1) - (N1 % 3 == 0)
441 | window_length_2 = _floor(2 * M2) - _floor(M2) - (N2 % 3 == 0)
442 | coord_1 = np.linspace(0, 1, window_length_1)
443 | coord_2 = np.linspace(0, 1, window_length_2)
444 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
445 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
446 |
447 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
448 | if N1 % 3 == 0:
449 | lowpass_1 = np.concatenate(([0], lowpass_1, [0]))
450 |
451 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
452 | if N2 % 3 == 0:
453 | lowpass_2 = np.concatenate(([0], lowpass_2, [0]))
454 |
455 | lowpass = np.outer(lowpass_1, lowpass_2)
456 | scales = np.arange(nbscales, 1, -1)
457 | else:
458 | M1 /= 2
459 | M2 /= 2
460 |
461 | window_length_1 = _floor(2 * M1) - _floor(M1)
462 | window_length_2 = _floor(2 * M2) - _floor(M2)
463 | coord_1 = np.linspace(0, 1, window_length_1)
464 | coord_2 = np.linspace(0, 1, window_length_2)
465 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
466 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
467 |
468 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
469 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
470 | lowpass = np.outer(lowpass_1, lowpass_2)
471 | hipass_finest = np.sqrt(1 - lowpass ** 2)
472 |
473 | scales = np.arange(nbscales - 1, 1, -1)
474 |
475 | bigN1 = 2 * _floor(2 * M1) + 1
476 | bigN2 = 2 * _floor(2 * M2) + 1
477 | X = np.zeros((bigN1, bigN2), dtype=complex)
478 |
479 | # Loop: pyramidal reconstruction
480 |
481 | Xj_topleft_1 = 1
482 | Xj_topleft_2 = 1
483 | for j in scales - 1:
484 | M1 /= 2
485 | M2 /= 2
486 |
487 | window_length_1 = _floor(2 * M1) - _floor(M1)
488 | window_length_2 = _floor(2 * M2) - _floor(M2)
489 | coord_1 = np.linspace(0, 1, window_length_1)
490 | coord_2 = np.linspace(0, 1, window_length_2)
491 | wl_1, wr_1 = fdct_wrapping_window(coord_1)
492 | wl_2, wr_2 = fdct_wrapping_window(coord_2)
493 |
494 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1))
495 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2))
496 | lowpass_next = np.outer(lowpass_1, lowpass_2)
497 | hipass = np.sqrt(1 - lowpass_next ** 2)
498 | Xj = np.zeros((2 * _floor(4 * M1) + 1, 2 * _floor(4 * M2) + 1),
499 | dtype=complex)
500 |
501 | # Loop: angles
502 | l = -1
503 | nbquadrants = 2 + 2 * (not is_real)
504 | nbangles_perquad = nbangles[j] // 4
505 | for quadrant in range(1, nbquadrants + 1):
506 | M_horiz = (M1, M2)[quadrant % 2]
507 | M_vert = (M2, M1)[quadrant % 2]
508 | wedge_ticks_left = _round(np.linspace(0, 1, nbangles_perquad + 1) *
509 | _floor(4 * M_horiz) + 1)
510 | wedge_ticks_right = 2 * _floor(4 * M_horiz) + 2 - wedge_ticks_left
511 | if nbangles_perquad % 2:
512 | wedge_ticks = np.concatenate(
513 | (wedge_ticks_left, wedge_ticks_right[::-1]))
514 | else:
515 | wedge_ticks = np.concatenate(
516 | (wedge_ticks_left, wedge_ticks_right[-2::-1]))
517 | wedge_endpoints = wedge_ticks[1:-1:2]
518 | wedge_midpoints = (wedge_endpoints[:-1] + wedge_endpoints[1:]) / 2
519 |
520 | # Left corner wedge
521 | l += 1
522 | first_wedge_endpoint_vert = _round(_floor(4 * M_vert) /
523 | nbangles_perquad + 1)
524 | length_corner_wedge = _floor(4 * M_vert) - _floor(M_vert) + \
525 | _ceil(first_wedge_endpoint_vert / 4)
526 | Y_corner = np.arange(length_corner_wedge) + 1
527 | [XX, YY] = np.meshgrid(np.arange(1, 2 * _floor(4 * M_horiz) + 2),
528 | Y_corner)
529 | width_wedge = wedge_endpoints[1] + wedge_endpoints[0] - 1
530 | slope_wedge = (_floor(4 * M_horiz) + 1 -
531 | wedge_endpoints[0]) / _floor(4 * M_vert)
532 | left_line = _round(2 - wedge_endpoints[0] +
533 | slope_wedge * (Y_corner - 1))
534 | wrapped_XX = np.zeros((length_corner_wedge, width_wedge), dtype=int)
535 | wrapped_YY = np.zeros((length_corner_wedge, width_wedge), dtype=int)
536 | first_row = _floor(4 * M_vert) + \
537 | 2 - _ceil((length_corner_wedge + 1) / 2) + \
538 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
539 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \
540 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
541 | for row in Y_corner - 1:
542 | cols = left_line[row] + (np.arange(width_wedge) -
543 | (left_line[row] - first_col)) % width_wedge
544 | new_row = (row - first_row + 1) % length_corner_wedge
545 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1
546 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
547 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
548 |
549 | slope_wedge_right = (_floor(4 * M_horiz) + 1 - wedge_midpoints[0]) \
550 | / _floor(4 * M_vert)
551 | mid_line_right = wedge_midpoints[0] + \
552 | slope_wedge_right * (wrapped_YY - 1)
553 | coord_right = 0.5 + _floor(4 * M_vert) / (wedge_endpoints[1] -
554 | wedge_endpoints[0]) * (wrapped_XX - mid_line_right) / \
555 | (_floor(4 * M_vert) + 1 - wrapped_YY)
556 | C2 = 1 / (1 / (2 * (_floor(4 * M_horiz)) /
557 | (wedge_endpoints[0] - 1) - 1) + 1 / (2 * (_floor(4 * M_vert))
558 | / (first_wedge_endpoint_vert - 1) - 1))
559 | C1 = C2 / (2 * (_floor(4 * M_vert)) /
560 | (first_wedge_endpoint_vert - 1) - 1)
561 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) +
562 | (wrapped_YY - 1) / _floor(4 * M_vert) == 2] += 1
563 | coord_corner = C1 + C2 * ((wrapped_XX - 1) / _floor(4 * M_horiz) -
564 | (wrapped_YY - 1) / _floor(4 * M_vert)) / (2 - ((wrapped_XX - 1)
565 | / _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert)))
566 | wl_left, _ = fdct_wrapping_window(coord_corner)
567 | _, wr_right = fdct_wrapping_window(coord_right)
568 |
569 | if not is_real:
570 | wrapped_data = fftshift(fft2(ifftshift(C[j][l]))) / \
571 | np.sqrt(C[j][l].size)
572 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
573 | else:
574 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2]
575 | wrapped_data = fftshift(fft2(ifftshift(x))) / \
576 | np.sqrt(x.size * 2)
577 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
578 |
579 | wrapped_data = wrapped_data * wl_left * wr_right
580 | # Unwrapping data
581 | for row in Y_corner - 1:
582 | cols = left_line[row] + (np.arange(width_wedge) -
583 | (left_line[row] - first_col)) % width_wedge
584 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1
585 | new_row = (row - first_row + 1) % length_corner_wedge
586 | Xj[row, admissible_cols] += wrapped_data[new_row, :]
587 |                 # We use the following property: in an assignment A[B] = C where
588 | # B and C are vectors, if some value x repeats in B, then the
589 | # last occurrence of x is the one corresponding to the eventual
590 | # assignment.
591 |
592 | # Regular wedges
593 | length_wedge = _floor(4 * M_vert) - _floor(M_vert)
594 | Y = np.arange(length_wedge) + 1
595 | first_row = _floor(4 * M_vert) + 2 - _ceil((length_wedge + 1) / 2) \
596 | + (length_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
597 | for subl in range(1, nbangles_perquad - 1):
598 | l += 1
599 | width_wedge = wedge_endpoints[subl + 1] - \
600 | wedge_endpoints[subl - 1] + 1
601 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
602 | wedge_endpoints[subl]) / _floor(4 * M_vert)
603 | left_line = _round(wedge_endpoints[subl - 1] +
604 | slope_wedge * (Y - 1))
605 | wrapped_XX = np.zeros((length_wedge, width_wedge), dtype=int)
606 | wrapped_YY = np.zeros((length_wedge, width_wedge), dtype=int)
607 | first_col = _floor(4 * M_horiz) + 2 - \
608 | _ceil((width_wedge + 1) / 2) + \
609 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
610 | for row in Y - 1:
611 | cols = left_line[row] + (np.arange(width_wedge) -
612 | (left_line[row] - first_col)) % width_wedge - 1
613 | new_row = (row - first_row + 1) % length_wedge
614 | wrapped_XX[new_row, :] = XX[row, cols]
615 | wrapped_YY[new_row, :] = YY[row, cols]
616 |
617 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
618 | wedge_midpoints[subl - 1]) / _floor(4 * M_vert)
619 | mid_line_left = wedge_midpoints[subl - 1] + \
620 | slope_wedge_left * (wrapped_YY - 1)
621 | coord_left = 0.5 + _floor(4 * M_vert) / (wedge_endpoints[subl]
622 | - wedge_endpoints[subl - 1]) * \
623 | (wrapped_XX - mid_line_left) / \
624 | (_floor(4 * M_vert) + 1 - wrapped_YY)
625 | slope_wedge_right = ((_floor(4 * M_horiz) + 1) -
626 | wedge_midpoints[subl]) / _floor(4 * M_vert)
627 | mid_line_right = wedge_midpoints[subl] + \
628 | slope_wedge_right * (wrapped_YY - 1)
629 | coord_right = 0.5 + _floor(4 * M_vert) / \
630 | (wedge_endpoints[subl + 1] - wedge_endpoints[subl]) * \
631 | (wrapped_XX - mid_line_right) / \
632 | (_floor(4 * M_vert) + 1 - wrapped_YY)
633 | wl_left, _ = fdct_wrapping_window(coord_left)
634 | _, wr_right = fdct_wrapping_window(coord_right)
635 | if not is_real:
636 | wrapped_data = fftshift(fft2(ifftshift(C[j][l]))) / \
637 | np.sqrt(C[j][l].size)
638 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
639 | else:
640 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2]
641 | wrapped_data = fftshift(
642 | fft2(ifftshift(x))) / np.sqrt(x.size * 2)
643 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
644 |
645 | wrapped_data = wrapped_data * wl_left * wr_right
646 |
647 | # Unwrapping data
648 | for row in Y - 1:
649 | cols = left_line[row] + (np.arange(width_wedge) -
650 | (left_line[row] - first_col)) % width_wedge - 1
651 | new_row = (row + 1 - first_row) % length_wedge
652 | Xj[row, cols] += wrapped_data[new_row, :]
653 |
654 | # Right corner wedge
655 | l += 1
656 | width_wedge = 4 * _floor(4 * M_horiz) + 3 - \
657 | wedge_endpoints[-1] - wedge_endpoints[-2]
658 | slope_wedge = ((_floor(4 * M_horiz) + 1) -
659 | wedge_endpoints[-1]) / _floor(4 * M_vert)
660 | left_line = _round(
661 | wedge_endpoints[-2] + slope_wedge * (Y_corner - 1))
662 | wrapped_XX = np.zeros(
663 | (length_corner_wedge, width_wedge), dtype=int)
664 | wrapped_YY = np.zeros(
665 | (length_corner_wedge, width_wedge), dtype=int)
666 | first_row = _floor(4 * M_vert) + 2 - \
667 | _ceil((length_corner_wedge + 1) / 2) + \
668 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2)
669 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \
670 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2)
671 | for row in Y_corner - 1:
672 | cols = left_line[row] + (np.arange(width_wedge) -
673 | (left_line[row] - first_col)) % width_wedge
674 | admissible_cols = _round(0.5 * (cols + 2 * _floor(4 * M_horiz)
675 | + 1 - np.abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1
676 | new_row = (row - first_row + 1) % length_corner_wedge
677 | wrapped_XX[new_row, :] = XX[row, admissible_cols]
678 | wrapped_YY[new_row, :] = YY[row, admissible_cols]
679 |
680 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) -
681 | wedge_midpoints[-1]) / _floor(4 * M_vert)
682 | mid_line_left = wedge_midpoints[-1] + \
683 | slope_wedge_left * (wrapped_YY - 1)
684 | coord_left = 0.5 + _floor(4 * M_vert) / \
685 | (wedge_endpoints[-1] - wedge_endpoints[-2]) * \
686 | (wrapped_XX - mid_line_left) / \
687 | (_floor(4 * M_vert) + 1 - wrapped_YY)
688 | C2 = -1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[-1] - 1)
689 | - 1 + 1 / (2 * (_floor(4 * M_vert)) /
690 | (first_wedge_endpoint_vert - 1) - 1))
691 | C1 = -C2 * (2 * (_floor(4 * M_horiz)) /
692 | (wedge_endpoints[-1] - 1) - 1)
693 |
694 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) ==
695 | (wrapped_YY - 1) / _floor(4 * M_vert)] -= 1
696 | coord_corner = C1 + C2 * (2 - ((wrapped_XX - 1) /
697 | _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert))) \
698 | / ((wrapped_XX - 1) / _floor(4 * M_horiz) - (wrapped_YY - 1) /
699 | _floor(4 * M_vert))
700 | wl_left, _ = fdct_wrapping_window(coord_left)
701 | _, wr_right = fdct_wrapping_window(coord_corner)
702 |
703 | if not is_real:
704 | wrapped_data = fftshift(
705 | fft2(ifftshift(C[j][l]))) / np.sqrt(C[j][l].size)
706 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
707 | else:
708 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2]
709 | wrapped_data = fftshift(
710 | fft2(ifftshift(x))) / np.sqrt(x.size * 2)
711 | wrapped_data = np.rot90(wrapped_data, quadrant - 1)
712 |
713 | wrapped_data = wrapped_data * wl_left * wr_right
714 |
715 | # Unwrapping data
716 | for row in Y_corner - 1:
717 | cols = left_line[row] + (np.arange(width_wedge) -
718 | (left_line[row] - first_col)) % width_wedge
719 | admissible_cols = _round(1 / 2 * (cols + 2 * _floor(4 * M_horiz)
720 | + 1 - abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1
721 | new_row = (row + 1 - first_row) % length_corner_wedge
722 | Xj[row, np.flip(admissible_cols)] += wrapped_data[new_row, ::-1]
723 | # We use the following property: in an assignment A[B] = C where
724 | # B and C are vectors, if some value x repeats in B, then the
725 | # last occurrence of x is the one corresponding to the eventual
726 | # assignment.
727 |
728 | Xj = np.rot90(Xj)
729 |
730 | Xj *= lowpass
731 | Xj_index1 = np.arange(-_floor(2 * M1),
732 | _floor(2 * M1) + 1) + _floor(4 * M1)
733 | Xj_index2 = np.arange(-_floor(2 * M2),
734 | _floor(2 * M2) + 1) + _floor(4 * M2)
735 |
736 | Xj[np.ix_(Xj_index1, Xj_index2)] *= hipass
737 |
738 | loc_1 = Xj_topleft_1 + np.arange(2 * _floor(4 * M1) + 1) - 1
739 | loc_2 = Xj_topleft_2 + np.arange(2 * _floor(4 * M2) + 1) - 1
740 | X[np.ix_(loc_1, loc_2)] += Xj
741 |
742 | # Preparing for loop reentry or exit
743 | Xj_topleft_1 += _floor(4 * M1) - _floor(2 * M1)
744 | Xj_topleft_2 += _floor(4 * M2) - _floor(2 * M2)
745 |
746 | lowpass = lowpass_next
747 |
748 | if is_real:
749 | Y = X
750 | X = np.rot90(X, 2)
751 | X = X + np.conj(Y)
752 |
753 | # Coarsest wavelet level
754 | M1 = M1 / 2
755 | M2 = M2 / 2
756 | Xj = fftshift(fft2(ifftshift(C[0][0]))) / np.sqrt(C[0][0].size)
757 | loc_1 = Xj_topleft_1 + np.arange(2 * _floor(4 * M1) + 1) - 1
758 | loc_2 = Xj_topleft_2 + np.arange(2 * _floor(4 * M2) + 1) - 1
759 | X[np.ix_(loc_1, loc_2)] += Xj * lowpass
760 |
761 | # Finest level
762 | M1 = N1 / 3
763 | M2 = N2 / 3
764 | if finest == 1:
765 | # Folding back onto N1-by-N2 matrix
766 | shift_1 = _floor(2 * M1) - _floor(N1 / 2)
767 | shift_2 = _floor(2 * M2) - _floor(N2 / 2)
768 | Y = X[:, np.arange(N2) + shift_2]
769 | Y[:, np.arange(N2 - shift_2, N2)] += X[:, :shift_2]
770 | Y[:, :shift_2] += X[:, N2 + shift_2:N2 + 2 * shift_2]
771 | X = Y[np.arange(N1) + shift_1, :]
772 | X[np.arange(N1 - shift_1, N1), :] += Y[:shift_1, :]
773 | X[:shift_1, :] += Y[N1 + shift_1:N1 + 2 * shift_1, :]
774 | else:
775 | # Extension to a N1-by-N2 matrix
776 | Y = fftshift(fft2(ifftshift(C[nbscales - 1][0]))) / \
777 | np.sqrt(C[nbscales - 1][0].size)
778 | X_topleft_1 = _ceil((N1 + 1) / 2) - _floor(M1)
779 | X_topleft_2 = _ceil((N2 + 1) / 2) - _floor(M2)
780 | loc_1 = X_topleft_1 + np.arange(2 * _floor(M1) + 1) - 1
781 | loc_2 = X_topleft_2 + np.arange(2 * _floor(M2) + 1) - 1
782 | Y[np.ix_(loc_1, loc_2)] = Y[np.ix_(loc_1, loc_2)] * hipass_finest + X
783 | X = Y
784 |
785 | x = fftshift(ifft2(ifftshift(X))) * np.sqrt(X.size)
786 | if is_real:
787 | x = np.real(x)
788 |
789 | return x
790 |
--------------------------------------------------------------------------------
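The two functions above form a forward/inverse pair: fdct_wrapping decomposes a 2-D array into curvelet coefficients and ifdct_wrapping (the adjoint and pseudo-inverse) reconstructs it. Below is a minimal round-trip sketch on synthetic data, using the same call pattern that curvelet_conversion in strain2vel.py relies on; the array size, number of scales and tolerance are illustrative assumptions, not values taken from the module.

import numpy as np
from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping

# synthetic 2-D section: 128 channels x 256 time samples
x = np.random.randn(128, 256)

# forward transform: real-valued curvelets, curvelets at the finest scale
C = fdct_wrapping(x, is_real=True, finest=1, nbscales=4, nbangles_coarse=16)

# inverse transform; 'size' is required because finest == 1
x_rec = ifdct_wrapping(C, is_real=True, size=x.shape)

# the wrapping transform behaves as a tight frame, so the round trip
# should agree with the input to within numerical precision
print(np.max(np.abs(x - x_rec)))
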
/daspy/advanced_tools/strain2vel.py:
--------------------------------------------------------------------------------
1 | # Purpose: Convert strain rate data to velocity
2 | # Author: Minzhe Hu
3 | # Date: 2024.3.10
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from numpy.fft import irfft2, ifftshift
7 | from scipy.signal import hilbert
8 | from daspy.basic_tools.freqattributes import next_pow_2, fk_transform
9 | from daspy.basic_tools.preprocessing import padding, cosine_taper
10 | from daspy.basic_tools.filter import bandpass
11 | from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping
12 | from daspy.advanced_tools.denoising import _velocity_bin
13 | from daspy.advanced_tools.decomposition import fk_fan_mask
14 |
15 |
16 | def fk_rescaling(data, dx, fs, taper=(0.02, 0.05), pad='default', fmax=None,
17 | kmin=(1 / 2000, 1 / 3000), vmax=(15000, 30000), edge=0.2,
18 | turning=None, verbose=False):
19 | """
20 | Convert strain/strain rate to velocity/acceleration by fk rescaling.
21 |
22 | :param data: numpy.ndarray. Data to do fk rescaling.
23 | :param dx: Channel interval in m.
24 | :param fs: Sampling rate in Hz.
25 |     :param taper: float or sequence of floats. Each float means decimal
26 |         percentage of Tukey taper for corresponding dimension (ranging from 0 to
27 |         1). The default (0.02, 0.05) applies a light taper in the spatial and
28 |         time dimensions respectively.
29 |     :param pad: Pad the data or not. It can be float or sequence of floats. Each
30 |         float means padding percentage before FFT for corresponding dimension.
31 |         If set to 0.1, 5% is padded before the beginning and 5% after the end.
32 |         'default' means pad both dimensions to the next power of 2. None or False
33 |         means do not pad the data before or during the Fast Fourier Transform.
34 |     :param fmax, kmin, vmax: float or sequence of 2 floats. A sequence of 2
35 |         floats represents the start and end of the taper. Setting these parameters
36 |         can reduce artifacts.
37 |     :param edge: float. The width of the fan-mask taper edge.
38 |     :param turning: Sequence of int. Channel numbers of turning points.
39 | :param verbose: If True, return converted data, f-k spectrum, frequency
40 | sequence, wavenumber sequence and f-k mask.
41 | :return: Converted data and some variables in the process if verbose==True.
42 | """
43 | if turning is not None:
44 | data_vel = np.zeros_like(data)
45 | start_ch = [0, *turning]
46 | end_ch = [*turning, len(data)]
47 | for (s, e) in zip(start_ch, end_ch):
48 | data_vel[s:e] = fk_rescaling(data[s:e], dx, fs, taper=taper,
49 | pad=pad, fmax=fmax, kmin=kmin,
50 | vmax=vmax, edge=edge, verbose=False)
51 | else:
52 | data_tp = cosine_taper(data, taper)
53 |
54 | if pad == 'default':
55 | nch, nt = data.shape
56 | dn = (next_pow_2(nch) - nch, next_pow_2(nt) - nt)
57 | nfft = None
58 | elif pad is None or pad is False:
59 | dn = 0
60 | nfft = None
61 | else:
62 | dn = np.round(np.array(pad) * data.shape).astype(int)
63 | nfft = 'default'
64 |
65 | data_pd = padding(data_tp, dn)
66 | nch, nt = data_pd.shape
67 |
68 | fk, f, k = fk_transform(data_pd, dx, fs, taper=taper, nfft=nfft)
69 |
70 | ff = np.tile(f, (len(k), 1))
71 | kk = np.tile(k, (len(f), 1)).T
72 | vv = - np.divide(ff, kk, out=np.ones_like(ff) * 1e10, where=kk != 0)
73 |
74 | mask = fk_fan_mask(f, k, fmax=fmax, kmin=kmin, vmax=vmax, edge=edge) * vv
75 | mask[kk == 0] = 0
76 |
77 | data_vel = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt]
78 | data_vel = padding(data_vel, dn, reverse=True)
79 |
80 | if verbose:
81 | return data_vel, fk, f, k, mask
82 | return data_vel
83 |
84 |
85 | def curvelet_conversion(data, dx, fs, pad=0.3, scale_begin=2, nbscales=None,
86 | nbangles=16, turning=None):
87 | """
88 |     Use curvelet transform to convert strain/strain rate to
89 |     velocity/acceleration. {Yang et al., 2023, Geophys. Res. Lett.}
90 |
91 | :param data: numpy.ndarray. Data to convert.
92 | :param dx: Channel interval in m.
93 | :param fs: Sampling rate in Hz.
94 |     :param pad: float or sequence of floats. Each float means padding percentage
95 |         before FFT for corresponding dimension. If set to 0.1, 5% is padded before
96 |         the beginning and 5% after the end.
97 | :param scale_begin: int. The beginning scale to do conversion.
98 | :param nbscales: int. Number of scales including the coarsest wavelet level.
99 | Default set to ceil(log2(min(M,N)) - 3).
100 | :param nbangles: int. Number of angles at the 2nd coarsest level,
101 | minimum 8, must be a multiple of 4.
102 |     :param turning: Sequence of int. Channel numbers of turning points.
103 | :return: numpy.ndarray. Converted data.
104 | """
105 | if turning is not None:
106 | data_vel = np.zeros_like(data)
107 | start_ch = [0, *turning]
108 | end_ch = [*turning, len(data)]
109 | for (s, e) in zip(start_ch, end_ch):
110 | data_vel[s:e] = curvelet_conversion(data[s:e], dx, fs, pad=pad,
111 | scale_begin=scale_begin,
112 | nbscales=nbscales,
113 | nbangles=nbangles, turning=None)
114 | else:
115 | if pad is None or pad is False:
116 | pad = 0
117 | dn = np.round(np.array(pad) * data.shape).astype(int)
118 | data_pd = padding(data, dn)
119 |
120 | C = fdct_wrapping(data_pd, is_real=True, finest=1, nbscales=nbscales,
121 | nbangles_coarse=nbangles)
122 |
123 | # rescale with velocity
124 | np.seterr(divide='ignore')
125 | for s in range(0, scale_begin - 1):
126 | for w in range(len(C[s])):
127 | C[s][w] *= 0
128 |
129 | for s in range(scale_begin - 1, len(C)):
130 | nbangles = len(C[s])
131 | velocity = _velocity_bin(nbangles, fs, dx)
132 | factors = np.mean(velocity, axis=1)
133 | for w in range(nbangles):
134 | if abs(factors[w]) == np.inf:
135 | factors[w] = abs(velocity[w]).min() * \
136 | np.sign(velocity[w, 0]) * 2
137 | C[s][w] *= factors[w]
138 |
139 | data_vel = ifdct_wrapping(C, is_real=True, size=data_pd.shape)
140 | data_vel = padding(data_vel, dn, reverse=True)
141 |
142 | return data_vel
143 |
144 |
145 | def slowness(g, dx, fs, slm, sls, swin=2):
146 | """
147 |     Estimate the slowness time series by calculating semblance.
148 | {Lior et al., 2021, Solid Earth}
149 |
150 |     :param g: 2-dimensional array. Time series of adjacent channels used for
151 |         estimating slowness.
152 |     :param dx: float. Channel interval (in m).
153 |     :param fs: float. Sampling rate of records in Hz.
154 |     :param slm: float. Maximum slowness.
155 |     :param sls: float. Slowness step.
156 |     :param swin: int. Half-length of the slowness smoothing window in samples.
157 |     :return: Sequences of slowness and semblance.
158 | """
159 | L = (len(g) - 1) // 2
160 | nt = len(g[0])
161 | h = np.imag(hilbert(g))
162 | grdpnt = round(slm / sls)
163 | sem = np.zeros((2 * grdpnt + 1, nt))
164 | gap = round(slm * dx * L * fs)
165 |
166 | h_ex = np.zeros((len(g), nt + 2 * gap))
167 | h_ex[:, gap:-gap] = h
168 | g_ex = np.zeros((len(g), nt + 2 * gap))
169 | g_ex[:, gap:-gap] = g
170 |
171 | for i in range(2 * grdpnt + 1):
172 | px = (i - grdpnt) * sls
173 | if abs(px) < 1e-5:
174 | continue
175 | gt = np.zeros(g.shape)
176 | ht = np.zeros(h.shape)
177 |         for j in range(-L, L + 1):
178 | shift = round(px * j * dx * fs)
179 | gt[j + L] = g_ex[j + L, gap + shift:gap + shift + nt]
180 | ht[j + L] = h_ex[j + L, gap + shift:gap + shift + nt]
181 | sem[i] = (np.sum(gt, axis=0)**2 + np.sum(ht, axis=0)**2) / \
182 | np.sum(gt**2 + ht**2, axis=0) / (2 * L + 1)
183 | p = (np.argmax(sem, axis=0) - grdpnt) * sls
184 | # smooth P
185 | for i in range(swin, nt - swin):
186 | win = p[i - swin:i + swin + 1]
187 | sign = np.sign(sum(np.sign(win)))
188 | win = [px for px in win if np.sign(px) == sign]
189 | p[i] = np.mean(win)
190 |
191 | return p, sem
192 |
193 |
194 | def slant_stacking(data, dx, fs, L=None, slm=0.01,
195 | sls=0.000125, frqlow=0.1, frqhigh=15, turning=None,
196 | channel='all'):
197 | """
198 | Convert strain to velocity based on slant-stack.
199 |
200 |     :param data: 2-dimensional array. Axis 0 is channel number and axis 1 is
201 |         time series.
202 |     :param dx, fs: float. Channel interval (in m) and sampling rate (in Hz).
203 |     :param L: int. The number of adjacent channels over which slowness is
204 |         estimated.
205 |     :param slm: float. Maximum slowness.
206 |     :param sls: float. Slowness step.
207 |     :param frqlow: Pass band low corner frequency.
208 |     :param frqhigh: Pass band high corner frequency.
209 |     :param turning: Sequence of int. Channel numbers of turning points.
210 |     :param channel: int or list or 'all'. Convert a certain channel number /
211 |         certain channel range / all channels.
212 |     :return: Converted velocity data.
213 | """
214 | if L is None:
215 | L = round(50 / dx)
216 |
217 | nch, nt = data.shape
218 | if isinstance(channel, str) and channel == 'all':
219 | channel = list(range(nch))
220 | elif isinstance(channel, int):
221 | channel = [channel]
222 |
223 | if turning is not None:
224 | data_vel = np.zeros((0, len(data[0])))
225 | start_ch = [0, *turning]
226 | end_ch = [*turning, len(data)]
227 | for (s, e) in zip(start_ch, end_ch):
228 | channel_seg = [ch-s for ch in range(s,e) if ch in channel]
229 | if len(channel_seg):
230 | d_vel = slant_stacking(data[s:e], dx, fs, L=L, slm=slm, sls=sls,
231 | frqlow=frqlow, frqhigh=frqhigh,
232 | turning=None, channel=channel_seg)
233 | data_vel = np.vstack((data_vel, d_vel))
234 | else:
235 | data_ex = padding(data, (2 * L, 0))
236 | swin = int(max((1 / frqhigh * fs) // 2, 1))
237 | data_vel = np.zeros((len(channel), nt))
238 | for i, ch in enumerate(channel):
239 | p, _ = slowness(data_ex[ch:ch + 2 * L + 1], dx, fs, slm, sls,
240 | swin=swin)
241 | data_vel[i] = bandpass(data[ch] / p, fs=fs, freqmin=frqlow,
242 | freqmax=frqhigh)
243 |
244 | return data_vel
245 |
--------------------------------------------------------------------------------
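A minimal usage sketch for the conversion routines above, run on synthetic noise; the channel spacing, sampling rate and array size are illustrative assumptions, not values from the module.

import numpy as np
from daspy.advanced_tools.strain2vel import fk_rescaling, curvelet_conversion

dx, fs = 10, 50                      # 10 m channel spacing, 50 Hz sampling
data = np.random.randn(200, 1500)    # hypothetical strain-rate section, 30 s

vel_fk = fk_rescaling(data, dx, fs)          # f-k domain rescaling
vel_cv = curvelet_conversion(data, dx, fs)   # curvelet-domain rescaling
print(vel_fk.shape, vel_cv.shape)            # both keep the input shape
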
/daspy/basic_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/basic_tools/__init__.py
--------------------------------------------------------------------------------
/daspy/basic_tools/filter.py:
--------------------------------------------------------------------------------
1 | # Purpose: Filter the waveform
2 | # Author: Minzhe Hu
3 | # Date: 2024.10.16
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | # Modified from https://docs.obspy.org/_modules/obspy/signal/filter.html
6 | import warnings
7 | import numpy as np
8 | from scipy.signal import cheb2ord, cheby2, hilbert, iirfilter, zpk2sos, sosfilt
9 |
10 |
11 | def bandpass(data, fs, freqmin, freqmax, corners=4, zi=None, zerophase=True):
12 | """
13 | Filter data from 'freqmin' to 'freqmax' using Butterworth bandpass filter of
14 | 'corners' corners.
15 |
16 | :param data: numpy.ndarray. Data to filter.
17 | :param fs: Sampling rate in Hz.
18 | :param freqmin: Pass band low corner frequency.
19 | :param freqmax: Pass band high corner frequency.
20 | :param corners: Filter corners / order.
21 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
22 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
23 |         trigger an output of the final filter delay values.
24 | :param zerophase: If True, apply filter once forwards and once backwards.
25 | This results in twice the number of corners but zero phase shift in
26 | the resulting filtered data. Only valid when zi is None.
27 | :return: Filtered data and the final filter delay values (if zi is not
28 | None).
29 | """
30 | if len(data.shape) == 1:
31 | data = data[np.newaxis, :]
32 | fe = 0.5 * fs
33 | low = freqmin / fe
34 | high = freqmax / fe
35 | # raise for some bad scenarios
36 | if high - 1.0 > -1e-6:
37 | msg = ('Selected high corner frequency ({}) of bandpass is at or ' +
38 | 'above Nyquist ({}). Applying a high-pass instead.').format(
39 | freqmax, fe)
40 | warnings.warn(msg)
41 | return highpass(data, freq=freqmin, fs=fs, corners=corners,
42 | zerophase=zerophase)
43 | if low > 1:
44 | msg = 'Selected low corner frequency is above Nyquist.'
45 | raise ValueError(msg)
46 | z, p, k = iirfilter(corners, [low, high], btype='band', ftype='butter',
47 | output='zpk')
48 | sos = zpk2sos(z, p, k)
49 | if zi is None:
50 | data_flt = sosfilt(sos, data)
51 | if zerophase:
52 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
53 | return data_flt
54 | elif isinstance(zi, (int, float)):
55 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
56 |
57 | data_flt, zf = sosfilt(sos, data, zi=zi)
58 | return data_flt, zf
59 |
60 |
61 | def bandstop(data, fs, freqmin, freqmax, corners=4, zi=None, zerophase=False):
62 | """
63 | Filter data removing data between frequencies 'freqmin' and 'freqmax' using
64 | Butterworth bandstop filter of 'corners' corners.
65 |
66 | :param data: numpy.ndarray. Data to filter.
67 | :param fs: Sampling rate in Hz.
68 | :param freqmin: Stop band low corner frequency.
69 | :param freqmax: Stop band high corner frequency.
70 | :param corners: Filter corners / order.
71 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
72 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
73 |         trigger an output of the final filter delay values.
74 | :param zerophase: If True, apply filter once forwards and once backwards.
75 | This results in twice the number of corners but zero phase shift in
76 | the resulting filtered data. Only valid when zi is None.
77 | :return: Filtered data and the final filter delay values (if zi is not
78 | None).
79 | """
80 | if len(data.shape) == 1:
81 | data = data[np.newaxis, :]
82 | fe = 0.5 * fs
83 | low = freqmin / fe
84 | high = freqmax / fe
85 | # raise for some bad scenarios
86 | if high > 1:
87 | high = 1.0
88 | msg = 'Selected high corner frequency is above Nyquist. Setting ' + \
89 | 'Nyquist as high corner.'
90 | warnings.warn(msg)
91 | if low > 1:
92 | msg = 'Selected low corner frequency is above Nyquist.'
93 | raise ValueError(msg)
94 | z, p, k = iirfilter(corners, [low, high],
95 | btype='bandstop', ftype='butter', output='zpk')
96 | sos = zpk2sos(z, p, k)
97 | if zi is None:
98 | data_flt = sosfilt(sos, data)
99 | if zerophase:
100 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
101 | return data_flt
102 | elif isinstance(zi, (int, float)):
103 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
104 |
105 | data_flt, zf = sosfilt(sos, data, zi=zi)
106 | return data_flt, zf
107 |
108 |
109 | def lowpass(data, fs, freq, corners=4, zi=None, zerophase=False):
110 | """
111 | Filter data removing data over certain frequency 'freq' using Butterworth
112 | lowpass filter of 'corners' corners.
113 |
114 | :param data: numpy.ndarray. Data to filter.
115 | :param fs: Sampling rate in Hz.
116 | :param freq: Filter corner frequency.
117 | :param corners: Filter corners / order.
118 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
119 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
120 |         trigger an output of the final filter delay values.
121 | :param zerophase: If True, apply filter once forwards and once backwards.
122 | This results in twice the number of corners but zero phase shift in
123 | the resulting filtered data. Only valid when zi is None.
124 | :return: Filtered data and the final filter delay values (if zi is not
125 | None).
126 | """
127 | if len(data.shape) == 1:
128 | data = data[np.newaxis, :]
129 | fe = 0.5 * fs
130 | f = freq / fe
131 | # raise for some bad scenarios
132 | if f > 1:
133 | f = 1.0
134 | msg = 'Selected corner frequency is above Nyquist. Setting Nyquist ' + \
135 | 'as high corner.'
136 | warnings.warn(msg)
137 | z, p, k = iirfilter(corners, f, btype='lowpass', ftype='butter',
138 | output='zpk')
139 | sos = zpk2sos(z, p, k)
140 | if zi is None:
141 | data_flt = sosfilt(sos, data)
142 | if zerophase:
143 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
144 | return data_flt
145 | elif isinstance(zi, (int, float)):
146 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
147 |
148 | data_flt, zf = sosfilt(sos, data, zi=zi)
149 | return data_flt, zf
150 |
151 |
152 | def lowpass_cheby_2(data, fs, freq, maxorder=12, zi=None, ba=False,
153 | freq_passband=False):
154 | """
155 | Filter data by passing data only below a certain frequency. The main purpose
156 | of this cheby2 filter is downsampling. This method will iteratively design a
157 | filter, whose pass band frequency is determined dynamically, such that the
158 | values above the stop band frequency are lower than -96dB.
159 |
160 | :param data: numpy.ndarray. Data to filter.
161 | :param fs: Sampling rate in Hz.
162 |     :param freq: The frequency above which signals are attenuated by 96 dB.
163 | :param maxorder: Maximal order of the designed cheby2 filter.
164 |     :param zi: None, 0, or array_like. Initial conditions for the cascaded
165 |         filter delays. It is an array of shape (n_sections, nch, 2). Set to 0 to
166 |         trigger an output of the final filter delay values.
167 | :param ba: If True return only the filter coefficients (b, a) instead of
168 | filtering.
169 | :param freq_passband: If True return additionally to the filtered data, the
170 | iteratively determined pass band frequency.
171 | :return: Filtered data, the final filter delay values (if zi is not None)
172 | and the determined pass band frequency (if freq_passband is True).
173 | """
174 | if data.ndim == 1:
175 | data = data[np.newaxis, :]
176 |
177 | nyquist = fs * 0.5
178 | # rp - maximum ripple of passband, rs - attenuation of stopband
179 | rp, rs, order = 1, 96, 1e99
180 | ws = freq / nyquist # stop band frequency
181 | wp = ws # pass band frequency
182 | # raise for some bad scenarios
183 | if ws > 1:
184 | ws = 1.0
185 | warnings.warn('Selected corner frequency is above Nyquist. Setting '
186 | 'Nyquist as high corner.')
187 | while True:
188 | if order <= maxorder:
189 | break
190 | wp = wp * 0.99
191 | order, wn = cheb2ord(wp, ws, rp, rs, analog=0)
192 | if ba:
193 | return cheby2(order, rs, wn, btype='low', analog=0, output='ba')
194 | z, p, k = cheby2(order, rs, wn, btype='low', analog=0, output='zpk')
195 | sos = zpk2sos(z, p, k)
196 | if zi is None:
197 | data_flt = sosfilt(sos, data)
198 | if freq_passband:
199 | return data_flt, wp * nyquist
200 | return data_flt
201 | elif isinstance(zi, (int, float)):
202 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
203 |
204 | data_flt, zf = sosfilt(sos, data, zi=zi)
205 | if freq_passband:
206 | return data_flt, zf, wp * nyquist
207 | return data_flt, zf
208 |
209 |
210 | def highpass(data, fs, freq, corners=4, zi=None, zerophase=False):
211 | """
212 | Filter data removing data below certain frequency 'freq' using Butterworth
213 | highpass filter of 'corners' corners.
214 |
215 | :param data: numpy.ndarray. Data to filter.
216 | :param fs: Sampling rate in Hz.
217 | :param freq: Filter corner frequency.
218 | :param corners: Filter corners / order.
219 | :param zerophase: If True, apply filter once forwards and once backwards.
220 | This results in twice the number of corners but zero phase shift in
221 | the resulting filtered data. Only valid when zi is None.
222 | :return: Filtered data and the final filter delay values (if zi is not
223 | None).
224 | """
225 | if len(data.shape) == 1:
226 | data = data[np.newaxis, :]
227 | fe = 0.5 * fs
228 | f = freq / fe
229 | # raise for some bad scenarios
230 | if f > 1:
231 | msg = 'Selected corner frequency is above Nyquist.'
232 | raise ValueError(msg)
233 | z, p, k = iirfilter(corners, f, btype='highpass', ftype='butter',
234 | output='zpk')
235 | sos = zpk2sos(z, p, k)
236 | if zi is None:
237 | data_flt = sosfilt(sos, data)
238 | if zerophase:
239 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1]
240 | return data_flt
241 | elif isinstance(zi, (int, float)):
242 | zi = np.ones((sos.shape[0], len(data), 2)) * zi
243 |
244 | data_flt, zf = sosfilt(sos, data, zi=zi)
245 | return data_flt, zf
246 |
247 | def envelope(data):
248 | """
249 |     Computes the envelope of the given data. The envelope is determined by
250 |     adding the squared amplitudes of the data and its Hilbert transform and
251 |     then taking the square root. The envelope at the start/end should not be
252 |     taken too seriously.
253 |
254 | :param data: numpy.ndarray. Data to make envelope of.
255 | :return: Envelope of input data.
256 | """
257 | return abs(hilbert(data, axis=-1))
258 |
--------------------------------------------------------------------------------
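All filters above share the same calling convention: 2-D input (channels x samples), an optional zero-phase mode, and an optional zi state for block-wise processing. Below is a short sketch; the sampling rate, band edges and array size are arbitrary example values.

import numpy as np
from daspy.basic_tools.filter import bandpass, envelope

fs = 100
data = np.random.randn(50, 3000)     # 50 channels, 30 s of synthetic noise

# one-shot, zero-phase 1-10 Hz bandpass and its channel-wise envelope
flt = bandpass(data, fs, freqmin=1, freqmax=10)
env = envelope(flt)

# block-wise filtering: zi=0 returns the final filter state, which seeds
# the next block so there is no discontinuity at the block boundary
blk1, zf = bandpass(data[:, :1500], fs, 1, 10, zi=0)
blk2, _ = bandpass(data[:, 1500:], fs, 1, 10, zi=zf)
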
/daspy/basic_tools/freqattributes.py:
--------------------------------------------------------------------------------
1 | # Purpose: Analyze frequency attribute and transform in frequency domain
2 | # Author: Minzhe Hu
3 | # Date: 2024.6.8
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from numpy.fft import rfft, rfft2, fftshift, fftfreq, rfftfreq
7 | from scipy.signal import stft
8 | from daspy.basic_tools.preprocessing import demeaning, detrending, cosine_taper
9 |
10 |
11 | def next_pow_2(i):
12 | """
13 | Find the next power of two.
14 |
15 | :param i: float or int.
16 | :return: int. The next power of two for i.
17 | """
18 | buf = np.ceil(np.log2(i))
19 | return np.power(2, buf).astype(int)
20 |
21 |
22 | def spectrum(data, fs, taper=0.05, nfft='default'):
23 | """
24 | Computes the spectrum of the given data.
25 |
26 | :param data: numpy.ndarray. Data to make spectrum of.
27 | :param fs: Sampling rate in Hz.
28 | :param taper: Decimal percentage of Tukey taper.
29 | :param nfft: Number of points for FFT. None = sampling points, 'default' =
30 | next power of 2 of sampling points.
31 | :return: Spectrum and frequency sequence.
32 | """
33 | if len(data.shape) == 1:
34 | data = data.reshape(1, len(data))
35 | elif len(data.shape) != 2:
36 | raise ValueError("Data should be 1-D or 2-D array")
37 | data = cosine_taper(data, (0, taper))
38 |
39 | if nfft == 'default':
40 | nfft = next_pow_2(len(data[0]))
41 | elif nfft is None:
42 | nfft = len(data[0])
43 |
44 | spec = rfft(data, n=nfft, axis=1)
45 | f = rfftfreq(nfft, d=1 / fs)
46 |
47 | return spec, f
48 |
49 |
50 | def spectrogram(data, fs, nperseg=256, noverlap=None, nfft=None, detrend=False,
51 | boundary='zeros'):
52 | """
53 | Computes the spectrogram of the given data.
54 |
55 | :param data: 1-D or 2-D numpy.ndarray. Data to make spectrogram of.
56 | :param fs: Sampling rate in Hz.
57 | :param nperseg: int. Length of each segment.
58 | :param noverlap: int. Number of points to overlap between segments. If None,
59 | noverlap = nperseg // 2.
60 | :param nfft: int. Length of the FFT used. None = nperseg.
61 |     :param detrend: str or bool. Specifies whether and how to detrend each
62 | segment. 'linear' or 'detrend' or True = detrend, 'constant' or
63 | 'demean' = demean.
64 | :param boundary: str or None. Specifies whether the input signal is extended
65 | at both ends, and how to generate the new values, in order to center the
66 | first windowed segment on the first input point. This has the benefit of
67 | enabling reconstruction of the first input point when the employed
68 | window function starts at zero. Valid options are ['even', 'odd',
69 | 'constant', 'zeros', None].
70 | :return: Spectrogram, frequency sequence and time sequence.
71 | """
72 | if detrend in [True, 'linear', 'detrend']:
73 | detrend = detrending
74 | elif detrend in ['constant', 'demean']:
75 | detrend = demeaning
76 | if data.ndim == 1:
77 | f, t, Zxx = stft(data, fs=fs, nperseg=nperseg, noverlap=noverlap,
78 | nfft=nfft, detrend=detrend, boundary=boundary)
79 | elif len(data) == 1:
80 | f, t, Zxx = stft(data[0], fs=fs, nperseg=nperseg, noverlap=noverlap,
81 | nfft=nfft, detrend=detrend, boundary=boundary)
82 | else:
83 | Zxx = []
84 | for d in data:
85 | f, t, Zxxi = stft(d, fs=fs, nperseg=nperseg, noverlap=noverlap,
86 | nfft=nfft, detrend=detrend, boundary=boundary)
87 | Zxx.append(abs(Zxxi))
88 | Zxx = np.mean(np.array(Zxx), axis=0)
89 |
90 | return Zxx, f, t
91 |
92 |
93 | def fk_transform(data, dx, fs, taper=(0, 0.05), nfft='default'):
94 | """
95 | Transform the data to the fk domain using 2-D Fourier transform method.
96 |
97 | :param data: numpy.ndarray. Data to do fk transform.
98 | :param dx: Channel interval in m.
99 | :param fs: Sampling rate in Hz.
100 |     :param taper: float or sequence of floats. Each float means decimal
101 |         percentage of Tukey taper for corresponding dimension (ranging from 0 to
102 |         1). The default (0, 0.05) leaves the spatial dimension untapered and
103 |         applies a 5% Tukey taper to the time dimension.
104 | :param nfft: Number of points for FFT. None means sampling points; 'default'
105 | means next power of 2 of sampling points, which makes result smoother.
106 | """
107 | nch, nt = data.shape
108 | data = cosine_taper(data, taper)
109 | if nfft == 'default':
110 | nfft = (next_pow_2(nch), next_pow_2(nt))
111 | elif not nfft:
112 | nfft = (nch, nt)
113 |
114 | fk = fftshift(rfft2(data, s=nfft), axes=0)
115 | f = rfftfreq(nfft[1], d=1. / fs)
116 | k = fftshift(fftfreq(nfft[0], d=dx))
117 | return fk, f, k
118 |
--------------------------------------------------------------------------------
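A quick sketch of how these spectral helpers fit together; the channel spacing, sampling rate and segment length below are example values only.

import numpy as np
from daspy.basic_tools.freqattributes import spectrum, spectrogram, fk_transform

dx, fs = 5, 100
data = np.random.randn(64, 2048)     # synthetic 64-channel record

spec, f = spectrum(data, fs)                       # per-channel spectra
Zxx, f_st, t = spectrogram(data[0], fs, nperseg=128)
fk, f_fk, k = fk_transform(data, dx, fs)           # 2-D f-k spectrum
print(spec.shape, Zxx.shape, fk.shape)
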
/daspy/basic_tools/preprocessing.py:
--------------------------------------------------------------------------------
1 | # Purpose: Some preprocess methods
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.21
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | from scipy.signal import detrend
7 | from scipy.signal.windows import tukey
8 | from daspy.basic_tools.filter import lowpass_cheby_2
9 |
10 |
11 | def phase2strain(data, lam, e, n, gl):
12 | """
13 | Convert the optical phase shift in radians to strain.
14 |
15 | :param data: numpy.ndarray. Data to convert.
16 | :param lam: float. Operational optical wavelength in vacuum.
17 |     :param e: float. Photo-elastic scaling factor for longitudinal strain in
18 |         isotropic material.
19 | :param n: float. Refractive index of the sensing fiber.
20 |     :param gl: float. Gauge length.
21 | :return: Strain data.
22 | """
23 | return data * (lam * 1e-9) / (e * 4 * np.pi * n * gl)
24 |
25 |
26 | def normalization(data, method='z-score'):
27 | """
28 |     Normalize each individual channel with the specified method.
29 |
30 | :param data: numpy.ndarray. Data to normalize.
31 | :param method: str. Method for normalization, should be one of 'max',
32 | 'z-score', 'MAD' or 'one-bit'.
33 | :return: Normalized data.
34 | """
35 | if data.ndim == 1:
36 | data = data.reshape(1, len(data))
37 | elif data.ndim != 2:
38 | raise ValueError("Data should be 1-D or 2-D array")
39 |
40 | if method.lower() == 'max':
41 | amp = np.max(abs(data), 1, keepdims=True)
42 | amp[amp == 0] = amp[amp > 0].min()
43 | return data / amp
44 | elif method.lower() == 'z-score':
45 | mean = np.mean(data, axis=1, keepdims=True)
46 | std = np.std(data, axis=1, keepdims=True)
47 | std[std == 0] = std[std > 0].min()
48 | return (data - mean) / std
49 | elif method.lower() == 'mad':
50 | median = np.median(data, axis=1, keepdims=True)
51 | mad = np.median(abs(data - median), axis=1, keepdims=True)
52 | mad[mad == 0] = mad[mad > 0].min()
53 | return (data - median) / mad
54 | elif method.lower() == 'one-bit':
55 | return np.sign(data)
56 |
57 |
58 | def demeaning(data):
59 | """
60 |     Demean signal by subtracting the mean of each channel.
61 | 
62 |     :param data: numpy.ndarray. Data to demean.
63 |     :return: Demeaned data.
64 | """
65 | return detrend(data, type='constant')
66 |
67 |
68 | def detrending(data):
69 | """
70 |     Detrend signal by subtracting a linear least-squares fit from the data.
71 |
72 | :param data: numpy.ndarray. Data to detrend.
73 | :return: Detrended data.
74 | """
75 | return detrend(data, type='linear')
76 |
77 |
78 | def stacking(data: np.ndarray, N: int, step: int = None, average: bool = True):
79 | """
80 |     Stack several channels to increase the signal-to-noise ratio (SNR).
81 |
82 | :param data: numpy.ndarray. Data to stack.
83 | :param N: int. N adjacent channels stacked into 1.
84 | :param step: int. Interval of data stacking.
85 | :param average: bool. True for calculating the average.
86 | :return: Stacked data.
87 | """
88 | if N == 1:
89 | return data
90 | if step is None:
91 | step = N
92 | nch, nt = data.shape
93 | begin = np.arange(0, nch - N + 1, step)
94 | end = begin + N
95 | nx1 = len(begin)
96 | data_stacked = np.zeros((nx1, nt))
97 | for i in range(nx1):
98 | data_stacked[i, :] = np.sum(data[begin[i]:end[i], :], axis=0)
99 | if average:
100 | data_stacked /= N
101 | return data_stacked
102 |
103 |
104 | def cosine_taper(data, p=0.1, side='both'):
105 | """
106 | Taper using Tukey window.
107 |
108 | :param data: numpy.ndarray. Data to taper.
109 | :param p: float or sequence of floats. Each float means decimal percentage
110 | of Tukey taper for corresponding dimension (ranging from 0 to 1).
111 | Default is 0.1 which tapers 5% from the beginning and 5% from the end.
112 |         If only one float is given, only the time dimension is tapered.
113 | :param side: str. 'both', 'left', or 'right'.
114 | :return: Tapered data.
115 | """
116 | if data.ndim == 1:
117 | data = data.reshape(1, -1)
118 | nch, nt = data.shape
119 | if not isinstance(p, (tuple, list, np.ndarray)):
120 | win = tukey(nt, p)
121 | if side == 'left':
122 |             win[round(nt/2):] = 1
123 | elif side == 'right':
124 | win[:round(len(win)/2)] = 1
125 | return data * np.tile(win, (nch, 1))
126 | else:
127 | if p[0] > 0:
128 | data = data * np.tile(tukey(nch, p[0]), (nt, 1)).T
129 | return cosine_taper(data, p[1], side=side)
130 |
131 |
132 | def downsampling(data, xint=None, tint=None, stack=True, lowpass_filter=True):
133 | """
134 | Downsample DAS data.
135 |
136 | :param data: numpy.ndarray. Data to downsample can be 1-D or 2-D.
137 | :param xint: int. Spatial downsampling factor.
138 |     :param tint: int. Time downsampling factor.
139 |     :param stack: bool. Stack adjacent channels (True) or decimate them (False).
140 |     :param lowpass_filter: bool. Lowpass cheby2 filter before time downsampling.
141 | :return: Downsampled data.
142 | """
143 | data_ds = data.copy()
144 | if xint and xint > 1:
145 | if stack:
146 | data_ds = stacking(data, xint)
147 | else:
148 | data_ds = data_ds[::xint].copy()
149 | if tint and tint > 1:
150 | if lowpass_filter:
151 | data_ds = lowpass_cheby_2(data_ds, 1, 1 / 2 / tint)
152 | if len(data_ds.shape) == 1:
153 | data_ds = data_ds[::tint].copy()
154 | else:
155 | data_ds = data_ds[:, ::tint].copy()
156 | return data_ds
157 |
158 |
159 | def trimming(data, dx=None, fs=None, xmin=0, xmax=None, tmin=0, tmax=None,
160 | mode=0):
161 | """
162 | Cut data to given start and end distance/channel or time/sampling points.
163 |
164 | :param data: numpy.ndarray. Data to trim can be 1-D or 2-D.
165 | :param dx: Channel interval in m.
166 | :param fs: Sampling rate in Hz.
167 | :param xmin, xmax, tmin, tmax: Boundary for trimming.
168 | :param mode: 0 means the unit of boundary is channel number and sampling
169 | points; 1 means the unit of boundary is meters and seconds.
170 | :return: Trimmed data.
171 | """
172 | nch, nt = data.shape
173 | if mode == 0:
174 | if xmax is None:
175 | xmax = nch
176 | if tmax is None:
177 | tmax = nt
178 |     elif mode == 1:
179 |         xmin = round(xmin / dx)
180 |         xmax = nch if xmax is None else round(xmax / dx)
181 |         tmin = round(tmin * fs)
182 |         tmax = nt if tmax is None else round(tmax * fs)
183 |
184 | return data[xmin:xmax, tmin:tmax].copy()
185 |
186 |
187 | def padding(data, dn, reverse=False):
188 | """
189 | Pad DAS data with 0.
190 |
191 | :param data: numpy.ndarray. 2D DAS data to pad.
192 | :param dn: int or sequence of ints. Number of points to pad for both
193 | dimensions.
194 | :param reverse: bool. Set True to reverse the operation.
195 | :return: Padded data.
196 | """
197 | nch, nt = data.shape
198 | if isinstance(dn, int):
199 | dn = (dn, dn)
200 |
201 | pad = (dn[0] // 2, dn[0] - dn[0] // 2, dn[1] // 2, dn[1] - dn[1] // 2)
202 | if reverse:
203 | return data[pad[0]:nch - pad[1], pad[2]:nt - pad[3]]
204 | else:
205 | data_pd = np.zeros((nch + dn[0], nt + dn[1]))
206 | data_pd[pad[0]:nch + pad[0], pad[2]:nt + pad[2]] = data
207 | return data_pd
208 |
209 |
210 | def time_integration(data, fs, c=0):
211 | """
212 | Integrate DAS data in time.
213 |
214 | :param data: numpy.ndarray. 2D DAS data.
215 | :param fs: Sampling rate in Hz.
216 | :param c: float. A constant added to the result.
217 | :return: Integrated data.
218 | """
219 | return np.cumsum(data, axis=1) / fs + c
220 |
221 |
222 | def time_differential(data, fs, prepend=0):
223 | """
224 | Differentiate DAS data in time.
225 |
226 | :param data: numpy.ndarray. 2D DAS data.
227 | :param fs: Sampling rate in Hz.
228 | :param prepend: 'mean' or values to prepend to `data` along axis prior to
229 | performing the difference.
230 | :return: Differentiated data.
231 | """
232 | if prepend == 'mean':
233 | prepend = np.mean(data, axis=1).reshape((-1, 1))
234 | return np.diff(data, axis=1, prepend=prepend) * fs
235 |
236 |
237 | def distance_integration(data, dx, c=0):
238 | """
239 | Integrate DAS data in distance.
240 |
241 | :param data: numpy.ndarray. 2D DAS data.
242 | :param dx: Channel interval in m.
243 | :param c: float. A constant added to the result.
244 | :return: Integrated data.
245 | """
246 |     return np.cumsum(data, axis=0) * dx + c
--------------------------------------------------------------------------------
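A minimal usage sketch for the preprocessing helpers above (the array shape, dx,
fs and the factors are illustrative values, not taken from the package docs):

    import numpy as np
    from daspy.basic_tools.preprocessing import (downsampling, trimming,
                                                 padding, time_integration)

    data = np.random.randn(100, 2000)            # 100 channels x 2000 samples
    ds = downsampling(data, xint=2, tint=5)      # stack channel pairs, decimate time by 5
    cut = trimming(data, dx=5, fs=100, xmin=50, xmax=250, tmin=1, tmax=10, mode=1)
    pad = padding(data, (10, 20))                # zero-pad 10 channels / 20 samples
    back = padding(pad, (10, 20), reverse=True)  # undo the padding
    integ = time_integration(data, fs=100)       # cumulative sum along time / fs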
/daspy/basic_tools/visualization.py:
--------------------------------------------------------------------------------
1 | # Purpose: Plot data
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.20
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | from collections.abc import Sequence
8 |
9 |
10 | def plot(data: np.ndarray, dx=None, fs=None, ax=None, obj='waveform', dpi=300,
11 | title=None, transpose=False, t0=0, x0=0, pick=None, f=None, k=None,
12 | t=None, c=None, cmap=None, vmin=None, vmin_per=None, vmax=None,
13 | vmax_per=None, dB=False, xmode='distance', tmode='time', xlim=None,
14 | ylim=None, xlog=False, ylog=False, xinv=False, yinv=False, xlabel=True,
15 | ylabel=True, xticklabels=True, yticklabels=True, colorbar=True,
16 | colorbar_label=None, savefig=None):
17 | """
18 | Plot several types of 2-D seismological data.
19 |
20 | :param data: numpy.ndarray. Data to plot.
21 | :param dx: Channel interval in m.
22 | :param fs: Sampling rate in Hz.
23 | :param ax: Matplotlib.axes.Axes or tuple. Axes to plot. A tuple for new
24 | figsize. If not specified, the function will directly display the image
25 | using matplotlib.pyplot.show().
26 | :param obj: str. Type of data to plot. It should be one of 'waveform',
27 | 'phasepick', 'spectrum', 'spectrogram', 'fk', or 'dispersion'.
28 | :param dpi: int. The resolution of the figure in dots-per-inch.
29 | :param title: str. The title of this axes.
30 | :param transpose: bool. Transpose the figure or not.
31 | :param t0, x0: The beginning of time and space.
32 | :param pick: dictionary of sequence of picked phases. Key should be 'P' for
33 | P phase, 'S' for S phase and 'N' for unknown phase type. Required if
34 | obj=='phasepick'.
35 | :param f: Sequence of frequency. Required if obj is one of 'spectrum',
36 | 'spectrogram', 'fk' or 'dispersion'.
37 | :param k: Wavenumber sequence. Required if obj=='fk'.
38 | :param t: Time sequence. Required if obj=='spectrogram'.
39 | :param c: Phase velocity sequence. Required if obj=='dispersion'.
40 | :param cmap: str or Colormap. The Colormap instance or registered colormap
41 | name used to map scalar data to colors.
42 | :param vmin, vmax: Define the data range that the colormap covers.
43 | :param vmin_per, vmax_per: float. Define the data range that the colormap
44 | covers by percentile.
45 | :param dB: bool. Transfer data unit to dB and take 1 as the reference value.
46 | :param xmode: str. 'distance' or 'channel'.
47 | :param tmode: str. 'time' or 'sampling'.
48 | :param xlim, ylim: Set the x-axis and y-axis view limits.
49 | :param xlog, ylog: bool. If True, set the x-axis' or y-axis' scale as log.
50 |     :param xinv, yinv: bool. If True, invert x-axis or y-axis.
51 | :param xlabel, ylabel: bool or str. Whether to plot a label or what label to
52 | plot for x-axis or y-axis.
53 | :param xticklabels, yticklabels: bool or sequence of str. Whether to plot
54 | ticklabels or what ticklabels to plot for x-axis or y-axis.
55 |     :param colorbar: bool, str or Matplotlib.axes.Axes. Bool means whether to
56 |         plot a colorbar, str means its location, Axes means the Axes to draw in.
57 |     :param colorbar_label: str. Label for the colorbar.
58 |     :param savefig: str or bool. Figure name to save if needed. If True, the
59 |         file name defaults to the value of obj plus '.png'.
60 | """
61 | nch, nt = data.shape
62 | if ax is None:
63 | ax = (6, 5)
64 | if isinstance(ax, tuple):
65 | fig, ax = plt.subplots(1, figsize=ax, dpi=dpi)
66 | show = True
67 | else:
68 | show = False
69 |
70 | if obj in ['waveform', 'phasepick']:
71 | cmap = 'RdBu' if cmap is None else cmap
72 | if vmax is None:
73 | vmax_per = 80 if vmax_per is None else vmax_per
74 | vmax = np.percentile(data, vmax_per)
75 | vmin = -vmax if vmin is None else vmin
76 | origin = 'upper'
77 | if fs is None or tmode == 'sampling':
78 | ylabel_default = 'Sampling points'
79 | fs = 1
80 | elif tmode == 'time':
81 | ylabel_default = 'Time (s)'
82 |
83 | if dx is None or xmode.lower() == 'channel':
84 | xlabel_default = 'Channel'
85 | extent = [x0, x0 + nch, t0 + nt / fs, t0]
86 | elif xmode.lower() == 'distance':
87 |             xlabel_default = 'Distance (km)'
88 | extent = [x0 * 1e-3, (x0 + nch * dx) * 1e-3, t0 + nt / fs, t0]
89 |
90 | if obj == 'phasepick' and len(pick):
91 | pick_color = {'P': 'r', 'S': 'b', 'N': 'k'}
92 | for phase, pck in pick.items():
93 | if len(pck):
94 | pck = np.array(pck).astype(float)
95 | if xmode.lower() == 'distance':
96 | pck[:, 0] = (x0 + pck[:, 0] * dx) * 1e-3
97 | elif xmode.lower() == 'channel':
98 | pck[:, 0] = x0 + pck[:, 0]
99 | if tmode.lower() == 'sampling':
100 | pck[:, 1] = pck[:, 1] / fs
101 | ax.scatter(pck[:,0], t0 + pck[:,1], marker=',', s=0.1,
102 | c=pick_color[phase])
103 |
104 | elif obj in ['spectrum', 'spectrogram', 'fk', 'dispersion']:
105 | if np.iscomplex(data).any():
106 | data = abs(data)
107 | if dB:
108 | data = 20 * np.log10(data)
109 | cmap = 'jet' if cmap is None else cmap
110 |
111 | if vmax is None:
112 | vmax_per = 80 if vmax_per is None else vmax_per
113 | vmax = np.percentile(data, vmax_per)
114 | if vmin is None:
115 | vmin_per = 20 if vmin_per is None else vmin_per
116 | vmin = np.percentile(data, vmin_per)
117 |
118 | if obj == 'spectrum':
119 | origin = 'lower'
120 | if dx is None or xmode.lower() == 'channel':
121 | xlabel_default = 'Channel'
122 | extent = [x0, x0 + nch, min(f), max(f)]
123 | elif xmode.lower() == 'distance':
124 |                 xlabel_default = 'Distance (km)'
125 | extent = [x0 * 1e-3, (x0 + nch * dx) * 1e-3, min(f), max(f)]
126 | ylabel_default = 'Frequency (Hz)'
127 | elif obj == 'spectrogram':
128 | data = data.T
129 | origin = 'lower'
130 | xlabel_default = 'Time (s)'
131 | ylabel_default = 'Frequency (Hz)'
132 | extent = [t0 + min(t), t0 + max(t), min(f), max(f)]
133 | elif obj == 'fk':
134 | origin = 'lower'
135 | xlabel_default = 'Wavenumber (m$^{-1}$)'
136 | ylabel_default = 'Frequency (Hz)'
137 | extent = [min(k), max(k), min(f), max(f)]
138 | elif obj == 'dispersion':
139 | data = data.T
140 | origin = 'lower'
141 | xlabel_default = 'Frequency (Hz)'
142 | ylabel_default = 'Velocity (m/s)'
143 | extent = [min(f), max(f), min(c), max(c)]
144 |
145 | if transpose:
146 | if origin == 'lower':
147 | extent = [extent[2], extent[3], extent[0], extent[1]]
148 | else:
149 | origin = 'lower'
150 | extent = [extent[3], extent[2], extent[0], extent[1]]
151 | (xlabel_default, ylabel_default) = (ylabel_default, xlabel_default)
152 | data = data.T
153 |
154 | xlabel = xlabel if isinstance(xlabel, str) else \
155 | xlabel_default if xlabel else None
156 | ylabel = ylabel if isinstance(ylabel, str) else \
157 | ylabel_default if ylabel else None
158 |
159 | bar = ax.imshow(data.T, vmin=vmin, vmax=vmax, extent=extent, aspect='auto',
160 | origin=origin, cmap=cmap)
161 | if title:
162 | ax.set_title(title)
163 | ax.set_xlabel(xlabel)
164 | ax.set_ylabel(ylabel)
165 | if isinstance(xticklabels, Sequence):
166 | ax.set_xticklabels(xticklabels)
167 | elif not xticklabels:
168 | ax.set_xticklabels([])
169 |
170 | if isinstance(yticklabels, Sequence):
171 | ax.set_yticklabels(yticklabels)
172 | elif not yticklabels:
173 | ax.set_yticklabels([])
174 | if xinv:
175 | ax.invert_xaxis()
176 | if yinv:
177 | ax.invert_yaxis()
178 | if ylim:
179 | ax.set_ylim(ylim)
180 | if xlim:
181 | ax.set_xlim(xlim)
182 | if xlog:
183 | ax.set_xscale('log')
184 | if ylog:
185 | ax.set_yscale('log')
186 | if colorbar:
187 | if colorbar is True:
188 | cbar = plt.colorbar(bar, ax=ax, location='right')
189 | elif isinstance(colorbar, str):
190 | cbar = plt.colorbar(bar, ax=ax, location=colorbar)
191 | else:
192 | cbar = plt.colorbar(bar, cax=colorbar)
193 | if colorbar_label is not None:
194 | cbar.set_label(colorbar_label)
195 |
196 | if savefig:
197 | if not isinstance(savefig, str):
198 | savefig = obj + '.png'
199 | plt.tight_layout()
200 | plt.savefig(savefig)
201 | plt.close()
202 | elif show:
203 | plt.show()
204 | else:
205 | return ax
206 |
--------------------------------------------------------------------------------
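A hedged example of calling plot() on a waveform matrix; the array, dx and fs
below are synthetic values chosen only for illustration:

    import numpy as np
    from daspy.basic_tools.visualization import plot

    data = np.random.randn(200, 5000)    # 200 channels x 5000 time samples
    plot(data, dx=4, fs=250, title='Synthetic DAS record',
         savefig='waveform_demo.png')    # writes the figure instead of showing it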
/daspy/core/__init__.py:
--------------------------------------------------------------------------------
1 | from daspy.core.section import Section
2 | from daspy.core.collection import Collection
3 | from daspy.core.read import read
4 | from daspy.core.dasdatetime import DASDateTime, local_tz, utc
--------------------------------------------------------------------------------
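Because of these re-exports, downstream code can import the core objects in a
single line (a usage note, not part of the source):

    from daspy.core import Section, Collection, read, DASDateTime, utc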
/daspy/core/collection.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for handling Collection objects.
2 | # Author: Minzhe Hu
3 | # Date: 2025.6.4
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import os
6 | import warnings
7 | import pickle
8 | import numpy as np
9 | from copy import deepcopy
10 | from tqdm import tqdm
11 | from glob import glob
12 | from datetime import datetime
13 | from daspy.core.read import read
14 | from daspy.core.dasdatetime import DASDateTime
15 |
16 |
17 | class Collection(object):
18 | def __init__(self, fpath, ftype=None, flength=None, meta_from_file=True,
19 | timeinfo_slice=slice(None), timeinfo_format=None,
20 | timeinfo_tz=None, timeinfo_from_basename=True, **kwargs):
21 | """
22 | :param fpath: str or Sequence of str. File path(s) containing data.
23 | :param ftype: None or str. None for automatic detection, or 'pkl',
24 | 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 'npy'.
25 |         :param flength: float. The duration of a single file in seconds.
26 |         :param meta_from_file: bool or 'all'. False to set nch, nt, dx, fs and
27 |             gauge_length manually. True to extract them from one of the first
28 |             two files. 'all' to extract and cross-check these metadata from
29 |             all files.
30 | :param timeinfo_slice: slice. Slice for extracting start time from file
31 | name.
32 | :param timeinfo_format: str. Format for extracting start time from file
33 | name.
34 | :param timeinfo_tz: datetime.timezone. Time zone for extracting start
35 | time from file name.
36 |         :param timeinfo_from_basename: bool. If True, parse the start time with
37 |             DASDateTime.strptime from the basename of each file path.
38 | :param nch: int. Channel number.
39 | :param nt: int. Sampling points of each file.
40 | :param dx: number. Channel interval in m.
41 | :param fs: number. Sampling rate in Hz.
42 | :param gauge_length: number. Gauge length in m.
43 | """
44 | if isinstance(fpath, (list, tuple)):
45 | self.flist = []
46 | for fp in fpath:
47 | self.flist.extend(glob(fp))
48 | else:
49 | self.flist = glob(fpath)
50 | if not len(self.flist):
51 | raise ValueError('No file input.')
52 | self.flist.sort()
53 | self.ftype = ftype
54 | for key in ['nch', 'nt', 'dx', 'fs', 'gauge_length']:
55 | if key in kwargs.keys():
56 | setattr(self, key, kwargs[key])
57 | if timeinfo_format is None and not meta_from_file:
58 | meta_from_file = True
59 |
60 | if meta_from_file == 'all':
61 | ftime = []
62 | metadata_list = []
63 | for f in self.flist:
64 | sec = read(f, ftype=ftype, headonly=True)
65 | if not hasattr(sec, 'gauge_length'):
66 | sec.gauge_length = None
67 | ftime.append(sec.start_time)
68 | metadata_list.append((sec.nch, sec.nt, sec.dx, sec.fs,
69 | sec.gauge_length, sec.duration))
70 |
71 | if len(set(metadata_list)) > 1:
72 | warnings.warn('More than one kind of setting detected.')
73 | metadata = max(metadata_list, key=metadata_list.count)
74 | for i, key in enumerate(['nch', 'nt', 'dx', 'fs', 'gauge_length']):
75 | if not hasattr(self, key):
76 | setattr(self, key, metadata[i])
77 | if flength is None:
78 | flength = metadata[-1]
79 | self.ftime = ftime
80 | elif meta_from_file:
81 | i = int(len(self.flist) > 1)
82 | sec = read(self.flist[i], ftype=ftype, headonly=True)
83 | if timeinfo_format is None:
84 | if flength is None:
85 | flength = sec.duration
86 | self.ftime = [sec.start_time + (j - i) * flength for j in
87 | range(len(self))]
88 | if not hasattr(sec, 'gauge_length'):
89 | sec.gauge_length = None
90 | metadata = (sec.nch, sec.nt, sec.dx, sec.fs, sec.gauge_length)
91 | for i, key in enumerate(['nch', 'nt', 'dx', 'fs', 'gauge_length']):
92 | if not hasattr(self, key):
93 | setattr(self, key, metadata[i])
94 |
95 | if not hasattr(self, 'ftime'):
96 | if timeinfo_from_basename:
97 | flist_use = [os.path.basename(f) for f in self.flist]
98 | else:
99 | flist_use = self.flist
100 | if timeinfo_tz is None:
101 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice],
102 | timeinfo_format) for f in flist_use]
103 | else:
104 | if '%z' in timeinfo_format.lower():
105 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice],
106 | timeinfo_format).astimezone(timeinfo_tz) for f in
107 | flist_use]
108 | else:
109 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice],
110 | timeinfo_format).replace(tzinfo=timeinfo_tz) for f in
111 | flist_use]
112 |
113 | self._sort()
114 | if flength is None:
115 | if len(self.flist) > 2:
116 | time_diff = np.round(np.diff(self.ftime[1:]).astype(float))
117 | flength_set, counts = np.unique(time_diff, return_counts=True)
118 | if len(flength_set) > 1:
119 | warnings.warn('File start times are unevenly spaced. Data '
120 | 'may not be continuous and self.flength may '
121 | 'be incorrectly detected.')
122 | flength = flength_set[counts.argmax()]
123 | elif len(self.flist) == 2:
124 | flength = self.ftime[1] - self.ftime[0]
125 | else:
126 | flength = read(self.flist[0], ftype=ftype,
127 | headonly=True).duration
128 | elif flength <= 0:
129 |             raise ValueError('flength must be > 0')
130 |
131 | self.flength = flength
132 |
133 | def __str__(self):
134 | if len(self) == 1:
135 | describe = f' flist: {self.flist}\n'
136 | elif len(self) <= 5:
137 | describe = f' flist: {len(self)} files\n' + \
138 | f' {self.flist}\n'
139 | else:
140 | describe = f' flist: {len(self)} files\n' + \
141 | f' [{self[0]},\n' + \
142 | f' {self[1]},\n' + \
143 | f' ...,\n' + \
144 | f' {self[-1]}]\n'
145 |
146 | describe += f' ftime: {self.start_time} to {self.end_time}\n' + \
147 | f' flength: {self.flength}\n'
148 | for key in ['nch', 'nt', 'dx', 'fs', 'gauge_length']:
149 | if hasattr(self, key):
150 | long_key = key.rjust(12)
151 | value = getattr(self, key)
152 | describe += f'{long_key}: {value}\n'
153 |
154 | return describe
155 |
156 | __repr__ = __str__
157 |
158 | def __getitem__(self, i):
159 | return self.flist[i]
160 |
161 | def __len__(self):
162 | return len(self.flist)
163 |
164 | def _sort(self):
165 | sort = np.argsort(self.ftime)
166 | self.ftime = [self.ftime[i] for i in sort]
167 | self.flist = [self.flist[i] for i in sort]
168 | return self
169 |
170 | @property
171 | def start_time(self):
172 | return self.ftime[0]
173 |
174 | @property
175 | def end_time(self):
176 | return self.ftime[-1] + self.flength
177 |
178 | @property
179 | def duration(self):
180 | return self.end_time - self.start_time
181 |
182 | @property
183 | def file_size(self):
184 |         return os.path.getsize(self[int(len(self) > 1)])
185 |
186 | def copy(self):
187 | return deepcopy(self)
188 |
189 | def file_interruption(self, tolerance=0.5):
190 | time_diff = np.diff(self.ftime)
191 | return np.where(abs(time_diff - self.flength) > tolerance)[0]
192 |
193 | def select(self, start=0, end=None, readsec=False, **kwargs):
194 | """
195 | Select a period of data.
196 |
197 |         :param start, end: DASDateTime or int. Start and end time or index of
198 |             the required data.
199 |         :param readsec: bool. If True, read and return an instance of
200 |             daspy.Section. If False, update self.flist.
201 | :param ch1: int. The first channel required. Only works when
202 | readsec=True.
203 | :param ch2: int. The last channel required (not included). Only works
204 | when readsec=True.
205 | :param dch: int. Channel step. Only works when readsec=True.
206 | """
207 | if end is None:
208 | end = len(self.flist)
209 | if 'stime' in kwargs.keys():
210 | start = kwargs.pop('stime')
211 | warnings.warn('In future versions, the parameter \'stime\' will be '
212 | 'replaced by \'start\'.')
213 | if 'etime' in kwargs.keys():
214 | end = kwargs.pop('etime')
215 | warnings.warn('In future versions, the parameter \'etime\' will be '
216 | 'replaced by \'end\'.')
217 |
218 | if isinstance(start, datetime):
219 | for i, ftime in enumerate(self.ftime):
220 | if ftime > start:
221 | s = i - 1
222 | break
223 | elif ftime == start:
224 | s = i
225 | break
226 | elif isinstance(start, int):
227 | s = start
228 |
229 | if isinstance(end, datetime):
230 | for i, ftime in enumerate(self.ftime[s:]):
231 | if ftime == end:
232 | e = s + i - 1
233 | break
234 | elif ftime > end:
235 | e = s + i
236 | break
237 |         elif isinstance(end, int):
238 | e = end
239 |
240 | flist = self.flist[s:e]
241 | if len(flist) == 0:
242 | warnings.warn('No valid data was selected.')
243 | return None
244 |
245 | if readsec:
246 | sec = read(flist[0], **kwargs)
247 | for f in flist[1:]:
248 | sec += read(f, **kwargs)
249 | sec.trimming(tmin=start if isinstance(start, datetime) else None,
250 | tmax=end if isinstance(end, datetime) else None)
251 | return sec
252 | else:
253 | self.flist = flist
254 | self.ftime = self.ftime[s:e]
255 | return self
256 |
257 | def _optimize_for_continuity(self, operations):
258 | method_list = []
259 | kwargs_list = []
260 | if not isinstance(operations[0], (list, tuple)):
261 | operations = [operations]
262 | for opera in operations:
263 | method, kwargs = opera
264 | if method == 'downsampling':
265 | if ('lowpass_filter' in kwargs.keys() and not\
266 | kwargs['lowpass_filter']) or 'tint' not in kwargs.keys():
267 | method_list.append('downsampling')
268 | kwargs_list.append(kwargs)
269 | else:
270 | method_list.extend(['lowpass_cheby_2', 'downsampling'])
271 | kwargs['lowpass_filter'] = False
272 | kwargs0 = dict(freq=self.fs/2/kwargs['tint'], zi=0)
273 | kwargs_list.extend([kwargs0, kwargs])
274 | else:
275 | if method in ['taper', 'cosine_taper']:
276 | kwargs.setdefault('side', 'both')
277 | elif method in ['bandpass', 'bandstop', 'lowpass', 'highpass',
278 | 'lowpass_cheby_2']:
279 | kwargs.setdefault('zi', 0)
280 |
281 | method_list.append(method)
282 | kwargs_list.append(kwargs)
283 | return method_list, kwargs_list
284 |
285 | def _kwargs_initialization(self, method_list, kwargs_list):
286 | for j, method in enumerate(method_list):
287 | if method == 'time_integration':
288 | kwargs_list[j]['c'] = 0
289 | elif method == 'time_differential':
290 | kwargs_list[j]['prepend'] = 0
291 | elif method in ['bandpass', 'bandstop', 'lowpass',
292 | 'highpass', 'lowpass_cheby_2']:
293 | kwargs_list[j]['zi'] = 0
294 |
295 | def process(self, operations, savepath='./processed', merge=1,
296 | suffix='_pro', ftype=None, dtype=None, save_operations=False,
297 | tolerance=0.5, **read_kwargs):
298 | """
299 |         :param operations: list or None. Each element of the operations list
300 |             should be [method name (str), kwargs (dict)]. None means resume
301 |             from the method_list.pkl and kwargs_list.pkl saved in savepath.
302 | :param savepath: str. Path to save processed files.
303 |         :param merge: int or str. An int merges that many processed files into
304 |             one output file; 'all' merges all files.
305 | :param suffix: str. Suffix for processed files.
306 | :param ftype: None or str. File format for saving. None for automatic
307 | detection, or 'pkl', 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy',
308 | 'npy'.
309 | :param dtype: str. The data type of the saved data.
310 |         :param save_operations: bool. If True, save the operations to
311 | method_list.pkl and kwargs_list.pkl in savepath.
312 | :param tolerance: float. Tolerance for checking continuity of data.
313 |         :param read_kwargs: dict. Parameters for the read function.
314 | """
315 | if not os.path.exists(savepath):
316 | os.makedirs(savepath)
317 | method_file = os.path.join(savepath, 'method_list.pkl')
318 | kwargs_file = os.path.join(savepath, 'kwargs_list.pkl')
319 | if operations is None:
320 | if (not os.path.exists(method_file)) or \
321 | (not os.path.exists(kwargs_file)):
322 | raise ValueError('No operations input and no method_list.pkl '
323 | 'and kwargs_list.pkl found in savepath.')
324 |             with open(method_file, 'rb') as f:
325 |                 method_list = pickle.load(f)
326 |             with open(kwargs_file, 'rb') as f:
327 |                 kwargs_list = pickle.load(f)
328 | else:
329 | method_list, kwargs_list = self._optimize_for_continuity(operations)
330 | if merge == 'all' or merge > len(self):
331 | merge = len(self)
332 | m = 0
333 | try:
334 | for i in tqdm(range(len(self))):
335 | f = self[i]
336 | if os.path.getsize(f) == 0:
337 | warnings.warn(f'{f} is an empty file. Continuous data is '
338 | 'interrupted here.')
339 | if m > 0:
340 | sec_merge.save(filepath, dtype=dtype)
341 | m = 0
342 | self._kwargs_initialization(method_list, kwargs_list)
343 | continue
344 | try:
345 | sec = read(f, ftype=self.ftype, **read_kwargs)
346 | if sec.data.size == 0:
347 | if m > 0:
348 | sec_merge.save(filepath, dtype=dtype)
349 | m = 0
350 | self._kwargs_initialization(method_list, kwargs_list)
351 | continue
352 | except Exception as e:
353 | warnings.warn(f'Error reading {f}: {e}. Continuous data is '
354 | 'interrupted here.')
355 | if m > 0:
356 | sec_merge.save(filepath, dtype=dtype)
357 | m = 0
358 | self._kwargs_initialization(method_list, kwargs_list)
359 | continue
360 | for j, method in enumerate(method_list):
361 | if method in ['taper', 'cosine_taper']:
362 | if not ((i==0 and kwargs_list[j]['side'] != 'right') or
363 | (i == len(self) - 1 and kwargs_list[j]['side'] !=
364 | 'left')):
365 | continue
366 | out = getattr(sec, method)(**kwargs_list[j])
367 | if method == 'time_integration':
368 | kwargs_list[j]['c'] = sec.data[:, -1].copy()
369 | elif method == 'time_differential':
370 | kwargs_list[j]['prepend'] = sec.data[:, -1].copy()
371 | elif method in ['bandpass', 'bandstop', 'lowpass', 'highpass',
372 | 'lowpass_cheby_2']:
373 | kwargs_list[j]['zi'] = out
374 |
375 | if m == 0:
376 | sec_merge = sec
377 | f0, f1 = os.path.splitext(os.path.basename(f))
378 | f1 = f1 if ftype is None else ftype
379 | filepath = os.path.join(savepath, f0+suffix+f1)
380 | elif abs(sec_merge.end_time - sec.start_time) <= tolerance:
381 | sec_merge += sec
382 | else:
383 | warnings.warn(f'The start time of {f} does not correspond '
384 | 'to the end time of the previous file. '
385 | 'Continuous data is interrupted here.')
386 | sec_merge.save(filepath, dtype=dtype)
387 | sec_merge = sec
388 | f0, f1 = os.path.splitext(os.path.basename(f))
389 | f1 = f1 if ftype is None else ftype
390 | filepath = os.path.join(savepath, f0+suffix+f1)
391 | m = 0
392 | m += 1
393 | if m == merge:
394 | sec_merge.save(filepath, dtype=dtype)
395 | m = 0
396 | if m > 0:
397 | sec_merge.save(filepath, dtype=dtype)
398 | except KeyboardInterrupt as e:
399 | with open(method_file, 'wb') as f:
400 | pickle.dump(method_list, f)
401 | with open(kwargs_file, 'wb') as f:
402 | pickle.dump(kwargs_list, f)
403 |             print('Process interrupted. Saving method_list and kwargs_list.')
404 | raise e
405 | else:
406 | if save_operations:
407 | with open(method_file, 'wb') as f:
408 | pickle.dump(method_list, f)
409 | with open(kwargs_file, 'wb') as f:
410 | pickle.dump(kwargs_list, f)
411 | print(f'Operations saved to {method_file} and {kwargs_file}.')
412 | else:
413 | if os.path.exists(method_file):
414 | os.remove(method_file)
415 | if os.path.exists(kwargs_file):
416 | os.remove(kwargs_file)
417 |
418 |
419 | # Dynamically add methods for cascade_methods
420 | def _create_cascade_method(method_name):
421 | def cascade_method(self, savepath='./processed', merge=1,
422 | suffix=f'_{method_name}', ftype=None, dtype=None,
423 | save_operations=False, **kwargs):
424 | """
425 | Automatically generated method for {method_name}.
426 | Applies the {method_name} operation to the data and saves the result.
427 |
428 | :param savepath: str. Path to save processed files.
429 |         :param merge: int or str. An int merges that many processed files into
430 |             one output file; 'all' merges all files.
431 | :param suffix: str. Suffix for processed files.
432 | :param ftype: None or str. None for automatic detection, or 'pkl',
433 | 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 'npy'.
434 | :param dtype: str. The data type of the saved data.
435 |         :param save_operations: bool. If True, save the operations to
436 | method_list.pkl and kwargs_list.pkl in savepath.
437 | :param kwargs: dict. Parameters for the {method_name} operation.
438 | """
439 | operations = [[method_name, kwargs]]
440 | self.process(operations, savepath=savepath, merge=merge, suffix=suffix,
441 | ftype=ftype, dtype=dtype, save_operations=save_operations)
442 | return cascade_method
443 |
444 |
445 | for method in ['time_integration', 'time_differential', 'downsampling',
446 | 'bandpass', 'bandstop', 'lowpass', 'highpass',
447 | 'lowpass_cheby_2']:
448 | setattr(Collection, method, _create_cascade_method(method))
--------------------------------------------------------------------------------
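A sketch of building a Collection and batch-processing it; the file pattern,
filter corner and decimation factor below are hypothetical, and the operation
names follow the cascade methods registered above:

    from daspy.core import Collection

    coll = Collection('/data/das/*.h5')    # hypothetical glob pattern
    print(coll)                            # file count, time span, nch/nt/dx/fs

    # Lowpass then decimate in time, merging 10 input files per output file.
    coll.process([['lowpass_cheby_2', {'freq': 10}],
                  ['downsampling', {'tint': 5}]],
                 savepath='./processed', merge=10)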
/daspy/core/dasdatetime.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for handling DASDateTime objects.
2 | # Author: Minzhe Hu
3 | # Date: 2025.3.29
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import time
6 | from typing import Iterable
7 | from datetime import datetime, timedelta, timezone
8 |
9 |
10 | utc = timezone.utc
11 | local_tz = timezone(timedelta(seconds=-time.altzone))
12 |
13 |
14 | class DASDateTime(datetime):
15 | def __add__(self, other):
16 | if isinstance(other, Iterable):
17 | out = []
18 | for t in other:
19 | out.append(self + t)
20 | return out
21 | elif not isinstance(other, timedelta):
22 | other = timedelta(seconds=float(other))
23 | return super().__add__(other)
24 |
25 | def __sub__(self, other):
26 | if isinstance(other, Iterable):
27 | out = []
28 | for t in other:
29 | out.append(self - t)
30 | return out
31 | elif isinstance(other, datetime):
32 | return datetime.__sub__(*self._unify_tz(other)).total_seconds()
33 | elif not isinstance(other, timedelta):
34 | other = timedelta(seconds=other)
35 | return super().__sub__(other)
36 |
37 | def __le__(self, other):
38 | return datetime.__le__(*self._unify_tz(other))
39 |
40 | def __lt__(self, other):
41 | return datetime.__lt__(*self._unify_tz(other))
42 |
43 | def __ge__(self, other):
44 | return datetime.__ge__(*self._unify_tz(other))
45 |
46 | def __gt__(self, other):
47 | return datetime.__gt__(*self._unify_tz(other))
48 |
49 | def _unify_tz(self, other: datetime):
50 | if self.tzinfo and (not other.tzinfo):
51 | return self, other.replace(tzinfo=self.tzinfo)
52 | elif (not self.tzinfo) and other.tzinfo:
53 | return self.replace(tzinfo=other.tzinfo), other
54 | return self, other
55 |
56 | def local(self):
57 | return self.astimezone(tz=local_tz)
58 |
59 | def utc(self):
60 | return self.astimezone(tz=utc)
61 |
62 | def remove_tz(self):
63 | return self.replace(tzinfo=None)
64 |
65 | @classmethod
66 | def from_datetime(cls, dt: datetime):
67 | return cls.fromtimestamp(dt.timestamp(), tz=dt.tzinfo)
68 |
69 | @classmethod
70 | def from_obspy_UTCDateTime(cls, dt):
71 | return cls.from_datetime(dt.datetime)
72 |
73 | def to_datetime(self):
74 | return datetime.fromtimestamp(self.timestamp(), tz=self.tzinfo)
75 |
76 | def to_obspy_UTCDateTime(self):
77 | from obspy import UTCDateTime
78 |         return UTCDateTime(self.to_datetime())
79 |
80 | @classmethod
81 | def strptime(cls, date_string, format):
82 | """
83 | string, format -> new datetime parsed from a string
84 | (like time.strptime()).
85 | """
86 | from _strptime import _strptime
87 | tt, fraction, gmtoff_fraction = _strptime(date_string, format)
88 | tzname, gmtoff = tt[-2:]
89 | args = tt[:6] + (fraction,)
90 | if gmtoff is not None:
91 | tzdelta = timedelta(seconds=gmtoff, microseconds=gmtoff_fraction)
92 | if tzname:
93 | tz = timezone(tzdelta, tzname)
94 | else:
95 | tz = timezone(tzdelta)
96 | args += (tz,)
97 | elif tt[-3] == 0:
98 | args += (utc,)
99 |
100 | return cls(*args)
--------------------------------------------------------------------------------
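A short sketch of DASDateTime arithmetic as implemented above (the timestamps
are arbitrary):

    from daspy.core.dasdatetime import DASDateTime, utc

    t0 = DASDateTime(2024, 1, 1, 12, 0, 0, tzinfo=utc)
    t1 = DASDateTime(2024, 1, 1, 12, 0, 30, 500000, tzinfo=utc)
    print(t1 - t0)             # 30.5 -- subtracting datetimes gives float seconds
    print(t0 + [0, 60, 120])   # adding an iterable returns a list of shifted times
    print(t0.local())          # convert to the machine's local time zone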
/daspy/core/example.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/core/example.pkl
--------------------------------------------------------------------------------
/daspy/core/read.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for reading DAS data.
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.21
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | # Partially modified from
6 | # https://github.com/RobbinLuo/das-toolkit/blob/main/DasTools/DasPrep.py
7 | import warnings
8 | import json
9 | import pickle
10 | import numpy as np
11 | import h5py
12 | import segyio
13 | from typing import Union
14 | from pathlib import Path
15 | from nptdms import TdmsFile
16 | from daspy.core.section import Section
17 | from daspy.core.dasdatetime import DASDateTime, utc
18 |
19 |
20 | def read(fname=None, output_type='section', ftype=None, headonly=False,
21 | dtype=None, **kwargs) -> Union[Section, tuple]:
22 | """
23 | Read a .pkl/.pickle, .tdms, .h5/.hdf5, .segy/.sgy file.
24 |
25 | :param fname: str or pathlib.PosixPath. Path of DAS data file.
26 | :param output_type: str. 'Section' means return an instance of
27 | daspy.Section, 'array' means return numpy.array for data and a
28 | dictionary for metadata.
29 | :param ftype: None, str or function. None for automatic detection, or str to
30 | specify a type of 'pkl', 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy',
31 |         or 'npy', or a function that reads data and metadata.
32 |     :param headonly: bool. If True, only metadata will be read; the returned
33 |         data will be an all-zero array of the same size as the original
34 |         data.
35 | :param ch1: int. The first channel required.
36 | :param ch2: int. The last channel required (not included).
37 | :param dch: int. Channel step.
38 | :param dtype: str. The data type of the returned data.
39 | :return: An instance of daspy.Section, or numpy.array for data and a
40 | dictionary for metadata.
41 | """
42 | fun_map = {'pkl': _read_pkl, 'tdms': _read_tdms, 'h5': _read_h5,
43 | 'sgy': _read_segy, 'npy': _read_npy}
44 | if fname is None:
45 | fname = Path(__file__).parent / 'example.pkl'
46 | ftype = 'pkl'
47 | if ftype is None:
48 | ftype = str(fname).split('.')[-1].lower()
49 |
50 | if callable(ftype):
51 | try:
52 | data, metadata = ftype(fname, headonly=headonly, **kwargs)
53 | except TypeError:
54 | data, metadata = ftype(fname)
55 | else:
56 | for rtp in [('pickle', 'pkl'), ('hdf5', 'h5'), ('segy', 'sgy')]:
57 | ftype = ftype.replace(*rtp)
58 | data, metadata = fun_map[ftype](fname, headonly=headonly, **kwargs)
59 |
60 | if dtype is not None:
61 | data = data.astype(dtype)
62 | if output_type.lower() == 'section':
63 | metadata['source'] = Path(fname)
64 | metadata['source_type'] = ftype
65 | data[np.isnan(data)] = 0
66 | return Section(data, **metadata)
67 | elif output_type.lower() == 'array':
68 | return data, metadata
69 |
70 |
71 | def _read_pkl(fname, headonly=False, **kwargs):
72 | dch = kwargs.pop('dch', 1)
73 | with open(fname, 'rb') as f:
74 | pkl_data = pickle.load(f)
75 | if isinstance(pkl_data, np.ndarray):
76 |             warnings.warn('This data format doesn\'t include channel interval '
77 |                           'and sampling rate. Please set them manually.')
78 | if headonly:
79 | return np.zeros_like(pkl_data), {'dx': None, 'fs': None}
80 | else:
81 | ch1 = kwargs.pop('ch1', 0)
82 | ch2 = kwargs.pop('ch2', len(pkl_data))
83 | return pkl_data[ch1:ch2:dch], {'dx': None, 'fs': None}
84 | elif isinstance(pkl_data, dict):
85 | data = pkl_data.pop('data')
86 | if headonly:
87 | data = np.zeros_like(data)
88 | else:
89 | if 'ch1' in kwargs.keys() or 'ch2' in kwargs.keys():
90 | if 'start_channel' in pkl_data.keys():
91 | s_chn = pkl_data['start_channel']
92 |                     print(f'Data starts with channel {s_chn}.')
93 | else:
94 | s_chn = 0
95 | ch1 = kwargs.pop('ch1', s_chn)
96 | ch2 = kwargs.pop('ch2', s_chn + len(data))
97 | data = data[ch1 - s_chn:ch2 - s_chn, :]
98 | pkl_data['start_channel'] = ch1
99 | return data, pkl_data
100 | else:
101 | raise TypeError('Unknown data type.')
102 |
103 |
104 | def _read_h5_headers(group):
105 | headers = {}
106 | if len(group.attrs) != 0:
107 | headers['attrs'] = dict(group.attrs)
108 | if isinstance(group, h5py._hl.dataset.Dataset):
109 | return headers
110 | for key, value in group.items():
111 | try:
112 | gp_headers = _read_h5_headers(value)
113 | except AttributeError:
114 | headers[key] = value
115 | if len(gp_headers):
116 | headers[key] = gp_headers
117 |
118 | return headers
119 |
120 |
121 | def _read_h5_starttime(h5_file):
122 | try:
123 | stime = h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime']
124 | except KeyError:
125 | try:
126 | stime = h5_file['Acquisition'].attrs['MeasurementStartTime']
127 | except KeyError:
128 | try:
129 | stime = h5_file['Acquisition/Raw[0]/RawDataTime/'][0]
130 | except KeyError:
131 | return 0
132 | if isinstance(stime, bytes):
133 | stime = stime.decode('ascii')
134 |
135 | if isinstance(stime, str):
136 | if len(stime) > 26:
137 | stime = DASDateTime.strptime(stime, '%Y-%m-%dT%H:%M:%S.%f%z')
138 | else:
139 | stime = DASDateTime.strptime(stime, '%Y-%m-%dT%H:%M:%S.%f').\
140 | astimezone(utc)
141 | else:
142 | stime = DASDateTime.fromtimestamp(stime / 1e6).astimezone(utc)
143 |
144 | return stime
145 |
146 |
147 | def _read_h5(fname, headonly=False, **kwargs):
148 | with h5py.File(fname, 'r') as h5_file:
149 | dch = kwargs.pop('dch', 1)
150 | group = list(h5_file.keys())[0]
151 | if len(h5_file.keys()) >= 10: # ASN/OptoDAS https://github.com/ASN-Norway/simpleDAS
152 | ch1 = kwargs.pop('ch1', 0)
153 | if h5_file['header/dimensionNames'][0] == b'time':
154 | nch = h5_file['data'].shape[1]
155 | if headonly:
156 | data = np.zeros_like(h5_file['data']).T
157 | else:
158 | ch2 = kwargs.pop('ch2', nch)
159 | data = h5_file['data'][:, ch1:ch2:dch].T
160 | elif h5_file['header/dimensionNames'][0] == b'distance':
161 |                 nch = h5_file['data'].shape[0]
162 | if headonly:
163 | data = np.zeros_like(h5_file['data'])
164 | else:
165 | ch2 = kwargs.pop('ch2', nch)
166 | data = h5_file['data'][ch1:ch2:dch, :]
167 | dx = h5_file['header/dx'][()]
168 | start_time = DASDateTime.fromtimestamp(
169 | h5_file['header/time'][()]).utc()
170 | metadata = {'dx': dx * dch, 'fs': 1 / h5_file['header/dt'][()],
171 | 'start_time': start_time, 'start_channel': ch1,
172 | 'start_distance': ch1 * dx,
173 | 'scale': h5_file['header/dataScale'][()]}
174 |             if not np.isnan(h5_file['header/gaugeLength'][()]):
175 |                 metadata['gauge_length'] = h5_file['header/gaugeLength'][()]
176 | elif len(h5_file.keys()) == 5: # AP Sensing
177 | # read data
178 | nch = h5_file['strain'].shape[1]
179 | ch1 = kwargs.pop('ch1', 0)
180 | ch2 = kwargs.pop('ch2', nch)
181 | if headonly:
182 | data = np.zeros_like(h5_file['strain']).T
183 | else:
184 | data = h5_file['strain'][:, ch1:ch2:dch].T
185 |
186 | # read metadata
187 | dx = h5_file['spatialsampling'][()]
188 | metadata = {'fs': h5_file['RepetitionFrequency'][()],
189 | 'dx': dx * dch, 'start_channel': ch1,
190 | 'start_distance': ch1 * dx,
191 | 'gauge_length': h5_file.get('GaugeLength')[()]}
192 |         elif len(h5_file.keys()) == 3: # OptaSense
193 | nch = h5_file['data'].shape[1]
194 | ch1 = kwargs.pop('ch1', 0)
195 | ch2 = kwargs.pop('ch2', nch)
196 | dch = kwargs.pop('dch', 1)
197 | if headonly:
198 | data = np.zeros_like(h5_file['data'])
199 | else:
200 | data = h5_file['data'][ch1:ch2:dch, :]
201 | dx = (h5_file['x_axis'][-1] - h5_file['x_axis'][0]) / \
202 | (len(h5_file['x_axis']) - 1)
203 | fs = (len(h5_file['t_axis']) - 1) / (h5_file['t_axis'][-1] -
204 | h5_file['t_axis'][0])
205 | metadata = {'dx': dx, 'fs': fs, 'start_channel': ch1,
206 | 'start_distance': h5_file['x_axis'][0] + dx * ch1,
207 | 'start_time': h5_file['t_axis'][0]}
208 | elif set(h5_file.keys()) == {'Mapping', 'Acquisition'}: # Silixa/iDAS
209 | nch = h5_file['Acquisition/Raw[0]'].attrs['NumberOfLoci']
210 | ch1 = kwargs.pop('ch1', 0)
211 | ch2 = kwargs.pop('ch2', nch)
212 | if h5_file['Acquisition/Raw[0]/RawData/'].shape[0] == nch:
213 | if headonly:
214 | data = np.zeros_like(h5_file['Acquisition/Raw[0]/RawData/'])
215 | else:
216 | data = h5_file['Acquisition/Raw[0]/RawData/']\
217 | [ch1:ch2:dch, :]
218 | else:
219 | if headonly:
220 | data = np.zeros_like(
221 | h5_file['Acquisition/Raw[0]/RawData/']).T
222 | else:
223 | data = h5_file['Acquisition/Raw[0]/RawData/']\
224 | [:, ch1:ch2:dch].T
225 |
226 | dx = np.mean(h5_file['Mapping/MeasuredSpatialResolution'])
227 | start_distance = h5_file['Acquisition/Custom/UserSettings'].\
228 | attrs['StartDistance'] + ch1 * dx
229 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime']
230 | fs = h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate']
231 | gauge_length = h5_file['Acquisition'].attrs['GaugeLength']
232 | scale = h5_file['Acquisition/Raw[0]'].attrs['AmpScaling']
233 | geometry = np.vstack((h5_file['Mapping/Lon'],
234 | h5_file['Mapping/Lat'])).T
235 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
236 | 'start_distance': ch1 * dx,
237 | 'gauge_length': gauge_length, 'geometry': geometry,
238 | 'scale': scale}
239 | metadata['start_time'] = _read_h5_starttime(h5_file)
240 | elif group == 'Acquisition':
241 |             # OptaSense/ODH, Silixa/iDAS, Sintela/Onyx, Smart Sensing/ZD DAS
242 | # read data
243 | try:
244 | nch = h5_file['Acquisition'].attrs['NumberOfLoci']
245 | except KeyError:
246 | nch = len(h5_file['Acquisition/Raw[0]/RawData/'])
247 | ch1 = kwargs.pop('ch1', 0)
248 | ch2 = kwargs.pop('ch2', nch)
249 | if h5_file['Acquisition/Raw[0]/RawData/'].shape[0] == nch:
250 | if headonly:
251 | data = np.zeros_like(h5_file['Acquisition/Raw[0]/RawData/'])
252 | else:
253 | data = h5_file['Acquisition/Raw[0]/RawData/']\
254 | [ch1:ch2:dch, :]
255 | else:
256 | if headonly:
257 | data = np.zeros_like(
258 | h5_file['Acquisition/Raw[0]/RawData/']).T
259 | else:
260 | data = h5_file['Acquisition/Raw[0]/RawData/']\
261 | [:, ch1:ch2:dch].T
262 |
263 | # read metadata
264 | try:
265 | fs = h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate']
266 | except KeyError:
267 | time_arr = h5_file['Acquisition/Raw[0]/RawDataTime/']
268 | fs = 1 / (np.diff(time_arr).mean() / 1e6)
269 |
270 | dx = h5_file['Acquisition'].attrs['SpatialSamplingInterval']
271 | gauge_length = h5_file['Acquisition'].attrs['GaugeLength']
272 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
273 | 'start_distance': ch1 * dx,
274 | 'gauge_length': gauge_length}
275 |
276 | metadata['start_time'] = _read_h5_starttime(h5_file)
277 | elif group == 'raw':
278 | nch = len(h5_file['raw'])
279 | ch1 = kwargs.pop('ch1', 0)
280 | ch2 = kwargs.pop('ch2', nch)
281 | if headonly:
282 | data = np.zeros_like(h5_file['raw'])
283 | else:
284 | data = h5_file['raw'][ch1:ch2:dch, :]
285 | fs = round(1 / np.diff(h5_file['timestamp']).mean())
286 | start_time = DASDateTime.fromtimestamp(
287 | h5_file['timestamp'][0]).astimezone(utc)
288 | warnings.warn('This data format doesn\'t include channel interval. '
289 | 'Please set manually')
290 | metadata = {'dx': None, 'fs': fs, 'start_channel': ch1,
291 | 'start_time': start_time}
292 | elif group == 'data': # https://ai4eps.github.io/homepage/ml4earth/seismic_event_format_das/
293 | nch = h5_file['data'].shape[1]
294 | ch1 = kwargs.pop('ch1', 0)
295 | ch2 = kwargs.pop('ch2', nch)
296 | dch = kwargs.pop('dch', 1)
297 | if headonly:
298 | data = np.zeros_like(h5_file['data'])
299 | else:
300 | data = h5_file['data'][ch1:ch2:dch, :]
301 | attr = h5_file['data'].attrs
302 | dx = attr['dx_m']
303 | metadata = {'dx': dx, 'fs': 1 / attr['dt_s'], 'start_channel': ch1,
304 | 'start_distance': ch1 * dx,
305 | 'start_time': DASDateTime.strptime(
306 | attr['begin_time'], '%Y-%m-%dT%H:%M:%S.%f%z'),
307 | 'data_type': attr['unit']}
308 | if 'event_time' in attr.keys():
309 | try:
310 | origin_time = DASDateTime.strptime(
311 | attr['event_time'], '%Y-%m-%dT%H:%M:%S.%f%z')
312 | except ValueError:
313 | origin_time = DASDateTime.strptime(
314 | attr['event_time'], '%Y-%m-%dT%H:%M:%S.%f')
315 | metadata['origin_time'] = origin_time
316 |
317 | elif group == 'data_product':
318 | # read data
319 | nch = h5_file.attrs['nx']
320 | ch1 = kwargs.pop('ch1', 0)
321 | ch2 = kwargs.pop('ch2', nch)
322 | array_shape = h5_file['data_product/data'].shape
323 | if array_shape[0] == nch:
324 | if headonly:
325 | data = np.zeros_like(h5_file['data_product/data'])
326 | else:
327 | data = h5_file['data_product/data'][ch1:ch2:dch, :]
328 | else:
329 | if headonly:
330 | data = np.zeros_like(h5_file['data_product/data']).T
331 | else:
332 | data = h5_file['data_product/data'][:, ch1:ch2:dch].T
333 |
334 | # read metadata
335 | fs = 1 / h5_file.attrs['dt_computer']
336 | dx = h5_file.attrs['dx']
337 | gauge_length = h5_file.attrs['gauge_length']
338 | if h5_file.attrs['saving_start_gps_time'] > 0:
339 | start_time = DASDateTime.fromtimestamp(
340 | h5_file.attrs['file_start_gps_time'])
341 | else:
342 | start_time = DASDateTime.fromtimestamp(
343 | h5_file.attrs['file_start_computer_time'])
344 | data_type = h5_file.attrs['data_product']
345 |
346 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
347 | 'start_distance': ch1 * dx,
348 | 'start_time': start_time.astimezone(utc),
349 | 'gauge_length': gauge_length, 'data_type': data_type}
350 | else: # Febus
351 | acquisition = list(h5_file[f'{group}/Source1/Zone1'].keys())[0]
352 | # read data
353 | start_channel = int(h5_file[f'{group}/Source1/Zone1'].
354 | attrs['Extent'][0])
355 | dataset = h5_file[f'{group}/Source1/Zone1/{acquisition}']
356 | nch = dataset.shape[-1]
357 | ch1 = kwargs.pop('ch1', start_channel)
358 | ch2 = kwargs.pop('ch2', start_channel + nch)
359 | if headonly:
360 | data = np.zeros_like(dataset).T.reshape((nch, -1))
361 | else:
362 | if len(dataset.shape) == 3: # Febus A1-R
363 | data = dataset[:, :, ch1 - start_channel:ch2 - start_channel
364 | :dch].reshape((-1, (ch2 - ch1) // dch)).T
365 | elif len(dataset.shape) == 2: # Febus A1
366 | data = dataset[:, ch1 - start_channel:ch2 - start_channel:
367 | dch].T
368 | # read metadata
369 | attrs = h5_file[f'{group}/Source1/Zone1'].attrs
370 | dx = attrs['Spacing'][0]
371 | try:
372 | fs = float(attrs['FreqRes'])
373 | except KeyError:
374 | try:
375 | fs = (attrs['PulseRateFreq'][0] /
376 | attrs['SamplingRes'][0]) / 1000
377 | except KeyError:
378 | fs = attrs['SamplingRate'][0]
379 | start_distance = attrs['Origin'][0]
380 | time = h5_file[f'{group}/Source1/time']
381 | if len(time.shape) == 2: # Febus A1-R
382 | start_time = DASDateTime.fromtimestamp(time[0, 0]).\
383 | astimezone(utc)
384 | elif len(time.shape) == 1: # Febus A1
385 | start_time = DASDateTime.fromtimestamp(time[0]).astimezone(utc)
386 | gauge_length = attrs['GaugeLength'][0]
387 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1,
388 | 'start_distance': start_distance +
389 | (ch1 - start_channel) * dx,
390 | 'start_time': start_time, 'gauge_length': gauge_length}
391 |
392 | metadata['headers'] = _read_h5_headers(h5_file)
393 |
394 | return data, metadata
395 |
396 |
397 | def _read_tdms(fname, headonly=False, **kwargs):
398 | # https://nptdms.readthedocs.io/en/stable/quickstart.html
399 | with TdmsFile.read(fname) as tdms_file:
400 | group_name = [group.name for group in tdms_file.groups()]
401 | if 'Measurement' in group_name:
402 | key = 'Measurement'
403 | elif 'DAS' in group_name:
404 | key = 'DAS'
405 | else:
406 | key = group_name[0]
407 |
408 | headers = {**tdms_file.properties, **tdms_file[key].properties}
409 | nch = len(tdms_file[key])
410 | dch = kwargs.pop('dch', 1)
411 | # read data
412 | if nch > 1:
413 | start_channel = min(int(channel.name) for channel in
414 | tdms_file[key].channels())
415 | ch1 = max(kwargs.pop('ch1', start_channel), start_channel)
416 | ch2 = min(kwargs.pop('ch2', start_channel + nch),
417 | start_channel + nch)
418 | if headonly:
419 | nt = len(tdms_file[key][str(start_channel)])
420 | data = np.zeros((nch, nt))
421 | else:
422 | data = np.asarray([tdms_file[key][str(ch)]
423 | for ch in range(ch1, ch2, dch)])
424 | elif nch == 1:
425 | try:
426 | start_channel = int(headers['Initial Channel'])
427 | except KeyError:
428 | start_channel = 0
429 |
430 | ch1 = max(kwargs.pop('ch1', start_channel), start_channel)
431 | nch = int(headers['Total Channels'])
432 | ch2 = min(kwargs.pop('ch2', start_channel + nch),
433 | start_channel + nch)
434 | if headonly:
435 | data = np.zeros(len(tdms_file[key].channels()[0])).\
436 | reshape((nch, -1))
437 | else:
438 | data = np.asarray(tdms_file[key].channels()[0]).\
439 | reshape((-1, nch)).T
440 | data = data[ch1 - start_channel:ch2 - start_channel:dch]
441 |
442 | # read metadata
443 | try:
444 | dx = headers['SpatialResolution[m]']
445 | except KeyError:
446 | try:
447 | dx = headers['Spatial Resolution']
448 | except KeyError:
449 | dx = None
450 |
451 | try:
452 | fs = headers['SamplingFrequency[Hz]']
453 | except KeyError:
454 | try:
455 | fs = 1 / headers['Time Base']
456 | except KeyError:
457 | fs = None
458 |
459 | try:
460 | start_distance = headers['Start Distance (m)'] + \
461 | dx * (ch1 - start_channel)
462 | except KeyError:
463 | start_distance = dx * ch1
464 |
465 | try:
466 | start_time = DASDateTime.strptime(headers['ISO8601 Timestamp'],
467 | '%Y-%m-%dT%H:%M:%S.%f%z')
468 | except ValueError:
469 | start_time = DASDateTime.strptime(headers['ISO8601 Timestamp'],
470 | '%Y-%m-%dT%H:%M:%S.%f')
471 | except KeyError:
472 | start_time = 0
473 | for key in ['GPSTimeStamp', 'CPUTimeStamp', 'Trigger Time']:
474 | if key in headers.keys():
475 | if headers[key]:
476 | start_time = DASDateTime.from_datetime(headers[key].
477 | item())
478 | break
479 |
480 | if dx is not None:
481 | dx *= dch
482 | metadata = {'dx': dx, 'fs': fs, 'start_channel': ch1,
483 | 'start_distance': start_distance, 'start_time': start_time,
484 | 'headers': headers}
485 |
486 | if 'GaugeLength' in headers.keys():
487 | metadata['gauge_length'] = headers['GaugeLength']
488 |
489 | return data, metadata
490 |
491 |
492 | def _read_segy(fname, headonly=False, **kwargs):
493 | # https://github.com/equinor/segyio-notebooks/blob/master/notebooks/basic/02_segy_quicklook.ipynb
494 | with segyio.open(fname, ignore_geometry=True) as segy_file:
495 | nch = segy_file.tracecount
496 | ch1 = kwargs.pop('ch1', 0)
497 | ch2 = kwargs.pop('ch2', nch)
498 | dch = kwargs.pop('dch', 1)
499 |
500 | # read data
501 | if headonly:
502 | data = np.zeros_like(segy_file.trace.raw[:])
503 | else:
504 | data = segy_file.trace.raw[ch1:ch2:dch]
505 |
506 | # read metadata:
507 | fs = 1 / (segyio.tools.dt(segy_file) / 1e6)
508 | metadata = {'dx': None, 'fs': fs, 'start_channel': ch1}
509 |         warnings.warn('This data format doesn\'t include channel interval. '
510 |                       'Please set it manually.')
511 |
512 | return data, metadata
513 |
514 |
515 | def _read_npy(fname, headonly=False, **kwargs):
516 | data = np.load(fname)
517 | if headonly:
518 | return np.zeros_like(data), {'dx': None, 'fs': None}
519 | else:
520 | ch1 = kwargs.pop('ch1', 0)
521 | ch2 = kwargs.pop('ch2', len(data))
522 | dch = kwargs.pop('dch', 1)
523 | warnings.warn('This data format doesn\'t include channel interval and '
524 |                       'sampling rate. Please set them manually.')
525 | return data[ch1:ch2:dch], {'dx': None, 'fs': None}
526 |
527 |
528 | def read_json(fname, output_type='dict'):
529 | """
530 |     Read .json metadata file. See Lai et al. (2024, Seismol. Res. Lett.).
531 |
532 | :param fname: str or pathlib.PosixPath. Path of json file.
533 | :param output_type: str. 'dict' means return a dictionary, and 'Section'
534 |         means return an empty daspy.Section instance with metadata.
535 | :return: A dictionary of metadata or an instance of daspy.Section without
536 | data.
537 | """
538 | with open(fname, 'r') as fcc_file:
539 | headers = json.load(fcc_file)
540 | if output_type.lower() == 'dict':
541 | return headers
542 | elif output_type.lower() in ['section', 'sec']:
543 | if len(headers['Overview']['Interrogator']) > 1:
544 | case_type = 'Multiple interrogators, single cable'
545 | sec_num = len(headers['Overview']['Interrogator'])
546 | sec = []
547 | for interrogator in headers['Overview']['Interrogator']:
548 | nch = interrogator['Acquisition'][0]['Attributes']['number_of_channels']
549 | data = np.zeros((nch, 0))
550 | dx = interrogator['Acquisition'][0]['Attributes']['spatial_sampling_interval']
551 | fs = interrogator['Acquisition'][0]['Attributes']['acquisition_sample_rate']
552 | gauge_length = interrogator['Acquisition'][0]['Attributes']['gauge_length']
553 | sec.append(Section(data, dx, fs, gauge_length=gauge_length,
554 | headers=headers))
555 | elif len(headers['Overview']['Interrogator'][0]['Acquisition']) > 1:
556 | case_type = 'Active survey'
557 | sec_num = len(
558 | headers['Overview']['Interrogator'][0]['Acquisition'])
559 | sec = []
560 | for acquisition in headers['Overview']['Interrogator'][0]['Acquisition']:
561 | nch = acquisition['Attributes']['number_of_channels']
562 | data = np.zeros((nch, 0))
563 | dx = acquisition['Attributes']['spatial_sampling_interval']
564 | fs = acquisition['Attributes']['acquisition_sample_rate']
565 | gauge_length = acquisition['Attributes']['gauge_length']
566 | sec.append(Section(data, dx, fs, gauge_length=gauge_length,
567 | headers=headers))
568 | else:
569 | sec_num = 1
570 | if len(headers['Overview']['Cable']) > 1:
571 |                 case_type = 'Single interrogator, multiple cables'
572 | else:
573 | env = headers['Overview']['Cable'][0]['Attributes']['cable_environment']
574 | if env == 'trench':
575 | case_type = 'Direct buried'
576 | elif env == 'conduit':
577 | case_type = 'Dark fiber'
578 | elif env in ['wireline', 'outside borehole casing']:
579 | case_type = 'Borehole cable'
580 | nch = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['number_of_channels']
581 | dx = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['spatial_sampling_interval']
582 | fs = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['acquisition_sample_rate']
583 | gauge_length = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['gauge_length']
584 | data = np.zeros((nch, 0))
585 | sec = Section(data, dx, fs, gauge_length=gauge_length,
586 | headers=headers)
587 |
588 | print(f'For case of {case_type}, create {sec_num} empty daspy.Section '
589 | 'instance(s)')
590 | return sec
591 |
--------------------------------------------------------------------------------
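A hedged example of read(); with no file name it loads the bundled example.pkl,
and the channel keywords shown are the ones documented above (the HDF5 file
name is hypothetical):

    from daspy.core import read

    sec = read()                  # the packaged example section
    print(sec)
    sub = read('record.h5', ch1=100, ch2=300, dch=2, dtype='float32')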
/daspy/core/write.py:
--------------------------------------------------------------------------------
1 | # Purpose: Module for writing DAS data.
2 | # Author: Minzhe Hu
3 | # Date: 2025.5.21
4 | # Email: hmz2018@mail.ustc.edu.cn
5 | import os
6 | import warnings
7 | import pickle
8 | import numpy as np
9 | import h5py
10 | import segyio
11 | from shutil import copyfile
12 | from nptdms import TdmsFile, TdmsWriter, RootObject, GroupObject, ChannelObject
13 | from datetime import datetime
14 |
15 |
16 | def write(sec, fname, ftype=None, raw_fname=None, dtype=None):
17 | fun_map = {'tdms': _write_tdms, 'h5': _write_h5, 'sgy': _write_segy}
18 | if ftype is None:
19 | ftype = str(fname).lower().split('.')[-1]
20 |     ftype = ftype.replace('hdf5', 'h5')
21 |     ftype = ftype.replace('segy', 'sgy')
22 | if dtype is not None:
23 | sec = sec.copy()
24 | sec.data = sec.data.astype(dtype)
25 | if ftype == 'pkl':
26 | write_pkl(sec, fname)
27 | elif ftype == 'npy':
28 | np.save(fname, sec.data)
29 | else:
30 | fun_map[ftype](sec, fname, raw_fname=raw_fname)
31 | return None
32 |
33 |
34 | def write_pkl(sec, fname):
35 | with open(fname, 'wb') as f:
36 | pickle.dump(sec.__dict__, f)
37 | return None
38 |
39 |
40 | def _write_tdms(sec, fname, raw_fname=None):
41 | if raw_fname is None:
42 | key = 'Measurement'
43 | file_prop = {}
44 | group_prop = {}
45 | else:
46 | original_file = TdmsFile(raw_fname)
47 | group_name = [group.name for group in original_file.groups()]
48 | if 'Measurement' in group_name:
49 | key = 'Measurement'
50 | elif 'DAS' in group_name:
51 | key = 'DAS'
52 | else:
53 | key = group_name[0]
54 | file_prop = original_file.properties
55 | group_prop = original_file[key].properties
56 |
57 | if 'Spatial Resolution' in group_prop.keys():
58 | group_prop['Spatial Resolution'] = sec.dx
59 | else:
60 | file_prop['SpatialResolution[m]'] = sec.dx
61 |
62 | if 'Time Base' in group_prop.keys():
63 | group_prop['Time Base'] = 1. / sec.fs
64 | else:
65 | file_prop['SamplingFrequency[Hz]'] = sec.fs
66 |
67 | if 'Total Channels' in group_prop.keys():
68 | group_prop['Total Channels'] = sec.nch
69 |
70 | if 'Initial Channel' in group_prop.keys():
71 | group_prop['Initial Channel'] = sec.start_channel
72 |
73 | file_prop['Start Distance (m)'] = sec.start_distance
74 | if isinstance(sec.start_time, datetime):
75 | start_time = sec.start_time
76 | else:
77 | start_time = datetime.fromtimestamp(sec.start_time)
78 |
79 | if raw_fname is None:
80 | file_prop['ISO8601 Timestamp'] = start_time.strftime(
81 | '%Y-%m-%dT%H:%M:%S.%f%z')
82 |         group_prop['Trigger Time'] = np.datetime64(start_time.replace(tzinfo=None))
83 | else:
84 | if 'ISO8601 Timestamp' in file_prop.keys():
85 | file_prop['ISO8601 Timestamp'] = start_time.strftime(
86 | '%Y-%m-%dT%H:%M:%S.%f%z')
87 | else:
88 | for s in ['GPSTimeStamp', 'CPUTimeStamp', 'Trigger Time']:
89 | if s in group_prop.keys():
90 |                     group_prop[s] = np.datetime64(start_time.replace(tzinfo=None))
91 | break
92 |
93 | if hasattr(sec, 'gauge_length'):
94 | file_prop['GaugeLength'] = sec.gauge_length
95 |
96 | with TdmsWriter(fname) as tdms_file:
97 | root_object = RootObject(file_prop)
98 | group_object = GroupObject(key, properties=group_prop)
99 | if raw_fname and len(original_file[key]) == 1:
100 | channel = ChannelObject(key, original_file[key].channels()[0].name,
101 | sec.data.T.flatten(), properties={})
102 | tdms_file.write_segment([root_object, group_object, channel])
103 | else:
104 | channel_list = []
105 | for ch, d in enumerate(sec.data):
106 | channel_list.append(ChannelObject(key,
107 | str(ch + sec.start_channel),
108 | d, properties={}))
109 |
110 | tdms_file.write_segment([root_object, group_object] + channel_list)
111 | return None
112 |
113 |
114 | def _update_h5_dataset(h5_file, path, name, data):
115 | attrs = h5_file[path + name].attrs
116 | del h5_file[path + name]
117 | h5_file.get(path).create_dataset(name, data=data)
118 | for key, value in attrs.items():
119 | h5_file[path + name].attrs[key] = value
120 | return None
121 |
122 |
123 | def _write_h5(sec, fname, raw_fname=None):
124 | if raw_fname is None:
125 | with h5py.File(fname, 'w') as h5_file:
126 | h5_file.create_group('Acquisition/Raw[0]')
127 | h5_file.get('Acquisition/Raw[0]/').\
128 | create_dataset('RawData', data=sec.data)
129 | if isinstance(sec.start_time, datetime):
130 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] = \
131 | np.bytes_(
132 | sec.start_time.strftime('%Y-%m-%dT%H:%M:%S.%f%z'))
133 | stime = sec.start_time.timestamp() * 1e6
134 | DataTime = np.arange(
135 |                 stime, stime + sec.nt / sec.fs * 1e6, 1e6 / sec.fs)
136 | else:
137 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] = \
138 | np.bytes_(str(sec.start_time))
139 | DataTime = sec.start_time + np.arange(0, sec.nt / sec.fs,
140 | 1 / sec.fs)
141 |
142 | h5_file.get('Acquisition/Raw[0]/').\
143 | create_dataset('RawDataTime', data=DataTime)
144 | h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] = sec.fs
145 | h5_file['Acquisition'].attrs['SpatialSamplingInterval'] = sec.dx
146 | if hasattr(sec, 'gauge_length'):
147 | h5_file['Acquisition'].attrs['GaugeLength'] = sec.gauge_length
148 | else:
149 | h5_file['Acquisition'].attrs['GaugeLength'] = np.nan
150 | else:
151 | if not os.path.exists(fname) or not os.path.samefile(raw_fname, fname):
152 | copyfile(raw_fname, fname)
153 | with h5py.File(fname, 'r+') as h5_file:
154 | group = list(h5_file.keys())[0]
155 | if len(h5_file.keys()) == 10:
156 | if h5_file['header/dimensionNames'][0] == b'time':
157 | _update_h5_dataset(h5_file, '/', 'data', sec.data.T)
158 | elif h5_file['header/dimensionNames'][0] == b'distance':
159 | _update_h5_dataset(h5_file, '/', 'data', sec.data)
160 |
161 |                 _update_h5_dataset(h5_file, 'header/', 'dx', sec.dx)
162 |                 _update_h5_dataset(h5_file, 'header/', 'dt', 1 / sec.fs)
163 |                 if isinstance(sec.start_time, datetime):
164 |                     _update_h5_dataset(h5_file, 'header/', 'time',
165 |                                        sec.start_time.timestamp())
166 |                 else:
167 |                     _update_h5_dataset(h5_file, 'header/', 'time',
168 |                                        sec.start_time)
169 | if hasattr(sec, 'gauge_length'):
170 | _update_h5_dataset(h5_file, '/', 'gaugeLength',
171 | sec.gauge_length)
172 | if hasattr(sec, 'scale'):
173 | _update_h5_dataset(h5_file, '/', 'dataScale', sec.scale)
174 | elif len(h5_file.keys()) == 5:
175 | _update_h5_dataset(h5_file, '/', 'strain', sec.data.T)
176 | _update_h5_dataset(h5_file, '/', 'spatialsampling', sec.dx)
177 | _update_h5_dataset(h5_file, '/', 'RepetitionFrequency', sec.fs)
178 | if hasattr(sec, 'gauge_length'):
179 | _update_h5_dataset(h5_file, '/', 'GaugeLength',
180 | sec.gauge_length)
181 | elif len(h5_file.keys()) == 3:
182 | _update_h5_dataset(h5_file, '/', 'data', sec.data)
183 | _update_h5_dataset(h5_file, '/', 'x_axis',
184 | sec.start_distance + np.arange(sec.nch) * sec.dx)
185 | _update_h5_dataset(h5_file, '/', 't_axis',
186 | sec.start_time + np.arange(sec.nt) * sec.dt)
187 | elif group == 'Acquisition':
188 | h5_file['Acquisition'].attrs['NumberOfLoci'] = sec.nch
189 | _update_h5_dataset(h5_file, 'Acquisition/Raw[0]/', 'RawData',
190 | sec.data)
191 | if isinstance(sec.start_time, datetime):
192 | if isinstance(h5_file['Acquisition/Raw[0]/RawData'].
193 | attrs['PartStartTime'], bytes):
194 | h5_file['Acquisition/Raw[0]/RawData'].\
195 | attrs['PartStartTime'] = np.bytes_(
196 | sec.start_time.strftime('%Y-%m-%dT%H:%M:%S.%f%z'))
197 | else:
198 | h5_file['Acquisition/Raw[0]/RawData'].\
199 | attrs['PartStartTime'] = sec.start_time.strftime(
200 | '%Y-%m-%dT%H:%M:%S.%f%z')
201 |                     stime = sec.start_time.timestamp() * 1e6  # microseconds
202 |                     DataTime = np.arange(
203 |                         stime, stime + sec.nt / sec.fs * 1e6, 1e6 / sec.fs)
204 | else:
205 | h5_file['Acquisition/Raw[0]/RawData'].\
206 | attrs['PartStartTime'] = np.bytes_(str(sec.start_time))
207 | DataTime = sec.start_time + np.arange(0, sec.nt / sec.fs,
208 | 1 / sec.fs)
209 | _update_h5_dataset(h5_file, 'Acquisition/Raw[0]/',
210 | 'RawDataTime', DataTime)
211 | h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] = sec.fs
212 | h5_file['Acquisition'].attrs['SpatialSamplingInterval'] = sec.dx
213 | if hasattr(sec, 'gauge_length'):
214 | h5_file['Acquisition'].attrs['GaugeLength'] = \
215 | sec.gauge_length
216 | elif group == 'raw':
217 | _update_h5_dataset(h5_file, '/', 'raw', sec.data)
218 | DataTime = sec.start_time.timestamp() + \
219 | np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
220 | _update_h5_dataset(h5_file, '/', 'timestamp', DataTime)
221 | elif group == 'data': # https://ai4eps.github.io/homepage/ml4earth/seismic_event_format_das/
222 | _update_h5_dataset(h5_file, '/', 'data', sec.data)
223 | h5_file['data'].attrs['dx_m'] = sec.dx
224 | h5_file['data'].attrs['dt_s'] = 1 / sec.fs
225 | h5_file['data'].attrs['begin_time'] = \
226 | datetime.strftime(sec.start_time, '%Y-%m-%dT%H:%M:%S.%f%z')
227 | h5_file['data'].attrs['unit'] = sec.data_type
228 | elif group == 'data_product':
229 | _update_h5_dataset(h5_file, 'data_product/', 'data', sec.data)
230 | h5_file.attrs['dt_computer'] = 1 / sec.fs
231 | h5_file.attrs['dx'] = sec.dx
232 | h5_file.attrs['gauge_length'] = sec.gauge_length
233 | DataTime = sec.start_time.timestamp() + \
234 | np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
235 | if h5_file.attrs['saving_start_gps_time'] > 0:
236 | h5_file.attrs['file_start_gps_time'] = \
237 | sec.start_time.timestamp()
238 | _update_h5_dataset(h5_file, 'data_product/', 'gps_time',
239 | DataTime)
240 | del h5_file['data_product/posix_time']
241 | else:
242 | h5_file.attrs['file_start_computer_time'] = \
243 | sec.start_time.timestamp()
244 | _update_h5_dataset(h5_file, 'data_product/', 'posix_time',
245 | DataTime)
246 | del h5_file['data_product/gps_time']
247 | h5_file.attrs['data_product'] = sec.data_type
248 | else:
249 | acquisition = list(h5_file[f'{group}/Source1/Zone1'].keys())[0]
250 | data = sec.data
251 | fs = int(sec.fs)
252 | d = len(h5_file[f'{group}/Source1/Zone1/{acquisition}'].shape)
253 | if d == 3:
254 | mod = sec.nt % fs
255 |                     if mod:  # pad so the time axis splits into whole blocks
256 |                         data = np.hstack((data, np.zeros((sec.nch, fs - mod))))
257 |                     data = data.reshape((sec.nch, fs, -1)).T
258 | elif d == 2:
259 | data = data.T
260 | _update_h5_dataset(h5_file, f'{group}/Source1/Zone1/',
261 | acquisition, data)
262 |
263 |                 zone = h5_file[f'{group}/Source1/Zone1']
264 |                 # h5py returns attribute arrays by copy, so modify the copy
265 |                 # and write it back for the change to reach the file
266 |                 for attr, val in [('Spacing', sec.dx), ('SamplingRate', sec.fs),
267 |                                   ('Extent', sec.start_channel),
268 |                                   ('Origin', sec.start_distance),
269 |                                   ('GaugeLength', sec.gauge_length)]:
270 |                     arr = zone.attrs[attr]
271 |                     arr[0] = val
272 |                     zone.attrs[attr] = arr
273 |                 zone.attrs['FreqRes'] = np.bytes_(str(sec.fs))
274 | DataTime = sec.start_time.timestamp() + \
275 | np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
276 | _update_h5_dataset(h5_file, f'{group}/Source1/',
277 | 'time', DataTime.reshape((1, -1)))
278 |
279 | return None
280 |
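# === Editor's note: illustrative sketch, not part of the original write.py ===
# For reference, the layout produced by the raw_fname-is-None branch of
# _write_h5 can be read back like this ('section.h5' is a hypothetical name):
def _example_read_h5_section(fname='section.h5'):
    import h5py

    with h5py.File(fname, 'r') as f:
        data = f['Acquisition/Raw[0]/RawData'][()]        # (nch, nt) array
        times = f['Acquisition/Raw[0]/RawDataTime'][()]   # per-sample timestamps
        fs = f['Acquisition/Raw[0]'].attrs['OutputDataRate']
        dx = f['Acquisition'].attrs['SpatialSamplingInterval']
        t0 = f['Acquisition/Raw[0]/RawData'].attrs['PartStartTime']
    return data, times, fs, dx, t0
# ==============================================================================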
281 |
282 | def _write_segy(sec, fname, raw_fname=None):
283 | spec = segyio.spec()
284 | spec.samples = np.arange(sec.nt) / sec.fs * 1e3
285 | spec.tracecount = sec.nch
286 | if raw_fname is None:
287 | spec.format = 1
288 | with segyio.create(fname, spec) as new_file:
289 | new_file.header.length = sec.nch
290 | new_file.header.segy._filename = fname
291 | new_file.trace = sec.data # .astype(np.float32)
292 | else:
293 | with segyio.open(raw_fname, ignore_geometry=True) as raw_file:
294 | spec.sorting = raw_file.sorting
295 | spec.format = raw_file.format
296 | raw_file.header.length = sec.nch
297 | raw_file.header.segy._filename = fname
298 | with segyio.create(fname, spec) as new_file:
299 | new_file.text[0] = raw_file.text[0]
300 | new_file.header = raw_file.header
301 | new_file.trace = sec.data.astype(raw_file.trace.dtype)
302 |
303 |     warnings.warn('The SEG-Y format does not store the channel interval (dx).')
304 | return None
305 |
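# === Editor's note: illustrative sketch, not part of the original write.py ===
# A SEG-Y file produced by _write_segy can be read back with segyio; note that
# the channel spacing (dx) is not stored, as the warning above points out.
# 'section.segy' is a hypothetical file name.
def _example_read_segy_section(fname='section.segy'):
    import segyio

    with segyio.open(fname, ignore_geometry=True) as f:
        data = f.trace.raw[:]   # all traces as a (tracecount, nt) ndarray
        t = f.samples           # sample times as reconstructed by segyio (ms)
    return data, t
# ==============================================================================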
--------------------------------------------------------------------------------
/document/Ridgecrest_traffic_noise.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/document/Ridgecrest_traffic_noise.mat
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 |
4 | setup(
5 | name='DASPy-toolbox', version='1.1.6',
6 |     description=(
7 |         'DASPy is an open-source project dedicated to providing a Python '
8 |         'package for DAS (Distributed Acoustic Sensing) data processing, '
9 |         'which comprises classic seismic data processing techniques and '
10 |         'specialized algorithms for DAS applications.'
11 |     ),
12 | long_description=open('README.md').read(),
13 | author='Minzhe Hu, Zefeng Li',
14 | author_email='hmz2018@mail.ustc.edu.cn',
15 | maintainer='Minzhe Hu',
16 | maintainer_email='hmz2018@mail.ustc.edu.cn',
17 | license='MIT License',
18 | url='https://github.com/HMZ-03/DASPy',
19 | packages=find_packages(),
20 | entry_points={
21 | 'console_scripts': [
22 | 'daspy = daspy.main:main',
23 | ]
24 | },
25 | include_package_data=True,
26 | package_data={
27 | 'daspy': ['core/example.pkl']
28 | },
29 | classifiers=[
30 | 'Operating System :: OS Independent',
31 | 'License :: OSI Approved :: MIT License',
32 | 'Programming Language :: Python :: 3'
33 | ],
34 | python_requires='>=3.9',
35 | install_requires=[
36 | 'numpy',
37 | 'scipy>=1.13',
38 | 'matplotlib',
39 | 'geographiclib',
40 | 'pyproj',
41 | 'h5py',
42 | 'segyio',
43 | 'nptdms',
44 | 'tqdm'
45 | ]
46 | )
47 |
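# === Editor's note: illustrative quickstart, not part of the original setup.py ===
# After installing the distribution defined above (e.g. `pip install DASPy-toolbox`),
# the package is imported as `daspy`. A minimal sketch, assuming `daspy.read()`
# loads the bundled core/example.pkl when called without arguments (see
# package_data above):
#
#     from daspy import read
#     sec = read()     # Section object holding the bundled example data
#     print(sec)
# ==================================================================================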
--------------------------------------------------------------------------------
/website/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/website/logo.png
--------------------------------------------------------------------------------
/website/waveform.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/website/waveform.png
--------------------------------------------------------------------------------