├── .github └── workflows │ └── workflow.yml ├── .gitignore ├── CONTRIBUTING.md ├── CONTRIBUTORS.txt ├── LICENSE ├── README.md ├── daspy ├── CONTRIBUTORS.txt ├── __init__.py ├── advanced_tools │ ├── __init__.py │ ├── channel.py │ ├── decomposition.py │ ├── denoising.py │ ├── fdct.py │ └── strain2vel.py ├── basic_tools │ ├── __init__.py │ ├── filter.py │ ├── freqattributes.py │ ├── preprocessing.py │ └── visualization.py └── core │ ├── __init__.py │ ├── collection.py │ ├── dasdatetime.py │ ├── example.pkl │ ├── read.py │ ├── section.py │ └── write.py ├── document ├── Ridgecrest_traffic_noise.mat └── example.ipynb ├── setup.py └── website ├── logo.png └── waveform.png /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | name: Build distribution 📦 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v4 12 | with: 13 | persist-credentials: false 14 | - name: Set up Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.x" 18 | - name: Install pypa/build 19 | run: >- 20 | python3 -m 21 | pip install 22 | build 23 | --user 24 | - name: Build a binary wheel and a source tarball 25 | run: python3 -m build 26 | - name: Store the distribution packages 27 | uses: actions/upload-artifact@v4 28 | with: 29 | name: python-package-distributions 30 | path: dist/ 31 | 32 | publish-to-pypi: 33 | name: >- 34 | Publish Python 🐍 distribution 📦 to PyPI 35 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 36 | needs: 37 | - build 38 | runs-on: ubuntu-latest 39 | environment: 40 | name: pypi 41 | url: https://pypi.org/p/DASPy-toolbox 42 | permissions: 43 | id-token: write # IMPORTANT: mandatory for trusted publishing 44 | 45 | steps: 46 | - name: Download all the dists 47 | uses: actions/download-artifact@v4 48 | with: 49 | name: python-package-distributions 50 | path: dist/ 51 | - name: Publish distribution 📦 to PyPI 52 | uses: pypa/gh-action-pypi-publish@release/v1 53 | 54 | github-release: 55 | name: >- 56 | Sign the Python 🐍 distribution 📦 with Sigstore 57 | and upload them to GitHub Release 58 | needs: 59 | - publish-to-pypi 60 | runs-on: ubuntu-latest 61 | 62 | permissions: 63 | contents: write # IMPORTANT: mandatory for making GitHub Releases 64 | id-token: write # IMPORTANT: mandatory for sigstore 65 | 66 | steps: 67 | - name: Download all the dists 68 | uses: actions/download-artifact@v4 69 | with: 70 | name: python-package-distributions 71 | path: dist/ 72 | - name: Sign the dists with Sigstore 73 | uses: sigstore/gh-action-sigstore-python@v3.0.0 74 | with: 75 | inputs: >- 76 | ./dist/*.tar.gz 77 | ./dist/*.whl 78 | - name: Create GitHub Release 79 | env: 80 | GITHUB_TOKEN: ${{ github.token }} 81 | run: >- 82 | gh release create 83 | "$GITHUB_REF_NAME" 84 | --repo "$GITHUB_REPOSITORY" 85 | --notes "" 86 | - name: Upload artifact signatures to GitHub Release 87 | env: 88 | GITHUB_TOKEN: ${{ github.token }} 89 | # Upload to GitHub Release using the `gh` CLI. 90 | # `dist/` contains the built packages, and the 91 | # sigstore-produced signatures and certificates. 
92 |       run: >-
93 |         gh release upload
94 |         "$GITHUB_REF_NAME" dist/**
95 |         --repo "$GITHUB_REPOSITORY"
96 | 
97 |   publish-to-testpypi:
98 |     name: Publish Python 🐍 distribution 📦 to TestPyPI
99 |     if: startsWith(github.ref, 'refs/tags/') # only publish to TestPyPI on tag pushes
100 |     needs:
101 |     - build
102 |     runs-on: ubuntu-latest
103 | 
104 |     environment:
105 |       name: testpypi
106 |       url: https://test.pypi.org/p/DASPy-toolbox
107 | 
108 |     permissions:
109 |       id-token: write # IMPORTANT: mandatory for trusted publishing
110 | 
111 |     steps:
112 |     - name: Download all the dists
113 |       uses: actions/download-artifact@v4
114 |       with:
115 |         name: python-package-distributions
116 |         path: dist/
117 |     - name: Publish distribution 📦 to TestPyPI
118 |       uses: pypa/gh-action-pypi-publish@release/v1
119 |       with:
120 |         repository-url: https://test.pypi.org/legacy/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to DASPy
2 | 
3 | We encourage you to submit your changes through Git's Pull Request feature.
4 | 
5 | ## Submit a Pull Request
6 | 
7 | Here's a quick guide:
8 | 
9 | 1. Fork the repo.
10 | 2. Make a new branch based on `main`.
11 | 3. Push to your fork and submit a pull request.
12 | 4. Wait for our review. We may suggest changes, improvements, or alternatives.
13 | 
14 | ## DASPy Coding Style Guide
15 | 
16 | Like most Python projects, we try to adhere to [PEP 8](https://peps.python.org/pep-0008/) (Style Guide for Python Code) and [PEP 257](https://peps.python.org/pep-0257/) (Docstring Conventions) with the modifications documented here. Be sure to read all documents if you intend to contribute code to DASPy.
17 | 
18 | ## Naming
19 | 
20 | ### Names to Avoid
21 | 
22 | * single character names except for counters or iterators
23 | * dashes (-) in any package/module name
24 | * **__double_leading_and_trailing_underscore__** names (reserved by Python)
25 | 
26 | ### Naming Convention
27 | 
28 | * Use meaningful variable/function/method names; these will help other people a lot when reading your code.
29 | * Prepending a single underscore (_) means an object is “internal” / “private”: it is not supposed to be used by end-users, and its API may change without notice (in contrast to API changes in public objects, which are handled with deprecation warnings for one release cycle).
30 | * Prepending a double underscore (__) to an instance variable or method effectively serves to make the variable or method private to its class (using name mangling).
31 | * Place related classes and top-level functions together in a module. Unlike Java, there is no need to limit yourself to one class per module.
32 | * Use CamelCase for class names, but snake_case for module names, variables and functions/methods.
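For example, a minimal sketch of these conventions (illustrative names only, not code from the package):

```python
# module: strain_utils.py  (snake_case module name, no dashes)


class StrainConverter:  # CamelCase class name
    """Convert strain records between representations."""

    def convert_rate(self, data):  # snake_case public method with a meaningful name
        return self._normalize(data)

    def _normalize(self, data):  # single leading underscore: internal helper
        return data
```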
33 | -------------------------------------------------------------------------------- /CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | daspy/CONTRIBUTORS.txt 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 University of Science and Technology of China 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | [![Supported Python versions](https://img.shields.io/badge/python-3.9%20|%203.10%20|%203.11%20|%203.12-blue)](https://pypi.org/project/DASPy-toolbox/) 4 | [![License](https://img.shields.io/pypi/l/daspy-toolbox.svg)](https://opensource.org/license/mit) 5 | [![PyPI Version](https://img.shields.io/pypi/v/daspy-toolbox.svg)](https://pypi.org/project/DASPy-toolbox/) 6 | 7 | [![DOI](https://img.shields.io/badge/DOI-10.1785/0220240124-blue.svg)](https://doi.org/10.1785/0220240124) 8 | [![PyPI Downloads](https://img.shields.io/pypi/dm/daspy-toolbox.svg?label=pypi)](https://pypi.org/project/DASPy-toolbox/) 9 | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/daspy-toolbox?label=conda)](https://anaconda.org/conda-forge/daspy-toolbox) 10 | 11 | DASPy is an open-source project dedicated to provide a python package for DAS (Distributed Acoustic Sensing) data processing. 12 | 13 | The goal of the DASPy project is to lower the bar of DAS data processing. DASPy includes: 14 | * Classic seismic data processing techniques, including preprocessing, filter, spectrum analysis, and visualization 15 | * Specialized algorithms for DAS applications, including denoising, waveform decomposition, channel attribute analysis, and strain-velocity conversion. 16 | 17 | DASPy is licensed under the MIT License. [An English version of DASPy tutorial](https://daspy-tutorial.readthedocs.io/en/latest/), [a Chinese version of DASPy tutorial](https://daspy-tutorial-cn.readthedocs.io/zh-cn/latest/) and [an example of Jupyter notebook](document/example.ipynb) is available. If you have any questions, please contact me via . 18 | 19 | ## Installation 20 | DASPy runs on Linux, Windows and Mac OS and on Python 3.9 and up. 
21 | 22 | ### Pip 23 | ``` 24 | pip install daspy-toolbox 25 | ``` 26 | 27 | Install the latest version from GitHub: 28 | 29 | ``` 30 | pip install git+https://github.com/HMZ-03/DASPy.git 31 | ``` 32 | 33 | ### Conda 34 | 35 | ``` 36 | conda install daspy-toolbox 37 | ``` 38 | 39 | or 40 | 41 | ``` 42 | conda install conda-forge::daspy-toolbox 43 | ``` 44 | 45 | ### Manual installation 46 | 1. Install dependent packages: numpy, scipy >=1.13, matplotlib, geographiclib, pyproj, h5py, segyio, nptdms, tqdm 47 | 48 | 2. Add DASPy into your Python path. 49 | 50 | ## Getting started 51 | ``` 52 | from daspy import read 53 | sec = read() # load example waveform 54 | sec.bandpass(1, 15) 55 | sec.plot() 56 | ``` 57 | 58 | 59 | ### Contributing 60 | 61 | Please see details on how to contribute to the project [here](CONTRIBUTING.md) and [here](CodingStyleGuide.md). 62 | 63 | ### Reference 64 | 65 | * Minzhe Hu and Zefeng Li (2024), [DASPy: A Python Toolbox for DAS Seismology](https://pubs.geoscienceworld.org/ssa/srl/article/95/5/3055/645865/DASPy-A-Python-Toolbox-for-DAS-Seismology), *Seismological Research Letters*, 95(5), 3055–3066, doi: `https://doi.org/10.1785/0220240124`. 66 | -------------------------------------------------------------------------------- /daspy/CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | Hu, Minzhe 2 | Li, Zefeng 3 | Zhang, Ji -------------------------------------------------------------------------------- /daspy/__init__.py: -------------------------------------------------------------------------------- 1 | from daspy.core.section import Section 2 | from daspy.core.collection import Collection 3 | from daspy.core.read import read 4 | from daspy.core.dasdatetime import DASDateTime, local_tz, utc -------------------------------------------------------------------------------- /daspy/advanced_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/advanced_tools/__init__.py -------------------------------------------------------------------------------- /daspy/advanced_tools/channel.py: -------------------------------------------------------------------------------- 1 | # Purpose: Several functions for analysis data quality and geometry of channels 2 | # Author: Minzhe Hu, Zefeng Li 3 | # Date: 2025.3.31 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import numpy as np 6 | from copy import deepcopy 7 | from geographiclib.geodesic import Geodesic 8 | from pyproj import Proj 9 | 10 | 11 | def robust_polyfit(data, deg, thresh): 12 | """ 13 | Fit a curve with a robust weighted polynomial. 14 | 15 | :param data: 1-dimensional array. 16 | :param deg: int. Degree of the fitting polynomial 17 | :param thresh: int or float. Defined MAD multiple of outliers. 18 | :return: Fitting data 19 | """ 20 | nch = len(data) 21 | channels = np.arange(nch) 22 | p_coef = np.polyfit(channels, data, deg) 23 | p_fit = np.poly1d(p_coef) 24 | old_data = p_fit(channels) 25 | mse = 1 26 | 27 | # robust fitting until the fitting curve changes < 0.1% at every point. 
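    # In other words, an iteratively reweighted fit: samples whose residual
    # exceeds thresh times the median absolute residual get zero weight, the
    # polynomial is refit with the remaining samples, and iteration stops once
    # the largest relative change of the fitted curve drops below 0.1%.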
28 | while mse > 0.001: 29 | rsl = abs(data - old_data) 30 | mad = np.median(rsl) 31 | weights = np.zeros(nch) 32 | weights[rsl < thresh * mad] = 1 33 | p_coef = np.polyfit(channels, data, deg, w=weights) 34 | p_fit = np.poly1d(p_coef) 35 | new_data = p_fit(channels) 36 | mse = np.nanmax(np.abs((new_data - old_data) / old_data)) 37 | old_data = new_data 38 | 39 | return new_data, weights 40 | 41 | 42 | def _continuity_checking(lst1, lst2, adjacent=2, toleration=2): 43 | lst1_raw = deepcopy(lst1) 44 | for chn in lst1_raw: 45 | discont = [a for a in lst2 if abs(a - chn) <= adjacent] 46 | if len(discont) >= adjacent * 2 + 1 - toleration: 47 | lst1.remove(chn) 48 | lst2.append(chn) 49 | 50 | return lst1, lst2 51 | 52 | 53 | def channel_checking(data, deg=10, thresh=5, continuity=True, adjacent=2, 54 | toleration=2, mode='low', verbose=False): 55 | """ 56 | Use the energy of each channel to determine which channels are bad. 57 | 58 | :param data: 2-dimensional np.ndarray. Axis 0 is channel number and axis 1 59 | is time series. 60 | :param deg: int. Degree of the fitting polynomial 61 | :param thresh: int or float. The MAD multiple of bad channel energy lower 62 | than good channels. 63 | :param continuity: bool. Perform continuity checks on bad channels and good 64 | channels. 65 | :param adjacent: int. The number of nearby channels for continuity checks. 66 | :param toleration: int. The number of discontinuous channel allowed in each 67 | channel (including itself) in the continuity check. 68 | :param mode: str. 'low' means bad channels have low amplitude, 'high' means 69 | bad channels have high amplitude, and 'both' means bad channels are 70 | likely to have low or high amplitude. 71 | :return: Good channels and bad channels. 72 | """ 73 | nch = len(data) 74 | energy = np.log10(np.sum(data**2, axis=1)) 75 | energy[energy == -np.inf] = -308 76 | 77 | # Remove abnormal value by robust polynomial fitting. 78 | fitted_energy, weights = robust_polyfit(energy, deg, thresh) 79 | deviation = energy - fitted_energy 80 | 81 | # Iterate eliminates outliers. 82 | mad = np.median(abs(deviation[weights > 0])) 83 | if mode == 'low': 84 | bad_chn = np.argwhere(deviation < -thresh * mad).ravel().tolist() 85 | elif mode == 'high': 86 | bad_chn = np.argwhere(deviation > thresh * mad).ravel().tolist() 87 | elif mode == 'both': 88 | bad_chn = np.argwhere(deviation < -thresh * mad).ravel().tolist() + \ 89 | np.argwhere(deviation > thresh * mad).ravel().tolist() 90 | good_chn = list(set(range(nch)) - set(bad_chn)) 91 | 92 | if continuity: 93 | # Discontinuous normal value are part of bad channels. 94 | good_chn, bad_chn = _continuity_checking(good_chn, bad_chn, 95 | adjacent=adjacent, 96 | toleration=toleration) 97 | 98 | # Discontinuous outliers are usually not bad channels. 
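        # Same check with the roles swapped: outliers that are surrounded
        # mostly by good channels are moved back to the good list.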
99 | bad_chn, good_chn = _continuity_checking(bad_chn, good_chn, 100 | adjacent=adjacent, 101 | toleration=toleration) 102 | 103 | bad_chn = np.sort(np.array(bad_chn)) 104 | good_chn = np.sort(np.array(good_chn)) 105 | if verbose: 106 | return good_chn, bad_chn, energy, fitted_energy - thresh * mad 107 | 108 | return good_chn, bad_chn 109 | 110 | 111 | def _channel_location(track_pt): 112 | track, tn = track_pt[:, :-1], track_pt[:, -1] 113 | dim = track.shape[1] 114 | l_track = np.sqrt(np.sum(np.diff(track, axis=0) ** 2, axis=1)) 115 | l_track_cum = np.hstack(([0], np.cumsum(l_track))) 116 | idx_kp = np.where(tn >= 0)[0] 117 | 118 | interp_ch = [] 119 | chn = np.floor(tn[idx_kp[0]]).astype(int) 120 | interp_ch.append([*track[idx_kp[0]], chn]) 121 | if abs(chn - tn[idx_kp[0]]) > 1e-6: 122 | chn += 1 123 | 124 | seg_interval = [] 125 | for i in range(1, len(idx_kp)): 126 | # calculate actual interval between known-channel points 127 | istart, iend = idx_kp[i - 1], idx_kp[i] 128 | n_chn_kp = tn[iend] - tn[istart] 129 | d_interp = (l_track_cum[iend] - l_track_cum[istart]) / n_chn_kp 130 | seg_interval.append([tn[istart], tn[iend], d_interp]) 131 | 132 | l_res = 0 # remaining fiber length before counting the next segment 133 | # consider if the given channelnumber is not an integer 134 | chn_res = tn[istart] - int(tn[istart]) 135 | if d_interp == 0: 136 | while chn < int(tn[iend]): 137 | chn += 1 138 | interp_ch.append([*track[istart, :], chn]) 139 | continue 140 | for j in range(istart, iend): 141 | l_start = l_track[j] + l_res 142 | 143 | # if tp segment length is large for more than one interval, get the 144 | # channel loc 145 | if l_start >= d_interp * (1 - chn_res - 1e-6): 146 | # floor int, num of channel available 147 | n_chn_tp = int(l_start / d_interp + chn_res) 148 | l_new = (np.arange(n_chn_tp) + 1 - chn_res) * d_interp - \ 149 | l_res # channel distance from segment start 150 | 151 | # interpolate the channel loc 152 | t_new = np.zeros((len(l_new), dim)) 153 | for d in range(dim): 154 | t_new[:, d] = np.interp(l_new, [0, l_track[j]], 155 | [track[j, d], track[j + 1, d]]) 156 | 157 | # remaining length to add to next segment 158 | l_res = l_start - n_chn_tp * d_interp 159 | 160 | # write interpolated channel loc 161 | for ti in t_new: 162 | chn += 1 163 | interp_ch.append([*ti, chn]) 164 | 165 | # handle floor int problem when l_start/d_interp is near an 166 | # interger 167 | if (d_interp - l_res) / d_interp < 1e-6: 168 | chn += 1 169 | interp_ch.append([*track[j + 1, :], int(tn[j + 1])]) 170 | l_res = 0 171 | chn_res = 0 172 | # if tp segment length is not enough for one interval, simply add 173 | # the length to next segment 174 | elif l_start < d_interp: 175 | l_res = l_start 176 | 177 | if abs(tn[iend] - int(tn[iend])) > 1e-6: 178 | chn += 1 179 | interp_ch.append([*track[iend, :], chn]) 180 | 181 | return np.array(seg_interval), np.array(interp_ch) 182 | 183 | 184 | def location_interpolation(known_pt, track_pt=None, dx=2, data_type='lonlat', 185 | verbose=False): 186 | """ 187 | Interpolate to obtain the positions of all channels. 188 | 189 | :param known_pt: np.ndarray. Points with known channel numbers. Each row 190 | includes 2 or 3 coordinates and a channel number. 191 | :param track_pt: np.ndarray. Optional fiber spatial track points without 192 | channel numbers. Each row includes 2 or 3 coordinates. Please ensure 193 | that the track points are arranged in increasing order of track number. 
194 | If track points is not dense enough, please insert the coordinates of 195 | known points into track points in order. 196 | :param dx: Known points far from the track (> dx) will be excluded. 197 | Recommended setting is channel interval. The unit is m. 198 | :param data_type: str. Coordinate type. 'lonlat' ('lonlatheight') for 199 | longitude, latitude in degree (and height in meters), 'xy' ('xyz') for 200 | x, y (and z) in meters. 201 | :param verbose: bool. If True, return interpoleted channel location and 202 | segment interval. 203 | :return: Interpoleted channel location if verbose is False. 204 | """ 205 | known_pt = known_pt[known_pt[:,-1].argsort()] 206 | dim = known_pt.shape[1] - 1 207 | if 'lonlat' in data_type: 208 | zone = np.floor((max(known_pt[:,0]) + min(known_pt[:,0])) / 2 / 6)\ 209 | .astype(int) + 31 210 | DASProj = Proj(proj='utm', zone=zone, ellps='WGS84', 211 | preserve_units=False) 212 | known_pt[:, 0], known_pt[:, 1] = DASProj(known_pt[:, 0], known_pt[:, 1]) 213 | else: 214 | assert 'xy' in data_type, ('data_type should be \'lonlat\',\'' 215 | 'lonlatheight\', \'xy\' or \'xyz\'') 216 | 217 | if track_pt is None: 218 | seg_interval, interp_ch = _channel_location(known_pt) 219 | else: 220 | K = len(known_pt) 221 | T = len(track_pt) 222 | track_pt = np.c_[track_pt, np.zeros(T) - 1] 223 | if 'lonlat' in data_type: 224 | track_pt[:, 0], track_pt[:, 1] = DASProj(track_pt[:, 0], 225 | track_pt[:, 1]) 226 | 227 | # insert the known points into the fiber track data 228 | matrix = [np.tile(track_pt[:, d], (K, 1)) - 229 | np.tile(known_pt[:, d], (T, 1)).T for d in range(dim)] 230 | 231 | dist = np.sqrt(np.sum(np.array(matrix) ** 2, axis=0)) 232 | for k in range(K): 233 | if min(dist[k]) < dx: 234 | t_list = np.sort(np.where(dist[k] == min(dist[k]))[0]) 235 | for t in t_list: 236 | if track_pt[t, -1] == -1: 237 | track_pt[t, -1] = known_pt[k, -1] 238 | last_pt = t 239 | break 240 | 241 | # interpolation with regular spacing along the fiber track 242 | try: 243 | track_pt = track_pt[:last_pt + 1] 244 | except NameError: 245 | print('All known points are too far away from the track points. 
If ' 246 | 'they are reliable, they can be merged in sequence as track ' 247 | 'points to input') 248 | return None 249 | 250 | seg_interval, interp_ch = _channel_location(track_pt) 251 | 252 | if data_type == 'lonlat': 253 | interp_ch[:, 0], interp_ch[:, 1] = \ 254 | DASProj(interp_ch[:, 0], interp_ch[:, 1], inverse=True) 255 | 256 | if verbose: 257 | return interp_ch, seg_interval 258 | return interp_ch 259 | 260 | 261 | def _xcorr(x, y): 262 | N = len(x) 263 | meanx = np.mean(x) 264 | meany = np.mean(y) 265 | stdx = np.std(np.asarray(x)) 266 | stdy = np.std(np.asarray(y)) 267 | c = np.sum((y - meany) * (x - meanx)) / (N * stdx * stdy) 268 | return c 269 | 270 | 271 | def _horizontal_angle_change(geo, gap=10): 272 | nch = len(geo) 273 | angle = np.zeros(nch) 274 | for i in range(1, nch - 1): 275 | lon, lat = geo[i] 276 | lon_s, lat_s = geo[max(i - gap, 0)] 277 | lon_e, lat_e = geo[min(i + gap, nch - 1)] 278 | azi_s = Geodesic.WGS84.Inverse(lat_s, lon_s, lat, lon)['azi1'] 279 | azi_e = Geodesic.WGS84.Inverse(lat, lon, lat_e, lon_e)['azi1'] 280 | dazi = azi_e - azi_s 281 | if abs(dazi) > 180: 282 | dazi = -np.sign(dazi) * (360 - abs(dazi)) 283 | angle[i] = dazi 284 | 285 | return angle 286 | 287 | 288 | def _vertical_angle_change(geo, gap=10): 289 | nch = len(geo) 290 | angle = np.zeros(nch) 291 | for i in range(1, nch - 1): 292 | lon, lat, dep = geo[i] 293 | lon_s, lat_s, dep_s = geo[max(i - gap, 0)] 294 | lon_e, lat_e, dep_e = geo[min(i + gap, nch - 1)] 295 | s12_s = Geodesic.WGS84.Inverse(lat_s, lon_s, lat, lon)['s12'] 296 | theta_s = np.arctan((dep - dep_s) / s12_s) / np.pi * 180 297 | s12_e = Geodesic.WGS84.Inverse(lat, lon, lat_e, lon_e)['s12'] 298 | theta_e = np.arctan((dep_e - dep) / s12_e) / np.pi * 180 299 | angle[i] = theta_e - theta_s 300 | 301 | return angle 302 | 303 | 304 | def _local_maximum_indexes(data, thresh): 305 | idx = np.where(data > thresh)[0] 306 | if len(idx): 307 | i = list(np.where(np.diff(idx) > 1)[0] + 1) 308 | if len(idx) - 1 not in i: 309 | i.append(len(idx) - 1) 310 | b = 0 311 | max_idx = [] 312 | for e in i: 313 | max_idx.append(idx[b] + np.argmax(data[idx[b]:idx[e]])) 314 | b = e 315 | return max_idx 316 | else: 317 | return [] 318 | 319 | 320 | def turning_points(data, data_type='coordinate', thresh=5, depth_info=False, 321 | channel_gap=3): 322 | """ 323 | Seek turning points in the DAS channel. 324 | 325 | :param data: numpy.ndarray. Data used to seek turning points. 326 | :param data_type: str. If data_type is 'coordinate', data should include 327 | longitude and latitude (first two columns), and can also include depth 328 | (last column). If data_type is 'waveform', data should be continuous 329 | waveform, preferably containing signal with strong coherence 330 | (earthquake, traffic signal, etc.). 331 | :param thresh: For coordinate data, when the angle of the optical cables on 332 | both sides centered on a certain point exceeds thresh, it is considered 333 | an turning point. For waveform, thresh means the MAD multiple of 334 | adjacent channel cross-correlation values lower than their median. 335 | :param depth_info: bool. Optional if data_type is 'coordinate'. Whether 336 | depth (in meters) is included in the coordinate data and need to be 337 | used. 338 | :param channel_gap: int. Optional if data_type is 'coordinate'. The smaller 339 | the value is, the finer the segmentation will be. It is recommended to 340 | set it to half the ratio of gauge length and channel interval. 341 | :return: list. Channel index of turning points. 
342 | """ 343 | if data_type == 'coordinate': 344 | angle = _horizontal_angle_change(data[:, :2], gap=channel_gap) 345 | turning_h = _local_maximum_indexes(abs(angle), thresh) 346 | 347 | if depth_info: 348 | angle = _vertical_angle_change(data, gap=channel_gap) 349 | turning_v = _local_maximum_indexes(abs(angle), thresh) 350 | return turning_h, turning_v 351 | 352 | return turning_h 353 | 354 | elif data_type == 'waveform': 355 | nch = len(data) 356 | cc = np.zeros(nch - 1) 357 | for i in range(nch - 1): 358 | cc[i] = _xcorr(data[i], data[i + 1]) 359 | median = np.median(cc) 360 | mad = np.median(abs(cc - median)) 361 | 362 | return np.argwhere(cc < median - thresh * mad)[0] 363 | 364 | else: 365 | raise ValueError('Data_type should be \'coordinate\' or \'waveform\'.') 366 | 367 | 368 | def channel_spacing(geometry, depth_info=False): 369 | nch = len(geometry) 370 | dist = np.zeros(nch - 1) 371 | for i in range(nch - 1): 372 | lon0, lat0 = geometry[i, :2] 373 | lon1, lat1 = geometry[i+1, :2] 374 | d = Geodesic.WGS84.Inverse(lat0, lon0, lat1, lon1)['s12'] 375 | if depth_info: 376 | dist[i] = np.sqrt(d**2 + (geometry[i+1, 2] - geometry[i, 2]) ** 2) 377 | else: 378 | dist[i] = d 379 | 380 | return dist 381 | 382 | 383 | def distance_to_channels(geometry, points): 384 | """ 385 | Calculate the distance from a point to each channel. 386 | 387 | :param geometry: numpy.ndarray. It needs to consist of two columns ( 388 | longitude, latitude), three columns (longitude, latitude and depth). 389 | :param points: numpy.ndarray. A array consisting of longitude and 390 | latitude or longitude, latitude and depth. 391 | :return: numpy.ndarray. The distance from the given point to each channel. 392 | """ 393 | if geometry.shape[1] == 3: 394 | depth_info = True 395 | else: 396 | depth_info = False 397 | 398 | nch = len(geometry) 399 | points = np.array(points) 400 | if points.ndim == 1: 401 | points = points.reshape(1, -1) 402 | npt = len(points) 403 | dist = np.zeros((npt, nch)) 404 | for i, pt in enumerate(points): 405 | for j, geo in enumerate(geometry): 406 | d = Geodesic.WGS84.Inverse(pt[1], pt[0], geo[1], geo[0])['s12'] 407 | if depth_info: 408 | dist[i, j] = np.sqrt(d**2 + (pt[2] - geo[2]) ** 2) 409 | else: 410 | dist[i, j] = d 411 | return dist 412 | 413 | 414 | def closest_channel_to_point(geometry, points, verbose=False): 415 | """ 416 | Find the channel number closest to a given point. 417 | 418 | :param geometry: numpy.ndarray. It needs to consist of longitude, latitude 419 | (and depth) or channel number, longitude, latitude (and depth). 420 | :param points: numpy.ndarray. A tuple consisting of longitude and 421 | latitude (and depth). 422 | :param verbose: bool. Return the channel and the distance to the closest 423 | channel if True. 424 | :return: int. The channel number closest to the given point. 
425 | """ 426 | nch = len(geometry) 427 | if points.shape[1] == geometry.shape[1]: 428 | channels = np.arange(nch).astype(int) 429 | else: 430 | geometry = geometry[geometry[:, 0].argsort()] 431 | channels = geometry[:, 0].astype(int) 432 | geometry = geometry[:, 1:] 433 | 434 | dist = distance_to_channels(points, geometry) 435 | closest_index = np.argmin(dist, axis=1) 436 | if verbose: 437 | return channels[closest_index], np.min(dist, axis=1) 438 | return channels[closest_index] 439 | 440 | 441 | def _equally_spacing(dist, dx): 442 | index = [[], []] 443 | residual = [0, abs(dist[0]-dx)] 444 | for i in range(2, len(dist)+1): 445 | res = [] 446 | for j in range(i): 447 | res.append(residual[j] + abs(dx - sum(dist[j:i]))) 448 | residual.append(min(res)) 449 | k = np.argmin(res) 450 | if k > 0: 451 | index.append(index[k] + [k]) 452 | else: 453 | index.append(index[k]) 454 | 455 | return index[-1] 456 | 457 | 458 | def equally_spaced_channels(geometry, dx, depth_info=False, verbose=False): 459 | """ 460 | Find equally spaced channel numbers based on known DAS latitude and 461 | longitude. 462 | 463 | :param geometry: numpy.ndarray. DAS geometry used to filter equally spaced 464 | channels. It needs to consist of longitude, latitude (and depth) or 465 | channel number, longitude, latitude (and depth). 466 | :param dx: Channel interval. 467 | :param depth_info: bool. Whether depth (in meters) is included in the 468 | geometry and needed to be used. 469 | :param verbose: bool. If True, return channel numbers for equally spaced 470 | channels and channel intervals. 471 | :return: Channel numbers for equally spaced channels if verbose is False. 472 | """ 473 | nch = len(geometry) 474 | if geometry.shape[1] == 2 + int(depth_info): 475 | channels = np.arange(nch).astype(int) 476 | else: 477 | geometry = geometry[geometry[:, 0].argsort()] 478 | channels = geometry[:, 0].astype(int) 479 | geometry = geometry[:, 1:] 480 | 481 | dist = channel_spacing(geometry, depth_info=False) 482 | 483 | s = 0 484 | idx_equal = [0] 485 | for i in range(nch-2): 486 | if dist[i] > dx * 2: 487 | e = i 488 | if e == s + 1: 489 | idx_equal.append(e) 490 | elif e >= s + 2: 491 | idx_equal.extend([idx + s for idx in 492 | _equally_spacing(dist[s:e], dx)]) 493 | idx_equal.append(e) 494 | s = e + 1 495 | idx_equal.append(s) 496 | elif dist[i] + dist[i+1] > dx * 1.5: 497 | e = i + 1 498 | if e == s + 1: 499 | idx_equal.append(e) 500 | elif e >= s + 2: 501 | idx_equal.extend([idx + s for idx in 502 | _equally_spacing(dist[s:e], dx)]) 503 | idx_equal.append(e) 504 | s = e 505 | e = nch - 1 506 | if e == s + 1: 507 | idx_equal.append(e) 508 | elif e >= s + 2: 509 | idx_equal.extend([idx + s for idx in 510 | _equally_spacing(dist[s:e], dx)]) 511 | idx_equal.append(e) 512 | 513 | return channels[idx_equal] -------------------------------------------------------------------------------- /daspy/advanced_tools/decomposition.py: -------------------------------------------------------------------------------- 1 | # Purpose: Waveform decomposition 2 | # Author: Minzhe Hu 3 | # Date: 2024.5.13 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import numpy as np 6 | from numpy.fft import irfft2, ifftshift 7 | from daspy.basic_tools.preprocessing import padding, cosine_taper 8 | from daspy.basic_tools.freqattributes import next_pow_2, fk_transform 9 | from daspy.advanced_tools.denoising import curvelet_denoising 10 | 11 | 12 | def fk_fan_mask(f, k, fmin=None, fmax=None, kmin=None, kmax=None, vmin=None, 13 | vmax=None, edge=0.1, flag=None): 14 | 
""" 15 | Make a fan mask in f-k domain for f-k filter. 16 | 17 | :param f: Frequency sequence. 18 | :param k: Wavenumber sequence. 19 | :param fmin, fmax, kmin, kmax, vmin, vmax: float or or sequence of 2 floats. 20 | Sequence of 2 floats represents the start and end of taper. 21 | :param edge: float. The width of fan mask taper edge. 22 | :param flag: -1 keep only negative apparent velocities, 0 keep both postive 23 | and negative apparent velocities, 1 keep only positive apparent 24 | velocities. 25 | :return: Fan mask. 26 | """ 27 | ff = np.tile(f, (len(k), 1)) 28 | kk = np.tile(k, (len(f), 1)).T 29 | vv = - np.divide(ff, kk, out=np.ones_like(ff) * 1e10, where=kk != 0) 30 | mask = np.ones(vv.shape) 31 | for phy_quan in ['f', 'k', 'v']: 32 | p = eval(phy_quan * 2) 33 | pmin = eval(phy_quan + 'min') 34 | if pmin: 35 | if isinstance(pmin, (tuple, list, np.ndarray)): 36 | tp_b, tp_e = min(pmin), max(pmin) 37 | else: 38 | tp_b, tp_e = pmin * max(1 - edge / 2, 0), pmin * (1 + edge / 2) 39 | tp_wid = tp_e - tp_b 40 | mask[(abs(p) <= tp_b)] = 0 41 | area = (abs(p) > tp_b) & (abs(p) < tp_e) 42 | mask[area] *= 0.5 - 0.5 * \ 43 | np.cos(((abs(p[area]) - tp_b) / tp_wid) * np.pi) 44 | 45 | pmax = eval(phy_quan + 'max') 46 | if pmax: 47 | if isinstance(pmax, (tuple, list, np.ndarray)): 48 | tp_b, tp_e = max(pmax), min(pmax) 49 | else: 50 | tp_b, tp_e = pmax * (1 + edge / 2), pmax * (1 - edge / 2) 51 | tp_wid = tp_b - tp_e 52 | mask[(abs(p) >= tp_b)] = 0 53 | area = (abs(p) > tp_e) & (abs(p) < tp_b) 54 | mask[area] *= 0.5 - 0.5 * \ 55 | np.cos(((tp_b - abs(p[area])) / tp_wid) * np.pi) 56 | 57 | if flag: 58 | mask[np.sign(vv) == flag] = 0 59 | return mask 60 | 61 | 62 | def fk_filter(data, dx, fs, taper=(0.02, 0.05), pad='default', mode='decompose', 63 | fmin=None, fmax=None, kmin=None, kmax=None, vmin=None, vmax=None, 64 | edge=0.1, flag=None, verbose=False): 65 | """ 66 | Transform the data to the f-k domain using 2-D Fourier transform method, and 67 | transform back to the x-t domain after filtering. 68 | 69 | :param data: numpy.ndarray. Data to do fk filter. 70 | :param dx: Channel interval in m. 71 | :param fs: Sampling rate in Hz. 72 | :param taper: float or sequence of floats. Each float means decimal 73 | percentage of Tukey taper for corresponding dimension (ranging from 0 to 74 | 1). Default is 0.1 which tapers 5% from the beginning and 5% from the 75 | end. 76 | :param pad: Pad the data or not. It can be float or sequence of floats. Each 77 | float means padding percentage before FFT for corresponding dimension. 78 | If set to 0.1 will pad 5% before the beginning and after the end. 79 | 'default' means pad both dimensions to next power of 2. None or False 80 | means don't pad data before or during Fast Fourier Transform. 81 | :param mode: str. 'remove' for denoising, 'retain' for extraction, and 82 | 'decompose' for decomposition. 83 | :param fmin, fmax, kmin, kmax, vmin, vmax: float or or sequence of 2 floats. 84 | Sequence of 2 floats represents the start and end of taper. 85 | :param edge: float. The width of fan mask taper edge. 86 | :param flag: -1 keep only negative apparent velocities, 0 keep both postive 87 | and negative apparent velocities, 1 keep only positive apparent 88 | velocities. 89 | :param verbose: If True, return filtered data, f-k spectrum, frequency 90 | sequence, wavenumber sequence and f-k mask. 91 | :return: Filtered data and some variables in the process if verbose==True. 
92 | """ 93 | data_tp = cosine_taper(data, taper) 94 | if pad == 'default': 95 | nch, nt = data.shape 96 | dn = (next_pow_2(nch) - nch, next_pow_2(nt) - nt) 97 | nfft = None 98 | elif pad is None or pad is False: 99 | dn = 0 100 | nfft = None 101 | else: 102 | dn = np.round(np.array(pad) * data.shape).astype(int) 103 | nfft = 'default' 104 | 105 | data_pd = padding(data_tp, dn) 106 | nch, nt = data_pd.shape 107 | 108 | fk, f, k = fk_transform(data_pd, dx, fs, taper=0, nfft=nfft) 109 | 110 | mask = fk_fan_mask(f, k, fmin, fmax, kmin, kmax, vmin, vmax, edge=edge, 111 | flag=flag) 112 | 113 | if mode == 'remove': 114 | mask = 1 - mask 115 | 116 | if mode == 'decompose': 117 | data_flt1 = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt] 118 | data_flt1 = padding(data_flt1, dn, reverse=True) 119 | data_flt2 = irfft2(ifftshift(fk * (1 - mask), axes=0)).real[:nch, :nt] 120 | data_flt2 = padding(data_flt2, dn, reverse=True) 121 | if verbose: 122 | return data_flt1, data_flt2, fk, f, k, mask 123 | else: 124 | return data_flt1, data_flt2 125 | else: 126 | data_flt = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt] 127 | data_flt = padding(data_flt, dn, reverse=True) 128 | if verbose: 129 | return data_flt, fk, f, k, mask 130 | else: 131 | return data_flt 132 | 133 | 134 | def curvelet_windowing(data, dx, fs, mode='decompose', vmin=0, vmax=np.inf, 135 | flag=None, pad=0.3, scale_begin=3, nbscales=None, 136 | nbangles=16, finest=1): 137 | """ 138 | Use curevelet transform to keep cooherent signal with certain velocity 139 | range. {Atterholt et al., 2022 , Geophys. J. Int.} 140 | 141 | :param data: numpy.ndarray. Data to decomposite. 142 | :param dx: Channel interval in m. 143 | :param fs: Sampling rate in Hz. 144 | :param mode: str. 'remove' for denoising, 'retain' for extraction, and 145 | 'decompose' for decomposition. 146 | :param vmin, vmax: float. Velocity range in m/s. 147 | :param flag: -1 keep only negative apparent velocities, 0 keep both postive 148 | and negative apparent velocities, 1 keep only positive apparent 149 | velocities. 150 | :param pad: float or sequence of floats. Each float means padding percentage 151 | before FFT for corresponding dimension. If set to 0.1 will pad 5% before 152 | the beginning and after the end. 153 | :param scale_begin: int. The beginning scale to do coherent denoising. 154 | :param nbscales: int. Number of scales including the coarsest wavelet level. 155 | Default set to ceil(log2(min(M,N)) - 3). 156 | :param nbangles: int. Number of angles at the 2nd coarsest level, 157 | minimum 8, must be a multiple of 4. 158 | :param finest: int. Objects at the finest scale. 1 for curvelets, 2 for 159 | wavelets. Curvelets are more precise while wavelets are more efficient. 160 | :return: numpy.ndarray. Decomposed data. 
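    A minimal usage sketch (``data``, the 10 m channel interval and the
    1000 Hz sampling rate are hypothetical)::

        band, residual = curvelet_windowing(data, 10, 1000, mode='decompose',
                                            vmin=1000, vmax=6000)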
161 | """ 162 | return curvelet_denoising(data, choice=1, pad=pad, vmin=vmin, vmax=vmax, 163 | flag=flag, dx=dx, fs=fs, mode=mode, 164 | scale_begin=scale_begin, nbscales=nbscales, 165 | nbangles=nbangles, finest=finest) 166 | -------------------------------------------------------------------------------- /daspy/advanced_tools/denoising.py: -------------------------------------------------------------------------------- 1 | # Purpose: Remove noise from data 2 | # Author: Minzhe Hu, Zefeng Li 3 | # Date: 2024.5.13 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import numpy as np 6 | from copy import deepcopy 7 | from scipy.ndimage import median_filter 8 | from scipy.interpolate import interp1d 9 | from daspy.basic_tools.preprocessing import padding 10 | from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping 11 | 12 | 13 | def spike_removal(data, nch=50, nsp=5, thresh=10): 14 | """ 15 | Use a median filter to remove high-strain spikes in the data. Modified from 16 | https://github.com/atterholt/curvelet-denoising/blob/main/MedianFilter.m 17 | 18 | :param data: numpy.ndarray. Data to remove spikes from. 19 | :param nch: int. Number of channels over which to compute the median. 20 | :param nsp: int. Number of sampling points over which to compute the median. 21 | :param thresh: Ratio threshold over the median over which a number is 22 | considered to be an outlier. 23 | :return: numpy.ndarray. Data with spikes removed. 24 | """ 25 | absdata = np.abs(data) 26 | 27 | medians1 = median_filter(absdata, (nch, 1)) 28 | medians = median_filter(medians1, (1, nsp)) 29 | ratio = absdata / medians # comparisons matrix 30 | 31 | # find the bad values and interpolate with their neighbors 32 | data_dn = data.copy() 33 | out_i, out_j = np.where(ratio > thresh) 34 | for j in set(out_j): 35 | bch = out_i[out_j == j] 36 | gch = list(set(range(len(data))) - set(bch)) 37 | f = interp1d(gch, data[gch, j], bounds_error=False, 38 | fill_value=(data[gch[0], j], data[gch[-1], j])) 39 | data_dn[bch, j] = f(bch) 40 | 41 | return data_dn 42 | 43 | 44 | def common_mode_noise_removal(data, method='median'): 45 | """ 46 | Remove common mode noise (sometimes called horizontal noise) from data. 47 | 48 | :param data: numpy.ndarray. Data to remove common mode noise. 49 | :param method:str. Method for extracting commmon mode noise. 'median' or 50 | 'mean' 51 | :return: numpy.ndarray. Denoised data. 52 | """ 53 | nch, nt = data.shape 54 | if method == 'median': 55 | common = np.median(data, 0) 56 | elif method == 'mean': 57 | common = np.mean(data, 0) 58 | 59 | xx = np.sum(common ** 2) 60 | data_dn = np.zeros((nch, nt)) 61 | for i in range(nch): 62 | xc = np.sum(common * data[i]) 63 | data_dn[i] = data[i] - xc / xx * common 64 | 65 | return data_dn 66 | 67 | 68 | def _noise_level(data, finest=2, nbscales=None, nbangles=16, percentile=95): 69 | """ 70 | Find threshold for curvelet denoising with noise record. 71 | 72 | :param data: numpy.ndarray. Noise data. 73 | :param nbscales: int. Number of scales including the coarsest wavelet level. 74 | Default set to ceil(log2(min(M,N)) - 3). 75 | :param nbangles: int. Number of angles at the 2nd coarsest level, 76 | minimum 8, must be a multiple of 4. 77 | :param percentile: number. The threshold is taken as this percentile of the 78 | curvelet coefficient of the noise record 79 | :return: 2-D list. Threshold for curvelet coefficients. 
80 | """ 81 | C = fdct_wrapping(data, is_real=True, finest=finest, nbscales=nbscales, 82 | nbangles_coarse=nbangles) 83 | 84 | E_noise = [] 85 | for s in range(len(C)): 86 | E_noise.append([]) 87 | for w in range(len(C[s])): 88 | threshold = np.percentile(abs(C[s][w]), percentile) 89 | E_noise[s].append(threshold) 90 | 91 | return E_noise 92 | 93 | 94 | def _knee_points(C, factor=0.2): 95 | """ 96 | Find threshold for curvelet denoising without noise record. 97 | 98 | :param C: 2-D list of np.ndarray. Array of curvelet coefficients. 99 | :param factor: float. Multiplication factor from 0 to 1. Small factor 100 | corresponds to conservative strategy. 101 | :return: 2-D list. Threshold for curvelet coefficients. 102 | """ 103 | E_knee = [] 104 | for s in range(len(C)): 105 | E_knee.append([]) 106 | for w in range(len(C[s])): 107 | F, x = np.histogram(abs(C[s][w]), density=True) 108 | x = (x[1:] + x[:-1]) / 2 109 | F = np.cumsum(F) / np.sum(F) 110 | slope = (x[-1] - x[0]) / (F[-1] - F[0]) 111 | tiltedplot = x - (slope * F) 112 | idx = np.argmin(tiltedplot) 113 | E_knee[s].append(x[idx] * factor) 114 | 115 | return E_knee 116 | 117 | 118 | def _velocity_bin(nbangles, fs, dx): 119 | v_bounds = np.zeros(nbangles // 4 + 1) 120 | half = nbangles // 8 121 | v_bounds[half] = fs * dx 122 | np.seterr(divide='ignore') 123 | for i in range(half): 124 | v_bounds[i] = i / half * fs * dx 125 | v_bounds[half + i + 1] = np.divide(fs * dx, 1 - (i + 1) / half) 126 | 127 | np.seterr(divide='warn') 128 | v_lows = list(range(half - 1, -1, -1)) + list(range(half * 2)) + \ 129 | list(range(2 * half - 1, half - 1, -1)) 130 | velocity = [] 131 | for i in range(nbangles // 2): 132 | v_low = v_bounds[v_lows[i]] 133 | v_high = v_bounds[v_lows[i] + 1] 134 | velocity.append([v_low, v_high]) 135 | velocity = np.array(velocity * 2) 136 | for i in range(half): 137 | velocity[i] = -1 * velocity[i][::-1] 138 | velocity[3 * half + i] = -1 * velocity[3 * half + i][::-1] 139 | velocity[4 * half + i] = -1 * velocity[4 * half + i][::-1] 140 | velocity[7 * half + i] = -1 * velocity[7 * half + i][::-1] 141 | return velocity 142 | 143 | 144 | def _mask_factor(velocity, vmin, vmax, flag=0): 145 | if flag: 146 | if flag == -1: 147 | vmin = -vmax 148 | vmax = -vmin 149 | else: 150 | half = len(velocity) // 8 151 | for i in range(half): 152 | velocity[i] = -1 * velocity[i][::-1] 153 | velocity[3 * half + i] = -1 * velocity[3 * half + i][::-1] 154 | velocity[4 * half + i] = -1 * velocity[4 * half + i][::-1] 155 | velocity[7 * half + i] = -1 * velocity[7 * half + i][::-1] 156 | 157 | factors = np.zeros(len(velocity)) 158 | for i, (v_low, v_high) in enumerate(velocity): 159 | v1 = max(v_low, vmin) 160 | v2 = min(v_high, vmax) 161 | if v1 < v2: 162 | if v_high == np.inf or v_low == -np.inf: 163 | factors[i] = 1 164 | else: 165 | factors[i] = np.divide(v2 - v1, v_high - v_low) 166 | 167 | return factors 168 | 169 | 170 | def curvelet_denoising(data, choice=0, pad=0.3, noise=None, noise_perc=95, 171 | knee_fac=0.2, soft_thresh=True, vmin=0, vmax=np.inf, 172 | flag=0, dx=None, fs=None, mode='remove', 173 | scale_begin=3, nbscales=None, nbangles=16, finest=2): 174 | """ 175 | Use curevelet transform to filter stochastic or/and coherent noise. 176 | Modified from 177 | https://github.com/atterholt/curvelet-denoising/blob/main/CurveletDenoising.m 178 | {Atterholt et al., 2022 , Geophys. J. Int.} 179 | 180 | :param data: numpy.ndarray. Data to denoise. 181 | :param choice: int. 
0 for Gaussian denoising using soft thresholding, 1 for 182 | velocity filtering using the standard FK methodology and 2 for both. 183 | :param pad: float or sequence of floats. Each float means padding percentage 184 | before FFT for corresponding dimension. If set to 0.1 will pad 5% before 185 | the beginning and after the end. 186 | :param noise: numpy.ndarray or daspy.Section. Noise record as reference. 187 | :param noise_perc: number. The threshold is taken as this percentile of the 188 | curvelet coefficient of the noise record. (only used when noise is 189 | specified) 190 | :param knee_fac: float. Multiplication factor from 0 to 1. Small factor 191 | corresponds to conservative strategy. (only used when noise is not 192 | specified) 193 | :param soft_thresh: bool. True for soft thresholding and False for hard 194 | thresholding. 195 | :param vmin, vmax: float. Velocity range in m/s. 196 | :param flag: -1 choose only negative apparent velocities, 0 choose both 197 | postive and negative apparent velocities, 1 choose only positive 198 | apparent velocities. 199 | :param dx: Channel interval in m. 200 | :param fs: Sampling rate in Hz. 201 | :param mode: str. Only available when choice in (1,2). 'remove' for 202 | denoising, 'retain' for extraction, and 'decompose' for decomposition. 203 | :param scale_begin: int. The beginning scale to do coherent denoising. 204 | :param nbscales: int. Number of scales including the coarsest wavelet level. 205 | Default set to ceil(log2(min(M,N)) - 3). 206 | :param nbangles: int. Number of angles at the 2nd coarsest level, 207 | minimum 8, must be a multiple of 4. 208 | :param finest: int. Objects at the finest scale. 1 for curvelets, 2 for 209 | wavelets. Curvelets are more precise while wavelets are more efficient. 210 | :return: numpy.ndarray. Denoised data. 
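    A minimal usage sketch (``data``, ``dx`` and ``fs`` are hypothetical;
    choice=2 with mode='retain' applies soft thresholding and then keeps only
    energy with apparent velocities between vmin and vmax)::

        data_dn = curvelet_denoising(data, choice=2, vmin=1000, vmax=6000,
                                     dx=10, fs=1000, mode='retain')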
211 | """ 212 | if pad is None or pad is False: 213 | pad = 0 214 | dn = np.round(np.array(pad) * data.shape).astype(int) 215 | data_pd = padding(data, dn) 216 | 217 | C = fdct_wrapping(data_pd, is_real=True, finest=finest, nbscales=nbscales, 218 | nbangles_coarse=nbangles) 219 | 220 | # apply Gaussian denoising 221 | if choice in (0, 2): 222 | # define threshold 223 | if noise is None: 224 | E = _knee_points(C, factor=knee_fac) 225 | else: 226 | if not isinstance(noise, np.ndarray): 227 | noise = noise.data 228 | noise_pd = padding(noise, 229 | np.array(data_pd.shape) - np.array(noise.shape)) 230 | E = _noise_level(noise_pd, finest=finest, nbscales=nbscales, 231 | nbangles=nbangles, percentile=noise_perc) 232 | for s in range(1, len(C)): 233 | for w in range(len(C[s])): 234 | # first do a hard threshold 235 | C[s][w] = C[s][w] * (abs(C[s][w]) > abs(E[s][w])) 236 | if soft_thresh: 237 | # soften the existing coefficients 238 | C[s][w] = np.sign(C[s][w]) * (abs(C[s][w]) - abs(E[s][w])) 239 | 240 | # apply velocity filtering 241 | if choice in (1, 2): 242 | if dx is None or fs is None: 243 | raise ValueError('Please set both dx and fs.') 244 | 245 | if mode == 'decompose': 246 | lst = list(range(scale_begin - 1)) 247 | if finest == 2: 248 | lst.append(len(C) - 1) 249 | for s in lst: 250 | for w in range(len(C[s])): 251 | C[s][w] /= 2 252 | C_rt = deepcopy(C) 253 | 254 | for s in range(scale_begin - 1, len(C) - finest + 1): 255 | nbangles = len(C[s]) 256 | velocity = _velocity_bin(nbangles, fs, dx) 257 | factors = _mask_factor(velocity, vmin, vmax, flag=flag) 258 | for w in range(nbangles): 259 | if mode == 'retain': 260 | C[s][w] *= factors[w] 261 | elif mode == 'remove': 262 | C[s][w] *= 1 - factors[w] 263 | elif mode == 'decompose': 264 | C[s][w] *= factors[w] 265 | C_rt[s][w] *= 1 - factors[w] 266 | 267 | # perform the inverse curvelet transform 268 | data_dn = padding(ifdct_wrapping(C, is_real=True, size=data_pd.shape), dn, 269 | reverse=True) 270 | 271 | if mode == 'decompose': 272 | data_n = padding(ifdct_wrapping(C_rt, is_real=True, size=data_pd.shape), 273 | dn, reverse=True) 274 | return data_dn, data_n 275 | else: 276 | return data_dn -------------------------------------------------------------------------------- /daspy/advanced_tools/fdct.py: -------------------------------------------------------------------------------- 1 | # Purpose: Fast Discrete Curvelet Transform 2 | # Author: Minzhe Hu 3 | # Date: 2024.4.11 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | # Modified from 6 | # http://www.curvelet.org/download-secure.php?file=CurveLab-2.1.3.tar.gz 7 | # (matlab version) 8 | import numpy as np 9 | from numpy.fft import fftshift, ifftshift, fft2, ifft2 10 | 11 | 12 | def _round(x): 13 | return np.round(x).astype(int) 14 | 15 | 16 | def _floor(x): 17 | return np.floor(x).astype(int) 18 | 19 | 20 | def _ceil(x): 21 | return np.ceil(x).astype(int) 22 | 23 | 24 | def fdct_wrapping_window(x): 25 | """ 26 | Creates the two halves of a C**inf compactly supported window. 27 | 28 | :param x: vector or matrix of abscissae, the relevant ones from 0 to 1. 29 | :return: vector or matrix containing samples of the left, resp. right half 30 | of the window. 31 | """ 32 | 33 | # Initialize the variables 34 | wr = np.zeros_like(x) 35 | wl = np.zeros_like(x) 36 | 37 | # Set values close to zero to zero 38 | x[np.abs(x) < 2**-52] = 0 39 | 40 | # Calculate wr and wl 41 | wr[(x > 0) & (x < 1)] = np.exp( 42 | 1 - 1. / (1 - np.exp(1 - 1. 
/ x[(x > 0) & (x < 1)]))) 43 | wr[x <= 0] = 1 44 | wl[(x > 0) & (x < 1)] = np.exp( 45 | 1 - 1. / (1 - np.exp(1 - 1. / (1 - x[(x > 0) & (x < 1)])))) 46 | wl[x >= 1] = 1 47 | 48 | # Normalize wr and wl 49 | normalization = np.sqrt(wl**2 + wr**2) 50 | wr = wr / normalization 51 | wl = wl / normalization 52 | 53 | return wl, wr 54 | 55 | 56 | def fdct_wrapping(x, is_real=False, finest=2, 57 | nbscales=None, nbangles_coarse=16): 58 | """ 59 | Fast Discrete Curvelet Transform via wedge wrapping. 60 | 61 | :param x: np.array. M-by-N matrix. 62 | :param is_real: bool. Type of the transform, False for complex-valued 63 | curvelets and True for real-valued curvelets. 64 | :param finest: int. Chooses one of two possibilities for the coefficients at 65 | the finest level: 1 for curvelets and 2 for wavelets. 66 | :param nbscales: int. Number of scales including the coarsest wavele 67 | level. Default set to ceil(log2(min(M,N)) - 3). 68 | :param nbangles_coarse: int. Number of angles at the 2nd coarsest level, 69 | minimum 8, must be a multiple of 4. 70 | :return: 2-D list of np.ndarray. Array of curvelet coefficients. 71 | C[j][l][k1,k2] is the coefficient at scale j(from finest to coarsest 72 | scale), angle l(starts at the top-left corner and increases clockwise), 73 | position k1, k2(size varies with j and l). If is_real is 1, there are 74 | two types of curvelets, 'cosine' and 'sine'. For a given scale j, the 75 | 'cosine' coefficients are stored in the first two quadrants (low values 76 | of l), the 'sine' coefficients in the last two quadrants (high values of 77 | l). 78 | """ 79 | X = fftshift(fft2(ifftshift(x))) / np.sqrt(x.size) 80 | N1, N2 = X.shape 81 | if nbscales is None: 82 | nbscales = _ceil(np.log2(min(N1, N2)) - 3) 83 | 84 | # Initialization: data structure 85 | nbangles = [1] + [nbangles_coarse * 2 ** ((nbscales - i) // 2) 86 | for i in range(nbscales, 1, -1)] 87 | if finest == 2: 88 | nbangles[-1] = 1 89 | 90 | C = [] 91 | for j in range(nbscales): 92 | C.append([None] * nbangles[j]) 93 | 94 | # Loop: pyramidal scale decomposition 95 | M1 = N1 / 3 96 | M2 = N2 / 3 97 | 98 | if finest == 1: 99 | # Initialization: smooth periodic extension of high frequencies 100 | bigN1 = 2 * _floor(2 * M1) + 1 101 | bigN2 = 2 * _floor(2 * M2) + 1 102 | equiv_index_1 = (_floor(N1 / 2) - _floor(2 * M1) + 103 | np.arange(bigN1)) % N1 104 | equiv_index_2 = (_floor(N2 / 2) - _floor(2 * M2) + 105 | np.arange(bigN2)) % N2 106 | X = X[np.ix_(equiv_index_1, equiv_index_2)] 107 | 108 | window_length_1 = _floor(2 * M1) - _floor(M1) - (N1 % 3 == 0) 109 | window_length_2 = _floor(2 * M2) - _floor(M2) - (N2 % 3 == 0) 110 | coord_1 = np.linspace(0, 1, window_length_1) 111 | coord_2 = np.linspace(0, 1, window_length_2) 112 | wl_1, wr_1 = fdct_wrapping_window(coord_1) 113 | wl_2, wr_2 = fdct_wrapping_window(coord_2) 114 | 115 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1)) 116 | if N1 % 3 == 0: 117 | lowpass_1 = np.concatenate(([0], lowpass_1, [0])) 118 | 119 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2)) 120 | if N2 % 3 == 0: 121 | lowpass_2 = np.concatenate(([0], lowpass_2, [0])) 122 | 123 | lowpass = np.outer(lowpass_1, lowpass_2) 124 | Xlow = X * lowpass 125 | scales = np.arange(nbscales, 1, -1) 126 | 127 | else: 128 | M1 /= 2 129 | M2 /= 2 130 | 131 | window_length_1 = _floor(2 * M1) - _floor(M1) 132 | window_length_2 = _floor(2 * M2) - _floor(M2) 133 | coord_1 = np.linspace(0, 1, window_length_1) 134 | coord_2 = np.linspace(0, 1, window_length_2) 135 | wl_1, wr_1 
= fdct_wrapping_window(coord_1) 136 | wl_2, wr_2 = fdct_wrapping_window(coord_2) 137 | 138 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1)) 139 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2)) 140 | lowpass = np.outer(lowpass_1, lowpass_2) 141 | hipass = np.sqrt(1 - lowpass ** 2) 142 | 143 | Xlow_index_1 = np.arange(-_floor(2 * M1), 144 | _floor(2 * M1) + 1) + _ceil((N1 + 1) / 2) - 1 145 | Xlow_index_2 = np.arange(-_floor(2 * M2), 146 | _floor(2 * M2) + 1) + _ceil((N2 + 1) / 2) - 1 147 | Xlow = X[np.ix_(Xlow_index_1, Xlow_index_2)] * lowpass 148 | Xhi = X.copy() 149 | Xhi[np.ix_(Xlow_index_1, Xlow_index_2)] *= hipass 150 | 151 | C[nbscales - 1][0] = fftshift(ifft2(ifftshift(Xhi)) 152 | ) * np.sqrt(Xhi.size) 153 | if is_real: 154 | C[nbscales - 1][0] = C[nbscales - 1][0].real 155 | 156 | scales = np.arange(nbscales - 1, 1, -1) 157 | for j in scales - 1: 158 | M1 /= 2 159 | M2 /= 2 160 | window_length_1 = _floor(2 * M1) - _floor(M1) 161 | window_length_2 = _floor(2 * M2) - _floor(M2) 162 | coord_1 = np.linspace(0, 1, window_length_1) 163 | coord_2 = np.linspace(0, 1, window_length_2) 164 | wl_1, wr_1 = fdct_wrapping_window(coord_1) 165 | wl_2, wr_2 = fdct_wrapping_window(coord_2) 166 | 167 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1)) 168 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2)) 169 | lowpass = np.outer(lowpass_1, lowpass_2) 170 | hipass = np.sqrt(1 - lowpass ** 2) 171 | 172 | Xhi = Xlow.copy() 173 | Xlow_index_1 = np.arange(-_floor(2 * M1), 174 | _floor(2 * M1) + 1) + _floor(4 * M1) 175 | Xlow_index_2 = np.arange(-_floor(2 * M2), 176 | _floor(2 * M2) + 1) + _floor(4 * M2) 177 | Xlow = Xlow[np.ix_(Xlow_index_1, Xlow_index_2)] 178 | Xhi[np.ix_(Xlow_index_1, Xlow_index_2)] = Xlow * hipass 179 | Xlow *= lowpass 180 | 181 | # Loop: angular decomposition 182 | l = -1 183 | nbquadrants = 2 + 2 * (not is_real) 184 | nbangles_perquad = nbangles[j] // 4 185 | for quadrant in range(1, nbquadrants + 1): 186 | M_horiz = (M1, M2)[quadrant % 2] 187 | M_vert = (M2, M1)[quadrant % 2] 188 | wedge_ticks_left = _round( 189 | np.linspace( 190 | 0, 191 | 1, 192 | nbangles_perquad + 193 | 1) * 194 | _floor( 195 | 4 * 196 | M_horiz) + 197 | 1) 198 | wedge_ticks_right = 2 * _floor(4 * M_horiz) + 2 - wedge_ticks_left 199 | if nbangles_perquad % 2: 200 | wedge_ticks = np.concatenate( 201 | (wedge_ticks_left, wedge_ticks_right[::-1])) 202 | else: 203 | wedge_ticks = np.concatenate( 204 | (wedge_ticks_left, wedge_ticks_right[-2::-1])) 205 | 206 | wedge_endpoints = wedge_ticks[1:-1:2] 207 | wedge_midpoints = (wedge_endpoints[:-1] + wedge_endpoints[1:]) / 2 208 | # Left corner wedge 209 | l += 1 210 | first_wedge_endpoint_vert = _round( 211 | _floor(4 * M_vert) / nbangles_perquad + 1) 212 | length_corner_wedge = _floor(4 * M_vert) - _floor(M_vert) + \ 213 | _ceil(first_wedge_endpoint_vert / 4) 214 | Y_corner = np.arange(length_corner_wedge) + 1 215 | XX, YY = np.meshgrid( 216 | np.arange(2 * _floor(4 * M_horiz) + 1) + 1, Y_corner) 217 | width_wedge = wedge_endpoints[1] + wedge_endpoints[0] - 1 218 | slope_wedge = (_floor(4 * M_horiz) + 1 - 219 | wedge_endpoints[0]) / _floor(4 * M_vert) 220 | left_line = _round( 221 | 2 - wedge_endpoints[0] + slope_wedge * (Y_corner - 1)) 222 | wrapped_data = np.zeros( 223 | (length_corner_wedge, width_wedge), dtype=complex) 224 | wrapped_XX = np.zeros( 225 | (length_corner_wedge, width_wedge), dtype=int) 226 | wrapped_YY = np.zeros( 227 | (length_corner_wedge, width_wedge), dtype=int) 
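            # Wrap the corner wedge onto a compact rectangular grid: column
            # indices are wrapped periodically within the wedge width (samples
            # outside the grid are zeroed), while wrapped_XX / wrapped_YY keep
            # the original grid coordinates used to build the angular windows
            # below.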
228 | first_row = _floor(4 * M_vert) + 2 - \ 229 | _ceil((length_corner_wedge + 1) / 2) + \ 230 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2) 231 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \ 232 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2) 233 | for row in Y_corner - 1: 234 | cols = left_line[row] + \ 235 | (np.arange(width_wedge) - (left_line[row] - first_col)) \ 236 | % width_wedge 237 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1 238 | new_row = (row - first_row + 1) % length_corner_wedge 239 | wrapped_data[new_row, :] = Xhi[row, 240 | admissible_cols] * (cols > 0) 241 | wrapped_XX[new_row, :] = XX[row, admissible_cols] 242 | wrapped_YY[new_row, :] = YY[row, admissible_cols] 243 | slope_wedge_right = (_floor(4 * M_horiz) + 1 - 244 | wedge_midpoints[0]) / _floor(4 * M_vert) 245 | mid_line_right = wedge_midpoints[0] + \ 246 | slope_wedge_right * (wrapped_YY - 1) 247 | coord_right = 0.5 + _floor(4 * M_vert) / \ 248 | (wedge_endpoints[1] - wedge_endpoints[0]) * \ 249 | (wrapped_XX - mid_line_right) / \ 250 | (_floor(4 * M_vert) + 1 - wrapped_YY) 251 | C2 = 1 / (1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[0] - 252 | 1) - 1) + 1 / (2 * (_floor(4 * M_vert)) / ( 253 | first_wedge_endpoint_vert - 1) - 1)) 254 | C1 = C2 / (2 * (_floor(4 * M_vert)) / 255 | (first_wedge_endpoint_vert - 1) - 1) 256 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) + 257 | (wrapped_YY - 1) / _floor(4 * M_vert) == 2] += 1 258 | coord_corner = C1 + C2 * ((wrapped_XX - 1) / _floor(4 * M_horiz) - 259 | (wrapped_YY - 1) / _floor(4 * M_vert)) / (2 - 260 | ((wrapped_XX - 1) / _floor(4 * M_horiz) + (wrapped_YY - 1) / 261 | _floor(4 * M_vert))) 262 | wl_left, _ = fdct_wrapping_window(coord_corner) 263 | _, wr_right = fdct_wrapping_window(coord_right) 264 | wrapped_data = wrapped_data * wl_left * wr_right 265 | if not is_real: 266 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1)) 267 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))) * \ 268 | np.sqrt(wrapped_data.size) 269 | else: 270 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1)) 271 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \ 272 | np.sqrt(wrapped_data.size) 273 | C[j][l] = np.sqrt(2) * x.real 274 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag 275 | 276 | # Regular wedges 277 | length_wedge = _floor(4 * M_vert) - _floor(M_vert) 278 | Y = np.arange(length_wedge) + 1 279 | first_row = _floor(4 * M_vert) + 2 - _ceil((length_wedge + 1) / 2) \ 280 | + (length_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2) 281 | for subl in range(1, nbangles_perquad - 1): 282 | l += 1 283 | width_wedge = wedge_endpoints[subl + 284 | 1] - wedge_endpoints[subl - 1] + 1 285 | slope_wedge = ((_floor(4 * M_horiz) + 1) - 286 | wedge_endpoints[subl]) / _floor(4 * M_vert) 287 | left_line = _round( 288 | wedge_endpoints[subl - 1] + slope_wedge * (Y - 1)) 289 | wrapped_data = np.zeros( 290 | (length_wedge, width_wedge), dtype=complex) 291 | wrapped_XX = np.zeros((length_wedge, width_wedge), dtype=int) 292 | wrapped_YY = np.zeros((length_wedge, width_wedge), dtype=int) 293 | first_col = _floor(4 * M_horiz) + 2 - \ 294 | _ceil((width_wedge + 1) / 2) + \ 295 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2) 296 | for row in Y - 1: 297 | cols = left_line[row] + (np.arange(width_wedge) - 298 | (left_line[row] - first_col)) % width_wedge - 1 299 | new_row = (row - first_row + 1) % length_wedge 300 | wrapped_data[new_row, :] = Xhi[row, cols] 301 | wrapped_XX[new_row, :] = XX[row, 
cols] 302 | wrapped_YY[new_row, :] = YY[row, cols] 303 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) - 304 | wedge_midpoints[subl - 1]) / _floor(4 * M_vert) 305 | mid_line_left = wedge_midpoints[subl - 1] + \ 306 | slope_wedge_left * (wrapped_YY - 1) 307 | coord_left = 0.5 + _floor(4 * M_vert) / \ 308 | (wedge_endpoints[subl] - wedge_endpoints[subl - 1]) * \ 309 | (wrapped_XX - mid_line_left) / \ 310 | (_floor(4 * M_vert) + 1 - wrapped_YY) 311 | slope_wedge_right = ((_floor(4 * M_horiz) + 1) - 312 | wedge_midpoints[subl]) / _floor(4 * M_vert) 313 | mid_line_right = wedge_midpoints[subl] + \ 314 | slope_wedge_right * (wrapped_YY - 1) 315 | coord_right = 0.5 + _floor(4 * M_vert) / \ 316 | (wedge_endpoints[subl + 1] - wedge_endpoints[subl]) * \ 317 | (wrapped_XX - mid_line_right) / \ 318 | (_floor(4 * M_vert) + 1 - wrapped_YY) 319 | 320 | wl_left, _ = fdct_wrapping_window(coord_left) 321 | _, wr_right = fdct_wrapping_window(coord_right) 322 | wrapped_data = wrapped_data * wl_left * wr_right 323 | if not is_real: 324 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1)) 325 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data))) * \ 326 | np.sqrt(wrapped_data.size) 327 | else: 328 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1)) 329 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \ 330 | np.sqrt(wrapped_data.size) 331 | C[j][l] = np.sqrt(2) * x.real 332 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag 333 | 334 | # Right corner wedge 335 | l += 1 336 | width_wedge = 4 * _floor(4 * M_horiz) + 3 - \ 337 | wedge_endpoints[-1] - wedge_endpoints[-2] 338 | slope_wedge = ((_floor(4 * M_horiz) + 1) - 339 | wedge_endpoints[-1]) / _floor(4 * M_vert) 340 | left_line = _round( 341 | wedge_endpoints[-2] + slope_wedge * (Y_corner - 1)) 342 | wrapped_data = np.zeros((length_corner_wedge, width_wedge), 343 | dtype=complex) 344 | wrapped_XX = np.zeros((length_corner_wedge, width_wedge), dtype=int) 345 | wrapped_YY = np.zeros((length_corner_wedge, width_wedge), dtype=int) 346 | first_row = _floor(4 * M_vert) + 2 - \ 347 | _ceil((length_corner_wedge + 1) / 2) + \ 348 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2) 349 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) + \ 350 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2) 351 | for row in Y_corner - 1: 352 | cols = left_line[row] + (np.arange(width_wedge) - 353 | (left_line[row] - first_col)) % width_wedge 354 | admissible_cols = _round(0.5 * (cols + 2 * _floor(4 * M_horiz) 355 | + 1 - np.abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1 356 | new_row = (row - first_row + 1) % length_corner_wedge 357 | wrapped_data[new_row, :] = Xhi[row, admissible_cols] * \ 358 | (cols <= (2 * _floor(4 * M_horiz) + 1)) 359 | wrapped_XX[new_row, :] = XX[row, admissible_cols] 360 | wrapped_YY[new_row, :] = YY[row, admissible_cols] 361 | 362 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) - 363 | wedge_midpoints[-1]) / _floor(4 * M_vert) 364 | mid_line_left = wedge_midpoints[-1] + \ 365 | slope_wedge_left * (wrapped_YY - 1) 366 | coord_left = 0.5 + _floor(4 * M_vert) / \ 367 | (wedge_endpoints[-1] - wedge_endpoints[-2]) * \ 368 | (wrapped_XX - mid_line_left) / \ 369 | (_floor(4 * M_vert) + 1 - wrapped_YY) 370 | C2 = -1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[-1] - 1) - 371 | 1 + 1 / (2 * (_floor(4 * M_vert)) / 372 | (first_wedge_endpoint_vert - 1) - 1)) 373 | C1 = -C2 * (2 * (_floor(4 * M_horiz)) / 374 | (wedge_endpoints[-1] - 1) - 1) 375 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) == 376 | (wrapped_YY - 
1) / _floor(4 * M_vert)] -= 1 377 | coord_corner = C1 + C2 * (2 - ((wrapped_XX - 1) / 378 | _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert))) \ 379 | / ((wrapped_XX - 1) / _floor(4 * M_horiz) - (wrapped_YY - 1) / 380 | _floor(4 * M_vert)) 381 | wl_left, _ = fdct_wrapping_window(coord_left) 382 | _, wr_right = fdct_wrapping_window(coord_corner) 383 | wrapped_data = wrapped_data * wl_left * wr_right 384 | if not is_real: 385 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1)) 386 | C[j][l] = fftshift(ifft2(ifftshift(wrapped_data)) 387 | ) * np.sqrt(wrapped_data.size) 388 | else: 389 | wrapped_data = np.rot90(wrapped_data, -(quadrant - 1)) 390 | x = fftshift(ifft2(ifftshift(wrapped_data))) * \ 391 | np.sqrt(wrapped_data.size) 392 | C[j][l] = np.sqrt(2) * x.real 393 | C[j][l + nbangles[j] // 2] = np.sqrt(2) * x.imag 394 | 395 | if quadrant < nbquadrants: 396 | Xhi = np.rot90(Xhi) 397 | # Coarsest wavelet level 398 | C[0][0] = fftshift(ifft2(ifftshift(Xlow))) * np.sqrt(Xlow.size) 399 | if is_real: 400 | C[0][0] = C[0][0].real 401 | 402 | return C 403 | 404 | 405 | def ifdct_wrapping(C, is_real=False, size=None): 406 | """ 407 | Inverse Fast Discrete Curvelet Transform via wedge wrapping. This is in fact 408 | the adjoint, also the pseudo-inverse 409 | 410 | :param C: 2-D list of np.ndarray. Array of curvelet coefficients. 411 | :param is_real: bool. Type of the transform, False for complex-valued 412 | curvelets and True for real-valued curvelets. 413 | :param size: tuple of ints. Size of the image to be recovered (not necessary 414 | if finest = 2) 415 | :return: 2-D np.ndarray. 416 | """ 417 | nbscales = len(C) 418 | nbangles_coarse = len(C[1]) 419 | nbangles = [1] + [nbangles_coarse * 2 ** ((nbscales - i) // 2) 420 | for i in range(nbscales, 1, -1)] 421 | if len(C[-1]) == 1: 422 | finest = 2 423 | nbangles[nbscales - 1] = 1 424 | else: 425 | finest = 1 426 | 427 | if size is None: 428 | if finest == 1: 429 | raise ValueError("Require output size.") 430 | else: 431 | N1, N2 = C[-1][0].shape 432 | else: 433 | N1, N2 = size 434 | 435 | M1 = N1 / 3 436 | M2 = N2 / 3 437 | 438 | if finest == 1: 439 | # Initialization: preparing the lowpass filter at finest scale 440 | window_length_1 = _floor(2 * M1) - _floor(M1) - (N1 % 3 == 0) 441 | window_length_2 = _floor(2 * M2) - _floor(M2) - (N2 % 3 == 0) 442 | coord_1 = np.linspace(0, 1, window_length_1) 443 | coord_2 = np.linspace(0, 1, window_length_2) 444 | wl_1, wr_1 = fdct_wrapping_window(coord_1) 445 | wl_2, wr_2 = fdct_wrapping_window(coord_2) 446 | 447 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1)) 448 | if N1 % 3 == 0: 449 | lowpass_1 = np.concatenate(([0], lowpass_1, [0])) 450 | 451 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2)) 452 | if N2 % 3 == 0: 453 | lowpass_2 = np.concatenate(([0], lowpass_2, [0])) 454 | 455 | lowpass = np.outer(lowpass_1, lowpass_2) 456 | scales = np.arange(nbscales, 1, -1) 457 | else: 458 | M1 /= 2 459 | M2 /= 2 460 | 461 | window_length_1 = _floor(2 * M1) - _floor(M1) 462 | window_length_2 = _floor(2 * M2) - _floor(M2) 463 | coord_1 = np.linspace(0, 1, window_length_1) 464 | coord_2 = np.linspace(0, 1, window_length_2) 465 | wl_1, wr_1 = fdct_wrapping_window(coord_1) 466 | wl_2, wr_2 = fdct_wrapping_window(coord_2) 467 | 468 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1)) 469 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2)) 470 | lowpass = np.outer(lowpass_1, lowpass_2) 471 | hipass_finest = np.sqrt(1 - 
lowpass ** 2) 472 | 473 | scales = np.arange(nbscales - 1, 1, -1) 474 | 475 | bigN1 = 2 * _floor(2 * M1) + 1 476 | bigN2 = 2 * _floor(2 * M2) + 1 477 | X = np.zeros((bigN1, bigN2), dtype=complex) 478 | 479 | # Loop: pyramidal reconstruction 480 | 481 | Xj_topleft_1 = 1 482 | Xj_topleft_2 = 1 483 | for j in scales - 1: 484 | M1 /= 2 485 | M2 /= 2 486 | 487 | window_length_1 = _floor(2 * M1) - _floor(M1) 488 | window_length_2 = _floor(2 * M2) - _floor(M2) 489 | coord_1 = np.linspace(0, 1, window_length_1) 490 | coord_2 = np.linspace(0, 1, window_length_2) 491 | wl_1, wr_1 = fdct_wrapping_window(coord_1) 492 | wl_2, wr_2 = fdct_wrapping_window(coord_2) 493 | 494 | lowpass_1 = np.concatenate((wl_1, np.ones(2 * _floor(M1) + 1), wr_1)) 495 | lowpass_2 = np.concatenate((wl_2, np.ones(2 * _floor(M2) + 1), wr_2)) 496 | lowpass_next = np.outer(lowpass_1, lowpass_2) 497 | hipass = np.sqrt(1 - lowpass_next ** 2) 498 | Xj = np.zeros((2 * _floor(4 * M1) + 1, 2 * _floor(4 * M2) + 1), 499 | dtype=complex) 500 | 501 | # Loop: angles 502 | l = -1 503 | nbquadrants = 2 + 2 * (not is_real) 504 | nbangles_perquad = nbangles[j] // 4 505 | for quadrant in range(1, nbquadrants + 1): 506 | M_horiz = (M1, M2)[quadrant % 2] 507 | M_vert = (M2, M1)[quadrant % 2] 508 | wedge_ticks_left = _round(np.linspace(0, 1, nbangles_perquad + 1) * 509 | _floor(4 * M_horiz) + 1) 510 | wedge_ticks_right = 2 * _floor(4 * M_horiz) + 2 - wedge_ticks_left 511 | if nbangles_perquad % 2: 512 | wedge_ticks = np.concatenate( 513 | (wedge_ticks_left, wedge_ticks_right[::-1])) 514 | else: 515 | wedge_ticks = np.concatenate( 516 | (wedge_ticks_left, wedge_ticks_right[-2::-1])) 517 | wedge_endpoints = wedge_ticks[1:-1:2] 518 | wedge_midpoints = (wedge_endpoints[:-1] + wedge_endpoints[1:]) / 2 519 | 520 | # Left corner wedge 521 | l += 1 522 | first_wedge_endpoint_vert = _round(_floor(4 * M_vert) / 523 | nbangles_perquad + 1) 524 | length_corner_wedge = _floor(4 * M_vert) - _floor(M_vert) + \ 525 | _ceil(first_wedge_endpoint_vert / 4) 526 | Y_corner = np.arange(length_corner_wedge) + 1 527 | [XX, YY] = np.meshgrid(np.arange(1, 2 * _floor(4 * M_horiz) + 2), 528 | Y_corner) 529 | width_wedge = wedge_endpoints[1] + wedge_endpoints[0] - 1 530 | slope_wedge = (_floor(4 * M_horiz) + 1 - 531 | wedge_endpoints[0]) / _floor(4 * M_vert) 532 | left_line = _round(2 - wedge_endpoints[0] + 533 | slope_wedge * (Y_corner - 1)) 534 | wrapped_XX = np.zeros((length_corner_wedge, width_wedge), dtype=int) 535 | wrapped_YY = np.zeros((length_corner_wedge, width_wedge), dtype=int) 536 | first_row = _floor(4 * M_vert) + \ 537 | 2 - _ceil((length_corner_wedge + 1) / 2) + \ 538 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2) 539 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \ 540 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2) 541 | for row in Y_corner - 1: 542 | cols = left_line[row] + (np.arange(width_wedge) - 543 | (left_line[row] - first_col)) % width_wedge 544 | new_row = (row - first_row + 1) % length_corner_wedge 545 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1 546 | wrapped_XX[new_row, :] = XX[row, admissible_cols] 547 | wrapped_YY[new_row, :] = YY[row, admissible_cols] 548 | 549 | slope_wedge_right = (_floor(4 * M_horiz) + 1 - wedge_midpoints[0]) \ 550 | / _floor(4 * M_vert) 551 | mid_line_right = wedge_midpoints[0] + \ 552 | slope_wedge_right * (wrapped_YY - 1) 553 | coord_right = 0.5 + _floor(4 * M_vert) / (wedge_endpoints[1] - 554 | wedge_endpoints[0]) * (wrapped_XX - 
mid_line_right) / \ 555 | (_floor(4 * M_vert) + 1 - wrapped_YY) 556 | C2 = 1 / (1 / (2 * (_floor(4 * M_horiz)) / 557 | (wedge_endpoints[0] - 1) - 1) + 1 / (2 * (_floor(4 * M_vert)) 558 | / (first_wedge_endpoint_vert - 1) - 1)) 559 | C1 = C2 / (2 * (_floor(4 * M_vert)) / 560 | (first_wedge_endpoint_vert - 1) - 1) 561 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) + 562 | (wrapped_YY - 1) / _floor(4 * M_vert) == 2] += 1 563 | coord_corner = C1 + C2 * ((wrapped_XX - 1) / _floor(4 * M_horiz) - 564 | (wrapped_YY - 1) / _floor(4 * M_vert)) / (2 - ((wrapped_XX - 1) 565 | / _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert))) 566 | wl_left, _ = fdct_wrapping_window(coord_corner) 567 | _, wr_right = fdct_wrapping_window(coord_right) 568 | 569 | if not is_real: 570 | wrapped_data = fftshift(fft2(ifftshift(C[j][l]))) / \ 571 | np.sqrt(C[j][l].size) 572 | wrapped_data = np.rot90(wrapped_data, quadrant - 1) 573 | else: 574 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2] 575 | wrapped_data = fftshift(fft2(ifftshift(x))) / \ 576 | np.sqrt(x.size * 2) 577 | wrapped_data = np.rot90(wrapped_data, quadrant - 1) 578 | 579 | wrapped_data = wrapped_data * wl_left * wr_right 580 | # Unwrapping data 581 | for row in Y_corner - 1: 582 | cols = left_line[row] + (np.arange(width_wedge) - 583 | (left_line[row] - first_col)) % width_wedge 584 | admissible_cols = _round(0.5 * (cols + 1 + abs(cols - 1))) - 1 585 | new_row = (row - first_row + 1) % length_corner_wedge 586 | Xj[row, admissible_cols] += wrapped_data[new_row, :] 587 | # We use the following property: in an assignment A(B) = C where 588 | # B and C are vectors, if some value x repeats in B, then the 589 | # last occurrence of x is the one corresponding to the eventual 590 | # assignment. 591 | 592 | # Regular wedges 593 | length_wedge = _floor(4 * M_vert) - _floor(M_vert) 594 | Y = np.arange(length_wedge) + 1 595 | first_row = _floor(4 * M_vert) + 2 - _ceil((length_wedge + 1) / 2) \ 596 | + (length_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2) 597 | for subl in range(1, nbangles_perquad - 1): 598 | l += 1 599 | width_wedge = wedge_endpoints[subl + 1] - \ 600 | wedge_endpoints[subl - 1] + 1 601 | slope_wedge = ((_floor(4 * M_horiz) + 1) - 602 | wedge_endpoints[subl]) / _floor(4 * M_vert) 603 | left_line = _round(wedge_endpoints[subl - 1] + 604 | slope_wedge * (Y - 1)) 605 | wrapped_XX = np.zeros((length_wedge, width_wedge), dtype=int) 606 | wrapped_YY = np.zeros((length_wedge, width_wedge), dtype=int) 607 | first_col = _floor(4 * M_horiz) + 2 - \ 608 | _ceil((width_wedge + 1) / 2) + \ 609 | (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2) 610 | for row in Y - 1: 611 | cols = left_line[row] + (np.arange(width_wedge) - 612 | (left_line[row] - first_col)) % width_wedge - 1 613 | new_row = (row - first_row + 1) % length_wedge 614 | wrapped_XX[new_row, :] = XX[row, cols] 615 | wrapped_YY[new_row, :] = YY[row, cols] 616 | 617 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) - 618 | wedge_midpoints[subl - 1]) / _floor(4 * M_vert) 619 | mid_line_left = wedge_midpoints[subl - 1] + \ 620 | slope_wedge_left * (wrapped_YY - 1) 621 | coord_left = 0.5 + _floor(4 * M_vert) / (wedge_endpoints[subl] 622 | - wedge_endpoints[subl - 1]) * \ 623 | (wrapped_XX - mid_line_left) / \ 624 | (_floor(4 * M_vert) + 1 - wrapped_YY) 625 | slope_wedge_right = ((_floor(4 * M_horiz) + 1) - 626 | wedge_midpoints[subl]) / _floor(4 * M_vert) 627 | mid_line_right = wedge_midpoints[subl] + \ 628 | slope_wedge_right * (wrapped_YY - 1) 629 | coord_right = 0.5 + _floor(4 * 
M_vert) / \ 630 | (wedge_endpoints[subl + 1] - wedge_endpoints[subl]) * \ 631 | (wrapped_XX - mid_line_right) / \ 632 | (_floor(4 * M_vert) + 1 - wrapped_YY) 633 | wl_left, _ = fdct_wrapping_window(coord_left) 634 | _, wr_right = fdct_wrapping_window(coord_right) 635 | if not is_real: 636 | wrapped_data = fftshift(fft2(ifftshift(C[j][l]))) / \ 637 | np.sqrt(C[j][l].size) 638 | wrapped_data = np.rot90(wrapped_data, quadrant - 1) 639 | else: 640 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2] 641 | wrapped_data = fftshift( 642 | fft2(ifftshift(x))) / np.sqrt(x.size * 2) 643 | wrapped_data = np.rot90(wrapped_data, quadrant - 1) 644 | 645 | wrapped_data = wrapped_data * wl_left * wr_right 646 | 647 | # Unwrapping data 648 | for row in Y - 1: 649 | cols = left_line[row] + (np.arange(width_wedge) - 650 | (left_line[row] - first_col)) % width_wedge - 1 651 | new_row = (row + 1 - first_row) % length_wedge 652 | Xj[row, cols] += wrapped_data[new_row, :] 653 | 654 | # Right corner wedge 655 | l += 1 656 | width_wedge = 4 * _floor(4 * M_horiz) + 3 - \ 657 | wedge_endpoints[-1] - wedge_endpoints[-2] 658 | slope_wedge = ((_floor(4 * M_horiz) + 1) - 659 | wedge_endpoints[-1]) / _floor(4 * M_vert) 660 | left_line = _round( 661 | wedge_endpoints[-2] + slope_wedge * (Y_corner - 1)) 662 | wrapped_XX = np.zeros( 663 | (length_corner_wedge, width_wedge), dtype=int) 664 | wrapped_YY = np.zeros( 665 | (length_corner_wedge, width_wedge), dtype=int) 666 | first_row = _floor(4 * M_vert) + 2 - \ 667 | _ceil((length_corner_wedge + 1) / 2) + \ 668 | (length_corner_wedge + 1) % 2 * (quadrant - 2 == quadrant % 2) 669 | first_col = _floor(4 * M_horiz) + 2 - _ceil((width_wedge + 1) / 2) \ 670 | + (width_wedge + 1) % 2 * (quadrant - 3 == (quadrant - 3) % 2) 671 | for row in Y_corner - 1: 672 | cols = left_line[row] + (np.arange(width_wedge) - 673 | (left_line[row] - first_col)) % width_wedge 674 | admissible_cols = _round(0.5 * (cols + 2 * _floor(4 * M_horiz) 675 | + 1 - np.abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1 676 | new_row = (row - first_row + 1) % length_corner_wedge 677 | wrapped_XX[new_row, :] = XX[row, admissible_cols] 678 | wrapped_YY[new_row, :] = YY[row, admissible_cols] 679 | 680 | slope_wedge_left = ((_floor(4 * M_horiz) + 1) - 681 | wedge_midpoints[-1]) / _floor(4 * M_vert) 682 | mid_line_left = wedge_midpoints[-1] + \ 683 | slope_wedge_left * (wrapped_YY - 1) 684 | coord_left = 0.5 + _floor(4 * M_vert) / \ 685 | (wedge_endpoints[-1] - wedge_endpoints[-2]) * \ 686 | (wrapped_XX - mid_line_left) / \ 687 | (_floor(4 * M_vert) + 1 - wrapped_YY) 688 | C2 = -1 / (2 * (_floor(4 * M_horiz)) / (wedge_endpoints[-1] - 1) 689 | - 1 + 1 / (2 * (_floor(4 * M_vert)) / 690 | (first_wedge_endpoint_vert - 1) - 1)) 691 | C1 = -C2 * (2 * (_floor(4 * M_horiz)) / 692 | (wedge_endpoints[-1] - 1) - 1) 693 | 694 | wrapped_XX[(wrapped_XX - 1) / _floor(4 * M_horiz) == 695 | (wrapped_YY - 1) / _floor(4 * M_vert)] -= 1 696 | coord_corner = C1 + C2 * (2 - ((wrapped_XX - 1) / 697 | _floor(4 * M_horiz) + (wrapped_YY - 1) / _floor(4 * M_vert))) \ 698 | / ((wrapped_XX - 1) / _floor(4 * M_horiz) - (wrapped_YY - 1) / 699 | _floor(4 * M_vert)) 700 | wl_left, _ = fdct_wrapping_window(coord_left) 701 | _, wr_right = fdct_wrapping_window(coord_corner) 702 | 703 | if not is_real: 704 | wrapped_data = fftshift( 705 | fft2(ifftshift(C[j][l]))) / np.sqrt(C[j][l].size) 706 | wrapped_data = np.rot90(wrapped_data, quadrant - 1) 707 | else: 708 | x = C[j][l] + 1j * C[j][l + nbangles[j] // 2] 709 | wrapped_data = fftshift( 710 | 
fft2(ifftshift(x))) / np.sqrt(x.size * 2) 711 | wrapped_data = np.rot90(wrapped_data, quadrant - 1) 712 | 713 | wrapped_data = wrapped_data * wl_left * wr_right 714 | 715 | # Unwrapping data 716 | for row in Y_corner - 1: 717 | cols = left_line[row] + (np.arange(width_wedge) - 718 | (left_line[row] - first_col)) % width_wedge 719 | admissible_cols = _round(1 / 2 * (cols + 2 * _floor(4 * M_horiz) 720 | + 1 - abs(cols - (2 * _floor(4 * M_horiz) + 1)))) - 1 721 | new_row = (row + 1 - first_row) % length_corner_wedge 722 | Xj[row, np.flip(admissible_cols)] += wrapped_data[new_row, ::-1] 723 | # We use the following property: in an assignment A[B] = C where 724 | # B and C are vectors, if some value x repeats in B, then the 725 | # last occurrence of x is the one corresponding to the eventual 726 | # assignment. 727 | 728 | Xj = np.rot90(Xj) 729 | 730 | Xj *= lowpass 731 | Xj_index1 = np.arange(-_floor(2 * M1), 732 | _floor(2 * M1) + 1) + _floor(4 * M1) 733 | Xj_index2 = np.arange(-_floor(2 * M2), 734 | _floor(2 * M2) + 1) + _floor(4 * M2) 735 | 736 | Xj[np.ix_(Xj_index1, Xj_index2)] *= hipass 737 | 738 | loc_1 = Xj_topleft_1 + np.arange(2 * _floor(4 * M1) + 1) - 1 739 | loc_2 = Xj_topleft_2 + np.arange(2 * _floor(4 * M2) + 1) - 1 740 | X[np.ix_(loc_1, loc_2)] += Xj 741 | 742 | # Preparing for loop reentry or exit 743 | Xj_topleft_1 += _floor(4 * M1) - _floor(2 * M1) 744 | Xj_topleft_2 += _floor(4 * M2) - _floor(2 * M2) 745 | 746 | lowpass = lowpass_next 747 | 748 | if is_real: 749 | Y = X 750 | X = np.rot90(X, 2) 751 | X = X + np.conj(Y) 752 | 753 | # Coarsest wavelet level 754 | M1 = M1 / 2 755 | M2 = M2 / 2 756 | Xj = fftshift(fft2(ifftshift(C[0][0]))) / np.sqrt(C[0][0].size) 757 | loc_1 = Xj_topleft_1 + np.arange(2 * _floor(4 * M1) + 1) - 1 758 | loc_2 = Xj_topleft_2 + np.arange(2 * _floor(4 * M2) + 1) - 1 759 | X[np.ix_(loc_1, loc_2)] += Xj * lowpass 760 | 761 | # Finest level 762 | M1 = N1 / 3 763 | M2 = N2 / 3 764 | if finest == 1: 765 | # Folding back onto N1-by-N2 matrix 766 | shift_1 = _floor(2 * M1) - _floor(N1 / 2) 767 | shift_2 = _floor(2 * M2) - _floor(N2 / 2) 768 | Y = X[:, np.arange(N2) + shift_2] 769 | Y[:, np.arange(N2 - shift_2, N2)] += X[:, :shift_2] 770 | Y[:, :shift_2] += X[:, N2 + shift_2:N2 + 2 * shift_2] 771 | X = Y[np.arange(N1) + shift_1, :] 772 | X[np.arange(N1 - shift_1, N1), :] += Y[:shift_1, :] 773 | X[:shift_1, :] += Y[N1 + shift_1:N1 + 2 * shift_1, :] 774 | else: 775 | # Extension to a N1-by-N2 matrix 776 | Y = fftshift(fft2(ifftshift(C[nbscales - 1][0]))) / \ 777 | np.sqrt(C[nbscales - 1][0].size) 778 | X_topleft_1 = _ceil((N1 + 1) / 2) - _floor(M1) 779 | X_topleft_2 = _ceil((N2 + 1) / 2) - _floor(M2) 780 | loc_1 = X_topleft_1 + np.arange(2 * _floor(M1) + 1) - 1 781 | loc_2 = X_topleft_2 + np.arange(2 * _floor(M2) + 1) - 1 782 | Y[np.ix_(loc_1, loc_2)] = Y[np.ix_(loc_1, loc_2)] * hipass_finest + X 783 | X = Y 784 | 785 | x = fftshift(ifft2(ifftshift(X))) * np.sqrt(X.size) 786 | if is_real: 787 | x = np.real(x) 788 | 789 | return x 790 | -------------------------------------------------------------------------------- /daspy/advanced_tools/strain2vel.py: -------------------------------------------------------------------------------- 1 | # Purpose: Convert strain rate data to velocity 2 | # Author: Minzhe Hu 3 | # Date: 2024.3.10 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import numpy as np 6 | from numpy.fft import irfft2, ifftshift 7 | from scipy.signal import hilbert 8 | from daspy.basic_tools.freqattributes import next_pow_2, fk_transform 9 | from 
daspy.basic_tools.preprocessing import padding, cosine_taper 10 | from daspy.basic_tools.filter import bandpass 11 | from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping 12 | from daspy.advanced_tools.denoising import _velocity_bin 13 | from daspy.advanced_tools.decomposition import fk_fan_mask 14 | 15 | 16 | def fk_rescaling(data, dx, fs, taper=(0.02, 0.05), pad='default', fmax=None, 17 | kmin=(1 / 2000, 1 / 3000), vmax=(15000, 30000), edge=0.2, 18 | turning=None, verbose=False): 19 | """ 20 | Convert strain/strain rate to velocity/acceleration by fk rescaling. 21 | 22 | :param data: numpy.ndarray. Data to do fk rescaling. 23 | :param dx: Channel interval in m. 24 | :param fs: Sampling rate in Hz. 25 | :param taper: float or sequence of floats. Each float means decimal 26 | percentage of Tukey taper for corresponding dimension (ranging from 0 to 27 | 1). A value of 0.1 tapers 5% from the beginning and 5% from the end; 28 | the default here is (0.02, 0.05). 29 | :param pad: Pad the data or not. It can be float or sequence of floats. Each 30 | float means padding percentage before FFT for corresponding dimension. 31 | If set to 0.1, it will pad 5% before the beginning and after the end. 32 | 'default' means pad both dimensions to next power of 2. None or False 33 | means don't pad data before or during Fast Fourier Transform. 34 | :param fmax, kmin, vmax: float or sequence of 2 floats. Sequence of 2 35 | floats represents the start and end of taper. Setting these parameters 36 | can reduce artifacts. 37 | :param edge: float. The width of fan mask taper edge. 38 | :param turning: Sequence of int. Channel number of turning points. 39 | :param verbose: If True, return converted data, f-k spectrum, frequency 40 | sequence, wavenumber sequence and f-k mask. 41 | :return: Converted data and some variables in the process if verbose==True. 42 | """ 43 | if turning is not None: 44 | data_vel = np.zeros_like(data) 45 | start_ch = [0, *turning] 46 | end_ch = [*turning, len(data)] 47 | for (s, e) in zip(start_ch, end_ch): 48 | data_vel[s:e] = fk_rescaling(data[s:e], dx, fs, taper=taper, 49 | pad=pad, fmax=fmax, kmin=kmin, 50 | vmax=vmax, edge=edge, verbose=False) 51 | else: 52 | data_tp = cosine_taper(data, taper) 53 | 54 | if pad == 'default': 55 | nch, nt = data.shape 56 | dn = (next_pow_2(nch) - nch, next_pow_2(nt) - nt) 57 | nfft = None 58 | elif pad is None or pad is False: 59 | dn = 0 60 | nfft = None 61 | else: 62 | dn = np.round(np.array(pad) * data.shape).astype(int) 63 | nfft = 'default' 64 | 65 | data_pd = padding(data_tp, dn) 66 | nch, nt = data_pd.shape 67 | 68 | fk, f, k = fk_transform(data_pd, dx, fs, taper=taper, nfft=nfft) 69 | 70 | ff = np.tile(f, (len(k), 1)) 71 | kk = np.tile(k, (len(f), 1)).T 72 | vv = - np.divide(ff, kk, out=np.ones_like(ff) * 1e10, where=kk != 0) 73 | 74 | mask = fk_fan_mask(f, k, fmax=fmax, kmin=kmin, vmax=vmax, edge=edge) * vv 75 | mask[kk == 0] = 0 76 | 77 | data_vel = irfft2(ifftshift(fk * mask, axes=0)).real[:nch, :nt] 78 | data_vel = padding(data_vel, dn, reverse=True) 79 | 80 | if verbose: 81 | return data_vel, fk, f, k, mask 82 | return data_vel 83 | 84 | 85 | def curvelet_conversion(data, dx, fs, pad=0.3, scale_begin=2, nbscales=None, 86 | nbangles=16, turning=None): 87 | """ 88 | Use curvelet transform to convert strain/strain rate to 89 | velocity/acceleration. {Yang et al., 2023, Geophys. Res. Lett.} 90 | 91 | :param data: numpy.ndarray. Data to convert. 92 | :param dx: Channel interval in m. 93 | :param fs: Sampling rate in Hz.
94 | :param pad: float or sequence of floats. Each float means padding percentage 95 | before FFT for corresponding dimension. If set to 0.1, it will pad 5% before 96 | the beginning and after the end. 97 | :param scale_begin: int. The beginning scale to do conversion. 98 | :param nbscales: int. Number of scales including the coarsest wavelet level. 99 | Default set to ceil(log2(min(M,N)) - 3). 100 | :param nbangles: int. Number of angles at the 2nd coarsest level, 101 | minimum 8, must be a multiple of 4. 102 | :param turning: Sequence of int. Channel number of turning points. 103 | :return: numpy.ndarray. Converted data. 104 | """ 105 | if turning is not None: 106 | data_vel = np.zeros_like(data) 107 | start_ch = [0, *turning] 108 | end_ch = [*turning, len(data)] 109 | for (s, e) in zip(start_ch, end_ch): 110 | data_vel[s:e] = curvelet_conversion(data[s:e], dx, fs, pad=pad, 111 | scale_begin=scale_begin, 112 | nbscales=nbscales, 113 | nbangles=nbangles, turning=None) 114 | else: 115 | if pad is None or pad is False: 116 | pad = 0 117 | dn = np.round(np.array(pad) * data.shape).astype(int) 118 | data_pd = padding(data, dn) 119 | 120 | C = fdct_wrapping(data_pd, is_real=True, finest=1, nbscales=nbscales, 121 | nbangles_coarse=nbangles) 122 | 123 | # rescale with velocity 124 | np.seterr(divide='ignore') 125 | for s in range(0, scale_begin - 1): 126 | for w in range(len(C[s])): 127 | C[s][w] *= 0 128 | 129 | for s in range(scale_begin - 1, len(C)): 130 | nbangles = len(C[s]) 131 | velocity = _velocity_bin(nbangles, fs, dx) 132 | factors = np.mean(velocity, axis=1) 133 | for w in range(nbangles): 134 | if abs(factors[w]) == np.inf: 135 | factors[w] = abs(velocity[w]).min() * \ 136 | np.sign(velocity[w, 0]) * 2 137 | C[s][w] *= factors[w] 138 | 139 | data_vel = ifdct_wrapping(C, is_real=True, size=data_pd.shape) 140 | data_vel = padding(data_vel, dn, reverse=True) 141 | 142 | return data_vel 143 | 144 | 145 | def slowness(g, dx, fs, slm, sls, swin=2): 146 | """ 147 | Estimate the slowness time series by calculating semblance. 148 | {Lior et al., 2021, Solid Earth} 149 | 150 | :param g: 2-dimensional array. Time series of adjacent channels used for 151 | estimating slowness. 152 | :param dx: float. Spatial sampling rate (in m). 153 | :param fs: float. Sampling rate of records. 154 | :param slm: float. Maximum slowness (in s/m). 155 | :param sls: float. Slowness step (in s/m). 156 | :param swin: int. Slowness smoothing window (in samples). 157 | :return: Sequences of slowness and semblance.
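Illustrative call (array name and values assumed, not from the package): p, sem = slowness(record[100:121], dx=8.0, fs=50.0, slm=0.01, sls=0.000125) estimates the slowness time series over 21 adjacent channels.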
158 | """ 159 | L = (len(g) - 1) // 2 160 | nt = len(g[0]) 161 | h = np.imag(hilbert(g)) 162 | grdpnt = round(slm / sls) 163 | sem = np.zeros((2 * grdpnt + 1, nt)) 164 | gap = round(slm * dx * L * fs) 165 | 166 | h_ex = np.zeros((len(g), nt + 2 * gap)) 167 | h_ex[:, gap:-gap] = h 168 | g_ex = np.zeros((len(g), nt + 2 * gap)) 169 | g_ex[:, gap:-gap] = g 170 | 171 | for i in range(2 * grdpnt + 1): 172 | px = (i - grdpnt) * sls 173 | if abs(px) < 1e-5: 174 | continue 175 | gt = np.zeros(g.shape) 176 | ht = np.zeros(h.shape) 177 | for j in range(-L, L): 178 | shift = round(px * j * dx * fs) 179 | gt[j + L] = g_ex[j + L, gap + shift:gap + shift + nt] 180 | ht[j + L] = h_ex[j + L, gap + shift:gap + shift + nt] 181 | sem[i] = (np.sum(gt, axis=0)**2 + np.sum(ht, axis=0)**2) / \ 182 | np.sum(gt**2 + ht**2, axis=0) / (2 * L + 1) 183 | p = (np.argmax(sem, axis=0) - grdpnt) * sls 184 | # smooth P 185 | for i in range(swin, nt - swin): 186 | win = p[i - swin:i + swin + 1] 187 | sign = np.sign(sum(np.sign(win))) 188 | win = [px for px in win if np.sign(px) == sign] 189 | p[i] = np.mean(win) 190 | 191 | return p, sem 192 | 193 | 194 | def slant_stacking(data, dx, fs, L=None, slm=0.01, 195 | sls=0.000125, frqlow=0.1, frqhigh=15, turning=None, 196 | channel='all'): 197 | """ 198 | Convert strain to velocity based on slant-stack. 199 | 200 | :param data: 2-dimensional array. Axis 0 is channel number and axis 1 is 201 | time series 202 | :param dx: float. Spatical sampling rate (in m) 203 | :param L: int. the number of adjacent channels over which slowness is 204 | estimated 205 | :param slm: float. Slowness x max 206 | :param sls: float. slowness step 207 | :param freqmin: Pass band low corner frequency. 208 | :param freqmax: Pass band high corner frequency. 209 | :param turning: Sequence of int. Channel number of turning points. 210 | :param channel: int or list or 'all'. convert a certain channel number / 211 | certain channel range / all channels. 
212 | :return: Converted velocity data 213 | """ 214 | if L is None: 215 | L = round(50 / dx) 216 | 217 | nch, nt = data.shape 218 | if isinstance(channel, str) and channel == 'all': 219 | channel = list(range(nch)) 220 | elif isinstance(channel, int): 221 | channel = [channel] 222 | 223 | if turning is not None: 224 | data_vel = np.zeros((0, len(data[0]))) 225 | start_ch = [0, *turning] 226 | end_ch = [*turning, len(data)] 227 | for (s, e) in zip(start_ch, end_ch): 228 | channel_seg = [ch-s for ch in range(s,e) if ch in channel] 229 | if len(channel_seg): 230 | d_vel = slant_stacking(data[s:e], dx, fs, L=L, slm=slm, sls=sls, 231 | frqlow=frqlow, frqhigh=frqhigh, 232 | turning=None, channel=channel_seg) 233 | data_vel = np.vstack((data_vel, d_vel)) 234 | else: 235 | data_ex = padding(data, (2 * L, 0)) 236 | swin = int(max((1 / frqhigh * fs) // 2, 1)) 237 | data_vel = np.zeros((len(channel), nt)) 238 | for i, ch in enumerate(channel): 239 | p, _ = slowness(data_ex[ch:ch + 2 * L + 1], dx, fs, slm, sls, 240 | swin=swin) 241 | data_vel[i] = bandpass(data[ch] / p, fs=fs, freqmin=frqlow, 242 | freqmax=frqhigh) 243 | 244 | return data_vel 245 | -------------------------------------------------------------------------------- /daspy/basic_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/basic_tools/__init__.py -------------------------------------------------------------------------------- /daspy/basic_tools/filter.py: -------------------------------------------------------------------------------- 1 | # Purpose: Filter the waveform 2 | # Author: Minzhe Hu 3 | # Date: 2024.10.16 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | # Modified from https://docs.obspy.org/_modules/obspy/signal/filter.html 6 | import warnings 7 | import numpy as np 8 | from scipy.signal import cheb2ord, cheby2, hilbert, iirfilter, zpk2sos, sosfilt 9 | 10 | 11 | def bandpass(data, fs, freqmin, freqmax, corners=4, zi=None, zerophase=True): 12 | """ 13 | Filter data from 'freqmin' to 'freqmax' using Butterworth bandpass filter of 14 | 'corners' corners. 15 | 16 | :param data: numpy.ndarray. Data to filter. 17 | :param fs: Sampling rate in Hz. 18 | :param freqmin: Pass band low corner frequency. 19 | :param freqmax: Pass band high corner frequency. 20 | :param corners: Filter corners / order. 21 | :param zi : None, 0, or array_like. Initial conditions for the cascaded 22 | filter delays. It is a vector of shape (n_sections, nch, 2). Set to 0 to 23 | trigger a output of the final filter delay values. 24 | :param zerophase: If True, apply filter once forwards and once backwards. 25 | This results in twice the number of corners but zero phase shift in 26 | the resulting filtered data. Only valid when zi is None. 27 | :return: Filtered data and the final filter delay values (if zi is not 28 | None). 29 | """ 30 | if len(data.shape) == 1: 31 | data = data[np.newaxis, :] 32 | fe = 0.5 * fs 33 | low = freqmin / fe 34 | high = freqmax / fe 35 | # raise for some bad scenarios 36 | if high - 1.0 > -1e-6: 37 | msg = ('Selected high corner frequency ({}) of bandpass is at or ' + 38 | 'above Nyquist ({}). Applying a high-pass instead.').format( 39 | freqmax, fe) 40 | warnings.warn(msg) 41 | return highpass(data, freq=freqmin, fs=fs, corners=corners, 42 | zerophase=zerophase) 43 | if low > 1: 44 | msg = 'Selected low corner frequency is above Nyquist.' 
45 | raise ValueError(msg) 46 | z, p, k = iirfilter(corners, [low, high], btype='band', ftype='butter', 47 | output='zpk') 48 | sos = zpk2sos(z, p, k) 49 | if zi is None: 50 | data_flt = sosfilt(sos, data) 51 | if zerophase: 52 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1] 53 | return data_flt 54 | elif isinstance(zi, (int, float)): 55 | zi = np.ones((sos.shape[0], len(data), 2)) * zi 56 | 57 | data_flt, zf = sosfilt(sos, data, zi=zi) 58 | return data_flt, zf 59 | 60 | 61 | def bandstop(data, fs, freqmin, freqmax, corners=4, zi=None, zerophase=False): 62 | """ 63 | Filter data removing data between frequencies 'freqmin' and 'freqmax' using 64 | Butterworth bandstop filter of 'corners' corners. 65 | 66 | :param data: numpy.ndarray. Data to filter. 67 | :param fs: Sampling rate in Hz. 68 | :param freqmin: Stop band low corner frequency. 69 | :param freqmax: Stop band high corner frequency. 70 | :param corners: Filter corners / order. 71 | :param zi : None, 0, or array_like. Initial conditions for the cascaded 72 | filter delays. It is a vector of shape (n_sections, nch, 2). Set to 0 to 73 | trigger a output of the final filter delay values. 74 | :param zerophase: If True, apply filter once forwards and once backwards. 75 | This results in twice the number of corners but zero phase shift in 76 | the resulting filtered data. Only valid when zi is None. 77 | :return: Filtered data and the final filter delay values (if zi is not 78 | None). 79 | """ 80 | if len(data.shape) == 1: 81 | data = data[np.newaxis, :] 82 | fe = 0.5 * fs 83 | low = freqmin / fe 84 | high = freqmax / fe 85 | # raise for some bad scenarios 86 | if high > 1: 87 | high = 1.0 88 | msg = 'Selected high corner frequency is above Nyquist. Setting ' + \ 89 | 'Nyquist as high corner.' 90 | warnings.warn(msg) 91 | if low > 1: 92 | msg = 'Selected low corner frequency is above Nyquist.' 93 | raise ValueError(msg) 94 | z, p, k = iirfilter(corners, [low, high], 95 | btype='bandstop', ftype='butter', output='zpk') 96 | sos = zpk2sos(z, p, k) 97 | if zi is None: 98 | data_flt = sosfilt(sos, data) 99 | if zerophase: 100 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1] 101 | return data_flt 102 | elif isinstance(zi, (int, float)): 103 | zi = np.ones((sos.shape[0], len(data), 2)) * zi 104 | 105 | data_flt, zf = sosfilt(sos, data, zi=zi) 106 | return data_flt, zf 107 | 108 | 109 | def lowpass(data, fs, freq, corners=4, zi=None, zerophase=False): 110 | """ 111 | Filter data removing data over certain frequency 'freq' using Butterworth 112 | lowpass filter of 'corners' corners. 113 | 114 | :param data: numpy.ndarray. Data to filter. 115 | :param fs: Sampling rate in Hz. 116 | :param freq: Filter corner frequency. 117 | :param corners: Filter corners / order. 118 | :param zi : None, 0, or array_like. Initial conditions for the cascaded 119 | filter delays. It is a vector of shape (n_sections, nch, 2). Set to 0 to 120 | trigger a output of the final filter delay values. 121 | :param zerophase: If True, apply filter once forwards and once backwards. 122 | This results in twice the number of corners but zero phase shift in 123 | the resulting filtered data. Only valid when zi is None. 124 | :return: Filtered data and the final filter delay values (if zi is not 125 | None). 126 | """ 127 | if len(data.shape) == 1: 128 | data = data[np.newaxis, :] 129 | fe = 0.5 * fs 130 | f = freq / fe 131 | # raise for some bad scenarios 132 | if f > 1: 133 | f = 1.0 134 | msg = 'Selected corner frequency is above Nyquist. 
Setting Nyquist ' + \ 135 | 'as high corner.' 136 | warnings.warn(msg) 137 | z, p, k = iirfilter(corners, f, btype='lowpass', ftype='butter', 138 | output='zpk') 139 | sos = zpk2sos(z, p, k) 140 | if zi is None: 141 | data_flt = sosfilt(sos, data) 142 | if zerophase: 143 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1] 144 | return data_flt 145 | elif isinstance(zi, (int, float)): 146 | zi = np.ones((sos.shape[0], len(data), 2)) * zi 147 | 148 | data_flt, zf = sosfilt(sos, data, zi=zi) 149 | return data_flt, zf 150 | 151 | 152 | def lowpass_cheby_2(data, fs, freq, maxorder=12, zi=None, ba=False, 153 | freq_passband=False): 154 | """ 155 | Filter data by passing data only below a certain frequency. The main purpose 156 | of this cheby2 filter is downsampling. This method will iteratively design a 157 | filter, whose pass band frequency is determined dynamically, such that the 158 | values above the stop band frequency are lower than -96dB. 159 | 160 | :param data: numpy.ndarray. Data to filter. 161 | :param fs: Sampling rate in Hz. 162 | :param freq: The frequency above which signals are attenuated with 95 dB. 163 | :param maxorder: Maximal order of the designed cheby2 filter. 164 | :param zi : None, 0, or array_like. Initial conditions for the cascaded 165 | filter delays. It is a vector of shape (n_sections, nch, 2). Set to 0 to 166 | trigger a output of the final filter delay values. 167 | :param ba: If True return only the filter coefficients (b, a) instead of 168 | filtering. 169 | :param freq_passband: If True return additionally to the filtered data, the 170 | iteratively determined pass band frequency. 171 | :return: Filtered data, the final filter delay values (if zi is not None) 172 | and the determined pass band frequency (if freq_passband is True). 173 | """ 174 | if data.ndim == 1: 175 | data = data[np.newaxis, :] 176 | 177 | nyquist = fs * 0.5 178 | # rp - maximum ripple of passband, rs - attenuation of stopband 179 | rp, rs, order = 1, 96, 1e99 180 | ws = freq / nyquist # stop band frequency 181 | wp = ws # pass band frequency 182 | # raise for some bad scenarios 183 | if ws > 1: 184 | ws = 1.0 185 | warnings.warn('Selected corner frequency is above Nyquist. Setting ' 186 | 'Nyquist as high corner.') 187 | while True: 188 | if order <= maxorder: 189 | break 190 | wp = wp * 0.99 191 | order, wn = cheb2ord(wp, ws, rp, rs, analog=0) 192 | if ba: 193 | return cheby2(order, rs, wn, btype='low', analog=0, output='ba') 194 | z, p, k = cheby2(order, rs, wn, btype='low', analog=0, output='zpk') 195 | sos = zpk2sos(z, p, k) 196 | if zi is None: 197 | data_flt = sosfilt(sos, data) 198 | if freq_passband: 199 | return data_flt, wp * nyquist 200 | return data_flt 201 | elif isinstance(zi, (int, float)): 202 | zi = np.ones((sos.shape[0], len(data), 2)) * zi 203 | 204 | data_flt, zf = sosfilt(sos, data, zi=zi) 205 | if freq_passband: 206 | return data_flt, zf, wp * nyquist 207 | return data_flt, zf 208 | 209 | 210 | def highpass(data, fs, freq, corners=4, zi=None, zerophase=False): 211 | """ 212 | Filter data removing data below certain frequency 'freq' using Butterworth 213 | highpass filter of 'corners' corners. 214 | 215 | :param data: numpy.ndarray. Data to filter. 216 | :param fs: Sampling rate in Hz. 217 | :param freq: Filter corner frequency. 218 | :param corners: Filter corners / order. 219 | :param zerophase: If True, apply filter once forwards and once backwards. 220 | This results in twice the number of corners but zero phase shift in 221 | the resulting filtered data. 
Only valid when zi is None. 222 | :return: Filtered data and the final filter delay values (if zi is not 223 | None). 224 | """ 225 | if len(data.shape) == 1: 226 | data = data[np.newaxis, :] 227 | fe = 0.5 * fs 228 | f = freq / fe 229 | # raise for some bad scenarios 230 | if f > 1: 231 | msg = 'Selected corner frequency is above Nyquist.' 232 | raise ValueError(msg) 233 | z, p, k = iirfilter(corners, f, btype='highpass', ftype='butter', 234 | output='zpk') 235 | sos = zpk2sos(z, p, k) 236 | if zi is None: 237 | data_flt = sosfilt(sos, data) 238 | if zerophase: 239 | data_flt = sosfilt(sos, data_flt[:, ::-1])[:, ::-1] 240 | return data_flt 241 | elif isinstance(zi, (int, float)): 242 | zi = np.ones((sos.shape[0], len(data), 2)) * zi 243 | 244 | data_flt, zf = sosfilt(sos, data, zi=zi) 245 | return data_flt, zf 246 | 247 | def envelope(data): 248 | """ 249 | Computes the envelope of the given data. The envelope is determined by 250 | adding the squared amplitudes of the data and it's Hilbert-Transform and 251 | then taking the square-root. The envelope at the start/end should not be 252 | taken too seriously. 253 | 254 | :param data: numpy.ndarray. Data to make envelope of. 255 | :return: Envelope of input data. 256 | """ 257 | return abs(hilbert(data, axis=-1)) 258 | -------------------------------------------------------------------------------- /daspy/basic_tools/freqattributes.py: -------------------------------------------------------------------------------- 1 | # Purpose: Analyze frequency attribute and transform in frequency domain 2 | # Author: Minzhe Hu 3 | # Date: 2024.6.8 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import numpy as np 6 | from numpy.fft import rfft, rfft2, fftshift, fftfreq, rfftfreq 7 | from scipy.signal import stft 8 | from daspy.basic_tools.preprocessing import demeaning, detrending, cosine_taper 9 | 10 | 11 | def next_pow_2(i): 12 | """ 13 | Find the next power of two. 14 | 15 | :param i: float or int. 16 | :return: int. The next power of two for i. 17 | """ 18 | buf = np.ceil(np.log2(i)) 19 | return np.power(2, buf).astype(int) 20 | 21 | 22 | def spectrum(data, fs, taper=0.05, nfft='default'): 23 | """ 24 | Computes the spectrum of the given data. 25 | 26 | :param data: numpy.ndarray. Data to make spectrum of. 27 | :param fs: Sampling rate in Hz. 28 | :param taper: Decimal percentage of Tukey taper. 29 | :param nfft: Number of points for FFT. None = sampling points, 'default' = 30 | next power of 2 of sampling points. 31 | :return: Spectrum and frequency sequence. 32 | """ 33 | if len(data.shape) == 1: 34 | data = data.reshape(1, len(data)) 35 | elif len(data.shape) != 2: 36 | raise ValueError("Data should be 1-D or 2-D array") 37 | data = cosine_taper(data, (0, taper)) 38 | 39 | if nfft == 'default': 40 | nfft = next_pow_2(len(data[0])) 41 | elif nfft is None: 42 | nfft = len(data[0]) 43 | 44 | spec = rfft(data, n=nfft, axis=1) 45 | f = rfftfreq(nfft, d=1 / fs) 46 | 47 | return spec, f 48 | 49 | 50 | def spectrogram(data, fs, nperseg=256, noverlap=None, nfft=None, detrend=False, 51 | boundary='zeros'): 52 | """ 53 | Computes the spectrogram of the given data. 54 | 55 | :param data: 1-D or 2-D numpy.ndarray. Data to make spectrogram of. 56 | :param fs: Sampling rate in Hz. 57 | :param nperseg: int. Length of each segment. 58 | :param noverlap: int. Number of points to overlap between segments. If None, 59 | noverlap = nperseg // 2. 60 | :param nfft: int. Length of the FFT used. None = nperseg. 61 | :param detrend : str or bool. 
Specifies whether and how to detrend each 62 | segment. 'linear' or 'detrend' or True = detrend, 'constant' or 63 | 'demean' = demean. 64 | :param boundary: str or None. Specifies whether the input signal is extended 65 | at both ends, and how to generate the new values, in order to center the 66 | first windowed segment on the first input point. This has the benefit of 67 | enabling reconstruction of the first input point when the employed 68 | window function starts at zero. Valid options are ['even', 'odd', 69 | 'constant', 'zeros', None]. 70 | :return: Spectrogram, frequency sequence and time sequence. 71 | """ 72 | if detrend in [True, 'linear', 'detrend']: 73 | detrend = detrending 74 | elif detrend in ['constant', 'demean']: 75 | detrend = demeaning 76 | if data.ndim == 1: 77 | f, t, Zxx = stft(data, fs=fs, nperseg=nperseg, noverlap=noverlap, 78 | nfft=nfft, detrend=detrend, boundary=boundary) 79 | elif len(data) == 1: 80 | f, t, Zxx = stft(data[0], fs=fs, nperseg=nperseg, noverlap=noverlap, 81 | nfft=nfft, detrend=detrend, boundary=boundary) 82 | else: 83 | Zxx = [] 84 | for d in data: 85 | f, t, Zxxi = stft(d, fs=fs, nperseg=nperseg, noverlap=noverlap, 86 | nfft=nfft, detrend=detrend, boundary=boundary) 87 | Zxx.append(abs(Zxxi)) 88 | Zxx = np.mean(np.array(Zxx), axis=0) 89 | 90 | return Zxx, f, t 91 | 92 | 93 | def fk_transform(data, dx, fs, taper=(0, 0.05), nfft='default'): 94 | """ 95 | Transform the data to the fk domain using 2-D Fourier transform method. 96 | 97 | :param data: numpy.ndarray. Data to do fk transform. 98 | :param dx: Channel interval in m. 99 | :param fs: Sampling rate in Hz. 100 | :param taper: float or sequence of floats. Each float means decimal 101 | percentage of Tukey taper for corresponding dimension (ranging from 0 to 102 | 1). A value of 0.1 tapers 5% from the beginning and 5% from the end; 103 | the default here is (0, 0.05). 104 | :param nfft: Number of points for FFT. None means sampling points; 'default' 105 | means next power of 2 of sampling points, which makes result smoother. 106 | """ 107 | nch, nt = data.shape 108 | data = cosine_taper(data, taper) 109 | if nfft == 'default': 110 | nfft = (next_pow_2(nch), next_pow_2(nt)) 111 | elif not nfft: 112 | nfft = (nch, nt) 113 | 114 | fk = fftshift(rfft2(data, s=nfft), axes=0) 115 | f = rfftfreq(nfft[1], d=1. / fs) 116 | k = fftshift(fftfreq(nfft[0], d=dx)) 117 | return fk, f, k 118 | -------------------------------------------------------------------------------- /daspy/basic_tools/preprocessing.py: -------------------------------------------------------------------------------- 1 | # Purpose: Some preprocessing methods 2 | # Author: Minzhe Hu 3 | # Date: 2025.5.21 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import numpy as np 6 | from scipy.signal import detrend 7 | from scipy.signal.windows import tukey 8 | from daspy.basic_tools.filter import lowpass_cheby_2 9 | 10 | 11 | def phase2strain(data, lam, e, n, gl): 12 | """ 13 | Convert the optical phase shift in radians to strain. 14 | 15 | :param data: numpy.ndarray. Data to convert. 16 | :param lam: float. Operational optical wavelength in vacuum. 17 | :param e: float. Photo-elastic scaling factor for longitudinal strain in 18 | isotropic material. 19 | :param n: float. Refractive index of the sensing fiber. 20 | :param gl: float. Gauge length. 21 | :return: Strain data. 22 | """ 23 | return data * (lam * 1e-9) / (e * 4 * np.pi * n * gl) 24 | 25 | 26 | def normalization(data, method='z-score'): 27 | """ 28 | Normalize each individual channel using the specified method.
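For example (illustrative), method='one-bit' keeps only the sign of each sample, while method='mad' removes each channel's median and scales it by the median absolute deviation.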
29 | 30 | :param data: numpy.ndarray. Data to normalize. 31 | :param method: str. Method for normalization, should be one of 'max', 32 | 'z-score', 'MAD' or 'one-bit'. 33 | :return: Normalized data. 34 | """ 35 | if data.ndim == 1: 36 | data = data.reshape(1, len(data)) 37 | elif data.ndim != 2: 38 | raise ValueError("Data should be 1-D or 2-D array") 39 | 40 | if method.lower() == 'max': 41 | amp = np.max(abs(data), 1, keepdims=True) 42 | amp[amp == 0] = amp[amp > 0].min() 43 | return data / amp 44 | elif method.lower() == 'z-score': 45 | mean = np.mean(data, axis=1, keepdims=True) 46 | std = np.std(data, axis=1, keepdims=True) 47 | std[std == 0] = std[std > 0].min() 48 | return (data - mean) / std 49 | elif method.lower() == 'mad': 50 | median = np.median(data, axis=1, keepdims=True) 51 | mad = np.median(abs(data - median), axis=1, keepdims=True) 52 | mad[mad == 0] = mad[mad > 0].min() 53 | return (data - median) / mad 54 | elif method.lower() == 'one-bit': 55 | return np.sign(data) 56 | 57 | 58 | def demeaning(data): 59 | """ 60 | Demean signal by subtracted mean of each channel. 61 | 62 | :param data: numpy.ndarray. Data to demean. 63 | :return: Detrended data. 64 | """ 65 | return detrend(data, type='constant') 66 | 67 | 68 | def detrending(data): 69 | """ 70 | Detrend signal by subtracted a linear least-squares fit to data. 71 | 72 | :param data: numpy.ndarray. Data to detrend. 73 | :return: Detrended data. 74 | """ 75 | return detrend(data, type='linear') 76 | 77 | 78 | def stacking(data: np.ndarray, N: int, step: int = None, average: bool = True): 79 | """ 80 | Stack several channels to increase the signal-noise ratio(SNR). 81 | 82 | :param data: numpy.ndarray. Data to stack. 83 | :param N: int. N adjacent channels stacked into 1. 84 | :param step: int. Interval of data stacking. 85 | :param average: bool. True for calculating the average. 86 | :return: Stacked data. 87 | """ 88 | if N == 1: 89 | return data 90 | if step is None: 91 | step = N 92 | nch, nt = data.shape 93 | begin = np.arange(0, nch - N + 1, step) 94 | end = begin + N 95 | nx1 = len(begin) 96 | data_stacked = np.zeros((nx1, nt)) 97 | for i in range(nx1): 98 | data_stacked[i, :] = np.sum(data[begin[i]:end[i], :], axis=0) 99 | if average: 100 | data_stacked /= N 101 | return data_stacked 102 | 103 | 104 | def cosine_taper(data, p=0.1, side='both'): 105 | """ 106 | Taper using Tukey window. 107 | 108 | :param data: numpy.ndarray. Data to taper. 109 | :param p: float or sequence of floats. Each float means decimal percentage 110 | of Tukey taper for corresponding dimension (ranging from 0 to 1). 111 | Default is 0.1 which tapers 5% from the beginning and 5% from the end. 112 | If only one float is given, it only do for time dimension. 113 | :param side: str. 'both', 'left', or 'right'. 114 | :return: Tapered data. 115 | """ 116 | if data.ndim == 1: 117 | data = data.reshape(1, -1) 118 | nch, nt = data.shape 119 | if not isinstance(p, (tuple, list, np.ndarray)): 120 | win = tukey(nt, p) 121 | if side == 'left': 122 | win[round(nch/2):] = 1 123 | elif side == 'right': 124 | win[:round(len(win)/2)] = 1 125 | return data * np.tile(win, (nch, 1)) 126 | else: 127 | if p[0] > 0: 128 | data = data * np.tile(tukey(nch, p[0]), (nt, 1)).T 129 | return cosine_taper(data, p[1], side=side) 130 | 131 | 132 | def downsampling(data, xint=None, tint=None, stack=True, lowpass_filter=True): 133 | """ 134 | Downsample DAS data. 135 | 136 | :param data: numpy.ndarray. Data to downsample can be 1-D or 2-D. 137 | :param xint: int. 
Spatial downsampling factor. 138 | :param tint: int. Time downsampling factor. 139 | :param lowpass_filter: bool. Lowpass cheby2 filter before time downsampling 140 | or not. 141 | :return: Downsampled data. 142 | """ 143 | data_ds = data.copy() 144 | if xint and xint > 1: 145 | if stack: 146 | data_ds = stacking(data, xint) 147 | else: 148 | data_ds = data_ds[::xint].copy() 149 | if tint and tint > 1: 150 | if lowpass_filter: 151 | data_ds = lowpass_cheby_2(data_ds, 1, 1 / 2 / tint) 152 | if len(data_ds.shape) == 1: 153 | data_ds = data_ds[::tint].copy() 154 | else: 155 | data_ds = data_ds[:, ::tint].copy() 156 | return data_ds 157 | 158 | 159 | def trimming(data, dx=None, fs=None, xmin=0, xmax=None, tmin=0, tmax=None, 160 | mode=0): 161 | """ 162 | Cut data to given start and end distance/channel or time/sampling points. 163 | 164 | :param data: numpy.ndarray. Data to trim can be 1-D or 2-D. 165 | :param dx: Channel interval in m. 166 | :param fs: Sampling rate in Hz. 167 | :param xmin, xmax, tmin, tmax: Boundary for trimming. 168 | :param mode: 0 means the unit of boundary is channel number and sampling 169 | points; 1 means the unit of boundary is meters and seconds. 170 | :return: Trimmed data. 171 | """ 172 | nch, nt = data.shape 173 | if mode == 0: 174 | if xmax is None: 175 | xmax = nch 176 | if tmax is None: 177 | tmax = nt 178 | elif mode == 1: 179 | xmin = round(xmin / dx) 180 | xmax = (round(xmax / dx), nch)[xmax is None] 181 | tmin = round(tmin * fs) 182 | tmax = (round(tmax * fs), nt)[tmax is None] 183 | 184 | return data[xmin:xmax, tmin:tmax].copy() 185 | 186 | 187 | def padding(data, dn, reverse=False): 188 | """ 189 | Pad DAS data with 0. 190 | 191 | :param data: numpy.ndarray. 2D DAS data to pad. 192 | :param dn: int or sequence of ints. Number of points to pad for both 193 | dimensions. 194 | :param reverse: bool. Set True to reverse the operation. 195 | :return: Padded data. 196 | """ 197 | nch, nt = data.shape 198 | if isinstance(dn, int): 199 | dn = (dn, dn) 200 | 201 | pad = (dn[0] // 2, dn[0] - dn[0] // 2, dn[1] // 2, dn[1] - dn[1] // 2) 202 | if reverse: 203 | return data[pad[0]:nch - pad[1], pad[2]:nt - pad[3]] 204 | else: 205 | data_pd = np.zeros((nch + dn[0], nt + dn[1])) 206 | data_pd[pad[0]:nch + pad[0], pad[2]:nt + pad[2]] = data 207 | return data_pd 208 | 209 | 210 | def time_integration(data, fs, c=0): 211 | """ 212 | Integrate DAS data in time. 213 | 214 | :param data: numpy.ndarray. 2D DAS data. 215 | :param fs: Sampling rate in Hz. 216 | :param c: float. A constant added to the result. 217 | :return: Integrated data. 218 | """ 219 | return np.cumsum(data, axis=1) / fs + c 220 | 221 | 222 | def time_differential(data, fs, prepend=0): 223 | """ 224 | Differentiate DAS data in time. 225 | 226 | :param data: numpy.ndarray. 2D DAS data. 227 | :param fs: Sampling rate in Hz. 228 | :param prepend: 'mean' or values to prepend to `data` along axis prior to 229 | performing the difference. 230 | :return: Differentiated data. 231 | """ 232 | if prepend == 'mean': 233 | prepend = np.mean(data, axis=1).reshape((-1, 1)) 234 | return np.diff(data, axis=1, prepend=prepend) * fs 235 | 236 | 237 | def distance_integration(data, dx, c=0): 238 | """ 239 | Integrate DAS data in distance. 240 | 241 | :param data: numpy.ndarray. 2D DAS data. 242 | :param dx: Channel interval in m. 243 | :param c: float. A constant added to the result. 244 | :return: Integrated data. 
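Illustrative call (values assumed): distance_integration(data, dx=10.0) returns the running sum of the data scaled by the 10 m channel interval; an optional constant c is added to the result.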
245 | """ 246 | return np.cumsum(data, axis=1) * dx + c -------------------------------------------------------------------------------- /daspy/basic_tools/visualization.py: -------------------------------------------------------------------------------- 1 | # Purpose: Plot data 2 | # Author: Minzhe Hu 3 | # Date: 2025.5.20 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from collections.abc import Sequence 8 | 9 | 10 | def plot(data: np.ndarray, dx=None, fs=None, ax=None, obj='waveform', dpi=300, 11 | title=None, transpose=False, t0=0, x0=0, pick=None, f=None, k=None, 12 | t=None, c=None, cmap=None, vmin=None, vmin_per=None, vmax=None, 13 | vmax_per=None, dB=False, xmode='distance', tmode='time', xlim=None, 14 | ylim=None, xlog=False, ylog=False, xinv=False, yinv=False, xlabel=True, 15 | ylabel=True, xticklabels=True, yticklabels=True, colorbar=True, 16 | colorbar_label=None, savefig=None): 17 | """ 18 | Plot several types of 2-D seismological data. 19 | 20 | :param data: numpy.ndarray. Data to plot. 21 | :param dx: Channel interval in m. 22 | :param fs: Sampling rate in Hz. 23 | :param ax: Matplotlib.axes.Axes or tuple. Axes to plot. A tuple for new 24 | figsize. If not specified, the function will directly display the image 25 | using matplotlib.pyplot.show(). 26 | :param obj: str. Type of data to plot. It should be one of 'waveform', 27 | 'phasepick', 'spectrum', 'spectrogram', 'fk', or 'dispersion'. 28 | :param dpi: int. The resolution of the figure in dots-per-inch. 29 | :param title: str. The title of this axes. 30 | :param transpose: bool. Transpose the figure or not. 31 | :param t0, x0: The beginning of time and space. 32 | :param pick: dictionary of sequence of picked phases. Key should be 'P' for 33 | P phase, 'S' for S phase and 'N' for unknown phase type. Required if 34 | obj=='phasepick'. 35 | :param f: Sequence of frequency. Required if obj is one of 'spectrum', 36 | 'spectrogram', 'fk' or 'dispersion'. 37 | :param k: Wavenumber sequence. Required if obj=='fk'. 38 | :param t: Time sequence. Required if obj=='spectrogram'. 39 | :param c: Phase velocity sequence. Required if obj=='dispersion'. 40 | :param cmap: str or Colormap. The Colormap instance or registered colormap 41 | name used to map scalar data to colors. 42 | :param vmin, vmax: Define the data range that the colormap covers. 43 | :param vmin_per, vmax_per: float. Define the data range that the colormap 44 | covers by percentile. 45 | :param dB: bool. Transfer data unit to dB and take 1 as the reference value. 46 | :param xmode: str. 'distance' or 'channel'. 47 | :param tmode: str. 'time' or 'sampling'. 48 | :param xlim, ylim: Set the x-axis and y-axis view limits. 49 | :param xlog, ylog: bool. If True, set the x-axis' or y-axis' scale as log. 50 | :param xlabel, yinv: bool. If True, invert x-axis or y-axis. 51 | :param xlabel, ylabel: bool or str. Whether to plot a label or what label to 52 | plot for x-axis or y-axis. 53 | :param xticklabels, yticklabels: bool or sequence of str. Whether to plot 54 | ticklabels or what ticklabels to plot for x-axis or y-axis. 55 | :param colorbar: bool, str or Matplotlib.axes.Axes. Bool means plot colorbar 56 | or not. Str means the location of colorbar. Axes means the Axes into 57 | which the colorbar will be drawn. 58 | :param savefig: str or bool. Figure name to save if needed. If True, 59 | it will be set to parameter obj. 
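A minimal usage sketch (the synthetic array and the parameter values are illustrative assumptions):

>>> import numpy as np
>>> from daspy.basic_tools.visualization import plot
>>> data = np.random.randn(100, 2000)  # 100 channels, 2000 time samples
>>> plot(data, dx=10, fs=500, obj='waveform', title='Synthetic noise')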
60 | """ 61 | nch, nt = data.shape 62 | if ax is None: 63 | ax = (6, 5) 64 | if isinstance(ax, tuple): 65 | fig, ax = plt.subplots(1, figsize=ax, dpi=dpi) 66 | show = True 67 | else: 68 | show = False 69 | 70 | if obj in ['waveform', 'phasepick']: 71 | cmap = 'RdBu' if cmap is None else cmap 72 | if vmax is None: 73 | vmax_per = 80 if vmax_per is None else vmax_per 74 | vmax = np.percentile(data, vmax_per) 75 | vmin = -vmax if vmin is None else vmin 76 | origin = 'upper' 77 | if fs is None or tmode == 'sampling': 78 | ylabel_default = 'Sampling points' 79 | fs = 1 80 | elif tmode == 'time': 81 | ylabel_default = 'Time (s)' 82 | 83 | if dx is None or xmode.lower() == 'channel': 84 | xlabel_default = 'Channel' 85 | extent = [x0, x0 + nch, t0 + nt / fs, t0] 86 | elif xmode.lower() == 'distance': 87 | xlabel_default = 'Disitance (km)' 88 | extent = [x0 * 1e-3, (x0 + nch * dx) * 1e-3, t0 + nt / fs, t0] 89 | 90 | if obj == 'phasepick' and len(pick): 91 | pick_color = {'P': 'r', 'S': 'b', 'N': 'k'} 92 | for phase, pck in pick.items(): 93 | if len(pck): 94 | pck = np.array(pck).astype(float) 95 | if xmode.lower() == 'distance': 96 | pck[:, 0] = (x0 + pck[:, 0] * dx) * 1e-3 97 | elif xmode.lower() == 'channel': 98 | pck[:, 0] = x0 + pck[:, 0] 99 | if tmode.lower() == 'sampling': 100 | pck[:, 1] = pck[:, 1] / fs 101 | ax.scatter(pck[:,0], t0 + pck[:,1], marker=',', s=0.1, 102 | c=pick_color[phase]) 103 | 104 | elif obj in ['spectrum', 'spectrogram', 'fk', 'dispersion']: 105 | if np.iscomplex(data).any(): 106 | data = abs(data) 107 | if dB: 108 | data = 20 * np.log10(data) 109 | cmap = 'jet' if cmap is None else cmap 110 | 111 | if vmax is None: 112 | vmax_per = 80 if vmax_per is None else vmax_per 113 | vmax = np.percentile(data, vmax_per) 114 | if vmin is None: 115 | vmin_per = 20 if vmin_per is None else vmin_per 116 | vmin = np.percentile(data, vmin_per) 117 | 118 | if obj == 'spectrum': 119 | origin = 'lower' 120 | if dx is None or xmode.lower() == 'channel': 121 | xlabel_default = 'Channel' 122 | extent = [x0, x0 + nch, min(f), max(f)] 123 | elif xmode.lower() == 'distance': 124 | xlabel_default = 'Disitance (km)' 125 | extent = [x0 * 1e-3, (x0 + nch * dx) * 1e-3, min(f), max(f)] 126 | ylabel_default = 'Frequency (Hz)' 127 | elif obj == 'spectrogram': 128 | data = data.T 129 | origin = 'lower' 130 | xlabel_default = 'Time (s)' 131 | ylabel_default = 'Frequency (Hz)' 132 | extent = [t0 + min(t), t0 + max(t), min(f), max(f)] 133 | elif obj == 'fk': 134 | origin = 'lower' 135 | xlabel_default = 'Wavenumber (m$^{-1}$)' 136 | ylabel_default = 'Frequency (Hz)' 137 | extent = [min(k), max(k), min(f), max(f)] 138 | elif obj == 'dispersion': 139 | data = data.T 140 | origin = 'lower' 141 | xlabel_default = 'Frequency (Hz)' 142 | ylabel_default = 'Velocity (m/s)' 143 | extent = [min(f), max(f), min(c), max(c)] 144 | 145 | if transpose: 146 | if origin == 'lower': 147 | extent = [extent[2], extent[3], extent[0], extent[1]] 148 | else: 149 | origin = 'lower' 150 | extent = [extent[3], extent[2], extent[0], extent[1]] 151 | (xlabel_default, ylabel_default) = (ylabel_default, xlabel_default) 152 | data = data.T 153 | 154 | xlabel = xlabel if isinstance(xlabel, str) else \ 155 | xlabel_default if xlabel else None 156 | ylabel = ylabel if isinstance(ylabel, str) else \ 157 | ylabel_default if ylabel else None 158 | 159 | bar = ax.imshow(data.T, vmin=vmin, vmax=vmax, extent=extent, aspect='auto', 160 | origin=origin, cmap=cmap) 161 | if title: 162 | ax.set_title(title) 163 | ax.set_xlabel(xlabel) 164 | 
ax.set_ylabel(ylabel) 165 | if isinstance(xticklabels, Sequence): 166 | ax.set_xticklabels(xticklabels) 167 | elif not xticklabels: 168 | ax.set_xticklabels([]) 169 | 170 | if isinstance(yticklabels, Sequence): 171 | ax.set_yticklabels(yticklabels) 172 | elif not yticklabels: 173 | ax.set_yticklabels([]) 174 | if xinv: 175 | ax.invert_xaxis() 176 | if yinv: 177 | ax.invert_yaxis() 178 | if ylim: 179 | ax.set_ylim(ylim) 180 | if xlim: 181 | ax.set_xlim(xlim) 182 | if xlog: 183 | ax.set_xscale('log') 184 | if ylog: 185 | ax.set_yscale('log') 186 | if colorbar: 187 | if colorbar is True: 188 | cbar = plt.colorbar(bar, ax=ax, location='right') 189 | elif isinstance(colorbar, str): 190 | cbar = plt.colorbar(bar, ax=ax, location=colorbar) 191 | else: 192 | cbar = plt.colorbar(bar, cax=colorbar) 193 | if colorbar_label is not None: 194 | cbar.set_label(colorbar_label) 195 | 196 | if savefig: 197 | if not isinstance(savefig, str): 198 | savefig = obj + '.png' 199 | plt.tight_layout() 200 | plt.savefig(savefig) 201 | plt.close() 202 | elif show: 203 | plt.show() 204 | else: 205 | return ax 206 | -------------------------------------------------------------------------------- /daspy/core/__init__.py: -------------------------------------------------------------------------------- 1 | from daspy.core.section import Section 2 | from daspy.core.collection import Collection 3 | from daspy.core.read import read 4 | from daspy.core.dasdatetime import DASDateTime, local_tz, utc -------------------------------------------------------------------------------- /daspy/core/collection.py: -------------------------------------------------------------------------------- 1 | # Purpose: Module for handling Collection objects. 2 | # Author: Minzhe Hu 3 | # Date: 2025.6.4 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import os 6 | import warnings 7 | import pickle 8 | import numpy as np 9 | from copy import deepcopy 10 | from tqdm import tqdm 11 | from glob import glob 12 | from datetime import datetime 13 | from daspy.core.read import read 14 | from daspy.core.dasdatetime import DASDateTime 15 | 16 | 17 | class Collection(object): 18 | def __init__(self, fpath, ftype=None, flength=None, meta_from_file=True, 19 | timeinfo_slice=slice(None), timeinfo_format=None, 20 | timeinfo_tz=None, timeinfo_from_basename=True, **kwargs): 21 | """ 22 | :param fpath: str or Sequence of str. File path(s) containing data. 23 | :param ftype: None or str. None for automatic detection, or 'pkl', 24 | 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 'npy'. 25 | :param flength: float. The duration of a single file in senconds. 26 | :param meta_from_file: bool or 'all'. False for manually set dt, dx, fs 27 | and gauge_length. True for extracting dt, dx, fs and gauge_length 28 | from first 2 file. 'all' for exracting and checking these metadata 29 | from all file. 30 | :param timeinfo_slice: slice. Slice for extracting start time from file 31 | name. 32 | :param timeinfo_format: str. Format for extracting start time from file 33 | name. 34 | :param timeinfo_tz: datetime.timezone. Time zone for extracting start 35 | time from file name. 36 | :param timeinfo_from_basename: bool. If True, timeinfo_format will use 37 | DASDateTime.strptime to basename of fpath. 38 | :param nch: int. Channel number. 39 | :param nt: int. Sampling points of each file. 40 | :param dx: number. Channel interval in m. 41 | :param fs: number. Sampling rate in Hz. 42 | :param gauge_length: number. Gauge length in m. 
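A minimal usage sketch (the wildcard path, file type and file length are illustrative assumptions):

>>> from daspy.core import Collection
>>> coll = Collection('/data/das/*.h5', ftype='h5', flength=60)
>>> print(coll)  # summary of files, time span and metadata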
43 | """ 44 | if isinstance(fpath, (list, tuple)): 45 | self.flist = [] 46 | for fp in fpath: 47 | self.flist.extend(glob(fp)) 48 | else: 49 | self.flist = glob(fpath) 50 | if not len(self.flist): 51 | raise ValueError('No file input.') 52 | self.flist.sort() 53 | self.ftype = ftype 54 | for key in ['nch', 'nt', 'dx', 'fs', 'gauge_length']: 55 | if key in kwargs.keys(): 56 | setattr(self, key, kwargs[key]) 57 | if timeinfo_format is None and not meta_from_file: 58 | meta_from_file = True 59 | 60 | if meta_from_file == 'all': 61 | ftime = [] 62 | metadata_list = [] 63 | for f in self.flist: 64 | sec = read(f, ftype=ftype, headonly=True) 65 | if not hasattr(sec, 'gauge_length'): 66 | sec.gauge_length = None 67 | ftime.append(sec.start_time) 68 | metadata_list.append((sec.nch, sec.nt, sec.dx, sec.fs, 69 | sec.gauge_length, sec.duration)) 70 | 71 | if len(set(metadata_list)) > 1: 72 | warnings.warn('More than one kind of setting detected.') 73 | metadata = max(metadata_list, key=metadata_list.count) 74 | for i, key in enumerate(['nch', 'nt', 'dx', 'fs', 'gauge_length']): 75 | if not hasattr(self, key): 76 | setattr(self, key, metadata[i]) 77 | if flength is None: 78 | flength = metadata[-1] 79 | self.ftime = ftime 80 | elif meta_from_file: 81 | i = int(len(self.flist) > 1) 82 | sec = read(self.flist[i], ftype=ftype, headonly=True) 83 | if timeinfo_format is None: 84 | if flength is None: 85 | flength = sec.duration 86 | self.ftime = [sec.start_time + (j - i) * flength for j in 87 | range(len(self))] 88 | if not hasattr(sec, 'gauge_length'): 89 | sec.gauge_length = None 90 | metadata = (sec.nch, sec.nt, sec.dx, sec.fs, sec.gauge_length) 91 | for i, key in enumerate(['nch', 'nt', 'dx', 'fs', 'gauge_length']): 92 | if not hasattr(self, key): 93 | setattr(self, key, metadata[i]) 94 | 95 | if not hasattr(self, 'ftime'): 96 | if timeinfo_from_basename: 97 | flist_use = [os.path.basename(f) for f in self.flist] 98 | else: 99 | flist_use = self.flist 100 | if timeinfo_tz is None: 101 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice], 102 | timeinfo_format) for f in flist_use] 103 | else: 104 | if '%z' in timeinfo_format.lower(): 105 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice], 106 | timeinfo_format).astimezone(timeinfo_tz) for f in 107 | flist_use] 108 | else: 109 | self.ftime = [DASDateTime.strptime(f[timeinfo_slice], 110 | timeinfo_format).replace(tzinfo=timeinfo_tz) for f in 111 | flist_use] 112 | 113 | self._sort() 114 | if flength is None: 115 | if len(self.flist) > 2: 116 | time_diff = np.round(np.diff(self.ftime[1:]).astype(float)) 117 | flength_set, counts = np.unique(time_diff, return_counts=True) 118 | if len(flength_set) > 1: 119 | warnings.warn('File start times are unevenly spaced. 
Data ' 120 | 'may not be continuous and self.flength may ' 121 | 'be incorrectly detected.') 122 | flength = flength_set[counts.argmax()] 123 | elif len(self.flist) == 2: 124 | flength = self.ftime[1] - self.ftime[0] 125 | else: 126 | flength = read(self.flist[0], ftype=ftype, 127 | headonly=True).duration 128 | elif flength <= 0: 129 | raise ValueError('dt must > 0') 130 | 131 | self.flength = flength 132 | 133 | def __str__(self): 134 | if len(self) == 1: 135 | describe = f' flist: {self.flist}\n' 136 | elif len(self) <= 5: 137 | describe = f' flist: {len(self)} files\n' + \ 138 | f' {self.flist}\n' 139 | else: 140 | describe = f' flist: {len(self)} files\n' + \ 141 | f' [{self[0]},\n' + \ 142 | f' {self[1]},\n' + \ 143 | f' ...,\n' + \ 144 | f' {self[-1]}]\n' 145 | 146 | describe += f' ftime: {self.start_time} to {self.end_time}\n' + \ 147 | f' flength: {self.flength}\n' 148 | for key in ['nch', 'nt', 'dx', 'fs', 'gauge_length']: 149 | if hasattr(self, key): 150 | long_key = key.rjust(12) 151 | value = getattr(self, key) 152 | describe += f'{long_key}: {value}\n' 153 | 154 | return describe 155 | 156 | __repr__ = __str__ 157 | 158 | def __getitem__(self, i): 159 | return self.flist[i] 160 | 161 | def __len__(self): 162 | return len(self.flist) 163 | 164 | def _sort(self): 165 | sort = np.argsort(self.ftime) 166 | self.ftime = [self.ftime[i] for i in sort] 167 | self.flist = [self.flist[i] for i in sort] 168 | return self 169 | 170 | @property 171 | def start_time(self): 172 | return self.ftime[0] 173 | 174 | @property 175 | def end_time(self): 176 | return self.ftime[-1] + self.flength 177 | 178 | @property 179 | def duration(self): 180 | return self.end_time - self.start_time 181 | 182 | @property 183 | def file_size(self): 184 | return os.path.getsize(self[1]) 185 | 186 | def copy(self): 187 | return deepcopy(self) 188 | 189 | def file_interruption(self, tolerance=0.5): 190 | time_diff = np.diff(self.ftime) 191 | return np.where(abs(time_diff - self.flength) > tolerance)[0] 192 | 193 | def select(self, start=0, end=None, readsec=False, **kwargs): 194 | """ 195 | Select a period of data. 196 | 197 | :param stime, etime: DASDateTime or int. Start and end time or index of 198 | required data. 199 | :param readsec: bool. If True, read as a instance of daspy.Section and 200 | return. If False, update self.flist. 201 | :param ch1: int. The first channel required. Only works when 202 | readsec=True. 203 | :param ch2: int. The last channel required (not included). Only works 204 | when readsec=True. 205 | :param dch: int. Channel step. Only works when readsec=True. 
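A minimal usage sketch (assumes coll is an existing Collection instance; the time span and channel range are illustrative):

>>> sec = coll.select(start=coll.start_time, end=coll.start_time + 60,
...                   readsec=True, ch1=0, ch2=500)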
206 | """ 207 | if end is None: 208 | end = len(self.flist) 209 | if 'stime' in kwargs.keys(): 210 | start = kwargs.pop('stime') 211 | warnings.warn('In future versions, the parameter \'stime\' will be ' 212 | 'replaced by \'start\'.') 213 | if 'etime' in kwargs.keys(): 214 | end = kwargs.pop('etime') 215 | warnings.warn('In future versions, the parameter \'etime\' will be ' 216 | 'replaced by \'end\'.') 217 | 218 | if isinstance(start, datetime): 219 | for i, ftime in enumerate(self.ftime): 220 | if ftime > start: 221 | s = i - 1 222 | break 223 | elif ftime == start: 224 | s = i 225 | break 226 | elif isinstance(start, int): 227 | s = start 228 | 229 | if isinstance(end, datetime): 230 | for i, ftime in enumerate(self.ftime[s:]): 231 | if ftime == end: 232 | e = s + i - 1 233 | break 234 | elif ftime > end: 235 | e = s + i 236 | break 237 | elif isinstance(start, int): 238 | e = end 239 | 240 | flist = self.flist[s:e] 241 | if len(flist) == 0: 242 | warnings.warn('No valid data was selected.') 243 | return None 244 | 245 | if readsec: 246 | sec = read(flist[0], **kwargs) 247 | for f in flist[1:]: 248 | sec += read(f, **kwargs) 249 | sec.trimming(tmin=start if isinstance(start, datetime) else None, 250 | tmax=end if isinstance(end, datetime) else None) 251 | return sec 252 | else: 253 | self.flist = flist 254 | self.ftime = self.ftime[s:e] 255 | return self 256 | 257 | def _optimize_for_continuity(self, operations): 258 | method_list = [] 259 | kwargs_list = [] 260 | if not isinstance(operations[0], (list, tuple)): 261 | operations = [operations] 262 | for opera in operations: 263 | method, kwargs = opera 264 | if method == 'downsampling': 265 | if ('lowpass_filter' in kwargs.keys() and not\ 266 | kwargs['lowpass_filter']) or 'tint' not in kwargs.keys(): 267 | method_list.append('downsampling') 268 | kwargs_list.append(kwargs) 269 | else: 270 | method_list.extend(['lowpass_cheby_2', 'downsampling']) 271 | kwargs['lowpass_filter'] = False 272 | kwargs0 = dict(freq=self.fs/2/kwargs['tint'], zi=0) 273 | kwargs_list.extend([kwargs0, kwargs]) 274 | else: 275 | if method in ['taper', 'cosine_taper']: 276 | kwargs.setdefault('side', 'both') 277 | elif method in ['bandpass', 'bandstop', 'lowpass', 'highpass', 278 | 'lowpass_cheby_2']: 279 | kwargs.setdefault('zi', 0) 280 | 281 | method_list.append(method) 282 | kwargs_list.append(kwargs) 283 | return method_list, kwargs_list 284 | 285 | def _kwargs_initialization(self, method_list, kwargs_list): 286 | for j, method in enumerate(method_list): 287 | if method == 'time_integration': 288 | kwargs_list[j]['c'] = 0 289 | elif method == 'time_differential': 290 | kwargs_list[j]['prepend'] = 0 291 | elif method in ['bandpass', 'bandstop', 'lowpass', 292 | 'highpass', 'lowpass_cheby_2']: 293 | kwargs_list[j]['zi'] = 0 294 | 295 | def process(self, operations, savepath='./processed', merge=1, 296 | suffix='_pro', ftype=None, dtype=None, save_operations=False, 297 | tolerance=0.5, **read_kwargs): 298 | """ 299 | :param operations: list or None. Each element of operations list 300 | should be [str of method name, dict of kwargs]. None for read 301 | files related to operations in savepath. 302 | :param savepath: str. Path to save processed files. 303 | :param merge: int or str. int for merge several processed files into 1. 304 | 'all' for merge all files. 305 | :param suffix: str. Suffix for processed files. 306 | :param ftype: None or str. File format for saving. 
None for automatic 307 | detection, or 'pkl', 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 308 | 'npy'. 309 | :param dtype: str. The data type of the saved data. 310 | :parma save_operations: bool. If True, save the operations to 311 | method_list.pkl and kwargs_list.pkl in savepath. 312 | :param tolerance: float. Tolerance for checking continuity of data. 313 | :param read_kwargs: dict. Paramters for read function. 314 | """ 315 | if not os.path.exists(savepath): 316 | os.makedirs(savepath) 317 | method_file = os.path.join(savepath, 'method_list.pkl') 318 | kwargs_file = os.path.join(savepath, 'kwargs_list.pkl') 319 | if operations is None: 320 | if (not os.path.exists(method_file)) or \ 321 | (not os.path.exists(kwargs_file)): 322 | raise ValueError('No operations input and no method_list.pkl ' 323 | 'and kwargs_list.pkl found in savepath.') 324 | with open(os.path.join(savepath, 'method_list.pkl'), 'wb') as f: 325 | method_list = pickle.load(f) 326 | with open(os.path.join(savepath, 'kwargs_list.pkl'), 'wb') as f: 327 | kwargs_list = pickle.load(f) 328 | else: 329 | method_list, kwargs_list = self._optimize_for_continuity(operations) 330 | if merge == 'all' or merge > len(self): 331 | merge = len(self) 332 | m = 0 333 | try: 334 | for i in tqdm(range(len(self))): 335 | f = self[i] 336 | if os.path.getsize(f) == 0: 337 | warnings.warn(f'{f} is an empty file. Continuous data is ' 338 | 'interrupted here.') 339 | if m > 0: 340 | sec_merge.save(filepath, dtype=dtype) 341 | m = 0 342 | self._kwargs_initialization(method_list, kwargs_list) 343 | continue 344 | try: 345 | sec = read(f, ftype=self.ftype, **read_kwargs) 346 | if sec.data.size == 0: 347 | if m > 0: 348 | sec_merge.save(filepath, dtype=dtype) 349 | m = 0 350 | self._kwargs_initialization(method_list, kwargs_list) 351 | continue 352 | except Exception as e: 353 | warnings.warn(f'Error reading {f}: {e}. Continuous data is ' 354 | 'interrupted here.') 355 | if m > 0: 356 | sec_merge.save(filepath, dtype=dtype) 357 | m = 0 358 | self._kwargs_initialization(method_list, kwargs_list) 359 | continue 360 | for j, method in enumerate(method_list): 361 | if method in ['taper', 'cosine_taper']: 362 | if not ((i==0 and kwargs_list[j]['side'] != 'right') or 363 | (i == len(self) - 1 and kwargs_list[j]['side'] != 364 | 'left')): 365 | continue 366 | out = getattr(sec, method)(**kwargs_list[j]) 367 | if method == 'time_integration': 368 | kwargs_list[j]['c'] = sec.data[:, -1].copy() 369 | elif method == 'time_differential': 370 | kwargs_list[j]['prepend'] = sec.data[:, -1].copy() 371 | elif method in ['bandpass', 'bandstop', 'lowpass', 'highpass', 372 | 'lowpass_cheby_2']: 373 | kwargs_list[j]['zi'] = out 374 | 375 | if m == 0: 376 | sec_merge = sec 377 | f0, f1 = os.path.splitext(os.path.basename(f)) 378 | f1 = f1 if ftype is None else ftype 379 | filepath = os.path.join(savepath, f0+suffix+f1) 380 | elif abs(sec_merge.end_time - sec.start_time) <= tolerance: 381 | sec_merge += sec 382 | else: 383 | warnings.warn(f'The start time of {f} does not correspond ' 384 | 'to the end time of the previous file. 
' 385 | 'Continuous data is interrupted here.') 386 | sec_merge.save(filepath, dtype=dtype) 387 | sec_merge = sec 388 | f0, f1 = os.path.splitext(os.path.basename(f)) 389 | f1 = f1 if ftype is None else ftype 390 | filepath = os.path.join(savepath, f0+suffix+f1) 391 | m = 0 392 | m += 1 393 | if m == merge: 394 | sec_merge.save(filepath, dtype=dtype) 395 | m = 0 396 | if m > 0: 397 | sec_merge.save(filepath, dtype=dtype) 398 | except KeyboardInterrupt as e: 399 | with open(method_file, 'wb') as f: 400 | pickle.dump(method_list, f) 401 | with open(kwargs_file, 'wb') as f: 402 | pickle.dump(kwargs_list, f) 403 | print(f'Process interrupted. Saving method_list and kwargs_list.') 404 | raise e 405 | else: 406 | if save_operations: 407 | with open(method_file, 'wb') as f: 408 | pickle.dump(method_list, f) 409 | with open(kwargs_file, 'wb') as f: 410 | pickle.dump(kwargs_list, f) 411 | print(f'Operations saved to {method_file} and {kwargs_file}.') 412 | else: 413 | if os.path.exists(method_file): 414 | os.remove(method_file) 415 | if os.path.exists(kwargs_file): 416 | os.remove(kwargs_file) 417 | 418 | 419 | # Dynamically add methods for cascade_methods 420 | def _create_cascade_method(method_name): 421 | def cascade_method(self, savepath='./processed', merge=1, 422 | suffix=f'_{method_name}', ftype=None, dtype=None, 423 | save_operations=False, **kwargs): 424 | """ 425 | Automatically generated method for {method_name}. 426 | Applies the {method_name} operation to the data and saves the result. 427 | 428 | :param savepath: str. Path to save processed files. 429 | :param merge: int or str. int for merge several processed files into 1. 430 | 'all' for merge all files. 431 | :param suffix: str. Suffix for processed files. 432 | :param ftype: None or str. None for automatic detection, or 'pkl', 433 | 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 'npy'. 434 | :param dtype: str. The data type of the saved data. 435 | :parma save_operations: bool. If True, save the operations to 436 | method_list.pkl and kwargs_list.pkl in savepath. 437 | :param kwargs: dict. Parameters for the {method_name} operation. 438 | """ 439 | operations = [[method_name, kwargs]] 440 | self.process(operations, savepath=savepath, merge=merge, suffix=suffix, 441 | ftype=ftype, dtype=dtype, save_operations=save_operations) 442 | return cascade_method 443 | 444 | 445 | for method in ['time_integration', 'time_differential', 'downsampling', 446 | 'bandpass', 'bandstop', 'lowpass', 'highpass', 447 | 'lowpass_cheby_2']: 448 | setattr(Collection, method, _create_cascade_method(method)) -------------------------------------------------------------------------------- /daspy/core/dasdatetime.py: -------------------------------------------------------------------------------- 1 | # Purpose: Module for handling DASDateTime objects. 
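# A minimal usage sketch (the timestamp values below are illustrative
# assumptions):
#     t1 = DASDateTime(2024, 1, 1, 0, 0, 0, tzinfo=utc)
#     t2 = DASDateTime(2024, 1, 1, 0, 1, 0, tzinfo=utc)
#     t2 - t1   # elapsed seconds as a float (60.0)
#     t1 + 30   # shifted forward by 30 seconds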
2 | # Author: Minzhe Hu 3 | # Date: 2025.3.29 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import time 6 | from typing import Iterable 7 | from datetime import datetime, timedelta, timezone 8 | 9 | 10 | utc = timezone.utc 11 | local_tz = timezone(timedelta(seconds=-time.altzone)) 12 | 13 | 14 | class DASDateTime(datetime): 15 | def __add__(self, other): 16 | if isinstance(other, Iterable): 17 | out = [] 18 | for t in other: 19 | out.append(self + t) 20 | return out 21 | elif not isinstance(other, timedelta): 22 | other = timedelta(seconds=float(other)) 23 | return super().__add__(other) 24 | 25 | def __sub__(self, other): 26 | if isinstance(other, Iterable): 27 | out = [] 28 | for t in other: 29 | out.append(self - t) 30 | return out 31 | elif isinstance(other, datetime): 32 | return datetime.__sub__(*self._unify_tz(other)).total_seconds() 33 | elif not isinstance(other, timedelta): 34 | other = timedelta(seconds=other) 35 | return super().__sub__(other) 36 | 37 | def __le__(self, other): 38 | return datetime.__le__(*self._unify_tz(other)) 39 | 40 | def __lt__(self, other): 41 | return datetime.__lt__(*self._unify_tz(other)) 42 | 43 | def __ge__(self, other): 44 | return datetime.__ge__(*self._unify_tz(other)) 45 | 46 | def __gt__(self, other): 47 | return datetime.__gt__(*self._unify_tz(other)) 48 | 49 | def _unify_tz(self, other: datetime): 50 | if self.tzinfo and (not other.tzinfo): 51 | return self, other.replace(tzinfo=self.tzinfo) 52 | elif (not self.tzinfo) and other.tzinfo: 53 | return self.replace(tzinfo=other.tzinfo), other 54 | return self, other 55 | 56 | def local(self): 57 | return self.astimezone(tz=local_tz) 58 | 59 | def utc(self): 60 | return self.astimezone(tz=utc) 61 | 62 | def remove_tz(self): 63 | return self.replace(tzinfo=None) 64 | 65 | @classmethod 66 | def from_datetime(cls, dt: datetime): 67 | return cls.fromtimestamp(dt.timestamp(), tz=dt.tzinfo) 68 | 69 | @classmethod 70 | def from_obspy_UTCDateTime(cls, dt): 71 | return cls.from_datetime(dt.datetime) 72 | 73 | def to_datetime(self): 74 | return datetime.fromtimestamp(self.timestamp(), tz=self.tzinfo) 75 | 76 | def to_obspy_UTCDateTime(self): 77 | from obspy import UTCDateTime 78 | return UTCDateTime(UTCDateTime(self.to_datetime())) 79 | 80 | @classmethod 81 | def strptime(cls, date_string, format): 82 | """ 83 | string, format -> new datetime parsed from a string 84 | (like time.strptime()). 85 | """ 86 | from _strptime import _strptime 87 | tt, fraction, gmtoff_fraction = _strptime(date_string, format) 88 | tzname, gmtoff = tt[-2:] 89 | args = tt[:6] + (fraction,) 90 | if gmtoff is not None: 91 | tzdelta = timedelta(seconds=gmtoff, microseconds=gmtoff_fraction) 92 | if tzname: 93 | tz = timezone(tzdelta, tzname) 94 | else: 95 | tz = timezone(tzdelta) 96 | args += (tz,) 97 | elif tt[-3] == 0: 98 | args += (utc,) 99 | 100 | return cls(*args) -------------------------------------------------------------------------------- /daspy/core/example.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/daspy/core/example.pkl -------------------------------------------------------------------------------- /daspy/core/read.py: -------------------------------------------------------------------------------- 1 | # Purpose: Module for reading DAS data. 
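# A minimal usage sketch (the file name and channel range below are
# illustrative assumptions; read() without arguments loads the bundled
# example data):
#     from daspy.core import read
#     sec = read()                             # bundled example.pkl
#     sec = read('survey.h5', ch1=0, ch2=100)  # hypothetical file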
2 | # Author: Minzhe Hu 3 | # Date: 2025.5.21 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | # Partially modified from 6 | # https://github.com/RobbinLuo/das-toolkit/blob/main/DasTools/DasPrep.py 7 | import warnings 8 | import json 9 | import pickle 10 | import numpy as np 11 | import h5py 12 | import segyio 13 | from typing import Union 14 | from pathlib import Path 15 | from nptdms import TdmsFile 16 | from daspy.core.section import Section 17 | from daspy.core.dasdatetime import DASDateTime, utc 18 | 19 | 20 | def read(fname=None, output_type='section', ftype=None, headonly=False, 21 | dtype=None, **kwargs) -> Union[Section, tuple]: 22 | """ 23 | Read a .pkl/.pickle, .tdms, .h5/.hdf5, .segy/.sgy file. 24 | 25 | :param fname: str or pathlib.PosixPath. Path of DAS data file. 26 | :param output_type: str. 'Section' means return an instance of 27 | daspy.Section, 'array' means return numpy.array for data and a 28 | dictionary for metadata. 29 | :param ftype: None, str or function. None for automatic detection, or str to 30 | specify a type of 'pkl', 'pickle', 'tdms', 'h5', 'hdf5', 'segy', 'sgy', 31 | or 'npy', or a function for read data and metadata. 32 | :param headonly. bool. If True, only metadata will be read, the returned 33 | data will be an array of all zeros of the same size as the original 34 | data. 35 | :param ch1: int. The first channel required. 36 | :param ch2: int. The last channel required (not included). 37 | :param dch: int. Channel step. 38 | :param dtype: str. The data type of the returned data. 39 | :return: An instance of daspy.Section, or numpy.array for data and a 40 | dictionary for metadata. 41 | """ 42 | fun_map = {'pkl': _read_pkl, 'tdms': _read_tdms, 'h5': _read_h5, 43 | 'sgy': _read_segy, 'npy': _read_npy} 44 | if fname is None: 45 | fname = Path(__file__).parent / 'example.pkl' 46 | ftype = 'pkl' 47 | if ftype is None: 48 | ftype = str(fname).split('.')[-1].lower() 49 | 50 | if callable(ftype): 51 | try: 52 | data, metadata = ftype(fname, headonly=headonly, **kwargs) 53 | except TypeError: 54 | data, metadata = ftype(fname) 55 | else: 56 | for rtp in [('pickle', 'pkl'), ('hdf5', 'h5'), ('segy', 'sgy')]: 57 | ftype = ftype.replace(*rtp) 58 | data, metadata = fun_map[ftype](fname, headonly=headonly, **kwargs) 59 | 60 | if dtype is not None: 61 | data = data.astype(dtype) 62 | if output_type.lower() == 'section': 63 | metadata['source'] = Path(fname) 64 | metadata['source_type'] = ftype 65 | data[np.isnan(data)] = 0 66 | return Section(data, **metadata) 67 | elif output_type.lower() == 'array': 68 | return data, metadata 69 | 70 | 71 | def _read_pkl(fname, headonly=False, **kwargs): 72 | dch = kwargs.pop('dch', 1) 73 | with open(fname, 'rb') as f: 74 | pkl_data = pickle.load(f) 75 | if isinstance(pkl_data, np.ndarray): 76 | warnings.warn('This data format doesn\'t include channel interval' 77 | 'and sampling rate. 
Please set manually') 78 | if headonly: 79 | return np.zeros_like(pkl_data), {'dx': None, 'fs': None} 80 | else: 81 | ch1 = kwargs.pop('ch1', 0) 82 | ch2 = kwargs.pop('ch2', len(pkl_data)) 83 | return pkl_data[ch1:ch2:dch], {'dx': None, 'fs': None} 84 | elif isinstance(pkl_data, dict): 85 | data = pkl_data.pop('data') 86 | if headonly: 87 | data = np.zeros_like(data) 88 | else: 89 | if 'ch1' in kwargs.keys() or 'ch2' in kwargs.keys(): 90 | if 'start_channel' in pkl_data.keys(): 91 | s_chn = pkl_data['start_channel'] 92 | print(f'Data is start with channel {s_chn}.') 93 | else: 94 | s_chn = 0 95 | ch1 = kwargs.pop('ch1', s_chn) 96 | ch2 = kwargs.pop('ch2', s_chn + len(data)) 97 | data = data[ch1 - s_chn:ch2 - s_chn, :] 98 | pkl_data['start_channel'] = ch1 99 | return data, pkl_data 100 | else: 101 | raise TypeError('Unknown data type.') 102 | 103 | 104 | def _read_h5_headers(group): 105 | headers = {} 106 | if len(group.attrs) != 0: 107 | headers['attrs'] = dict(group.attrs) 108 | if isinstance(group, h5py._hl.dataset.Dataset): 109 | return headers 110 | for key, value in group.items(): 111 | try: 112 | gp_headers = _read_h5_headers(value) 113 | except AttributeError: 114 | headers[key] = value 115 | if len(gp_headers): 116 | headers[key] = gp_headers 117 | 118 | return headers 119 | 120 | 121 | def _read_h5_starttime(h5_file): 122 | try: 123 | stime = h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] 124 | except KeyError: 125 | try: 126 | stime = h5_file['Acquisition'].attrs['MeasurementStartTime'] 127 | except KeyError: 128 | try: 129 | stime = h5_file['Acquisition/Raw[0]/RawDataTime/'][0] 130 | except KeyError: 131 | return 0 132 | if isinstance(stime, bytes): 133 | stime = stime.decode('ascii') 134 | 135 | if isinstance(stime, str): 136 | if len(stime) > 26: 137 | stime = DASDateTime.strptime(stime, '%Y-%m-%dT%H:%M:%S.%f%z') 138 | else: 139 | stime = DASDateTime.strptime(stime, '%Y-%m-%dT%H:%M:%S.%f').\ 140 | astimezone(utc) 141 | else: 142 | stime = DASDateTime.fromtimestamp(stime / 1e6).astimezone(utc) 143 | 144 | return stime 145 | 146 | 147 | def _read_h5(fname, headonly=False, **kwargs): 148 | with h5py.File(fname, 'r') as h5_file: 149 | dch = kwargs.pop('dch', 1) 150 | group = list(h5_file.keys())[0] 151 | if len(h5_file.keys()) >= 10: # ASN/OptoDAS https://github.com/ASN-Norway/simpleDAS 152 | ch1 = kwargs.pop('ch1', 0) 153 | if h5_file['header/dimensionNames'][0] == b'time': 154 | nch = h5_file['data'].shape[1] 155 | if headonly: 156 | data = np.zeros_like(h5_file['data']).T 157 | else: 158 | ch2 = kwargs.pop('ch2', nch) 159 | data = h5_file['data'][:, ch1:ch2:dch].T 160 | elif h5_file['header/dimensionNames'][0] == b'distance': 161 | nch = h5_file['data'].shape[1] 162 | if headonly: 163 | data = np.zeros_like(h5_file['data']) 164 | else: 165 | ch2 = kwargs.pop('ch2', nch) 166 | data = h5_file['data'][ch1:ch2:dch, :] 167 | dx = h5_file['header/dx'][()] 168 | start_time = DASDateTime.fromtimestamp( 169 | h5_file['header/time'][()]).utc() 170 | metadata = {'dx': dx * dch, 'fs': 1 / h5_file['header/dt'][()], 171 | 'start_time': start_time, 'start_channel': ch1, 172 | 'start_distance': ch1 * dx, 173 | 'scale': h5_file['header/dataScale'][()]} 174 | if h5_file['header/gaugeLength'][()] != np.nan: 175 | metadata['guage_length'] = h5_file['header/gaugeLength'][()] 176 | elif len(h5_file.keys()) == 5: # AP Sensing 177 | # read data 178 | nch = h5_file['strain'].shape[1] 179 | ch1 = kwargs.pop('ch1', 0) 180 | ch2 = kwargs.pop('ch2', nch) 181 | if headonly: 182 | data = 
np.zeros_like(h5_file['strain']).T 183 | else: 184 | data = h5_file['strain'][:, ch1:ch2:dch].T 185 | 186 | # read metadata 187 | dx = h5_file['spatialsampling'][()] 188 | metadata = {'fs': h5_file['RepetitionFrequency'][()], 189 | 'dx': dx * dch, 'start_channel': ch1, 190 | 'start_distance': ch1 * dx, 191 | 'gauge_length': h5_file.get('GaugeLength')[()]} 192 | elif len(h5_file.keys()) == 3: # OpataSense 193 | nch = h5_file['data'].shape[1] 194 | ch1 = kwargs.pop('ch1', 0) 195 | ch2 = kwargs.pop('ch2', nch) 196 | dch = kwargs.pop('dch', 1) 197 | if headonly: 198 | data = np.zeros_like(h5_file['data']) 199 | else: 200 | data = h5_file['data'][ch1:ch2:dch, :] 201 | dx = (h5_file['x_axis'][-1] - h5_file['x_axis'][0]) / \ 202 | (len(h5_file['x_axis']) - 1) 203 | fs = (len(h5_file['t_axis']) - 1) / (h5_file['t_axis'][-1] - 204 | h5_file['t_axis'][0]) 205 | metadata = {'dx': dx, 'fs': fs, 'start_channel': ch1, 206 | 'start_distance': h5_file['x_axis'][0] + dx * ch1, 207 | 'start_time': h5_file['t_axis'][0]} 208 | elif set(h5_file.keys()) == {'Mapping', 'Acquisition'}: # Silixa/iDAS 209 | nch = h5_file['Acquisition/Raw[0]'].attrs['NumberOfLoci'] 210 | ch1 = kwargs.pop('ch1', 0) 211 | ch2 = kwargs.pop('ch2', nch) 212 | if h5_file['Acquisition/Raw[0]/RawData/'].shape[0] == nch: 213 | if headonly: 214 | data = np.zeros_like(h5_file['Acquisition/Raw[0]/RawData/']) 215 | else: 216 | data = h5_file['Acquisition/Raw[0]/RawData/']\ 217 | [ch1:ch2:dch, :] 218 | else: 219 | if headonly: 220 | data = np.zeros_like( 221 | h5_file['Acquisition/Raw[0]/RawData/']).T 222 | else: 223 | data = h5_file['Acquisition/Raw[0]/RawData/']\ 224 | [:, ch1:ch2:dch].T 225 | 226 | dx = np.mean(h5_file['Mapping/MeasuredSpatialResolution']) 227 | start_distance = h5_file['Acquisition/Custom/UserSettings'].\ 228 | attrs['StartDistance'] + ch1 * dx 229 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] 230 | fs = h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] 231 | gauge_length = h5_file['Acquisition'].attrs['GaugeLength'] 232 | scale = h5_file['Acquisition/Raw[0]'].attrs['AmpScaling'] 233 | geometry = np.vstack((h5_file['Mapping/Lon'], 234 | h5_file['Mapping/Lat'])).T 235 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1, 236 | 'start_distance': ch1 * dx, 237 | 'gauge_length': gauge_length, 'geometry': geometry, 238 | 'scale': scale} 239 | metadata['start_time'] = _read_h5_starttime(h5_file) 240 | elif group == 'Acquisition': 241 | # OptaSens/ODH, Silixa/iDAS, Sintela/Onyx, Smart Sensing/ZD DAS 242 | # read data 243 | try: 244 | nch = h5_file['Acquisition'].attrs['NumberOfLoci'] 245 | except KeyError: 246 | nch = len(h5_file['Acquisition/Raw[0]/RawData/']) 247 | ch1 = kwargs.pop('ch1', 0) 248 | ch2 = kwargs.pop('ch2', nch) 249 | if h5_file['Acquisition/Raw[0]/RawData/'].shape[0] == nch: 250 | if headonly: 251 | data = np.zeros_like(h5_file['Acquisition/Raw[0]/RawData/']) 252 | else: 253 | data = h5_file['Acquisition/Raw[0]/RawData/']\ 254 | [ch1:ch2:dch, :] 255 | else: 256 | if headonly: 257 | data = np.zeros_like( 258 | h5_file['Acquisition/Raw[0]/RawData/']).T 259 | else: 260 | data = h5_file['Acquisition/Raw[0]/RawData/']\ 261 | [:, ch1:ch2:dch].T 262 | 263 | # read metadata 264 | try: 265 | fs = h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] 266 | except KeyError: 267 | time_arr = h5_file['Acquisition/Raw[0]/RawDataTime/'] 268 | fs = 1 / (np.diff(time_arr).mean() / 1e6) 269 | 270 | dx = h5_file['Acquisition'].attrs['SpatialSamplingInterval'] 271 | gauge_length = 
h5_file['Acquisition'].attrs['GaugeLength'] 272 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1, 273 | 'start_distance': ch1 * dx, 274 | 'gauge_length': gauge_length} 275 | 276 | metadata['start_time'] = _read_h5_starttime(h5_file) 277 | elif group == 'raw': 278 | nch = len(h5_file['raw']) 279 | ch1 = kwargs.pop('ch1', 0) 280 | ch2 = kwargs.pop('ch2', nch) 281 | if headonly: 282 | data = np.zeros_like(h5_file['raw']) 283 | else: 284 | data = h5_file['raw'][ch1:ch2:dch, :] 285 | fs = round(1 / np.diff(h5_file['timestamp']).mean()) 286 | start_time = DASDateTime.fromtimestamp( 287 | h5_file['timestamp'][0]).astimezone(utc) 288 | warnings.warn('This data format doesn\'t include channel interval. ' 289 | 'Please set manually') 290 | metadata = {'dx': None, 'fs': fs, 'start_channel': ch1, 291 | 'start_time': start_time} 292 | elif group == 'data': # https://ai4eps.github.io/homepage/ml4earth/seismic_event_format_das/ 293 | nch = h5_file['data'].shape[1] 294 | ch1 = kwargs.pop('ch1', 0) 295 | ch2 = kwargs.pop('ch2', nch) 296 | dch = kwargs.pop('dch', 1) 297 | if headonly: 298 | data = np.zeros_like(h5_file['data']) 299 | else: 300 | data = h5_file['data'][ch1:ch2:dch, :] 301 | attr = h5_file['data'].attrs 302 | dx = attr['dx_m'] 303 | metadata = {'dx': dx, 'fs': 1 / attr['dt_s'], 'start_channel': ch1, 304 | 'start_distance': ch1 * dx, 305 | 'start_time': DASDateTime.strptime( 306 | attr['begin_time'], '%Y-%m-%dT%H:%M:%S.%f%z'), 307 | 'data_type': attr['unit']} 308 | if 'event_time' in attr.keys(): 309 | try: 310 | origin_time = DASDateTime.strptime( 311 | attr['event_time'], '%Y-%m-%dT%H:%M:%S.%f%z') 312 | except ValueError: 313 | origin_time = DASDateTime.strptime( 314 | attr['event_time'], '%Y-%m-%dT%H:%M:%S.%f') 315 | metadata['origin_time'] = origin_time 316 | 317 | elif group == 'data_product': 318 | # read data 319 | nch = h5_file.attrs['nx'] 320 | ch1 = kwargs.pop('ch1', 0) 321 | ch2 = kwargs.pop('ch2', nch) 322 | array_shape = h5_file['data_product/data'].shape 323 | if array_shape[0] == nch: 324 | if headonly: 325 | data = np.zeros_like(h5_file['data_product/data']) 326 | else: 327 | data = h5_file['data_product/data'][ch1:ch2:dch, :] 328 | else: 329 | if headonly: 330 | data = np.zeros_like(h5_file['data_product/data']).T 331 | else: 332 | data = h5_file['data_product/data'][:, ch1:ch2:dch].T 333 | 334 | # read metadata 335 | fs = 1 / h5_file.attrs['dt_computer'] 336 | dx = h5_file.attrs['dx'] 337 | gauge_length = h5_file.attrs['gauge_length'] 338 | if h5_file.attrs['saving_start_gps_time'] > 0: 339 | start_time = DASDateTime.fromtimestamp( 340 | h5_file.attrs['file_start_gps_time']) 341 | else: 342 | start_time = DASDateTime.fromtimestamp( 343 | h5_file.attrs['file_start_computer_time']) 344 | data_type = h5_file.attrs['data_product'] 345 | 346 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1, 347 | 'start_distance': ch1 * dx, 348 | 'start_time': start_time.astimezone(utc), 349 | 'gauge_length': gauge_length, 'data_type': data_type} 350 | else: # Febus 351 | acquisition = list(h5_file[f'{group}/Source1/Zone1'].keys())[0] 352 | # read data 353 | start_channel = int(h5_file[f'{group}/Source1/Zone1']. 
354 | attrs['Extent'][0]) 355 | dataset = h5_file[f'{group}/Source1/Zone1/{acquisition}'] 356 | nch = dataset.shape[-1] 357 | ch1 = kwargs.pop('ch1', start_channel) 358 | ch2 = kwargs.pop('ch2', start_channel + nch) 359 | if headonly: 360 | data = np.zeros_like(dataset).T.reshape((nch, -1)) 361 | else: 362 | if len(dataset.shape) == 3: # Febus A1-R 363 | data = dataset[:, :, ch1 - start_channel:ch2 - start_channel 364 | :dch].reshape((-1, (ch2 - ch1) // dch)).T 365 | elif len(dataset.shape) == 2: # Febus A1 366 | data = dataset[:, ch1 - start_channel:ch2 - start_channel: 367 | dch].T 368 | # read metadata 369 | attrs = h5_file[f'{group}/Source1/Zone1'].attrs 370 | dx = attrs['Spacing'][0] 371 | try: 372 | fs = float(attrs['FreqRes']) 373 | except KeyError: 374 | try: 375 | fs = (attrs['PulseRateFreq'][0] / 376 | attrs['SamplingRes'][0]) / 1000 377 | except KeyError: 378 | fs = attrs['SamplingRate'][0] 379 | start_distance = attrs['Origin'][0] 380 | time = h5_file[f'{group}/Source1/time'] 381 | if len(time.shape) == 2: # Febus A1-R 382 | start_time = DASDateTime.fromtimestamp(time[0, 0]).\ 383 | astimezone(utc) 384 | elif len(time.shape) == 1: # Febus A1 385 | start_time = DASDateTime.fromtimestamp(time[0]).astimezone(utc) 386 | gauge_length = attrs['GaugeLength'][0] 387 | metadata = {'dx': dx * dch, 'fs': fs, 'start_channel': ch1, 388 | 'start_distance': start_distance + 389 | (ch1 - start_channel) * dx, 390 | 'start_time': start_time, 'gauge_length': gauge_length} 391 | 392 | metadata['headers'] = _read_h5_headers(h5_file) 393 | 394 | return data, metadata 395 | 396 | 397 | def _read_tdms(fname, headonly=False, **kwargs): 398 | # https://nptdms.readthedocs.io/en/stable/quickstart.html 399 | with TdmsFile.read(fname) as tdms_file: 400 | group_name = [group.name for group in tdms_file.groups()] 401 | if 'Measurement' in group_name: 402 | key = 'Measurement' 403 | elif 'DAS' in group_name: 404 | key = 'DAS' 405 | else: 406 | key = group_name[0] 407 | 408 | headers = {**tdms_file.properties, **tdms_file[key].properties} 409 | nch = len(tdms_file[key]) 410 | dch = kwargs.pop('dch', 1) 411 | # read data 412 | if nch > 1: 413 | start_channel = min(int(channel.name) for channel in 414 | tdms_file[key].channels()) 415 | ch1 = max(kwargs.pop('ch1', start_channel), start_channel) 416 | ch2 = min(kwargs.pop('ch2', start_channel + nch), 417 | start_channel + nch) 418 | if headonly: 419 | nt = len(tdms_file[key][str(start_channel)]) 420 | data = np.zeros((nch, nt)) 421 | else: 422 | data = np.asarray([tdms_file[key][str(ch)] 423 | for ch in range(ch1, ch2, dch)]) 424 | elif nch == 1: 425 | try: 426 | start_channel = int(headers['Initial Channel']) 427 | except KeyError: 428 | start_channel = 0 429 | 430 | ch1 = max(kwargs.pop('ch1', start_channel), start_channel) 431 | nch = int(headers['Total Channels']) 432 | ch2 = min(kwargs.pop('ch2', start_channel + nch), 433 | start_channel + nch) 434 | if headonly: 435 | data = np.zeros(len(tdms_file[key].channels()[0])).\ 436 | reshape((nch, -1)) 437 | else: 438 | data = np.asarray(tdms_file[key].channels()[0]).\ 439 | reshape((-1, nch)).T 440 | data = data[ch1 - start_channel:ch2 - start_channel:dch] 441 | 442 | # read metadata 443 | try: 444 | dx = headers['SpatialResolution[m]'] 445 | except KeyError: 446 | try: 447 | dx = headers['Spatial Resolution'] 448 | except KeyError: 449 | dx = None 450 | 451 | try: 452 | fs = headers['SamplingFrequency[Hz]'] 453 | except KeyError: 454 | try: 455 | fs = 1 / headers['Time Base'] 456 | except KeyError: 457 | fs = None 
458 | 459 | try: 460 | start_distance = headers['Start Distance (m)'] + \ 461 | dx * (ch1 - start_channel) 462 | except KeyError: 463 | start_distance = dx * ch1 464 | 465 | try: 466 | start_time = DASDateTime.strptime(headers['ISO8601 Timestamp'], 467 | '%Y-%m-%dT%H:%M:%S.%f%z') 468 | except ValueError: 469 | start_time = DASDateTime.strptime(headers['ISO8601 Timestamp'], 470 | '%Y-%m-%dT%H:%M:%S.%f') 471 | except KeyError: 472 | start_time = 0 473 | for key in ['GPSTimeStamp', 'CPUTimeStamp', 'Trigger Time']: 474 | if key in headers.keys(): 475 | if headers[key]: 476 | start_time = DASDateTime.from_datetime(headers[key]. 477 | item()) 478 | break 479 | 480 | if dx is not None: 481 | dx *= dch 482 | metadata = {'dx': dx, 'fs': fs, 'start_channel': ch1, 483 | 'start_distance': start_distance, 'start_time': start_time, 484 | 'headers': headers} 485 | 486 | if 'GaugeLength' in headers.keys(): 487 | metadata['gauge_length'] = headers['GaugeLength'] 488 | 489 | return data, metadata 490 | 491 | 492 | def _read_segy(fname, headonly=False, **kwargs): 493 | # https://github.com/equinor/segyio-notebooks/blob/master/notebooks/basic/02_segy_quicklook.ipynb 494 | with segyio.open(fname, ignore_geometry=True) as segy_file: 495 | nch = segy_file.tracecount 496 | ch1 = kwargs.pop('ch1', 0) 497 | ch2 = kwargs.pop('ch2', nch) 498 | dch = kwargs.pop('dch', 1) 499 | 500 | # read data 501 | if headonly: 502 | data = np.zeros_like(segy_file.trace.raw[:]) 503 | else: 504 | data = segy_file.trace.raw[ch1:ch2:dch] 505 | 506 | # read metadata: 507 | fs = 1 / (segyio.tools.dt(segy_file) / 1e6) 508 | metadata = {'dx': None, 'fs': fs, 'start_channel': ch1} 509 | warnings.warn('This data format doesn\'t include channel interval.' 510 | 'Please set manually') 511 | 512 | return data, metadata 513 | 514 | 515 | def _read_npy(fname, headonly=False, **kwargs): 516 | data = np.load(fname) 517 | if headonly: 518 | return np.zeros_like(data), {'dx': None, 'fs': None} 519 | else: 520 | ch1 = kwargs.pop('ch1', 0) 521 | ch2 = kwargs.pop('ch2', len(data)) 522 | dch = kwargs.pop('dch', 1) 523 | warnings.warn('This data format doesn\'t include channel interval and ' 524 | 'sampling rate. Please set manually') 525 | return data[ch1:ch2:dch], {'dx': None, 'fs': None} 526 | 527 | 528 | def read_json(fname, output_type='dict'): 529 | """ 530 | Read .json metadata file. See {Lai et al. , 2024, Seismol. Res. Lett.} 531 | 532 | :param fname: str or pathlib.PosixPath. Path of json file. 533 | :param output_type: str. 'dict' means return a dictionary, and 'Section' 534 | means return a empty daspy.Section instance with metadata. 535 | :return: A dictionary of metadata or an instance of daspy.Section without 536 | data. 
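A minimal usage sketch (the file name is an illustrative assumption):

>>> from daspy.core.read import read_json
>>> headers = read_json('metadata.json')
>>> sec = read_json('metadata.json', output_type='section')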
537 | """ 538 | with open(fname, 'r') as fcc_file: 539 | headers = json.load(fcc_file) 540 | if output_type.lower() == 'dict': 541 | return headers 542 | elif output_type.lower() in ['section', 'sec']: 543 | if len(headers['Overview']['Interrogator']) > 1: 544 | case_type = 'Multiple interrogators, single cable' 545 | sec_num = len(headers['Overview']['Interrogator']) 546 | sec = [] 547 | for interrogator in headers['Overview']['Interrogator']: 548 | nch = interrogator['Acquisition'][0]['Attributes']['number_of_channels'] 549 | data = np.zeros((nch, 0)) 550 | dx = interrogator['Acquisition'][0]['Attributes']['spatial_sampling_interval'] 551 | fs = interrogator['Acquisition'][0]['Attributes']['acquisition_sample_rate'] 552 | gauge_length = interrogator['Acquisition'][0]['Attributes']['gauge_length'] 553 | sec.append(Section(data, dx, fs, gauge_length=gauge_length, 554 | headers=headers)) 555 | elif len(headers['Overview']['Interrogator'][0]['Acquisition']) > 1: 556 | case_type = 'Active survey' 557 | sec_num = len( 558 | headers['Overview']['Interrogator'][0]['Acquisition']) 559 | sec = [] 560 | for acquisition in headers['Overview']['Interrogator'][0]['Acquisition']: 561 | nch = acquisition['Attributes']['number_of_channels'] 562 | data = np.zeros((nch, 0)) 563 | dx = acquisition['Attributes']['spatial_sampling_interval'] 564 | fs = acquisition['Attributes']['acquisition_sample_rate'] 565 | gauge_length = acquisition['Attributes']['gauge_length'] 566 | sec.append(Section(data, dx, fs, gauge_length=gauge_length, 567 | headers=headers)) 568 | else: 569 | sec_num = 1 570 | if len(headers['Overview']['Cable']) > 1: 571 | case_type = 'Single interrogators, multiple cable' 572 | else: 573 | env = headers['Overview']['Cable'][0]['Attributes']['cable_environment'] 574 | if env == 'trench': 575 | case_type = 'Direct buried' 576 | elif env == 'conduit': 577 | case_type = 'Dark fiber' 578 | elif env in ['wireline', 'outside borehole casing']: 579 | case_type = 'Borehole cable' 580 | nch = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['number_of_channels'] 581 | dx = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['spatial_sampling_interval'] 582 | fs = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['acquisition_sample_rate'] 583 | gauge_length = headers['Overview']['Interrogator'][0]['Acquisition'][0]['Attributes']['gauge_length'] 584 | data = np.zeros((nch, 0)) 585 | sec = Section(data, dx, fs, gauge_length=gauge_length, 586 | headers=headers) 587 | 588 | print(f'For case of {case_type}, create {sec_num} empty daspy.Section ' 589 | 'instance(s)') 590 | return sec 591 | -------------------------------------------------------------------------------- /daspy/core/write.py: -------------------------------------------------------------------------------- 1 | # Purpose: Module for writing DAS data. 
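# A minimal usage sketch (assumes sec is an existing daspy.Section; the file
# names are illustrative assumptions):
#     from daspy.core.write import write
#     write(sec, 'processed.h5')                        # format from extension
#     write(sec, 'processed.sgy', raw_fname='raw.sgy')  # reuse original headers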
2 | # Author: Minzhe Hu 3 | # Date: 2025.5.21 4 | # Email: hmz2018@mail.ustc.edu.cn 5 | import os 6 | import warnings 7 | import pickle 8 | import numpy as np 9 | import h5py 10 | import segyio 11 | from shutil import copyfile 12 | from nptdms import TdmsFile, TdmsWriter, RootObject, GroupObject, ChannelObject 13 | from datetime import datetime 14 | 15 | 16 | def write(sec, fname, ftype=None, raw_fname=None, dtype=None): 17 | fun_map = {'tdms': _write_tdms, 'h5': _write_h5, 'sgy': _write_segy} 18 | if ftype is None: 19 | ftype = str(fname).lower().split('.')[-1] 20 | ftype.replace('hdf5', 'h5') 21 | ftype.replace('segy', 'sgy') 22 | if dtype is not None: 23 | sec = sec.copy() 24 | sec.data = sec.data.astype(dtype) 25 | if ftype == 'pkl': 26 | write_pkl(sec, fname) 27 | elif ftype == 'npy': 28 | np.save(fname, sec.data) 29 | else: 30 | fun_map[ftype](sec, fname, raw_fname=raw_fname) 31 | return None 32 | 33 | 34 | def write_pkl(sec, fname): 35 | with open(fname, 'wb') as f: 36 | pickle.dump(sec.__dict__, f) 37 | return None 38 | 39 | 40 | def _write_tdms(sec, fname, raw_fname=None): 41 | if raw_fname is None: 42 | key = 'Measurement' 43 | file_prop = {} 44 | group_prop = {} 45 | else: 46 | original_file = TdmsFile(raw_fname) 47 | group_name = [group.name for group in original_file.groups()] 48 | if 'Measurement' in group_name: 49 | key = 'Measurement' 50 | elif 'DAS' in group_name: 51 | key = 'DAS' 52 | else: 53 | key = group_name[0] 54 | file_prop = original_file.properties 55 | group_prop = original_file[key].properties 56 | 57 | if 'Spatial Resolution' in group_prop.keys(): 58 | group_prop['Spatial Resolution'] = sec.dx 59 | else: 60 | file_prop['SpatialResolution[m]'] = sec.dx 61 | 62 | if 'Time Base' in group_prop.keys(): 63 | group_prop['Time Base'] = 1. 
/ sec.fs 64 | else: 65 | file_prop['SamplingFrequency[Hz]'] = sec.fs 66 | 67 | if 'Total Channels' in group_prop.keys(): 68 | group_prop['Total Channels'] = sec.nch 69 | 70 | if 'Initial Channel' in group_prop.keys(): 71 | group_prop['Initial Channel'] = sec.start_channel 72 | 73 | file_prop['Start Distance (m)'] = sec.start_distance 74 | if isinstance(sec.start_time, datetime): 75 | start_time = sec.start_time 76 | else: 77 | start_time = datetime.fromtimestamp(sec.start_time) 78 | 79 | if raw_fname is None: 80 | file_prop['ISO8601 Timestamp'] = start_time.strftime( 81 | '%Y-%m-%dT%H:%M:%S.%f%z') 82 | group_prop['Trigger Time'] = np.datetime64(start_time.remove_tz()) 83 | else: 84 | if 'ISO8601 Timestamp' in file_prop.keys(): 85 | file_prop['ISO8601 Timestamp'] = start_time.strftime( 86 | '%Y-%m-%dT%H:%M:%S.%f%z') 87 | else: 88 | for s in ['GPSTimeStamp', 'CPUTimeStamp', 'Trigger Time']: 89 | if s in group_prop.keys(): 90 | group_prop[s] = np.datetime64(start_time.remove_tz()) 91 | break 92 | 93 | if hasattr(sec, 'gauge_length'): 94 | file_prop['GaugeLength'] = sec.gauge_length 95 | 96 | with TdmsWriter(fname) as tdms_file: 97 | root_object = RootObject(file_prop) 98 | group_object = GroupObject(key, properties=group_prop) 99 | if raw_fname and len(original_file[key]) == 1: 100 | channel = ChannelObject(key, original_file[key].channels()[0].name, 101 | sec.data.T.flatten(), properties={}) 102 | tdms_file.write_segment([root_object, group_object, channel]) 103 | else: 104 | channel_list = [] 105 | for ch, d in enumerate(sec.data): 106 | channel_list.append(ChannelObject(key, 107 | str(ch + sec.start_channel), 108 | d, properties={})) 109 | 110 | tdms_file.write_segment([root_object, group_object] + channel_list) 111 | return None 112 | 113 | 114 | def _update_h5_dataset(h5_file, path, name, data): 115 | attrs = h5_file[path + name].attrs 116 | del h5_file[path + name] 117 | h5_file.get(path).create_dataset(name, data=data) 118 | for key, value in attrs.items(): 119 | h5_file[path + name].attrs[key] = value 120 | return None 121 | 122 | 123 | def _write_h5(sec, fname, raw_fname=None): 124 | if raw_fname is None: 125 | with h5py.File(fname, 'w') as h5_file: 126 | h5_file.create_group('Acquisition/Raw[0]') 127 | h5_file.get('Acquisition/Raw[0]/').\ 128 | create_dataset('RawData', data=sec.data) 129 | if isinstance(sec.start_time, datetime): 130 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] = \ 131 | np.bytes_( 132 | sec.start_time.strftime('%Y-%m-%dT%H:%M:%S.%f%z')) 133 | stime = sec.start_time.timestamp() * 1e6 134 | DataTime = np.arange( 135 | stime, stime + sec.nt / sec.fs, 1 / sec.fs) 136 | else: 137 | h5_file['Acquisition/Raw[0]/RawData'].attrs['PartStartTime'] = \ 138 | np.bytes_(str(sec.start_time)) 139 | DataTime = sec.start_time + np.arange(0, sec.nt / sec.fs, 140 | 1 / sec.fs) 141 | 142 | h5_file.get('Acquisition/Raw[0]/').\ 143 | create_dataset('RawDataTime', data=DataTime) 144 | h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] = sec.fs 145 | h5_file['Acquisition'].attrs['SpatialSamplingInterval'] = sec.dx 146 | if hasattr(sec, 'gauge_length'): 147 | h5_file['Acquisition'].attrs['GaugeLength'] = sec.gauge_length 148 | else: 149 | h5_file['Acquisition'].attrs['GaugeLength'] = np.nan 150 | else: 151 | if not os.path.exists(fname) or not os.path.samefile(raw_fname, fname): 152 | copyfile(raw_fname, fname) 153 | with h5py.File(fname, 'r+') as h5_file: 154 | group = list(h5_file.keys())[0] 155 | if len(h5_file.keys()) == 10: 156 | if 
h5_file['header/dimensionNames'][0] == b'time':
157 |                 _update_h5_dataset(h5_file, '/', 'data', sec.data.T)
158 |             elif h5_file['header/dimensionNames'][0] == b'distance':
159 |                 _update_h5_dataset(h5_file, '/', 'data', sec.data)
160 |
161 |             _update_h5_dataset(h5_file, 'header', 'dx', sec.dx)
162 |             _update_h5_dataset(h5_file, 'header', 'dt', 1 / sec.fs)
163 |             if isinstance(sec.start_time, datetime):
164 |                 _update_h5_dataset(h5_file, 'header', 'time',
165 |                                    sec.start_time.timestamp())
166 |             else:
167 |                 _update_h5_dataset(h5_file, 'header', 'time',
168 |                                    sec.start_time)
169 |             if hasattr(sec, 'gauge_length'):
170 |                 _update_h5_dataset(h5_file, '/', 'gaugeLength',
171 |                                    sec.gauge_length)
172 |             if hasattr(sec, 'scale'):
173 |                 _update_h5_dataset(h5_file, '/', 'dataScale', sec.scale)
174 |         elif len(h5_file.keys()) == 5:
175 |             _update_h5_dataset(h5_file, '/', 'strain', sec.data.T)
176 |             _update_h5_dataset(h5_file, '/', 'spatialsampling', sec.dx)
177 |             _update_h5_dataset(h5_file, '/', 'RepetitionFrequency', sec.fs)
178 |             if hasattr(sec, 'gauge_length'):
179 |                 _update_h5_dataset(h5_file, '/', 'GaugeLength',
180 |                                    sec.gauge_length)
181 |         elif len(h5_file.keys()) == 3:
182 |             _update_h5_dataset(h5_file, '/', 'data', sec.data)
183 |             _update_h5_dataset(h5_file, '/', 'x_axis',
184 |                                sec.start_distance + np.arange(sec.nch) * sec.dx)
185 |             _update_h5_dataset(h5_file, '/', 't_axis',
186 |                                sec.start_time + np.arange(sec.nt) * sec.dt)
187 |         elif group == 'Acquisition':
188 |             h5_file['Acquisition'].attrs['NumberOfLoci'] = sec.nch
189 |             _update_h5_dataset(h5_file, 'Acquisition/Raw[0]/', 'RawData',
190 |                                sec.data)
191 |             if isinstance(sec.start_time, datetime):
192 |                 if isinstance(h5_file['Acquisition/Raw[0]/RawData'].
193 |                               attrs['PartStartTime'], bytes):
194 |                     h5_file['Acquisition/Raw[0]/RawData'].\
195 |                         attrs['PartStartTime'] = np.bytes_(
196 |                             sec.start_time.strftime('%Y-%m-%dT%H:%M:%S.%f%z'))
197 |                 else:
198 |                     h5_file['Acquisition/Raw[0]/RawData'].\
199 |                         attrs['PartStartTime'] = sec.start_time.strftime(
200 |                             '%Y-%m-%dT%H:%M:%S.%f%z')
201 |                 stime = sec.start_time.timestamp() * 1e6
202 |                 DataTime = np.arange(
203 |                     stime, stime + sec.nt / sec.fs, 1 / sec.fs)
204 |             else:
205 |                 h5_file['Acquisition/Raw[0]/RawData'].\
206 |                     attrs['PartStartTime'] = np.bytes_(str(sec.start_time))
207 |                 DataTime = sec.start_time + np.arange(0, sec.nt / sec.fs,
208 |                                                       1 / sec.fs)
209 |             _update_h5_dataset(h5_file, 'Acquisition/Raw[0]/',
210 |                                'RawDataTime', DataTime)
211 |             h5_file['Acquisition/Raw[0]'].attrs['OutputDataRate'] = sec.fs
212 |             h5_file['Acquisition'].attrs['SpatialSamplingInterval'] = sec.dx
213 |             if hasattr(sec, 'gauge_length'):
214 |                 h5_file['Acquisition'].attrs['GaugeLength'] = \
215 |                     sec.gauge_length
216 |         elif group == 'raw':
217 |             _update_h5_dataset(h5_file, '/', 'raw', sec.data)
218 |             DataTime = sec.start_time.timestamp() + \
219 |                 np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
220 |             _update_h5_dataset(h5_file, '/', 'timestamp', DataTime)
221 |         elif group == 'data': # https://ai4eps.github.io/homepage/ml4earth/seismic_event_format_das/
222 |             _update_h5_dataset(h5_file, '/', 'data', sec.data)
223 |             h5_file['data'].attrs['dx_m'] = sec.dx
224 |             h5_file['data'].attrs['dt_s'] = 1 / sec.fs
225 |             h5_file['data'].attrs['begin_time'] = \
226 |                 datetime.strftime(sec.start_time, '%Y-%m-%dT%H:%M:%S.%f%z')
227 |             h5_file['data'].attrs['unit'] = sec.data_type
228 |         elif group == 'data_product':
229 |             _update_h5_dataset(h5_file, 'data_product/', 'data', sec.data)
230 |             h5_file.attrs['dt_computer'] = 1 / sec.fs
231 |             h5_file.attrs['dx'] = sec.dx
232 |             h5_file.attrs['gauge_length'] = sec.gauge_length
233 |             DataTime = sec.start_time.timestamp() + \
234 |                 np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
235 |             if h5_file.attrs['saving_start_gps_time'] > 0:
236 |                 h5_file.attrs['file_start_gps_time'] = \
237 |                     sec.start_time.timestamp()
238 |                 _update_h5_dataset(h5_file, 'data_product/', 'gps_time',
239 |                                    DataTime)
240 |                 del h5_file['data_product/posix_time']
241 |             else:
242 |                 h5_file.attrs['file_start_computer_time'] = \
243 |                     sec.start_time.timestamp()
244 |                 _update_h5_dataset(h5_file, 'data_product/', 'posix_time',
245 |                                    DataTime)
246 |                 del h5_file['data_product/gps_time']
247 |             h5_file.attrs['data_product'] = sec.data_type
248 |         else:
249 |             acquisition = list(h5_file[f'{group}/Source1/Zone1'].keys())[0]
250 |             data = sec.data
251 |             fs = int(sec.fs)
252 |             d = len(h5_file[f'{group}/Source1/Zone1/{acquisition}'].shape)
253 |             if d == 3:
254 |                 mod = sec.nt % fs
255 |                 if mod:
256 |                     data = np.hstack((data, np.zeros((sec.nch, fs - mod))))
257 |                 data = data.reshape((sec.nch, fs, sec.nt//fs)).T
258 |             elif d == 2:
259 |                 data = data.T
260 |             _update_h5_dataset(h5_file, f'{group}/Source1/Zone1/',
261 |                                acquisition, data)
262 |
263 |             h5_file[f'{group}/Source1/Zone1'].attrs['Spacing'][0] = sec.dx
264 |             h5_file[f'{group}/Source1/Zone1'].attrs['FreqRes'] = \
265 |                 np.bytes_(sec.fs)
266 |             h5_file[f'{group}/Source1/Zone1'].attrs['SamplingRate'][0] = \
267 |                 sec.fs
268 |             h5_file[f'{group}/Source1/Zone1'].attrs['Extent'][0] = \
269 |                 sec.start_channel
270 |             h5_file[f'{group}/Source1/Zone1'].attrs['Origin'][0] = \
271 |                 sec.start_distance
272 |             h5_file[f'{group}/Source1/Zone1'].attrs['GaugeLength'][0] = \
273 |                 sec.gauge_length
274 |             DataTime = sec.start_time.timestamp() + \
275 |                 np.arange(0, sec.nt / sec.fs, 1 / sec.fs)
276 |             _update_h5_dataset(h5_file, f'{group}/Source1/',
277 |                                'time', DataTime.reshape((1, -1)))
278 |
279 |     return None
280 |
281 |
282 | def _write_segy(sec, fname, raw_fname=None):
283 |     spec = segyio.spec()
284 |     spec.samples = np.arange(sec.nt) / sec.fs * 1e3
285 |     spec.tracecount = sec.nch
286 |     if raw_fname is None:
287 |         spec.format = 1
288 |         with segyio.create(fname, spec) as new_file:
289 |             new_file.header.length = sec.nch
290 |             new_file.header.segy._filename = fname
291 |             new_file.trace = sec.data # .astype(np.float32)
292 |     else:
293 |         with segyio.open(raw_fname, ignore_geometry=True) as raw_file:
294 |             spec.sorting = raw_file.sorting
295 |             spec.format = raw_file.format
296 |             raw_file.header.length = sec.nch
297 |             raw_file.header.segy._filename = fname
298 |             with segyio.create(fname, spec) as new_file:
299 |                 new_file.text[0] = raw_file.text[0]
300 |                 new_file.header = raw_file.header
301 |                 new_file.trace = sec.data.astype(raw_file.trace.dtype)
302 |
303 |     warnings.warn('This data format doesn\'t include the channel interval.')
304 |     return None
305 |
--------------------------------------------------------------------------------
/document/Ridgecrest_traffic_noise.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/document/Ridgecrest_traffic_noise.mat
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 |
4 | setup(
5 |     name='DASPy-toolbox', version='1.1.6',
6 |     description=(
7 |         'DASPy is an open-source project dedicated to providing a Python package '
8 |         'for DAS (Distributed Acoustic Sensing) data processing, which '
9 |         'comprises classic seismic data processing techniques and specialized '
10 |         'algorithms for DAS applications.'
11 |     ),
12 |     long_description=open('README.md').read(),
13 |     author='Minzhe Hu, Zefeng Li',
14 |     author_email='hmz2018@mail.ustc.edu.cn',
15 |     maintainer='Minzhe Hu',
16 |     maintainer_email='hmz2018@mail.ustc.edu.cn',
17 |     license='MIT License',
18 |     url='https://github.com/HMZ-03/DASPy',
19 |     packages=find_packages(),
20 |     entry_points={
21 |         'console_scripts': [
22 |             'daspy = daspy.main:main',
23 |         ]
24 |     },
25 |     include_package_data=True,
26 |     package_data={
27 |         'daspy': ['core/example.pkl']
28 |     },
29 |     classifiers=[
30 |         'Operating System :: OS Independent',
31 |         'License :: OSI Approved :: MIT License',
32 |         'Programming Language :: Python :: 3'
33 |     ],
34 |     python_requires='>=3.9',
35 |     install_requires=[
36 |         'numpy',
37 |         'scipy>=1.13',
38 |         'matplotlib',
39 |         'geographiclib',
40 |         'pyproj',
41 |         'h5py',
42 |         'segyio',
43 |         'nptdms',
44 |         'tqdm'
45 |     ]
46 | )
47 |
--------------------------------------------------------------------------------
/website/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/website/logo.png
--------------------------------------------------------------------------------
/website/waveform.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HMZ-03/DASPy/9f308c93d7ad8f4e572705827b03c5d0fec3eac2/website/waveform.png
--------------------------------------------------------------------------------
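
Example usage (not part of the repository files above): a minimal sketch of how the SEG-Y writer in daspy/core/write.py might be exercised. It assumes that daspy.read() called with no arguments loads the packaged example Section (daspy/core/example.pkl) and that the dependencies listed in setup.py are installed; _write_segy is a private helper, so the direct call is for illustration only, and the output filename is hypothetical.

# Hypothetical usage sketch under the assumptions stated above.
from daspy import read
from daspy.core.write import _write_segy

sec = read()                      # bundled example Section (channel-by-time array in sec.data)
_write_segy(sec, 'example.segy')  # writes a fresh SEG-Y file (spec.format = 1)
# A UserWarning is emitted because SEG-Y does not store the channel interval (dx).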