├── .dockerignore ├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── datasets ├── first_break_picking.ipynb ├── ground-roll_attenuation.ipynb └── spherical_divergence_correction.ipynb ├── docker_containers ├── picking_docker │ ├── README.md │ ├── build │ │ └── Dockerfile │ ├── data │ │ └── .gitignore │ ├── picking_inference.py │ └── run │ │ └── run.sh └── utils │ └── install_docker.sh ├── models ├── First_break_picking │ ├── 1d_CNN │ │ ├── model_description.ipynb │ │ └── research.ipynb │ ├── Coppen's_unsupervised_method │ │ └── model_description.ipynb │ └── Hidden_Markov_model │ │ └── model_description.ipynb ├── Ground-roll_attenuation │ ├── Attention_model │ │ └── model_description.ipynb │ └── Unet_1D_model │ │ ├── model_description.ipynb │ │ ├── model_estimation.ipynb │ │ └── parameters_estimation.ipynb └── Spherical_divergence_correction │ └── model_description.ipynb ├── pylintrc ├── requirements.txt ├── seismicpro ├── __init__.py ├── models │ ├── __init__.py │ ├── hmm_model.py │ ├── metrics.py │ └── unet_attention.py └── src │ ├── __init__.py │ ├── file_utils.py │ ├── plot_utils.py │ ├── seismic_batch.py │ ├── seismic_dataset.py │ ├── seismic_index.py │ └── utils.py ├── setup.py ├── shippable.yml └── tutorials ├── 1.Index.ipynb ├── 2.Batch.ipynb ├── 3.Dataset.ipynb ├── 4.Models.ipynb └── 5. 
Preprocessing.ipynb /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .dockerignore 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .cache/ 3 | __pycache__/ 4 | .ipynb_checkpoints/ 5 | notebooks/ 6 | demo/ 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "seismicpro/batchflow"] 2 | path = seismicpro/batchflow 3 | url = https://github.com/analysiscenter/batchflow.git 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License 2 | 3 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. 4 | 5 | ### Section 1 – Definitions. 6 | 7 | a. __Adapted Material__ means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. 
For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. 8 | 9 | b. __Adapter's License__ means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. 10 | 11 | c. __BY-NC-SA Compatible License__ means a license listed at [creativecommons.org/compatiblelicenses](http://creativecommons.org/compatiblelicenses), approved by Creative Commons as essentially the equivalent of this Public License. 12 | 13 | d. __Copyright and Similar Rights__ means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. 14 | 15 | e. __Effective Technological Measures__ means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. 16 | 17 | f. __Exceptions and Limitations__ means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. 18 | 19 | g. __License Elements__ means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution, NonCommercial, and ShareAlike. 20 | 21 | h. __Licensed Material__ means the artistic or literary work, database, or other material to which the Licensor applied this Public License. 22 | 23 | i. 
__Licensed Rights__ means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. 24 | 25 | j. __Licensor__ means the individual(s) or entity(ies) granting rights under this Public License. 26 | 27 | k. __NonCommercial__ means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange. 28 | 29 | l. __Share__ means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. 30 | 31 | m. __Sui Generis Database Rights__ means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. 32 | 33 | n. __You__ means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. 34 | 35 | ### Section 2 – Scope. 36 | 37 | a. ___License grant.___ 38 | 39 | 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: 40 | 41 | A. 
reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and 42 | 43 | B. produce, reproduce, and Share Adapted Material for NonCommercial purposes only. 44 | 45 | 2. __Exceptions and Limitations.__ For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 46 | 47 | 3. __Term.__ The term of this Public License is specified in Section 6(a). 48 | 49 | 4. __Media and formats; technical modifications allowed.__ The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 50 | 51 | 5. __Downstream recipients.__ 52 | 53 | A. __Offer from the Licensor – Licensed Material.__ Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. 54 | 55 | B. __Additional offer from the Licensor – Adapted Material.__ Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply. 56 | 57 | C. __No downstream restrictions.__ You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 
58 | 59 | 6. __No endorsement.__ Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). 60 | 61 | b. ___Other rights.___ 62 | 63 | 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 64 | 65 | 2. Patent and trademark rights are not licensed under this Public License. 66 | 67 | 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes. 68 | 69 | ### Section 3 – License Conditions. 70 | 71 | Your exercise of the Licensed Rights is expressly made subject to the following conditions. 72 | 73 | a. ___Attribution.___ 74 | 75 | 1. If You Share the Licensed Material (including in modified form), You must: 76 | 77 | A. retain the following if it is supplied by the Licensor with the Licensed Material: 78 | 79 | i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); 80 | 81 | ii. a copyright notice; 82 | 83 | iii. a notice that refers to this Public License; 84 | 85 | iv. 
a notice that refers to the disclaimer of warranties; 86 | 87 | v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; 88 | 89 | B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and 90 | 91 | C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 92 | 93 | 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 94 | 95 | 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 96 | 97 | b. ___ShareAlike.___ 98 | 99 | In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply. 100 | 101 | 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-NC-SA Compatible License. 102 | 103 | 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material. 104 | 105 | 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply. 106 | 107 | ### Section 4 – Sui Generis Database Rights. 108 | 109 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: 110 | 111 | a. 
for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only; 112 | 113 | b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and 114 | 115 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. 116 | 117 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. 118 | 119 | ### Section 5 – Disclaimer of Warranties and Limitation of Liability. 120 | 121 | a. __Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.__ 122 | 123 | b. 
__To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.__ 124 | 125 | c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. 126 | 127 | ### Section 6 – Term and Termination. 128 | 129 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. 130 | 131 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 132 | 133 | 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 134 | 135 | 2. upon express reinstatement by the Licensor. 136 | 137 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. 138 | 139 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. 140 | 141 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. 142 | 143 | ### Section 7 – Other Terms and Conditions. 144 | 145 | a. 
The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. 146 | 147 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. 148 | 149 | ### Section 8 – Interpretation. 150 | 151 | a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. 152 | 153 | b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. 154 | 155 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. 156 | 157 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License: CC BY-NC-SA 4.0](https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-nc-sa/4.0/) 2 | [![Python](https://img.shields.io/badge/python-3.5-blue.svg)](https://python.org) 3 | [![Run Status](https://api.shippable.com/projects/5d5e601aed1bf40007051a93/badge?branch=master)](https://app.shippable.com/github/gazprom-neft/SeismicPro) 4 | 5 | # SeismicPro 6 | 7 | Machine learning for field seismic data processing. 8 | 9 | Content 10 | ================= 11 | 12 | * [About](#About) 13 | * [Installation](#Installation) 14 | * [Tutorials](#Tutorials) 15 | * [File formats](#File-formats) 16 | * [Seismic data](#Seismic-data) 17 | * [SPS data](#SPS-data) 18 | * [Picking data](#Picking-data) 19 | * [Datasets](#Datasets) 20 | * [Models](#Models) 21 | * [Installation](#Installation) 22 | * [Literature](#Literature) 23 | * [Citing](#Citing-SeismicPro) 24 | 25 | ## About 26 | 27 | SeismicPro provides a framework for machine learning on field seismic data. 28 | 29 | ## Installation 30 | 31 | ``` 32 | git clone --recursive https://github.com/gazprom-neft/SeismicPro.git 33 | ``` 34 | ## Tutorials 35 | 36 | A set of IPython Notebooks introduces step-by-step the SeismicPro framework: 37 | 38 | 1. [Index](tutorials/1.Index.ipynb) explains how to index data with respect to traces, field records, shot points etc. 39 | 2. [Batch](tutorials/2.Batch.ipynb) shows how to load data, perform various actions with seismic traces and visualize them. 40 | 3. [Dataset](tutorials/3.Dataset.ipynb) describes how to calculate some parameters for all dataset. 41 | 4. [Models](tutorials/4.Models.ipynb) notebook shows how to build and run pipelines for model training, inference and evaluation with respect to ground-roll noise attenuation problem. 
42 | 43 | ## File formats 44 | 45 | ### Seismic data 46 | 47 | Seismic data are expected to be in SEG-Y format. 48 | 49 | ### SPS data 50 | 51 | SPS data are expected as R, S, X text files in csv (comma-separated-values) format with required and optional headers: 52 | * Required R file headers: **rline**, **rid**, **x**, **y**, **z**. 53 | * Required S file headers: **sline**, **sid**, **x**, **y**, **z**. 54 | * Required X file headers: **FieldRecord**, **sline**, **sid**, **from_channel**, **to_channel**, **from_receiver**, **to_receiver**. 55 | 56 | ### Picking data 57 | 58 | File with first-break picking data is expected to be in csv (comma-separated-values) format with columns **FieldRecord**, **TraceNumber**, **FIRST_BREAK_TIME**. 59 | 60 | ## Datasets 61 | 62 | |Problem|Number of datasets|Number of fields| 63 | |---|---|---| 64 | |[Ground-roll attenuation](datasets/ground-roll_attenuation.ipynb)| 3 | 551, 991, 628 65 | |[First-break picking](datasets/first_break_picking.ipynb)| 3 | 1001, 1001, 460 66 | |[Spherical divergence correction](datasets/spherical_divergence_correction.ipynb) | 1 | 10 67 | 68 | ## Models 69 | 70 | |Model|Architecture|Metrics| 71 | |---|---|---| 72 | |[Ground-roll attenuation](models/Ground-roll_attenuation/Unet_1D_model/model_description.ipynb)| U-Net 1D| 0.004 MAE for dataset 1 73 | |[Ground-roll attenuation](models/Ground-roll_attenuation/Attention_model/model_description.ipynb)| U-Net Attention 1D| 0.007 MAE for dataset 1 74 | |[First-break picking](models/First_break_picking/1d_CNN/model_description.ipynb)| U-Net 1D | 0.06 MAE for dataset 1
0.7 MAE for dataset 2
15.9 MAE for dataset 3 75 | |[First-break picking](models/First_break_picking/Coppen's_unsupervised_method/model_description.ipynb)| Coppen's analytical method | 7.57 MAE for dataset 1
7.19 MAE for dataset 2
12.6 MAE for dataset 3 76 | |[First-break picking](models/First_break_picking/Hidden_Markov_model/model_description.ipynb)| Hidden Markov model | 2.6 MAE for dataset 1
23.4 MAE for dataset 2
8.0 MAE for dataset 3 77 | |[Spherical divergence correction](models/Spherical_divergence_correction/model_description.ipynb) | Time and speed based method | 0.0017 Derivative metric 78 | 79 | ## Installation 80 | 81 | > `SeismicPro` module is in the beta stage. Your suggestions and improvements are very welcome. 82 | 83 | > `SeismicPro` supports python 3.5 or higher. 84 | 85 | ### Installation as a python package 86 | 87 | With [pipenv](https://docs.pipenv.org/): 88 | 89 | pipenv install git+https://github.com/gazprom-neft/SeismicPro.git#egg=SeismicPro 90 | 91 | With [pip](https://pip.pypa.io/en/stable/): 92 | 93 | pip3 install git+https://github.com/gazprom-neft/SeismicPro.git 94 | 95 | After that just import `seismicpro`: 96 | ```python 97 | import seismicpro 98 | ``` 99 | 100 | ### Installation as a project repository 101 | 102 | When cloning repo from GitHub use flag ``--recursive`` to make sure that ``batchflow`` submodule is also cloned. 103 | 104 | git clone --recursive https://github.com/gazprom-neft/SeismicPro.git 105 | 106 | ## Literature 107 | 108 | Some articles related to seismic data processing: 109 | * [Deep learning tutorial for denoising](https://arxiv.org/pdf/1810.11614.pdf) 110 | * [Seismic images construction](http://lserv.deg.gubkin.ru/file.php?file=../../1/dfwikidata/Voskresenskij.JU.N.Postroenie.sejsmicheskih.izobrazhenij.%28M,.RGUNG%29%282006%29%28T%29_GsPs_.pdf) 111 | * [Diffraction](https://mospolytech.ru/storage/43ec517d68b6edd3015b3edc9a11367b/files/LRNo93.pdf) 112 | * [Automatic first-breaks picking: New strategies and algorithms](https://www.researchgate.net/publication/249866374_Automatic_first-breaks_picking_New_strategies_and_algorithms) 113 | 114 | ## Citing SeismicPro 115 | 116 | Please cite SeismicPro in your publications if it helps your research. 117 | 118 | Khudorozhkov R., Illarionov E., Broilovskiy A., Kalashnikov N., Podvyaznikov D. SeismicPro library for seismic data processing and ML models training and inference. 2019. 
119 | 120 | ``` 121 | @misc{seismicpro_2019, 122 | author = {R. Khudorozhkov and E. Illarionov and A. Broilovskiy and N. Kalashnikov and D. Podvyaznikov}, 123 | title = {SeismicPro library for seismic data processing and ML models training and inference}, 124 | year = 2019 125 | } 126 | ``` 127 | -------------------------------------------------------------------------------- /docker_containers/picking_docker/README.md: -------------------------------------------------------------------------------- 1 | ## Docker container with python 3 environment without GPU support for First break picking model inference 2 | 3 | 4 | ## Docker 5 | To install Docker execute `../utils/install_docker.sh` 6 | 7 | 8 | ## Inference Image 9 | To build the image for inference execute the 2 following commands. First, we need to move 2 levels up to reach the repository level, which we want to put in the image, so it would be in the context of the image build. Then we build the image. This is a feature of **Docker**, done for security purposes. You can also specify the image name. 10 | 11 | default image name: `fb_inference` 12 | 13 | `cd ../..` 14 | `docker build -t fb_inference -f docker_containers/picking_docker/build/Dockerfile .` 15 | 16 | Come back to the root picking_docker folder afterwards. 17 | 18 | `cd docker_containers/picking_docker` 19 | 20 | ## How to run inference script 21 | In order to run the container with inference script you need to specify some variables, see details below. 22 | 23 | ### DATA_DIR 24 | default: `docker_containers/picking_docker/data`. 25 | 26 | Directory in the host system where SEGY files, model and inference results would be stored. 27 | 28 | ### SEGY 29 | Specify the name of the SEGY file located in `DATA_DIR` folder for which picking is being predicted. 30 | 31 | ### MODEL 32 | default: `fb_model.dill` 33 | 34 | Specify the model name located in `DATA_DIR` folder. 
35 | 36 | ### DUMP_TO 37 | default: `dump.csv` 38 | 39 | Specify the filename in the `DATA_DIR` folder where the results would be dumped. 40 | 41 | The format of the resulting csv file is `FFID TraceNumber Predictions` 42 | 43 | ### IMAGE 44 | default: `fb_inference` 45 | 46 | Docker image to run in the container. Specify the `image_name` you assigned to the image when building it. 47 | 48 | ### BATCH_SIZE 49 | default: `1000` 50 | 51 | The number of traces in the batch during inference stage. 52 | 53 | ### NUM_ZERO 54 | default: `500` 55 | 56 | Number of zero values a trace must contain in order to be dropped from the batch. 57 | 58 | ### TRACE_LEN 59 | default: `751` 60 | 61 | The number of first samples of the trace to load from SEGY. 62 | 63 | ### DEVICE 64 | default: `cpu` 65 | 66 | The device for inference stage. Can be 'cpu', or 'gpu' if you have a GPU device. 67 | 68 | ## Examples 69 | 70 | `DATA_DIR=/home/user/data SEGY=segy_name.sgy MODEL=fb_model_2d.dill run/run.sh` 71 | This command runs the inference script on the *home/user/data/segy_name.sgy* file using the *home/user/data/fb_model_2d.dill* model. 
Result will be stored in *home/user/data/dump.csv* 72 | -------------------------------------------------------------------------------- /docker_containers/picking_docker/build/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM analysiscenter1/ds-py3:cpu 2 | MAINTAINER Roman Kh 3 | 4 | COPY docker_containers/ /notebooks/SeismicPro/docker_containers 5 | COPY seismicpro/ /notebooks/SeismicPro/seismicpro 6 | 7 | WORKDIR /notebooks/SeismicPro/docker_containers/picking_docker 8 | 9 | ENTRYPOINT ["python3", "picking_inference.py"] 10 | 11 | -------------------------------------------------------------------------------- /docker_containers/picking_docker/data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /docker_containers/picking_docker/picking_inference.py: -------------------------------------------------------------------------------- 1 | """ Inference script that takes segy file and First break picking model, 2 | predicts picking for traces and dump them to csv file. 3 | """ 4 | import os 5 | import sys 6 | import argparse 7 | 8 | import torch 9 | import numpy as np 10 | 11 | sys.path.append('../..') 12 | 13 | from seismicpro.batchflow import Dataset, B 14 | from seismicpro.batchflow.models.torch import UNet 15 | from seismicpro.src import FieldIndex, TraceIndex, SeismicDataset 16 | 17 | def make_prediction(): 18 | """ Read the model and data paths and run inference pipeline. 
19 | """ 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('-p', '--path_raw', type=str, help="Path to SEGY file.", 22 | required=True) 23 | parser.add_argument('-m', '--path_model', type=str, help="Path to trained model.", 24 | required=True) 25 | parser.add_argument('-d', '--path_dump', type=str, help="Path to CSV file where \ 26 | the results would be stored.", default='dump.csv') 27 | parser.add_argument('-n', '--num_zero', type=int, help="Required number of zero \ 28 | values for the trace to contain to droped.", default=100) 29 | parser.add_argument('-bs', '--batch_size', type=int, help="The number of traces in \ 30 | the batch for inference stage.", default=1000) 31 | parser.add_argument('-ts', '--trace_len', type=int, help="The number of first samples \ 32 | of the trace to load.", default=751) 33 | parser.add_argument('-dvc', '--device', type=str or torch.device, help="The device for \ 34 | inference. Can be 'cpu' or 'gpu'.", default=torch.device('cpu')) 35 | args = parser.parse_args() 36 | path_raw = args.path_raw 37 | model = args.path_model 38 | save_to = args.path_dump 39 | num_zero = args.num_zero 40 | batch_size = args.batch_size 41 | trace_len = args.trace_len 42 | device = args.device 43 | predict(path_raw, model, num_zero, save_to, batch_size, trace_len, device) 44 | 45 | def predict(path_raw, path_model, num_zero, save_to, batch_size, trace_len, device): 46 | """Make predictions and dump results using loaded model and path to data. 47 | 48 | Parameters 49 | ---------- 50 | path_raw: str 51 | Path to SEGY file. 52 | path_model: str 53 | Path to the file with trained model. 54 | num_zero: int, default: 100 55 | Required number of zero values in a row in the trace to drop such trace. 56 | save_to: str, default: 'dump.csv' 57 | Path to CSV file where the results will be stored. 58 | batch_size: int, default: 1000 59 | The batch size for inference. 
60 | trace_len: int, default: 751 61 | The number of first samples in the trace to load to the pipeline. 62 | device: str or torch.device, default: 'cpu' 63 | The device used for inference. Can be 'gpu' if a GPU is available. 64 | 65 | """ 66 | data = SeismicDataset(TraceIndex(name='raw', path=path_raw)) 67 | 68 | config_predict = { 69 | 'build': False, 70 | 'load/path': path_model, 71 | 'device': device 72 | } 73 | 74 | try: 75 | os.remove(save_to) 76 | except OSError: 77 | pass 78 | 79 | test_pipeline = (data.p 80 | .init_model('dynamic', UNet, 'my_model', config=config_predict) 81 | .load(components='raw', fmt='segy', tslice=np.arange(trace_len)) 82 | .drop_zero_traces(num_zero=num_zero, src='raw') 83 | .standardize(src='raw', dst='raw') 84 | .add_components(components='predictions') 85 | .apply_transform_all(src='raw', dst='raw', func=lambda x: np.stack(x)) 86 | .predict_model('my_model', B('raw'), fetches='predictions', 87 | save_to=B('predictions', mode='a')) 88 | .mask_to_pick(src='predictions', dst='predictions', labels=False) 89 | .dump(src='predictions', fmt='picks', path=save_to, 90 | traces='raw', to_samples=True)) 91 | 92 | test_pipeline.run(batch_size, n_epochs=1, drop_last=False, shuffle=False, bar=True) 93 | 94 | if __name__ == "__main__": 95 | sys.exit(make_prediction()) 96 | -------------------------------------------------------------------------------- /docker_containers/picking_docker/run/run.sh: -------------------------------------------------------------------------------- 1 | path="data/" 2 | 3 | data_vol=${DATA_DIR:-$PWD/data} 4 | image=${IMAGE:-fb_inference} 5 | segy=${SEGY:-filename.sgy} 6 | model=${MODEL:-fb_model.dill} 7 | save_to=${DUMP_TO:-dump.csv} 8 | batch_size=${BATCH_SIZE:-1000} 9 | num_zero=${NUM_ZERO:-500} 10 | trace_len=${TRACE_LEN:-750} 11 | device=${DEVICE:-cpu} 12 | 13 | sudo docker run --rm \ 14 | -v ${data_vol}:/notebooks/SeismicPro/docker_containers/picking_docker/data \ 15 | $@ ${image} \ 16 | -p $path$segy -m 
$path$model -d $path$save_to -n ${num_zero} \ 17 | -bs ${batch_size} -ts ${trace_len} -dvc ${device} 18 | -------------------------------------------------------------------------------- /docker_containers/utils/install_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # install Docker 4 | sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common 5 | 6 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 7 | 8 | sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu bionic stable" 9 | 10 | sudo apt-get update 11 | 12 | sudo apt-get install docker-ce 13 | -------------------------------------------------------------------------------- /models/Ground-roll_attenuation/Unet_1D_model/model_estimation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Model Evaluation\n", 8 | "\n", 9 | "For model performance assessment we want to obtain the distribution of the model quality over 20 independent runs of the training procedure.\n", 10 | "\n", 11 | "* [The experiment details](#The-experiment-details)\n", 12 | "* [Results](#Results)\n", 13 | "\n", 14 | "### The experiment details\n", 15 | "Train and test dataset creation" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import os\n", 25 | "import sys\n", 26 | "import numpy as np\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import tensorflow as tf\n", 29 | "\n", 30 | "sys.path.append('../../..')\n", 31 | "\n", 32 | "from seismicpro.batchflow import Pipeline, B, V, F, C\n", 33 | "from seismicpro.batchflow.models.tf import UNet\n", 34 | "from seismicpro.batchflow.research import Research\n", 35 | "from seismicpro.src import (SeismicDataset,
FieldIndex, TraceIndex,\n", 36 | " draw_histogram)\n", 37 | "\n", 38 | "plt.style.use('ggplot')" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "path_raw = '/data/NA/noise_dataset_1/DN02A_LIFT_AMPSCAL.sgy'\n", 48 | "path_lift = '/data/NA/noise_dataset_1/DN02B_SHOTS_LIFT1.sgy'\n", 49 | "\n", 50 | "index = (FieldIndex(name='raw', extra_headers=['offset'], path=path_raw)\n", 51 | " .merge(FieldIndex(name='lift', path=path_lift)))\n", 52 | "\n", 53 | "train_index = index.create_subset(index.indices[:5])\n", 54 | "train_set = SeismicDataset(TraceIndex(train_index))\n", 55 | "test_set = SeismicDataset(TraceIndex(index.create_subset(index.indices[20:21])))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "Define model config" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 2, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "model_config = {\n", 72 | " 'initial_block/inputs': 'x',\n", 73 | " 'inputs': dict(x={'shape': (3000, 1)}, \n", 74 | " y={'name':'targets', 'shape': (3000, 1)}),\n", 75 | " 'body/filters': [16, 32, 64, 128, 256],\n", 76 | " 'body/encoder': dict(layout='caca', kernel_size=7, activation=tf.nn.elu),\n", 77 | " 'body/downsample': dict(layout='pd', pool_size=2, pool_strides=2, dropout_rate=0.05),\n", 78 | " 'body/decoder': dict(layout='caca', kernel_size=7, activation=tf.nn.elu),\n", 79 | " 'body/upsample': dict(layout='tad', kernel_size=7, strides=2,\n", 80 | " dropout_rate=0.05, activation=tf.nn.elu),\n", 81 | " 'loss': 'l1',\n", 82 | " 'optimizer': 'Adam'\n", 83 | "}" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Define train and test pipelines" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 3, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "def make_data(batch, **kwagrs):\n", 100 | " return 
{'x': np.expand_dims(np.vstack(batch.raw), -1),\n", 101 | " 'y': np.expand_dims(np.vstack(batch.lift), -1)}\n", 102 | "\n", 103 | "B_SIZE = 16\n", 104 | "train_pipeline = (Pipeline()\n", 105 | " .load(components=('raw', 'lift'), fmt='segy', tslice=np.arange(3000))\n", 106 | " .init_variable('loss', init_on_each_run=list)\n", 107 | " .init_model('dynamic', UNet, 'unet', model_config)\n", 108 | " .train_model('unet', make_data=make_data,\n", 109 | " fetches='loss', save_to=V('loss', mode='w'))\n", 110 | " .run_later(B_SIZE, n_epochs=None, drop_last=True, shuffle=True)\n", 111 | " ) << train_set\n", 112 | "\n", 113 | "test_pipeline = (Pipeline()\n", 114 | " .import_model('unet', C('import_from'))\n", 115 | " .init_variable('res', init_on_each_run=list())\n", 116 | " .init_variable('raw', init_on_each_run=list())\n", 117 | " .init_variable('lift', init_on_each_run=list())\n", 118 | " .load(components=('raw', 'lift'), tslice=np.arange(3000), fmt='segy')\n", 119 | " .update_variable('raw', B('raw'), mode='a')\n", 120 | " .update_variable('lift', B('lift'), mode='a')\n", 121 | " .predict_model('unet', fetches='predictions', make_data=make_data,\n", 122 | " save_to=V('res', mode='a'))\n", 123 | " .run_later(B_SIZE, n_epochs=1, drop_last=True, shuffle=True)\n", 124 | " ) << test_set" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Let's define the functions for calculating the metrics" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 4, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "def get_l1(iteration, experiment, pipeline):\n", 141 | " \"\"\" Calculate l1 norm.\"\"\"\n", 142 | " _ = iteration\n", 143 | " pipeline = experiment[pipeline].pipeline\n", 144 | " res = np.squeeze(np.vstack(pipeline.v(\"res\")), axis=-1)\n", 145 | " lift = np.vstack(np.concatenate(pipeline.v(\"lift\")))\n", 146 | " return np.mean(np.abs(res - lift))" 147 | ] 148 | }, 149 | { 150 | "cell_type": 
"markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Create a research object" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 5, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "research = (Research()\n", 163 | " .add_pipeline(train_pipeline, variables='loss', name='train')\n", 164 | " .add_pipeline(test_pipeline, name='test_ppl', execute=5,\n", 165 | " run=True, import_from='train')\n", 166 | " .add_grid({})\n", 167 | " .add_function(get_l1, returns='l1', name='test',\n", 168 | " execute=5, pipeline='test_ppl')\n", 169 | ")" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "Run 20 independent training and test procedures" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 6, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "Research reserach_estimation is starting...\n" 189 | ] 190 | }, 191 | { 192 | "name": "stderr", 193 | "output_type": "stream", 194 | "text": [ 195 | " 61%|██████████| 10000/10000 [32:56<00:00, 5.06it/s]" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "research.run(n_reps=20, n_iters=500, name='reserach_estimation', workers=5,\n", 201 | " gpu=[1, 2, 3, 6, 7], bar=True)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Results\n", 209 | "\n", 210 | "Histogram and a median value of the test metrics" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 7, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "image/png": "\n", 221 | "text/plain": [ 222 | "
" 223 | ] 224 | }, 225 | "metadata": {}, 226 | "output_type": "display_data" 227 | }, 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "Average value (Median) is 0.006435\n", 233 | "Std is 0.0004004\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "df = research.load_results(use_alias=True)\n", 239 | "draw_histogram(df, 'test/l1', 100)" 240 | ] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "Python 3", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.5.2" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 2 264 | } 265 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | ignore=batchflow 3 | extension-pkg-whitelist=numpy 4 | init-hook='import sys; sys.path.append(".")' 5 | 6 | [FORMAT] 7 | max-line-length=120 8 | max-attributes=8 9 | max-args=10 10 | max-locals=25 11 | good-names=B,C,D,F,L,P,R,V,W,I,_,a,ax,b,bar,c,d,df,e,ex,f,fn,fs,g,h,i,im,ix,j,k,l,lc,logger,lr,m,n,o,op,p,q,r,rc,s,t,v,w,x,xc,y,yc,z 12 | 13 | [MESSAGE CONTROL] 14 | disable=no-member, no-value-for-parameter, no-self-use, too-many-locals, too-few-public-methods, too-many-public-methods, too-many-branches, unsubscriptable-object, redefined-variable-type, too-many-star-expressions, duplicate-code, not-context-manager, too-many-lines, global-statement, locally-disabled, wrong-import-position, invalid-sequence-index, redundant-keyword-arg, bad-super-call, no-self-argument, redefined-builtin, arguments-differ, len-as-condition, keyword-arg-before-vararg, assignment-from-none, useless-return, 
def make_hmm_data(batch, model, components):
    """Assemble the keyword arguments expected by ``HMModel`` from batch components.

    Parameters
    ----------
    batch : object
        Object exposing every name in ``components`` as an attribute holding a
        sequence of arrays (one array per batch item).
    model : any
        Unused; accepted for signature compatibility.
    components : str or sequence of str
        Component name(s). Each component contributes one block of columns to ``x``.

    Returns
    -------
    dict
        ``x`` - for each component every item is made at least 3-D, its leading axis
        is concatenated away and the items are concatenated; components are then
        stacked horizontally.
        ``lengths`` - for each item of the first component, ``len(item[0])`` repeated
        ``len(item)`` times.
        ``shapes`` - ``len(item)`` for each item of the first component.
    """
    _ = model
    if isinstance(components, str):
        components = (components, )

    columns = []
    for comp in components:
        flattened = [np.concatenate(np.atleast_3d(item)) for item in getattr(batch, comp)]
        columns.append(np.concatenate(flattened))
    x = np.hstack(columns)

    # Sequence bookkeeping is derived from the first component only.
    first = getattr(batch, components[0])
    lengths = np.concatenate([[len(item[0])] * len(item) for item in first])
    shapes = np.array([len(item) for item in first])
    return {"x": x, "lengths": lengths, "shapes": shapes}
71 | """ 72 | with open(path, "rb") as file: 73 | self.estimator = dill.load(file) 74 | 75 | def train(self, x, lengths=None, *args, **kwargs): 76 | """ Train the model using data provided. 77 | Parameters 78 | ---------- 79 | X : array-like 80 | A matrix of observations. 81 | Should be of shape (n_samples, n_features). 82 | lengths : array-like of integers optional 83 | If present, should be of shape (n_sequences, ). 84 | Lengths of the individual sequences in ``X``. The sum of 85 | these should be ``n_samples``. 86 | Notes 87 | ----- 88 | For more details and other parameters look at the documentation for the estimator used. 89 | """ 90 | _ = args, kwargs 91 | self.estimator.fit(x, lengths) 92 | return list(self.estimator.monitor_.history) 93 | 94 | def predict(self, x, lengths=None, shapes=None, *args, **kwargs): 95 | """ Make prediction with the data provided. 96 | Parameters 97 | ---------- 98 | x : array-like 99 | A matrix of observations. 100 | Should be of shape (n_samples, n_features). 101 | lengths : array-like of integers optional 102 | If present, should be of shape (n_sequences, ). 103 | Lengths of the individual sequences in ``x``. The sum of 104 | these should be ``n_samples``. 105 | Returns 106 | ------- 107 | output: array 108 | Labels for each sample of x. 109 | Notes 110 | ----- 111 | For more details and other parameters look at the documentation for the estimator used. 
class FieldMetrics(Metrics):
    """Class for seismic field record metrics.

    Parameters
    ----------
    targets : array-like
        Ground truth values.
    predictions : array-like
        Model predictions.
    """
    def __init__(self, targets, predictions):
        super().__init__()
        self.targets = targets
        self.predictions = predictions

    def iou(self):
        """Overlap score computed as ``2 * sum(a * b) / sum(a + b)``.

        NOTE(review): despite the method name, for binary masks this formula is the
        Dice coefficient rather than intersection-over-union; the computation is kept
        as is so that previously reported values stay comparable.
        """
        a = self.targets.astype(float)
        b = self.predictions.astype(float)
        return 2 * np.sum(a * b) / np.sum(a + b)

    def mae(self):
        """Mean absolute error metric."""
        return np.mean(abs(self.targets - self.predictions))

    def corr_coef(self, reduce='mean', **kwargs):
        """Row-wise correlation coefficients between targets and predictions.

        Each row (axis 1) is mean-centered and divided by its standard deviation;
        rows whose standard deviation is not positive are left unscaled.

        Parameters
        ----------
        reduce : str, callable or None
            ``None`` returns the per-row coefficients; a string is looked up as a
            NumPy function name; a callable is applied directly.
        kwargs : dict
            Extra keyword arguments for the reducing function.
        """
        a = self.targets
        b = self.predictions
        a = (a - np.mean(a, axis=1, keepdims=True))
        std = np.std(a, axis=1, keepdims=True)
        std[~(std > 0)] = 1  # avoid division by zero (also catches NaN)
        a = a / std

        b = (b - np.mean(b, axis=1, keepdims=True))
        std = np.std(b, axis=1, keepdims=True)
        std[~(std > 0)] = 1
        b = b / std

        corr = (a * b).sum(axis=1) / a.shape[1]
        if reduce is None:
            return corr
        if isinstance(reduce, str):
            return getattr(np, reduce)(corr, **kwargs)

        return reduce(corr, **kwargs)

class PickingMetrics(Metrics):
    """Class for First Break picking task metrics.

    Parameters
    ----------
    targets : array-like
        Ground truth picking.
    predictions : array-like
        Model predictions.
    gap : int (default=3)
        A trace counts as correctly classified when the absolute difference between
        prediction and target is strictly less than ``gap``.
    """
    def __init__(self, targets, predictions, gap=3):
        super().__init__()
        self.targets = np.array(targets)
        self.predictions = np.array(predictions)
        self.gap = gap

    def mae(self):
        """Mean absolute error metric."""
        return np.mean(np.abs(self.targets - self.predictions))

    def accuracy(self):
        """Percentage of picks whose absolute error is below ``gap``.

        The share is computed over all elements, so the result is a valid
        percentage for inputs of any dimensionality (for 1-D input it matches
        the previous ``len``-based computation).
        """
        abs_diff = np.abs(self.targets - self.predictions)
        return 100 * int(np.count_nonzero(abs_diff < self.gap)) / abs_diff.size

def calc_derivative_diff(ampl_diff, window=51):
    """Derivative difference metric.

    Returns the median absolute gradient of ``measure_gain_amplitude(ampl_diff, window)``.
    """
    result = measure_gain_amplitude(ampl_diff, window)
    return np.median(np.abs(np.gradient(result)))
kwargs.pop('main') 31 | attn_config = kwargs.pop('attn') 32 | 33 | main = super().body(raw, name='main', **{**kwargs, **main_config}) # pylint: disable=not-a-mapping 34 | att = super().body(raw, name='attention', **{**kwargs, **attn_config}) # pylint: disable=not-a-mapping 35 | return main, att, raw, offset 36 | 37 | def head(self, inputs, *args, **kwargs): 38 | _ = args, kwargs 39 | main, att, raw, offset = inputs 40 | 41 | #Get a single channel with sigmoid activation for the attention branch 42 | att = conv_block(att, layout='ca', kernel_size=3, filters=1, units=1, 43 | activation=tf.nn.sigmoid, name='head_att') 44 | 45 | #Quick estimation of sigmoid center location 46 | att_sum = tf.reduce_sum(att, axis=1, keepdims=True) 47 | 48 | #Define a domain for sigmoid function 49 | sigm_x = tf.fill(tf.shape(att), 0.0) 50 | arange = tf.range(0, tf.cast(tf.shape(sigm_x)[1], 'float'), dtype='float') 51 | arange = tf.expand_dims(arange, axis=-1) 52 | sigm_x = sigm_x - arange 53 | 54 | #Shallow network that estimates sigmoid center location and shoothness 55 | #based on its quick estimation and offset 56 | shift_in = tf.concat([tf.squeeze(att_sum, axis=1), offset], axis=1) 57 | shift_in = tf.layers.dense(shift_in, 16, activation=tf.nn.elu) 58 | shift_in = tf.layers.dense(shift_in, 16, activation=tf.nn.elu) 59 | sigmoid_center = tf.layers.dense(shift_in, 1, activation=tf.nn.relu) 60 | self.store_to_attr("sigmoid_center", sigmoid_center) 61 | 62 | #Shift and stretch sigmoid domain based on network estimations 63 | sigmoid_center = tf.expand_dims(sigmoid_center, axis=-1) 64 | sigm_x = sigm_x + sigmoid_center[:, :1] 65 | 66 | #Apply sigmoid function to the above obtained domain 67 | attention_sigmoid = tf.sigmoid(sigm_x) 68 | self.store_to_attr("attention_sigmoid", attention_sigmoid) 69 | 70 | #Get a single channel with linear activation for the main branch 71 | main = conv_block(main, layout='c', filters=1, units=1, name='head_main') 72 | self.store_to_attr("out_main", main) 73 
def attention_loss(targets, predictions, balance, **kwargs):
    """Loss function for Unet Attention model.

    Sums the absolute difference between ``targets`` and the lifted output with a
    penalty proportional to the mean of ``1 - attention_mask``, registers the sum
    via ``tf.losses.add_loss`` and returns it.

    Parameters
    ----------
    targets : tensor
        Target values.
    predictions : tensor
        Stacked model output: index 0 is the lifted output, index 1 is the
        attention mask.
    balance : tensor
        Weight of the attention mask term relative to the L1 term.
    kwargs : dict
        Ignored.

    Returns
    -------
    loss : tensor
        Computed loss.
    """
    _ = kwargs
    lifted, mask = predictions[0], predictions[1]
    l1_term = tf.losses.absolute_difference(targets, lifted)
    mask_term = balance * tf.reduce_mean(1 - mask)
    total = l1_term + mask_term
    tf.losses.add_loss(total)
    return total
def write_segy_file(data, df, samples, path, sorting=None, segy_format=1):
    """Write data and headers into SEGY file.

    The caller's ``df`` is left untouched: headers are prepared on a copy, so the
    same DataFrame can be passed again. Header rows are matched to traces by
    position, so ``df`` does not need a default ``0..n-1`` index.

    Parameters
    ----------
    data : array-like
        Array of traces.
    df : DataFrame
        DataFrame with trace headers data. Column names must be attribute names
        of ``segyio.TraceField``.
    samples : array
        Time samples for trace data.
    path : str
        Path to output file.
    sorting : int
        SEGY file sorting.
    segy_format : int
        SEGY file format.

    Returns
    -------
    None
    """
    spec = segyio.spec()
    spec.sorting = sorting
    spec.format = segy_format
    spec.samples = samples
    spec.tracecount = len(data)

    # Work on a copy: renaming columns on the caller's frame would make a second
    # call with the same frame fail on the TraceField lookup.
    headers = df.copy()
    headers.columns = [getattr(segyio.TraceField, k) for k in headers.columns]
    # Traces are renumbered sequentially within the output file, starting from 1.
    headers[getattr(segyio.TraceField, 'TRACE_SEQUENCE_FILE')] = np.arange(len(headers)) + 1
    # One dict per row in positional order, independent of index labels.
    meta = headers.to_dict('records')

    with segyio.create(path, spec) as file:
        file.trace = data
        for i, x in enumerate(file.header[:]):
            x.update(meta[i])
57 | 58 | Returns 59 | ------- 60 | """ 61 | segy_index = SegyFilesIndex(**kwargs, name='data') 62 | spec = segyio.spec() 63 | spec.sorting = None 64 | spec.format = 1 65 | spec.tracecount = sum(segy_index.tracecounts) 66 | with segyio.open(segy_index.indices[0], strict=False) as file: 67 | spec.samples = file.samples 68 | 69 | with segyio.create(output_path, spec) as dst: 70 | i = 0 71 | iterable = tqdm(segy_index.indices) if bar else segy_index.indices 72 | for index in iterable: 73 | with segyio.open(index, strict=False) as src: 74 | dst.trace[i: i + src.tracecount] = src.trace 75 | dst.header[i: i + src.tracecount] = src.header 76 | for j in range(src.tracecount): 77 | dst.header[i + j].update({segyio.TraceField.TRACE_SEQUENCE_FILE: i + j + 1}) 78 | 79 | i += src.tracecount 80 | 81 | def merge_picking_files(output_path, **kwargs): 82 | """Merge picking files into a single file. 83 | 84 | Parameters 85 | ---------- 86 | output_path : str 87 | Path to output file. 88 | kwargs : dict 89 | Keyword arguments to index input files. 
def seismic_plot(arrs, wiggle=False, xlim=None, ylim=None, std=1, # pylint: disable=too-many-branches, too-many-arguments
                 pts=None, s=None, scatter_color=None, names=None, figsize=None,
                 save_to=None, dpi=None, line_color=None, title=None, **kwargs):
    """Plot seismic traces.

    Parameters
    ----------
    arrs : array-like
        Arrays of seismic traces to plot. A single 2-D array is treated as one plot.
    wiggle : bool, default to False
        Show traces in a wiggle form.
    xlim : tuple, optional
        Range in x-axis to show. If not given, it is derived from each array separately.
    ylim : tuple, optional
        Range in y-axis to show. If not given, it is derived from each 2-D array separately.
    std : scalar, optional
        Amplitude scale for traces in wiggle form.
    pts : array_like, shape (n, )
        The points data positions.
    s : scalar or array_like, shape (n, ), optional
        The marker size in points**2.
    scatter_color : color, sequence, or sequence of color, optional
        The marker color.
    names : str or array-like, optional
        Title names to identify subplots.
    figsize : array-like, optional
        Output plot size.
    save_to : str or None, optional
        If not None, save plot to given path.
    dpi : int, optional, default: None
        The resolution argument for matplotlib.pyplot.savefig.
    line_color : color or sequence of color, optional, default: None
        The trace color in wiggle form. A string is used for every trace; a sequence
        must contain one color per plotted trace. Defaults to ``'k'``.
    title : str
        Plot title.
    kwargs : dict
        Additional keyword arguments for plot.

    Returns
    -------
    Multi-column subplots.

    Raises
    ------
    ValueError
        If ``line_color`` is a sequence and its length is not equal to the number of traces.
        If dimensions of given ``arrs`` not in [1, 2].

    """
    if isinstance(arrs, np.ndarray) and arrs.ndim == 2:
        arrs = (arrs,)

    if isinstance(names, str):
        names = (names,)

    line_color = 'k' if line_color is None else line_color
    fig, ax = plt.subplots(1, len(arrs), figsize=figsize, squeeze=False)
    for i, arr in enumerate(arrs):

        if not wiggle:
            arr = np.squeeze(arr)

        # Limits are resolved per array so that defaults derived from one array
        # never leak into the following subplots.
        cur_xlim = (0, len(arr)) if xlim is None else xlim
        cur_ylim = ylim

        if arr.ndim == 2:
            if cur_ylim is None:
                cur_ylim = (0, len(arr[0]))

            if wiggle:
                offsets = np.arange(*cur_xlim)

                if isinstance(line_color, str):
                    trace_colors = [line_color] * len(offsets)
                else:
                    trace_colors = line_color

                if len(trace_colors) != len(offsets):
                    raise ValueError("Lenght of line_color must be equal to the number of traces.")

                y = np.arange(*cur_ylim)
                arr_std = np.std(arr)  # loop-invariant normalization factor
                for ix, k in enumerate(offsets):
                    x = k + std * arr[k, slice(*cur_ylim)] / arr_std
                    col = trace_colors[ix]
                    # Keyword color accepts any matplotlib color spec, not only
                    # single-letter codes usable inside a format string.
                    ax[0, i].plot(x, y, color=col, linestyle='-')
                    ax[0, i].fill_betweenx(y, k, x, where=(x > k), color=col)

            else:
                ax[0, i].imshow(arr.T, **kwargs)

        elif arr.ndim == 1:
            ax[0, i].plot(arr, **kwargs)
        else:
            raise ValueError('Invalid ndim to plot data.')

        if names is not None:
            ax[0, i].set_title(names[i])

        if arr.ndim == 2:
            # Reversed y-limits put the first sample at the top of the plot.
            ax[0, i].set_ylim([cur_ylim[1], cur_ylim[0]])
            if (not wiggle) or (pts is not None):
                ax[0, i].set_xlim(cur_xlim)

        if arr.ndim == 1:
            # Set limits on this subplot explicitly instead of the current axes.
            ax[0, i].set_xlim(cur_xlim)

        if pts is not None:
            ax[0, i].scatter(*pts, s=s, c=scatter_color)

        ax[0, i].set_aspect('auto')

    if title is not None:
        fig.suptitle(title)
    if save_to is not None:
        plt.savefig(save_to, dpi=dpi)
    plt.show()
164 | 165 | Parameters 166 | ---------- 167 | arrs : array-like 168 | Seismogram or sequence of seismograms. 169 | frame : tuple 170 | List of slices that frame region of interest. 171 | rate : scalar 172 | Sampling rate. 173 | max_freq : scalar 174 | Upper frequence limit. 175 | names : str or array-like, optional 176 | Title names to identify subplots. 177 | figsize : array-like, optional 178 | Output plot size. 179 | save_to : str or None, optional 180 | If not None, save plot to given path. 181 | kwargs : dict 182 | Named argumets to matplotlib.pyplot.imshow. 183 | 184 | Returns 185 | ------- 186 | Plot of seismogram(s) and power spectrum(s). 187 | """ 188 | if isinstance(arrs, np.ndarray) and arrs.ndim == 2: 189 | arrs = (arrs,) 190 | 191 | if isinstance(names, str): 192 | names = (names,) 193 | 194 | _, ax = plt.subplots(2, len(arrs), figsize=figsize, squeeze=False) 195 | for i, arr in enumerate(arrs): 196 | ax[0, i].imshow(arr.T, **kwargs) 197 | rect = patches.Rectangle((frame[0].start, frame[1].start), 198 | frame[0].stop - frame[0].start, 199 | frame[1].stop - frame[1].start, 200 | edgecolor='r', facecolor='none', lw=2) 201 | ax[0, i].add_patch(rect) 202 | ax[0, i].set_title('Seismogram {}'.format(names[i] if names 203 | is not None else '')) 204 | ax[0, i].set_aspect('auto') 205 | spec = abs(np.fft.rfft(arr[frame], axis=1))**2 206 | freqs = np.fft.rfftfreq(len(arr[frame][0]), d=rate) 207 | if max_freq is None: 208 | max_freq = np.inf 209 | 210 | mask = freqs <= max_freq 211 | ax[1, i].plot(freqs[mask], np.mean(spec, axis=0)[mask], lw=2) 212 | ax[1, i].set_xlabel('Hz') 213 | ax[1, i].set_title('Spectrum plot {}'.format(names[i] if names 214 | is not None else '')) 215 | ax[1, i].set_aspect('auto') 216 | 217 | if save_to is not None: 218 | plt.savefig(save_to) 219 | 220 | plt.show() 221 | 222 | def gain_plot(arrs, window=51, xlim=None, ylim=None, figsize=None, names=None, **kwargs):# pylint: disable=too-many-branches 223 | r"""Gain's graph plots the ratio 
of the maximum mean value of 224 | the amplitude to the mean value of the smoothed amplitude at the moment t. 225 | 226 | First of all for each trace the smoothed version calculated by following formula: 227 | $$Am = \sqrt{\mathcal{H}(Am)^2 + Am^2}, \ where$$ 228 | Am - Amplitude of trace. 229 | $\mathcal{H}$ - is a Hilbert transformaion. 230 | 231 | Then the average values of the amplitudes (Am) at each time (t) are calculated. 232 | After it the resulted value received from the following equation: 233 | 234 | $$ G(t) = - \frac{\max{(Am)}}{Am(t)} $$ 235 | 236 | Parameters 237 | ---------- 238 | sample : array-like 239 | Seismogram. 240 | window : int, default 51 241 | Size of smoothing window of the median filter. 242 | xlim : tuple or list with size 2 243 | Bounds for plot's x-axis. 244 | ylim : tuple or list with size 2 245 | Bounds for plot's y-axis. 246 | figsize : array-like, optional 247 | Output plot size. 248 | names : str or array-like, optional 249 | Title names to identify subplots. 250 | 251 | Returns 252 | ------- 253 | Gain's plot. 
def statistics_plot(arrs, stats, rate=None, figsize=None, names=None,
                    save_to=None, **kwargs):
    """Plot per-trace statistics above the corresponding seismogram image.

    Parameters
    ----------
    arrs : array-like
        A single 2d seismogram or a sequence of seismograms.
    stats : str, callable or array-like
        Name of a built-in statistic (``ma_ampl``, ``rms_ampl``, ``std_ampl``,
        ``rms_freq``), a custom callable invoked as ``func(arr, rate)``,
        or a sequence of such names and callables.
    rate : scalar
        Second argument handed to every statistic; ``rms_freq`` uses it as
        the sample spacing ``d`` of ``numpy.fft.rfftfreq``.
    figsize : array-like, optional
        Output plot size.
    names : str or array-like, optional
        Title names to identify subplots.
    save_to : str or None, optional
        If not None, save plot to given path.
    kwargs : dict
        Named arguments to matplotlib.pyplot.imshow.

    Returns
    -------
    Plots of seismograms and trace statistics.
    """
    def ma_ampl(x, *args):
        "Calculate mean absolute amplitude."
        return np.mean(np.abs(x), axis=1)

    def rms_ampl(x, *args):
        "Calculate rms amplitude."
        return np.sqrt(np.mean(x**2, axis=1))

    def std_ampl(x, *args):
        "Calculate standard deviation of amplitude."
        return np.std(x, axis=1)

    def rms_freq(x, rate):
        "Calculate rms frequency."
        power = np.abs(np.fft.rfft(x, axis=1))**2
        power = power / power.sum(axis=1).reshape((-1, 1))
        freqs = np.fft.rfftfreq(len(x[0]), d=rate)
        return np.sqrt((freqs**2 * power).sum(axis=1))

    statistics_zoo = {'ma_ampl': ma_ampl, 'rms_ampl': rms_ampl,
                      'std_ampl': std_ampl, 'rms_freq': rms_freq}

    if isinstance(arrs, np.ndarray) and arrs.ndim == 2:
        arrs = (arrs,)
    if isinstance(stats, str) or callable(stats):
        stats = (stats,)
    if isinstance(names, str):
        names = (names,)

    _, ax = plt.subplots(2, len(arrs), figsize=figsize, squeeze=False)
    for i, arr in enumerate(arrs):
        stat_ax, image_ax = ax[0, i], ax[1, i]

        for item in stats:
            # Built-in statistics are labelled by their zoo key, callables by their name.
            named = isinstance(item, str)
            func = statistics_zoo[item] if named else item
            label = item if named else item.__name__
            stat_ax.plot(func(arr, rate), label=label)

        stat_ax.legend()
        stat_ax.set_xlim([0, len(arr)])
        stat_ax.set_aspect('auto')
        stat_ax.set_title('' if names is None else names[i])
        image_ax.imshow(arr.T, **kwargs)
        image_ax.set_aspect('auto')

    if save_to is not None:
        plt.savefig(save_to)

    plt.show()
369 | average_repetitions : bool, optional 370 | If True, then a separate line will be drawn for each repetition 371 | else one mean line will be drawn for each repetition. 372 | log_scale : bool, optional 373 | If True, values will be logarithmised. 374 | rolling_window : None or int, optional 375 | Size of rolling window. 376 | """ 377 | if layout is None: 378 | layout = [] 379 | for nlabel, ndf in df.groupby("name"): 380 | ndf = ndf.drop(['config', 'name', 'iteration', 'repetition'], axis=1).dropna(axis=1) 381 | for attr in ndf.columns.values: 382 | layout.append('/'.join([str(nlabel), str(attr)])) 383 | if isinstance(log_scale, bool): 384 | log_scale = [log_scale] * len(layout) 385 | if isinstance(rolling_window, int) or (rolling_window is None): 386 | rolling_window = [rolling_window] * len(layout) 387 | rolling_window = [x if x is not None else 1 for x in rolling_window] 388 | 389 | if color is None: 390 | color = list(mcolors.CSS4_COLORS.keys()) 391 | df_len = len(df['config'].unique()) 392 | replace = not len(color) > df_len 393 | chosen_colors = np.random.choice(color, replace=replace, size=df_len) 394 | 395 | _, ax = plt.subplots(1, len(layout), figsize=(9 * len(layout), 7)) 396 | if len(layout) == 1: 397 | ax = (ax, ) 398 | 399 | for i, (title, log, roll_w) in enumerate(list(zip(*[layout, log_scale, rolling_window]))): 400 | name, attr = title.split('/') 401 | ndf = df[df['name'] == name] 402 | for (clabel, cdf), curr_color in zip(ndf.groupby("config"), chosen_colors): 403 | cdf = cdf.drop(['config', 'name'], axis=1).dropna(axis=1).astype('float') 404 | if average_repetitions: 405 | idf = cdf.groupby('iteration').mean().drop('repetition', axis=1) 406 | y_values = idf[attr].rolling(roll_w).mean().values 407 | if log: 408 | y_values = np.log(y_values) 409 | ax[i].plot(idf.index.values, y_values, label=str(clabel), color=curr_color) 410 | else: 411 | for repet, rdf in cdf.groupby('repetition'): 412 | rdf = rdf.drop('repetition', axis=1) 413 | y_values = 
def draw_histogram(df, layout, n_last):
    """Draw a histogram of an attribute averaged over the last iterations of each repetition.

    Parameters
    ----------
    df : DataFrame
        Research's results. Must contain ``iteration``, ``name`` and ``repetition``
        columns and the column named in ``layout``.
    layout : str
        String of the form ``"<name>/<attr>"``. The first part is matched against
        the ``name`` column, the second is the name of the column to be drawn.
    n_last : int
        Number of final iterations to average over: rows with
        ``iteration > max(iteration) - n_last`` are used.
    """
    name, attr = layout.split('/')
    max_iter = df['iteration'].max()
    selected = df[(df['iteration'] > max_iter - n_last) & (df['name'] == name)]
    # Pick the column before averaging: averaging the whole frame also tries to
    # average non-numeric columns such as ``name``, which raises in pandas >= 2.
    mean_val = selected.groupby('repetition')[attr].mean()

    plt.figure(figsize=(8, 6))
    plt.title('Histogram of {}'.format(attr))
    plt.hist(mean_val)
    plt.axvline(mean_val.mean(), color='b', linestyle='dashed', linewidth=1, label='mean {}'.format(attr))
    plt.legend()
    plt.show()
    print('Average value (Median) is {:.4}\nStd is {:.4}'.format(mean_val.median(), mean_val.std()))

def show_1d_heatmap(idf, figsize=None, save_to=None, dpi=300, **kwargs):
    """Plot point distribution within 1D bins.

    Parameters
    ----------
    idf : pandas.DataFrame
        Index DataFrame. Values of its first index level must be strings
        of the form ``"<line>/<pos>"`` where ``pos`` is a 1-based integer.
    figsize : tuple
        Output figure size.
    save_to : str, optional
        If given, save plot to the path specified.
    dpi : int
        Resolution for saved figure.
    kwargs : dict
        Named arguments for ```matplotlib.pyplot.imshow```.

    Returns
    -------
    Heatmap plot.
    """
    bin_counts = idf.groupby(level=[0]).size()
    bins = np.array([i.split('/') for i in bin_counts.index])

    bindf = pd.DataFrame(bins, columns=['line', 'pos'])
    # Lines are mapped to 1-based row numbers via their category codes.
    bindf['line_code'] = bindf['line'].astype('category').cat.codes + 1
    bindf = bindf.astype({'pos': 'int'})
    bindf['counts'] = bin_counts.values
    bindf = bindf.sort_values(by='line')

    brange = np.max(bindf[['line_code', 'pos']].values, axis=0)
    hist = np.zeros(brange, dtype=int)
    hist[bindf['line_code'].values - 1, bindf['pos'].values - 1] = bindf['counts'].values

    if figsize is not None:
        plt.figure(figsize=figsize)

    heatmap = plt.imshow(hist, **kwargs)
    plt.colorbar(heatmap)
    plt.yticks(np.arange(brange[0]), bindf['line'].drop_duplicates().values, fontsize=8)
    plt.xlabel("Bins index")
    plt.ylabel("Line index")
    # ``plt.axes()`` would add a brand new axes on top of the figure in current
    # matplotlib; the aspect must be set on the axes that holds the heatmap.
    plt.gca().set_aspect('auto')
    if save_to is not None:
        plt.savefig(save_to, dpi=dpi)

    plt.show()
514 | """ 515 | bin_counts = idf.groupby(level=[0]).size() 516 | bins = np.array([np.array(i.split('/')).astype(int) for i in bin_counts.index]) 517 | brange = np.max(bins, axis=0) 518 | 519 | hist = np.zeros(brange, dtype=int) 520 | hist[bins[:, 0] - 1, bins[:, 1] - 1] = bin_counts.values 521 | 522 | if figsize is not None: 523 | plt.figure(figsize=figsize) 524 | 525 | heatmap = plt.imshow(hist.T, origin='lower', **kwargs) 526 | plt.colorbar(heatmap) 527 | plt.xlabel('x-Bins') 528 | plt.ylabel('y-Bins') 529 | if save_to is not None: 530 | plt.savefig(save_to, dpi=dpi) 531 | plt.show() 532 | -------------------------------------------------------------------------------- /seismicpro/src/seismic_batch.py: -------------------------------------------------------------------------------- 1 | """Seismic batch.""" # pylint: disable=too-many-lines 2 | import os 3 | from textwrap import dedent 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from scipy import signal 7 | import pywt 8 | import segyio 9 | 10 | from ..batchflow import action, inbatch_parallel, Batch, any_action_failed 11 | 12 | from .seismic_index import SegyFilesIndex, FieldIndex 13 | 14 | from .utils import (FILE_DEPENDEND_COLUMNS, partialmethod, calculate_sdc_for_field, massive_block, 15 | check_unique_fieldrecord_across_surveys) 16 | from .file_utils import write_segy_file 17 | from .plot_utils import IndexTracker, spectrum_plot, seismic_plot, statistics_plot, gain_plot 18 | 19 | 20 | PICKS_FILE_HEADERS = ['FieldRecord', 'TraceNumber', 'timeOffset'] 21 | 22 | 23 | ACTIONS_DICT = { 24 | "clip": (np.clip, "numpy.clip", "clip values"), 25 | "gradient": (np.gradient, "numpy.gradient", "gradient"), 26 | "fft2": (np.fft.fft2, "numpy.fft.fft2", "a Discrete 2D Fourier Transform"), 27 | "ifft2": (np.fft.ifft2, "numpy.fft.ifft2", "an inverse Discrete 2D Fourier Transform"), 28 | "fft": (np.fft.fft, "numpy.fft.fft", "a Discrete Fourier Transform"), 29 | "ifft": (np.fft.ifft, "numpy.fft.ifft", "an 
def apply_to_each_component(method):
    """Make a method accept several components at once.

    The wrapped method is called once per ``(src, dst)`` pair, where pairs are
    formed by zipping the ``src`` and ``dst`` sequences.

    Parameters
    ----------
    method : callable
        Method to be decorated. Must accept ``src`` and ``dst`` keyword arguments.

    Returns
    -------
    decorator : callable
        Decorated method. It returns ``self`` if the first call of ``method``
        returned a ``SeismicBatch`` and the list of all call results otherwise.
    """
    def decorator(self, *args, src, dst=None, **kwargs):
        """Returned decorator."""
        src_names = (src, ) if isinstance(src, str) else src
        if dst is None:
            # No destination given: every component is processed in place.
            dst_names = src_names
        elif isinstance(dst, str):
            dst_names = (dst, )
        else:
            dst_names = dst

        res = [method(self, *args, src=isrc, dst=idst, **kwargs)
               for isrc, idst in zip(src_names, dst_names)]
        if isinstance(res[0], SeismicBatch):
            return self
        return res
    return decorator

def add_actions(actions_dict, template_docstring):
    """Add new methods to a class by binding callables as the ``func`` argument
    of its ``apply_along_axis`` method.

    Parameters
    ----------
    actions_dict : dict
        A dictionary, containing new methods' names as keys and a
        ``(callable, full_name, description)`` tuple for each method as values.
    template_docstring : str
        A string, that will be formatted for each new method from
        ``actions_dict`` using ``full_name`` and ``description`` parameters
        and assigned to its ``__doc__`` attribute.

    Returns
    -------
    decorator : callable
        Class decorator.
    """
    def decorator(cls):
        """Returned decorator."""
        for method_name, action_spec in actions_dict.items():
            func, full_name, description = action_spec
            method = partialmethod(cls.apply_along_axis, func)
            method.__doc__ = template_docstring.format(full_name=full_name, description=description)
            setattr(cls, method_name, method)

        return cls
    return decorator
129 | 130 | Parameters 131 | ---------- 132 | index : TraceIndex 133 | Unique identifiers for sets of seismic traces. 134 | preloaded : tuple, optional 135 | Data to put in the batch if given. Defaults to ``None``. 136 | 137 | Attributes 138 | ---------- 139 | index : TraceIndex 140 | Unique identifiers for sets of seismic traces. 141 | meta : dict 142 | Metadata about batch components. 143 | components : tuple 144 | Array containing all component's name. Updated only by ``_init_component`` function 145 | if new component comes from ``dst`` or by ``load`` function. 146 | 147 | Note 148 | ---- 149 | There are only two ways to add a new components to ``components`` attribute. 150 | 1. Using parameter ``components`` in ``load``. 151 | 2. Using parameter ``dst`` with init function named ``_init_component``. 152 | """ 153 | def __init__(self, index, *args, preloaded=None, **kwargs): 154 | super().__init__(index, *args, preloaded=preloaded, **kwargs) 155 | if preloaded is None: 156 | self.meta = dict() 157 | 158 | def _init_component(self, *args, dst, **kwargs): 159 | """Create and preallocate a new attribute with the name ``dst`` if it 160 | does not exist and return batch indices.""" 161 | _ = args, kwargs 162 | dst = (dst, ) if isinstance(dst, str) else dst 163 | 164 | for comp in dst: 165 | self.meta[comp] = self.meta[comp] if comp in self.meta else dict() 166 | 167 | if self.components is None or comp not in self.components: 168 | self.add_components(comp, init=self.array_of_nones) 169 | 170 | return self.indices 171 | 172 | def _post_filter_by_mask(self, mask, *args, **kwargs): 173 | """Component filtration using the union of all the received masks. 174 | 175 | Parameters 176 | ---------- 177 | mask : list 178 | List of masks if ``src`` is ``str`` 179 | or list of lists if ``src`` is list. 180 | 181 | Returns 182 | ------- 183 | : SeismicBatch 184 | New batch class of filtered components. 
185 | 186 | Note 187 | ---- 188 | All components will be changed with given mask and during the proccess, 189 | new SeismicBatch instance will be created. 190 | """ 191 | if any_action_failed(mask): 192 | all_errors = [error for error in mask if isinstance(error, Exception)] 193 | print(all_errors) 194 | raise ValueError(all_errors) 195 | 196 | _ = args 197 | src = kwargs.get('src', None) 198 | src = (src, ) if isinstance(src, str) else src 199 | 200 | mask = np.concatenate((np.array(mask))) 201 | new_idf = self.index.get_df(index=np.hstack((mask)), reset=False) 202 | new_index = new_idf.index.unique() 203 | 204 | batch_index = type(self.index).from_index(index=new_index, idf=new_idf, 205 | index_name=self.index.name) 206 | 207 | batch = type(self)(batch_index) 208 | batch.add_components(self.components) 209 | batch.meta = self.meta 210 | 211 | for comp in batch.components: 212 | setattr(batch, comp, np.array([None] * len(batch.index))) 213 | 214 | for i, index in enumerate(new_index): 215 | for isrc in batch.components: 216 | pos = self.get_pos(None, isrc, index) 217 | new_data = getattr(self, isrc)[pos][mask[pos]] 218 | getattr(batch, isrc)[i] = new_data 219 | return batch 220 | 221 | def trace_headers(self, header, flatten=False): 222 | """Get trace heades. 223 | 224 | Parameters 225 | ---------- 226 | header : string 227 | Header name. 228 | flatten : bool 229 | If False, array of headers will be splitted according to batch item sizes. 230 | If True, return a flattened array. Dafault to False. 
231 | 232 | Returns 233 | ------- 234 | arr : ndarray 235 | Arrays of trace headers.""" 236 | tracecounts = self.index.tracecounts 237 | values = self.index.get_df()[header].values 238 | if flatten: 239 | return values 240 | 241 | return np.array(np.split(values, np.cumsum(tracecounts)[:-1]) + [None])[:-1] 242 | 243 | @action 244 | @inbatch_parallel(init="_init_component", target="threads") 245 | @apply_to_each_component 246 | def apply_along_axis(self, index, func, *args, src, dst=None, slice_axis=0, **kwargs): 247 | """Apply function along specified axis of batch items. 248 | 249 | Parameters 250 | ---------- 251 | func : callable 252 | A function to apply. Must accept a trace as its first argument. 253 | src : str, array-like 254 | Batch component name to get the data from. 255 | dst : str, array-like 256 | Batch component name to put the result in. 257 | item_axis : int, default: 0 258 | Batch item axis to apply ``func`` along. 259 | slice_axis : int 260 | Axis to iterate data over. 261 | args : misc 262 | Any additional positional arguments to ``func``. 263 | kwargs : misc 264 | Any additional named arguments to ``func``. 265 | 266 | Returns 267 | ------- 268 | batch : SeismicBatch 269 | Transformed batch. Changes ``dst`` component. 270 | """ 271 | i = self.get_pos(None, src, index) 272 | src_data = getattr(self, src)[i] 273 | dst_data = np.array([func(x, *args, **kwargs) for x in np.rollaxis(src_data, slice_axis)]) 274 | getattr(self, dst)[i] = dst_data 275 | 276 | @action 277 | @inbatch_parallel(init="_init_component", target="threads") 278 | @apply_to_each_component 279 | def band_pass_filter(self, index, *args, src, dst=None, lowcut=None, highcut=None, fs=1, order=5): 280 | """Apply a band pass filter. 281 | 282 | Parameters 283 | ---------- 284 | src : str, array-like 285 | The batch components to get the data from. 286 | dst : str, array-like 287 | The batch components to put the result in. 288 | lowcut : real, optional 289 | Lowcut frequency. 
290 | highcut : real, optional 291 | Highcut frequency. 292 | order : int 293 | The order of the filter. 294 | fs : real 295 | Sampling rate. 296 | 297 | Returns 298 | ------- 299 | batch : SeismicBatch 300 | Batch with filtered traces. 301 | """ 302 | _ = args 303 | i = self.get_pos(None, src, index) 304 | traces = getattr(self, src)[i] 305 | nyq = 0.5 * fs 306 | if lowcut is None: 307 | b, a = signal.butter(order, highcut / nyq, btype='high') 308 | elif highcut is None: 309 | b, a = signal.butter(order, lowcut / nyq, btype='low') 310 | else: 311 | b, a = signal.butter(order, [lowcut / nyq, highcut / nyq], btype='band') 312 | 313 | getattr(self, dst)[i] = signal.lfilter(b, a, traces) 314 | 315 | @action 316 | @inbatch_parallel(init="_init_component", target="threads") 317 | @apply_to_each_component 318 | def to_2d(self, index, *args, src, dst=None, length_alignment=None, pad_value=0): 319 | """Convert array of 1d arrays to 2d array. 320 | 321 | Parameters 322 | ---------- 323 | src : str, array-like 324 | The batch components to get the data from. 325 | dst : str, array-like 326 | The batch components to put the result in. 327 | length_alignment : str, optional 328 | Defines what to do with arrays of diffetent lengths. 329 | If 'min', cut the end by minimal array length. 330 | If 'max', pad the end to maximal array length. 331 | If None, try to put array to 2d array as is. 332 | 333 | Returns 334 | ------- 335 | batch : SeismicBatch 336 | Batch with items converted to 2d arrays. 
337 | """ 338 | _ = args 339 | pos = self.get_pos(None, src, index) 340 | data = getattr(self, src)[pos] 341 | if data is None or len(data) == 0: 342 | return 343 | 344 | try: 345 | data_2d = np.vstack(data) 346 | except ValueError as err: 347 | if length_alignment is None: 348 | raise ValueError(str(err) + '\nTry to set length_alingment to \'max\' or \'min\'') 349 | if length_alignment == 'min': 350 | nsamples = min([len(t) for t in data]) 351 | elif length_alignment == 'max': 352 | nsamples = max([len(t) for t in data]) 353 | else: 354 | raise NotImplementedError('Unknown length_alingment') 355 | shape = (len(data), nsamples) 356 | data_2d = np.full(shape, pad_value) 357 | for i, arr in enumerate(data): 358 | data_2d[i, :len(arr)] = arr[:nsamples] 359 | 360 | getattr(self, dst)[pos] = data_2d 361 | 362 | @action 363 | def dump(self, src, fmt, path, **kwargs): 364 | """Export data to file. 365 | 366 | Parameters 367 | ---------- 368 | src : str 369 | Batch component to dump data from. 370 | fmt : str 371 | Output data format. 372 | 373 | Returns 374 | ------- 375 | batch : SeismicBatch 376 | Unchanged batch. 377 | """ 378 | if fmt.lower() in ['sgy', 'segy']: 379 | return self._dump_segy(src, path, **kwargs) 380 | if fmt == 'picks': 381 | return self._dump_picking(src, path, **kwargs) 382 | raise NotImplementedError('Unknown format.') 383 | 384 | @action 385 | def _dump_segy(self, src, path, split=True): 386 | """Dump data to segy files. 387 | 388 | Parameters 389 | ---------- 390 | path : str 391 | Path for output files. 392 | src : str 393 | Batch component to dump data from. 394 | split : bool 395 | Whether to dump batch items into separate files. 396 | 397 | Returns 398 | ------- 399 | batch : SeismicBatch 400 | Unchanged batch. 
    @inbatch_parallel(init="indices", target="threads")
    def _dump_split_segy(self, index, src, path):
        """Dump a single batch item to its own segy file.

        The file is written to ``<path>/<index>.sgy``. Trace headers are taken from
        the index dataframe rows of this item; only columns that are not listed in
        ``FILE_DEPENDEND_COLUMNS`` and are attributes of ``segyio.TraceField`` are kept.

        Parameters
        ----------
        index : misc
            Index of the batch item to dump.
        src : str
            Batch component to dump data from.
        path : str
            Directory for output files.

        Returns
        -------
        batch : SeismicBatch
            Unchanged batch.
        """
        pos = self.get_pos(None, src, index)
        data = np.atleast_2d(getattr(self, src)[pos])

        path = os.path.join(path, str(index) + '.sgy')

        df = self.index.get_df([index], reset=False)
        # Headers must follow the same trace order as the data of the component.
        sort_by = self.meta[src]['sorting']
        if sort_by is not None:
            df = df.sort_values(by=sort_by)

        # Keep the index as a column only if it is a named one.
        df.reset_index(drop=self.index.name is None, inplace=True)
        headers = list(set(df.columns.levels[0]) - set(FILE_DEPENDEND_COLUMNS))
        segy_headers = [h for h in headers if hasattr(segyio.TraceField, h)]
        df = df[segy_headers]
        # Columns are two-level; only the first (header name) level is written.
        df.columns = df.columns.droplevel(1)

        write_segy_file(data, df, self.meta[src]['samples'], path)

        return self

    def _dump_single_segy(self, src, path):
        """Dump all batch items to a single segy file.

        Data of all items are stacked in batch order; headers are taken from the
        whole index dataframe, optionally sorted and then reordered by ``self.indices``.

        Parameters
        ----------
        src : str
            Batch component to dump data from.
        path : str
            Path of the output file.

        Returns
        -------
        batch : SeismicBatch
            Unchanged batch.
        """
        data = np.vstack(getattr(self, src))

        df = self.index.get_df(reset=False)
        sort_by = self.meta[src]['sorting']
        if sort_by is not None:
            df = df.sort_values(by=sort_by)

        # Regroup rows in the order of batch items to match the stacked data.
        df = df.loc[self.indices]
        df.reset_index(drop=self.index.name is None, inplace=True)
        headers = list(set(df.columns.levels[0]) - set(FILE_DEPENDEND_COLUMNS))
        segy_headers = [h for h in headers if hasattr(segyio.TraceField, h)]
        df = df[segy_headers]
        df.columns = df.columns.droplevel(1)

        write_segy_file(data, df, self.meta[src]['samples'], path)

        return self
460 | traces : str 461 | Batch component with corresponding traces. 462 | to_samples : bool 463 | Should be picks converted to time samples. 464 | columns: array_like, optional 465 | Columns to include in the output file. See PICKS_FILE_HEADERS for default format. 466 | 467 | Returns 468 | ------- 469 | batch : SeismicBatch 470 | Batch unchanged. 471 | """ 472 | data = getattr(self, src).astype(int) 473 | if to_samples: 474 | data = self.meta[traces]['samples'][data] 475 | 476 | if columns is None: 477 | columns = PICKS_FILE_HEADERS 478 | 479 | df = self.index.get_df(reset=False) 480 | sort_by = self.meta[traces]['sorting'] 481 | if sort_by is not None: 482 | df = df.sort_values(by=sort_by) 483 | 484 | df = df.loc[self.indices] 485 | df['timeOffset'] = data.astype(int) 486 | df = df.reset_index(drop=self.index.name is None)[columns] 487 | df.columns = df.columns.droplevel(1) 488 | 489 | for i in [0, 2, 4]: 490 | df.insert(i, str(i), "") 491 | df.to_csv(path, index=False, sep='\t', header=False, encoding='ascii', mode='a') 492 | return self 493 | 494 | @action 495 | def load(self, src=None, fmt=None, components=None, **kwargs): 496 | """Load data into components. 497 | 498 | Parameters 499 | ---------- 500 | src : misc, optional 501 | Source to load components from. 502 | fmt : str, optional 503 | Source format. 504 | components : str or array-like, optional 505 | Components to load. 506 | **kwargs: dict 507 | Any kwargs to be passed to load method. 508 | 509 | Returns 510 | ------- 511 | batch : SeismicBatch 512 | Batch with loaded components. 
513 | """ 514 | if fmt.lower() in ['sgy', 'segy']: 515 | return self._load_segy(src=components, dst=components, **kwargs) 516 | if fmt == 'picks': 517 | return self._load_picking(components=components) 518 | 519 | return super().load(src=src, fmt=fmt, components=components, **kwargs) 520 | 521 | def _load_picking(self, components): 522 | """Load picking from file.""" 523 | idf = self.index.get_df(reset=False) 524 | res = np.split(idf.FIRST_BREAK_TIME.values, 525 | np.cumsum(self.index.tracecounts))[:-1] 526 | self.add_components(components, init=res) 527 | return self 528 | 529 | @apply_to_each_component 530 | def _load_segy(self, src, dst, tslice=None): 531 | """Load data from segy files. 532 | 533 | Parameters 534 | ---------- 535 | src : str, array-like 536 | Component to load. 537 | dst : str, array-like 538 | The batch component to put loaded data in. 539 | tslice: slice, optional 540 | Load a trace subset given by slice. 541 | 542 | Returns 543 | ------- 544 | batch : SeismicBatch 545 | Batch with loaded components. 
    @inbatch_parallel(init="_init_component", target="threads")
    def _load_from_segy_file(self, index, *args, src, dst, tslice=None):
        """Load traces from a single segy file.

        Parameters
        ----------
        index : str
            Batch item index; it is used as the path of the segy file.
        src : str
            Second-level name of the ``TRACE_SEQUENCE_FILE`` column that holds
            1-based numbers of traces to read from the file.
        dst : str
            The batch component to put loaded data in.
        tslice : slice, optional
            Load a trace subset given by slice. Load whole traces if None.

        Returns
        -------
        batch : SeismicBatch
            Batch with loaded traces. ``samples``, ``interval`` and ``sorting``
            of ``meta[dst]`` are set while processing the first batch item.
        """
        _ = src, args
        pos = self.get_pos(None, "indices", index)
        path = index
        trace_seq = self.index.get_df([index])[('TRACE_SEQUENCE_FILE', src)]
        if tslice is None:
            tslice = slice(None)

        with segyio.open(path, strict=False) as segyfile:
            # Trace numbers in the dataframe are 1-based, segyio positions are 0-based.
            traces = np.atleast_2d([segyfile.trace[i - 1][tslice] for i in
                                    np.atleast_1d(trace_seq).astype(int)])
            samples = segyfile.samples[tslice]
            interval = segyfile.bin[segyio.BinField.Interval]

        getattr(self, dst)[pos] = traces
        # Metadata is recorded once per batch, taken from the first file.
        if index == self.indices[0]:
            self.meta[dst]['samples'] = samples
            self.meta[dst]['interval'] = interval
            self.meta[dst]['sorting'] = None

        return self
601 | dst : str, array-like 602 | The batch components to put the result in. 603 | slice_obj : slice 604 | Slice to extract from traces. 605 | 606 | Returns 607 | ------- 608 | batch : SeismicBatch 609 | Batch with sliced traces. 610 | """ 611 | _ = args 612 | pos = self.get_pos(None, src, index) 613 | data = getattr(self, src)[pos] 614 | getattr(self, dst)[pos] = data[:, slice_obj] 615 | return self 616 | 617 | @action 618 | @inbatch_parallel(init="_init_component", target="threads") 619 | @apply_to_each_component 620 | def pad_traces(self, index, *args, src, dst=None, **kwargs): 621 | """ 622 | Pad traces with ```numpy.pad```. 623 | 624 | Parameters 625 | ---------- 626 | src : str, array-like 627 | The batch components to get the data from. 628 | dst : str, array-like 629 | The batch components to put the result in. 630 | kwargs : dict 631 | Named arguments to ```numpy.pad```. 632 | 633 | Returns 634 | ------- 635 | batch : SeismicBatch 636 | Batch with padded traces. 637 | """ 638 | _ = args 639 | pos = self.get_pos(None, src, index) 640 | data = getattr(self, src)[pos] 641 | pad_width = kwargs['pad_width'] 642 | if isinstance(pad_width, int): 643 | pad_width = (pad_width, pad_width) 644 | 645 | kwargs['pad_width'] = [(0, 0)] + [pad_width] + [(0, 0)] * (data.ndim - 2) 646 | getattr(self, dst)[pos] = np.pad(data, **kwargs) 647 | return self 648 | 649 | @action 650 | @inbatch_parallel(init="_init_component", target="threads") 651 | @apply_to_each_component 652 | def sort_traces(self, index, *args, src, sort_by, dst=None): 653 | """Sort traces. 654 | 655 | Parameters 656 | ---------- 657 | src : str, array-like 658 | The batch components to get the data from. 659 | dst : str, array-like 660 | The batch components to put the result in. 661 | sort_by: str 662 | Sorting key. 663 | 664 | Returns 665 | ------- 666 | batch : SeismicBatch 667 | Batch with new trace sorting. 
@action
@inbatch_parallel(init="indices", post='_post_filter_by_mask', target="threads")
@apply_to_each_component
def drop_zero_traces(self, index, src, num_zero, **kwargs):
    """Build a keep-mask that rejects traces with long gaps between non-zero samples.

    For every trace the positions of non-zero samples are collected and the
    trace length is appended as a closing mark. A trace is kept when the
    largest distance between consecutive marks is smaller than ```num_zero```;
    a trace with no non-zero samples at all is always rejected.

    NOTE(review): zeros located before the first non-zero sample are not
    measured by this procedure -- confirm that this is intended.

    Parameters
    ----------
    num_zero : int
        Threshold for the distance between consecutive non-zero samples.
    src : str, array-like
        The batch components to get the data from.

    Returns
    -------
    mask : list of bool
        One flag per trace of the batch item. The flags are handed over to
        ```_post_filter_by_mask``` (presumably it drops the ```False``` traces
        and returns the filtered batch -- defined outside this block).
    """
    _ = kwargs
    item_pos = self.get_pos(None, src, index)
    nonzero_flags = getattr(self, src)[item_pos] != 0

    def keep_trace(flags):
        marks = np.append(np.where(flags)[0], len(flags))
        gaps = np.diff(marks)
        if len(gaps) == 0:
            return False
        return np.max(gaps) < num_zero

    return [keep_trace(flags) for flags in nonzero_flags]
723 | dst : str, array-like 724 | The batch components to put the result in. 725 | num_mean_tr : int ,optional default 4 726 | Number of timestamps to meaning new amplitude value. 727 | sample_time : int, float, optional 728 | Difference between real time and samples. Note that ```sample_time``` is measured in milliseconds. 729 | 730 | Returns 731 | ------- 732 | : SeismicBatch 733 | Traces straightened on the basis of speed and time values. 734 | 735 | Note 736 | ---- 737 | 1. Works only with sorted traces by offset. 738 | 2. Works properly only with FieldIndex with CDP index. 739 | 740 | Raises 741 | ------ 742 | ValueError : Raise if traces is not sorted by offset. 743 | """ 744 | dst = src if dst is None else dst 745 | pos = self.get_pos(None, src, index) 746 | field = getattr(self, src)[pos] 747 | 748 | offset = np.sort(self.index.get_df(index=index)['offset']) 749 | speed_conc = np.array(speed[:field.shape[1]]) 750 | 751 | if self.meta[src]['sorting'] != 'offset': 752 | raise ValueError('All traces should be sorted by offset not {}'.format(self.meta[src]['sorting'])) 753 | if 'samples' in self.meta[src].keys(): 754 | sample_time = np.diff(self.meta[src]['samples'][:2])[0] 755 | elif sample_time is None: 756 | raise ValueError('Sample time should be specified or by self.meta[src] or by sample_time.') 757 | 758 | if len(speed_conc) != field.shape[1]: 759 | raise ValueError('Speed must have shape equal to trace length, not {} but {}'.format(speed_conc.shape[0], 760 | field.shape[1])) 761 | t_zero = (np.arange(1, field.shape[1]+1)*sample_time)/1000 762 | time_range = np.arange(0, field.shape[1]) 763 | new_field = [] 764 | calc_delta = lambda t_z, spd, ofst: t_z*((1 + (ofst/(spd*t_z+1e-6))**2)**.5 - 1) 765 | 766 | for ix, off in enumerate(offset): 767 | time_x = calc_delta(t_zero, speed_conc, off) 768 | shift = np.round((time_x*1000)/sample_time).astype(int) 769 | down_ix = time_range + shift 770 | 771 | left = -int(num_mean_tr/2) + (~num_mean_tr % 2) 772 | right 
@action
def correct_spherical_divergence(self, src, dst, speed, params, time=None):
    """Correct spherical divergence with given (or previously estimated) parameters.

    The parameters can be set by hand, or estimated beforehand with
    ```find_sdc_params``` of `SeismicDataset`, stored in a dataset attribute or
    a pipeline variable and passed to this action as `params`.

    Parameters
    ----------
    src : str
        The batch components to get the data from.
    dst : str
        The batch components to put the result in.
    speed : array
        Wave propagation speed. It is cast to int and subsampled with the
        step between the first two time values.
        NOTE(review): the original docs say speed is "measured in milliseconds";
        presumably one value per millisecond -- confirm.
    params : array of floats(or ints) with length 2
        Container with parameters in the following order: [v_pow, t_pow].
    time : array, optional
        Trace time values, cast to int. If `None` defaults to
        self.meta[src]['samples'].

    Returns
    -------
    : SeismicBatch
        Batch of shot gathers with corrected spherical divergence.

    Raises
    ------
    ValueError : If Index is not FieldIndex.
    ValueError : If length of ```params``` not equal to 2.
    """
    index_type = type(self.index)
    if not isinstance(self.index, FieldIndex):
        raise ValueError("Index must be FieldIndex, not {}".format(index_type))

    n_params = len(params)
    if n_params != 2:
        raise ValueError("The length of the ```params``` must be equal to two, not {}.".format(n_params))

    if time is None:
        time = self.meta[src]['samples']
    else:
        time = np.array(time, dtype=int)

    # Distance between the first two time values defines the speed subsampling step.
    step = np.diff(time[:2])[0].astype(int)
    speed = np.array(speed, dtype=int)[::step]
    v_pow, t_pow = params

    self._correct_sph_div(src=src, dst=dst, time=time, speed=speed, v_pow=v_pow, t_pow=t_pow)
    return self
def seismic_plot(self, src, index, wiggle=False, xlim=None, ylim=None, std=1, # pylint: disable=too-many-branches, too-many-arguments
                 src_picking=None, s=None, scatter_color=None, figsize=None,
                 save_to=None, dpi=None, line_color=None, title=None, **kwargs):
    """Plot seismic traces.

    Parameters
    ----------
    src : str or array of str
        The batch component(s) with data to show. A single name, or any
        sequence of names (including a sequence with one element).
    index : same type as batch.indices
        Data index to show.
    wiggle : bool, default to False
        Show traces in a wiggle form.
    xlim : tuple, optional
        Range in x-axis to show.
    ylim : tuple, optional
        Range in y-axis to show.
    std : scalar, optional
        Amplitude scale for traces in wiggle form.
    src_picking : str
        Component with picking data. Picking values are divided by
        ```self.meta[src[0]]['interval'] / 1e3``` before plotting.
    s : scalar or array_like, shape (n, ), optional
        The marker size in points**2.
    scatter_color : color, sequence, or sequence of color, optional
        The marker color.
    figsize : array-like, optional
        Output plot size.
    save_to : str or None, optional
        If not None, save plot to given path.
    dpi : int, optional, default: None
        The resolution argument for matplotlib.pyplot.savefig.
    line_color : color, sequence, or sequence of color, optional, default: None
        The trace color.
    title : str
        Plot title.
    kwargs : dict
        Additional keyword arguments for plot.

    Returns
    -------
    batch : SeismicBatch
        The batch itself; the figure is produced as a side effect.
    """
    pos = self.get_pos(None, 'indices', index)

    # Only a bare string is wrapped. The former length-based check also wrapped
    # one-element sequences such as ['raw'], which then failed in getattr.
    if isinstance(src, str):
        src = (src,)

    if src_picking is not None:
        rate = self.meta[src[0]]['interval'] / 1e3
        picking = getattr(self, src_picking)[pos] / rate
        pts_picking = (range(len(picking)), picking)
    else:
        pts_picking = None

    arrs = [getattr(self, isrc)[pos] for isrc in src]
    names = [' '.join([i, str(index)]) for i in src]
    seismic_plot(arrs=arrs, wiggle=wiggle, xlim=xlim, ylim=ylim, std=std,
                 pts=pts_picking, s=s, scatter_color=scatter_color,
                 figsize=figsize, names=names, save_to=save_to,
                 dpi=dpi, line_color=line_color, title=title, **kwargs)
    return self
def gain_plot(self, src, index, window=51, xlim=None, ylim=None,
              figsize=None, names=None, **kwargs):
    """Plot the gain graph: the ratio of the maximum mean amplitude to the
    mean amplitude at the moment t.

    All plotting arguments are forwarded unchanged to the module-level
    ```gain_plot``` function.

    Parameters
    ----------
    src : str or array of str
        The batch component(s) with data to show.
    index : same type as batch.indices
        Data index to show.
    window : int, default 51
        Size of the smoothing window.
    xlim : tuple or list with size 2
        Bounds for plot's x-axis.
    ylim : tuple or list with size 2
        Bounds for plot's y-axis.
    figsize : array-like, optional
        Output plot size.
    names : str or array-like, optional
        Title names to identify subplots.
    kwargs : dict
        Additional keyword arguments for the plotting function.

    Returns
    -------
    batch : SeismicBatch
        The batch itself; the figure is produced as a side effect.
    """
    item_pos = self.get_pos(None, 'indices', index)
    if isinstance(src, str):
        src = (src, )

    gathers = []
    for component in src:
        gathers.append(getattr(self, component)[item_pos])

    gain_plot(gathers, window, xlim, ylim, figsize, names, **kwargs)
    return self
@action
def standardize(self, src, dst):
    """Standardize traces to zero mean and unit variance.

    Every trace (row) is centered with its own mean and divided by its own
    standard deviation plus 1e-6, which protects constant traces from
    division by zero.

    Parameters
    ----------
    src : str
        The batch components to get the data from.
    dst : str
        The batch components to put the result in.

    Returns
    -------
    batch : SeismicBatch
        Batch with the standardized traces.
    """
    items = getattr(self, src)
    data = np.concatenate(items)
    std_data = (data - np.mean(data, axis=1, keepdims=True)) / (np.std(data, axis=1, keepdims=True) + 10 ** -6)

    # Split the stacked traces back into per-item blocks.
    traces_in_item = [len(item) for item in items]
    ind = np.cumsum(traces_in_item)[:-1]
    pieces = np.split(std_data, ind)

    # Fill a 1-D object array element by element. Relying on
    # ``np.array(pieces + [None])[:-1]`` needs implicit ragged-array creation,
    # which raises ValueError on NumPy >= 1.24.
    dst_data = np.empty(len(pieces), dtype=object)
    for i, piece in enumerate(pieces):
        dst_data[i] = piece

    setattr(self, dst, dst_data)
    return self
@action
def mcm(self, src, dst, eps=3, length_win=12):
    """Creates for each trace corresponding Energy function.
    Based on Coppens(1985) method.

    For every sample the energy accumulated in the leading window of
    ```length_win``` samples is divided by the energy accumulated from the
    start of the trace plus ```eps```.

    Parameters
    ----------
    src : str
        The batch components to get the data from.
    dst : str
        The batch components to put the result in.
    eps: float, default: 3
        Stabilization constant that helps reduce the rapid fluctuations of energy function.
    length_win: int, default: 12
        The leading window length.

    Returns
    -------
    batch : SeismicBatch
        Batch with the energy function.

    Raises
    ------
    ValueError : If ```length_win``` is not positive.
    """
    if length_win <= 0:
        raise ValueError("length_win must be a positive integer, not {}".format(length_win))

    trace = np.concatenate(getattr(self, src))

    # Energy accumulated from the start of each trace (the "long" window).
    long_win = np.cumsum(trace**2, axis=1)

    # The leading window needs its own copy. Previously both windows were the
    # same array, so the subtraction below also altered the denominator.
    lead_win = long_win.copy()
    lead_win[:, length_win:] -= long_win[:, :-length_win]

    energy = lead_win / (long_win + eps)

    # One 1-D row per batch item, stored in an explicitly built object array
    # (implicit ragged creation raises ValueError on NumPy >= 1.24).
    items = np.empty(len(energy), dtype=object)
    for i, row in enumerate(energy):
        items[i] = row

    self.add_components(dst, init=items)
    return self
1187 | 1188 | `params` argument should contain a dictionary in a following form: 1189 | 1190 | {survey_name: 95th_perc, ...}, 1191 | 1192 | where `95_perc` is an estimate for 95th percentile of absolute 1193 | values for seismic survey with `survey_name`. 1194 | 1195 | One way to obtain such a dictionary is to use 1196 | `SeismicDataset.find_equalization_params' method, which calculates 1197 | esimated and saves them to `SeismicDataset`'s attribute. This method 1198 | can be used from pipeline. 1199 | 1200 | Other way is to provide user-defined dictionary for `params` argument. 1201 | 1202 | Parameters 1203 | ---------- 1204 | src : str 1205 | The batch components to get the data from. 1206 | dst : str 1207 | The batch components to put the result in. 1208 | params : dict or NamedExpr 1209 | Containter with parameters for equalization. 1210 | survey_id_col : str, optional 1211 | Column in index that indicate names of seismic 1212 | surveys from different seasons. 1213 | Optional if `params` is a result of `SeismicDataset`'s 1214 | method `find_equalization_params`. 1215 | 1216 | Returns 1217 | ------- 1218 | : SeismicBatch 1219 | Batch of shot gathers with equalized data. 1220 | 1221 | Raises 1222 | ------ 1223 | ValueError : If index is not FieldIndex. 1224 | ValueError : If shot gather with same id is contained in more 1225 | than one survey. 1226 | 1227 | Note 1228 | ---- 1229 | Works properly only with FieldIndex. 1230 | If `params` dict is user-defined, `survey_id_col` should be 1231 | provided excplicitly either as argument, or as `params` dict key-value 1232 | pair. 
class SeismicDataset(Dataset):
    """Dataset for seismic data; batches are ``SeismicBatch`` instances by default."""

    def __init__(self, index, batch_class=SeismicBatch, preloaded=None, *args, **kwargs):
        super().__init__(index, batch_class=batch_class, preloaded=preloaded, *args, **kwargs)

    def find_sdc_params(self, component, speed, loss, indices=None, time=None, initial_point=None,
                        method='Powell', bounds=None, tslice=None, **kwargs):
        """ Find optimal parameters for correction of spherical divergence.

        A batch is created for ``indices``, loaded from SEGY, and ``loss`` is
        minimized with ```scipy.optimize.minimize``` using the first loaded
        item, the time values and the subsampled speed as extra arguments.

        Parameters
        ----------
        component : str
            Component with shot gathers.
        speed : array
            Wave propagation speed. It is cast to int and subsampled with the
            step between the first two time values.
        loss : callable
            Function to minimize; called as ``loss(point, field, time, speed)``.
        indices : array-like, optional
            Which items from dataset to load for parameter estimation.
            If `None`, defaults to first element of dataset.
        time : array, optional
            Trace time values, cast to int. If `None` defaults to the
            ``'samples'`` entry of the loaded batch meta for ``component``.
        initial_point : array of 2
            Starting values for $v_{pow}$ and $t_{pow}$.
            If None defaults to $v_{pow}=2$ and $t_{pow}=1$.
        method : str, optional, default ```Powell```
            Minimization method, see ```scipy.optimize.minimize```.
        bounds : sequence, optional
            Sequence of (min, max) optimization bounds for each parameter.
            If `None` defaults to ((0, 5), (0, 5)).
        tslice : slice, optional
            Slice of trace samples to load.
        kwargs : dict
            Additional named arguments to ```scipy.optimize.minimize```.

        Returns
        -------
        : array
            Coefficients for speed and time.

        Raises
        ------
        ValueError : If Index is not FieldIndex.

        Note
        ----
        To save parameters as SeismicDataset attribute use ```save_to=D('attr_name')``` (works only
        in pipeline).
        If you want to save parameters to pipeline variable use save_to argument with following
        syntax: ```save_to=V('variable_name')```.
        """
        if not isinstance(self.index, FieldIndex):
            raise ValueError("Index must be FieldIndex, not {}".format(type(self.index)))

        if indices is None:
            indices = self.indices[:1]

        loaded = self.create_batch(indices).load(components=component, fmt='segy', tslice=tslice)
        field = getattr(loaded, component)[0]
        samples = loaded.meta[component]['samples']

        if bounds is None:
            bounds = ((0, 5), (0, 5))
        if initial_point is None:
            initial_point = (2, 1)

        if time is None:
            time = samples
        else:
            time = np.array(time, dtype=int)

        # Distance between the first two time values defines the speed subsampling step.
        step = np.diff(time[:2])[0].astype(int)
        speed = np.array(speed, dtype=int)[::step]

        result = minimize(loss, initial_point, args=(field, time, speed),
                          method=method, bounds=bounds, **kwargs)
        return result.x

    def find_equalization_params(self, batch, component, survey_id_col, sample_size=10000,
                                 container_name='equal_params', **kwargs):
        """ Estimate 95th percentile of absolute values for each seismic survey
        in dataset for equalization.

        One TDigest per survey is created on the first call and kept in the
        private attribute ``'_' + container_name``; every call updates the
        digests with a random sample from each item of ``batch`` and then
        rewrites the public ``container_name`` attribute.

        Parameters
        ----------
        batch : SeismicBatch or B() named expression.
            Current batch from pipeline.
        component : str
            Component with shot gathers.
        survey_id_col : str
            Column in index that indicate names of seismic
            surveys from different seasons.
        sample_size: int, optional
            Number of elements to draw (with ``np.random.choice``) from each
            shot gather to update the TDigest estimates. Default is 10000.
        container_name: str, optional
            Name of the `SeismicDataset` attribute to store a dict
            with estimated percentile. Also contains `survey_id_col`
            key and corresponding value.
        kwargs: misc
            Parameters for TDigest objects: ``delta`` (default 0.01) and
            ``K`` (default 25). They are read only when the digests are created.

        Raises
        ------
        ValueError : If index is not FieldIndex.
        ValueError : If shot gather with same id is contained in more
                     than one survey (presumably raised by
                     ``check_unique_fieldrecord_across_surveys`` -- defined elsewhere).

        Note
        ----
        Dictionary with estimated percentile can be obtained from pipeline using `D(container_name)`.
        """
        if not isinstance(self.index, FieldIndex):
            raise ValueError("Index must be FieldIndex, not {}".format(type(self.index)))

        private_name = '_' + container_name
        digests = getattr(self, private_name, None)
        if digests is None:
            surveys = np.unique(self.index.get_df()[survey_id_col])
            delta = kwargs.pop('delta', 0.01)
            k = kwargs.pop('K', 25)
            digests = {name: TDigest(delta, k) for name in surveys}
            setattr(self, private_name, digests)

        for idx in batch.indices:
            item_surveys = np.unique(batch.index.get_df(index=idx)[survey_id_col])
            check_unique_fieldrecord_across_surveys(item_surveys, idx)
            survey = item_surveys[0]

            pos = batch.get_pos(None, component, idx)
            values = getattr(batch, component)[pos].reshape(-1)
            sample = np.random.choice(values, size=sample_size)

            digests[survey].batch_update(np.absolute(sample))

        # Surveys whose digest has not received any data yet are left out.
        statistics = {name: digest.percentile(95)
                      for name, digest in digests.items() if digest.n > 0}
        statistics['survey_id_col'] = survey_id_col
        setattr(self, container_name, statistics)
show_1d_heatmap 11 | 12 | 13 | class TraceIndex(DatasetIndex): 14 | """Index for individual seismic traces. 15 | 16 | Parameters 17 | ---------- 18 | kwargs : dict 19 | Named arguments for ```build_df``` method. 20 | Can be either a set of ```dfr```, ```dfs```, ```dfx``` arguments for 21 | building index from SPS files, or named arguments for ```batchflow.FilesIndex``` 22 | for building index from SEGY files. 23 | 24 | Attributes 25 | ---------- 26 | index_name : str or tuple of str 27 | Name of the DataFrame index. 28 | meta : dict 29 | Metadata about index. 30 | _idf : DataFrame 31 | DataFrame with rows corresponding to seismic traces and columns with metadata about 32 | traces. Set of columns includes FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE, file_id and 33 | a number of extra_headers for index built from SEGY files or FieldRecord, TraceNumber and 34 | extra SPS file columns for index built from SPS files. 35 | """ 36 | def __init__(self, *args, index_name=None, **kwargs): 37 | self.meta = {} 38 | self._idf = pd.DataFrame() 39 | self._idf.index.name = index_name 40 | super().__init__(*args, **kwargs) 41 | 42 | @property 43 | def tracecounts(self): 44 | """Return a number of indexed traces for each index.""" 45 | return self._idf.groupby(self._idf.index, sort=False).size().values 46 | 47 | @property 48 | def name(self): 49 | """Return a number of indexed traces.""" 50 | return self._idf.index.name 51 | 52 | def get_df(self, index=None, reset=True): 53 | """Return index DataFrame. 54 | 55 | Parameters 56 | ---------- 57 | index : array-like, optional 58 | Subset of indices to loc from DataFrame. If None, get all the DataFrame. 59 | reset : bool, default to True 60 | Reset named DataFrame index. 61 | 62 | Returns 63 | ------- 64 | df : DataFrame 65 | Index DataFrame. 
66 | """ 67 | if index is None: 68 | df = self._idf 69 | else: 70 | df = self._idf.loc[index] 71 | 72 | if reset: 73 | return df.reset_index(drop=self.name is None) 74 | 75 | return df 76 | 77 | def head(self, *args, **kwargs): 78 | """Return the first n rows of the index DataFrame. 79 | 80 | Parameters 81 | ---------- 82 | args : misc 83 | Positional arguments to ```DataFrame.head```. 84 | kwargs : dict 85 | Named arguments to ```DataFrame.head```. 86 | 87 | Returns 88 | ------- 89 | First n rows of the index DataFrame. 90 | """ 91 | return self._idf.head(*args, **kwargs) 92 | 93 | def tail(self, *args, **kwargs): 94 | """Return the last n rows of the index DataFrame. 95 | 96 | Parameters 97 | ---------- 98 | args : misc 99 | Positional arguments to ```DataFrame.tail```. 100 | kwargs : dict 101 | Named arguments to ```pDataFrame.tail```. 102 | 103 | Returns 104 | ------- 105 | Last n rows of the index DataFrame. 106 | """ 107 | return self._idf.tail(*args, **kwargs) 108 | 109 | def filter(self, columns, cond): 110 | """Filter DataFrame by condition. Only rows that meet the condition will kept. 111 | 112 | Parameters 113 | ---------- 114 | columns : str, tuple or list 115 | Group of columns that should meet the condition. 116 | cond : callable 117 | Condition to be evaluated. 118 | 119 | Returns 120 | ------- 121 | index : type(self) 122 | Filtered index. 123 | """ 124 | df = self.get_df() 125 | if isinstance(df[columns], pd.Series): 126 | df = df.loc[df[columns].apply(cond)] 127 | else: 128 | df = df.loc[df[columns].apply(cond, axis='columns')] 129 | 130 | if self.name is not None: 131 | df.set_index(self.name, inplace=True) 132 | 133 | indices = df.index.unique().sort_values() 134 | return type(self).from_index(index=indices, idf=df, index_name=self.name) 135 | 136 | def duplicated(self, keep='first'): 137 | """Get mask of duplicated ('FieldRecord', 'TraceNumber') pairs. 
138 | 139 | Parameters 140 | ---------- 141 | keep : {‘first’, ‘last’, False}, default ‘first’ 142 | ‘first’ : Mark duplicates as True except for the first occurrence. 143 | ‘last’ : Mark duplicates as True except for the last occurrence. 144 | False : Mark all duplicates as True. 145 | 146 | Returns 147 | ------- 148 | mask : Series 149 | Mask of duplicated items. 150 | """ 151 | subset = [('FieldRecord', ''), ('TraceNumber', '')] 152 | return self.get_df().duplicated(subset=subset, keep=keep) 153 | 154 | def drop_duplicates(self, keep='first'): 155 | """Drop duplicated ('FieldRecord', 'TraceNumber') pairs.""" 156 | subset = [('FieldRecord', ''), ('TraceNumber', '')] 157 | df = self.get_df().drop_duplicates(subset=subset, keep=keep) 158 | if self.name is not None: 159 | df.set_index(self.name, inplace=True) 160 | indices = df.index.unique().sort_values() 161 | return type(self).from_index(index=indices, idf=df, index_name=self.name) 162 | 163 | def merge(self, x, **kwargs): 164 | """Merge two DataFrameIndex on common columns. 165 | 166 | Parameters 167 | ---------- 168 | x : DataFrame 169 | DataFrame to merge with. 170 | kwargs : dict 171 | Named arguments to ```DataFrame.merge```. 
def build_index(self, index=None, idf=None, **kwargs):
    """Build the trace DataFrame and return its unique index labels.

    Parameters
    ----------
    index : optional
        With ``idf`` given: labels to select from ``idf``. Without ``idf``:
        an index object whose ``get_df()`` result is re-indexed by ``self.name``.
    idf : DataFrame, optional
        DataFrame to select rows from when ``index`` is given.
    kwargs : dict
        Forwarded to ``build_df`` when ``index`` is None.

    Returns
    -------
    Unique labels of the stored DataFrame index, or ``index`` itself when both
    ``index`` and ``idf`` are given.
    """
    if index is None:
        frame = self.build_df(**kwargs)
        # Keep a named index as a column; discard an anonymous one.
        frame.reset_index(drop=frame.index.name is None, inplace=True)
    elif idf is not None:
        return self.build_from_index(index, idf)
    else:
        frame = index.get_df()

    if self.name is not None:
        frame.set_index(self.name, inplace=True)

    self._idf = frame.sort_index()
    return self._idf.index.unique()


def build_df(self, **kwargs):
    """Build DataFrame from SPS data if ``dfx`` is among kwargs, else from SEGY files."""
    builder = build_sps_df if 'dfx' in kwargs else build_segy_df
    return builder(**kwargs)


def build_from_index(self, index, idf):
    """Store the rows of ``idf`` selected by ``index`` and return ``index``."""
    self._idf = idf.loc[index]
    return index


def create_subset(self, index):
    """Return a new Index based on the subset of indices given."""
    return type(self).from_index(index=index, idf=self._idf, index_name=self.name)


class SegyFilesIndex(TraceIndex):
    """Index for SEGY files.

    Parameters
    ----------
    name : str
        Name that will be associated with traces of SEGY files.
    kwargs : dict
        Named arguments for ```batchflow.FilesIndex```.

    Attributes
    ----------
    index_name : str or tuple of str
        Name of the DataFrame index.
    meta : dict
        Metadata about index.
    _idf : DataFrame
        DataFrame with rows corresponding to seismic traces and columns with metadata about
        traces. Columns include FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE, file_id and
        a number of extra_headers if specified.
    """
    def __init__(self, *args, **kwargs):
        # The index is always the ('file_id', <name>) column.
        kwargs.update(index_name=('file_id', kwargs.get('name')))
        super().__init__(*args, **kwargs)
def _merge_extra_headers(extra_headers, required):
    """Return the de-duplicated union of ``extra_headers`` and ``required``.

    ``extra_headers`` may be None, the string 'all' (every ``segyio.TraceField``
    keyword), a single header name, or any iterable of header names.
    """
    if extra_headers is None:
        extra_headers = []
    elif isinstance(extra_headers, str):
        if extra_headers == 'all':
            extra_headers = [h.__str__() for h in segyio.TraceField.enums()]
        else:
            extra_headers = [extra_headers]
    return list(set(list(extra_headers) + list(required)))


class CustomIndex(TraceIndex):
    """Index for any SEGY header.

    Parameters
    ----------
    index_name : str
        Any segyio.TraceField keyword that will be set as index. It is required
        as a keyword argument and is added to ``extra_headers`` when not None.
    kwargs : dict
        Named arguments for ```batchflow.FilesIndex```.

    Attributes
    ----------
    index_name : str or tuple of str
        Name of the DataFrame index.
    meta : dict
        Metadata about index.
    _idf : DataFrame
        DataFrame with rows corresponding to seismic traces and columns with metadata about
        traces. Columns include FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE, file_id and
        a number of extra_headers if specified.
    """
    def __init__(self, *args, **kwargs):
        index_name = kwargs['index_name']
        if index_name is not None:
            # The header used as index must be loaded as well.
            kwargs['extra_headers'] = _merge_extra_headers(kwargs.get('extra_headers'), [index_name])
        super().__init__(*args, **kwargs)


class KNNIndex(TraceIndex):
    """Index for groups of k nearest located seismic traces.

    Parameters
    ----------
    n_neighbors : int
        Group size parameter.
    kwargs : dict
        Named arguments for ```batchflow.FilesIndex```.

    Attributes
    ----------
    index_name : str or tuple of str
        Name of the DataFrame index.
    meta : dict
        Metadata about index.
    _idf : DataFrame
        DataFrame with rows corresponding to seismic traces and columns with metadata about
        traces. Columns include FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE, file_id and
        a number of extra_headers if specified.
    """
    def __init__(self, *args, **kwargs):
        kwargs['index_name'] = 'KNN'
        super().__init__(*args, **kwargs)

    def build_df(self, n_neighbors, **kwargs):
        """Build DataFrame where every trace is followed by its nearest neighbours.

        Neighbours are searched by (CDP_X, CDP_Y) within each field record.

        Raises
        ------
        ValueError
            If some trace is not its own nearest neighbour (duplicated CDP).
        """
        kwargs['extra_headers'] = _merge_extra_headers(kwargs.get('extra_headers'), ['CDP_X', 'CDP_Y'])
        field_index = FieldIndex(**kwargs)
        dfs = []
        for fid in field_index.indices:
            df = field_index.get_df([fid])
            data = np.stack([df['CDP_X'], df['CDP_Y']]).T
            nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree')
            _, indices = nbrs.fit(data).kneighbors(data)
            # Column 0 must be the query trace itself, otherwise groups are ambiguous.
            if not np.all(indices[:, 0] == np.arange(len(data))):
                raise ValueError("Failed to build KNNIndex. Duplicated CDP.")

            dfs.append(df.iloc[np.hstack(indices)])
        df = pd.concat(dfs).reset_index(drop=True)
        # One group id per source trace, repeated for each member of its group.
        df['KNN'] = np.repeat(np.arange(field_index.tracecounts.sum()), n_neighbors)
        return df
class FieldIndex(TraceIndex):
    """Index for field records.

    Parameters
    ----------
    kwargs : dict
        Named arguments for ```build_df``` method.
        Can be either a set of ```dfr```, ```dfs```, ```dfx``` arguments for
        building index from SPS files, or named arguments for ```batchflow.FilesIndex```
        for building index from SEGY files.

    Attributes
    ----------
    index_name : str or tuple of str
        Name of the DataFrame index.
    meta : dict
        Metadata about index.
    _idf : DataFrame
        DataFrame with rows corresponding to seismic traces and columns with metadata about
        traces. Set of columns includes FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE, file_id and
        a number of extra_headers for index built from SEGY files or SPS file columns for index
        built from SPS files.
    """
    def __init__(self, *args, **kwargs):
        # Traces are always grouped by field record.
        kwargs.update(index_name='FieldRecord')
        super().__init__(*args, **kwargs)
class BinsIndex(TraceIndex):
    """Index for bins of CDP.

    Parameters
    ----------
    dfr : DataFrame
        SPS R file data.
    dfs : DataFrame
        SPS S file data.
    dfx : DataFrame
        SPS X file data.
    bin_size : scalar or tuple of scalars
        Grid bin size.
    origin : array-like
        Grid origin coordinates.
    phi : scalar or array-like
        Grid orientation.
    iters : int
        Maximal number of iterations for grid optimization algorithm.

    Attributes
    ----------
    index_name : str or tuple of str
        Name of the DataFrame index.
    meta : dict
        Metadata about index.
    _idf : DataFrame
        DataFrame with rows corresponding to seismic traces and columns with metadata about
        traces. Set of columns includes FieldRecord, TraceNumber and extra SPS file columns.
    """
    def __init__(self, *args, **kwargs):
        kwargs.update(index_name='bin_id')
        super().__init__(*args, **kwargs)

    def build_df(self, **kwargs):
        """Build DataFrame with ``make_bin_index`` and record the grid metadata in ``self.meta``."""
        frame, grid_meta = make_bin_index(**kwargs)
        self.meta.update(grid_meta)
        return frame

    def show_heatmap(self, **kwargs):
        """Plot CDP distribution between bins.

        A 2d heatmap is drawn when ``self.meta['bin_size']`` is a list, tuple
        or ndarray, a 1d heatmap otherwise.
        """
        plot = show_1d_heatmap
        if isinstance(self.meta['bin_size'], (list, tuple, np.ndarray)):
            plot = show_2d_heatmap
        plot(self._idf, **kwargs)
387 | """ 388 | def __init__(self, *args, **kwargs): 389 | kwargs['index_name'] = 'bin_id' 390 | super().__init__(*args, **kwargs) 391 | 392 | def build_df(self, **kwargs): 393 | """Build DataFrame.""" 394 | df, meta = make_bin_index(**kwargs) 395 | self.meta.update(meta) 396 | return df 397 | 398 | def show_heatmap(self, **kwargs): 399 | """Heatmap of CDP distribution between bins (2d for list, tuple or ndarray bin_size, 1d otherwise).""" 400 | bin_size = self.meta['bin_size'] 401 | if isinstance(bin_size, (list, tuple, np.ndarray)): 402 | show_2d_heatmap(self._idf, **kwargs) 403 | else: 404 | show_1d_heatmap(self._idf, **kwargs) 405 | -------------------------------------------------------------------------------- /seismicpro/src/utils.py: -------------------------------------------------------------------------------- 1 | """ Seismic batch tools """ 2 | import functools 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.linear_model import LinearRegression 6 | from scipy.signal import medfilt, hilbert 7 | import segyio 8 | 9 | from ..batchflow import FilesIndex 10 | 11 | DEFAULT_SEGY_HEADERS = ['FieldRecord', 'TraceNumber', 'TRACE_SEQUENCE_FILE'] 12 | FILE_DEPENDEND_COLUMNS = ['TRACE_SEQUENCE_FILE', 'file_id'] 13 | 14 | def partialmethod(func, *frozen_args, **frozen_kwargs): 15 | """Wrap a method with partial application of given positional and keyword 16 | arguments. 17 | 18 | Parameters 19 | ---------- 20 | func : callable 21 | A method to wrap. 22 | frozen_args : misc 23 | Fixed positional arguments, passed right after ``self`` and before call-time positional arguments. 24 | frozen_kwargs : misc 25 | Fixed keyword arguments. Passing the same keyword again at call time raises TypeError. 26 | 27 | Returns 28 | ------- 29 | method : callable 30 | Wrapped method. 
31 | """ 32 | @functools.wraps(func) 33 | def method(self, *args, **kwargs): 34 | """Wrapped method.""" 35 | return func(self, *frozen_args, *args, **frozen_kwargs, **kwargs) 36 | return method 37 | 38 | def print_results(df, layout, average_repetitions=False, sort_by=None, ascending=True, n_last=100): 39 | """ Show results given by research dataframe. 
def print_results(df, layout, average_repetitions=False, sort_by=None, ascending=True, n_last=100):
    """ Show results given by research dataframe.

    Parameters
    ----------
    df : DataFrame
        Research's results.
    layout : str
        String of the form "<name>/<attr>". The first part selects rows by the value
        of the "name" column, the second part is the column whose values are summarised.
    average_repetitions : bool, optional
        If True, repetitions are averaged per iteration and only the mean and std
        are reported. If False, one value per repetition is reported together
        with the mean and std over repetitions.
    sort_by : str or None, optional
        If not None, column's name to sort.
    ascending : bool, None
        Same as in ```DataFrame.sort_values```.
    n_last : int, optional
        The number of last iterations over which the averaging takes place.

    Returns
    -------
    : DataFrame
        Research results in DataFrame, where indices are config parameters and columns are `layout` values.
    """
    name, attr = layout.split('/')
    selected = df[df['name'] == name]

    if average_repetitions:
        columns = [name + '_mean', name + '_std']
    else:
        suffixes = [*selected['repetition'].unique(), 'mean', 'std']
        columns = [name + '_' + str(suffix) for suffix in suffixes]

    index, data = [], []
    for config, config_df in selected.groupby("config"):
        index.append(config)
        config_df = config_df.drop(['config', 'name'], axis=1).dropna(axis=1).astype('float')
        if average_repetitions:
            averaged = config_df.groupby('iteration').mean().drop('repetition', axis=1)
            # Keep only the last n_last iterations.
            averaged = averaged[averaged.index > averaged.index.max() - n_last]
            data.append([averaged[attr].mean(), averaged[attr].std()])
        else:
            per_repetition = []
            for _, rep_df in config_df.groupby('repetition'):
                rep_df = rep_df.drop('repetition', axis=1)
                last_rows = rep_df[rep_df['iteration'] > rep_df['iteration'].max() - n_last]
                per_repetition.append(last_rows[attr].mean())
            data.append([*per_repetition, np.mean(per_repetition), np.std(per_repetition)])

    res_df = pd.DataFrame(data=data, index=index, columns=columns)
    if sort_by:
        res_df.sort_values(by=sort_by, ascending=ascending, inplace=True)
    return res_df
def line_inclination(x, y):
    """Get regression line inclination towards x-axis.

    The regression is fitted along the axis with the larger spread, so that
    near-vertical lines do not produce an unbounded slope.

    Parameters
    ----------
    x : array-like
        Data x coordinates.
    y : array-like
        Data y coordinates.

    Returns
    -------
    phi : float
        Inclination towards x-axis, in radians.
    """
    if np.std(y) < np.std(x):
        slope = LinearRegression().fit(x.reshape((-1, 1)), y).coef_[0]
        return np.arctan(slope)

    # Steep line: regress x on y and convert the angle back to the x-axis.
    slope = LinearRegression().fit(y.reshape((-1, 1)), x).coef_[0]
    quarter_turn = np.pi / 2
    if slope < 0.:
        return -quarter_turn - np.arctan(slope)
    return quarter_turn - np.arctan(slope)


def get_phi(dfr, dfs):
    """Get median absolute inclination for R and S lines.

    Parameters
    ----------
    dfr : pandas.DataFrame
        Data from R file SPS.
    dfs : pandas.DataFrame
        Data from S file SPS.

    Returns
    -------
    phi : float
        Median of the line inclinations taken modulo pi/2.
    """
    incl = []
    # Source lines first, then receiver lines.
    for frame, line_column in ((dfs, 'sline'), (dfr, 'rline')):
        for _, group in frame.groupby(line_column):
            x, y = group[['x', 'y']].values.T
            incl.append(line_inclination(x, y))
    return np.median(np.array(incl) % (np.pi / 2))


def random_bins_shift(pts, bin_size, iters=100):
    """Monte-Carlo best shift estimation.

    Random shifts are drawn and the one giving the smallest standard deviation
    of non-empty bin counts is returned.

    Parameters
    ----------
    pts : array-like
        Point coordinates: a 1d array for a 1D grid or an (n, 2) array for a 2D grid.
    bin_size : scalar or tuple of scalars
        Bin size of 1D or 2D grid.
    iters : int
        Number of samples.

    Returns
    -------
    shift : ndarray
        Optimal grid shift from its default origin that is np.min(pts, axis=0).
        Zeros if ``iters`` is 0.

    Raises
    ------
    ValueError
        If ``pts.ndim`` is neither 1 nor 2.
    """
    if pts.ndim not in (1, 2):
        raise ValueError("pts should be ndim = 1 or 2.")

    # Flat arrays so that np.arange below receives scalar bounds only.
    upper = np.atleast_1d(np.max(pts, axis=0))
    lower = np.min(pts, axis=0)
    min_unif = np.inf
    best_shift = np.zeros(pts.ndim)
    for _ in range(iters):
        shift = -bin_size * np.random.random(pts.ndim)
        s = bin_size * ((lower - shift) // bin_size)
        bins = [np.arange(a, b + bin_size, bin_size) for a, b in zip(s + shift, upper)]
        if pts.ndim == 2:
            h = np.histogram2d(*pts.T, bins=bins)[0]
        else:
            h = np.histogram(pts, bins=bins[0])[0]

        unif = np.std(h[h > 0])
        if unif < min_unif:
            min_unif = unif
            best_shift = shift

    return best_shift
def _mean_neighbour_ratio(h, axis):
    """Mean of (half the difference of neighbouring bins) / (their maximum) along ``axis``.

    Pairs in which both bins are empty are ignored.
    """
    if h.shape[axis] < 2:
        raise ValueError("At least two bins per axis are required to estimate the shift.")
    lead = [slice(None)] * h.ndim
    trail = [slice(None)] * h.ndim
    lead[axis] = slice(None, -1)
    trail[axis] = slice(1, None)
    vmax = np.maximum(h[tuple(lead)], h[tuple(trail)])
    dif = np.diff(h, axis=axis) / 2.
    return np.mean(dif[vmax > 0] / vmax[vmax > 0])


def gradient_bins_shift(pts, bin_size, max_iters=10, eps=1e-3):
    """Iterative best shift estimation.

    At every step the grid is moved along each axis by ``bin_size`` times the
    mean relative difference between neighbouring bin counts. Among the visited
    shifts, the one with the smallest standard deviation of non-empty bin counts
    is returned.

    Parameters
    ----------
    pts : array-like
        Point coordinates: a 1d array for a 1D grid or an (n, 2) array for a 2D grid.
    bin_size : scalar or tuple of scalars
        Bin size of 1D or 2D grid.
    max_iters : int
        Maximal number of iterations.
    eps : float
        Iterations stop when the step norm drops below ``bin_size * eps``.

    Returns
    -------
    shift : ndarray
        Optimal grid shift from its default origin that is np.min(pts, axis=0),
        taken modulo ``bin_size``. Zeros if ``max_iters`` is 0.

    Raises
    ------
    ValueError
        If ``pts.ndim`` is neither 1 nor 2, or the histogram has a single bin
        along some axis.
    """
    if pts.ndim not in (1, 2):
        raise ValueError("pts should be ndim = 1 or 2.")

    # Flat arrays so that np.arange below receives scalar bounds only.
    upper = np.atleast_1d(np.max(pts, axis=0))
    lower = np.min(pts, axis=0)
    shift = np.zeros(pts.ndim)
    states = []
    states_std = []
    for _ in range(max_iters):
        s = bin_size * ((lower - shift) // bin_size)
        bins = [np.arange(a, b + bin_size, bin_size) for a, b in zip(s + shift, upper)]
        if pts.ndim == 2:
            h = np.histogram2d(*pts.T, bins=bins)[0]
        else:
            h = np.histogram(pts, bins=bins[0])[0]
        move = np.array([bin_size * _mean_neighbour_ratio(h, axis) for axis in range(h.ndim)])

        states.append(shift.copy())
        states_std.append(np.std(h[h > 0]))

        if np.linalg.norm(move) < bin_size * eps:
            break

        shift += move

    if states_std:
        return states[np.argmin(states_std)] % bin_size

    return shift
def rotate_2d(arr, phi):
    """Rotate 2D vector counter-clockwise.

    Parameters
    ----------
    arr : array-like
        Vector coordinates, one (x, y) pair per row.
    phi : radians
        Rotation angle.

    Returns
    -------
    arr : array-like
        Rotated vector.
    """
    cos_phi, sin_phi = np.cos(phi), np.sin(phi)
    rotation = np.array([[cos_phi, -sin_phi], [sin_phi, cos_phi]])
    return np.dot(rotation, arr.T).T


def make_1d_bin_index(dfr, dfs, dfx, bin_size, origin=None, phi=None,
                      opt='gradient', **kwargs):
    """Get bins for 1d seismic geometry.

    Parameters
    ----------
    dfr : pandas.DataFrame
        SPS R file data.
    dfs : pandas.DataFrame
        SPS S file data.
    dfx : pandas.DataFrame
        SPS X file data.
    bin_size : scalar
        Grid bin size.
    origin : dict
        Grid origin for each line (keyed by ``rline``). If None, it is estimated.
    phi : dict
        Grid orientation in degrees for each line (keyed by ``rline``).
        If None, it is estimated by linear regression.
    opt : str
        Grid location optimizer: 'gradient' or 'monte-carlo'.
    kwargs : dict
        Named arguments for optimizer.

    Returns
    -------
    df : pandas.DataFrame
        DataFrame with bins indexing, indexed by ``bin_id`` ("<rline>/<bin number>").
    meta : dict
        Per-line dict with ``origin``, ``phi`` (degrees) and ``bin_size``.

    Raises
    ------
    ValueError
        If ``opt`` is unknown.
    """
    rids = np.hstack([np.arange(s, e + 1) for s, e in
                      zip(dfx['from_receiver'], dfx['to_receiver'])])
    channels = np.hstack([np.arange(s, e + 1) for s, e in
                          zip(dfx['from_channel'], dfx['to_channel'])])
    n_reps = dfx['to_receiver'] - dfx['from_receiver'] + 1

    # Repeat every X-file row once per receiver; restore dtypes lost via .values.
    dtypes = dfx.dtypes.values
    dfx = pd.DataFrame(dfx.values.repeat(n_reps, axis=0), columns=dfx.columns)
    for i, c in enumerate(dfx.columns):
        dfx[c] = dfx[c].astype(dtypes[i])

    dfx['rid'] = rids
    # 'TraceNumber' matches the name used by the 2d and SPS builders;
    # 'trace_number' is kept for backward compatibility.
    dfx['TraceNumber'] = channels
    dfx['trace_number'] = channels
    dfm = (dfx
           .merge(dfs, on=['sline', 'sid'])
           .merge(dfr, on=['rline', 'rid'], suffixes=('_s', '_r')))
    dfm['CDP_X'] = (dfm['x_s'] + dfm['x_r']) / 2.
    dfm['CDP_Y'] = (dfm['y_s'] + dfm['y_r']) / 2.
    dfm['azimuth'] = np.arctan2(dfm['y_r'] - dfm['y_s'], dfm['x_r'] - dfm['x_s'])

    dfm['x_index'] = None
    meta = {}

    for rline, group in dfm.groupby('rline'):
        pts = group[['CDP_X', 'CDP_Y']].values
        if phi is None:
            # Regress along the axis with the larger spread.
            if np.std(pts[:, 0]) > np.std(pts[:, 1]):
                reg = LinearRegression().fit(pts[:, :1], pts[:, 1])
                _phi = np.arctan(reg.coef_)[0]
            else:
                reg = LinearRegression().fit(pts[:, 1:], pts[:, 0])
                _phi = np.arctan(1. / reg.coef_)[0]
        else:
            _phi = np.radians(phi[rline]) # pylint: disable=assignment-from-no-return

        # Align the line with the x-axis, so binning is one-dimensional.
        pts = rotate_2d(pts, -_phi)
        ppx, y = pts[:, 0], np.mean(pts[:, 1])

        if origin is None:
            if opt == 'gradient':
                shift = gradient_bins_shift(ppx, bin_size, **kwargs)
            elif opt == 'monte-carlo':
                shift = random_bins_shift(ppx, bin_size, **kwargs)
            else:
                raise ValueError('Unknown grid optimizer.')

            s = shift + bin_size * ((np.min(ppx) - shift) // bin_size)
            _origin = rotate_2d(np.array([[s, y]]), _phi)[0]
        else:
            _origin = origin[rline]
            s = rotate_2d(_origin.reshape((-1, 2)), -_phi)[0, 0]

        t = np.max(ppx)
        bins = np.arange(s, t + bin_size, bin_size)

        index = np.digitize(ppx, bins)

        dfm.loc[dfm['rline'] == rline, 'x_index'] = index
        meta.update({rline: dict(origin=_origin,
                                 phi=np.rad2deg(_phi),
                                 bin_size=bin_size)})

    dfm['bin_id'] = (dfm['rline'].astype(str) + '/' + dfm['x_index'].astype(str)).values
    dfm.set_index('bin_id', inplace=True)

    dfm['offset'] = np.sqrt((dfm['x_s'] - dfm['x_r'])**2 + (dfm['y_s'] - dfm['y_r'])**2) / 2.

    dfm.drop(labels=['from_channel', 'to_channel',
                     'from_receiver', 'to_receiver',
                     'x_index'], axis=1, inplace=True)
    dfm.rename(columns={'x_s': 'SourceX', 'y_s': 'SourceY'}, inplace=True)

    return dfm, meta
def make_2d_bin_index(dfr, dfs, dfx, bin_size, origin=None, phi=None,
                      opt='gradient', **kwargs):
    """Get bins for 2d seismic geometry.

    Parameters
    ----------
    dfr : pandas.DataFrame
        SPS R file data.
    dfs : pandas.DataFrame
        SPS S file data.
    dfx : pandas.DataFrame
        SPS X file data.
    bin_size : tuple
        Grid bin size. Both components must be equal.
    origin : array-like of 2 floats
        Grid origin. If None, it is estimated.
    phi : scalar
        Grid orientation in degrees. If None, it is estimated from R and S lines.
    opt : str
        Grid location optimizer: 'gradient' or 'monte-carlo'.
    kwargs : dict
        Named arguments for optimizer.

    Returns
    -------
    df : pandas.DataFrame
        DataFrame with bins indexing, indexed by ``bin_id`` ("<x bin>/<y bin>").
    meta : dict
        Dict with ``origin``, ``phi`` (degrees) and ``bin_size``.

    Raises
    ------
    ValueError
        If bins are not square or ``opt`` is unknown.
    """
    if bin_size[0] != bin_size[1]:
        raise ValueError('Bins are not square')

    bin_size = bin_size[0]

    rids = np.hstack([np.arange(s, e + 1) for s, e in
                      zip(dfx['from_receiver'], dfx['to_receiver'])])
    channels = np.hstack([np.arange(s, e + 1) for s, e in
                          zip(dfx['from_channel'], dfx['to_channel'])])
    n_reps = dfx['to_receiver'] - dfx['from_receiver'] + 1

    # Repeat every X-file row once per receiver; restore dtypes lost via .values.
    dtypes = dfx.dtypes.values
    dfx = pd.DataFrame(dfx.values.repeat(n_reps, axis=0), columns=dfx.columns)
    for i, c in enumerate(dfx.columns):
        dfx[c] = dfx[c].astype(dtypes[i])

    dfx['rid'] = rids
    dfx['TraceNumber'] = channels
    dfm = (dfx
           .merge(dfs, on=['sline', 'sid'])
           .merge(dfr, on=['rline', 'rid'], suffixes=('_s', '_r')))
    dfm['CDP_X'] = (dfm['x_s'] + dfm['x_r']) / 2.
    dfm['CDP_Y'] = (dfm['y_s'] + dfm['y_r']) / 2.
    dfm['azimuth'] = np.arctan2(dfm['y_r'] - dfm['y_s'], dfm['x_r'] - dfm['x_s'])

    if phi is None:
        phi = get_phi(dfr, dfs)
    else:
        phi = np.radians(phi) # pylint: disable=assignment-from-no-return

    if phi > 0:
        phi += -np.pi / 2

    pts = rotate_2d(dfm[['CDP_X', 'CDP_Y']].values, -phi) # pylint: disable=invalid-unary-operand-type

    if origin is None:
        if opt == 'gradient':
            shift = gradient_bins_shift(pts, bin_size, **kwargs)
        elif opt == 'monte-carlo':
            shift = random_bins_shift(pts, bin_size, **kwargs)
        else:
            raise ValueError('Unknown grid optimizer.')

        s = shift + bin_size * ((np.min(pts, axis=0) - shift) // bin_size)
        origin = rotate_2d(s.reshape((1, 2)), phi)[0]
    else:
        origin = np.asarray(origin)
        s = rotate_2d(origin.reshape((1, 2)), -phi)[0] # pylint: disable=invalid-unary-operand-type

    t = np.max(pts, axis=0)
    # The two axes usually have a different number of bins, so the edges are
    # kept as two separate arrays and never stacked into one ndarray.
    xbins, ybins = [np.arange(a, b + bin_size, bin_size) for a, b in zip(s, t)]

    x_index = np.digitize(pts[:, 0], xbins)
    y_index = np.digitize(pts[:, 1], ybins)

    dfm['bin_id'] = np.array([ix + '/' + iy for ix, iy in zip(x_index.astype(str), y_index.astype(str))])
    dfm.set_index('bin_id', inplace=True)

    dfm['offset'] = np.sqrt((dfm['x_s'] - dfm['x_r'])**2 + (dfm['y_s'] - dfm['y_r'])**2) / 2.

    dfm = dfm.drop(labels=['from_channel', 'to_channel',
                           'from_receiver', 'to_receiver'], axis=1)
    dfm.rename(columns={'x_s': 'SourceX', 'y_s': 'SourceY'}, inplace=True)
    meta = dict(origin=origin, phi=np.rad2deg(phi), bin_size=(bin_size, bin_size))
    return dfm, meta
def make_bin_index(dfr, dfs, dfx, bin_size, origin=None, phi=None, **kwargs):
    """Get bins for seismic geometry.

    Dispatches to the 2d builder when ``bin_size`` is a list, tuple or ndarray
    and to the 1d builder otherwise, then turns the columns into a two-level
    MultiIndex with an empty second level.

    Parameters
    ----------
    dfr : pandas.DataFrame
        SPS R file data.
    dfs : pandas.DataFrame
        SPS S file data.
    dfx : pandas.DataFrame
        SPS X file data.
    bin_size : scalar or tuple of scalars
        Grid bin size.
    origin : optional
        Grid origin, forwarded to the builder.
    phi : optional
        Grid orientation, forwarded to the builder.
    kwargs : dict
        Extra named arguments for the builder (e.g. ``opt`` and optimizer arguments).

    Returns
    -------
    df : pandas.DataFrame
        DataFrame with bins indexing.
    meta : dict
        Grid metadata returned by the builder.
    """
    is_2d = isinstance(bin_size, (list, tuple, np.ndarray))
    builder = make_2d_bin_index if is_2d else make_1d_bin_index
    df, meta = builder(dfr, dfs, dfx, bin_size, origin, phi, **kwargs)

    empty_level = [''] * len(df.columns)
    df.columns = pd.MultiIndex.from_arrays([df.columns, empty_level])
    return df, meta
def build_sps_df(dfr, dfs, dfx):
    """Index traces according to SPS data.

    Every X-file row is expanded into one row per receiver, then joined with
    source (S) and receiver (R) coordinates. The input DataFrames are not modified.

    Parameters
    ----------
    dfr : pandas.DataFrame
        SPS R file data.
    dfs : pandas.DataFrame
        SPS S file data.
    dfx : pandas.DataFrame
        SPS X file data.

    Returns
    -------
    df : pandas.DataFrame
        DataFrame with trace indexing. Columns form a two-level MultiIndex with
        an empty second level.
    """
    rids = np.hstack([np.arange(s, e + 1) for s, e in
                      zip(dfx['from_receiver'], dfx['to_receiver'])])
    channels = np.hstack([np.arange(s, e + 1) for s, e in
                          zip(dfx['from_channel'], dfx['to_channel'])])
    n_reps = (dfx['to_receiver'] - dfx['from_receiver'] + 1).values

    # drop() without inplace returns a new frame, so the caller's dfx keeps its columns.
    dfx = dfx.drop(labels=['from_channel', 'to_channel', 'from_receiver', 'to_receiver'],
                   axis=1)

    # Repeat every row once per receiver; restore dtypes lost via .values.
    dtypes = dfx.dtypes.values
    dfx = pd.DataFrame(dfx.values.repeat(n_reps, axis=0), columns=dfx.columns)
    for i, c in enumerate(dfx.columns):
        dfx[c] = dfx[c].astype(dtypes[i])

    dfx['rid'] = rids
    dfx['TraceNumber'] = channels
    dfm = (dfx
           .merge(dfs, on=['sline', 'sid'])
           .merge(dfr, on=['rline', 'rid'], suffixes=('_s', '_r')))
    dfm['CDP_X'] = (dfm['x_s'] + dfm['x_r']) / 2.
    dfm['CDP_Y'] = (dfm['y_s'] + dfm['y_r']) / 2.
    dfm['azimuth'] = np.arctan2(dfm['y_r'] - dfm['y_s'], dfm['x_r'] - dfm['x_s'])
    dfm['offset'] = np.sqrt((dfm['x_s'] - dfm['x_r'])**2 + (dfm['y_s'] - dfm['y_r'])**2) / 2.
    dfm.rename(columns={'x_s': 'SourceX', 'y_s': 'SourceY'}, inplace=True)
    dfm.columns = pd.MultiIndex.from_arrays([dfm.columns, [''] * len(dfm.columns)])

    return dfm
def make_segy_index(filename, extra_headers=None, limits=None):
    """Index traces in a single SEGY file.

    Parameters
    ----------
    filename : str
        Path to SEGY file.
    extra_headers : array-like or str
        Additional headers to put into the DataFrame. If 'all', all headers are included.
    limits : slice or int, default to None
        If int, index only first ```limits``` traces. If slice, index only traces
        within given range. If None, index all traces.

    Returns
    -------
    df : pandas.DataFrame
        DataFrame with trace indexing: one column per header plus ``file_id``.
        Default headers come first, then the extra ones in the order given.
    """
    if not isinstance(limits, slice):
        limits = slice(limits)

    with segyio.open(filename, strict=False) as segyfile:
        segyfile.mmap()
        if isinstance(extra_headers, str) and extra_headers == 'all':
            headers = [h.__str__() for h in segyio.TraceField.enums()]
        elif extra_headers is None:
            headers = DEFAULT_SEGY_HEADERS
        else:
            # dict.fromkeys removes duplicates while keeping a stable order.
            headers = list(dict.fromkeys(DEFAULT_SEGY_HEADERS + list(extra_headers)))

        meta = dict()

        for k in headers:
            meta[k] = segyfile.attributes(getattr(segyio.TraceField, k))[limits]

        meta['file_id'] = np.repeat(filename, segyfile.tracecount)[limits]

    return pd.DataFrame(meta)
def build_segy_df(extra_headers=None, name=None, limits=None, **kwargs):
    """Index traces in multiple SEGY files.

    Parameters
    ----------
    extra_headers : array-like or str
        Additional headers to put into the DataFrame. If 'all', all headers are included.
    name : str
        Name that will be associated with indexed traces. It becomes the second
        column level of the file-dependent columns.
    limits : slice or int, default to None
        If int, index only first ```limits``` traces. If slice, index only traces
        within given range. If None, index all traces.
    kwargs : dict
        Named arguments for ```batchflow.FilesIndex```. An optional ``markup_path``
        item is removed from them: it is a path to a CSV file that is inner-merged
        into the result.

    Returns
    -------
    df : pandas.DataFrame
        DataFrame with trace indexing. Columns are a two-level MultiIndex: common
        columns (in their original order) with an empty second level, followed by
        the file-dependent columns labelled with ``name``.
    """
    markup_path = kwargs.pop('markup_path', None)
    index = FilesIndex(**kwargs)
    df = pd.concat([make_segy_index(index.get_fullpath(i), extra_headers, limits) for
                    i in sorted(index.indices)])
    if markup_path is not None:
        markup = pd.read_csv(markup_path)
        df = df.merge(markup, how='inner')

    # Keep the original column order so the layout is reproducible between runs.
    common_cols = [c for c in df.columns if c not in FILE_DEPENDEND_COLUMNS]
    ordered = common_cols + FILE_DEPENDEND_COLUMNS
    df = df[ordered]
    second_level = [''] * len(common_cols) + [name] * len(FILE_DEPENDEND_COLUMNS)
    df.columns = pd.MultiIndex.from_arrays([ordered, second_level])
    return df
def calc_v_rms(t, speed):
    r"""Calculate root mean square speed depending on time.
    Value is calculated by the following formula:

    $$ V_{rms} = \left(\frac{\sum_0^t V^2}{|V|} \right)^{1/2} $$
    Where $|V|$ is the number of elements of V in the range from 0 to t.

    Parameters
    ----------
    t : int
        Time index to calculate $V_{rms}$ at.

    speed : array
        Speed (V) with time values at each moment.

    Returns
    -------
    : float
        $V_{rms}$
    """
    return (np.mean(speed[:t+1]**2))**.5


def calc_sdc(ix, time, speed, v_pow, t_pow):
    r""" Calculate spherical divergence correction (SDC).
    This value has the following formula:
    $$ g(t) = \frac{V_{rms}^{v_{pow}} * t^{t_{pow}}}{V_0} $$

    Here $v_{pow}$ and $t_{pow}$ are hyperparameters.
    The quality of the correction depends on them.

    Parameters
    ----------
    ix : int
        Index of the time sample the correction is calculated for.
    time : array
        Trace time values.
        Time measured either in samples or in milliseconds.
    speed : array
        Wave propagation speed depending on the depth.
        Speed is measured in samples.
    v_pow : float or int
        Speed's power.
    t_pow : float or int
        Time's power.

    Returns
    -------
    : float
        Correction value to suppress the spherical divergence.
        1. is returned instead of a zero correction.
    """
    correction = (calc_v_rms(ix, speed) ** v_pow * time[ix] ** t_pow)/speed[0]
    if correction == 0:
        return 1.
    return correction


def calculate_sdc_for_field(field, time, speed, v_pow=2, t_pow=1):
    """ Correction of spherical divergence.

    Every time sample is scaled by its correction, normalised by the correction
    at the sample with the largest time.

    Parameters
    ----------
    field : array or arrays
        Field for correction, traces along the first axis and time along the second.
    time : array
        Trace time values.
        Time measured either in samples or in milliseconds.
    speed : array
        Wave propagation speed depending on the depth.
        Speed is measured in samples.
    v_pow : float or int
        Speed's power.
    t_pow : float or int
        Time's power.

    Returns
    -------
    : array of arrays
        Corrected field, same shape and dtype as ``field``.
    """
    new_field = np.zeros_like(field)
    if field.shape[1] == 0:
        return new_field

    # The normaliser does not depend on the sample, so it is computed once.
    reference = calc_sdc(np.argmax(time), time, speed, v_pow=v_pow, t_pow=t_pow)
    for ix in range(field.shape[1]):
        correction_coef = calc_sdc(ix, time, speed, v_pow=v_pow, t_pow=t_pow) / reference
        new_field[:, ix] = field[:, ix] * correction_coef
    return new_field


def measure_gain_amplitude(field, window):
    """Calculate the gain amplitude.

    The envelope of every trace is taken as the magnitude of its analytic
    signal and averaged over traces. The negated ratio of the maximum of this
    mean envelope to the mean envelope is then smoothed by a median filter.

    Parameters
    ----------
    field : array or arrays
        Field for amplitude measuring, one trace per row.
    window : int
        Size of the median filter window.

    Returns
    -------
    : array
        amplitude values in each moment t
        after transformations.
    """
    # |analytic signal| = sqrt(trace**2 + hilbert_transform(trace)**2).
    envelope = np.abs(hilbert(np.asarray(field), axis=-1))
    mean_sample = np.mean(envelope, axis=0)
    max_val = np.max(mean_sample)
    dt_val = (-1) * (max_val / mean_sample)
    return medfilt(dt_val, window)


def calculate_sdc_quality(parameters, field, time, speed, window=51):
    """Calculate the quality of estimated parameters.

    The quality is calculated as the median of the absolute value of the first order
    derivative of the gain amplitude of the corrected field.

    Parameters
    ----------
    parameters : list of 2
        Power values for speed and time.
    field : array or arrays
        Field for compensation.
    time : array
        Trace time values.
        Time measured either in samples or in milliseconds.
    speed : array
        Wave propagation speed depending on the depth.
        Speed is measured in samples.
    window : int, default 51
        Size of smoothing window of the median filter.

    Returns
    -------
    : float
        Error with given parameters.
    """
    v_pow, t_pow = parameters
    new_field = calculate_sdc_for_field(field, time=time, speed=speed,
                                        v_pow=v_pow, t_pow=t_pow)

    result = measure_gain_amplitude(new_field, window)
    return np.median(np.abs(np.gradient(result)))
def massive_block(data):
    """ Function that takes 2d array and returns the indices of the
    beginning of the longest block of ones in each row.

    Among equally long blocks of a row the last one wins. Rows without ones
    get 0.

    Parameters
    ----------
    data : np.array
        Array with masks.

    Returns
    -------
    ind : list
        Indices of the beginning of the longest blocks for each row.
        If ``data`` contains no ones at all, a list of ``[0]`` per row is returned.
    """
    # Pad every row with a zero on both sides so each block has a rise and a fall.
    padded = np.insert(np.append(data, np.zeros((data.shape[0], 1)), axis=1), 0, 0, axis=1)
    steps = np.diff(padded)
    rises = np.argwhere(steps == 1)
    falls = np.argwhere(steps == -1)

    if len(rises) == 0:
        return [[0]] * data.shape[0]

    starts = rises[:, 1]
    lengths = falls[:, 1] - starts
    rows = falls[:, 0]

    # Blocks ordered by row, and by length within a row (both sorts are stable).
    by_length = np.argsort(lengths, kind="stable")
    order = by_length[np.argsort(rows[by_length], kind="stable")]

    ind = [0] * rows[0]
    for pos in range(len(order) - 1):
        gap = rows[pos + 1] - rows[pos]
        if gap >= 1:
            # Last block of its row, i.e. the longest one; then zeros for skipped rows.
            ind.append(starts[order[pos]])
            ind.extend([0] * (gap - 1))
    ind.append(starts[order[-1]])
    ind.extend([0] * (padded.shape[0] - rows[-1] - 1))
    return ind
765 | """ 766 | arr = np.append(data, np.zeros((data.shape[0], 1)), axis=1) 767 | arr = np.insert(arr, 0, 0, axis=1) 768 | 769 | plus_one = np.argwhere((np.diff(arr)) == 1) 770 | minus_one = np.argwhere((np.diff(arr)) == -1) 771 | 772 | if len(plus_one) == 0: 773 | return [[0]] * data.shape[0] 774 | 775 | distance = minus_one[:, 1] - plus_one[:, 1] 776 | mask = minus_one[:, 0] 777 | 778 | idxs = np.argsort(distance, kind="stable") 779 | sort = idxs[np.argsort(mask[idxs], kind="stable")] 780 | ind = [0] * mask[0] 781 | for i in range(len(sort[:-1])): 782 | diff = mask[i +1] - mask[i] 783 | if diff > 1: 784 | ind.append(plus_one[:, 1][sort[i]]) 785 | ind.extend([0] * (diff - 1)) 786 | elif diff == 1: 787 | ind.append(plus_one[:, 1][sort[i]]) 788 | ind.append(plus_one[:, 1][sort[-1]]) 789 | ind.extend([0] * (arr.shape[0] - mask[-1] - 1)) 790 | return ind 791 | 792 | def check_unique_fieldrecord_across_surveys(surveys_by_fieldrecord, index): 793 | """ 794 | Check that FieldRecord with identifier `index` is present only in one survey. 795 | 796 | Parameters 797 | ---------- 798 | surveys_by_fieldrecord : array-like 799 | Unique survey identifiers for given FieldRecord. 800 | index : str, numeric 801 | FieldRecord identifier. 802 | """ 803 | if len(surveys_by_fieldrecord) != 1: 804 | raise ValueError('Field {} represents data from more than one survey!'.format(index)) 805 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ SeismicPro is a library for seismic data processing. 
""" 2 | 3 | from setuptools import setup, find_packages 4 | import re 5 | 6 | with open('seismicpro/__init__.py', 'r') as f: 7 | version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', f.read(), re.MULTILINE).group(1) 8 | 9 | setup( 10 | name='SeismicPro', 11 | packages=find_packages(exclude=['tutorials', 'docker_containers', 'datasets', 'models']), 12 | version=version, 13 | url='https://github.com/gazprom-neft/SeismicPro', 14 | license='CC BY-NC-SA 4.0', 15 | author='Gazprom Neft DS team', 16 | author_email='rhudor@gmail.com', 17 | description='A framework for seismic data processing', 18 | long_description='', 19 | zip_safe=False, 20 | platforms='any', 21 | install_requires=[ 22 | 'numpy>=1.16.0', 23 | 'scipy>=1.2.0', 24 | 'pandas>=0.24.0', 25 | 'scikit-learn==0.21.3', 26 | 'PyWavelets>=1.0.1', 27 | 'matplotlib>=3.0.2', 28 | 'dill>=0.2.7.1', 29 | 'pint>=0.8.1', 30 | 'tdigest>=0.5.2.2', 31 | 'tqdm==4.30.0', 32 | 'segyio==1.8.3', 33 | 'scikit-image>=0.13.1', 34 | 'numba>=0.35.0' 35 | ], 36 | extras_require={ 37 | 'tensorflow': ['tensorflow>=1.12'], 38 | 'tensorflow-gpu': ['tensorflow-gpu>=1.12'], 39 | 'keras': ['keras>=2.0.0'], 40 | 'torch': ['torch>=1.0.0'], 41 | 'hmmlearn': ['hmmlearn==0.2.0'], 42 | }, 43 | classifiers=[ 44 | 'Development Status :: 4 - Beta', 45 | 'Intended Audience :: Developers', 46 | 'Intended Audience :: Science/Research', 47 | 'License :: OSI Approved :: Apache Software License', 48 | 'Operating System :: OS Independent', 49 | 'Programming Language :: Python', 50 | 'Programming Language :: Python :: 3', 51 | 'Programming Language :: Python :: 3.5', 52 | 'Programming Language :: Python :: 3.6', 53 | 'Topic :: Scientific/Engineering', 54 | ], 55 | ) 56 | -------------------------------------------------------------------------------- /shippable.yml: -------------------------------------------------------------------------------- 1 | language: none 2 | 3 | env: 4 | global: 5 | - DOCKER_ACC=analysiscenter1 6 | - DOCKER_REPO=ds-py3 7 | - 
TAG="cpu" 8 | 9 | build: 10 | pre_ci_boot: 11 | image_name: $DOCKER_ACC/$DOCKER_REPO 12 | image_tag: $TAG 13 | ci: 14 | - pip3 install pandas==0.22.0 15 | - pip3 install tdigest==0.5.2.2 16 | - pylint -rn --rcfile pylintrc seismicpro 17 | 18 | integrations: 19 | hub: 20 | - integrationName: Containers 21 | type: docker 22 | 23 | notifications: 24 | - integrationName: Notifier 25 | type: slack 26 | recipients: 27 | - "#commits" 28 | on_success: always 29 | on_failure: always 30 | -------------------------------------------------------------------------------- /tutorials/3.Dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Working with SeismicDataset\n", 8 | "Seismic dataset contains functions that calculate some parameters for the dataset.\n", 9 | "\n", 10 | "* [Find parameters for spherical divergence correction](#Find-parameters-for-spherical-divergence-correction)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import sys\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "\n", 23 | "sys.path.append('..')\n", 24 | "\n", 25 | "from seismicpro.batchflow import Pipeline, V, D\n", 26 | "from seismicpro.src import (SeismicDataset, FieldIndex, calculate_sdc_quality)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "First of all we have to create an index." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "path_raw = '/data/SD/dataset_1/2_TAR_raw.sgy'\n", 43 | "\n", 44 | "field_index = FieldIndex(name='raw', extra_headers=['offset'], path=path_raw)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "Based on the index, one can create a dataset instance." 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "dataset = SeismicDataset(field_index)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Find parameters for spherical divergence correction\n", 68 | "The spherical divergence correction action from ```SeismicBatch``` takes parameters that determine how the correction is made. These parameters can be found for the whole dataset with the ```find_sdc_params``` function. It uses speed, time and a loss function to find optimal parameters for spherical divergence correction. In this example, speed was calculated by a specialist. By default, time is taken from the ```meta``` component." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "speed = np.array([1524]*700 + [1924.5]*300 + [2184.0]*400 + [2339.6]*400 + \n", 78 | " [2676]*150 + [2889.5]*2250 + [3566]*2800 + [4785.3]*1000)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "The following line runs the optimization process. This process works with scipy optimization, so you can specify any parameters from ```scipy.optimize.minimize```. The optimization process takes a lot of time, which is why it is calculated once for the whole dataset." 
86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "parameters = dataset.find_sdc_params(component='raw', speed=speed,\n", 95 | " loss=calculate_sdc_quality)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "The found parameters are saved to the variable named ```parameters```." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "Optimal parameters for v_pow is 2.06, for t_pow is 0.998.\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "print('Optimal parameters for v_pow is {:.3}, for t_pow is {:.3}.'.format(*parameters))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "More often, this function is used to calculate the parameters once, before the main preprocessing pipeline. An example of how to use ```pipeline.before``` to find parameters for spherical divergence correction is shown in [model_description](../models/Spherical_divergence_correction/model_description.ipynb)." 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.5.2" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | --------------------------------------------------------------------------------