├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── pys5p.iml
│   ├── vcs.xml
│   └── workspace.xml
├── .readthedocs.yaml
├── CITATION.cff
├── ChangeLog.md
├── INSTALL.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── ToDo.md
├── docs
│   ├── Makefile
│   ├── build.rst
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── modules.rst
│   ├── pys5p.lib.rst
│   ├── pys5p.rst
│   └── quick.rst
├── examples
│   ├── unit_test_s5p_ckd.py
│   └── unit_test_s5p_lv2.py
├── pyproject.toml
├── requiments.txt
└── src
    └── pys5p
        ├── __init__.py
        ├── ckd_io.py
        ├── error_propagation.py
        ├── get_data_dir.py
        ├── icm_io.py
        ├── l1b_io.py
        ├── l1b_patch.py
        ├── lib
        │   └── __init__.py
        ├── lv2_io.py
        ├── ocm_io.py
        ├── rls.py
        ├── s5p_msm.py
        ├── swir_region.py
        ├── swir_texp.py
        └── version.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 |
3 | # Compiled python modules.
4 | *.pyc
5 |
6 | # Setuptools distribution folder.
7 | /dist/
8 | /build/
9 | /pilots/
10 |
11 | # Python egg metadata, regenerated from source files by setuptools.
12 | .eggs
13 | *.egg-info
14 |
15 | venv/
16 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 |   os: "ubuntu-22.04"
5 |   tools:
6 |     python: "3.10"
7 |
8 | python:
9 |   # Install our python package before building the docs
10 |   install:
11 |     - method: pip
12 |       path: .
13 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | title: "pyS5p: a Python interface to S5p Tropomi products"
4 | authors:
5 | - family-names: "van Hees"
6 | given-names: "Richard"
7 | orcid: "https://orcid.org/0000-0002-3846-0753"
8 | license: BSD-3-Clause
9 | license-url: "https://github.com/rmvanhees/pys5p/LICENSE"
10 | repository-code: "https://github.com/rmvanhees/pys5p"
11 | doi: 10.5281/zenodo.5665827
12 | type: software
13 | url: "https://github.com/rmvanhees/pys5p"
14 |
--------------------------------------------------------------------------------
/ChangeLog.md:
--------------------------------------------------------------------------------
1 | version 1.0.6
2 | =============
3 |
4 | * s5p_plot.py [draw_qhist]: added grid-lines and density parameter, improved axis labels
5 | * Rearranged source tree to comply with PEP 517, 518 (requires: setuptools 42 or later)
6 | * Renamed lib.sw_version.py to version.py
7 | * Removed all test-modules and examples because they are obsolete; they will be replaced by up-to-date code in future commits
8 | * Fixed pylint warnings
9 | * Updated Copyright line in all modules
10 | * Updated files INSTALL and README
11 | * Added ToDo, which contains a listing of new functionality to be implemented before a new minor release
12 | * Added ChangeLog
13 |
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installing pys5p
2 |
3 | ## Wheels
4 | If you have an existing Python (v3.8+) installation, pys5p can be installed
5 | via pip from PyPI:
6 |
7 | pip install pys5p [--user]
8 |
9 |
10 | ## Python Distributions
11 | If you use a Python Distribution, the installation of pyS5p can be done on
12 | the command line via:
13 |
14 | conda install pys5p
15 |
16 | for [Anaconda](https://www.anaconda.com/)/[MiniConda](http://conda.pydata.org/miniconda.html).
17 |
18 |
19 | ## Install from source
20 | The latest release of pys5p is available from
21 | [gitHub](https://github.com/rmvanhees/pys5p),
22 | where you can download the source code as a tar-file or zipped archive,
23 | or use git to clone the repository:
24 |
25 | git clone https://github.com/rmvanhees/pys5p.git
26 |
27 | Before you can install pys5p, you need:
28 |
29 | * Python version 3.8+ with development headers
30 | * HDF5, installed with development headers
31 | * netCDF4, installed with development headers
32 |
33 | And have the following Python modules available:
34 |
35 | * numpy v1.19+
36 | * h5py v3.5+
37 | * netCDF4 v1.5+
38 | * xarray v0.20+
39 |
40 | The software is known to work using:
41 |
42 | * HDF5 v1.8.21, netCDF4 v4.7.3 and python-netCDF4 v1.5+
43 | * HDF5 v1.10+, netCDF4 v4.7.3 or v4.8+ and python-netCDF4 v1.5+
44 | * HDF5 v1.12+, netCDF4 v4.8+ and python-netCDF4 v1.5+
45 |
46 | You can install pys5p once you have satisfied the requirements listed above.
47 | Run at the top of the source tree:
48 |
49 | python3 -m build
50 |     pip3 install dist/pys5p-<version>.whl [--user]
51 |
52 | The Python scripts can be found under `/usr/local/bin` or `$HOME/.local/bin`.
53 |
54 |
55 | ## Known Issues
56 | * You may need to use the environment variable SETUPTOOLS\_SCM\_PRETEND\_VERSION
57 | if your source tree is not a git clone.
58 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2016-2020 SRON - Netherlands Institute for Space Research
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | prune docs/_build
2 | prune pilots
3 | global-exclude .DS_Store
4 | prune .idea
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyS5p
2 | [PyPI](https://pypi.org/project/pys5p/)
4 | [GitHub](https://github.com/rmvanhees/pys5p/)
5 | [DOI](https://doi.org/10.5281/zenodo.5665827)
6 |
7 | pyS5p provides a Python interface to S5p Tropomi Level-1B (and 2) products.
8 |
9 | For more information on the Sentinel 5 precursor mission visit:
10 |
11 | * https://earth.esa.int/web/guest/missions/esa-future-missions/sentinel-5P
12 | * http://www.tropomi.eu
13 |
14 | For more information on the Tropomi Level-1B products visit:
15 |
16 | * http://www.tropomi.eu/documents/level-0-1b-products
17 |
18 | ## Documentation
19 | Online documentation is available from [Read the Docs](https://pys5p.readthedocs.io).
20 |
21 | ## Installation
22 | The module pys5p requires Python 3.8+ and the Python modules h5py, netCDF4, numpy and xarray.
23 |
24 | Installation instructions are provided on [Read the Docs](https://pys5p.readthedocs.io/en/latest/build.html) or in the INSTALL file.
25 |
26 | ## Note
27 | Most of the plotting-related S/W has been moved from pyS5p (v2.1+) to [moniplot](https://pypi.org/project/moniplot).
28 | Removed are the following modules:
29 | * module biweight.py - contains a Python implementation of Tukey's biweight algorithm.
30 | * module tol_colors.py - definition of colour schemes for lines and maps that also work for colour-blind
31 | people by [Paul Tol](https://personal.sron.nl/~pault/).
32 | * module s5p_plot.py - the class S5Pplot has been rewritten and is now available as MONplot in the module mon_plot.py.
33 |
--------------------------------------------------------------------------------
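
A minimal usage sketch for the readers mentioned above, grounded in
examples/unit_test_s5p_lv2.py; the product filename is a hypothetical
placeholder:

    from pys5p.lv2_io import LV2io

    # "S5P_L2_product.nc" is a stand-in for a real Tropomi level-2 file
    with LV2io("S5P_L2_product.nc") as lv2:
        print("orbit:", lv2.orbit)
        ds_name = "h2o_column" if lv2.science_product else "methane_mixing_ratio"
        data = lv2.get_dataset(ds_name)  # returns a numpy array
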
/ToDo.md:
--------------------------------------------------------------------------------
1 | ToDo before release v1.1.0
2 | ==========================
3 |
4 | Add examples
5 | ------------
6 | * Create examples directory
7 | * Add example code snippets to illustrate typical usage of pys5p modules
8 | * Add README
9 | * [TBD] The example code will probably require Tropomi data sets, which are too
10 | large to distribute with the code. How to solve this?
11 |
12 | Test Driven Development
13 | -----------------------
14 | * Introduce TDD with Python using Python’s built-in unittest module
15 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/build.rst:
--------------------------------------------------------------------------------
1 | .. _install:
2 |
3 | Installation
4 | ============
5 |
6 | Wheels
7 | ------
8 |
9 | It is highly recommended that you use a pre-built wheel of `pys5p` from PyPI.
10 |
11 | If you have an existing Python (3.8+) installation (e.g. a python.org download,
12 | or one that comes with your OS), then on Windows, MacOS/OSX, and Linux on
13 | Intel computers, pre-built `pys5p` wheels can be installed via pip
14 | from PyPI::
15 |
16 | pip install [--user] pys5p
17 |
18 | OS-Specific remarks
19 | -------------------
20 |
21 | On a Debian Bullseye or Ubuntu 22.04 installation,
22 | we have successfully installed `pys5p` as follows::
23 |
24 | sudo apt install python3-numpy python3-scipy
25 | sudo apt install python3-h5py python3-netCDF4
26 | pip install --user pys5p
27 |
28 | This will also install a working version of the package xarray.
29 |
30 | .. important::
31 | The version of xarray which comes with the Debian package
32 | `python3-xarray` is too old, and will not work with `pys5p`.
33 |
34 | Building from source
35 | --------------------
36 |
37 | The latest release of `pys5p` is available from
38 | `gitHub <https://github.com/rmvanhees/pys5p>`_.
39 | You can obtain the source code using::
40 |
41 | git clone https://github.com/rmvanhees/pys5p.git
42 |
43 | We develop the code with `Python <https://www.python.org/>`_ 3.10, using the
44 | latest stable releases of the libraries
45 | `HDF5 <https://www.hdfgroup.org/solutions/hdf5/>`_ and
46 | `netCDF4 <https://www.unidata.ucar.edu/software/netcdf/>`_,
47 | and the Python packages:
48 | `numpy <https://numpy.org>`_, `h5py <https://www.h5py.org>`_,
49 | `netCDF4-python <https://unidata.github.io/netcdf4-python/>`_
50 | and `xarray <https://xarray.dev>`_.
51 |
52 | To compile the code you need the Python packages: setuptools, setuptools-scm
53 | and wheel. Then you can install `pys5p` as follows::
54 |
55 | python3 -m build
56 |     pip3 install dist/pys5p-<version>.whl [--user]
57 |
58 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 | """The Sphinx configuration file for the package pyS5p."""
7 |
8 | import os
9 | import sys
10 | from importlib import metadata
11 |
12 | sys.path.insert(0, os.path.abspath('..'))
13 |
14 |
15 | # -- Project information -----------------------------------------------------
16 |
17 | project = 'pys5p'
18 | copyright = '2022, SRON'
19 | author = 'Richard van Hees'
20 |
21 | # The full version, including alpha/beta/rc tags
22 | release = metadata.version('pys5p').split('+')[0]
23 |
24 |
25 | # -- General configuration ---------------------------------------------------
26 |
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = [
31 | 'sphinx.ext.autodoc',
32 | 'sphinx.ext.viewcode',
33 | 'sphinx.ext.napoleon'
34 | ]
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # List of patterns, relative to source directory, that match files and
40 | # directories to ignore when looking for source files.
41 | # This pattern also affects html_static_path and html_extra_path.
42 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
43 |
44 |
45 | # -- Options for HTML output -------------------------------------------------
46 |
47 | # The theme to use for HTML and HTML Help pages. See the documentation for
48 | # a list of builtin themes.
49 | #
50 | html_theme = 'sphinx_rtd_theme'
51 |
52 | # Add any paths that contain custom static files (such as style sheets) here,
53 | # relative to this directory. They are copied after the builtin static files,
54 | # so a file named "default.css" will overwrite the builtin "default.css".
55 | html_static_path = ['_static']
56 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. pys5p documentation master file, created by
2 | sphinx-quickstart on Fri Sep 30 11:35:47 2022.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Package pyS5p User Manual
7 | ==========================
8 |
9 | pyS5p provides a Python interface to S5p Tropomi Level-1B (and 2) products.
10 |
11 | For more information on the Sentinel 5 precursor mission visit:
12 |
13 | * https://earth.esa.int/web/guest/missions/esa-future-missions/sentinel-5P
14 | * http://www.tropomi.eu
15 |
16 | For more information on the Tropomi Level-1B products visit:
17 |
18 | * http://www.tropomi.eu/documents/level-0-1b-products
19 |
20 |
21 | Quick-start
22 | -----------
23 |
24 | .. toctree::
25 | :maxdepth: 1
26 |
27 | quick
28 | build
29 |
30 |
31 | Module Documentation
32 | --------------------
33 |
34 | .. toctree::
35 | :maxdepth: 2
36 |
37 | modules
38 |
39 |
40 | Indices and tables
41 | ==================
42 |
43 | * :ref:`genindex`
44 | * :ref:`modindex`
45 | * :ref:`search`
46 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/modules.rst:
--------------------------------------------------------------------------------
1 | src
2 | ===
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | pys5p
8 |
--------------------------------------------------------------------------------
/docs/pys5p.lib.rst:
--------------------------------------------------------------------------------
1 | pys5p.lib package
2 | =================
3 |
4 | Module contents
5 | ---------------
6 |
7 | .. automodule:: pys5p.lib
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
--------------------------------------------------------------------------------
/docs/pys5p.rst:
--------------------------------------------------------------------------------
1 | pys5p package
2 | =============
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 | :maxdepth: 4
9 |
10 | pys5p.lib
11 |
12 | Submodules
13 | ----------
14 |
15 | pys5p.ckd\_io module
16 | --------------------
17 |
18 | .. automodule:: pys5p.ckd_io
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | pys5p.error\_propagation module
24 | -------------------------------
25 |
26 | .. automodule:: pys5p.error_propagation
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 |
31 | pys5p.get\_data\_dir module
32 | ---------------------------
33 |
34 | .. automodule:: pys5p.get_data_dir
35 | :members:
36 | :undoc-members:
37 | :show-inheritance:
38 |
39 | pys5p.icm\_io module
40 | --------------------
41 |
42 | .. automodule:: pys5p.icm_io
43 | :members:
44 | :undoc-members:
45 | :show-inheritance:
46 |
47 | pys5p.l1b\_io module
48 | --------------------
49 |
50 | .. automodule:: pys5p.l1b_io
51 | :members:
52 | :undoc-members:
53 | :show-inheritance:
54 |
55 | pys5p.l1b\_patch module
56 | -----------------------
57 |
58 | .. automodule:: pys5p.l1b_patch
59 | :members:
60 | :undoc-members:
61 | :show-inheritance:
62 |
63 | pys5p.lv2\_io module
64 | --------------------
65 |
66 | .. automodule:: pys5p.lv2_io
67 | :members:
68 | :undoc-members:
69 | :show-inheritance:
70 |
71 | pys5p.ocm\_io module
72 | --------------------
73 |
74 | .. automodule:: pys5p.ocm_io
75 | :members:
76 | :undoc-members:
77 | :show-inheritance:
78 |
79 | pys5p.rls module
80 | ----------------
81 |
82 | .. automodule:: pys5p.rls
83 | :members:
84 | :undoc-members:
85 | :show-inheritance:
86 |
87 | pys5p.s5p\_msm module
88 | ---------------------
89 |
90 | .. automodule:: pys5p.s5p_msm
91 | :members:
92 | :undoc-members:
93 | :show-inheritance:
94 |
95 | pys5p.swir\_region module
96 | -------------------------
97 |
98 | .. automodule:: pys5p.swir_region
99 | :members:
100 | :undoc-members:
101 | :show-inheritance:
102 |
103 | pys5p.swir\_texp module
104 | -----------------------
105 |
106 | .. automodule:: pys5p.swir_texp
107 | :members:
108 | :undoc-members:
109 | :show-inheritance:
110 |
111 | pys5p.version module
112 | --------------------
113 |
114 | .. automodule:: pys5p.version
115 | :members:
116 | :undoc-members:
117 | :show-inheritance:
118 |
119 | Module contents
120 | ---------------
121 |
122 | .. automodule:: pys5p
123 | :members:
124 | :undoc-members:
125 | :show-inheritance:
126 |
--------------------------------------------------------------------------------
/docs/quick.rst:
--------------------------------------------------------------------------------
1 | .. _quick:
2 |
3 | Quick Start Guide
4 | =================
5 |
6 | Install
7 | -------
8 |
9 | If there are wheels for your platform (mac, linux, windows on x86),
10 | you can install ``pys5p`` via pip::
11 |
12 | pip install [--user] pys5p
13 |
14 | Or with `Anaconda <https://www.anaconda.com/>`_ or
15 | `Miniconda <http://conda.pydata.org/miniconda.html>`_::
16 |
17 | conda install pys5p
18 |
19 | To install `pys5p` from source see :ref:`install`.
20 |
21 |
22 | Core concepts
23 | -------------
24 |
25 | ...
26 |
--------------------------------------------------------------------------------
/examples/unit_test_s5p_ckd.py:
--------------------------------------------------------------------------------
1 | # This file is part of pyS5p
2 | #
3 | # https://github.com/rmvanhees/pys5p.git
4 | #
5 | # Copyright (c) 2017-2022 SRON - Netherlands Institute for Space Research
6 | # All Rights Reserved
7 | #
8 | # License: BSD-3-Clause
9 | """Perform a unit test on class CKDio."""
10 |
11 | import argparse
12 | from pathlib import Path
13 |
14 | from pys5p.ckd_io import CKDio
15 |
16 |
17 | def main():
18 | """Perform unit-tests on class CKDio (xarray version)."""
19 | parser = argparse.ArgumentParser(
20 | description=f"{Path(__file__).name}: run unit tests on class CKDio"
21 | )
22 | parser.add_argument(
23 | "ckd_dir",
24 | nargs=1,
25 | type=str,
26 | default=None,
27 | help="directory with CKD data, with static CKD in the subdirectory 'static'",
28 | )
29 | args = parser.parse_args()
30 |
31 | with CKDio(args.ckd_dir[0], ckd_version=1) as ckd:
32 | print(ckd.ckd_file)
33 | for meth in dir(ckd):
34 | if (
35 | meth.startswith("_")
36 | or meth.startswith("ckd")
37 | or meth in ("close", "fid", "get_param")
38 | ):
39 | continue
40 | print(
41 | "-------------------------", meth, "[v1]", "-------------------------"
42 | )
43 | print(meth, getattr(ckd, meth)())
44 |
45 | with CKDio(args.ckd_dir[0], ckd_version=2) as ckd:
46 | print(ckd.ckd_file)
47 | for meth in dir(ckd):
48 | if (
49 | meth.startswith("_")
50 | or meth.startswith("ckd")
51 | or meth in ("close", "fid", "get_param")
52 | ):
53 | continue
54 | print(
55 | "-------------------------", meth, "[v2]", "-------------------------"
56 | )
57 | print(meth, getattr(ckd, meth)())
58 |
59 |
60 | # - main code --------------------------------------
61 | if __name__ == "__main__":
62 | main()
63 |
--------------------------------------------------------------------------------
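
The two loops in main() above differ only in the CKD version; a sketch of a
possible fold (same behaviour assumed, not part of the original file):

    for ckd_version in (1, 2):
        with CKDio(args.ckd_dir[0], ckd_version=ckd_version) as ckd:
            print(ckd.ckd_file)
            for meth in dir(ckd):
                if meth.startswith(("_", "ckd")) or meth in ("close", "fid", "get_param"):
                    continue
                print("-" * 25, meth, f"[v{ckd_version}]", "-" * 25)
                print(meth, getattr(ckd, meth)())
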
/examples/unit_test_s5p_lv2.py:
--------------------------------------------------------------------------------
1 | # This file is part of pyS5p
2 | #
3 | # https://github.com/rmvanhees/pys5p.git
4 | #
5 | # Copyright (c) 2017-2022 SRON - Netherlands Institute for Space Research
6 | # All Rights Reserved
7 | #
8 | # License: BSD-3-Clause
9 | """Perform a unit test on class LV2io."""
10 |
11 | import argparse
12 | from pathlib import Path
13 |
14 | import numpy as np
15 | from pys5p.lv2_io import LV2io
16 |
17 |
18 | def read_lv2(l2_product):
19 | """Read Tropomi level 2 product."""
20 | with LV2io(l2_product) as lv2:
21 | # Class properties
22 | print("science_product: ", lv2.science_product)
23 | print("orbit: ", lv2.orbit)
24 | print("algorithm_version: ", lv2.algorithm_version)
25 | print("processor_version: ", lv2.processor_version)
26 | print("product_version: ", lv2.product_version)
27 | if not lv2.science_product:
28 | print("coverage_time: ", lv2.coverage_time)
29 | print("creation_time: ", lv2.creation_time)
30 | # Attributes
31 | print("get_attr: ", lv2.get_attr("title"))
32 | ds_name = "h2o_column" if lv2.science_product else "methane_mixing_ratio"
33 | print("get_attr: ", lv2.get_attr("long_name", ds_name))
34 |
35 | # Time information
36 | print("ref_time: ", lv2.ref_time)
37 | print("get_time: ", lv2.get_time())
38 | # Geolocation
39 | geo_var = "latitude_center" if lv2.science_product else "latitude"
40 | print("get_geo_data: ", lv2.get_geo_data()[geo_var].shape)
41 | # Footprints
42 | geo_var = "latitude"
43 | print("get_geo_bounds: ", lv2.get_geo_bounds()[geo_var].shape)
44 | print(
45 | "get_geo_bounds: ",
46 | lv2.get_geo_bounds(data_sel=np.s_[250:300, 100:110])[geo_var].shape,
47 | )
48 | # Datasets (numpy)
49 | ds_name = "h2o_column" if lv2.science_product else "methane_mixing_ratio"
50 | print("get_dataset: ", lv2.get_dataset(ds_name).shape)
51 | print(
52 | "get_dataset: ",
53 | lv2.get_dataset(ds_name, data_sel=np.s_[250:300, 100:110]).shape,
54 | )
55 | # Datasets (xarray)
56 | ds_name = "h2o_column" if lv2.science_product else "methane_mixing_ratio"
57 | print("get_data_as_xds: ", lv2.get_data_as_xds(ds_name))
58 |
59 |
60 | def main():
61 | """Perform unit testing on SWIR Level-2 products."""
62 | # parse command-line parameters
63 | parser = argparse.ArgumentParser(
64 | description=f"{Path(__file__).name}: run unit tests on class LV2io"
65 | )
66 | parser.add_argument(
67 | "lv2_product",
68 | nargs=1,
69 | type=str,
70 | default=None,
71 | help="use this Tropomi level2 product",
72 | )
73 | args = parser.parse_args()
74 | print(args)
75 |
76 | read_lv2(args.lv2_product[0])
77 |
78 |
79 | # - main code --------------------------------------
80 | if __name__ == "__main__":
81 | main()
82 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # pyproject.toml
2 | [build-system]
3 | requires = [
4 | "hatchling",
5 | "versioningit",
6 | ]
7 | build-backend = "hatchling.build"
8 |
9 | [project]
10 | name = "pys5p"
11 | description = "Software package to access Sentinel-5p Tropomi data products"
12 | readme = "README.md"
13 | license = "BSD-3-Clause"
14 | authors = [
15 | {name = "Richard van Hees", email = "r.m.van.hees@sron.nl"},
16 | {name = "Paul Tol", email = "P.J.J.Tol@sron.nl"}
17 | ]
18 | requires-python = ">=3.10"
19 | classifiers = [
20 | "Development Status :: 5 - Production/Stable",
21 | "Intended Audience :: Developers",
22 | "Intended Audience :: Science/Research",
23 | "Operating System :: OS Independent",
24 | "Programming Language :: Python :: 3 :: Only",
25 | "Programming Language :: Python :: 3.10",
26 | "Programming Language :: Python :: 3.11",
27 | "Programming Language :: Python :: 3.12",
28 | "Programming Language :: Python :: 3.13",
29 | "Topic :: Scientific/Engineering :: Atmospheric Science",
30 | ]
31 | keywords = [
32 | "Sentinel-5p",
33 | "Tropomi",
34 | ]
35 | dynamic = [
36 | "version"
37 | ]
38 | dependencies = [
39 | "h5py>=3.11",
40 | "moniplot>=1.0",
41 | "numpy>=1.26",
42 | "netCDF4>=1.7",
43 | "xarray>=2023.2",
44 | ]
45 |
46 | [project.scripts]
47 |
48 | [project.urls]
49 | homepage = "https://github.com/rmvanhees/pys5p"
50 | documentation = "https://pys5p.readthedocs.io/en/latest/"
51 | # Source = "https://github.com/rmvanhees/pys5p"
52 | # Changelog = "https://github.com/rmvanhees/pys5p/HISTORY.rst"
53 | "Issue tracker" = "https://github.com/rmvanhees/pys5p/issues"
54 |
55 | [tool.hatch.version]
56 | source = "versioningit"
57 |
58 | [tool.versioningit]
59 |
60 | [tool.versioningit.next-version]
61 | method = "smallest"
62 |
63 | [tool.versioningit.format]
64 | distance = "{next_version}.dev{distance}+{vcs}{rev}"
65 | # Example formatted version: 1.2.4.dev42+ge174a1f
66 |
67 | dirty = "{base_version}+d{build_date:%Y%m%d}"
68 | # Example formatted version: 1.2.3+d20230922
69 |
70 | distance-dirty = "{next_version}.dev{distance}+{vcs}{rev}.d{build_date:%Y%m%d}"
71 | # Example formatted version: 1.2.4.dev42+ge174a1f.d20230922
72 |
73 | [tool.ruff]
74 | line-length = 88
75 | target-version = "py312"
76 | # exclude = ["pilots"]
77 |
78 | [tool.ruff.lint]
79 | select = [
80 | "D", # pydocstyle
81 | "E", # pycodestyle
82 | "F", # pyflakes
83 | "I", # isort
84 | "N", # pep8-naming
85 | "W", # pycodestyle
86 | "ANN", # flake8-annotations
87 | "B", # flake8-bugbear
88 | "ISC", # flake8-implicit-str-concat
89 | "PGH", # flake8-pie
90 | "PYI", # flake8-pyi
91 | "Q", # flake8-quotes
92 | "SIM", # flake8-simplify
93 | "TID", # flake8-tidy-imports
94 | "TCH", # flake8-type-checking
95 | "NPY", # NumPy-specific
96 | "PERF", # Perflint
97 | "RUF", # Ruff Specific
98 | "UP", # pyupgrade
99 | ]
100 | ignore = ["D203", "D213", "ISC001"]
101 |
102 | [tool.ruff.lint.pydocstyle]
103 | convention = "pep257"
104 |
--------------------------------------------------------------------------------
/requiments.txt:
--------------------------------------------------------------------------------
1 | certifi==2023.7.22
2 | cftime==1.6.2
3 | contourpy==1.1.0
4 | cycler==0.11.0
5 | fonttools==4.42.1
6 | h5py==3.9.0
7 | kiwisolver==1.4.4
8 | matplotlib==3.7.2
9 | moniplot==0.5.11
10 | netCDF4==1.6.4
11 | numpy==1.25.2
12 | packaging==23.1
13 | pandas==2.0.3
14 | Pillow==10.0.0
15 | pyparsing==3.0.9
16 | python-dateutil==2.8.2
17 | pytz==2023.3
18 | setuptools-scm==7.1.0
19 | six==1.16.0
20 | typing_extensions==4.7.1
21 | tzdata==2023.3
22 | xarray==2023.8.0
23 |
--------------------------------------------------------------------------------
/src/pys5p/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pys5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 |
11 | """SRON Python package `pys5p`.
12 |
13 | It contains software to read Sentinel-5p Tropomi ICM, L1B and L2 products.
14 | """
15 |
16 | import contextlib
17 | from importlib.metadata import PackageNotFoundError, version
18 |
19 | with contextlib.suppress(PackageNotFoundError):
20 | __version__ = version(__name__)
21 |
--------------------------------------------------------------------------------
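
The suppress-block above sets __version__ only when the package is installed;
a quick illustrative check:

    import pys5p

    # the attribute is absent when pys5p is imported from an uninstalled tree
    print(getattr(pys5p, "__version__", "not installed"))
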
/src/pys5p/ckd_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`CKDio`, class to read S5p Tropomi CKD data."""
11 |
12 | from __future__ import annotations
13 |
14 | __all__ = ["CKDio"]
15 |
16 | from pathlib import Path, PosixPath
17 | from typing import Self
18 |
19 | import h5py
20 | import numpy as np
21 | import xarray as xr
22 | from moniplot.image_to_xarray import h5_to_xr
23 |
24 |
25 | # - local functions ------------------------------
26 | def reject_row257(xarr: xr.DataArray | xr.Dataset) -> xr.DataArray | xr.Dataset:
27 | """Remove row 257 from DataArray or Dataset."""
28 | return xarr.isel(row=np.s_[0:256])
29 |
30 |
31 | # - class definition -------------------------------
32 | class CKDio:
33 | """Read Tropomi CKD from the Static CKD product or from dynamic CKD products.
34 |
35 | Parameters
36 | ----------
37 | ckd_dir : Path, default=Path('/nfs/Tropomi/share/ckd')
38 | Directory where the CKD files are stored
39 | ckd_version : int, default=1
40 | Version of the CKD
41 | ckd_file : Path, optional
42 | Name of the CKD file; when None (default) the CKD file is searched
43 | for in the directory ckd_dir, with ckd_version in the glob-string
44 |
45 | Notes
46 | -----
47 | Not all CKD are defined or derived for all bands.
48 | You can request a CKD for one band or for a channel (bands: '12', '34',
49 | '56', '78'). Do not mix bands from different channels
50 |
51 | The option to have dynamic CKD is not used for the Tropomi mission, only
52 | for S/W version 1 a dynamic CKD product is defined. This product contained
53 | the OCAL CKD and was not updated automatically. For version 2, all CKD are
54 | stored in one product, where some CKD have a time-axis to correct any
55 | in-flight degradation.
56 |
57 | Therefore, the logic to find a CKD is implemented as follows:
58 |
59 | 1) ckd_dir, defines the base directory to search for the CKD products
60 | (see below).
61 | 2) ckd_file, defines the full path to (static) CKD product;
62 | (version 1) any product with dynamic CKD has to be in the same
63 | directory.
64 |
65 | Version 1:
66 |
67 | * Static CKD are stored in one file: glob('*_AUX_L1_CKD_*')
68 | * Dynamic CKD are stored in two files:
69 |
70 | - UVN, use glob('*_ICM_CKDUVN_*')
71 | - SWIR, use glob('*_ICM_CKDSIR_*')
72 |
73 | Version 2+:
74 |
75 | * All CKD in one file: glob('*_AUX_L1_CKD_*')
76 | * Dynamic CKD are empty
77 |
78 | """
79 |
80 | def __init__(
81 | self: CKDio,
82 | ckd_dir: Path | None = None,
83 | ckd_version: int = 1,
84 | ckd_file: Path | None = None,
85 | ) -> None:
86 | """Create CKDio object."""
87 | if ckd_dir is None:
88 | ckd_dir = Path("/nfs/Tropomi/share/ckd")
89 | self.ckd_version = max(1, ckd_version)
90 | self.ckd_dyn_file = None
91 |
92 | # define path to CKD product
93 | if ckd_file is None:
94 | if not ckd_dir.is_dir():
95 | raise FileNotFoundError(f"CKD directory not found: {ckd_dir.name}")
96 | self.ckd_dir = ckd_dir
97 | glob_str = f"*_AUX_L1_CKD_*_*_00000_{self.ckd_version:02d}_*_*.h5"
98 | if (self.ckd_dir / "static").is_dir():
99 | res = sorted((self.ckd_dir / "static").glob(glob_str))
100 | else:
101 | res = sorted(self.ckd_dir.glob(glob_str))
102 | if not res:
103 | raise FileNotFoundError("Static CKD product not found")
104 | self.ckd_file = res[-1]
105 | else:
106 | if not ckd_file.is_file():
107 | raise FileNotFoundError(f"CKD file not found: {ckd_file.name}")
108 | self.ckd_dir = ckd_file.parent
109 | self.ckd_file = ckd_file
110 |
111 | # obtain path to dynamic CKD product (version 1, only)
112 | if self.ckd_version == 1:
113 | if (self.ckd_dir / "dynamic").is_dir():
114 | res = sorted((self.ckd_dir / "dynamic").glob("*_ICM_CKDSIR_*"))
115 | else:
116 | res = sorted(self.ckd_dir.glob("*_ICM_CKDSIR_*"))
117 | if res:
118 | self.ckd_dyn_file = res[-1]
119 |
120 | # open access to CKD product
121 | self.fid = h5py.File(self.ckd_file, "r")
122 |
123 | def __enter__(self: CKDio) -> Self:
124 | """Initiate the context manager."""
125 | return self
126 |
127 | def __exit__(self: CKDio, *args: object) -> bool:
128 | """Exit the context manager."""
129 | self.close()
130 | return False # any exception is raised by the with statement.
131 |
132 | def close(self: CKDio) -> None:
133 | """Make sure that we close all resources."""
134 | if self.fid is not None:
135 | self.fid.close()
136 |
137 | def creation_time(self: CKDio) -> str:
138 | """Return datetime when the L1b product was created."""
139 | if self.ckd_version == 2:
140 | attr = self.fid["METADATA"].attrs["production_datetime"]
141 | else:
142 | group = PosixPath(
143 | "METADATA", "earth_explorer_header", "fixed_header", "source"
144 | )
145 | attr = self.fid[str(group)].attrs["Creator_Date"][0]
146 |
147 | if isinstance(attr, bytes):
148 | attr = attr.decode("ascii")
149 | return attr
150 |
151 | def creator_version(self: CKDio) -> str:
152 | """Return version of Tropomi L01B processor."""
153 | group = PosixPath("METADATA", "earth_explorer_header", "fixed_header")
154 | attr = self.fid[str(group)].attrs["File_Version"]
155 | if self.ckd_version == 1:
156 | attr = attr[0]
157 | if isinstance(attr, bytes):
158 | attr = attr.decode("ascii")
159 | return attr
160 |
161 | @staticmethod
162 | def __get_spectral_channel(bands: str) -> str:
163 | """Check bands is valid: single band or belong to one channel.
164 |
165 | Parameters
166 | ----------
167 | bands : str
168 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'],
169 |
170 | """
171 | band2channel = [
172 | "UNKNOWN",
173 | "UV",
174 | "UV",
175 | "VIS",
176 | "VIS",
177 | "NIR",
178 | "NIR",
179 | "SWIR",
180 | "SWIR",
181 | ]
182 |
183 | if not 0 < len(bands) <= 2:
184 | raise ValueError("read a single band or channel, only")
185 |
186 | if (
187 | len(bands) == 2
188 | and band2channel[int(bands[0])] != band2channel[int(bands[1])]
189 | ):
190 | raise ValueError("bands should be of the same channel")
191 |
192 | return band2channel[int(bands[0])]
193 |
194 | def get_param(self: CKDio, ds_name: str, band: str = "7") -> np.ndarray | float:
195 | """Return value(s) of a CKD parameter from the Static CKD product.
196 |
197 | Parameters
198 | ----------
199 | ds_name : str
200 | Name of the HDF5 dataset, e.g. 'pixel_full_well'
201 | band : str, default='7'
202 | Band identifier '1', '2', ..., '8'
203 |
204 | Returns
205 | -------
206 | numpy.ndarray or scalar
207 | CKD parameter value
208 |
209 | Notes
210 | -----
211 | Datasets of size=1 are returned as a scalar
212 |
213 | Handy function for scalar HDF5 datasets, such as:
214 |
215 | - dc_reference_temp
216 | - dpqf_threshold
217 | - pixel_full_well
218 | - pixel_fw_flag_thresh
219 |
220 | """
221 | if not 1 <= int(band) <= 8:
222 | raise ValueError("band must be between and 1 and 8")
223 |
224 | if ds_name not in self.fid[f"/BAND{band}"]:
225 | raise ValueError("dataset not available")
226 |
227 | return self.fid[f"/BAND{band}/{ds_name}"][()]
228 |
229 | # ---------- band or channel CKD's ----------
230 | def dn2v_factors(self: CKDio) -> np.ndarray:
231 | """Return digital number to Volt CKD, SWIR only.
232 |
233 | Notes
234 | -----
235 | The DN2V factor has no error attached to it.
236 |
237 | """
238 | return np.concatenate(
239 | (
240 | self.fid["/BAND7/dn2v_factor_swir"][2:],
241 | self.fid["/BAND8/dn2v_factor_swir"][2:],
242 | )
243 | )
244 |
245 | def v2c_factors(self: CKDio) -> np.ndarray:
246 | """Return Voltage to Charge CKD, SWIR only.
247 |
248 | Notes
249 | -----
250 | The V2C factor has no error attached to it.
251 |
252 | """
253 | # pylint: disable=no-member
254 | return np.concatenate(
255 | (
256 | self.fid["/BAND7/v2c_factor_swir"].fields("value")[2:],
257 | self.fid["/BAND8/v2c_factor_swir"].fields("value")[2:],
258 | )
259 | )
260 |
261 | # ---------- spectral-channel CKD's ----------
262 | def __rd_dataset(self: CKDio, dset_name: str, bands: str) -> xr.Dataset | None:
263 | """General function to read non-compound dataset into xarray::Dataset.
264 |
265 | Parameters
266 | ----------
267 | dset_name: str
268 | name (including path) of the dataset as '/BAND{}/'
269 | bands : str
270 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'],
271 |
272 | Returns
273 | -------
274 | xarray.Dataset
275 | parameters of CKD with name 'dset_name'
276 |
277 | """
278 | ckd_val = None
279 | for band in bands:
280 | # try Static-CKD product
281 | if dset_name.format(band) in self.fid:
282 | if ckd_val is None:
283 | ckd_val = h5_to_xr(self.fid[dset_name.format(band)])
284 | else:
285 | ckd_val = xr.concat(
286 | (ckd_val, h5_to_xr(self.fid[dset_name.format(band)])),
287 | dim="column",
288 | )
289 | # try Dynamic-CKD product
290 | else:
291 | dyn_fid = h5py.File(self.ckd_dyn_file, "r")
292 | if dset_name.format(band) in dyn_fid:
293 | if ckd_val is None:
294 | ckd_val = h5_to_xr(dyn_fid[dset_name.format(band)])
295 | else:
296 | ckd_val = xr.concat(
297 | (ckd_val, h5_to_xr(dyn_fid[dset_name.format(band)])),
298 | dim="column",
299 | )
300 | dyn_fid.close()
301 |
302 | if ckd_val is None:
303 | return None
304 |
305 | # Use NaN as FillValue
306 | ckd_val = ckd_val.where(ckd_val != float.fromhex("0x1.ep+122"), other=np.nan)
307 |
308 | # combine DataArrays to Dataset
309 | return xr.Dataset({"value": ckd_val}, attrs=ckd_val.attrs)
310 |
311 | def __rd_datapoints(self: CKDio, dset_name: str, bands: str) -> xr.Dataset | None:
312 | """General function to read datapoint dataset into xarray::Dataset.
313 |
314 | Parameters
315 | ----------
316 | dset_name: str
317 | name (including path) of the dataset as '/BAND{}/'
318 | bands : str
319 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'],
320 | default: '78'
321 |
322 | Returns
323 | -------
324 | xarray.Dataset
325 | parameters (value and uncertainty) of CKD with name 'dset_name'
326 |
327 | """
328 | ckd_val = None
329 | ckd_err = None
330 | for band in bands:
331 | # try Static-CKD product
332 | if dset_name.format(band) in self.fid:
333 | if ckd_val is None:
334 | ckd_val = h5_to_xr(self.fid[dset_name.format(band)], field="value")
335 | ckd_err = h5_to_xr(self.fid[dset_name.format(band)], field="error")
336 | else:
337 | ckd_val = xr.concat(
338 | (
339 | ckd_val,
340 | h5_to_xr(self.fid[dset_name.format(band)], field="value"),
341 | ),
342 | dim="column",
343 | )
344 | ckd_err = xr.concat(
345 | (
346 | ckd_err,
347 | h5_to_xr(self.fid[dset_name.format(band)], field="error"),
348 | ),
349 | dim="column",
350 | )
351 | # try Dynamic-CKD product
352 | else:
353 | dyn_fid = h5py.File(self.ckd_dyn_file, "r")
354 | if dset_name.format(band) in dyn_fid:
355 | if ckd_val is None:
356 | ckd_val = h5_to_xr(
357 | dyn_fid[dset_name.format(band)], field="value"
358 | )
359 | ckd_err = h5_to_xr(
360 | dyn_fid[dset_name.format(band)], field="error"
361 | )
362 | else:
363 | ckd_val = xr.concat(
364 | (
365 | ckd_val,
366 | h5_to_xr(
367 | dyn_fid[dset_name.format(band)], field="value"
368 | ),
369 | ),
370 | dim="column",
371 | )
372 | ckd_err = xr.concat(
373 | (
374 | ckd_err,
375 | h5_to_xr(
376 | dyn_fid[dset_name.format(band)], field="error"
377 | ),
378 | ),
379 | dim="column",
380 | )
381 | dyn_fid.close()
382 |
383 | if ckd_val is None:
384 | return None
385 |
386 | # Use NaN as FillValue
387 | ckd_val = ckd_val.where(ckd_val != float.fromhex("0x1.ep+122"), other=np.nan)
388 | ckd_err = ckd_err.where(ckd_err != float.fromhex("0x1.ep+122"), other=np.nan)
389 |
390 | # combine DataArrays to Dataset
391 | return xr.Dataset({"value": ckd_val, "error": ckd_err}, attrs=ckd_val.attrs)
392 |
393 | # ---------- static CKD's ----------
394 | def absirr(self: CKDio, qvd: int = 1, bands: str = "78") -> xr.Dataset:
395 | """Return absolute irradiance responsivity.
396 |
397 | Parameters
398 | ----------
399 | qvd : int, default: 1
400 | Tropomi QVD identifier. Valid values are 1 or 2
401 | bands : str, default: '78'
402 | Tropomi bands [1..8] or channels ['12', '34', '56', '78']
403 |
404 | """
405 | try:
406 | channel = self.__get_spectral_channel(bands)
407 | except Exception as exc:
408 | raise RuntimeError(exc) from exc
409 |
410 | dset_name = "/BAND{}" + f"/abs_irr_conv_factor_qvd{qvd}"
411 | ckd = self.__rd_datapoints(dset_name, bands)
412 | if "7" in bands or "8" in bands:
413 | ckd = reject_row257(ckd)
414 | ckd.attrs["long_name"] = f"{channel} absolute irradiance CKD (QVD={qvd})"
415 |
416 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
417 |
418 | def absrad(self: CKDio, bands: str = "78") -> xr.Dataset:
419 | """Return absolute radiance responsivity.
420 |
421 | Parameters
422 | ----------
423 | bands : str, default: '78'
424 | Tropomi bands [1..8] or channels ['12', '34', '56', '78']
425 |
426 | """
427 | try:
428 | channel = self.__get_spectral_channel(bands)
429 | except Exception as exc:
430 | raise RuntimeError(exc) from exc
431 |
432 | dset_name = "/BAND{}/abs_rad_conv_factor"
433 | ckd = self.__rd_datapoints(dset_name, bands)
434 | if "7" in bands or "8" in bands:
435 | ckd = reject_row257(ckd)
436 | ckd.attrs["long_name"] = f"{channel} absolute radiance CKD"
437 |
438 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
439 |
440 | def memory(self: CKDio) -> xr.Dataset:
441 | """Return memory CKD, SWIR only."""
442 | column = None
443 | ckd_parms = [
444 | "mem_lin_neg_swir",
445 | "mem_lin_pos_swir",
446 | "mem_qua_neg_swir",
447 | "mem_qua_pos_swir",
448 | ]
449 |
450 | ckd = xr.Dataset()
451 | ckd.attrs["long_name"] = "SWIR memory CKD"
452 | for key in ckd_parms:
453 | dset_name = f"/BAND7/{key}"
454 | ckd_val = h5_to_xr(self.fid[dset_name], field="value")
455 | ckd_err = h5_to_xr(self.fid[dset_name], field="error")
456 | dset_name = f"/BAND8/{key}"
457 | ckd_val = xr.concat(
458 | (ckd_val, h5_to_xr(self.fid[dset_name], field="value")), dim="column"
459 | )
460 | if column is None:
461 | column = np.arange(ckd_val.column.size, dtype="u4")
462 | ckd_val = ckd_val.assign_coords(column=column)
463 | ckd_err = xr.concat(
464 | (ckd_err, h5_to_xr(self.fid[dset_name], field="error")), dim="column"
465 | )
466 | ckd_err = ckd_err.assign_coords(column=column)
467 | ckd[key.replace("swir", "value")] = reject_row257(ckd_val)
468 | ckd[key.replace("swir", "error")] = reject_row257(ckd_err)
469 |
470 | return ckd
471 |
472 | def noise(self: CKDio, bands: str = "78") -> xr.Dataset:
473 | """Return readout-noise CKD, SWIR only.
474 |
475 | Parameters
476 | ----------
477 | bands : str, default: '78'
478 | Tropomi bands [1..8] or channels ['12', '34', '56', '78']
479 |
480 | """
481 | dset_name = "/BAND{}/readout_noise_swir"
482 | ckd = reject_row257(self.__rd_dataset(dset_name, bands))
483 | ckd.attrs["long_name"] = "SWIR readout-noise CKD"
484 |
485 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
486 |
487 | def prnu(self: CKDio, bands: str = "78") -> xr.Dataset:
488 | """Return Pixel Response Non-Uniformity (PRNU).
489 |
490 | Parameters
491 | ----------
492 | bands : str, default: '78'
493 | Tropomi bands [1..8] or channels ['12', '34', '56', '78']
494 |
495 | """
496 | try:
497 | channel = self.__get_spectral_channel(bands)
498 | except Exception as exc:
499 | raise RuntimeError(exc) from exc
500 |
501 | ckd = self.__rd_datapoints("/BAND{}/PRNU", bands)
502 | if "7" in bands or "8" in bands:
503 | ckd = reject_row257(ckd)
504 | ckd.attrs["long_name"] = f"{channel} PRNU CKD"
505 |
506 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
507 |
508 | def relirr(self: CKDio, qvd: int = 1, bands: str = "78") -> tuple[dict, ...] | None:
509 | """Return relative irradiance correction.
510 |
511 | Parameters
512 | ----------
513 | bands : str, default: '78'
514 | Tropomi bands [1..8] or channels ['12', '34', '56', '78']
515 | qvd : int
516 | Tropomi QVD identifier. Valid values are 1 or 2, default: 1
517 |
518 | Returns
519 | -------
520 | dict
521 | CKD for relative irradiance correction as dictionaries with keys:
522 |
523 | - band: Tropomi spectral band ID
524 | - mapping_cols: coarse irregular mapping of the columns
525 | - mapping_rows: coarse irregular mapping of the rows
526 | - cheb_coefs: Chebyshev parameters for elevation and azimuth \
527 | for pixels on a coarse irregular grid
528 |
529 | """
530 | try:
531 | _ = self.__get_spectral_channel(bands)
532 | except Exception as exc:
533 | raise RuntimeError(exc) from exc
534 |
535 | res = ()
536 | for band in bands:
537 | ckd = {"band": int(band)}
538 |
539 | dsname = f"/BAND{band}/rel_irr_coarse_mapping_vert"
540 | ckd["mapping_rows"] = self.fid[dsname][:].astype(int)
541 |
542 | dsname = f"/BAND{band}/rel_irr_coarse_mapping_hor"
543 | # pylint: disable=no-member
544 | mapping_hor = self.fid[dsname][:].astype(int)
545 | mapping_hor[mapping_hor > 1000] -= 2**16
546 | ckd["mapping_cols"] = mapping_hor
547 |
548 | dsname = f"/BAND{band}/rel_irr_coarse_func_cheb_qvd{qvd}"
549 | ckd["cheb_coefs"] = self.fid[dsname]["coefs"][:]
550 | res += (ckd,)
551 |
552 | return res if res else None
553 |
554 | def saa(self: CKDio) -> dict:
555 | """Return definition of the SAA region."""
556 | return {"lat": self.fid["saa_latitude"][:], "lon": self.fid["saa_longitude"][:]}
557 |
558 | def wavelength(self: CKDio, bands: str = "78") -> xr.Dataset:
559 | """Return wavelength CKD.
560 |
561 | Parameters
562 | ----------
563 | bands : str, default: '78'
564 | Tropomi bands [1..8] or channels ['12', '34', '56', '78']
565 |
566 | Notes
567 | -----
568 | The wavelength CKD has no error attached to it.
569 |
570 | """
571 | try:
572 | channel = self.__get_spectral_channel(bands)
573 | except Exception as exc:
574 | raise RuntimeError(exc) from exc
575 |
576 | dset_name = "/BAND{}/wavelength_map"
577 | ckd = self.__rd_datapoints(dset_name, bands)
578 | if "7" in bands or "8" in bands:
579 | ckd = reject_row257(ckd)
580 | ckd.attrs["long_name"] = f"{channel} wavelength CKD"
581 |
582 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
583 |
584 | # ---------- static or dynamic CKD's ----------
585 | def darkflux(self: CKDio, bands: str = "78") -> xr.Dataset:
586 | """Return dark-flux CKD, SWIR only.
587 |
588 | Parameters
589 | ----------
590 | bands : str, default: '78'
591 | Tropomi SWIR bands '7', '8' or both '78'
592 |
593 | """
594 | dset_name = "/BAND{}/long_term_swir"
595 | ckd = reject_row257(self.__rd_datapoints(dset_name, bands))
596 | ckd.attrs["long_name"] = "SWIR dark-flux CKD"
597 |
598 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
599 |
600 | def offset(self: CKDio, bands: str = "78") -> xr.Dataset:
601 | """Return offset CKD, SWIR only.
602 |
603 | Parameters
604 | ----------
605 | bands : str, default: '78'
606 | Tropomi SWIR bands '7', '8' or both '78'
607 |
608 | """
609 | dset_name = "/BAND{}/analog_offset_swir"
610 | ckd = reject_row257(self.__rd_datapoints(dset_name, bands))
611 | ckd.attrs["long_name"] = "SWIR offset CKD"
612 |
613 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
614 |
615 | def pixel_quality(self: CKDio, bands: str = "78") -> xr.Dataset:
616 | """Return detector pixel-quality mask (float [0, 1]), SWIR only.
617 |
618 | Parameters
619 | ----------
620 | bands : str, default: '78'
621 | Tropomi SWIR bands '7', '8' or both '78'
622 |
623 | """
624 | dset_name = "/BAND{}/dpqf_map"
625 | ckd = reject_row257(self.__rd_dataset(dset_name, bands))
626 | ckd.attrs["long_name"] = "SWIR pixel-quality CKD"
627 |
628 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
629 |
630 | def dpqf(
631 | self: CKDio, threshold: float | None = None, bands: str = "78"
632 | ) -> np.ndarray:
633 | """Return detector pixel-quality flags (boolean), SWIR only.
634 |
635 | Parameters
636 | ----------
637 | threshold: float, optional
638 | Value between [0..1], default is to read the threshold from CKD
639 | bands : str, default='78'
640 | Tropomi SWIR bands '7', '8', or both '78'
641 |
642 | Returns
643 | -------
644 | numpy ndarray
645 |
646 | """
647 | dpqf = None
648 | if threshold is None:
649 | threshold = self.fid["/BAND7/dpqf_threshold"][:]
650 |
651 | # try Static-CKD product
652 | if "/BAND7/dpqf_map" in self.fid:
653 | if bands == "7":
654 | dpqf = self.fid["/BAND7/dpqf_map"][:-1, :] < threshold
655 | elif bands == "8":
656 | dpqf = self.fid["/BAND8/dpqf_map"][:-1, :] < threshold
657 | elif bands == "78":
658 | dpqf_b7 = self.fid["/BAND7/dpqf_map"][:-1, :]
659 | dpqf_b8 = self.fid["/BAND8/dpqf_map"][:-1, :]
660 | dpqf = np.hstack((dpqf_b7, dpqf_b8)) < threshold
661 | else:
662 | # try Dynamic-CKD product
663 | with h5py.File(self.ckd_dyn_file, "r") as fid:
664 | if bands == "7":
665 | dpqf = fid["/BAND7/dpqf_map"][:-1, :] < threshold
666 | elif bands == "8":
667 | dpqf = fid["/BAND8/dpqf_map"][:-1, :] < threshold
668 | elif bands == "78":
669 | dpqf_b7 = fid["/BAND7/dpqf_map"][:-1, :]
670 | dpqf_b8 = fid["/BAND8/dpqf_map"][:-1, :]
671 | dpqf = np.hstack((dpqf_b7, dpqf_b8)) < threshold
672 |
673 | return dpqf
674 |
675 | def saturation(self: CKDio) -> xr.Dataset:
676 | """Return pixel-saturation values (pre-offset), SWIR only."""
677 | dset_name = "/BAND{}/saturation_preoffset"
678 | ckd_file = self.ckd_dir / "OCAL" / "ckd.saturation_preoffset.detector4.nc"
679 | with h5py.File(ckd_file, "r") as fid:
680 | ckd_val = xr.concat(
681 | (
682 | h5_to_xr(fid[dset_name.format(7)]),
683 | h5_to_xr(fid[dset_name.format(8)]),
684 | ),
685 | dim="column",
686 | )
687 |
688 | ckd = xr.Dataset({"value": ckd_val}, attrs=ckd_val.attrs)
689 | ckd = reject_row257(ckd)
690 | ckd.attrs["long_name"] = "SWIR pixel-saturation CKD (pre-offset)"
691 |
692 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4"))
693 |
--------------------------------------------------------------------------------
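
A short sketch of reading SWIR CKD's with the class above; the CKD directory
shown is the class default and may not exist on your system:

    from pathlib import Path
    from pys5p.ckd_io import CKDio

    with CKDio(Path("/nfs/Tropomi/share/ckd"), ckd_version=2) as ckd:
        dark = ckd.darkflux("78")  # xarray.Dataset with 'value' and 'error'
        print(dark.attrs["long_name"], dark["value"].shape)
        flags = ckd.dpqf()         # boolean pixel-quality flags (numpy array)
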
/src/pys5p/error_propagation.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """Routines to divide or add (partial) uncertainties."""
11 |
12 | from __future__ import annotations
13 |
14 | __all__ = ["unc_div", "unc_sum"]
15 |
16 | import numpy as np
17 |
18 |
19 | def unc_div(
20 | value_a: np.ndarray, sigma_a: np.ndarray, value_b: np.ndarray, sigma_b: np.ndarray
21 | ) -> np.ndarray:
22 | r"""Absolute error for parameter `a` divided by `b`.
23 |
24 | .. math:: (a / b) * \sqrt{(\sigma_a / a)^2 + (\sigma_b / b)^2}
25 | """
26 | if not value_a.shape == value_b.shape == sigma_a.shape == sigma_b.shape:
27 | raise TypeError("dimensions of input arrays are not the same")
28 |
29 | mask = (
30 | np.isfinite(value_a)
31 | & np.isfinite(sigma_a)
32 | & np.isfinite(value_b)
33 | & np.isfinite(sigma_b)
34 | )
35 |
36 | if np.sum(mask) == sigma_a.size:
37 | return (value_a / value_b) * np.sqrt(
38 | (sigma_a / value_a) ** 2 + (sigma_b / value_b) ** 2
39 | )
40 |
41 | res = np.full(sigma_a.shape, np.nan)
42 | res[mask] = (value_a[mask] / value_b[mask]) * np.sqrt(
43 | (sigma_a[mask] / value_a[mask]) ** 2 + (sigma_b[mask] / value_b[mask]) ** 2
44 | )
45 | return res
46 |
47 |
48 | def unc_sum(sigma_a: np.ndarray, sigma_b: np.ndarray) -> np.ndarray:
49 | r"""Absolute error for the sum of the parameters `a` and `b`.
50 |
51 | .. math:: \sqrt{\sigma_a^2 + \sigma_b^2}
52 | """
53 | if sigma_a.shape != sigma_b.shape:
54 | raise TypeError("dimensions of sigma_a and sigma_b are not the same")
55 |
56 | mask = np.isfinite(sigma_a) & np.isfinite(sigma_b)
57 |
58 | if np.sum(mask) == sigma_a.size:
59 | return np.sqrt(sigma_a**2 + sigma_b**2)
60 |
61 | res = np.full(sigma_a.shape, np.nan)
62 | res[mask] = np.sqrt(sigma_a[mask] ** 2 + sigma_b[mask] ** 2)
63 |
64 | return res
65 |
--------------------------------------------------------------------------------
/src/pys5p/get_data_dir.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """Routine `get_data_dir` to discover test-datasets on your system."""
11 |
12 | from __future__ import annotations
13 |
14 | from os import environ
15 | from pathlib import Path
16 |
17 |
18 | def get_data_dir() -> str | None:
19 | """Obtain directory with test datasets.
20 |
21 | Limited to UNIX/Linux/macOS operating systems
22 |
23 | This function checks whether the following directories are available:
24 | - /data/$USER/pys5p-data
25 | - /Users/$USER/pys5p-data
26 | - environment variable PYS5P_DATA_DIR
27 |
28 | It expects the data to be organized in the subdirectories:
29 | - CKD which should contain the SWIR dpqf CKD
30 | - OCM which should contain at least one directory of an on-ground
31 | calibration measurement with one or more OCAL LX products.
32 | - L1B which should contain at least one offline calibration, irradiance
33 | and radiance product.
34 |     - ICM which should contain at least one in-flight calibration product.
35 | """
36 | try:
37 | user = environ["USER"]
38 | except KeyError:
39 | print("*** Fatal: environment variable USER not set")
40 | return None
41 |
42 | guesses_data_dir = [f"/data/{user}/pys5p-data", f"/Users/{user}/pys5p-data"]
43 |
44 |     if "PYS5P_DATA_DIR" in environ:
45 |         guesses_data_dir.append(environ["PYS5P_DATA_DIR"])
50 |
51 | for key in guesses_data_dir:
52 | if Path(key).is_dir():
53 | return key
54 |
55 |     raise FileNotFoundError("no pys5p-data directory found on this system")
56 |
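Editor's sketch — pointing the search at a custom location via the environment
variable; the path used here is only an example.

    from os import environ
    from pys5p.get_data_dir import get_data_dir

    environ["PYS5P_DATA_DIR"] = "/scratch/pys5p-data"   # example path
    try:
        print(get_data_dir())
    except FileNotFoundError:
        print("no pys5p-data directory found on this system")
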
--------------------------------------------------------------------------------
/src/pys5p/l1b_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`L1Bio`, class to access Tropomi L1B products."""
11 |
12 | from __future__ import annotations
13 |
14 | __all__ = ["L1Bio", "L1BioENG", "L1BioIRR", "L1BioRAD"]
15 |
16 | from datetime import datetime, timedelta
17 | from pathlib import Path, PurePosixPath
18 | from typing import TYPE_CHECKING, Self
19 |
20 | import h5py
21 | import numpy as np
22 | from moniplot.biweight import Biweight
23 | from setuptools_scm import get_version
24 |
25 | from .swir_texp import swir_exp_time
26 |
27 | if TYPE_CHECKING:
28 | from collections.abc import Iterable
29 |
30 | # - global parameters ------------------------------
31 |
32 |
33 | # - local functions --------------------------------
34 | def pad_rows(arr1: np.ndarray, arr2: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
35 | """Pad the array with the least numer of rows with NaN's."""
36 | if arr2.ndim == 1:
37 | pass
38 | elif arr2.ndim == 2:
39 | if arr1.shape[0] < arr2.shape[0]:
40 | buff = arr1.copy()
41 | arr1 = np.full(arr2.shape, np.nan, dtype=arr2.dtype)
42 | arr1[0 : buff.shape[0], :] = buff
43 | elif arr1.shape[0] > arr2.shape[0]:
44 | buff = arr2.copy()
45 | arr2 = np.full(arr1.shape, np.nan, dtype=arr2.dtype)
46 | arr2[0 : buff.shape[0], :] = buff
47 | else:
48 | if arr1.shape[1] < arr2.shape[1]:
49 | buff = arr1.copy()
50 | arr1 = np.full(arr2.shape, np.nan, dtype=arr2.dtype)
51 | arr1[:, 0 : buff.shape[1], :] = buff
52 | elif arr1.shape[1] > arr2.shape[1]:
53 | buff = arr2.copy()
54 | arr2 = np.full(arr1.shape, np.nan, dtype=arr2.dtype)
55 | arr2[:, 0 : buff.shape[1], :] = buff
56 |
57 | return arr1, arr2
58 |
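Editor's sketch — `pad_rows` on two 2-D float arrays. The function is not listed
in `__all__` but is importable; the shapes below are illustrative.

    import numpy as np
    from pys5p.l1b_io import pad_rows

    a = np.arange(6.0).reshape(2, 3)     # 2 rows
    b = np.arange(12.0).reshape(4, 3)    # 4 rows
    a2, b2 = pad_rows(a, b)
    print(a2.shape, b2.shape)            # (4, 3) (4, 3)
    print(np.isnan(a2[2:]).all())        # True: the padded rows are NaN
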
59 |
60 | # - class definition -------------------------------
61 | class L1Bio:
62 | """class with methods to access Tropomi L1B calibration products.
63 |
64 | The L1b calibration products are available for UVN (band 1-6)
65 | and SWIR (band 7-8).
66 |
67 | Parameters
68 | ----------
69 | l1b_product : Path | str
70 | name of the Tropomi L1B product
71 | readwrite : bool, default=False
72 | open file in read/write mode
73 | verbose : bool, default=False
74 | be verbose
75 |
76 | """
77 |
78 | band_groups = ("/BAND%_CALIBRATION", "/BAND%_IRRADIANCE", "/BAND%_RADIANCE")
79 | geo_dset = "satellite_latitude,satellite_longitude"
80 | msm_type = None
81 |
82 | def __init__(
83 | self: L1Bio,
84 | l1b_product: Path | str,
85 | readwrite: bool = False,
86 | verbose: bool = False,
87 | ) -> None:
88 | """Initialize access to a Tropomi offline L1b product."""
89 | # open L1b product as HDF5 file
90 | l1b_product = Path(l1b_product)
91 | if not l1b_product.is_file():
92 | raise FileNotFoundError(f"{l1b_product.name} does not exist")
93 |
94 | # initialize private class-attributes
95 | self.__rw = readwrite
96 | self.__verbose = verbose
97 | self.__msm_path = None
98 | self.__patched_msm = []
99 | self.filename = l1b_product
100 | self.bands = ""
101 |
102 | if readwrite:
103 | self.fid = h5py.File(l1b_product, "r+")
104 | else:
105 | self.fid = h5py.File(l1b_product, "r")
106 |
107 | def __iter__(self: L1Bio) -> None:
108 | """Allow iteration."""
109 | for attr in sorted(self.__dict__):
110 | if not attr.startswith("__"):
111 | yield attr
112 |
113 | def __enter__(self: L1Bio) -> Self:
114 | """Initiate the context manager."""
115 | return self
116 |
117 | def __exit__(self: L1Bio, *args: object) -> bool:
118 | """Exit the context manager."""
119 | self.close()
120 | return False # any exception is raised by the with statement.
121 |
122 | def close(self: L1Bio) -> None:
123 | """Close resources.
124 |
125 | Notes
126 | -----
127 |         Before closing the product, we make sure that the output product
128 |         describes what has been altered by the S/W, so that any change
129 |         remains traceable.
130 |
131 | In case the L1b product is altered, the attributes listed below are
132 | added to the group: ``/METADATA/SRON_METADATA``:
133 |
134 | - dateStamp ('now')
135 | - Git-version of S/W
136 | - list of patched datasets
137 | - auxiliary datasets used by patch-routines
138 |
139 | """
140 | if self.fid is None:
141 | return
142 |
143 | if self.__patched_msm:
144 | # pylint: disable=no-member
145 | sgrp = self.fid.require_group("/METADATA/SRON_METADATA")
146 | sgrp.attrs["dateStamp"] = datetime.utcnow().isoformat()
147 | sgrp.attrs["git_tag"] = get_version(root="..", relative_to=__file__)
148 | if "patched_datasets" not in sgrp:
149 | dtype = h5py.special_dtype(vlen=str)
150 | dset = sgrp.create_dataset(
151 | "patched_datasets",
152 | (len(self.__patched_msm),),
153 | maxshape=(None,),
154 | dtype=dtype,
155 | )
156 | dset[:] = np.asarray(self.__patched_msm)
157 | else:
158 | dset = sgrp["patched_datasets"]
159 | dset.resize(dset.shape[0] + len(self.__patched_msm), axis=0)
160 |                 dset[-len(self.__patched_msm) :] = np.asarray(self.__patched_msm)
161 |
162 | self.fid.close()
163 | self.fid = None
164 |
165 | # ---------- PUBLIC FUNCTIONS ----------
166 | def get_attr(self: L1Bio, attr_name: str) -> np.ndarray | None:
167 | """Obtain value of an HDF5 file attribute.
168 |
169 | Parameters
170 | ----------
171 | attr_name : string
172 | Name of the attribute
173 |
174 | """
175 | if attr_name not in self.fid.attrs:
176 | return None
177 |
178 | attr = self.fid.attrs[attr_name]
179 | if attr.shape is None:
180 | return None
181 |
182 | return attr
183 |
184 | def get_orbit(self: L1Bio) -> int | None:
185 | """Return absolute orbit number."""
186 | res = self.get_attr("orbit")
187 | if res is None:
188 | return None
189 |
190 | return int(res)
191 |
192 | def get_processor_version(self: L1Bio) -> str | None:
193 | """Return version of the L01b processor."""
194 | attr = self.get_attr("processor_version")
195 | if attr is None:
196 | return None
197 |
198 | # pylint: disable=no-member
199 | return attr.decode("ascii")
200 |
201 | def get_coverage_time(self: L1Bio) -> tuple[str, str] | None:
202 | """Return start and end of the measurement coverage time."""
203 | attr_start = self.get_attr("time_coverage_start")
204 | if attr_start is None:
205 | return None
206 |
207 | attr_end = self.get_attr("time_coverage_end")
208 | if attr_end is None:
209 | return None
210 |
211 | # pylint: disable=no-member
212 | return (attr_start.decode("ascii"), attr_end.decode("ascii"))
213 |
214 | def get_creation_time(self: L1Bio) -> str | None:
215 | """Return datetime when the L1b product was created."""
216 | grp = self.fid["/METADATA/ESA_METADATA/earth_explorer_header"]
217 | dset = grp["fixed_header/source"]
218 | if "Creation_Date" in self.fid.attrs:
219 | attr = dset.attrs["Creation_Date"]
220 | if isinstance(attr, bytes):
221 | return attr.decode("ascii")
222 |
223 | return attr
224 |
225 | return None
226 |
227 | def select(self: L1Bio, msm_type: str | None = None) -> str | None:
228 | """Select a calibration measurement as _.
229 |
230 | Parameters
231 | ----------
232 | msm_type : string
233 |             Name of calibration measurement group as <processing class>_<ic_id>
234 |
235 | Returns
236 | -------
237 | str
238 | String with spectral bands found in product
239 |
240 | Updated object attributes:
241 | - bands : available spectral bands
242 |
243 | """
244 | if msm_type is None:
245 | if self.msm_type is None:
246 | raise ValueError("parameter msm_type is not defined")
247 | msm_type = self.msm_type
248 |
249 | self.bands = ""
250 | for name in self.band_groups:
251 | for ii in "12345678":
252 | grp_path = PurePosixPath(name.replace("%", ii), msm_type)
253 | if str(grp_path) in self.fid:
254 | if self.__verbose:
255 | print("*** INFO: found: ", grp_path)
256 | self.bands += ii
257 |
258 | if self.bands:
259 | self.__msm_path = str(PurePosixPath(name, msm_type))
260 | break
261 |
262 | return self.bands
263 |
264 | def sequence(self: L1Bio, band: str | None = None) -> np.ndarray | None:
265 | """Return sequence number, ICID and delta_time for each measurement.
266 |
267 | Parameters
268 | ----------
269 | band : None or {'1', '2', '3', ..., '8'}
270 |             Select one of the bands present in the product
271 | Default is 'None' which returns the first available band
272 |
273 | Returns
274 | -------
275 | numpy.ndarray
276 | Numpy rec-array with sequence number, ICID and delta-time
277 |
278 | """
279 | if self.__msm_path is None:
280 | return None
281 |
282 | if band is None or len(band) > 1:
283 | band = self.bands[0]
284 |
285 | msm_path = self.__msm_path.replace("%", band)
286 | grp = self.fid[str(PurePosixPath(msm_path, "INSTRUMENT"))]
287 |
288 | icid_list = np.squeeze(grp["instrument_configuration"]["ic_id"])
289 | master_cycle = grp["instrument_settings"]["master_cycle_period_us"][0]
290 | master_cycle /= 1000
291 | grp = self.fid[str(PurePosixPath(msm_path, "OBSERVATIONS"))]
292 | delta_time = np.squeeze(grp["delta_time"])
293 |
294 | # define result as numpy array
295 | length = delta_time.size
296 | res = np.empty(
297 | (length,),
298 | dtype=[
299 | ("sequence", "u2"),
300 | ("icid", "u2"),
301 | ("delta_time", "u4"),
302 | ("index", "u4"),
303 | ],
304 | )
305 | res["sequence"] = [0]
306 | res["icid"] = icid_list
307 | res["delta_time"] = delta_time
308 | res["index"] = np.arange(length, dtype=np.uint32)
309 | if length == 1:
310 | return res
311 |
312 | # determine sequence number
313 | buff_icid = np.concatenate(
314 | ([icid_list[0] - 10], icid_list, [icid_list[-1] + 10])
315 | )
316 | dt_thres = 10 * master_cycle
317 | buff_time = np.concatenate(
318 | (
319 | [delta_time[0] - 10 * dt_thres],
320 | delta_time,
321 | [delta_time[-1] + 10 * dt_thres],
322 | )
323 | )
324 |
325 | indx = (
326 | ((buff_time[1:] - buff_time[0:-1]) > dt_thres)
327 | | ((buff_icid[1:] - buff_icid[0:-1]) != 0)
328 | ).nonzero()[0]
329 | for ii in range(len(indx) - 1):
330 | res["sequence"][indx[ii] : indx[ii + 1]] = ii
331 |
332 | return res
333 |
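Editor's sketch — the boundary-detection trick used by `sequence` above, reduced
to the ICID-only half (the real method also tests delta_time gaps) and run on a
made-up ICID series. `np.diff(buff)` is equivalent to the `buff[1:] - buff[0:-1]`
form used in the method.

    import numpy as np

    icid = np.array([5, 5, 5, 7, 7, 5, 5])
    # sentinel values outside the data guarantee boundaries at both ends
    buff = np.concatenate(([icid[0] - 10], icid, [icid[-1] + 10]))
    indx = (np.diff(buff) != 0).nonzero()[0]
    seq = np.zeros(icid.size, dtype="u2")
    for ii in range(len(indx) - 1):
        seq[indx[ii]:indx[ii + 1]] = ii
    print(seq)   # [0 0 0 1 1 2 2]
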
334 | def get_ref_time(self: L1Bio, band: str | None = None) -> datetime | None:
335 | """Return reference start time of measurements.
336 |
337 | Parameters
338 | ----------
339 | band : None or {'1', '2', '3', ..., '8'}
340 |             Select one of the bands present in the product.
341 | Default is 'None' which returns the first available band
342 |
343 | """
344 | if self.__msm_path is None:
345 | return None
346 |
347 | if band is None:
348 | band = self.bands[0]
349 |
350 | msm_path = self.__msm_path.replace("%", band)
351 | grp = self.fid[str(PurePosixPath(msm_path, "OBSERVATIONS"))]
352 |
353 | return datetime(2010, 1, 1, 0, 0, 0) + timedelta(seconds=int(grp["time"][0]))
354 |
355 | def get_delta_time(self: L1Bio, band: str | None = None) -> np.ndarray | None:
356 | """Return offset from the reference start time of measurement.
357 |
358 | Parameters
359 | ----------
360 | band : None or {'1', '2', '3', ..., '8'}
361 |             Select one of the bands present in the product.
362 | Default is 'None' which returns the first available band
363 |
364 | """
365 | if self.__msm_path is None:
366 | return None
367 |
368 | if band is None:
369 | band = self.bands[0]
370 |
371 | msm_path = self.__msm_path.replace("%", band)
372 | grp = self.fid[str(PurePosixPath(msm_path, "OBSERVATIONS"))]
373 |
374 | return grp["delta_time"][0, :].astype(int)
375 |
376 | def get_instrument_settings(
377 | self: L1Bio, band: str | None = None
378 | ) -> np.ndarray | None:
379 | """Return instrument settings of measurement.
380 |
381 | Parameters
382 | ----------
383 | band : None or {'1', '2', '3', ..., '8'}
384 |             Select one of the bands present in the product.
385 | Default is 'None' which returns the first available band
386 |
387 | """
388 | if self.__msm_path is None:
389 | return None
390 |
391 | if band is None:
392 | band = self.bands[0]
393 |
394 | msm_path = self.__msm_path.replace("%", band)
395 | #
396 |         # Due to a bug in the Python module `h5py` (v2.6.0), it fails to read
397 |         # the UVN instrument settings directly, raising the exception:
398 |         #     KeyError: 'Unable to open object (Component not found)'.
399 |         # The code below is a workaround.
400 | #
401 | grp = self.fid[str(PurePosixPath(msm_path, "INSTRUMENT"))]
402 | instr = np.empty(
403 | grp["instrument_settings"].shape, dtype=grp["instrument_settings"].dtype
404 | )
405 | grp["instrument_settings"].read_direct(instr)
406 | # for name in grp['instrument_settings'].dtype.names:
407 | # instr[name][:] = grp['instrument_settings'][name]
408 |
409 | return instr
410 |
411 |     def get_exposure_time(self: L1Bio, band: str | None = None) -> list | None:
412 | """Return pixel exposure time of the measurements.
413 |
414 | The exposure time is calculated from the parameters `int_delay` and
415 | `int_hold` for SWIR.
416 |
417 | Parameters
418 | ----------
419 | band : None or {'1', '2', '3', ..., '8'}
420 |             Select one of the bands present in the product
421 | Default is 'None' which returns the first available band
422 |
423 | """
424 | if band is None:
425 | band = self.bands[0]
426 |
427 | instr_arr = self.get_instrument_settings(band)
428 |
429 | # calculate exact exposure time
430 | if int(band) < 7:
431 | return [instr["exposure_time"] for instr in instr_arr]
432 |
433 | return [
434 | swir_exp_time(instr["int_delay"], instr["int_hold"]) for instr in instr_arr
435 | ]
436 |
437 | def get_housekeeping_data(
438 | self: L1Bio, band: str | None = None
439 | ) -> np.ndarray | None:
440 | """Return housekeeping data of measurements.
441 |
442 | Parameters
443 | ----------
444 | band : None or {'1', '2', '3', ..., '8'}
445 |             Select one of the bands present in the product
446 | Default is 'None' which returns the first available band
447 |
448 | """
449 | if self.__msm_path is None:
450 | return None
451 |
452 | if band is None:
453 | band = self.bands[0]
454 |
455 | msm_path = self.__msm_path.replace("%", band)
456 | grp = self.fid[str(PurePosixPath(msm_path, "INSTRUMENT"))]
457 |
458 | return np.squeeze(grp["housekeeping_data"])
459 |
460 | def get_geo_data(
461 | self: L1Bio, band: str | None = None, geo_dset: str | None = None
462 | ) -> dict | None:
463 | """Return data of selected datasets from the GEODATA group.
464 |
465 | Parameters
466 | ----------
467 | band : None or {'1', '2', '3', ..., '8'}
468 |             Select one of the bands present in the product
469 | Default is 'None' which returns the first available band
470 | geo_dset : str
471 | Name(s) of datasets in the GEODATA group, comma separated
472 | Default is 'satellite_latitude,satellite_longitude'
473 |
474 | Returns
475 | -------
476 | dict
477 | data of selected datasets from the GEODATA group
478 |
479 | """
480 | if self.__msm_path is None:
481 | return None
482 |
483 | if geo_dset is None:
484 | geo_dset = self.geo_dset
485 |
486 | if band is None:
487 | band = self.bands[0]
488 |
489 | msm_path = self.__msm_path.replace("%", band)
490 | grp = self.fid[str(PurePosixPath(msm_path, "GEODATA"))]
491 |
492 | res = {}
493 | for name in geo_dset.split(","):
494 | res[name] = grp[name][0, ...]
495 |
496 | return res
497 |
498 | def get_msm_attr(
499 | self: L1Bio, msm_dset: str, attr_name: str, band: str | None = None
500 | ) -> np.ndarray | float | None:
501 | """Return value attribute of measurement dataset "msm_dset".
502 |
503 | Parameters
504 | ----------
505 | attr_name : str
506 | Name of the attribute
507 | msm_dset : str
508 | Name of measurement dataset
509 | band : None or {'1', '2', '3', ..., '8'}
510 |             Select one of the bands present in the product
511 | Default is 'None' which returns the first available band
512 |
513 | Returns
514 | -------
515 | scalar or numpy.ndarray
516 | Value of attribute "attr_name"
517 |
518 | """
519 | if self.__msm_path is None:
520 | return None
521 |
522 | if band is None:
523 | band = self.bands[0]
524 |
525 | msm_path = self.__msm_path.replace("%", band)
526 | ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset))
527 | if attr_name in self.fid[ds_path].attrs:
528 | attr = self.fid[ds_path].attrs[attr_name]
529 | if isinstance(attr, bytes):
530 | return attr.decode("ascii")
531 |
532 | return attr
533 |
534 | return None
535 |
536 | def get_msm_data(
537 | self: L1Bio,
538 | msm_dset: str,
539 | band: str | None = None,
540 | fill_as_nan: bool = False,
541 |         msm_to_row: str | None = None,
542 | ) -> np.ndarray | None:
543 | """Read data from dataset "msm_dset".
544 |
545 | Parameters
546 | ----------
547 | msm_dset : str
548 | Name of measurement dataset.
549 | band : None or {'1', '2', '3', ..., '8'}
550 |             Select one of the bands present in the product.
551 | Default is 'None' which returns:
552 |
553 | Radiance
554 | one band
555 |
556 | Calibration, Irradiance
557 | both bands (Calibration, Irradiance)
558 |
559 | fill_as_nan : bool
560 | Set data values equal (KNMI) FillValue to NaN
561 |         msm_to_row : {'padding'}, optional
562 |             Combine both bands into one dataset, padding rows if necessary
563 |
564 | Returns
565 | -------
566 | numpy.ndarray
567 | values read from or written to dataset "msm_dset"
568 |
569 | """
570 |         fillvalue = float.fromhex("0x1.ep+122")  # 9.96921e+36, netCDF default float fill
571 |
572 | if self.__msm_path is None:
573 | return None
574 |
575 | if band is None:
576 | band = self.bands
577 | elif not isinstance(band, str):
578 | raise TypeError("band must be a string")
579 | elif band not in self.bands:
580 | raise ValueError("band not found in product")
581 |
582 | if len(band) == 2 and msm_to_row is None:
583 | msm_to_row = "padding"
584 |
585 | data = ()
586 | for ii in band:
587 | msm_path = self.__msm_path.replace("%", ii)
588 | ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset))
589 | dset = self.fid[ds_path]
590 |
591 | if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
592 | buff = np.squeeze(dset)
593 | buff[(buff == fillvalue)] = np.nan
594 | data += (buff,)
595 | else:
596 | data += (np.squeeze(dset),)
597 |
598 | if len(band) == 1:
599 | return data[0]
600 |
601 | if msm_to_row == "padding":
602 | data = pad_rows(data[0], data[1])
603 |
604 | return np.concatenate(data, axis=data[0].ndim - 1)
605 |
606 | def set_msm_data(
607 | self: L1Bio, msm_dset: str, new_data: np.ndarray | Iterable
608 | ) -> None:
609 | """Replace data of dataset "msm_dset" with new_data.
610 |
611 | Parameters
612 | ----------
613 | msm_dset : string
614 | Name of measurement dataset.
615 | new_data : array-like
616 | Data to be written with same dimensions as dataset "msm_dset"
617 |
618 | """
619 | if self.__msm_path is None:
620 | return
621 |
622 | # we will overwrite existing data, thus readwrite access is required
623 | if not self.__rw:
624 | raise PermissionError("read/write access required")
625 |
626 | # overwrite the data
627 | col = 0
628 | for ii in self.bands:
629 | msm_path = self.__msm_path.replace("%", ii)
630 | ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset))
631 | dset = self.fid[ds_path]
632 |
633 | dims = dset.shape
634 | dset[0, ...] = new_data[..., col : col + dims[-1]]
635 | col += dims[-1]
636 |
637 | # update patch logging
638 | self.__patched_msm.append(ds_path)
639 |
640 |
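Editor's sketch — reading measurement data through the `L1Bio` context manager.
The product file name, measurement-group name and dataset name are placeholders,
not taken from this repository; substitute names from an actual L1B product.

    from pys5p.l1b_io import L1Bio

    with L1Bio("S5P_OFFL_L1B_CA_example.nc") as l1b:
        bands = l1b.select("BACKGROUND_RADIANCE_MODE_0005")  # example group
        if bands:
            signal = l1b.get_msm_data("signal", fill_as_nan=True)  # example dataset
            print(f"bands {bands}: data shape {signal.shape}")
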
641 | # --------------------------------------------------
642 | class L1BioIRR(L1Bio):
643 | """Class with methods to access Tropomi L1B irradiance products."""
644 |
645 | band_groups = ("/BAND%_IRRADIANCE",)
646 | geo_dset = "earth_sun_distance"
647 | msm_type = "STANDARD_MODE"
648 |
649 |
650 | # --------------------------------------------------
651 | class L1BioRAD(L1Bio):
652 | """Class with function to access Tropomi L1B radiance products."""
653 |
654 | band_groups = ("/BAND%_RADIANCE",)
655 | geo_dset = "latitude,longitude"
656 | msm_type = "STANDARD_MODE"
657 |
658 |
659 | # --------------------------------------------------
660 | class L1BioENG:
661 | """Class with methods to access Tropomi offline L1b engineering products.
662 |
663 | Parameters
664 | ----------
665 | l1b_product : Path | str
666 | name of the L1b engineering product
667 |
668 | Notes
669 | -----
670 | The L1b engineering products are available for UVN (band 1-6)
671 | and SWIR (band 7-8).
672 |
673 | """
674 |
675 | def __init__(self: L1BioENG, l1b_product: Path | str) -> None:
676 | """Initialize access to a Tropomi offline L1b product."""
677 | # open L1b product as HDF5 file
678 | l1b_product = Path(l1b_product)
679 | if not l1b_product.is_file():
680 | raise FileNotFoundError(f"{l1b_product} does not exist")
681 |
682 | # initialize private class-attributes
683 | self.filename = l1b_product
684 | self.fid = h5py.File(l1b_product, "r")
685 |
686 | def __iter__(self: L1BioENG) -> None:
687 | """Allow iteration."""
688 | for attr in sorted(self.__dict__):
689 | if not attr.startswith("__"):
690 | yield attr
691 |
692 | def __enter__(self: L1BioENG) -> Self:
693 | """Initiate the context manager."""
694 | return self
695 |
696 | def __exit__(self: L1BioENG, *args: object) -> bool:
697 | """Exit the context manager."""
698 | self.close()
699 | return False # any exception is raised by the with statement.
700 |
701 | def close(self: L1BioENG) -> None:
702 | """Close access to product."""
703 | if self.fid is None:
704 | return
705 |
706 | self.fid.close()
707 | self.fid = None
708 |
709 | # ---------- PUBLIC FUNCTIONS ----------
710 | def get_attr(self: L1BioENG, attr_name: str) -> np.ndarray | None:
711 | """Obtain value of an HDF5 file attribute.
712 |
713 | Parameters
714 | ----------
715 | attr_name : str
716 | Name of the attribute
717 |
718 | """
719 | if attr_name not in self.fid.attrs:
720 | return None
721 |
722 | attr = self.fid.attrs[attr_name]
723 | if attr.shape is None:
724 | return None
725 |
726 | return attr
727 |
728 | def get_orbit(self: L1BioENG) -> int | None:
729 | """Return absolute orbit number."""
730 | res = self.get_attr("orbit")
731 | if res is None:
732 | return None
733 |
734 | return int(res)
735 |
736 | def get_processor_version(self: L1BioENG) -> str | None:
737 | """Return version of the L01b processor."""
738 | attr = self.get_attr("processor_version")
739 | if attr is None:
740 | return None
741 |
742 | # pylint: disable=no-member
743 | return attr.decode("ascii")
744 |
745 | def get_coverage_time(self: L1BioENG) -> tuple[str, str] | None:
746 | """Return start and end of the measurement coverage time."""
747 | attr_start = self.get_attr("time_coverage_start")
748 | if attr_start is None:
749 | return None
750 |
751 | attr_end = self.get_attr("time_coverage_end")
752 | if attr_end is None:
753 | return None
754 |
755 | # pylint: disable=no-member
756 | return (attr_start.decode("ascii"), attr_end.decode("ascii"))
757 |
758 | def get_creation_time(self: L1BioENG) -> str | None:
759 | """Return datetime when the L1b product was created."""
760 | grp = self.fid["/METADATA/ESA_METADATA/earth_explorer_header"]
761 | dset = grp["fixed_header/source"]
762 | if "Creation_Date" in self.fid.attrs:
763 | attr = dset.attrs["Creation_Date"]
764 | if isinstance(attr, bytes):
765 | return attr.decode("ascii")
766 |
767 | return attr
768 |
769 | return None
770 |
771 | def get_ref_time(self: L1BioENG) -> int:
772 | """Return reference start time of measurements."""
773 | return int(self.fid["reference_time"][0])
774 |
775 | def get_delta_time(self: L1BioENG) -> np.ndarray:
776 | """Return offset from the reference start time of measurement."""
777 | return self.fid["/MSMTSET/msmtset"]["delta_time"][:].astype(int)
778 |
779 | def get_msmtset(self: L1BioENG) -> np.ndarray:
780 | """Return L1B_ENG_DB/SATELLITE_INFO/satellite_pos."""
781 | return self.fid["/SATELLITE_INFO/satellite_pos"][:]
782 |
783 | def get_msmtset_db(self: L1BioENG) -> np.ndarray:
784 | """Return compressed msmtset from L1B_ENG_DB/MSMTSET/msmtset.
785 |
786 | Notes
787 | -----
788 | This function is used to fill the SQLite product databases
789 |
790 | """
791 | dtype_msmt_db = np.dtype(
792 | [
793 | ("meta_id", np.int32),
794 | ("ic_id", np.uint16),
795 | ("ic_version", np.uint8),
796 | ("class", np.uint8),
797 | ("repeats", np.uint16),
798 | ("exp_per_mcp", np.uint16),
799 | ("exp_time_us", np.uint32),
800 | ("mcp_us", np.uint32),
801 | ("delta_time_start", np.int32),
802 | ("delta_time_end", np.int32),
803 | ]
804 | )
805 |
806 | # read full msmtset
807 | msmtset = self.fid["/MSMTSET/msmtset"][:]
808 |
809 |         # get indices to start and end of every measurement (based on ICID)
810 | icid = msmtset["icid"]
811 | indx = (np.diff(icid) != 0).nonzero()[0] + 1
812 | indx = np.insert(indx, 0, 0)
813 | indx = np.append(indx, -1)
814 |
815 | # compress data from msmtset
816 | msmt = np.zeros(indx.size - 1, dtype=dtype_msmt_db)
817 | msmt["ic_id"][:] = msmtset["icid"][indx[0:-1]]
818 | msmt["ic_version"][:] = msmtset["icv"][indx[0:-1]]
819 | msmt["class"][:] = msmtset["class"][indx[0:-1]]
820 | msmt["delta_time_start"][:] = msmtset["delta_time"][indx[0:-1]]
821 | msmt["delta_time_end"][:] = msmtset["delta_time"][indx[1:]]
822 |
823 | # add SWIR timing information
824 | timing = self.fid["/DETECTOR4/timing"][:]
825 | msmt["mcp_us"][:] = timing["mcp_us"][indx[1:] - 1]
826 | msmt["exp_time_us"][:] = timing["exp_time_us"][indx[1:] - 1]
827 | msmt["exp_per_mcp"][:] = timing["exp_per_mcp"][indx[1:] - 1]
828 | # duration per ICID execution in micro-seconds
829 | duration = 1000 * (msmt["delta_time_end"] - msmt["delta_time_start"])
830 | # duration can be zero
831 | mask = msmt["mcp_us"] > 0
832 | # divide duration by measurement period in micro-seconds
833 | msmt["repeats"][mask] = (duration[mask] / (msmt["mcp_us"][mask])).astype(
834 | np.uint16
835 | )
836 |
837 | return msmt
838 |
839 | def get_swir_hk_db(
840 | self: L1BioENG, stats: str | None = None, fill_as_nan: bool | None = False
841 | ) -> np.ndarray | tuple[np.ndarray, np.ndarray] | None:
842 | """Return the most important SWIR housekeeping parameters.
843 |
844 | Parameters
845 | ----------
846 | stats : {'median', 'range', None}
847 | Add statistics on housekeeping parameters
848 | fill_as_nan : bool, default=False
849 | Replace (float) FillValues with Nan's, when True
850 |
851 | Notes
852 | -----
853 | This function is used to fill the SQLite product database and
854 | HDF5 monitoring database
855 |
856 | """
857 | dtype_hk_db = np.dtype(
858 | [
859 | ("detector_temp", np.float32),
860 | ("grating_temp", np.float32),
861 | ("imager_temp", np.float32),
862 | ("obm_temp", np.float32),
863 | ("calib_unit_temp", np.float32),
864 | ("fee_inner_temp", np.float32),
865 | ("fee_board_temp", np.float32),
866 | ("fee_ref_volt_temp", np.float32),
867 | ("fee_video_amp_temp", np.float32),
868 | ("fee_video_adc_temp", np.float32),
869 | ("detector_heater", np.float32),
870 | ("obm_heater_cycle", np.float32),
871 | ("fee_box_heater_cycle", np.float32),
872 | ("obm_heater", np.float32),
873 | ("fee_box_heater", np.float32),
874 | ]
875 | )
876 |
877 | num_eng_pkts = self.fid["nr_of_engdat_pkts"].size
878 | swir_hk = np.empty(num_eng_pkts, dtype=dtype_hk_db)
879 |
880 | hk_tbl = self.fid["/DETECTOR4/DETECTOR_HK/temperature_info"][:]
881 | swir_hk["detector_temp"] = hk_tbl["temp_det_ts2"]
882 | swir_hk["fee_inner_temp"] = hk_tbl["temp_d1_box"]
883 | swir_hk["fee_board_temp"] = hk_tbl["temp_d5_cold"]
884 | swir_hk["fee_ref_volt_temp"] = hk_tbl["temp_a3_vref"]
885 | swir_hk["fee_video_amp_temp"] = hk_tbl["temp_d6_vamp"]
886 | swir_hk["fee_video_adc_temp"] = hk_tbl["temp_d4_vadc"]
887 |
888 | hk_tbl = self.fid["/NOMINAL_HK/TEMPERATURES/hires_temperatures"][:]
889 | swir_hk["grating_temp"] = hk_tbl["hires_temp_1"]
890 |
891 | hk_tbl = self.fid["/NOMINAL_HK/TEMPERATURES/instr_temperatures"][:]
892 | swir_hk["imager_temp"] = hk_tbl["instr_temp_29"]
893 | swir_hk["obm_temp"] = hk_tbl["instr_temp_28"]
894 | swir_hk["calib_unit_temp"] = hk_tbl["instr_temp_25"]
895 |
896 | hk_tbl = self.fid["/DETECTOR4/DETECTOR_HK/heater_data"][:]
897 | swir_hk["detector_heater"] = hk_tbl["det_htr_curr"]
898 |
899 | hk_tbl = self.fid["/NOMINAL_HK/HEATERS/heater_data"][:]
900 | swir_hk["obm_heater"] = hk_tbl["meas_cur_val_htr12"]
901 | swir_hk["obm_heater_cycle"] = hk_tbl["last_pwm_val_htr12"]
902 | swir_hk["fee_box_heater"] = hk_tbl["meas_cur_val_htr13"]
903 | swir_hk["fee_box_heater_cycle"] = hk_tbl["last_pwm_val_htr13"]
904 |
905 | # CHECK: works only when all elements of swir_hk are floats
906 | if fill_as_nan:
907 | for key in dtype_hk_db.names:
908 | swir_hk[key][swir_hk[key] == 999.0] = np.nan
909 |
910 | if stats is None:
911 | return swir_hk
912 |
913 | if stats == "median":
914 | hk_median = np.empty(1, dtype=dtype_hk_db)
915 | for key in dtype_hk_db.names:
916 | if np.all(np.isnan(swir_hk[key])):
917 | hk_median[key][0] = np.nan
918 | elif np.nanmin(swir_hk[key]) == np.nanmax(swir_hk[key]):
919 | hk_median[key][0] = swir_hk[key][0]
920 | else:
921 | hk_median[key][0] = Biweight(swir_hk[key]).median
922 | return hk_median
923 |
924 | if stats == "range":
925 | hk_min = np.empty(1, dtype=dtype_hk_db)
926 | hk_max = np.empty(1, dtype=dtype_hk_db)
927 | for key in dtype_hk_db.names:
928 | if np.all(np.isnan(swir_hk[key])):
929 | hk_min[key][0] = np.nan
930 | hk_max[key][0] = np.nan
931 | elif np.nanmin(swir_hk[key]) == np.nanmax(swir_hk[key]):
932 | hk_min[key][0] = swir_hk[key][0]
933 | hk_max[key][0] = swir_hk[key][0]
934 | else:
935 | hk_min[key][0] = np.nanmin(swir_hk[key])
936 | hk_max[key][0] = np.nanmax(swir_hk[key])
937 | return hk_min, hk_max
938 |
939 | return None
940 |
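Editor's sketch — retrieving SWIR housekeeping statistics with `L1BioENG`; the
product name is a placeholder.

    from pys5p.l1b_io import L1BioENG

    with L1BioENG("S5P_OPER_L1B_ENG_DB_example.nc") as eng:
        hk = eng.get_swir_hk_db(fill_as_nan=True)                      # full table
        hk_median = eng.get_swir_hk_db(stats="median", fill_as_nan=True)
        hk_min, hk_max = eng.get_swir_hk_db(stats="range", fill_as_nan=True)
        print(hk_median["detector_temp"][0])
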
--------------------------------------------------------------------------------
/src/pys5p/l1b_patch.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`L1Bpatch`, class to modify an existing L1B product.
11 |
12 | .. warning:: Deprecated, this module is no longer maintained.
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | __all__ = ["L1Bpatch"]
18 |
19 | import shutil
20 | from datetime import datetime
21 | from pathlib import Path
22 | from typing import Self
23 |
24 | import h5py
25 | import numpy as np
26 | from setuptools_scm import get_version
27 |
28 | from . import swir_region
29 | from .l1b_io import L1BioRAD
30 |
31 | # - global variables --------------------------------
32 | _MSG_ERR_IO_BAND_ = "spectral band of input and output products do not match"
33 |
34 |
35 | # - local functions --------------------------------
36 |
37 |
38 | # - class definition -------------------------------
39 | class L1Bpatch:
40 | """Definition off class L1Bpatch.
41 |
42 | Parameters
43 | ----------
44 | l1b_product : str
45 | name of the L1B product
46 | data_dir : str, default='/tmp'
47 | output directory for the patched L1B product
48 | ckd_dir : str, default='/nfs/Tropomi/share/ckd'
49 | location of the Tropomi CKD
50 |
51 | """
52 |
53 | def __init__(
54 | self: L1Bpatch,
55 | l1b_product: str,
56 | data_dir: str = "/tmp",
57 | ckd_dir: str = "/nfs/Tropomi/share/ckd",
58 | ) -> None:
59 | """Initialize access to a Tropomi offline L1b product."""
60 | prod_type = Path(l1b_product).name[0:15]
61 | if prod_type not in ("S5P_OFFL_L1B_RA", "S5P_RPRO_L1B_RA"):
62 | raise TypeError(
63 | "Warning: only implemented for Tropomi L1b radiance products"
64 | )
65 |
66 | # initialize private class-attributes
67 | self.data_dir = Path(data_dir)
68 | if not self.data_dir.is_dir():
69 | self.data_dir.mkdir(mode=0o755)
70 | self.ckd_dir = Path(ckd_dir)
71 | self.l1b_product: Path = Path(l1b_product)
72 | self.l1b_patched = self.data_dir / self.l1b_product.name.replace("_01_", "_99_")
73 | if self.l1b_patched.is_file():
74 | self.l1b_patched.unlink()
75 | self.__patched_msm = []
76 |
77 | def __enter__(self: L1Bpatch) -> Self:
78 | """Initiate the context manager."""
79 | return self
80 |
81 | def __exit__(self: L1Bpatch, *args: object) -> bool:
82 | """Exit the context manager."""
83 | self.close()
84 | return False # any exception is raised by the with statement.
85 |
86 | def close(self: L1Bpatch) -> None:
87 | """Close L1B product.
88 |
89 |         Before closing the product, we make sure that the output product
90 |         describes what has been altered by the S/W, so that any change
91 |         remains traceable.
92 |
93 | In case the L1b product is altered, the attributes listed below are
94 | added to the group: '/METADATA/SRON_METADATA':
95 |
96 | - dateStamp ('now')
97 | - Git-version of S/W
98 | - list of patched datasets
99 | - auxiliary datasets used by patch-routines
100 |
101 | """
102 | if not self.l1b_patched.is_file():
103 | return
104 |
105 | if not self.__patched_msm:
106 | return
107 |
108 | with h5py.File(self.l1b_patched, "r+") as fid:
109 | sgrp = fid.require_group("/METADATA/SRON_METADATA")
110 | sgrp.attrs["dateStamp"] = datetime.utcnow().isoformat()
111 | sgrp.attrs["git_tag"] = get_version(root="..", relative_to=__file__)
112 | if "patched_datasets" not in sgrp:
113 | dtype = h5py.special_dtype(vlen=str)
114 | dset = sgrp.create_dataset(
115 | "patched_datasets",
116 | (len(self.__patched_msm),),
117 | maxshape=(None,),
118 | dtype=dtype,
119 | )
120 | dset[:] = np.asarray(self.__patched_msm)
121 | else:
122 | dset = sgrp["patched_datasets"]
123 | dset.resize(dset.shape[0] + len(self.__patched_msm), axis=0)
124 |                 dset[-len(self.__patched_msm) :] = np.asarray(self.__patched_msm)
125 |
126 | # --------------------------------------------------
127 | def pixel_quality(self: L1Bpatch, dpqm: np.ndarray, threshold: float = 0.8) -> None:
128 | """Patch SWIR pixel_quality.
129 |
130 | Patched dataset: 'quality_level' and 'spectral_channel_quality'
131 |
132 | Requires (naive approach):
133 |
134 | * read original dataset 'spectral_channel_quality'
135 | * read pixel quality ckd
136 |     * adjust the second bit of each byte of spectral_channel_quality
137 | * quality_level = int(100 * dpqm)
138 | * write updated datasets to patched product
139 |
140 | Parameters
141 | ----------
142 | dpqm : array-like
143 | SWIR pixel quality as a float value between 0 and 1
144 | threshold : float, optional
145 | threshold for good pixels, default 0.8
146 |
147 | Returns
148 | -------
149 |         None
150 |
151 | """
152 | if not self.l1b_patched.is_file():
153 | shutil.copy(self.l1b_product, self.l1b_patched)
154 |
155 | # read original data
156 | with L1BioRAD(self.l1b_product) as l1b:
157 | band = l1b.select("STANDARD_MODE")
158 | quality_level = l1b.get_msm_data("quality_level")
159 | print("quality_level", quality_level.dtype)
160 | chan_quality = l1b.get_msm_data("spectral_channel_quality")
161 | print("chan_quality", chan_quality.dtype)
162 |
163 | if band in ("7", "8"):
164 | l2_dpqm = dpqm[swir_region.coords(mode="level2", band=band)]
165 | else:
166 | raise ValueError("only implemented for band 7 or 8")
167 |
168 | # patch dataset 'quality_level'
169 | quality_level[...] = (100 * l2_dpqm).astype(np.uint8) # broadcasting
170 |
171 | # patch dataset 'spectral_channel_quality'
172 | buff = chan_quality & ~2 # set second bit to zero (all good)
173 | buff[:, l2_dpqm < threshold] += 2 # flag bad pixels
174 | chan_quality = buff.astype(np.uint8)
175 |
176 | # write patched dataset to new product
177 | with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
178 | res = l1b.select("STANDARD_MODE")
179 | if res != band:
180 | raise ValueError(_MSG_ERR_IO_BAND_)
181 | l1b.set_msm_data("quality_level", quality_level)
182 | l1b.set_msm_data("spectral_channel_quality", chan_quality)
183 |
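Editor's sketch — the bit arithmetic behind the patch above, on toy data. Bit 1
of each quality byte marks a bad pixel; `~np.uint8(2)` is used here instead of
`~2` so the mask stays within uint8. The dpqm values are made up.

    import numpy as np

    chan_quality = np.array([[0b0011, 0b0001, 0b0010]], dtype=np.uint8)
    dpqm = np.array([0.9, 0.5, 0.95])   # per-pixel quality, made up
    threshold = 0.8

    buff = chan_quality & ~np.uint8(2)  # clear bit 1 ("all pixels good")
    buff[:, dpqm < threshold] += 2      # set bit 1 for bad pixels
    print(buff)                         # [[1 3 0]]
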
184 | def offset(self: L1Bpatch) -> None:
185 | """Patch SWIR offset correction.
186 |
187 | Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
188 |
189 | Requires (naive approach):
190 |
191 | * reverse applied radiance calibration
192 | * reverse applied stray-light correction
193 | * reverse applied PRNU correction
194 | * reverse applied dark-flux correction
195 | * reverse applied offset correction
196 | * apply (alternative) offset correction
197 | * apply (alternative) dark-flux correction
198 | * apply (alternative) PRNU correction
199 | * apply (alternative) stray-light correction
200 | * apply (alternative) radiance calibration
201 |
202 | Returns
203 | -------
204 |         None
205 |
206 | """
207 | if not self.l1b_patched.is_file():
208 | shutil.copy(self.l1b_product, self.l1b_patched)
209 |
210 | # read original data
211 | with L1BioRAD(self.l1b_product) as l1b:
212 | band = l1b.select("STANDARD_MODE")
213 | data = l1b.get_msm_data("radiance")
214 |
215 | # read required CKD's
216 |
217 | # patch dataset 'radiance'
218 |
219 | # write patched dataset to new product
220 | with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
221 | res = l1b.select("STANDARD_MODE")
222 | if res != band:
223 | raise ValueError(_MSG_ERR_IO_BAND_)
224 | l1b.set_msm_data("radiance", data)
225 |
226 | def darkflux(self: L1Bpatch) -> None:
227 | """Patch SWIR dark-flux correction.
228 |
229 | Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
230 |
231 | Requires (naive approach):
232 |
233 | * reverse applied radiance calibration
234 | * reverse applied stray-light correction
235 | * reverse applied PRNU correction
236 | * reverse applied dark-flux correction
237 | * apply (alternative) dark-flux correction
238 | * apply (alternative) PRNU correction
239 | * apply (alternative) stray-light correction
240 | * apply (alternative) radiance calibration
241 |
242 | Returns
243 | -------
244 |         None
245 |
246 | """
247 | if not self.l1b_patched.is_file():
248 | shutil.copy(self.l1b_product, self.l1b_patched)
249 |
250 | # read original data
251 | with L1BioRAD(self.l1b_product) as l1b:
252 | band = l1b.select("STANDARD_MODE")
253 | data = l1b.get_msm_data("radiance")
254 |
255 | # read required CKD's
256 |
257 | # patch dataset 'radiance'
258 |
259 | # write patched dataset to new product
260 | with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
261 | res = l1b.select("STANDARD_MODE")
262 | if res != band:
263 | raise ValueError(_MSG_ERR_IO_BAND_)
264 | l1b.set_msm_data("radiance", data)
265 |
266 | def prnu(self: L1Bpatch) -> None:
267 | """Patch pixel response non-uniformity correction.
268 |
269 | Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
270 |
271 | Requires (naive approach):
272 |
273 | * reverse applied radiance calibration
274 | * reverse applied stray-light correction
275 | * reverse applied PRNU correction
276 | * apply (alternative) PRNU correction
277 | * apply (alternative) stray-light correction
278 | * apply (alternative) radiance calibration
279 |
280 | Alternative: neglect impact stray-light, but apply patch to correct for
281 | spectral features.
282 |
283 | Returns
284 | -------
285 | None
286 |
287 | Notes
288 | -----
289 | It is assumed that for the PRNU correction the CKD has to be multiplied
290 | with the pixel signals.
291 |
292 | """
293 | if not self.l1b_patched.is_file():
294 | shutil.copy(self.l1b_product, self.l1b_patched)
295 |
296 | # read original data
297 | with L1BioRAD(self.l1b_product) as l1b:
298 | band = l1b.select("STANDARD_MODE")
299 | data = l1b.get_msm_data("radiance")
300 |
301 | # read required CKD's
302 |
303 | # patch dataset 'radiance'
304 |
305 | # write patched dataset to new product
306 | with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
307 | res = l1b.select("STANDARD_MODE")
308 | if res != band:
309 | raise ValueError(_MSG_ERR_IO_BAND_)
310 | l1b.set_msm_data("radiance", data)
311 |
312 | def relrad(self: L1Bpatch) -> None:
313 | """Patch relative radiance calibration.
314 |
315 | Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
316 |
317 | Requires:
318 |
319 | * reverse applied radiance calibration
320 | * apply alternative radiance calibration
321 |
322 | Returns
323 | -------
324 |         None
325 |
326 | """
327 | if not self.l1b_patched.is_file():
328 | shutil.copy(self.l1b_product, self.l1b_patched)
329 |
330 | # read original data
331 | with L1BioRAD(self.l1b_product) as l1b:
332 | band = l1b.select("STANDARD_MODE")
333 | data = l1b.get_msm_data("radiance")
334 |
335 | # read required CKD's
336 |
337 | # patch dataset 'radiance'
338 |
339 | # write patched dataset to new product
340 | with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
341 | res = l1b.select("STANDARD_MODE")
342 | if res != band:
343 | raise ValueError(_MSG_ERR_IO_BAND_)
344 | l1b.set_msm_data("radiance", data)
345 |
346 | def absrad(self: L1Bpatch) -> None:
347 | """Patch absolute radiance calibration.
348 |
349 | Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
350 |
351 | Requires:
352 |
353 | * reverse applied irradiance calibration
354 | * apply alternative irradiance calibration
355 |
356 | Returns
357 | -------
358 |         None
359 |
360 | """
361 | if not self.l1b_patched.is_file():
362 | shutil.copy(self.l1b_product, self.l1b_patched)
363 |
364 | # read original data
365 | with L1BioRAD(self.l1b_product) as l1b:
366 | band = l1b.select("STANDARD_MODE")
367 | data = l1b.get_msm_data("radiance")
368 |
369 | # read required CKD's
370 |
371 | # patch dataset 'radiance'
372 |
373 | # write patched dataset to new product
374 | with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
375 | res = l1b.select("STANDARD_MODE")
376 | if res != band:
377 | raise ValueError(_MSG_ERR_IO_BAND_)
378 | l1b.set_msm_data("radiance", data)
379 |
380 | def check(self: L1Bpatch) -> None:
381 | """Check patched dataset in L1B product."""
382 | if not self.l1b_patched.is_file():
383 | raise ValueError("patched product not found")
384 |
385 | with h5py.File(self.l1b_patched, "r+") as fid:
386 | if "SRON_METADATA" not in fid["/METADATA"]:
387 | raise ValueError("no SRON metadata defined in L1B product")
388 | sgrp = fid["/METADATA/SRON_METADATA"]
389 | if "patched_datasets" not in sgrp:
390 | raise ValueError("no patched datasets in L1B prduct")
391 | patched_datasets = sgrp["patched_datasets"][:]
392 |
393 | for ds_name in patched_datasets:
394 | with L1BioRAD(self.l1b_product) as l1b:
395 | l1b.select("STANDARD_MODE")
396 | orig = l1b.get_msm_data(ds_name.split("/")[-1])
397 |
398 | with L1BioRAD(self.l1b_patched) as l1b:
399 | l1b.select("STANDARD_MODE")
400 | patch = l1b.get_msm_data(ds_name.split("/")[-1])
401 |
402 | if np.issubdtype(orig.dtype, np.integer):
403 | if np.array_equiv(orig, patch):
404 | print(ds_name.split("/")[-1], " equal True")
405 | else:
406 | print(
407 | f"{ds_name.split('/')[-1]}"
408 | f" equal {np.sum(orig == patch)}"
409 | f" differ {np.sum(orig != patch)}"
410 | )
411 | else:
412 | print("test not yet defined")
413 |
--------------------------------------------------------------------------------
/src/pys5p/lib/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2020-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """Necessary but empty file."""
11 |
--------------------------------------------------------------------------------
/src/pys5p/lv2_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`LV2io`, class to access Tropomi level-2 products."""
11 |
12 | from __future__ import annotations
13 |
14 | __all__ = ["LV2io"]
15 |
16 | from datetime import datetime, timedelta
17 | from typing import TYPE_CHECKING, Self
18 |
19 | import h5py
20 | import numpy as np
21 | from moniplot.image_to_xarray import data_to_xr, h5_to_xr
22 | from netCDF4 import Dataset
23 |
24 | if TYPE_CHECKING:
25 | from pathlib import Path
26 |
27 | import xarray as xr
28 |
29 | # - global parameters ------------------------------
30 |
31 |
32 | # - local functions --------------------------------
33 |
34 |
35 | # - class definition -------------------------------
36 | class LV2io:
37 | """A class to read Tropomi Level-2 (offline) products.
38 |
39 | Parameters
40 | ----------
41 | lv2_product : Path
42 | full path to S5P Tropomi level 2 product
43 |
44 | Notes
45 | -----
46 |     The Python h5py module can read the operational netCDF4 products without
47 |     any problems; however, the SRON science products contain incompatible
48 |     attributes. This should be fixed once more up-to-date netCDF software is
49 |     used to generate the products. Currently, the Python netCDF4 module is
50 |     used to read the science products.
51 |
52 | """
53 |
54 | def __init__(self: LV2io, lv2_product: Path) -> None:
55 | """Initialize access to an S5P_L2 product."""
56 | if not lv2_product.is_file():
57 | raise FileNotFoundError(f"{lv2_product.name} does not exist")
58 |
59 | # initialize class-attributes
60 | self.filename = lv2_product
61 |
62 | # open LV2 product as HDF5 file
63 | if self.science_product:
64 | self.fid = Dataset(lv2_product, "r")
65 | self.ground_pixel = self.fid["/instrument/ground_pixel"][:].max()
66 | self.ground_pixel += 1
67 | self.scanline = self.fid["/instrument/scanline"][:].max()
68 | self.scanline += 1
69 |             # alternative: set a 'sparse' flag instead of rejecting the product
70 | if self.fid["/instrument/scanline"].size % self.ground_pixel != 0:
71 | raise ValueError("not all scanlines are complete")
72 | else:
73 | self.fid = h5py.File(lv2_product, "r")
74 | self.ground_pixel = self.fid["/PRODUCT/ground_pixel"].size
75 | self.scanline = self.fid["/PRODUCT/scanline"].size
76 |
77 | def __iter__(self: LV2io) -> None:
78 | """Allow itertion."""
79 | for attr in sorted(self.__dict__):
80 | if not attr.startswith("__"):
81 | yield attr
82 |
83 | def __enter__(self: LV2io) -> Self:
84 | """Initiate the context manager."""
85 | return self
86 |
87 | def __exit__(self: LV2io, *args: object) -> bool:
88 | """Exit the context manager."""
89 | self.close()
90 | return False # any exception is raised by the with statement.
91 |
92 | def close(self: LV2io) -> None:
93 | """Close the product."""
94 | if self.fid is not None:
95 | self.fid.close()
96 |
97 | # ----- Class properties --------------------
98 | @property
99 | def science_product(self: LV2io) -> bool:
100 | """Check if product is a science product."""
101 | science_inst = b"Space Research Organisation Netherlands"
102 |
103 | res = False
104 | with h5py.File(self.filename) as fid:
105 | if "institution" in fid.attrs and fid.attrs["institution"] == science_inst:
106 | res = True
107 |
108 | return res
109 |
110 | @property
111 | def orbit(self: LV2io) -> int:
112 | """Return reference orbit number."""
113 | if self.science_product:
114 | return int(self.__nc_attr("orbit", "l1b_file"))
115 |
116 | return int(self.__h5_attr("orbit", None)[0])
117 |
118 | @property
119 | def algorithm_version(self: LV2io) -> str | None:
120 | """Return version of the level 2 algorithm."""
121 | res = self.get_attr("algorithm_version")
122 |
123 | return res if res is not None else self.get_attr("version")
124 |
125 | @property
126 | def processor_version(self: LV2io) -> str | None:
127 | """Return version of the level 2 processor."""
128 | res = self.get_attr("processor_version")
129 |
130 | return res if res is not None else self.get_attr("version")
131 |
132 | @property
133 |     def product_version(self: LV2io) -> str | None:
134 | """Return version of the level 2 product."""
135 | res = self.get_attr("product_version")
136 |
137 | return res if res is not None else self.get_attr("version")
138 |
139 | @property
140 | def coverage_time(self: LV2io) -> tuple[str, str]:
141 | """Return start and end of the measurement coverage time."""
142 | return (
143 | self.get_attr("time_coverage_start"),
144 | self.get_attr("time_coverage_end"),
145 | )
146 |
147 | @property
148 | def creation_time(self: LV2io) -> str:
149 | """Return creation date/time of the level 2 product."""
150 | return self.get_attr("date_created")
151 |
152 | # ----- Attributes --------------------
153 | def __h5_attr(
154 | self: LV2io, attr_name: str, ds_name: str | None
155 | ) -> np.ndarray | None:
156 | """Read attributes from operational products using hdf5."""
157 | if ds_name is not None:
158 | dset = self.fid[f"/PRODUCT/{ds_name}"]
159 | if attr_name not in dset.attrs:
160 | return None
161 |
162 | attr = dset.attrs[attr_name]
163 | else:
164 | if attr_name not in self.fid.attrs:
165 | return None
166 |
167 | attr = self.fid.attrs[attr_name]
168 |
169 | if isinstance(attr, bytes):
170 | return attr.decode("ascii")
171 |
172 | return attr
173 |
174 | def __nc_attr(self: LV2io, attr_name: str, ds_name: str) -> np.ndarray | None:
175 | """Read attributes from science products using netCDF4."""
176 | if ds_name is not None:
177 | for grp_name in ["target_product", "side_product", "instrument"]:
178 | if grp_name not in self.fid.groups:
179 | continue
180 |
181 | if ds_name not in self.fid[grp_name].variables:
182 | continue
183 |
184 | dset = self.fid[f"/{grp_name}/{ds_name}"]
185 | if attr_name in dset.ncattrs():
186 | return dset.getncattr(attr_name)
187 |
188 | return None
189 |
190 | if attr_name not in self.fid.ncattrs():
191 | return None
192 |
193 | return self.fid.getncattr(attr_name)
194 |
195 | def get_attr(
196 | self: LV2io, attr_name: str, ds_name: str | None = None
197 | ) -> np.ndarray | None:
198 | """Obtain value of an HDF5 file attribute or dataset attribute.
199 |
200 | Parameters
201 | ----------
202 | attr_name : str
203 | name of the attribute
204 | ds_name : str, optional
205 | name of dataset, default is to read the product attributes
206 |
207 | """
208 | if self.science_product:
209 | return self.__nc_attr(attr_name, ds_name)
210 |
211 | return self.__h5_attr(attr_name, ds_name)
212 |
213 | # ----- Time information ---------------
214 | @property
215 | def ref_time(self: LV2io) -> datetime | None:
216 | """Return reference start time of measurements."""
217 | if self.science_product:
218 | return None
219 |
220 | return datetime(2010, 1, 1, 0, 0, 0) + timedelta(
221 | seconds=int(self.fid["/PRODUCT/time"][0])
222 | )
223 |
224 | def get_time(self: LV2io) -> np.ndarray | None:
225 | """Return start time of measurement per scan-line."""
226 | if self.science_product:
227 | buff = self.get_dataset("time")[:: self.ground_pixel, :]
228 | return np.array([datetime(*x) for x in buff])
229 |
230 | buff = self.fid["/PRODUCT/delta_time"][0, :]
231 | return np.array([self.ref_time + timedelta(seconds=x / 1e3) for x in buff])
232 |
233 | # ----- Geolocation --------------------
234 | def __h5_geo_data(self: LV2io, geo_dsets: str) -> dict:
235 | """Read geolocation datasets from operational products using HDF5."""
236 | res = {}
237 | if geo_dsets is None:
238 | geo_dsets = "latitude,longitude"
239 |
240 | for key in geo_dsets.split(","):
241 | for grp_name in ["/PRODUCT", "/PRODUCT/SUPPORT_DATA/GEOLOCATIONS"]:
242 | if key in self.fid[grp_name]:
243 | res[key] = np.squeeze(self.fid[f"{grp_name}/{key}"])
244 |                     break
245 |
246 | return res
247 |
248 | def __nc_geo_data(self: LV2io, geo_dsets: str) -> dict:
249 | """Read geolocation datasets from science products using netCDF4."""
250 | res = {}
251 | if geo_dsets is None:
252 | geo_dsets = "latitude_center,longitude_center"
253 |
254 | for key in geo_dsets.split(","):
255 | if key in self.fid["/instrument"].variables:
256 | ds_name = f"/instrument/{key}"
257 | res[key] = self.fid[ds_name][:].reshape(
258 | self.scanline, self.ground_pixel
259 | )
260 |
261 | return res
262 |
263 | def get_geo_data(self: LV2io, geo_dsets: str | None = None) -> dict:
264 | """Return data of selected datasets from the GEOLOCATIONS group.
265 |
266 | Parameters
267 | ----------
268 | geo_dsets : str, optional
269 | Name(s) of datasets, comma separated. Default:
270 |
271 | * operational: 'latitude,longitude'
272 | * science: 'latitude_center,longitude_center'
273 |
274 | Returns
275 | -------
276 | dict
277 | dictionary with arrays of selected datasets
278 |
279 | """
280 | if self.science_product:
281 | return self.__nc_geo_data(geo_dsets)
282 |
283 | return self.__h5_geo_data(geo_dsets)
284 |
285 | # ----- Footprints --------------------
286 | def __h5_geo_bounds(
287 | self: LV2io,
288 | extent: list[float, float, float, float],
289 | data_sel: tuple[slice | int],
290 | ) -> tuple:
291 | """Read latitude/longitude bounding box [HDF5]."""
292 | if extent is not None:
293 | if len(extent) != 4:
294 | raise ValueError("parameter extent must have 4 elements")
295 |
296 | lats = self.fid["/PRODUCT/latitude"][0, ...]
297 | lons = self.fid["/PRODUCT/longitude"][0, ...]
298 |
299 | indx = (
300 | (lons >= extent[0])
301 | & (lons <= extent[1])
302 | & (lats >= extent[2])
303 | & (lats <= extent[3])
304 | ).nonzero()
305 | data_sel = np.s_[
306 | indx[0].min() : indx[0].max(), indx[1].min() : indx[1].max()
307 | ]
308 |
309 | gid = self.fid["/PRODUCT/SUPPORT_DATA/GEOLOCATIONS"]
310 | if data_sel is None:
311 | lat_bounds = gid["latitude_bounds"][0, ...]
312 | lon_bounds = gid["longitude_bounds"][0, ...]
313 | else:
314 | data_sel0 = (0, *data_sel, slice(None))
315 | lat_bounds = gid["latitude_bounds"][data_sel0]
316 | lon_bounds = gid["longitude_bounds"][data_sel0]
317 |
318 | return data_sel, lon_bounds, lat_bounds
319 |
320 | def __nc_geo_bounds(
321 | self: LV2io,
322 | extent: list[float, float, float, float],
323 | data_sel: tuple[slice | int],
324 | ) -> tuple:
325 | """Read latitude/longitude bounding box [netCDF4]."""
326 | if extent is not None:
327 | if len(extent) != 4:
328 | raise ValueError("parameter extent must have 4 elements")
329 |
330 | lats = self.fid["/instrument/latitude_center"][:].reshape(
331 | self.scanline, self.ground_pixel
332 | )
333 | lons = self.fid["/instrument/longitude_center"][:].reshape(
334 | self.scanline, self.ground_pixel
335 | )
336 |
337 | indx = (
338 | (lons >= extent[0])
339 | & (lons <= extent[1])
340 | & (lats >= extent[2])
341 | & (lats <= extent[3])
342 | ).nonzero()
343 | data_sel = np.s_[
344 | indx[0].min() : indx[0].max(), indx[1].min() : indx[1].max()
345 | ]
346 |
347 | gid = self.fid["/instrument"]
348 | lat_bounds = gid["latitude_corners"][:].data.reshape(
349 | self.scanline, self.ground_pixel, 4
350 | )
351 | lon_bounds = gid["longitude_corners"][:].data.reshape(
352 | self.scanline, self.ground_pixel, 4
353 | )
354 | if data_sel is not None:
355 | lat_bounds = lat_bounds[(*data_sel, slice(None))]
356 | lon_bounds = lon_bounds[(*data_sel, slice(None))]
357 |
358 | return data_sel, lon_bounds, lat_bounds
359 |
360 | def get_geo_bounds(
361 | self: LV2io,
362 | extent: list[float, float, float, float] | None,
363 | data_sel: tuple[slice | int] | None,
364 |     ) -> dict | tuple:
365 | """Return bounds of latitude/longitude as a mesh for plotting.
366 |
367 | Parameters
368 | ----------
369 | extent : list
370 | select data to cover a region with geolocation defined by:
371 | lon_min, lon_max, lat_min, lat_max and return numpy slice
372 | data_sel : numpy slice
373 | a 3-dimensional numpy slice: time, scan_line, ground_pixel
374 | Note 'data_sel' will be overwritten when 'extent' is defined
375 |
376 | Returns
377 | -------
378 | data_sel : numpy slice
379 | Select slice of data which covers geolocation defined by extent.
380 | Only provided if extent is not None.
381 | out : dictionary
382 | With numpy arrays for latitude and longitude
383 |
384 | """
385 | if self.science_product:
386 | res = self.__nc_geo_bounds(extent, data_sel)
387 | else:
388 | res = self.__h5_geo_bounds(extent, data_sel)
389 | data_sel, lon_bounds, lat_bounds = res
390 |
391 | res = {}
392 | _sz = lon_bounds.shape
393 | res["longitude"] = np.empty((_sz[0] + 1, _sz[1] + 1), dtype=float)
394 | res["longitude"][:-1, :-1] = lon_bounds[:, :, 0]
395 | res["longitude"][-1, :-1] = lon_bounds[-1, :, 1]
396 | res["longitude"][:-1, -1] = lon_bounds[:, -1, 1]
397 | res["longitude"][-1, -1] = lon_bounds[-1, -1, 2]
398 |
399 | res["latitude"] = np.empty((_sz[0] + 1, _sz[1] + 1), dtype=float)
400 | res["latitude"][:-1, :-1] = lat_bounds[:, :, 0]
401 | res["latitude"][-1, :-1] = lat_bounds[-1, :, 1]
402 | res["latitude"][:-1, -1] = lat_bounds[:, -1, 1]
403 | res["latitude"][-1, -1] = lat_bounds[-1, -1, 2]
404 |
405 | if extent is None:
406 | return res
407 |
408 | return data_sel, res
409 |
410 | # ----- Datasets (numpy) --------------------
411 | def __h5_dataset(
412 | self: LV2io, name: str, data_sel: tuple[slice | int], fill_as_nan: bool
413 | ) -> np.ndarray:
414 | """Read dataset from operational products using HDF5."""
415 | fillvalue = float.fromhex("0x1.ep+122")
416 |
417 | if name not in self.fid["/PRODUCT"]:
418 | raise ValueError(f"dataset {name} not found")
419 |
420 | dset = self.fid[f"/PRODUCT/{name}"]
421 | if data_sel is None:
422 | if dset.dtype == np.float32:
423 | res = dset.astype(float)[0, ...]
424 | else:
425 | res = dset[0, ...]
426 | else:
427 | if dset.dtype == np.float32:
428 | res = dset.astype(float)[(0, *data_sel)]
429 | else:
430 | res = dset[(0, *data_sel)]
431 |
432 | if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
433 | res[(res == fillvalue)] = np.nan
434 |
435 | return res
436 |
437 | def __nc_dataset(
438 | self: LV2io, name: str, data_sel: tuple[slice | int], fill_as_nan: bool
439 | ) -> np.ndarray:
440 | """Read dataset from science products using netCDF4."""
441 | if name in self.fid["/target_product"].variables:
442 | group = "/target_product"
443 | elif name in self.fid["/instrument"].variables:
444 | group = "/instrument"
445 | else:
446 | raise ValueError(f"dataset {name} not found")
447 |
448 | dset = self.fid[f"{group}/{name}"]
449 | if dset.size == self.scanline * self.ground_pixel:
450 | res = dset[:].reshape(self.scanline, self.ground_pixel)
451 | else:
452 | res = dset[:]
453 | if data_sel is not None:
454 | res = res[data_sel]
455 |
456 | if fill_as_nan:
457 | return res.filled(np.nan)
458 |
459 | return res.data
460 |
461 | def get_dataset(
462 | self: LV2io,
463 | name: str,
464 | data_sel: tuple[slice | int] | slice | None = None,
465 | fill_as_nan: bool = True,
466 | ) -> np.ndarray:
467 | """Read level 2 dataset from PRODUCT group.
468 |
469 | Parameters
470 | ----------
471 | name : string
472 | name of dataset with level 2 data
473 | data_sel : numpy slice
474 | a 3-dimensional numpy slice: time, scan_line, ground_pixel
475 | fill_as_nan : boolean
476 | Replace (float) FillValues with NaN's, when True
477 |
478 | Returns
479 | -------
480 | numpy.ndarray
481 |
482 | """
483 | if self.science_product:
484 | return self.__nc_dataset(name, data_sel, fill_as_nan)
485 |
486 | return self.__h5_dataset(name, data_sel, fill_as_nan)
487 |
488 | # ----- Dataset (xarray) --------------------
489 | def __h5_data_as_xds(
490 | self: LV2io, name: str, data_sel: tuple[slice | int]
491 | ) -> xr.DataArray:
492 | """Read dataset from group target_product using HDF5.
493 |
494 | Input: operational product
495 |
496 | Return: xarray.Dataset
497 | """
498 | if name not in self.fid["/PRODUCT"]:
499 | raise ValueError(f"dataset {name} not found")
500 | dset = self.fid[f"/PRODUCT/{name}"]
501 |
502 | # ToDo handle parameter mol_m2
503 | return h5_to_xr(dset, (0, *data_sel)).squeeze()
504 |
505 | def __nc_data_as_xds(
506 | self: LV2io, name: str, data_sel: tuple[slice | int]
507 | ) -> xr.DataArray:
508 | """Read dataset from group PRODUCT using netCDF4.
509 |
510 | Input: science product
511 |
512 | Return: xarray.DataArray
513 | """
514 | if name in self.fid["/target_product"].variables:
515 | group = "/target_product/"
516 | elif name in self.fid["/instrument"].variables:
517 | group = "/instrument/"
518 | else:
519 | raise ValueError(f"dataset {name} not found")
520 |
521 | return data_to_xr(
522 | self.get_dataset(group + name, data_sel),
523 | dims=["scanline", "ground_pixel"],
524 | name=name,
525 | long_name=self.get_attr("long_name", name),
526 | units=self.get_attr("units", name),
527 | )
528 |
529 | def get_data_as_xds(
530 | self: LV2io, name: str, data_sel: tuple[slice | int] | None = None
531 | ) -> xr.DataArray:
532 | """Read dataset from group PRODUCT/target_product group.
533 |
534 | Parameters
535 | ----------
536 | name : str
537 | name of dataset with level 2 data
538 | data_sel : numpy slice
539 | a 3-dimensional numpy slice: time, scan_line, ground_pixel
540 |
541 | Returns
542 | -------
543 | xarray.DataArray
544 |
545 | """
546 | if self.science_product:
547 | return self.__nc_data_as_xds(name, data_sel)
548 |
549 | return self.__h5_data_as_xds(name, data_sel)
550 |
--------------------------------------------------------------------------------
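A minimal usage sketch of the LV2io methods above, assuming an operational
Tropomi L2 product; the file name and dataset name are hypothetical
placeholders, and LV2io is assumed to offer the same context-manager
protocol as the other readers in this package:

    from pys5p.lv2_io import LV2io

    with LV2io("S5P_OFFL_L2__NO2____orbit.nc") as lv2:
        # restrict to a lon/lat box: [lon_min, lon_max, lat_min, lat_max]
        data_sel, geo = lv2.get_geo_bounds([4.0, 7.0, 51.0, 54.0], None)
        # geo["longitude"] and geo["latitude"] are corner meshes, one row
        # and one column larger than the selected data, ready for pcolormesh
        no2 = lv2.get_dataset("nitrogendioxide_tropospheric_column", data_sel)

--------------------------------------------------------------------------------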
/src/pys5p/ocm_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`OCMio`, class to access on-ground calibration data."""
11 |
12 | from __future__ import annotations
13 |
14 | __all__ = ["OCMio"]
15 |
16 | from datetime import datetime, timedelta
17 | from pathlib import Path, PurePosixPath
18 | from typing import Any, Self
19 |
20 | import h5py
21 | import numpy as np
22 | from moniplot.biweight import Biweight
23 |
24 | # - global parameters ------------------------------
25 |
26 |
27 | # - local functions --------------------------------
28 | def band2channel(
29 | dict_a: dict, dict_b: dict, mode: list[str] | None = None
30 | ) -> np.ndarray | tuple[Any, Any]:
31 | """Store data from a dictionary as returned by get_msm_data to a ndarray.
32 |
33 | Parameters
34 | ----------
35 | dict_a : dict
36 | data of the one spectral band
37 | dict_b : dict
38 | data of another spectral band
39 | mode : list ['combined', 'mean', 'median', 'biweight']
40 | 'combined'
41 | will combine data using np.concatenate((data_a, data_b),\
42 | axis=data_a.ndim-1)
43 |
44 | 'mean'
45 | is calculated using np.nanmean(data, axis=0)
46 |
47 | 'median'
48 | is calculated using np.nanmedian(data, axis=0)
49 |
50 | 'biweight'
51 | is calculated using biweight(data, axis=0)
52 |
53 | Returns
54 | -------
55 | numpy.ndarray
56 | Data from dictionary stored in a numpy array
57 |
58 | Examples
59 | --------
60 | > data = band2channel(dict_a, dict_b, mode=['combined', 'median'])
61 |
62 | """
63 | if mode is None:
64 | mode = []
65 |
66 | data_a = None
67 | for key in sorted(dict_a):
68 | buff = dict_a[key][...]
69 |
70 | data_a = buff if data_a is None else np.vstack((data_a, buff))
71 |
72 | if data_a is not None:
73 | if "mean" in mode:
74 | data_a = np.nanmean(data_a, axis=0)
75 | elif "median" in mode:
76 | data_a = np.nanmedian(data_a, axis=0)
77 | elif "biweight" in mode:
78 | data_a = Biweight(data_a, axis=0).median
79 |
80 | if dict_b is None:
81 | return data_a
82 |
83 | data_b = None
84 | for key in sorted(dict_b):
85 | buff = dict_b[key][...]
86 |
87 | data_b = buff if data_b is None else np.vstack((data_b, buff))
88 |
89 | if data_b is not None:
90 | if "mean" in mode:
91 | data_b = np.nanmean(data_b, axis=0)
92 | elif "median" in mode:
93 | data_b = np.nanmedian(data_b, axis=0)
94 | elif "biweight" in mode:
95 | data_b = Biweight(data_b, axis=0).median
96 |
97 | if "combined" in mode:
98 | return np.concatenate((data_a, data_b), axis=data_a.ndim - 1)
99 |
100 | return data_a, data_b
101 |
102 |
103 | # - class definition -------------------------------
104 | class OCMio:
105 | """A class to read Tropomi on-ground calibration products (Lx).
106 |
107 | Parameters
108 | ----------
109 | ocm_product : Path
110 | Full path to on-ground calibration measurement
111 |
112 | """
113 |
114 | def __init__(self: OCMio, ocm_product: Path) -> None:
115 | """Initialize access to an OCAL Lx product."""
116 | if not ocm_product.is_file():
117 | raise FileNotFoundError(f"{ocm_product.name} does not exist")
118 |
119 | # initialize class-attributes
120 | self.__msm_path = None
121 | self.band = None
122 | self.filename = ocm_product
123 |
124 | # open OCM product as HDF5 file
125 | self.fid = h5py.File(ocm_product, "r")
126 |
127 | def __iter__(self: OCMio) -> None:
128 | """Allow iteration."""
129 | for attr in sorted(self.__dict__):
130 | if not attr.startswith("__"):
131 | yield attr
132 |
133 | # def __del__(self):
134 | # """
135 | # called when the object is destroyed
136 | # """
137 | # self.close()
138 |
139 | def __enter__(self: OCMio) -> Self:
140 | """Initiate the context manager."""
141 | return self
142 |
143 | def __exit__(self: OCMio, *args: object) -> bool:
144 | """Exit the context manager."""
145 | self.close()
146 | return False # any exception is raised by the with statement.
147 |
148 | def close(self: OCMio) -> None:
149 | """Close resources."""
150 | self.band = None
151 | if self.fid is not None:
152 | self.fid.close()
153 | self.fid = None
154 |
155 | # ---------- RETURN VERSION of the S/W ----------
156 | # ---------- Functions that work before MSM selection ----------
157 | def get_processor_version(self: OCMio) -> str:
158 | """Return version of the L01b processor."""
159 | res = self.fid.attrs["processor_version"]
160 | if isinstance(res, bytes):
161 | # pylint: disable=no-member
162 | res = res.decode("ascii")
163 | return res
164 |
165 | def get_coverage_time(self: OCMio) -> tuple[str, str]:
166 | """Return start and end of the measurement coverage time."""
167 | t_bgn = self.fid.attrs["time_coverage_start"]
168 | if isinstance(t_bgn, bytes):
169 | # pylint: disable=no-member
170 | t_bgn = t_bgn.decode("ascii")
171 |
172 | t_end = self.fid.attrs["time_coverage_end"]
173 | if isinstance(t_end, bytes):
174 | # pylint: disable=no-member
175 | t_end = t_end.decode("ascii")
176 | return t_bgn, t_end
177 |
178 | def get_attr(self: OCMio, attr_name: str) -> np.ndarray | None:
179 | """Obtain value of an HDF5 file attribute.
180 |
181 | Parameters
182 | ----------
183 | attr_name : string
184 | name of the attribute
185 |
186 | """
187 | if attr_name in self.fid.attrs:
188 | return self.fid.attrs[attr_name]
189 |
190 | return None
191 |
192 | # ---------- Functions that only work after MSM selection ----------
193 | def get_ref_time(self: OCMio) -> dict:
194 | """Return reference start time of measurements."""
195 | if not self.__msm_path:
196 | return {}
197 |
198 | grp = self.fid[f"BAND{self.band}"]
199 | res = {}
200 | for msm in sorted(self.__msm_path):
201 | sgrp = grp[str(PurePosixPath(msm, "GEODATA"))]
202 | res[msm] = datetime(2010, 1, 1, 0, 0, 0)
203 | res[msm] += timedelta(seconds=int(sgrp["time"][0]))
204 |
205 | return res
206 |
207 | def get_delta_time(self: OCMio) -> dict:
208 | """Return offset from the reference start time of measurement."""
209 | if not self.__msm_path:
210 | return {}
211 |
212 | grp = self.fid[f"BAND{self.band}"]
213 | res = {}
214 | for msm in sorted(self.__msm_path):
215 | sgrp = grp[str(PurePosixPath(msm, "GEODATA"))]
216 | res[msm] = sgrp["delta_time"][:].astype(int)
217 |
218 | return res
219 |
220 | def get_instrument_settings(self: OCMio) -> dict:
221 | """Return instrument settings of measurement."""
222 | if not self.__msm_path:
223 | return {}
224 |
225 | grp = self.fid[f"BAND{self.band}"]
226 | res = {}
227 | for msm in sorted(self.__msm_path):
228 | sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
229 | res[msm] = np.squeeze(sgrp["instrument_settings"])
230 |
231 | return res
232 |
233 | def get_gse_stimuli(self: OCMio) -> dict:
234 | """Return GSE stimuli parameters."""
235 | if not self.__msm_path:
236 | return {}
237 |
238 | grp = self.fid[f"BAND{self.band}"]
239 | res = {}
240 | for msm in sorted(self.__msm_path):
241 | sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
242 | res[msm] = np.squeeze(sgrp["gse_stimuli"])
243 |
244 | return res
245 |
246 | def get_exposure_time(self: OCMio) -> np.ndarray | None:
247 | """Return the exact pixel exposure time of the measurements."""
248 | if not self.__msm_path:
249 | return None
250 |
251 | grp = self.fid[f"BAND{self.band}"]
252 | msm = self.__msm_path[0] # all measurement sets have the same ICID
253 | sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
254 | instr = np.squeeze(sgrp["instrument_settings"])
255 |
256 | if int(self.band) > 6:
257 | return 1.25e-6 * (65540 - instr["int_delay"] + instr["int_hold"])
258 |
259 | return instr["exposure_time"]
260 |
261 | def get_housekeeping_data(self: OCMio) -> dict:
262 | """Return housekeeping data of measurements."""
263 | if not self.__msm_path:
264 | return {}
265 |
266 | grp = self.fid[f"BAND{self.band}"]
267 | res = {}
268 | for msm in sorted(self.__msm_path):
269 | sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
270 | res[msm] = np.squeeze(sgrp["housekeeping_data"])
271 |
272 | return res
273 |
274 | # -------------------------
275 | def select(
276 | self: OCMio, ic_id: int | None = None, *, msm_grp: str | None = None
277 | ) -> int:
278 | """Select a measurement as BAND%/ICID__GROUP_%.
279 |
280 | Parameters
281 | ----------
282 | ic_id : int
283 | used as "BAND%/ICID_{}_GROUP_%".format(ic_id)
284 | msm_grp : str
285 | select measurement group with name msm_grp
286 |
287 | All measurement groups are shown when both ic_id and msm_grp are None
288 |
289 | Returns
290 | -------
291 | scalar
292 | Number of measurements found
293 |
294 | Notes
295 | -----
296 | Updated object attributes:
297 | - band : selected spectral band
298 |
299 | """
300 | self.band = ""
301 | self.__msm_path = []
302 | for ii in "87654321":
303 | if f"BAND{ii}" in self.fid:
304 | self.band = ii
305 | break
306 |
307 | if self.band:
308 | gid = self.fid[f"BAND{self.band}"]
309 | if msm_grp is not None and msm_grp in gid:
310 | self.__msm_path = [msm_grp]
311 | elif ic_id is None:
312 | grp_name = "ICID_"
313 | for kk in gid:
314 | if kk.startswith(grp_name):
315 | print(kk)
316 | else:
317 | grp_name = f"ICID_{ic_id:05}_GROUP"
318 | self.__msm_path = [s for s in gid if s.startswith(grp_name)]
319 |
320 | return len(self.__msm_path)
321 |
322 | # -------------------------
323 | def get_msm_attr(self: OCMio, msm_dset: str, attr_name: str) -> str | None:
324 | """Return attribute of measurement dataset "msm_dset".
325 |
326 | Parameters
327 | ----------
328 | msm_dset : str
329 | name of measurement dataset
330 | attr_name : str
331 | name of the attribute
332 |
333 | Returns
334 | -------
335 | scalar or numpy.ndarray
336 | value of attribute "attr_name"
337 |
338 | """
339 | if not self.__msm_path:
340 | return ""
341 |
342 | grp = self.fid[f"BAND{self.band}"]
343 | for msm_path in self.__msm_path:
344 | ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset))
345 |
346 | if attr_name in grp[ds_path].attrs:
347 | attr = grp[ds_path].attrs[attr_name]
348 | if isinstance(attr, bytes):
349 | return attr.decode("ascii")
350 |
351 | return attr
352 |
353 | return None
354 |
355 | # -------------------------
356 | def get_msm_data(
357 | self: OCMio,
358 | msm_dset: str,
359 | fill_as_nan: bool = True,
360 | frames: list[int, int] | None = None,
361 | columns: list[int, int] | None = None,
362 | ) -> dict:
363 | """Return data of measurement dataset `msm_dset`.
364 |
365 | Parameters
366 | ----------
367 | msm_dset : str
368 | name of measurement dataset
369 | if msm_dset is None then show names of available datasets
370 |
371 | fill_as_nan : boolean
372 | Replace (float) FillValues with NaN's, when True
373 |
374 | frames : [i, j]
375 | Slice data on the slowest axis (time) from index 'i' to 'j'
376 |
377 | columns : [i, j]
378 | Slice data on the fastest axis (columns) from index 'i' to 'j'
379 |
380 | Returns
381 | -------
382 | dict
383 | Python dictionary with names of msm_groups as keys
384 |
385 | """
386 | fillvalue = float.fromhex("0x1.ep+122")
387 |
388 | if not self.__msm_path:
389 | return {}
390 |
391 | # show HDF5 dataset names and return
392 | grp = self.fid[f"BAND{self.band}"]
393 | if msm_dset is None:
394 | ds_path = str(PurePosixPath(self.__msm_path[0], "OBSERVATIONS"))
395 | for kk in grp[ds_path]:
396 | print(kk)
397 | return {}
398 |
399 | # skip row 257 of the SWIR detector
400 | rows = None
401 | if self.band in ("7", "8"):
402 | rows = [0, -1]
403 |
404 | # combine data of all measurement groups in dictionary
405 | res = {}
406 | for msm_grp in sorted(self.__msm_path):
407 | dset = grp[str(PurePosixPath(msm_grp, "OBSERVATIONS", msm_dset))]
408 | data_sel = ()
409 | for ii in range(dset.ndim):
410 | dim_name = PurePosixPath(dset.dims[ii][0].name).name
411 | if dim_name == "msmt_time":
412 | if frames is None:
413 | data_sel += (slice(None),)
414 | else:
415 | data_sel += (slice(*frames),)
416 | elif dim_name == "row":
417 | if rows is None:
418 | data_sel += (slice(None),)
419 | else:
420 | data_sel += (slice(*rows),)
421 | elif dim_name == "column":
422 | if columns is None:
423 | data_sel += (slice(None),)
424 | else:
425 | data_sel += (slice(*columns),)
426 | else:
427 | raise ValueError(f"unexpected dimension name: {dim_name}")
428 |
429 | # read data
430 | if dset.dtype == np.float32:
431 | data = np.squeeze(dset.astype(float)[data_sel])
432 | else:
433 | data = np.squeeze(dset[data_sel])
434 |
435 | if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
436 | data[(data == fillvalue)] = np.nan
437 |
438 | # add data to dictionary
439 | res[msm_grp] = data
440 |
441 | return res
442 |
443 | # -------------------------
444 | def read_direct_msm(
445 | self: OCMio,
446 | msm_dset: str,
447 | dest_sel: tuple[slice | int] | None = None,
448 | dest_dtype: type[Any] | None = None,
449 | fill_as_nan: bool = False,
450 | ) -> dict | None:
451 | """Return data of measurement dataset `msm_dset` (fast implementation).
452 |
453 | Parameters
454 | ----------
455 | msm_dset : string
456 | Name of measurement dataset
457 | dest_sel : numpy slice
458 | Selection must be the output of numpy.s_[].
459 | dest_dtype : numpy dtype
460 | Perform type conversion
461 | fill_as_nan : boolean
462 | Replace (float) FillValues with NaN's, when True
463 |
464 | Returns
465 | -------
466 | dict
467 | Python dictionary with names of msm_groups as keys
468 |
469 | """
470 | fillvalue = float.fromhex("0x1.ep+122")
471 |
472 | if not self.__msm_path:
473 | return None
474 |
475 | if dest_sel is None:
476 | dest_sel = np.s_[...]
477 |
478 | # combine data of all measurement groups in dictionary
479 | res = {}
480 | for msm_grp in sorted(self.__msm_path):
481 | dset = self.fid[
482 | str(
483 | PurePosixPath(f"BAND{self.band}", msm_grp, "OBSERVATIONS", msm_dset)
484 | )
485 | ]
486 |
487 | if dest_dtype is None:
488 | buff = dset[dest_sel]
489 | else:
490 | buff = dset.astype(dest_dtype)[dest_sel]
491 |
492 | if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
493 | buff[(buff == fillvalue)] = np.nan
494 |
495 | # add data to dictionary
496 | res[msm_grp] = buff
497 |
498 | return res
499 |
--------------------------------------------------------------------------------
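A short sketch of the intended call sequence for OCMio and band2channel;
the file name, ICID and dataset name below are hypothetical placeholders:

    from pathlib import Path
    from pys5p.ocm_io import OCMio, band2channel

    with OCMio(Path("ocal_lx_product.h5")) as ocm:
        # select all measurement groups of one instrument configuration
        if ocm.select(ic_id=1234) > 0:
            texp = ocm.get_exposure_time()
            data = ocm.get_msm_data("signal", fill_as_nan=True)
            # reduce the measurement groups to a single median image;
            # pass a second dictionary to combine two spectral bands
            img = band2channel(data, None, mode=["median"])

--------------------------------------------------------------------------------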
/src/pys5p/rls.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """Implementation of the Relative Least-Squares regression (RLS).
11 |
12 | The RLS regression is used to find the linear dependence y(x) = c0 + c1 * x
13 | that best describes the data before and after correction, using absolute
14 | residuals y_i - (c0 + c1 * x_i) divided by the expected signals c1 * x_i in
15 | the least-squares sum. Offset c0 has an arbitrary size and should not affect
16 | the fit result. Weight factors are determined such to effectively spread the
17 | data points evenly over the whole range of x, making the result less
18 | sensitive to the actual spacing between the data points.
19 | """
20 |
21 | from __future__ import annotations
22 |
23 | __all__ = ["rls_fit", "rls_fit0"]
24 |
25 | import numpy as np
26 | from numpy import ma
27 |
28 |
29 | def calc_ma_weights(xdata: np.ndarray, masked: np.ndarray) -> ma.MaskedArray:
30 | """Generate weight factor per pixel.
31 |
32 | Notes
33 | -----
34 | It might be that np.apply_along_axis() is slightly faster. However, the
35 | for-loop over 'row in buff' is also very efficient, because it operates
36 | on the specially designed MaskedArray 'buff', which is eventually used
37 | to store the weight values per pixel.
38 |
39 | """
40 | buff = ma.array(np.repeat([xdata], masked.shape[0], axis=0), mask=masked)
41 | for row in buff:
42 | valid = ma.compressed(row)
43 | if len(valid) < 2:
44 | continue
45 | wght = np.concatenate(
46 | (
47 | [2 * (valid[1] - valid[0])],
48 | valid[2:] - valid[0:-2],
49 | [2 * (valid[-1] - valid[-2])],
50 | )
51 | )
52 | row[~row.mask] = wght
53 |
54 | return buff
55 |
56 |
57 | def rls_fit(xdata: np.ndarray, ydata: np.ndarray | ma.MaskedArray) -> tuple:
58 | """Perform RLS regression finding linear dependence y(x) = c0 + c1 * x.
59 |
60 | Parameters
61 | ----------
62 | xdata : ndarray, shape (M,)
63 | X-coordinates of the M sample points (xdata[i], ydata[..., i])
64 | The array values have to be positive and increasing
65 | ydata : MaskedArray or ndarray, shape (..., M)
66 | Y-coordinates of the sample points
67 |
68 | Returns
69 | -------
70 | c0, c1, std_c0, std_c1 : tuple of ndarrays
71 | coefficients of the linear dependence and their standard deviations
72 |
73 | Notes
74 | -----
75 | Calling a rls-function with MaskedArrays is much slower than with
76 | plain ndarrays.
77 |
78 | The coefficients are set to NaN when the number of samples is less than 2.
79 |
80 | The standard deviations can only be calculated when the number of samples
81 | is larger than two; otherwise the standard deviations are equal to zero.
82 |
83 | """
84 | # pylint: disable=too-many-locals
85 | if xdata.size < 2:
86 | raise RuntimeError("too few sample points for a fit")
87 | if xdata.size != ydata.shape[-1]:
88 | raise RuntimeError("number of samples not equal for xdata, ydata")
89 |
90 | # perform all computations on 2 dimensional arrays
91 | img_shape = ydata.shape[:-1]
92 | yy1 = ydata.reshape(-1, xdata.size)
93 |
94 | # calculate weights
95 | if ma.isMaskedArray(ydata):
96 | wghts = calc_ma_weights(xdata, ma.getmaskarray(yy1))
97 | else:
98 | buff = np.concatenate(
99 | (
100 | [2 * (xdata[1] - xdata[0])],
101 | xdata[2:] - xdata[0:-2],
102 | [2 * (xdata[-1] - xdata[-2])],
103 | )
104 | )
105 | wghts = np.repeat([buff], yy1.shape[0], axis=0)
106 | wx1 = wghts / xdata
107 | wx2 = wghts / xdata**2 # is wx1 / xdata faster?
108 |
109 | # calculate the Q elements
110 | q00 = wghts.sum(axis=1)
111 | q01 = wx1.sum(axis=1)
112 | q02 = wx2.sum(axis=1)
113 |
114 | q11 = (wx1 * yy1).sum(axis=1)
115 | q12 = (wx2 * yy1).sum(axis=1)
116 | q22 = (wx2 * yy1**2).sum(axis=1)
117 |
118 | # calculate the Z elements
119 | zz1 = q00 * q02 - q01**2
120 | zz2 = q00 * q12 - q01 * q11
121 | zz3 = q02 * q11 - q01 * q12
122 |
123 | # calculate fit parameters and their uncertainties
124 | num = yy1.count(axis=1) if ma.isMaskedArray(ydata) else len(xdata)
125 | cc0 = zz2 / zz1
126 | cc1 = zz3 / zz1
127 | if ma.isMaskedArray(ydata):
128 | chi2 = ma.abs(q22 - q12 * cc0 - q11 * cc1) / np.clip(num - 2, 1, None)
129 | chi2[num <= 2] = 0
130 | sc0 = ma.sqrt(q00 * chi2 / zz1)
131 | sc1 = ma.sqrt(q02 * chi2 / zz1)
132 |
133 | return (
134 | cc0.reshape(img_shape).filled(np.nan),
135 | cc1.reshape(img_shape).filled(np.nan),
136 | sc0.reshape(img_shape).filled(np.nan),
137 | sc1.reshape(img_shape).filled(np.nan),
138 | )
139 |
140 | # using only non-MaskedArray functions
141 | chi2 = np.abs(q22 - q12 * cc0 - q11 * cc1) / np.clip(num - 2, 1, None)
142 | chi2[num <= 2] = 0
143 | sc0 = np.sqrt(q00 * chi2 / zz1)
144 | sc1 = np.sqrt(q02 * chi2 / zz1)
145 |
146 | return (
147 | cc0.reshape(img_shape),
148 | cc1.reshape(img_shape),
149 | sc0.reshape(img_shape),
150 | sc1.reshape(img_shape),
151 | )
152 |
153 |
154 | def rls_fit0(xdata: np.ndarray, ydata: np.ndarray | ma.MaskedArray) -> tuple:
155 | """Perform RLS regression finding linear dependence y(x) = c1 * x.
156 |
157 | Parameters
158 | ----------
159 | xdata : ndarray, shape (M,)
160 | X-coordinates of the M sample points (xdata[i], ydata[..., i])
161 | The array values have to be positive and increasing
162 | ydata : MaskedArray or ndarray, shape (..., M)
163 | Y-coordinates of the sample points
164 |
165 | Returns
166 | -------
167 | c1, std_c1 : tuple of ndarrays
168 | coefficients of the linear dependence and their standard deviations
169 |
170 | Notes
171 | -----
172 | The coefficient is set to NaN when there are no valid samples.
173 |
174 | The standard deviation can only be calculated when the number of samples
175 | is larger than one; otherwise it is set to NaN.
176 |
177 | """
178 | if xdata.size < 2:
179 | raise RuntimeError("too few points for a fit")
180 | if xdata.size != ydata.shape[-1]:
181 | raise RuntimeError("number of samples not equal for xdata, ydata")
182 |
183 | # perform all computations on 2 dimensional arrays
184 | img_shape = ydata.shape[:-1]
185 | yy1 = ydata.reshape(-1, xdata.size)
186 |
187 | # calculate weights
188 | if ma.isMaskedArray(ydata):
189 | wghts = calc_ma_weights(xdata, ma.getmaskarray(yy1))
190 | else:
191 | buff = np.concatenate(
192 | (
193 | [2 * (xdata[1] - xdata[0])],
194 | xdata[2:] - xdata[0:-2],
195 | [2 * (xdata[-1] - xdata[-2])],
196 | )
197 | )
198 | wghts = np.repeat([buff], yy1.shape[0], axis=0)
199 | wx1 = wghts / xdata
200 | wx2 = wghts / xdata**2
201 |
202 | # calculate the Q elements
203 | q00 = wghts.sum(axis=1)
204 | q11 = (wx1 * yy1).sum(axis=1)
205 | q22 = (wx2 * yy1**2).sum(axis=1)
206 |
207 | # calculate fit parameter and its variance
208 | num = yy1.count(axis=1) if ma.isMaskedArray(ydata) else len(xdata)
209 | cc1 = q11 / q00
210 | if ma.isMaskedArray(ydata):
211 | cc1[num < 1] = ma.masked
212 | chi2 = ma.abs(q22 - q00 * cc1**2) / np.clip(num - 1, 1, None)
213 | chi2[num <= 1] = ma.masked
214 | sc1 = ma.sqrt(chi2 / q00)
215 | return (
216 | cc1.reshape(img_shape).filled(np.nan),
217 | sc1.reshape(img_shape).filled(np.nan),
218 | )
219 |
220 | # using only non-MaskedArray functions
221 | cc1[num < 1] = np.nan
222 | chi2 = np.abs(q22 - q00 * cc1**2) / np.clip(num - 1, 1, None)
223 | chi2[num <= 1] = np.nan
224 | sc1 = np.sqrt(chi2 / q00)
225 | return cc1.reshape(img_shape), sc1.reshape(img_shape)
226 |
--------------------------------------------------------------------------------
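A self-contained check of rls_fit on synthetic data, illustrating the
expected shapes (xdata of length M, ydata of shape (..., M)); the truth
values and noise level are arbitrary:

    import numpy as np
    from pys5p.rls import rls_fit

    rng = np.random.default_rng(5)
    xdata = np.linspace(1.0, 10.0, 15)        # positive and increasing
    ydata = 5.0 + 2.0 * xdata + rng.normal(scale=0.01, size=(32, 32, 15))

    c0, c1, std_c0, std_c1 = rls_fit(xdata, ydata)
    assert c0.shape == (32, 32)               # one coefficient per pixel
    print(np.median(c0), np.median(c1))       # close to 5.0 and 2.0

--------------------------------------------------------------------------------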
/src/pys5p/s5p_msm.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`S5Pmsm`, class to read HDF5 datasets with its coordinates and attributes.
11 |
12 | .. warning:: Deprecated, this module is no longer maintained.
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | __all__ = ["S5Pmsm"]
18 |
19 | from copy import deepcopy
20 | from pathlib import PurePath
21 | from collections import namedtuple
22 | from typing import NamedTuple
23 | import numpy as np
24 | from h5py import Dataset
25 | from moniplot.biweight import Biweight
26 |
27 | # The class S5Pmsm read HDF5 measurement data including its attributes and
28 | # dimensions. Initialization:
29 | #
30 | # S5Pmsm attribute | hdf5 dataset | Numpy array
31 | # -------------------------------------------------------------------------
32 | # name | h5_dset.name | 'value'
33 | # value | h5_dset.value['value'] | np.squeeze(data)
34 | # | or h5_dset.value |
35 | # error | h5_dset.value['error'] | None
36 | # | or None |
37 | # coords | h5_dset.dims | [[['time',] 'row',] 'column']
38 | # units | attrs['units'] | None
39 | # long_name | attrs['long_name'] | ''
40 | # fillvalue | h5_dset.fillvalue | None
41 | # coverage | None | None
42 | #
43 | # Limited to 3 dimensions
44 |
45 |
46 | # - local functions --------------------------------
47 | def pad_rows(arr1: np.ndarray, arr2: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
48 | """Pad the array with the least numer of rows with NaN's."""
49 | if arr2.ndim == 2:
50 | if arr1.shape[0] < arr2.shape[0]:
51 | buff = arr1.copy()
52 | arr1 = np.full_like(arr2, np.nan)
53 | arr1[0 : buff.shape[0], :] = buff
54 | elif arr1.shape[0] > arr2.shape[0]:
55 | buff = arr2.copy()
56 | arr2 = np.full_like(arr1, np.nan)
57 | arr2[0 : buff.shape[0], :] = buff
58 | else:
59 | if arr1.shape[1] < arr2.shape[1]:
60 | buff = arr1.copy()
61 | arr1 = np.full_like(arr2, np.nan)
62 | arr1[:, 0 : buff.shape[1], :] = buff
63 | elif arr1.shape[1] > arr2.shape[1]:
64 | buff = arr2.copy()
65 | arr2 = np.full_like(arr1, np.nan)
66 | arr2[:, 0 : buff.shape[1], :] = buff
67 |
68 | return arr1, arr2
69 |
70 |
71 | # - class definition -------------------------------
72 | class S5Pmsm:
73 | r"""A class to hold a HDF5 dataset and its attributes.
74 |
75 | Parameters
76 | ----------
77 | dset : h5py.Dataset or ndarray
78 | h5py dataset from which the data is read, data is used to
79 | initialize S5Pmsm object
80 | data_sel : numpy slice
81 | a numpy slice generated for example `numpy.s\_`
82 | datapoint : bool
83 | to indicate that the dataset is a compound of type datapoint
84 |
85 | Returns
86 | -------
87 | numpy structure with dataset data and attributes, including data,
88 | fillvalue, coordinates, units, ...
89 |
90 | """
91 |
92 | def __init__(
93 | self: S5Pmsm,
94 | dset: Dataset | np.ndarray,
95 | data_sel: tuple[slice | int] | None = None,
96 | datapoint: bool = False,
97 | ) -> None:
98 | """Read measurement data from a Tropomi OCAL, ICM, of L1B product."""
99 | # initialize object
100 | self.name = "value"
101 | self.value = None
102 | self.error = None
103 | self.coords = None
104 | self.coverage = None
105 | self.units = None
106 | self.long_name = ""
107 | self.fillvalue = None
108 |
109 | if isinstance(dset, Dataset):
110 | self.__from_h5_dset(dset, data_sel, datapoint)
111 | else:
112 | self.__from_ndarray(dset, data_sel)
113 |
114 | def __repr__(self: S5Pmsm) -> str:
115 | """Display info on the S5Pmsm object."""
116 | res = []
117 | for key, value in self.__dict__.items():
118 | if key.startswith("__"):
119 | continue
120 | if isinstance(value, np.ndarray):
121 | res.append(f"{key}: {value.shape}")
122 | else:
123 | res.append(f"{key}: {value}")
124 |
125 | return "\n".join(res)
126 |
127 | def coord_name(self: S5Pmsm, axis: int) -> str:
128 | """Return name of coordinate."""
129 | return self.coords._fields[axis]
130 |
131 | def coord_replace(self: S5Pmsm, key: str, dims: np.ndarray) -> NamedTuple:
132 | """Change values of a coordinate."""
133 | return self.coords._replace(**{key: dims})
134 |
135 | def __from_h5_dset(
136 | self: S5Pmsm,
137 | h5_dset: Dataset,
138 | data_sel: tuple[slice | int] | None,
139 | datapoint: bool,
140 | ) -> None:
141 | """Initialize S5Pmsm object from h5py dataset."""
142 | self.name = PurePath(h5_dset.name).name
143 |
144 | # copy dataset values (and error) to object
145 | if data_sel is None:
146 | if datapoint:
147 | self.value = h5_dset["value"]
148 | self.error = h5_dset["error"]
149 | else:
150 | self.value = h5_dset[...]
151 | else:
152 | # we need to keep all dimensions to get the dimensions
153 | # of the output data right
154 | if datapoint:
155 | self.value = h5_dset["value"][data_sel]
156 | self.error = h5_dset["error"][data_sel]
157 | if isinstance(data_sel, tuple):
158 | for ii, elmnt in enumerate(data_sel):
159 | if isinstance(elmnt, int | np.int64):
160 | self.value = np.expand_dims(self.value, axis=ii)
161 | self.error = np.expand_dims(self.error, axis=ii)
162 | else:
163 | self.value = h5_dset[data_sel]
164 | if isinstance(data_sel, tuple):
165 | for ii, elmnt in enumerate(data_sel):
166 | if isinstance(elmnt, int | np.int64):
167 | self.value = np.expand_dims(self.value, axis=ii)
168 |
169 | # set default dimension names
170 | if h5_dset.ndim == 1:
171 | keys_default = ["column"]
172 | elif h5_dset.ndim == 2:
173 | keys_default = ["row", "column"]
174 | elif h5_dset.ndim == 3:
175 | keys_default = ["time", "row", "column"]
176 | else:
177 | raise ValueError("not implemented for ndim > 3")
178 |
179 | # copy all dimensions with size longer then 1
180 | keys = []
181 | dims = []
182 | for ii in range(h5_dset.ndim):
183 | if self.value.shape[ii] == 1:
184 | continue
185 |
186 | if len(h5_dset.dims[ii]) != 1: # bug in some KNMI HDF5 files
187 | keys.append(keys_default[ii])
188 | dims.append(np.arange(self.value.shape[ii]))
189 | elif self.value.shape[ii] == h5_dset.shape[ii]:
190 | buff = PurePath(h5_dset.dims[ii][0].name).name
191 | if len(buff.split()) > 1:
192 | buff = buff.split()[0]
193 | keys.append(buff)
194 | if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
195 | buff = h5_dset.dims[ii][0][:]
196 | if np.all(buff == 0):
197 | buff = np.arange(buff.size)
198 | else: # bug in some KNMI HDF5 files
199 | buff = np.arange(h5_dset.shape[ii])
200 | dims.append(buff)
201 | else:
202 | buff = PurePath(h5_dset.dims[ii][0].name).name
203 | if len(buff.split()) > 1:
204 | buff = buff.split()[0]
205 | keys.append(buff)
206 | if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
207 | buff = h5_dset.dims[ii][0][:]
208 | if np.all(buff == 0):
209 | buff = np.arange(buff.size)
210 | else: # bug in some KNMI HDF5 files
211 | buff = np.arange(h5_dset.shape[ii])
212 |
213 | if isinstance(data_sel, slice):
214 | dims.append(buff[data_sel])
215 | elif len(data_sel) == h5_dset.ndim:
216 | dims.append(buff[data_sel[ii]])
217 | elif not isinstance(data_sel, tuple):
218 | dims.append(buff[data_sel])
219 | elif ii >= len(data_sel):
220 | dims.append(buff[data_sel[-1]])
221 | else:
222 | dims.append(buff[data_sel[ii]])
223 |
224 | # add dimensions as a namedtuple
225 | coords_namedtuple = namedtuple("Coords", keys)
226 | self.coords = coords_namedtuple._make(dims)
227 |
228 | # remove all dimensions with size equal 1 from value (and error)
229 | self.value = np.squeeze(self.value)
230 | if datapoint:
231 | self.error = np.squeeze(self.error)
232 |
233 | # copy FillValue (same for value/error in a datapoint)
234 | if datapoint:
235 | self.fillvalue = h5_dset.fillvalue[0]
236 | else:
237 | self.fillvalue = h5_dset.fillvalue
238 |
239 | # copy its units
240 | if "units" in h5_dset.attrs:
241 | if isinstance(h5_dset.attrs["units"], np.ndarray):
242 | if h5_dset.attrs["units"].size == 1:
243 | self.units = h5_dset.attrs["units"][0]
244 | if isinstance(self.units, bytes):
245 | self.units = self.units.decode("ascii")
246 | else:
247 | self.units = h5_dset.attrs["units"]
248 | if isinstance(self.units[0], bytes):
249 | self.units = self.units.astype(str)
250 | else:
251 | self.units = h5_dset.attrs["units"]
252 | if isinstance(self.units, bytes):
253 | self.units = self.units.decode("ascii")
254 |
255 | # copy its long_name
256 | if "long_name" in h5_dset.attrs:
257 | if isinstance(h5_dset.attrs["long_name"], bytes):
258 | self.long_name = h5_dset.attrs["long_name"].decode("ascii")
259 | else:
260 | self.long_name = h5_dset.attrs["long_name"]
261 |
262 | def __from_ndarray(
263 | self: S5Pmsm, data: np.ndarray, data_sel: tuple[slice | int] | None
264 | ) -> None:
265 | """Initialize S5Pmsm object from a ndarray."""
266 | # copy dataset values (and error) to object
267 | if data_sel is None:
268 | self.value = np.squeeze(data)
269 | else:
270 | self.value = np.squeeze(data[data_sel])
271 |
272 | # define coordinates
273 | dims = [np.arange(sz) for sz in self.value.shape]
274 | try:
275 | self.set_coords(dims, coords_name=None)
276 | except Exception as exc:
277 | raise RuntimeError("failed to set the coordinates") from exc
278 |
279 | def copy(self: S5Pmsm) -> S5Pmsm:
280 | """Return a deep copy of the current object."""
281 | return deepcopy(self)
282 |
283 | def set_coords(
284 | self: S5Pmsm,
285 | coords_data: list[np.ndarray],
286 | coords_name: list[str] | None = None,
287 | ) -> None:
288 | """Set coordinates of data.
289 |
290 | Parameters
291 | ----------
292 | coords_data : list of ndarrays
293 | list with coordinates data for each dimension
294 | coords_name : list of strings
295 | list with the names of each dimension
296 |
297 | """
298 | if coords_name is None:
299 | if len(coords_data) == 1:
300 | keys = ["column"]
301 | elif len(coords_data) == 2:
302 | keys = ["row", "column"]
303 | elif len(coords_data) == 3:
304 | keys = ["time", "row", "column"]
305 | else:
306 | raise ValueError("not implemented for ndim > 3")
307 | else:
308 | keys = [coords_name] if isinstance(coords_name, str) else coords_name
309 |
310 | # add dimensions as a namedtuple
311 | coords_namedtuple = namedtuple("Coords", keys)
312 | self.coords = coords_namedtuple._make(coords_data)
313 |
314 | def set_coverage(
315 | self: S5Pmsm, coverage: tuple[str, str], force: bool = False
316 | ) -> None:
317 | """Set the coverage attribute, as (coverageStart, coverageEnd).
318 |
319 | Parameters
320 | ----------
321 | coverage : tuple[str, str]
322 | new value for the coverage attribute
323 | force : bool, default=False
324 | overwrite when force is true
325 |
326 | Notes
327 | -----
328 | Both elements are expected to be datetime objects.
329 |
330 | """
331 | if self.coverage is None or force:
332 | self.coverage = coverage
333 |
334 | def set_units(self: S5Pmsm, units: str | None, force: bool = False) -> None:
335 | """Set the units attribute, overwrite when force is true."""
336 | if self.units is None or force:
337 | self.units = units
338 |
339 | def set_fillvalue(self: S5Pmsm) -> None:
340 | """Set fillvalue to KNMI undefined."""
341 | if (
342 | np.issubdtype(self.value.dtype, np.floating) and self.fillvalue is None
343 | ) or self.fillvalue == 0.0:
344 | self.fillvalue = float.fromhex("0x1.ep+122")
345 |
346 | def set_long_name(self: S5Pmsm, name: str, force: bool = False) -> None:
347 | """Set the long_name attribute, overwrite when force is true."""
348 | if force or not self.long_name:
349 | self.long_name = name
350 |
351 | def fill_as_nan(self: S5Pmsm) -> None:
352 | """Replace fillvalues in data with NaN's.
353 |
354 | Works only on datasets with HDF5 datatype 'float' or 'datapoints'
355 | """
356 | if self.fillvalue == float.fromhex("0x1.ep+122"):
357 | self.value[(self.value == self.fillvalue)] = np.nan
358 | if self.error is not None:
359 | self.error[(self.error == self.fillvalue)] = np.nan
360 |
361 | def sort(self: S5Pmsm, axis: int = 0) -> None:
362 | """Sort data and its coordinate along a given axis.
363 |
364 | Parameters
365 | ----------
366 | axis : int, default=0
367 | axis for which the array will be sorted.
368 |
369 | """
370 | if not isinstance(axis, int):
371 | raise TypeError("axis not an integer")
372 | if not 0 <= axis < self.value.ndim:
373 | raise ValueError("axis out-of-range")
374 |
375 | indx = np.argsort(self.coords[axis][:])
376 | self.coords[axis][:] = self.coords[axis][indx]
377 |
378 | if axis == 0:
379 | self.value = self.value[indx, ...]
380 | if self.error is not None:
381 | if isinstance(self.error, list):
382 | self.error = (self.error[0][indx, ...], self.error[1][indx, ...])
383 | else:
384 | self.error = self.error[indx, :]
385 | elif axis == 1:
386 | self.value = self.value[:, indx, ...]
387 | if self.error is not None:
388 | if isinstance(self.error, list):
389 | self.error = (self.error[0][:, indx, :], self.error[1][:, indx, :])
390 | else:
391 | self.error = self.error[:, indx, :]
392 | elif axis == 2:
393 | self.value = self.value[:, :, indx]
394 | if self.error is not None:
395 | if isinstance(self.error, list):
396 | self.error = (self.error[0][:, :, indx], self.error[1][:, :, indx])
397 | else:
398 | self.error = self.error[:, :, indx]
399 | else:
400 | raise ValueError("S5Pmsm: implemented for ndim <= 3")
401 |
402 | def concatenate(self: S5Pmsm, msm: S5Pmsm, axis: int = 0) -> S5Pmsm:
403 | """Concatenate two measurement datasets, the current with another.
404 |
405 | Parameters
406 | ----------
407 | msm : pys5p.S5Pmsm
408 | an S5Pmsm object
409 | axis : int, default=0
410 | The axis for which the array will be joined.
411 |
412 | Returns
413 | -------
414 | The data of the new dataset is concatenated to the existing data along
415 | an existing axis. The affected coordinate is also extended.
416 |
417 | Note:
418 | - The arrays must have the same shape, except in the dimension
419 | corresponding to axis (the first, by default).
420 |
421 | """
422 | if self.name != PurePath(msm.name).name:
423 | raise TypeError("combining dataset with different name")
424 |
425 | # all but the last 2 dimensions have to be equal
426 | if self.value.shape[:-2] != msm.value.shape[:-2]:
427 | raise TypeError("all but the last 2 dimensions should be equal")
428 |
429 | if (self.error is None and msm.error is not None) or (
430 | self.error is not None and msm.error is None
431 | ):
432 | raise RuntimeError("S5Pmsm: combining non-datapoint and datapoint")
433 |
434 | # concatenate the values
435 | if axis == 0:
436 | self.value = np.concatenate((self.value, msm.value), axis=axis)
437 | elif axis == 1:
438 | if self.value.shape[0] == msm.value.shape[0]:
439 | self.value = np.concatenate((self.value, msm.value), axis=axis)
440 | else:
441 | self.value = np.concatenate(pad_rows(self.value, msm.value), axis=axis)
442 | elif axis == 2:
443 | if self.value.shape[1] == msm.value.shape[1]:
444 | self.value = np.concatenate((self.value, msm.value), axis=axis)
445 | else:
446 | self.value = np.concatenate(pad_rows(self.value, msm.value), axis=axis)
447 | else:
448 | raise ValueError("S5Pmsm: implemented for ndim <= 3")
449 |
450 | # concatenate the errors
451 | if self.error is not None and msm.error is not None:
452 | if axis == 0:
453 | self.error = np.concatenate((self.error, msm.error), axis=axis)
454 | elif axis == 1:
455 | if self.value.shape[0] == msm.value.shape[0]:
456 | self.error = np.concatenate((self.error, msm.error), axis=axis)
457 | else:
458 | self.error = np.concatenate(
459 | pad_rows(self.error, msm.error), axis=axis
460 | )
461 | elif axis == 2:
462 | if self.value.shape[1] == msm.value.shape[1]:
463 | self.error = np.concatenate((self.error, msm.error), axis=axis)
464 | else:
465 | self.error = np.concatenate(
466 | pad_rows(self.error, msm.error), axis=axis
467 | )
468 |
469 | # now extend the coordinate along the concatenation axis
470 | key = self.coord_name(axis)
471 | if msm.coords[axis][0] == 0:
472 | dims = np.concatenate(
473 | (self.coords[axis], len(self.coords[axis]) + msm.coords[axis])
474 | )
475 | else:
476 | dims = np.concatenate((self.coords[axis], msm.coords[axis]))
477 | self.coords = self.coord_replace(key, dims)
478 | return self
479 |
480 | def nanpercentile(
481 | self: S5Pmsm,
482 | vperc: int | list[float],
483 | data_sel: tuple[slice | int] | None = None,
484 | axis: int = 0,
485 | keepdims: bool = False,
486 | ) -> S5Pmsm:
487 | r"""Return percentile(s) of the data in the S5Pmsm.
488 |
489 | Parameters
490 | ----------
491 | vperc : int or list
492 | percentile(s) at which the data are evaluated; provide one, two or
493 | three values in the range [0, 100], see below.
494 | data_sel : numpy slice
495 | A numpy slice generated for example `numpy.s\_`. Can be used to skip
496 | the first and/or last frame
497 | axis : int, default=0
498 | Axis or axes along which the medians are computed.
499 | keepdims : bool, default=False
500 | If this is set to True, the axes which are reduced are left in the
501 | result as dimensions with size one. With this option, the result
502 | will broadcast correctly against the original arr.
503 |
504 | Returns
505 | -------
506 | S5Pmsm object with the original data replaced by the percentiles along
507 | one of the axis, see below. The coordinates are adjusted, accordingly.
508 |
509 | You should at least supply one percentile and at most three.
510 | vperc is an instance of 'int' or len(vperc) == 1:
511 | 'value' is replaced by its (nan-)percentile vperc
512 | 'error' is unchanged
513 | len(vperc) == 2:
514 | 'vperc' is sorted
515 | 'value' is replaced by its (nan-)median
516 | 'error' is replaced by percentile('value', (vperc[0], vperc[1]))
517 | len(vperc) == 3:
518 | 'vperc' is sorted
519 | 'value' is replaced by percentile('value', vperc[1])
520 | 'error' is replaced by percentile('value', (vperc[0], vperc[2]))
521 |
522 | """
523 | if isinstance(vperc, int):
524 | vperc = (vperc,)
525 | else:
526 | if len(vperc) == 2:
527 | vperc += (50,)
528 | # make sure that the values are sorted
529 | vperc = tuple(sorted(vperc))
530 |
531 | if len(vperc) != 1 and len(vperc) != 3:
532 | raise TypeError("vperc must have 1 or 3 elements")
533 |
534 | if data_sel is None:
535 | if self.value.size <= 1 or self.value.ndim <= axis:
536 | return self
537 | perc = np.nanpercentile(self.value, vperc, axis=axis, keepdims=keepdims)
538 | else:
539 | if self.value[data_sel].size <= 1 or self.value[data_sel].ndim <= axis:
540 | return self
541 | perc = np.nanpercentile(
542 | self.value[data_sel], vperc, axis=axis, keepdims=keepdims
543 | )
544 | if len(vperc) == 3:
545 | self.value = perc[1, ...]
546 | self.error = [perc[0, ...], perc[2, ...]]
547 | else:
548 | self.value = perc[0, ...]
549 |
550 | # adjust the coordinates
551 | if keepdims:
552 | key = self.coord_name(axis)
553 | if self.coords[axis][0] == 0:
554 | dims = [0]
555 | else:
556 | dims = np.median(self.coords[axis], keepdims=keepdims)
557 | self.coords = self.coord_replace(key, dims)
558 | else:
559 | keys = []
560 | dims = []
561 | for ii in range(self.value.ndim + 1):
562 | if ii != axis:
563 | keys.append(self.coord_name(ii))
564 | dims.append(self.coords[ii][:])
565 | coords_namedtuple = namedtuple("Coords", keys)
566 | self.coords = coords_namedtuple._make(dims)
567 |
568 | return self
569 |
570 | def biweight(
571 | self: S5Pmsm,
572 | data_sel: tuple[slice | int] | None = None,
573 | axis: int = 0,
574 | keepdims: bool = False,
575 | ) -> S5Pmsm:
576 | r"""Reduce this S5Pmsm data by applying biweight along some dimension.
577 |
578 | Parameters
579 | ----------
580 | data_sel : numpy slice
581 | A numpy slice generated for example `numpy.s\_`. Can be used to skip
582 | the first and/or last frame
583 | axis : int, default=0
584 | Axis or axes along which the medians are computed.
585 | keepdims : bool, default=False
586 | If this is set to True, the axes which are reduced are left in the
587 | result as dimensions with size one. With this option, the result
588 | will broadcast correctly against the original arr.
589 |
590 | Returns
591 | -------
592 | S5Pmsm object with its data (value & error) replaced by its biweight
593 | medians along one axis. The coordinates are adjusted, accordingly.
594 |
595 | """
596 | if data_sel is None:
597 | if self.error is not None:
598 | self.value = Biweight(self.value, axis=axis).median
599 | self.error = Biweight(self.error, axis=axis).median
600 | else:
601 | biwght = Biweight(self.value, axis=axis)
602 | self.value = biwght.median
603 | self.error = biwght.spread
604 | else:
605 | if self.error is not None:
606 | self.value = Biweight(self.value[data_sel], axis=axis).median
607 | self.error = Biweight(self.error[data_sel], axis=axis).median
608 | else:
609 | biwght = Biweight(self.value[data_sel], axis=axis)
610 | self.value = biwght.median
611 | self.error = biwght.spread
612 | if keepdims:
613 | self.value = np.expand_dims(self.value, axis=axis)
614 | self.error = np.expand_dims(self.error, axis=axis)
615 |
616 | # adjust the coordinates
617 | if keepdims:
618 | key = self.coord_name(axis)
619 | if self.coords[axis][0] == 0:
620 | dims = [0]
621 | else:
622 | dims = np.median(self.coords[axis], keepdims=keepdims)
623 | self.coords = self.coord_replace(key, dims)
624 | else:
625 | keys = []
626 | dims = []
627 | for ii in range(self.value.ndim + 1):
628 | if ii != axis:
629 | keys.append(self.coord_name(ii))
630 | dims.append(self.coords[ii][:])
631 | coords_namedtuple = namedtuple("Coords", keys)
632 | self.coords = coords_namedtuple._make(dims)
633 |
634 | return self
635 |
636 | def nanmedian(
637 | self: S5Pmsm,
638 | data_sel: tuple[slice | int] | None = None,
639 | axis: int = 0,
640 | keepdims: bool = False,
641 | ) -> S5Pmsm:
642 | r"""Reduce this S5Pmsm data by applying median along some dimension.
643 |
644 | Parameters
645 | ----------
646 | data_sel : numpy slice, optional
647 | A numpy slice generated for example `numpy.s\_`.
648 | Can be used to skip the first and/or last frame
649 | axis : int, default=0
650 | Axis or axes along which the medians are computed.
651 | keepdims : bool, default=False
652 | If this is set to True, the axes which are reduced are left in the
653 | result as dimensions with size one. With this option, the result
654 | will broadcast correctly against the original arr.
655 |
656 | Returns
657 | -------
658 | S5Pmsm object with its data (value & error) replaced by its nanmedian
659 | and standard deviation along one axis.
660 | The coordinates are adjusted, accordingly.
661 |
662 | """
663 | if data_sel is None:
664 | if self.error is not None:
665 | self.error = np.nanmedian(self.error, axis=axis, keepdims=keepdims)
666 | else:
667 | self.error = np.nanstd(self.value, ddof=1, axis=axis, keepdims=keepdims)
668 | self.value = np.nanmedian(self.value, axis=axis, keepdims=keepdims)
669 | else:
670 | if self.error is not None:
671 | self.error = np.nanmedian(
672 | self.error[data_sel], axis=axis, keepdims=keepdims
673 | )
674 | else:
675 | self.error = np.nanstd(
676 | self.value[data_sel], ddof=1, axis=axis, keepdims=keepdims
677 | )
678 | self.value = np.nanmedian(
679 | self.value[data_sel], axis=axis, keepdims=keepdims
680 | )
681 |
682 | # adjust the coordinates
683 | if keepdims:
684 | key = self.coord_name(axis)
685 | if self.coords[axis][0] == 0:
686 | dims = [0]
687 | else:
688 | dims = np.median(self.coords[axis], keepdims=keepdims)
689 | self.coords = self.coord_replace(key, dims)
690 | else:
691 | keys = []
692 | dims = []
693 | for ii in range(self.value.ndim + 1):
694 | if ii != axis:
695 | keys.append(self.coord_name(ii))
696 | dims.append(self.coords[ii][:])
697 | coords_namedtuple = namedtuple("Coords", keys)
698 | self.coords = coords_namedtuple._make(dims)
699 |
700 | return self
701 |
702 | def nanmean(
703 | self: S5Pmsm,
704 | data_sel: tuple[slice | int] | None = None,
705 | axis: int = 0,
706 | keepdims: bool = False,
707 | ) -> S5Pmsm:
708 | r"""Reduce this S5Pmsm data by applying mean along some dimension.
709 |
710 | Parameters
711 | ----------
712 | data_sel : numpy slice, optional
713 | A numpy slice generated for example `numpy.s\_`.
714 | Can be used to skip the first and/or last frame
715 | axis : int, default=0
716 | Axis or axes along which the mean are computed.
717 | keepdims : bool, default=False
718 | If this is set to True, the axes which are reduced are left in the
719 | result as dimensions with size one. With this option, the result
720 | will broadcast correctly against the original arr.
721 |
722 | Returns
723 | -------
724 | S5Pmsm object with its data (value & error) replaced by its nanmean
725 | and standard deviation along one axis.
726 | The coordinates are adjusted, accordingly.
727 |
728 | """
729 | if data_sel is None:
730 | if self.error is not None:
731 | self.error = np.nanmean(self.error, axis=axis, keepdims=keepdims)
732 | else:
733 | self.error = np.nanstd(self.value, ddof=1, axis=axis, keepdims=keepdims)
734 | self.value = np.nanmean(self.value, axis=axis, keepdims=keepdims)
735 | else:
736 | if self.error is not None:
737 | self.error = np.nanmean(
738 | self.error[data_sel], axis=axis, keepdims=keepdims
739 | )
740 | else:
741 | self.error = np.nanstd(
742 | self.value[data_sel], ddof=1, axis=axis, keepdims=keepdims
743 | )
744 | self.value = np.nanmean(self.value[data_sel], axis=axis, keepdims=keepdims)
745 |
746 | # adjust the coordinates
747 | if keepdims:
748 | key = self.coord_name(axis)
749 | if self.coords[axis][0] == 0:
750 | dims = [0]
751 | else:
752 | dims = np.mean(self.coords[axis], keepdims=keepdims)
753 | self.coords = self.coord_replace(key, dims)
754 | else:
755 | keys = []
756 | dims = []
757 | for ii in range(self.value.ndim + 1):
758 | if ii != axis:
759 | keys.append(self.coord_name(ii))
760 | dims.append(self.coords[ii][:])
761 | coords_namedtuple = namedtuple("Coords", keys)
762 | self.coords = coords_namedtuple._make(dims)
763 |
764 | return self
765 |
766 | def transpose(self: S5Pmsm) -> S5Pmsm:
767 | """Transpose data and coordinates of an S5Pmsm object."""
768 | if self.value.ndim <= 1:
769 | return self
770 |
771 | if self.error is not None:
772 | self.error = np.transpose(self.error)
773 | self.value = np.transpose(self.value)
774 |
775 | keys = []
776 | dims = []
777 | for ii in range(self.value.ndim):
778 | keys.append(self.coord_name(ii))
779 | dims.append(self.coords[ii][:])
780 | # swap the first two coordinates, consistent with the transposed data
781 | keys[0], keys[1] = keys[1], keys[0]
782 | dims[0], dims[1] = dims[1], dims[0]
786 | coords_namedtuple = namedtuple("Coords", keys)
787 | self.coords = coords_namedtuple._make(dims)
788 |
789 | return self
790 |
--------------------------------------------------------------------------------
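Despite the deprecation warning, a small sketch shows how S5Pmsm is meant
to be used with a plain ndarray (random data, purely for illustration):

    import numpy as np
    from pys5p.s5p_msm import S5Pmsm

    # ten frames of a 256 x 1000 detector, wrapped with default coordinates
    msm = S5Pmsm(np.random.rand(10, 256, 1000))
    msm.set_units("electron")
    msm.set_long_name("detector signal")

    # reduce along the time axis: value -> nanmedian, error -> nanstd
    msm.nanmedian(axis=0)
    print(msm.value.shape)      # (256, 1000)
    print(msm.coords._fields)   # ('row', 'column')

--------------------------------------------------------------------------------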
/src/pys5p/swir_region.py:
--------------------------------------------------------------------------------
1 | # This file is part of pyS5p
2 | #
3 | # https://github.com/rmvanhees/pys5p.git
4 | #
5 | # Copyright (c) 2017-2025 SRON
6 | # All Rights Reserved
7 | #
8 | # License: BSD-3-Clause
9 | """Return the usable area on the SWIR detector.
10 |
11 | There are two definitions::
12 |
13 | 'illuminated':
14 | Detector area illuminated by external sources, defined as
15 | a rectangular area where the signal is at least 50% of the
16 | maximum signal. Coordinates: rows [11:228], columns [16:991].
17 |
18 | 'level2':
19 | A smaller area used in official SWIR level 1B (ir)radiance
20 | products. Coordinates: rows [12:227], columns [20:980].
21 |
22 | Notes
23 | -----
24 | Row 257 of the SWIR detector is neglected.
25 |
26 | """
27 |
28 | __all__ = ["coords", "mask"]
29 |
30 | import numpy as np
31 |
32 |
33 | def coords(mode: str = "illuminated", band: str = "78") -> tuple[slice, slice]:
34 | """Return slice defining the illuminated region on the SWIR detector.
35 |
36 | Parameters
37 | ----------
38 | mode : {'illuminated', 'level2'}, optional
39 | default is 'illuminated'
40 | band : str, optional
41 | select band 7 or 8, default is both bands
42 |
43 | """
44 | if mode == "level2":
45 | if band == "7":
46 | return np.s_[12:227, 20:500]
47 | if band == "8":
48 | return np.s_[12:227, :480]
49 | # else
50 | return np.s_[12:227, 20:980]
51 |
52 | if band == "7":
53 | return np.s_[11:228, 16:500]
54 | if band == "8":
55 | return np.s_[11:228, :491]
56 | # else
57 | return np.s_[11:228, 16:991]
58 |
59 |
60 | def mask(mode: str = "illuminated", band: str = "78") -> np.ndarray:
61 | """Return mask of the illuminated region.
62 |
63 | Parameters
64 | ----------
65 | mode : {'illuminated', 'level2'}, optional
66 | default is 'illuminated'
67 | band : str, optional
68 | select band 7 or 8, default is both bands
69 |
70 | Notes
71 | -----
72 | Pixels within the illuminated region are set to True.
73 |
74 | """
75 | if band in ("7", "8"):
76 | res = np.full((256, 500), False)
77 | else:
78 | res = np.full((256, 1000), False)
79 |
80 | res[coords(mode, band)] = True
81 |
82 | return res
83 |
--------------------------------------------------------------------------------
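The two helpers above are typically combined with a full-size detector
image; a sketch with random data:

    import numpy as np
    from pys5p.swir_region import coords, mask

    frame = np.random.rand(256, 1000)      # full SWIR detector image

    # boolean mask: True inside rows [11:228], columns [16:991]
    illuminated = mask("illuminated")
    mean_signal = frame[illuminated].mean()

    # or slice the level-2 region directly
    sub = frame[coords("level2")]          # shape (215, 960)

--------------------------------------------------------------------------------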
/src/pys5p/swir_texp.py:
--------------------------------------------------------------------------------
1 | # This file is part of pyS5p
2 | #
3 | # https://github.com/rmvanhees/pys5p.git
4 | #
5 | # Copyright (c) 2017-2025 SRON
6 | # All Rights Reserved
7 | #
8 | # License: BSD-3-Clause
9 | """Calculate the Tropomi SWIR exposure time from detector settings."""
10 |
11 |
12 | def swir_exp_time(int_delay: int, int_hold: int) -> float:
13 | """Calculate the correct SWIR exposure time from detector settings.
14 |
15 | Parameters
16 | ----------
17 | int_delay : int
18 | parameter int_delay from the instrument_settings
19 | int_hold : int
20 | parameter int_hold from the instrument_settings
21 |
22 | Returns
23 | -------
24 | float
25 | exact (SWIR) pixel exposure time
26 |
27 | """
28 | return 1.25e-6 * (65540 - int_delay + int_hold)
29 |
--------------------------------------------------------------------------------
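This is the same formula as used in OCMio.get_exposure_time for the SWIR
bands; a quick check with arbitrary register values:

    from pys5p.swir_texp import swir_exp_time

    texp = swir_exp_time(int_delay=64880, int_hold=3668)
    print(f"{texp:.6f} s")    # 1.25e-6 * (65540 - 64880 + 3668) = 0.005410 s

--------------------------------------------------------------------------------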
/src/pys5p/version.py:
--------------------------------------------------------------------------------
1 | # This file is part of pyS5p
2 | #
3 | # https://github.com/rmvanhees/pys5p.git
4 | #
5 | # Copyright (c) 2017-2025 SRON
6 | # All Rights Reserved
7 | #
8 | # License: BSD-3-Clause
9 | """Provide access to the software version as obtained from git."""
10 |
11 | __all__ = ["pys5p_version"]
12 |
13 | from . import __version__
14 |
15 |
16 | def pys5p_version(full: bool = False, githash: bool = False) -> str:
17 | """Return the software version as obtained from git.
18 |
19 | Examples
20 | --------
21 | Show the software version of the module pys5p::
22 |
23 | > from pys5p.version import pys5p_version
24 | > pys5p_version()
25 | '2.1.5'
26 |
27 | """
28 | if full:
29 | return __version__
30 |
31 | if githash:
32 | return __version__.split("+g")[1].split(".")[0]
33 |
34 | return __version__.split("+")[0]
35 |
--------------------------------------------------------------------------------
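The githash branch assumes a setuptools-scm style version string such as
'2.1.5.dev1+g123abcd.d20250101'; the version string below is hypothetical:

    from pys5p.version import pys5p_version

    pys5p_version(full=True)     # '2.1.5.dev1+g123abcd.d20250101'
    pys5p_version(githash=True)  # '123abcd', the text between '+g' and '.'
    pys5p_version()              # '2.1.5.dev1'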