├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── pys5p.iml
│   ├── vcs.xml
│   └── workspace.xml
├── .readthedocs.yaml
├── CITATION.cff
├── ChangeLog.md
├── INSTALL.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── ToDo.md
├── docs
│   ├── Makefile
│   ├── build.rst
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── modules.rst
│   ├── pys5p.lib.rst
│   ├── pys5p.rst
│   └── quick.rst
├── examples
│   ├── unit_test_s5p_ckd.py
│   └── unit_test_s5p_lv2.py
├── pyproject.toml
├── requiments.txt
└── src
    └── pys5p
        ├── __init__.py
        ├── ckd_io.py
        ├── error_propagation.py
        ├── get_data_dir.py
        ├── icm_io.py
        ├── l1b_io.py
        ├── l1b_patch.py
        ├── lib
        │   └── __init__.py
        ├── lv2_io.py
        ├── ocm_io.py
        ├── rls.py
        ├── s5p_msm.py
        ├── swir_region.py
        ├── swir_texp.py
        └── version.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | 
 3 | # Compiled python modules.
 4 | *.pyc
 5 | 
 6 | # Setuptools distribution folder.
 7 | /dist/
 8 | /build/
 9 | /pilots/
10 | 
11 | # Python egg metadata, regenerated from source files by setuptools.
12 | .eggs
13 | *.egg-info
14 | 
15 | venv/
16 | 
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | 
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | build:
 4 |   os: "ubuntu-22.04"
 5 |   tools:
 6 |     python: "3.10"
 7 | 
 8 | python:
 9 |   # Install our python package before building the docs
10 |   install:
11 |     - method: pip
12 |       path: .
13 | 
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | title: "pyS5p: a Python interface to S5p Tropomi products"
 4 | authors:
 5 | - family-names: "van Hees"
 6 |   given-names: "Richard"
 7 |   orcid: "https://orcid.org/0000-0002-3846-0753"
 8 | license: BSD-3-Clause
 9 | license-url: "https://github.com/rmvanhees/pys5p/LICENSE"
10 | repository-code: "https://github.com/rmvanhees/pys5p"
11 | doi: 10.5281/zenodo.5665827
12 | type: software
13 | url: "https://github.com/rmvanhees/pys5p"
--------------------------------------------------------------------------------
/ChangeLog.md:
--------------------------------------------------------------------------------
 1 | version 1.0.6
 2 | =============
 3 | 
 4 | * s5p_plot.py [draw_qhist]: added grid-lines and density parameter, improved axis labels
 5 | * Rearranged source tree to comply with PEP 517, 518 (requires: setuptools 42 or later)
 6 | * Renamed lib.sw_version.py to version.py
 7 | * Removed all test-modules and examples because these are obsolete. Will be replaced by up-to-date code in future commits
 8 | * Fixed pylint warnings
 9 | * Updated Copyright line in all modules
10 | * Updated files INSTALL and README
11 | * Added ToDo, which contains a listing of new functionality to be implemented before a new minor release
12 | * Added ChangeLog
13 | 
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
 1 | # Installing pys5p
 2 | 
 3 | ## Wheels
 4 | If you have an existing Python (v3.8+) installation, pys5p can be installed
 5 | via pip from PyPI:
 6 | 
 7 |     pip install pys5p [--user]
 8 | 
 9 | 
10 | ## Python Distributions
11 | If you use a Python Distribution, the installation of pyS5p can be done on
12 | the command line via:
13 | 
14 |     conda install pys5p
15 | 
16 | for [Anaconda](https://www.anaconda.com/)/[MiniConda](http://conda.pydata.org/miniconda.html).
17 | 
18 | 
19 | ## Install from source
20 | The latest release of pys5p is available from
21 | [GitHub](https://github.com/rmvanhees/pys5p),
22 | where you can download the source code as a tar-file or zipped archive,
23 | or use git to clone the repository:
24 | 
25 |     git clone https://github.com/rmvanhees/pys5p.git
26 | 
27 | Before you can install pys5p, you need:
28 | 
29 | * Python version 3.8+ with development headers
30 | * HDF5, installed with development headers
31 | * netCDF4, installed with development headers
32 | 
33 | And have the following Python modules available:
34 | 
35 | * numpy v1.19+
36 | * h5py v3.5+
37 | * netCDF4 v1.5+
38 | * xarray v0.20+
39 | 
40 | The software is known to work using:
41 | 
42 | * HDF5 v1.8.21, netCDF4 v4.7.3 and python-netCDF4 v1.5+
43 | * HDF5 v1.10+, netCDF4 v4.7.3 or v4.8+ and python-netCDF4 v1.5+
44 | * HDF5 v1.12+, netCDF4 v4.8+ and python-netCDF4 v1.5+
45 | 
46 | You can install pys5p once you have satisfied the requirements listed above.
47 | Run at the top of the source tree:
48 | 
49 |     python3 -m build
50 |     pip3 install dist/pys5p-<version>.whl [--user]
51 | 
52 | The Python scripts can be found under `/usr/local/bin` or `$HOME/.local/bin`.
53 | 
54 | 
55 | ## Known Issues
56 | * You may need to use the environment variable SETUPTOOLS\_SCM\_PRETEND\_VERSION
57 |   if your source tree is not a git clone.
58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2016-2020 SRON - Netherlands Institute for Space Research 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | prune docs/_build 2 | prune pilots 3 | prune .DS_Store 4 | prune .idea 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyS5p 2 | [![Package Release](https://img.shields.io/pypi/v/pys5p.svg?label=version)](https://pypi.org/project/pys5p/) 3 | [![Package Status](https://img.shields.io/pypi/status/pys5p.svg?label=status)](https://pypi.org/project/pys5p/) 4 | [![PyPI Downloads](https://img.shields.io/pypi/dm/pys5p.svg?label=PyPI%20downloads)](https://github.com/rmvanhees/pys5p/) 5 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5665827.svg)](https://doi.org/10.5281/zenodo.5665827) 6 | 7 | pyS5p provides a Python interface to S5p Tropomi Level-1B (and 2) products. 8 | 9 | For more information on the Sentinel 5 precursor mission visit: 10 | 11 | * https://earth.esa.int/web/guest/missions/esa-future-missions/sentinel-5P 12 | * http://www.tropomi.eu 13 | 14 | For more information on the Tropomi Level-1B products visit: 15 | 16 | * http://www.tropomi.eu/documents/level-0-1b-products 17 | 18 | ## Documentation 19 | Online documentation is available from [Read the Docs](https://pys5p.readthedocs.io). 20 | 21 | ## Installation 22 | The module pys5p requires Python3.8+ and Python modules: h5py, netCDF4, numpy and xarray. 23 | 24 | Installation instructions are provided on [Read the Docs](https://pys5p.readthedocs.io/en/latest/build.html) or in the INSTALL file. 
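
## Example
A minimal usage sketch (illustrative only: the product filename is a
placeholder; the class, property and method shown follow the bundled
example `examples/unit_test_s5p_lv2.py`):

```python
from pys5p.lv2_io import LV2io

# Open a Tropomi level-2 product and read one dataset as a numpy array.
with LV2io("S5P_OFFL_L2__CH4____.nc") as lv2:
    print("orbit:", lv2.orbit)
    ch4 = lv2.get_dataset("methane_mixing_ratio")
```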
25 | 
26 | ## Note
27 | Most of the plotting related S/W has been moved from pyS5p (v2.1+) to [moniplot](https://pypi.org/project/moniplot).
28 | Removed are the following modules:
29 | * module biweight.py - contains a Python implementation of Tukey's biweight algorithm.
30 | * module tol_colors.py - definition of colour schemes for lines and maps that also work for colour-blind
31 |   people by [Paul Tol](https://personal.sron.nl/~pault/).
32 | * module s5p_plot.py - the class S5Pplot is rewritten and now available as MONplot in the module mon_plot.py.
--------------------------------------------------------------------------------
/ToDo.md:
--------------------------------------------------------------------------------
 1 | ToDo before release v1.1.0
 2 | ==========================
 3 | 
 4 | Add examples
 5 | ------------
 6 | * Create examples directory
 7 | * Add example code snippets to illustrate typical usage of pys5p modules
 8 | * Add README
 9 | * [TBD] The example code will probably require Tropomi data sets, which are too
10 |   large to distribute with the code. How to solve this?
11 | 
12 | Test Driven Development
13 | -----------------------
14 | * Introduce TDD with Python using Python’s built-in unittest module
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/build.rst:
--------------------------------------------------------------------------------
 1 | .. _install:
 2 | 
 3 | Installation
 4 | ============
 5 | 
 6 | Wheels
 7 | ------
 8 | 
 9 | It is highly recommended that you use a pre-built wheel of `pys5p` from PyPI.
10 | 
11 | If you have an existing Python (3.8+) installation (e.g. a python.org download,
12 | or one that comes with your OS), then on Windows, MacOS/OSX, and Linux on
13 | Intel computers, pre-built `pys5p` wheels can be installed via pip
14 | from PyPI::
15 | 
16 |     pip install [--user] pys5p
17 | 
18 | OS-Specific remarks
19 | -------------------
20 | 
21 | On a Debian Bullseye or Ubuntu 22.04 installation,
22 | we have successfully installed `pys5p` as follows::
23 | 
24 |     sudo apt install python3-numpy python3-scipy
25 |     sudo apt install python3-h5py python3-netCDF4
26 |     pip install --user pys5p
27 | 
28 | This will also install a working version of the package xarray.
29 | 
30 | .. important::
31 |    The version of xarray which comes with the Debian package
32 |    `python3-xarray` is too old, and will not work with `pys5p`.
33 | 
34 | Building from source
35 | --------------------
36 | 
37 | The latest release of `pys5p` is available from
38 | `GitHub <https://github.com/rmvanhees/pys5p>`_.
39 | You can obtain the source code using::
40 | 
41 |     git clone https://github.com/rmvanhees/pys5p.git
42 | 
43 | We develop the code using Python 3.10 with the latest stable releases of the
44 | libraries HDF5 and netCDF4, and of the Python packages
45 | numpy, h5py, netCDF4-python and xarray.
46 | 
47 | To compile the code you need the Python packages: setuptools, setuptools-scm
48 | and wheel. Then you can install `pys5p` as follows::
49 | 
50 |     python3 -m build
51 |     pip3 install dist/pys5p-<version>.whl [--user]
52 | 
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | """The Sphinx configuration file for the package pyS5p."""
 7 | 
 8 | import os
 9 | import sys
10 | from importlib import metadata
11 | 
12 | sys.path.insert(0, os.path.abspath('..'))
13 | 
14 | 
15 | # -- Project information -----------------------------------------------------
16 | 
17 | project = 'pys5p'
18 | copyright = '2022, SRON'
19 | author = 'Richard van Hees'
20 | 
21 | # The full version, including alpha/beta/rc tags
22 | release = metadata.version('pys5p').split('+')[0]
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = [
31 |     'sphinx.ext.autodoc',
32 |     'sphinx.ext.viewcode',
33 |     'sphinx.ext.napoleon'
34 | ]
35 | 
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 | 
39 | # List of patterns, relative to source directory, that match files and
40 | # directories to ignore when looking for source files.
41 | # This pattern also affects html_static_path and html_extra_path.
42 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
43 | 
44 | 
45 | # -- Options for HTML output -------------------------------------------------
46 | 
47 | # The theme to use for HTML and HTML Help pages. See the documentation for
48 | # a list of builtin themes.
49 | #
50 | html_theme = 'sphinx_rtd_theme'
51 | 
52 | # Add any paths that contain custom static files (such as style sheets) here,
53 | # relative to this directory. They are copied after the builtin static files,
54 | # so a file named "default.css" will overwrite the builtin "default.css".
55 | html_static_path = ['_static']
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. pys5p documentation master file, created by
 2 |    sphinx-quickstart on Fri Sep 30 11:35:47 2022.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Package pyS5p User Manual
 7 | ==========================
 8 | 
 9 | pyS5p provides a Python interface to S5p Tropomi Level-1B (and 2) products.
10 | 11 | For more information on the Sentinel 5 precursor mission visit: 12 | 13 | * https://earth.esa.int/web/guest/missions/esa-future-missions/sentinel-5P 14 | * http://www.tropomi.eu 15 | 16 | For more information on the Tropomi Level-1B products visit: 17 | 18 | * http://www.tropomi.eu/documents/level-0-1b-products 19 | 20 | 21 | Quick-start 22 | ----------- 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | 27 | quick 28 | build 29 | 30 | 31 | Module Documentation 32 | -------------------- 33 | 34 | .. toctree:: 35 | :maxdepth: 2 36 | 37 | modules 38 | 39 | 40 | Indices and tables 41 | ================== 42 | 43 | * :ref:`genindex` 44 | * :ref:`modindex` 45 | * :ref:`search` 46 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | src 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | pys5p 8 | -------------------------------------------------------------------------------- /docs/pys5p.lib.rst: -------------------------------------------------------------------------------- 1 | pys5p.lib package 2 | ================= 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: pys5p.lib 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/pys5p.rst: -------------------------------------------------------------------------------- 1 | pys5p package 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | pys5p.lib 11 | 12 | Submodules 13 | ---------- 14 | 15 | pys5p.ckd\_io module 16 | -------------------- 17 | 18 | .. automodule:: pys5p.ckd_io 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pys5p.error\_propagation module 24 | ------------------------------- 25 | 26 | .. automodule:: pys5p.error_propagation 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pys5p.get\_data\_dir module 32 | --------------------------- 33 | 34 | .. automodule:: pys5p.get_data_dir 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | pys5p.icm\_io module 40 | -------------------- 41 | 42 | .. automodule:: pys5p.icm_io 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | pys5p.l1b\_io module 48 | -------------------- 49 | 50 | .. 
automodule:: pys5p.l1b_io
 51 |    :members:
 52 |    :undoc-members:
 53 |    :show-inheritance:
 54 | 
 55 | pys5p.l1b\_patch module
 56 | -----------------------
 57 | 
 58 | .. automodule:: pys5p.l1b_patch
 59 |    :members:
 60 |    :undoc-members:
 61 |    :show-inheritance:
 62 | 
 63 | pys5p.lv2\_io module
 64 | --------------------
 65 | 
 66 | .. automodule:: pys5p.lv2_io
 67 |    :members:
 68 |    :undoc-members:
 69 |    :show-inheritance:
 70 | 
 71 | pys5p.ocm\_io module
 72 | --------------------
 73 | 
 74 | .. automodule:: pys5p.ocm_io
 75 |    :members:
 76 |    :undoc-members:
 77 |    :show-inheritance:
 78 | 
 79 | pys5p.rls module
 80 | ----------------
 81 | 
 82 | .. automodule:: pys5p.rls
 83 |    :members:
 84 |    :undoc-members:
 85 |    :show-inheritance:
 86 | 
 87 | pys5p.s5p\_msm module
 88 | ---------------------
 89 | 
 90 | .. automodule:: pys5p.s5p_msm
 91 |    :members:
 92 |    :undoc-members:
 93 |    :show-inheritance:
 94 | 
 95 | pys5p.swir\_region module
 96 | -------------------------
 97 | 
 98 | .. automodule:: pys5p.swir_region
 99 |    :members:
100 |    :undoc-members:
101 |    :show-inheritance:
102 | 
103 | pys5p.swir\_texp module
104 | -----------------------
105 | 
106 | .. automodule:: pys5p.swir_texp
107 |    :members:
108 |    :undoc-members:
109 |    :show-inheritance:
110 | 
111 | pys5p.version module
112 | --------------------
113 | 
114 | .. automodule:: pys5p.version
115 |    :members:
116 |    :undoc-members:
117 |    :show-inheritance:
118 | 
119 | Module contents
120 | ---------------
121 | 
122 | .. automodule:: pys5p
123 |    :members:
124 |    :undoc-members:
125 |    :show-inheritance:
126 | 
--------------------------------------------------------------------------------
/docs/quick.rst:
--------------------------------------------------------------------------------
 1 | .. _quick:
 2 | 
 3 | Quick Start Guide
 4 | =================
 5 | 
 6 | Install
 7 | -------
 8 | 
 9 | If there are wheels for your platform (mac, linux, windows on x86),
10 | you can install ``pys5p`` via pip::
11 | 
12 |     pip install [--user] pys5p
13 | 
14 | Or with `Anaconda <https://www.anaconda.com/>`_ or
15 | `Miniconda <http://conda.pydata.org/miniconda.html>`_::
16 | 
17 |     conda install pys5p
18 | 
19 | To install `pys5p` from source see :ref:`install`.
20 | 
21 | 
22 | Core concepts
23 | -------------
24 | 
25 | ...
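
First example
-------------

A minimal sketch of reading CKD with ``pys5p`` (illustrative only: the CKD
directory is an assumption, and the calls mirror the bundled example
``examples/unit_test_s5p_ckd.py``)::

    from pathlib import Path

    from pys5p.ckd_io import CKDio

    # Open the static CKD product and read the SWIR PRNU CKD
    with CKDio(Path("/nfs/Tropomi/share/ckd")) as ckd:
        prnu = ckd.prnu(bands="78")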
26 | -------------------------------------------------------------------------------- /examples/unit_test_s5p_ckd.py: -------------------------------------------------------------------------------- 1 | # This file is part of pyS5p 2 | # 3 | # https://github.com/rmvanhees/pys5p.git 4 | # 5 | # Copyright (c) 2017-2022 SRON - Netherlands Institute for Space Research 6 | # All Rights Reserved 7 | # 8 | # License: BSD-3-Clause 9 | """Perform a unit test on class CKDio.""" 10 | 11 | import argparse 12 | from pathlib import Path 13 | 14 | from pys5p.ckd_io import CKDio 15 | 16 | 17 | def main(): 18 | """Perform unit-tests on class CKDio (xarray version).""" 19 | parser = argparse.ArgumentParser( 20 | description=f"{Path(__file__).name}: run units-test on class CKDio" 21 | ) 22 | parser.add_argument( 23 | "ckd_dir", 24 | nargs=1, 25 | type=str, 26 | default=None, 27 | help=("directory with CKD data with" " static CKD in a subdirectory static"), 28 | ) 29 | args = parser.parse_args() 30 | 31 | with CKDio(args.ckd_dir[0], ckd_version=1) as ckd: 32 | print(ckd.ckd_file) 33 | for meth in dir(ckd): 34 | if ( 35 | meth.startswith("_") 36 | or meth.startswith("ckd") 37 | or meth in ("close", "fid", "get_param") 38 | ): 39 | continue 40 | print( 41 | "-------------------------", meth, "[v1]", "-------------------------" 42 | ) 43 | print(meth, getattr(ckd, meth)()) 44 | 45 | with CKDio(args.ckd_dir[0], ckd_version=2) as ckd: 46 | print(ckd.ckd_file) 47 | for meth in dir(ckd): 48 | if ( 49 | meth.startswith("_") 50 | or meth.startswith("ckd") 51 | or meth in ("close", "fid", "get_param") 52 | ): 53 | continue 54 | print( 55 | "-------------------------", meth, "[v2]", "-------------------------" 56 | ) 57 | print(meth, getattr(ckd, meth)()) 58 | 59 | 60 | # - main code -------------------------------------- 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /examples/unit_test_s5p_lv2.py: -------------------------------------------------------------------------------- 1 | # This file is part of pyS5p 2 | # 3 | # https://github.com/rmvanhees/pys5p.git 4 | # 5 | # Copyright (c) 2017-2022 SRON - Netherlands Institute for Space Research 6 | # All Rights Reserved 7 | # 8 | # License: BSD-3-Clause 9 | """Perform a unit test on class LV2io.""" 10 | 11 | import argparse 12 | from pathlib import Path 13 | 14 | import numpy as np 15 | from pys5p.lv2_io import LV2io 16 | 17 | 18 | def read_lv2(l2_product): 19 | """Read Tropomi level 2 product.""" 20 | with LV2io(l2_product) as lv2: 21 | # Class properties 22 | print("science_product: ", lv2.science_product) 23 | print("orbit: ", lv2.orbit) 24 | print("algorithm_version: ", lv2.algorithm_version) 25 | print("processor_version: ", lv2.processor_version) 26 | print("product_version: ", lv2.product_version) 27 | if not lv2.science_product: 28 | print("coverage_time: ", lv2.coverage_time) 29 | print("creation_time: ", lv2.creation_time) 30 | # Attributes 31 | print("get_attr: ", lv2.get_attr("title")) 32 | ds_name = "h2o_column" if lv2.science_product else "methane_mixing_ratio" 33 | print("get_attr: ", lv2.get_attr("long_name", ds_name)) 34 | 35 | # Time information 36 | print("ref_time: ", lv2.ref_time) 37 | print("get_time: ", lv2.get_time()) 38 | # Geolocation 39 | geo_var = "latitude_center" if lv2.science_product else "latitude" 40 | print("get_geo_data: ", lv2.get_geo_data()[geo_var].shape) 41 | # Footprints 42 | geo_var = "latitude" 43 | print("get_geo_bounds: ", 
lv2.get_geo_bounds()[geo_var].shape) 44 | print( 45 | "get_geo_bounds: ", 46 | lv2.get_geo_bounds(data_sel=np.s_[250:300, 100:110])[geo_var].shape, 47 | ) 48 | # Datasets (numpy) 49 | ds_name = "h2o_column" if lv2.science_product else "methane_mixing_ratio" 50 | print("get_dataset: ", lv2.get_dataset(ds_name).shape) 51 | print( 52 | "get_dataset: ", 53 | lv2.get_dataset(ds_name, data_sel=np.s_[250:300, 100:110]).shape, 54 | ) 55 | # Datasets (xarray) 56 | ds_name = "h2o_column" if lv2.science_product else "methane_mixing_ratio" 57 | print("get_data_as_xds: ", lv2.get_data_as_xds(ds_name)) 58 | 59 | 60 | def main(): 61 | """Perform unit testing on SWIR Level-2 products.""" 62 | # parse command-line parameters 63 | parser = argparse.ArgumentParser( 64 | description=f"{Path(__file__).name}: run units-test on class LV2io" 65 | ) 66 | parser.add_argument( 67 | "lv2_product", 68 | nargs=1, 69 | type=str, 70 | default=None, 71 | help="use this Tropomi level2 product", 72 | ) 73 | args = parser.parse_args() 74 | print(args) 75 | 76 | read_lv2(args.lv2_product[0]) 77 | 78 | 79 | # - main code -------------------------------------- 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # pyproject.toml 2 | [build-system] 3 | requires = [ 4 | "hatchling", 5 | "versioningit", 6 | ] 7 | build-backend = "hatchling.build" 8 | 9 | [project] 10 | name = "pys5p" 11 | description = "Software package to access Sentinel-5p Tropomi data products" 12 | readme = "README.md" 13 | license = "BSD-3-Clause" 14 | authors = [ 15 | {name = "Richard van Hees", email = "r.m.van.hees@sron.nl"}, 16 | {name = "Paul Tol", email = "P.J.J.Tol@sron.nl"} 17 | ] 18 | requires-python = ">=3.10" 19 | classifiers = [ 20 | "Development Status :: 5 - Production/Stable", 21 | "Intended Audience :: Developers", 22 | "Intended Audience :: Science/Research", 23 | "Operating System :: OS Independent", 24 | "Programming Language :: Python :: 3 :: Only", 25 | "Programming Language :: Python :: 3.10", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.12", 28 | "Programming Language :: Python :: 3.13", 29 | "Topic :: Scientific/Engineering :: Atmospheric Science", 30 | ] 31 | keywords = [ 32 | "Sentinel-5p", 33 | "Tropomi", 34 | ] 35 | dynamic = [ 36 | "version" 37 | ] 38 | dependencies = [ 39 | "h5py>=3.11", 40 | "moniplot>=1.0", 41 | "numpy>=1.26", 42 | "netCDF4>=1.7", 43 | "xarray>=2023.2", 44 | ] 45 | 46 | [project.scripts] 47 | 48 | [project.urls] 49 | homepage = "https://github.com/rmvanhees/pys5p" 50 | documentation = "https://pys5p.readthedocs.io/en/latest/" 51 | # Source = "https://github.com/rmvanhees/pys5p" 52 | # Changelog = "https://github.com/rmvanhees/pys5p/HISTORY.rst" 53 | "Issue tracker" = "https://github.com/rmvanhees/pys5p/issues" 54 | 55 | [tool.hatch.version] 56 | source = "versioningit" 57 | 58 | [tool.versioningit] 59 | 60 | [tool.versioningit.next-version] 61 | method = "smallest" 62 | 63 | [tool.versioningit.format] 64 | distance = "{next_version}.dev{distance}+{vcs}{rev}" 65 | # Example formatted version: 1.2.4.dev42+ge174a1f 66 | 67 | dirty = "{base_version}+d{build_date:%Y%m%d}" 68 | # Example formatted version: 1.2.3+d20230922 69 | 70 | distance-dirty = "{next_version}.dev{distance}+{vcs}{rev}.d{build_date:%Y%m%d}" 71 | # Example formatted version: 1.2.4.dev42+ge174a1f.d20230922 72 | 73 | [tool.ruff] 74 
| line-length = 88
 75 | target-version = "py312"
 76 | # exclude = ["pilots"]
 77 | 
 78 | [tool.ruff.lint]
 79 | select = [
 80 |   "D",    # pydocstyle
 81 |   "E",    # pycodestyle
 82 |   "F",    # pyflakes
 83 |   "I",    # isort
 84 |   "N",    # pep8-naming
 85 |   "W",    # pycodestyle
 86 |   "ANN",  # flake8-annotations
 87 |   "B",    # flake8-bugbear
 88 |   "ISC",  # flake8-implicit-str-concat
 89 |   "PGH",  # pygrep-hooks
 90 |   "PYI",  # flake8-pyi
 91 |   "Q",    # flake8-quotes
 92 |   "SIM",  # flake8-simplify
 93 |   "TID",  # flake8-tidy-imports
 94 |   "TCH",  # flake8-type-checking
 95 |   "NPY",  # NumPy-specific
 96 |   "PERF", # Perflint
 97 |   "RUF",  # Ruff Specific
 98 |   "UP",   # pyupgrade
 99 | ]
100 | ignore = ["D203", "D213", "ISC001"]
101 | 
102 | [tool.ruff.lint.pydocstyle]
103 | convention = "pep257"
--------------------------------------------------------------------------------
/requiments.txt:
--------------------------------------------------------------------------------
 1 | certifi==2023.7.22
 2 | cftime==1.6.2
 3 | contourpy==1.1.0
 4 | cycler==0.11.0
 5 | fonttools==4.42.1
 6 | h5py==3.9.0
 7 | kiwisolver==1.4.4
 8 | matplotlib==3.7.2
 9 | moniplot==0.5.11
10 | netCDF4==1.6.4
11 | numpy==1.25.2
12 | packaging==23.1
13 | pandas==2.0.3
14 | Pillow==10.0.0
15 | pyparsing==3.0.9
16 | python-dateutil==2.8.2
17 | pytz==2023.3
18 | setuptools-scm==7.1.0
19 | six==1.16.0
20 | typing_extensions==4.7.1
21 | tzdata==2023.3
22 | xarray==2023.8.0
--------------------------------------------------------------------------------
/src/pys5p/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is part of pys5p
 3 | #
 4 | # https://github.com/rmvanhees/pys5p.git
 5 | #
 6 | # Copyright (c) 2017-2025 SRON
 7 | # All Rights Reserved
 8 | #
 9 | # License: BSD-3-Clause
10 | 
11 | """SRON Python package `pys5p`.
12 | 
13 | It contains software to read Sentinel-5p Tropomi ICM, L1B and L2 products.
14 | """
15 | 
16 | import contextlib
17 | from importlib.metadata import PackageNotFoundError, version
18 | 
19 | with contextlib.suppress(PackageNotFoundError):
20 |     __version__ = version(__name__)
--------------------------------------------------------------------------------
/src/pys5p/ckd_io.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is part of pyS5p
 3 | #
 4 | # https://github.com/rmvanhees/pys5p.git
 5 | #
 6 | # Copyright (c) 2017-2025 SRON
 7 | # All Rights Reserved
 8 | #
 9 | # License: BSD-3-Clause
10 | """`CKDio`, class to read S5p Tropomi CKD data."""
11 | 
12 | from __future__ import annotations
13 | 
14 | __all__ = ["CKDio"]
15 | 
16 | from pathlib import Path, PosixPath
17 | from typing import Self
18 | 
19 | import h5py
20 | import numpy as np
21 | import xarray as xr
22 | from moniplot.image_to_xarray import h5_to_xr
23 | 
24 | 
25 | # - local functions ------------------------------
26 | def reject_row257(xarr: xr.DataArray | xr.Dataset) -> xr.DataArray | xr.Dataset:
27 |     """Remove row 257 from DataArray or Dataset."""
28 |     return xarr.isel(row=np.s_[0:256])
29 | 
30 | 
31 | # - class definition -------------------------------
32 | class CKDio:
33 |     """Read Tropomi CKD from the Static CKD product or from dynamic CKD products.
34 | 35 | Parameters 36 | ---------- 37 | ckd_dir : Path, default=Path('/nfs/Tropomi/share/ckd') 38 | Directory where the CKD files are stored 39 | ckd_version : int, default=1 40 | Version of the CKD 41 | ckd_file : str, optional 42 | Name of the CKD file, default=None then the CKD file is searched 43 | in the directory ckd_dir with ckd_version in the glob-string 44 | 45 | Notes 46 | ----- 47 | Not all CKD are defined or derived for all bands. 48 | You can request a CKD for one band or for a channel (bands: '12', '34', 49 | '56', '78'). Do not mix bands from different channels 50 | 51 | The option to have dynamic CKD is not used for the Tropomi mission, only 52 | for S/W version 1 a dynamic CKD product is defined. This product contained 53 | the OCAL CKD and was not updated automatically. For version 2, all CKD are 54 | stored in one product, where some CKD have a time-axis to correct any 55 | in-flight degradation. 56 | 57 | Therefore, the logic to find a CKD is implemented as follows: 58 | 59 | 1) ckd_dir, defines the base directory to search for the CKD products 60 | (see below). 61 | 2) ckd_file, defines the full path to (static) CKD product; 62 | (version 1) any product with dynamic CKD has to be in the same 63 | directory. 64 | 65 | Version 1: 66 | 67 | * Static CKD are stored in one file: glob('*_AUX_L1_CKD_*') 68 | * Dynamic CKD are stored in two files: 69 | 70 | - UVN, use glob('*_ICM_CKDUVN_*') 71 | - SWIR, use glob('*_ICM_CKDSIR_*') 72 | 73 | Version 2+: 74 | 75 | * All CKD in one file: glob('*_AUX_L1_CKD_*') 76 | * Dynamic CKD are empty 77 | 78 | """ 79 | 80 | def __init__( 81 | self: CKDio, 82 | ckd_dir: Path | None = None, 83 | ckd_version: int = 1, 84 | ckd_file: Path | None = None, 85 | ) -> None: 86 | """Create CKDio object.""" 87 | if ckd_dir is None: 88 | ckd_dir = Path("/nfs/Tropomi/share/ckd") 89 | self.ckd_version = max(1, ckd_version) 90 | self.ckd_dyn_file = None 91 | 92 | # define path to CKD product 93 | if ckd_file is None: 94 | if not ckd_dir.is_dir(): 95 | raise FileNotFoundError(f"Not found CKD directory: {ckd_dir.name}") 96 | self.ckd_dir = ckd_dir 97 | glob_str = f"*_AUX_L1_CKD_*_*_00000_{self.ckd_version:02d}_*_*.h5" 98 | if (self.ckd_dir / "static").is_dir(): 99 | res = sorted((self.ckd_dir / "static").glob(glob_str)) 100 | else: 101 | res = sorted(self.ckd_dir.glob(glob_str)) 102 | if not res: 103 | raise FileNotFoundError("Static CKD product not found") 104 | self.ckd_file = res[-1] 105 | else: 106 | if not ckd_file.is_file(): 107 | raise FileNotFoundError(f"Not found CKD file: {ckd_file.name}") 108 | self.ckd_dir = ckd_file.parent 109 | self.ckd_file = ckd_file 110 | 111 | # obtain path to dynamic CKD product (version 1, only) 112 | if self.ckd_version == 1: 113 | if (self.ckd_dir / "dynamic").is_dir(): 114 | res = sorted((self.ckd_dir / "dynamic").glob("*_ICM_CKDSIR_*")) 115 | else: 116 | res = sorted(self.ckd_dir.glob("*_ICM_CKDSIR_*")) 117 | if res: 118 | self.ckd_dyn_file = res[-1] 119 | 120 | # open access to CKD product 121 | self.fid = h5py.File(self.ckd_file, "r") 122 | 123 | def __enter__(self: CKDio) -> Self: 124 | """Initiate the context manager.""" 125 | return self 126 | 127 | def __exit__(self: CKDio, *args: object) -> bool: 128 | """Exit the context manager.""" 129 | self.close() 130 | return False # any exception is raised by the with statement. 
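    # Usage sketch (illustrative; the CKD directory shown is the class
    # default):
    #
    #     with CKDio(ckd_dir=Path("/nfs/Tropomi/share/ckd")) as ckd:
    #         dark = ckd.darkflux(bands="78")
    #
    # The context manager guarantees that `close` (below) is called, so the
    # underlying HDF5 file is always released.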
131 | 
132 |     def close(self: CKDio) -> None:
133 |         """Make sure that we close all resources."""
134 |         if self.fid is not None:
135 |             self.fid.close()
136 | 
137 |     def creation_time(self: CKDio) -> str:
138 |         """Return datetime when the CKD product was created."""
139 |         if self.ckd_version == 2:
140 |             attr = self.fid["METADATA"].attrs["production_datetime"]
141 |         else:
142 |             group = PosixPath(
143 |                 "METADATA", "earth_explorer_header", "fixed_header", "source"
144 |             )
145 |             attr = self.fid[str(group)].attrs["Creator_Date"][0]
146 | 
147 |         if isinstance(attr, bytes):
148 |             attr = attr.decode("ascii")
149 |         return attr
150 | 
151 |     def creator_version(self: CKDio) -> str:
152 |         """Return version of Tropomi L01B processor."""
153 |         group = PosixPath("METADATA", "earth_explorer_header", "fixed_header")
154 |         attr = self.fid[str(group)].attrs["File_Version"]
155 |         if self.ckd_version == 1:
156 |             attr = attr[0]
157 |         if isinstance(attr, bytes):
158 |             attr = attr.decode("ascii")
159 |         return attr
160 | 
161 |     @staticmethod
162 |     def __get_spectral_channel(bands: str) -> str:
163 |         """Check that `bands` is valid: a single band or bands of one channel.
164 | 
165 |         Parameters
166 |         ----------
167 |         bands : str
168 |             Tropomi bands [1..8] or channels ['12', '34', '56', '78'],
169 | 
170 |         """
171 |         band2channel = [
172 |             "UNKNOWN",
173 |             "UV",
174 |             "UV",
175 |             "VIS",
176 |             "VIS",
177 |             "NIR",
178 |             "NIR",
179 |             "SWIR",
180 |             "SWIR",
181 |         ]
182 | 
183 |         if not 0 < len(bands) <= 2:
184 |             raise ValueError("read per band or channel, only")
185 | 
186 |         if (
187 |             len(bands) == 2
188 |             and band2channel[int(bands[0])] != band2channel[int(bands[1])]
189 |         ):
190 |             raise ValueError("bands should be of the same channel")
191 | 
192 |         return band2channel[int(bands[0])]
193 | 
194 |     def get_param(self: CKDio, ds_name: str, band: str = "7") -> np.ndarray | float:
195 |         """Return value(s) of a CKD parameter from the Static CKD product.
196 | 
197 |         Parameters
198 |         ----------
199 |         ds_name : str
200 |             Name of the HDF5 dataset, e.g. 'pixel_full_well'
201 |         band : str, default='7'
202 |             Band identifier '1', '2', ..., '8'
203 | 
204 |         Returns
205 |         -------
206 |         numpy.ndarray or scalar
207 |             CKD parameter value
208 | 
209 |         Notes
210 |         -----
211 |         Datasets of size=1 are returned as a scalar
212 | 
213 |         Handy function for scalar HDF5 datasets, such as:
214 | 
215 |         - dc_reference_temp
216 |         - dpqf_threshold
217 |         - pixel_full_well
218 |         - pixel_fw_flag_thresh
219 | 
220 |         """
221 |         if not 1 <= int(band) <= 8:
222 |             raise ValueError("band must be between 1 and 8")
223 | 
224 |         if ds_name not in self.fid[f"/BAND{band}"]:
225 |             raise ValueError("dataset not available")
226 | 
227 |         return self.fid[f"/BAND{band}/{ds_name}"][()]
228 | 
229 |     # ---------- band or channel CKD's ----------
230 |     def dn2v_factors(self: CKDio) -> np.ndarray:
231 |         """Return digital number to Volt CKD, SWIR only.
232 | 
233 |         Notes
234 |         -----
235 |         The DN2V factor has no error attached to it.
236 | 
237 |         """
238 |         return np.concatenate(
239 |             (
240 |                 self.fid["/BAND7/dn2v_factor_swir"][2:],
241 |                 self.fid["/BAND8/dn2v_factor_swir"][2:],
242 |             )
243 |         )
244 | 
245 |     def v2c_factors(self: CKDio) -> np.ndarray:
246 |         """Return Voltage to Charge CKD, SWIR only.
247 | 
248 |         Notes
249 |         -----
250 |         The V2C factor has no error attached to it.
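
        Examples
        --------
        A minimal sketch (assumes the default CKD directory exists)::

          >>> ckd = CKDio()
          >>> v2c = ckd.v2c_factors()   # factors for both SWIR bands
          >>> ckd.close()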
251 | 252 | """ 253 | # pylint: disable=no-member 254 | return np.concatenate( 255 | ( 256 | self.fid["/BAND7/v2c_factor_swir"].fields("value")[2:], 257 | self.fid["/BAND8/v2c_factor_swir"].fields("value")[2:], 258 | ) 259 | ) 260 | 261 | # ---------- spectral-channel CKD's ---------- 262 | def __rd_dataset(self: CKDio, dset_name: str, bands: str) -> xr.Dataset | None: 263 | """General function to read non-compound dataset into xarray::Dataset. 264 | 265 | Parameters 266 | ---------- 267 | dset_name: str 268 | name (including path) of the dataset as '/BAND{}/' 269 | bands : str 270 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'], 271 | 272 | Returns 273 | ------- 274 | xarray.Dataset 275 | parameters of CKD with name 'dset_name' 276 | 277 | """ 278 | ckd_val = None 279 | for band in bands: 280 | # try Static-CKD product 281 | if dset_name.format(band) in self.fid: 282 | if ckd_val is None: 283 | ckd_val = h5_to_xr(self.fid[dset_name.format(band)]) 284 | else: 285 | ckd_val = xr.concat( 286 | (ckd_val, h5_to_xr(self.fid[dset_name.format(band)])), 287 | dim="column", 288 | ) 289 | # try Dynamic-CKD product 290 | else: 291 | dyn_fid = h5py.File(self.ckd_dyn_file, "r") 292 | if dset_name.format(band) in dyn_fid: 293 | if ckd_val is None: 294 | ckd_val = h5_to_xr(dyn_fid[dset_name.format(band)]) 295 | else: 296 | ckd_val = xr.concat( 297 | (ckd_val, h5_to_xr(dyn_fid[dset_name.format(band)])), 298 | dim="column", 299 | ) 300 | dyn_fid.close() 301 | 302 | if ckd_val is None: 303 | return None 304 | 305 | # Use NaN as FillValue 306 | ckd_val = ckd_val.where(ckd_val != float.fromhex("0x1.ep+122"), other=np.nan) 307 | 308 | # combine DataArrays to Dataset 309 | return xr.Dataset({"value": ckd_val}, attrs=ckd_val.attrs) 310 | 311 | def __rd_datapoints(self: CKDio, dset_name: str, bands: str) -> xr.Dataset | None: 312 | """General function to read datapoint dataset into xarray::Dataset. 
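
        Here a 'datapoint' dataset is a compound HDF5 dataset with a 'value'
        and an 'error' field; both fields are read (via the 'field' arguments
        below) and combined into a single xarray Dataset.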
313 | 314 | Parameters 315 | ---------- 316 | dset_name: str 317 | name (including path) of the dataset as '/BAND{}/' 318 | bands : str 319 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'], 320 | default: '78' 321 | 322 | Returns 323 | ------- 324 | xarray.Dataset 325 | parameters (value and uncertainty) of CKD with name 'dset_name' 326 | 327 | """ 328 | ckd_val = None 329 | ckd_err = None 330 | for band in bands: 331 | # try Static-CKD product 332 | if dset_name.format(band) in self.fid: 333 | if ckd_val is None: 334 | ckd_val = h5_to_xr(self.fid[dset_name.format(band)], field="value") 335 | ckd_err = h5_to_xr(self.fid[dset_name.format(band)], field="error") 336 | else: 337 | ckd_val = xr.concat( 338 | ( 339 | ckd_val, 340 | h5_to_xr(self.fid[dset_name.format(band)], field="value"), 341 | ), 342 | dim="column", 343 | ) 344 | ckd_err = xr.concat( 345 | ( 346 | ckd_err, 347 | h5_to_xr(self.fid[dset_name.format(band)], field="error"), 348 | ), 349 | dim="column", 350 | ) 351 | # try Dynamic-CKD product 352 | else: 353 | dyn_fid = h5py.File(self.ckd_dyn_file, "r") 354 | if dset_name.format(band) in dyn_fid: 355 | if ckd_val is None: 356 | ckd_val = h5_to_xr( 357 | dyn_fid[dset_name.format(band)], field="value" 358 | ) 359 | ckd_err = h5_to_xr( 360 | dyn_fid[dset_name.format(band)], field="error" 361 | ) 362 | else: 363 | ckd_val = xr.concat( 364 | ( 365 | ckd_val, 366 | h5_to_xr( 367 | dyn_fid[dset_name.format(band)], field="value" 368 | ), 369 | ), 370 | dim="column", 371 | ) 372 | ckd_err = xr.concat( 373 | ( 374 | ckd_err, 375 | h5_to_xr( 376 | dyn_fid[dset_name.format(band)], field="error" 377 | ), 378 | ), 379 | dim="column", 380 | ) 381 | dyn_fid.close() 382 | 383 | if ckd_val is None: 384 | return None 385 | 386 | # Use NaN as FillValue 387 | ckd_val = ckd_val.where(ckd_val != float.fromhex("0x1.ep+122"), other=np.nan) 388 | ckd_err = ckd_err.where(ckd_err != float.fromhex("0x1.ep+122"), other=np.nan) 389 | 390 | # combine DataArrays to Dataset 391 | return xr.Dataset({"value": ckd_val, "error": ckd_err}, attrs=ckd_val.attrs) 392 | 393 | # ---------- static CKD's ---------- 394 | def absirr(self: CKDio, qvd: int = 1, bands: str = "78") -> xr.Dataset: 395 | """Return absolute irradiance responsivity. 396 | 397 | Parameters 398 | ---------- 399 | qvd : int, default: 1 400 | Tropomi QVD identifier. Valid values are 1 or 2 401 | bands : str, default: '78' 402 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'] 403 | 404 | """ 405 | try: 406 | channel = self.__get_spectral_channel(bands) 407 | except Exception as exc: 408 | raise RuntimeError(exc) from exc 409 | 410 | dset_name = "/BAND{}" + f"/abs_irr_conv_factor_qvd{qvd}" 411 | ckd = self.__rd_datapoints(dset_name, bands) 412 | if "7" in bands or "8" in bands: 413 | ckd = reject_row257(ckd) 414 | ckd.attrs["long_name"] = f"{channel} absolute irradiance CKD (QVD={qvd})" 415 | 416 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 417 | 418 | def absrad(self: CKDio, bands: str = "78") -> xr.Dataset: 419 | """Return absolute radiance responsivity. 
420 | 421 | Parameters 422 | ---------- 423 | bands : str, default: '78' 424 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'] 425 | 426 | """ 427 | try: 428 | channel = self.__get_spectral_channel(bands) 429 | except Exception as exc: 430 | raise RuntimeError(exc) from exc 431 | 432 | dset_name = "/BAND{}/abs_rad_conv_factor" 433 | ckd = self.__rd_datapoints(dset_name, bands) 434 | if "7" in bands or "8" in bands: 435 | ckd = reject_row257(ckd) 436 | ckd.attrs["long_name"] = f"{channel} absolute radiance CKD" 437 | 438 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 439 | 440 | def memory(self: CKDio) -> xr.Dataset: 441 | """Return memory CKD, SWIR only.""" 442 | column = None 443 | ckd_parms = [ 444 | "mem_lin_neg_swir", 445 | "mem_lin_pos_swir", 446 | "mem_qua_neg_swir", 447 | "mem_qua_pos_swir", 448 | ] 449 | 450 | ckd = xr.Dataset() 451 | ckd.attrs["long_name"] = "SWIR memory CKD" 452 | for key in ckd_parms: 453 | dset_name = f"/BAND7/{key}" 454 | ckd_val = h5_to_xr(self.fid[dset_name], field="value") 455 | ckd_err = h5_to_xr(self.fid[dset_name], field="error") 456 | dset_name = f"/BAND8/{key}" 457 | ckd_val = xr.concat( 458 | (ckd_val, h5_to_xr(self.fid[dset_name], field="value")), dim="column" 459 | ) 460 | if column is None: 461 | column = np.arange(ckd_val.column.size, dtype="u4") 462 | ckd_val = ckd_val.assign_coords(column=column) 463 | ckd_err = xr.concat( 464 | (ckd_err, h5_to_xr(self.fid[dset_name], field="error")), dim="column" 465 | ) 466 | ckd_err = ckd_err.assign_coords(column=column) 467 | ckd[key.replace("swir", "value")] = reject_row257(ckd_val) 468 | ckd[key.replace("swir", "error")] = reject_row257(ckd_err) 469 | 470 | return ckd 471 | 472 | def noise(self: CKDio, bands: str = "78") -> xr.Dataset: 473 | """Return readout-noise CKD, SWIR only. 474 | 475 | Parameters 476 | ---------- 477 | bands : str, default: '78' 478 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'] 479 | 480 | """ 481 | dset_name = "/BAND{}/readout_noise_swir" 482 | ckd = reject_row257(self.__rd_dataset(dset_name, bands)) 483 | ckd.attrs["long_name"] = "SWIR readout-noise CKD" 484 | 485 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 486 | 487 | def prnu(self: CKDio, bands: str = "78") -> xr.Dataset: 488 | """Return Pixel Response Non-Uniformity (PRNU). 489 | 490 | Parameters 491 | ---------- 492 | bands : str, default: '78' 493 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'] 494 | 495 | """ 496 | try: 497 | channel = self.__get_spectral_channel(bands) 498 | except Exception as exc: 499 | raise RuntimeError(exc) from exc 500 | 501 | ckd = self.__rd_datapoints("/BAND{}/PRNU", bands) 502 | if "7" in bands or "8" in bands: 503 | ckd = reject_row257(ckd) 504 | ckd.attrs["long_name"] = f"{channel} PRNU CKD" 505 | 506 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 507 | 508 | def relirr(self: CKDio, qvd: int = 1, bands: str = "78") -> tuple[dict] | None: 509 | """Return relative irradiance correction. 510 | 511 | Parameters 512 | ---------- 513 | bands : str, default: '78' 514 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'] 515 | qvd : int 516 | Tropomi QVD identifier. 
Valid values are 1 or 2, default: 1 517 | 518 | Returns 519 | ------- 520 | dict 521 | CKD for relative irradiance correction as dictionaries with keys: 522 | 523 | - band: Tropomi spectral band ID 524 | - mapping_cols: coarse irregular mapping of the columns 525 | - mapping_rows: coarse irregular mapping of the rows 526 | - cheb_coefs: Chebyshev parameters for elevation and azimuth \ 527 | for pixels on a coarse irregular grid 528 | 529 | """ 530 | try: 531 | _ = self.__get_spectral_channel(bands) 532 | except Exception as exc: 533 | raise RuntimeError(exc) from exc 534 | 535 | res = () 536 | for band in bands: 537 | ckd = {"band": int(band)} 538 | 539 | dsname = f"/BAND{band}/rel_irr_coarse_mapping_vert" 540 | ckd["mapping_rows"] = self.fid[dsname][:].astype(int) 541 | 542 | dsname = f"/BAND{band}/rel_irr_coarse_mapping_hor" 543 | # pylint: disable=no-member 544 | mapping_hor = self.fid[dsname][:].astype(int) 545 | mapping_hor[mapping_hor > 1000] -= 2**16 546 | ckd["mapping_cols"] = mapping_hor 547 | 548 | dsname = f"/BAND{band}/rel_irr_coarse_func_cheb_qvd{qvd}" 549 | ckd["cheb_coefs"] = self.fid[dsname]["coefs"][:] 550 | res += (ckd,) 551 | 552 | return res if res else None 553 | 554 | def saa(self: CKDio) -> dict: 555 | """Return definition of the SAA region.""" 556 | return {"lat": self.fid["saa_latitude"][:], "lon": self.fid["saa_longitude"][:]} 557 | 558 | def wavelength(self: CKDio, bands: str = "78") -> xr.Dataset: 559 | """Return wavelength CKD. 560 | 561 | Parameters 562 | ---------- 563 | bands : str, default: '78' 564 | Tropomi bands [1..8] or channels ['12', '34', '56', '78'] 565 | 566 | Notes 567 | ----- 568 | The wavelength CKD has no error attached to it. 569 | 570 | """ 571 | try: 572 | channel = self.__get_spectral_channel(bands) 573 | except Exception as exc: 574 | raise RuntimeError(exc) from exc 575 | 576 | dset_name = "/BAND{}/wavelength_map" 577 | ckd = self.__rd_datapoints(dset_name, bands) 578 | if "7" in bands or "8" in bands: 579 | ckd = reject_row257(ckd) 580 | ckd.attrs["long_name"] = f"{channel} wavelength CKD" 581 | 582 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 583 | 584 | # ---------- static or dynamic CKD's ---------- 585 | def darkflux(self: CKDio, bands: str = "78") -> xr.Dataset: 586 | """Return dark-flux CKD, SWIR only. 587 | 588 | Parameters 589 | ---------- 590 | bands : str, default: '78' 591 | Tropomi SWIR bands '7', '8' or both '78' 592 | 593 | """ 594 | dset_name = "/BAND{}/long_term_swir" 595 | ckd = reject_row257(self.__rd_datapoints(dset_name, bands)) 596 | ckd.attrs["long_name"] = "SWIR dark-flux CKD" 597 | 598 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 599 | 600 | def offset(self: CKDio, bands: str = "78") -> xr.Dataset: 601 | """Return offset CKD, SWIR only. 602 | 603 | Parameters 604 | ---------- 605 | bands : str, default: '78' 606 | Tropomi SWIR bands '7', '8' or both '78' 607 | 608 | """ 609 | dset_name = "/BAND{}/analog_offset_swir" 610 | ckd = reject_row257(self.__rd_datapoints(dset_name, bands)) 611 | ckd.attrs["long_name"] = "SWIR offset CKD" 612 | 613 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 614 | 615 | def pixel_quality(self: CKDio, bands: str = "78") -> xr.Dataset: 616 | """Return detector pixel-quality mask (float [0, 1]), SWIR only. 
617 | 618 | Parameters 619 | ---------- 620 | bands : str, default: '78' 621 | Tropomi SWIR bands '7', '8' or both '78' 622 | 623 | """ 624 | dset_name = "/BAND{}/dpqf_map" 625 | ckd = reject_row257(self.__rd_dataset(dset_name, bands)) 626 | ckd.attrs["long_name"] = "SWIR pixel-quality CKD" 627 | 628 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 629 | 630 | def dpqf( 631 | self: CKDio, threshold: float | None = None, bands: str = "78" 632 | ) -> xr.Dataset: 633 | """Return detector pixel-quality flags (boolean), SWIR only. 634 | 635 | Parameters 636 | ---------- 637 | threshold: float, optional 638 | Value between [0..1], default is to read the threshold from CKD 639 | bands : str, default='78' 640 | Tropomi SWIR bands '7', '8', or both '78' 641 | 642 | Returns 643 | ------- 644 | numpy ndarray 645 | 646 | """ 647 | dpqf = None 648 | if threshold is None: 649 | threshold = self.fid["/BAND7/dpqf_threshold"][:] 650 | 651 | # try Static-CKD product 652 | if "/BAND7/dpqf_map" in self.fid: 653 | if bands == "7": 654 | dpqf = self.fid["/BAND7/dpqf_map"][:-1, :] < threshold 655 | elif bands == "8": 656 | dpqf = self.fid["/BAND8/dpqf_map"][:-1, :] < threshold 657 | elif bands == "78": 658 | dpqf_b7 = self.fid["/BAND7/dpqf_map"][:-1, :] 659 | dpqf_b8 = self.fid["/BAND8/dpqf_map"][:-1, :] 660 | dpqf = np.hstack((dpqf_b7, dpqf_b8)) < threshold 661 | else: 662 | # try Dynamic-CKD product 663 | with h5py.File(self.ckd_dyn_file, "r") as fid: 664 | if bands == "7": 665 | dpqf = fid["/BAND7/dpqf_map"][:-1, :] < threshold 666 | elif bands == "8": 667 | dpqf = fid["/BAND8/dpqf_map"][:-1, :] < threshold 668 | elif bands == "78": 669 | dpqf_b7 = fid["/BAND7/dpqf_map"][:-1, :] 670 | dpqf_b8 = fid["/BAND8/dpqf_map"][:-1, :] 671 | dpqf = np.hstack((dpqf_b7, dpqf_b8)) < threshold 672 | 673 | return dpqf 674 | 675 | def saturation(self: CKDio) -> xr.Dataset: 676 | """Return pixel-saturation values (pre-offset), SWIR only.""" 677 | dset_name = "/BAND{}/saturation_preoffset" 678 | ckd_file = self.ckd_dir / "OCAL" / "ckd.saturation_preoffset.detector4.nc" 679 | with h5py.File(ckd_file, "r") as fid: 680 | ckd_val = xr.concat( 681 | ( 682 | h5_to_xr(fid[dset_name.format(7)]), 683 | h5_to_xr(fid[dset_name.format(8)]), 684 | ), 685 | dim="column", 686 | ) 687 | 688 | ckd = xr.Dataset({"value": ckd_val}, attrs=ckd_val.attrs) 689 | ckd = reject_row257(ckd) 690 | ckd.attrs["long_name"] = "SWIR pixel-saturation CKD (pre-offset)" 691 | 692 | return ckd.assign_coords(column=np.arange(ckd.column.size, dtype="u4")) 693 | -------------------------------------------------------------------------------- /src/pys5p/error_propagation.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of pyS5p 3 | # 4 | # https://github.com/rmvanhees/pys5p.git 5 | # 6 | # Copyright (c) 2017-2025 SRON 7 | # All Rights Reserved 8 | # 9 | # License: BSD-3-Clause 10 | """Routines to divide or add (partial) uncertainties.""" 11 | 12 | from __future__ import annotations 13 | 14 | __all__ = ["unc_div", "unc_sum"] 15 | 16 | import numpy as np 17 | 18 | 19 | def unc_div( 20 | value_a: np.ndarray, sigma_a: np.ndarray, value_b: np.ndarray, sigma_b: np.ndarray 21 | ) -> np.ndarray: 22 | r"""Absolute error for parameter `a` divided by `b`. 23 | 24 | .. 
math:: (a / b) * \sqrt{(\sigma_a / a)^2 + (\sigma_b / b)^2}
25 |     """
26 |     if not value_a.shape == value_b.shape == sigma_a.shape == sigma_b.shape:
27 |         raise TypeError("dimensions of input arrays are not the same")
28 | 
29 |     mask = (
30 |         np.isfinite(value_a)
31 |         & np.isfinite(sigma_a)
32 |         & np.isfinite(value_b)
33 |         & np.isfinite(sigma_b)
34 |     )
35 | 
36 |     if np.sum(mask) == sigma_a.size:
37 |         return (value_a / value_b) * np.sqrt(
38 |             (sigma_a / value_a) ** 2 + (sigma_b / value_b) ** 2
39 |         )
40 | 
41 |     res = np.full(sigma_a.shape, np.nan)
42 |     res[mask] = (value_a[mask] / value_b[mask]) * np.sqrt(
43 |         (sigma_a[mask] / value_a[mask]) ** 2 + (sigma_b[mask] / value_b[mask]) ** 2
44 |     )
45 |     return res
46 | 
47 | 
48 | def unc_sum(sigma_a: np.ndarray, sigma_b: np.ndarray) -> np.ndarray:
49 |     r"""Absolute error for the sum of the parameters `a` and `b`.
50 | 
51 |     .. math:: \sqrt{\sigma_a^2 + \sigma_b^2}
52 |     """
53 |     if sigma_a.shape != sigma_b.shape:
54 |         raise TypeError("dimensions of sigma_a and sigma_b are not the same")
55 | 
56 |     mask = np.isfinite(sigma_a) & np.isfinite(sigma_b)
57 | 
58 |     if np.sum(mask) == sigma_a.size:
59 |         return np.sqrt(sigma_a**2 + sigma_b**2)
60 | 
61 |     res = np.full(sigma_a.shape, np.nan)
62 |     res[mask] = np.sqrt(sigma_a[mask] ** 2 + sigma_b[mask] ** 2)
63 | 
64 |     return res
65 | 
--------------------------------------------------------------------------------
/src/pys5p/get_data_dir.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is part of pyS5p
 3 | #
 4 | # https://github.com/rmvanhees/pys5p.git
 5 | #
 6 | # Copyright (c) 2017-2025 SRON
 7 | # All Rights Reserved
 8 | #
 9 | # License: BSD-3-Clause
10 | """Routine `get_data_dir` to discover test-datasets on your system."""
11 | 
12 | from __future__ import annotations
13 | 
14 | from os import environ
15 | from pathlib import Path
16 | 
17 | 
18 | def get_data_dir() -> str | None:
19 |     """Obtain directory with test datasets.
20 | 
21 |     Limited to UNIX/Linux/macOS operating systems
22 | 
23 |     This module checks if the following directories are available:
24 |       - /data/$USER/pys5p-data
25 |       - /Users/$USER/pys5p-data
26 |       - environment variable PYS5P_DATA_DIR
27 | 
28 |     It expects the data to be organized in the subdirectories:
29 |       - CKD which should contain the SWIR dpqf CKD
30 |       - OCM which should contain at least one directory of an on-ground
31 |         calibration measurement with one or more OCAL LX products.
32 |       - L1B which should contain at least one offline calibration, irradiance
33 |         and radiance product.
34 |       - ICM which should contain at least one in-flight calibration product.
35 |     """
36 |     try:
37 |         user = environ["USER"]
38 |     except KeyError:
39 |         print("*** Fatal: environment variable USER not set")
40 |         return None
41 | 
42 |     guesses_data_dir = [f"/data/{user}/pys5p-data", f"/Users/{user}/pys5p-data"]
43 | 
44 |     try:
45 |         _ = environ["PYS5P_DATA_DIR"]
46 |     except KeyError:
47 |         pass
48 |     else:
49 |         guesses_data_dir.append(environ["PYS5P_DATA_DIR"])
50 | 
51 |     for key in guesses_data_dir:
52 |         if Path(key).is_dir():
53 |             return key
54 | 
55 |     raise FileNotFoundError("no pys5p-data directory found")
56 | 
--------------------------------------------------------------------------------
/src/pys5p/l1b_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`L1Bio`, class to access Tropomi L1B products."""
11 | 
12 | from __future__ import annotations
13 | 
14 | __all__ = ["L1Bio", "L1BioENG", "L1BioIRR", "L1BioRAD"]
15 | 
16 | from datetime import datetime, timedelta
17 | from pathlib import Path, PurePosixPath
18 | from typing import TYPE_CHECKING, Self
19 | 
20 | import h5py
21 | import numpy as np
22 | from moniplot.biweight import Biweight
23 | from setuptools_scm import get_version
24 | 
25 | from .swir_texp import swir_exp_time
26 | 
27 | if TYPE_CHECKING:
28 |     from collections.abc import Iterable
29 | 
30 | # - global parameters ------------------------------
31 | 
32 | 
33 | # - local functions --------------------------------
34 | def pad_rows(arr1: np.ndarray, arr2: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
35 |     """Pad the smaller array with rows of NaN's, so both have the same number of rows."""
36 |     if arr2.ndim == 1:
37 |         pass
38 |     elif arr2.ndim == 2:
39 |         if arr1.shape[0] < arr2.shape[0]:
40 |             buff = arr1.copy()
41 |             arr1 = np.full(arr2.shape, np.nan, dtype=arr2.dtype)
42 |             arr1[0 : buff.shape[0], :] = buff
43 |         elif arr1.shape[0] > arr2.shape[0]:
44 |             buff = arr2.copy()
45 |             arr2 = np.full(arr1.shape, np.nan, dtype=arr2.dtype)
46 |             arr2[0 : buff.shape[0], :] = buff
47 |     else:
48 |         if arr1.shape[1] < arr2.shape[1]:
49 |             buff = arr1.copy()
50 |             arr1 = np.full(arr2.shape, np.nan, dtype=arr2.dtype)
51 |             arr1[:, 0 : buff.shape[1], :] = buff
52 |         elif arr1.shape[1] > arr2.shape[1]:
53 |             buff = arr2.copy()
54 |             arr2 = np.full(arr1.shape, np.nan, dtype=arr2.dtype)
55 |             arr2[:, 0 : buff.shape[1], :] = buff
56 | 
57 |     return arr1, arr2
58 | 
59 | 
60 | # - class definition -------------------------------
61 | class L1Bio:
62 |     """Class with methods to access Tropomi L1B calibration products.
63 | 
64 |     The L1b calibration products are available for UVN (band 1-6)
65 |     and SWIR (band 7-8).
66 | 
67 |     Parameters
68 |     ----------
69 |     l1b_product : Path | str
70 |         name of the Tropomi L1B product
71 |     readwrite : bool, default=False
72 |         open file in read/write mode
73 |     verbose : bool, default=False
74 |         be verbose
75 | 
76 |     """
77 | 
78 |     band_groups = ("/BAND%_CALIBRATION", "/BAND%_IRRADIANCE", "/BAND%_RADIANCE")
79 |     geo_dset = "satellite_latitude,satellite_longitude"
80 |     msm_type = None
81 | 
82 |     def __init__(
83 |         self: L1Bio,
84 |         l1b_product: Path | str,
85 |         readwrite: bool = False,
86 |         verbose: bool = False,
87 |     ) -> None:
88 |         """Initialize access to a Tropomi offline L1b product."""
89 |         # open L1b product as HDF5 file
90 |         l1b_product = Path(l1b_product)
91 |         if not l1b_product.is_file():
92 |             raise FileNotFoundError(f"{l1b_product.name} does not exist")
93 | 
94 |         # initialize private class-attributes
95 |         self.__rw = readwrite
96 |         self.__verbose = verbose
97 |         self.__msm_path = None
98 |         self.__patched_msm = []
99 |         self.filename = l1b_product
100 |         self.bands = ""
101 | 
102 |         if readwrite:
103 |             self.fid = h5py.File(l1b_product, "r+")
104 |         else:
105 |             self.fid = h5py.File(l1b_product, "r")
106 | 
107 |     def __iter__(self: L1Bio) -> None:
108 |         """Allow iteration."""
109 |         for attr in sorted(self.__dict__):
110 |             if not attr.startswith("__"):
111 |                 yield attr
112 | 
113 |     def __enter__(self: L1Bio) -> Self:
114 |         """Initiate the context manager."""
115 |         return self
116 | 
117 |     def __exit__(self: L1Bio, *args: object) -> bool:
118 |         """Exit the context manager."""
119 |         self.close()
120 |         return False  # any exception is raised by the with statement.
121 | 
122 |     def close(self: L1Bio) -> None:
123 |         """Close resources.
124 | 
125 |         Notes
126 |         -----
127 |         Before closing the product, we make sure that the output product
128 |         describes what has been altered by the S/W, to keep any change
129 |         traceable.
130 | 
131 |         In case the L1b product is altered, the attributes listed below are
132 |         added to the group: ``/METADATA/SRON_METADATA``:
133 | 
134 |         - dateStamp ('now')
135 |         - Git-version of S/W
136 |         - list of patched datasets
137 |         - auxiliary datasets used by patch-routines
138 | 
139 |         """
140 |         if self.fid is None:
141 |             return
142 | 
143 |         if self.__patched_msm:
144 |             # pylint: disable=no-member
145 |             sgrp = self.fid.require_group("/METADATA/SRON_METADATA")
146 |             sgrp.attrs["dateStamp"] = datetime.utcnow().isoformat()
147 |             sgrp.attrs["git_tag"] = get_version(root="..", relative_to=__file__)
148 |             if "patched_datasets" not in sgrp:
149 |                 dtype = h5py.special_dtype(vlen=str)
150 |                 dset = sgrp.create_dataset(
151 |                     "patched_datasets",
152 |                     (len(self.__patched_msm),),
153 |                     maxshape=(None,),
154 |                     dtype=dtype,
155 |                 )
156 |                 dset[:] = np.asarray(self.__patched_msm)
157 |             else:
158 |                 dset = sgrp["patched_datasets"]
159 |                 dset.resize(dset.shape[0] + len(self.__patched_msm), axis=0)
160 |                 # fill all newly added slots, not just the last one
161 |                 dset[-len(self.__patched_msm) :] = np.asarray(self.__patched_msm)
162 | 
163 |         self.fid.close()
164 |         self.fid = None
165 | 
166 |     # ---------- PUBLIC FUNCTIONS ----------
167 |     def get_attr(self: L1Bio, attr_name: str) -> np.ndarray | None:
168 |         """Obtain value of an HDF5 file attribute.
169 | 
170 |         Parameters
171 |         ----------
172 |         attr_name : str
173 |             Name of the attribute
174 | 
175 |         """
176 |         if attr_name not in self.fid.attrs:
177 |             return None
178 | 
179 |         attr = self.fid.attrs[attr_name]
180 |         if attr.shape is None:
181 |             return None
182 | 
183 |         return attr
184 | 
185 |     def get_orbit(self: L1Bio) -> int | None:
186 |         """Return absolute orbit number."""
187 |         res = self.get_attr("orbit")
188 |         if res is None:
189 |             return None
190 | 
191 |         return int(res)
192 | 
193 |     def get_processor_version(self: L1Bio) -> str | None:
194 |         """Return version of the L01b processor."""
195 |         attr = self.get_attr("processor_version")
196 |         if attr is None:
197 |             return None
198 | 
199 |         # pylint: disable=no-member
200 |         return attr.decode("ascii")
201 | 
202 |     def get_coverage_time(self: L1Bio) -> tuple[str, str] | None:
203 |         """Return start and end of the measurement coverage time."""
204 |         attr_start = self.get_attr("time_coverage_start")
205 |         if attr_start is None:
206 |             return None
207 | 
208 |         attr_end = self.get_attr("time_coverage_end")
209 |         if attr_end is None:
210 |             return None
211 | 
212 |         # pylint: disable=no-member
213 |         return (attr_start.decode("ascii"), attr_end.decode("ascii"))
214 | 
215 |     def get_creation_time(self: L1Bio) -> str | None:
216 |         """Return datetime when the L1b product was created."""
217 |         grp = self.fid["/METADATA/ESA_METADATA/earth_explorer_header"]
218 |         dset = grp["fixed_header/source"]
219 |         # note: the attribute lives on the dataset, not on the file
220 |         if "Creation_Date" in dset.attrs:
221 |             attr = dset.attrs["Creation_Date"]
222 |             if isinstance(attr, bytes):
223 |                 return attr.decode("ascii")
224 | 
225 |             return attr
226 | 
227 |         return None
228 | 
229 |     def select(self: L1Bio, msm_type: str | None = None) -> str | None:
230 |         """Select a calibration measurement as <processing class>_<ic_id>.
231 | 
232 |         Parameters
233 |         ----------
234 |         msm_type : str
235 |             Name of calibration measurement group as <processing class>_<ic_id>
236 | 
237 |         Returns
238 |         -------
239 |         str
240 |             String with spectral bands found in product
241 | 
242 |         Updated object attributes:
243 |          - bands : available spectral bands
244 | 
245 |         """
246 |         if msm_type is None:
247 |             if self.msm_type is None:
248 |                 raise ValueError("parameter msm_type is not defined")
249 |             msm_type = self.msm_type
250 | 
251 |         self.bands = ""
252 |         for name in self.band_groups:
253 |             for ii in "12345678":
254 |                 grp_path = PurePosixPath(name.replace("%", ii), msm_type)
255 |                 if str(grp_path) in self.fid:
256 |                     if self.__verbose:
257 |                         print("*** INFO: found: ", grp_path)
258 |                     self.bands += ii
259 | 
260 |             if self.bands:
261 |                 self.__msm_path = str(PurePosixPath(name, msm_type))
262 |                 break
263 | 
264 |         return self.bands
265 | 
266 |     def sequence(self: L1Bio, band: str | None = None) -> np.ndarray | None:
267 |         """Return sequence number, ICID and delta_time for each measurement.
267 | 
268 |         Parameters
269 |         ----------
270 |         band : None or {'1', '2', '3', ..., '8'}
271 |             Select one of the bands present in the product
272 |             Default is 'None' which returns the first available band
273 | 
274 |         Returns
275 |         -------
276 |         numpy.ndarray
277 |             Numpy rec-array with sequence number, ICID and delta-time
278 | 
279 |         """
280 |         if self.__msm_path is None:
281 |             return None
282 | 
283 |         if band is None or len(band) > 1:
284 |             band = self.bands[0]
285 | 
286 |         msm_path = self.__msm_path.replace("%", band)
287 |         grp = self.fid[str(PurePosixPath(msm_path, "INSTRUMENT"))]
288 | 
289 |         icid_list = np.squeeze(grp["instrument_configuration"]["ic_id"])
290 |         master_cycle = grp["instrument_settings"]["master_cycle_period_us"][0]
291 |         master_cycle /= 1000
292 |         grp = self.fid[str(PurePosixPath(msm_path, "OBSERVATIONS"))]
293 |         delta_time = np.squeeze(grp["delta_time"])
294 | 
295 |         # define result as numpy array
296 |         length = delta_time.size
297 |         res = np.empty(
298 |             (length,),
299 |             dtype=[
300 |                 ("sequence", "u2"),
301 |                 ("icid", "u2"),
302 |                 ("delta_time", "u4"),
303 |                 ("index", "u4"),
304 |             ],
305 |         )
306 |         res["sequence"] = [0]
307 |         res["icid"] = icid_list
308 |         res["delta_time"] = delta_time
309 |         res["index"] = np.arange(length, dtype=np.uint32)
310 |         if length == 1:
311 |             return res
312 | 
313 |         # determine sequence number
314 |         buff_icid = np.concatenate(
315 |             ([icid_list[0] - 10], icid_list, [icid_list[-1] + 10])
316 |         )
317 |         dt_thres = 10 * master_cycle
318 |         buff_time = np.concatenate(
319 |             (
320 |                 [delta_time[0] - 10 * dt_thres],
321 |                 delta_time,
322 |                 [delta_time[-1] + 10 * dt_thres],
323 |             )
324 |         )
325 | 
326 |         indx = (
327 |             ((buff_time[1:] - buff_time[0:-1]) > dt_thres)
328 |             | ((buff_icid[1:] - buff_icid[0:-1]) != 0)
329 |         ).nonzero()[0]
330 |         for ii in range(len(indx) - 1):
331 |             res["sequence"][indx[ii] : indx[ii + 1]] = ii
332 | 
333 |         return res
334 | 
335 |     def get_ref_time(self: L1Bio, band: str | None = None) -> datetime | None:
336 |         """Return reference start time of measurements.
337 | 
338 |         Parameters
339 |         ----------
340 |         band : None or {'1', '2', '3', ..., '8'}
341 |             Select one of the bands present in the product.
342 |             Default is 'None' which returns the first available band
343 | 
344 |         """
345 |         if self.__msm_path is None:
346 |             return None
347 | 
348 |         if band is None:
349 |             band = self.bands[0]
350 | 
351 |         msm_path = self.__msm_path.replace("%", band)
352 |         grp = self.fid[str(PurePosixPath(msm_path, "OBSERVATIONS"))]
353 | 
354 |         return datetime(2010, 1, 1, 0, 0, 0) + timedelta(seconds=int(grp["time"][0]))
355 | 
356 |     def get_delta_time(self: L1Bio, band: str | None = None) -> np.ndarray | None:
357 |         """Return offset from the reference start time of measurement.
358 | 
359 |         Parameters
360 |         ----------
361 |         band : None or {'1', '2', '3', ..., '8'}
362 |             Select one of the bands present in the product.
363 |             Default is 'None' which returns the first available band
364 | 
365 |         """
366 |         if self.__msm_path is None:
367 |             return None
368 | 
369 |         if band is None:
370 |             band = self.bands[0]
371 | 
372 |         msm_path = self.__msm_path.replace("%", band)
373 |         grp = self.fid[str(PurePosixPath(msm_path, "OBSERVATIONS"))]
374 | 
375 |         return grp["delta_time"][0, :].astype(int)
376 | 
377 |     def get_instrument_settings(
378 |         self: L1Bio, band: str | None = None
379 |     ) -> np.ndarray | None:
380 |         """Return instrument settings of measurement.
381 | 
382 |         Parameters
383 |         ----------
384 |         band : None or {'1', '2', '3', ..., '8'}
385 |             Select one of the bands present in the product.
386 |             Default is 'None' which returns the first available band
387 | 
388 |         """
389 |         if self.__msm_path is None:
390 |             return None
391 | 
392 |         if band is None:
393 |             band = self.bands[0]
394 | 
395 |         msm_path = self.__msm_path.replace("%", band)
396 |         #
397 |         # Due to a bug in python module `h5py` (v2.6.0), it fails to read
398 |         # the UVN instrument settings directly, with exception:
399 |         #    KeyError: 'Unable to open object (Component not found)'.
400 |         # This is my workaround
401 |         #
402 |         grp = self.fid[str(PurePosixPath(msm_path, "INSTRUMENT"))]
403 |         instr = np.empty(
404 |             grp["instrument_settings"].shape, dtype=grp["instrument_settings"].dtype
405 |         )
406 |         grp["instrument_settings"].read_direct(instr)
407 |         # for name in grp['instrument_settings'].dtype.names:
408 |         #     instr[name][:] = grp['instrument_settings'][name]
409 | 
410 |         return instr
411 | 
412 |     def get_exposure_time(self: L1Bio, band: str | None = None) -> list | None:
413 |         """Return pixel exposure time of the measurements.
414 | 
415 |         The exposure time is calculated from the parameters `int_delay` and
416 |         `int_hold` for SWIR.
417 | 
418 |         Parameters
419 |         ----------
420 |         band : None or {'1', '2', '3', ..., '8'}
421 |             Select one of the bands present in the product
422 |             Default is 'None' which returns the first available band
423 | 
424 |         """
425 |         if band is None:
426 |             band = self.bands[0]
427 | 
428 |         instr_arr = self.get_instrument_settings(band)
429 | 
430 |         # calculate exact exposure time
431 |         if int(band) < 7:
432 |             return [instr["exposure_time"] for instr in instr_arr]
433 | 
434 |         return [
435 |             swir_exp_time(instr["int_delay"], instr["int_hold"]) for instr in instr_arr
436 |         ]
437 | 
438 |     def get_housekeeping_data(
439 |         self: L1Bio, band: str | None = None
440 |     ) -> np.ndarray | None:
441 |         """Return housekeeping data of measurements.
442 | 
443 |         Parameters
444 |         ----------
445 |         band : None or {'1', '2', '3', ..., '8'}
446 |             Select one of the bands present in the product
447 |             Default is 'None' which returns the first available band
448 | 
449 |         """
450 |         if self.__msm_path is None:
451 |             return None
452 | 
453 |         if band is None:
454 |             band = self.bands[0]
455 | 
456 |         msm_path = self.__msm_path.replace("%", band)
457 |         grp = self.fid[str(PurePosixPath(msm_path, "INSTRUMENT"))]
458 | 
459 |         return np.squeeze(grp["housekeeping_data"])
460 | 
461 |     def get_geo_data(
462 |         self: L1Bio, band: str | None = None, geo_dset: str | None = None
463 |     ) -> dict | None:
464 |         """Return data of selected datasets from the GEODATA group.
465 | 
466 |         Parameters
467 |         ----------
468 |         band : None or {'1', '2', '3', ..., '8'}
469 |             Select one of the bands present in the product
470 |             Default is 'None' which returns the first available band
471 |         geo_dset : str
472 |             Name(s) of datasets in the GEODATA group, comma separated
473 |             Default is 'satellite_latitude,satellite_longitude'
474 | 
475 |         Returns
476 |         -------
477 |         dict
478 |             data of selected datasets from the GEODATA group
479 | 
480 |         """
481 |         if self.__msm_path is None:
482 |             return None
483 | 
484 |         if geo_dset is None:
485 |             geo_dset = self.geo_dset
486 | 
487 |         if band is None:
488 |             band = self.bands[0]
489 | 
490 |         msm_path = self.__msm_path.replace("%", band)
491 |         grp = self.fid[str(PurePosixPath(msm_path, "GEODATA"))]
492 | 
493 |         res = {}
494 |         for name in geo_dset.split(","):
495 |             res[name] = grp[name][0, ...]
496 | 
497 |         return res
498 | 
499 |     def get_msm_attr(
500 |         self: L1Bio, msm_dset: str, attr_name: str, band: str | None = None
501 |     ) -> np.ndarray | float | None:
502 |         """Return the value of an attribute of measurement dataset "msm_dset".
503 | 
504 |         Parameters
505 |         ----------
506 |         msm_dset : str
507 |             Name of measurement dataset
508 |         attr_name : str
509 |             Name of the attribute
510 |         band : None or {'1', '2', '3', ..., '8'}
511 |             Select one of the bands present in the product
512 |             Default is 'None' which returns the first available band
513 | 
514 |         Returns
515 |         -------
516 |         scalar or numpy.ndarray
517 |             Value of attribute "attr_name"
518 | 
519 |         """
520 |         if self.__msm_path is None:
521 |             return None
522 | 
523 |         if band is None:
524 |             band = self.bands[0]
525 | 
526 |         msm_path = self.__msm_path.replace("%", band)
527 |         ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset))
528 |         if attr_name in self.fid[ds_path].attrs:
529 |             attr = self.fid[ds_path].attrs[attr_name]
530 |             if isinstance(attr, bytes):
531 |                 return attr.decode("ascii")
532 | 
533 |             return attr
534 | 
535 |         return None
536 | 
537 |     def get_msm_data(
538 |         self: L1Bio,
539 |         msm_dset: str,
540 |         band: str | None = None,
541 |         fill_as_nan: bool = False,
542 |         msm_to_row: str | None = None,
543 |     ) -> np.ndarray | None:
544 |         """Read data from dataset "msm_dset".
545 | 
546 |         Parameters
547 |         ----------
548 |         msm_dset : str
549 |             Name of measurement dataset.
550 |         band : None or {'1', '2', '3', ..., '8'}
551 |             Select one of the bands present in the product.
552 |             Default is 'None' which returns:
553 | 
554 |             Radiance
555 |                 one band
556 | 
557 |             Calibration, Irradiance
558 |                 both bands (Calibration, Irradiance)
559 | 
560 |         fill_as_nan : bool
561 |             Set data values equal to the (KNMI) FillValue to NaN
562 |         msm_to_row : str, optional
563 |             'padding': combine both bands, padding the smaller one with NaN's
564 | 
565 |         Returns
566 |         -------
567 |         numpy.ndarray
568 |             values read from or written to dataset "msm_dset"
569 | 
570 |         """
571 |         fillvalue = float.fromhex("0x1.ep+122")
572 | 
573 |         if self.__msm_path is None:
574 |             return None
575 | 
576 |         if band is None:
577 |             band = self.bands
578 |         elif not isinstance(band, str):
579 |             raise TypeError("band must be a string")
580 |         elif band not in self.bands:
581 |             raise ValueError("band not found in product")
582 | 
583 |         if len(band) == 2 and msm_to_row is None:
584 |             msm_to_row = "padding"
585 | 
586 |         data = ()
587 |         for ii in band:
588 |             msm_path = self.__msm_path.replace("%", ii)
589 |             ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset))
590 |             dset = self.fid[ds_path]
591 | 
592 |             if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
593 |                 buff = np.squeeze(dset)
594 |                 buff[(buff == fillvalue)] = np.nan
595 |                 data += (buff,)
596 |             else:
597 |                 data += (np.squeeze(dset),)
598 | 
599 |         if len(band) == 1:
600 |             return data[0]
601 | 
602 |         if msm_to_row == "padding":
603 |             data = pad_rows(data[0], data[1])
604 | 
605 |         return np.concatenate(data, axis=data[0].ndim - 1)
606 | 
607 |     def set_msm_data(
608 |         self: L1Bio, msm_dset: str, new_data: np.ndarray | Iterable
609 |     ) -> None:
610 |         """Replace data of dataset "msm_dset" with new_data.
611 | 
612 |         Parameters
613 |         ----------
614 |         msm_dset : str
615 |             Name of measurement dataset.
615 | new_data : array-like 616 | Data to be written with same dimensions as dataset "msm_dset" 617 | 618 | """ 619 | if self.__msm_path is None: 620 | return 621 | 622 | # we will overwrite existing data, thus readwrite access is required 623 | if not self.__rw: 624 | raise PermissionError("read/write access required") 625 | 626 | # overwrite the data 627 | col = 0 628 | for ii in self.bands: 629 | msm_path = self.__msm_path.replace("%", ii) 630 | ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset)) 631 | dset = self.fid[ds_path] 632 | 633 | dims = dset.shape 634 | dset[0, ...] = new_data[..., col : col + dims[-1]] 635 | col += dims[-1] 636 | 637 | # update patch logging 638 | self.__patched_msm.append(ds_path) 639 | 640 | 641 | # -------------------------------------------------- 642 | class L1BioIRR(L1Bio): 643 | """Class with methods to access Tropomi L1B irradiance products.""" 644 | 645 | band_groups = ("/BAND%_IRRADIANCE",) 646 | geo_dset = "earth_sun_distance" 647 | msm_type = "STANDARD_MODE" 648 | 649 | 650 | # -------------------------------------------------- 651 | class L1BioRAD(L1Bio): 652 | """Class with function to access Tropomi L1B radiance products.""" 653 | 654 | band_groups = ("/BAND%_RADIANCE",) 655 | geo_dset = "latitude,longitude" 656 | msm_type = "STANDARD_MODE" 657 | 658 | 659 | # -------------------------------------------------- 660 | class L1BioENG: 661 | """Class with methods to access Tropomi offline L1b engineering products. 662 | 663 | Parameters 664 | ---------- 665 | l1b_product : Path | str 666 | name of the L1b engineering product 667 | 668 | Notes 669 | ----- 670 | The L1b engineering products are available for UVN (band 1-6) 671 | and SWIR (band 7-8). 672 | 673 | """ 674 | 675 | def __init__(self: L1BioENG, l1b_product: Path | str) -> None: 676 | """Initialize access to a Tropomi offline L1b product.""" 677 | # open L1b product as HDF5 file 678 | l1b_product = Path(l1b_product) 679 | if not l1b_product.is_file(): 680 | raise FileNotFoundError(f"{l1b_product} does not exist") 681 | 682 | # initialize private class-attributes 683 | self.filename = l1b_product 684 | self.fid = h5py.File(l1b_product, "r") 685 | 686 | def __iter__(self: L1BioENG) -> None: 687 | """Allow iteration.""" 688 | for attr in sorted(self.__dict__): 689 | if not attr.startswith("__"): 690 | yield attr 691 | 692 | def __enter__(self: L1BioENG) -> Self: 693 | """Initiate the context manager.""" 694 | return self 695 | 696 | def __exit__(self: L1BioENG, *args: object) -> bool: 697 | """Exit the context manager.""" 698 | self.close() 699 | return False # any exception is raised by the with statement. 700 | 701 | def close(self: L1BioENG) -> None: 702 | """Close access to product.""" 703 | if self.fid is None: 704 | return 705 | 706 | self.fid.close() 707 | self.fid = None 708 | 709 | # ---------- PUBLIC FUNCTIONS ---------- 710 | def get_attr(self: L1BioENG, attr_name: str) -> np.ndarray | None: 711 | """Obtain value of an HDF5 file attribute. 
713 | 
714 |         Parameters
715 |         ----------
716 |         attr_name : str
717 |             Name of the attribute
718 | 
719 |         """
720 |         if attr_name not in self.fid.attrs:
721 |             return None
722 | 
723 |         attr = self.fid.attrs[attr_name]
724 |         if attr.shape is None:
725 |             return None
726 | 
727 |         return attr
728 | 
729 |     def get_orbit(self: L1BioENG) -> int | None:
730 |         """Return absolute orbit number."""
731 |         res = self.get_attr("orbit")
732 |         if res is None:
733 |             return None
734 | 
735 |         return int(res)
736 | 
737 |     def get_processor_version(self: L1BioENG) -> str | None:
738 |         """Return version of the L01b processor."""
739 |         attr = self.get_attr("processor_version")
740 |         if attr is None:
741 |             return None
742 | 
743 |         # pylint: disable=no-member
744 |         return attr.decode("ascii")
745 | 
746 |     def get_coverage_time(self: L1BioENG) -> tuple[str, str] | None:
747 |         """Return start and end of the measurement coverage time."""
748 |         attr_start = self.get_attr("time_coverage_start")
749 |         if attr_start is None:
750 |             return None
751 | 
752 |         attr_end = self.get_attr("time_coverage_end")
753 |         if attr_end is None:
754 |             return None
755 | 
756 |         # pylint: disable=no-member
757 |         return (attr_start.decode("ascii"), attr_end.decode("ascii"))
758 | 
759 |     def get_creation_time(self: L1BioENG) -> str | None:
760 |         """Return datetime when the L1b product was created."""
761 |         grp = self.fid["/METADATA/ESA_METADATA/earth_explorer_header"]
762 |         dset = grp["fixed_header/source"]
763 |         # note: the attribute lives on the dataset, not on the file
764 |         if "Creation_Date" in dset.attrs:
765 |             attr = dset.attrs["Creation_Date"]
766 |             if isinstance(attr, bytes):
767 |                 return attr.decode("ascii")
768 | 
769 |             return attr
770 | 
771 |         return None
772 | 
773 |     def get_ref_time(self: L1BioENG) -> int:
774 |         """Return reference start time of measurements."""
775 |         return int(self.fid["reference_time"][0])
776 | 
777 |     def get_delta_time(self: L1BioENG) -> np.ndarray:
778 |         """Return offset from the reference start time of measurement."""
779 |         return self.fid["/MSMTSET/msmtset"]["delta_time"][:].astype(int)
780 | 
781 |     def get_msmtset(self: L1BioENG) -> np.ndarray:
782 |         """Return L1B_ENG_DB/SATELLITE_INFO/satellite_pos."""
783 |         return self.fid["/SATELLITE_INFO/satellite_pos"][:]
784 | 
785 |     def get_msmtset_db(self: L1BioENG) -> np.ndarray:
786 |         """Return compressed msmtset from L1B_ENG_DB/MSMTSET/msmtset.
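        [Editor's sketch -- an addition] The compression below segments the full
        msmtset on changes of ICID with a standard change-point trick; the trick in
        isolation, with a made-up `icid` array:

            >>> icid = np.array([101, 101, 102, 102, 102, 110])
            >>> indx = (np.diff(icid) != 0).nonzero()[0] + 1   # array([2, 5]): block starts
            >>> indx = np.insert(indx, 0, 0)                   # prepend start of first block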
786 | 
787 |         Notes
788 |         -----
789 |         This function is used to fill the SQLite product databases.
790 | 
791 |         """
792 |         dtype_msmt_db = np.dtype(
793 |             [
794 |                 ("meta_id", np.int32),
795 |                 ("ic_id", np.uint16),
796 |                 ("ic_version", np.uint8),
797 |                 ("class", np.uint8),
798 |                 ("repeats", np.uint16),
799 |                 ("exp_per_mcp", np.uint16),
800 |                 ("exp_time_us", np.uint32),
801 |                 ("mcp_us", np.uint32),
802 |                 ("delta_time_start", np.int32),
803 |                 ("delta_time_end", np.int32),
804 |             ]
805 |         )
806 | 
807 |         # read full msmtset
808 |         msmtset = self.fid["/MSMTSET/msmtset"][:]
809 | 
810 |         # get indices to start and end of every measurement (based on ICID)
811 |         icid = msmtset["icid"]
812 |         indx = (np.diff(icid) != 0).nonzero()[0] + 1
813 |         indx = np.insert(indx, 0, 0)
814 |         indx = np.append(indx, -1)
815 | 
816 |         # compress data from msmtset
817 |         msmt = np.zeros(indx.size - 1, dtype=dtype_msmt_db)
818 |         msmt["ic_id"][:] = msmtset["icid"][indx[0:-1]]
819 |         msmt["ic_version"][:] = msmtset["icv"][indx[0:-1]]
820 |         msmt["class"][:] = msmtset["class"][indx[0:-1]]
821 |         msmt["delta_time_start"][:] = msmtset["delta_time"][indx[0:-1]]
822 |         msmt["delta_time_end"][:] = msmtset["delta_time"][indx[1:]]
823 | 
824 |         # add SWIR timing information
825 |         timing = self.fid["/DETECTOR4/timing"][:]
826 |         msmt["mcp_us"][:] = timing["mcp_us"][indx[1:] - 1]
827 |         msmt["exp_time_us"][:] = timing["exp_time_us"][indx[1:] - 1]
828 |         msmt["exp_per_mcp"][:] = timing["exp_per_mcp"][indx[1:] - 1]
829 |         # duration per ICID execution in micro-seconds
830 |         duration = 1000 * (msmt["delta_time_end"] - msmt["delta_time_start"])
831 |         # duration can be zero
832 |         mask = msmt["mcp_us"] > 0
833 |         # divide duration by measurement period in micro-seconds
834 |         msmt["repeats"][mask] = (duration[mask] / (msmt["mcp_us"][mask])).astype(
835 |             np.uint16
836 |         )
837 | 
838 |         return msmt
839 | 
840 |     def get_swir_hk_db(
841 |         self: L1BioENG, stats: str | None = None, fill_as_nan: bool = False
842 |     ) -> np.ndarray | tuple[np.ndarray, np.ndarray] | None:
843 |         """Return the most important SWIR housekeeping parameters.
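        [Editor's sketch -- an addition] Usage example; `eng` is an opened
        L1BioENG instance:

            >>> swir_hk = eng.get_swir_hk_db(fill_as_nan=True)    # full record
            >>> hk_median = eng.get_swir_hk_db(stats="median", fill_as_nan=True)
            >>> hk_min, hk_max = eng.get_swir_hk_db(stats="range", fill_as_nan=True)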
843 | 844 | Parameters 845 | ---------- 846 | stats : {'median', 'range', None} 847 | Add statistics on housekeeping parameters 848 | fill_as_nan : bool, default=False 849 | Replace (float) FillValues with Nan's, when True 850 | 851 | Notes 852 | ----- 853 | This function is used to fill the SQLite product database and 854 | HDF5 monitoring database 855 | 856 | """ 857 | dtype_hk_db = np.dtype( 858 | [ 859 | ("detector_temp", np.float32), 860 | ("grating_temp", np.float32), 861 | ("imager_temp", np.float32), 862 | ("obm_temp", np.float32), 863 | ("calib_unit_temp", np.float32), 864 | ("fee_inner_temp", np.float32), 865 | ("fee_board_temp", np.float32), 866 | ("fee_ref_volt_temp", np.float32), 867 | ("fee_video_amp_temp", np.float32), 868 | ("fee_video_adc_temp", np.float32), 869 | ("detector_heater", np.float32), 870 | ("obm_heater_cycle", np.float32), 871 | ("fee_box_heater_cycle", np.float32), 872 | ("obm_heater", np.float32), 873 | ("fee_box_heater", np.float32), 874 | ] 875 | ) 876 | 877 | num_eng_pkts = self.fid["nr_of_engdat_pkts"].size 878 | swir_hk = np.empty(num_eng_pkts, dtype=dtype_hk_db) 879 | 880 | hk_tbl = self.fid["/DETECTOR4/DETECTOR_HK/temperature_info"][:] 881 | swir_hk["detector_temp"] = hk_tbl["temp_det_ts2"] 882 | swir_hk["fee_inner_temp"] = hk_tbl["temp_d1_box"] 883 | swir_hk["fee_board_temp"] = hk_tbl["temp_d5_cold"] 884 | swir_hk["fee_ref_volt_temp"] = hk_tbl["temp_a3_vref"] 885 | swir_hk["fee_video_amp_temp"] = hk_tbl["temp_d6_vamp"] 886 | swir_hk["fee_video_adc_temp"] = hk_tbl["temp_d4_vadc"] 887 | 888 | hk_tbl = self.fid["/NOMINAL_HK/TEMPERATURES/hires_temperatures"][:] 889 | swir_hk["grating_temp"] = hk_tbl["hires_temp_1"] 890 | 891 | hk_tbl = self.fid["/NOMINAL_HK/TEMPERATURES/instr_temperatures"][:] 892 | swir_hk["imager_temp"] = hk_tbl["instr_temp_29"] 893 | swir_hk["obm_temp"] = hk_tbl["instr_temp_28"] 894 | swir_hk["calib_unit_temp"] = hk_tbl["instr_temp_25"] 895 | 896 | hk_tbl = self.fid["/DETECTOR4/DETECTOR_HK/heater_data"][:] 897 | swir_hk["detector_heater"] = hk_tbl["det_htr_curr"] 898 | 899 | hk_tbl = self.fid["/NOMINAL_HK/HEATERS/heater_data"][:] 900 | swir_hk["obm_heater"] = hk_tbl["meas_cur_val_htr12"] 901 | swir_hk["obm_heater_cycle"] = hk_tbl["last_pwm_val_htr12"] 902 | swir_hk["fee_box_heater"] = hk_tbl["meas_cur_val_htr13"] 903 | swir_hk["fee_box_heater_cycle"] = hk_tbl["last_pwm_val_htr13"] 904 | 905 | # CHECK: works only when all elements of swir_hk are floats 906 | if fill_as_nan: 907 | for key in dtype_hk_db.names: 908 | swir_hk[key][swir_hk[key] == 999.0] = np.nan 909 | 910 | if stats is None: 911 | return swir_hk 912 | 913 | if stats == "median": 914 | hk_median = np.empty(1, dtype=dtype_hk_db) 915 | for key in dtype_hk_db.names: 916 | if np.all(np.isnan(swir_hk[key])): 917 | hk_median[key][0] = np.nan 918 | elif np.nanmin(swir_hk[key]) == np.nanmax(swir_hk[key]): 919 | hk_median[key][0] = swir_hk[key][0] 920 | else: 921 | hk_median[key][0] = Biweight(swir_hk[key]).median 922 | return hk_median 923 | 924 | if stats == "range": 925 | hk_min = np.empty(1, dtype=dtype_hk_db) 926 | hk_max = np.empty(1, dtype=dtype_hk_db) 927 | for key in dtype_hk_db.names: 928 | if np.all(np.isnan(swir_hk[key])): 929 | hk_min[key][0] = np.nan 930 | hk_max[key][0] = np.nan 931 | elif np.nanmin(swir_hk[key]) == np.nanmax(swir_hk[key]): 932 | hk_min[key][0] = swir_hk[key][0] 933 | hk_max[key][0] = swir_hk[key][0] 934 | else: 935 | hk_min[key][0] = np.nanmin(swir_hk[key]) 936 | hk_max[key][0] = np.nanmax(swir_hk[key]) 937 | return hk_min, hk_max 938 | 939 | return 
None
940 | 
--------------------------------------------------------------------------------
/src/pys5p/l1b_patch.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`L1Bpatch`, class to modify an existing L1B product.
11 | 
12 | .. warning:: Deprecated, this module is no longer maintained.
13 | """
14 | 
15 | from __future__ import annotations
16 | 
17 | __all__ = ["L1Bpatch"]
18 | 
19 | import shutil
20 | from datetime import datetime
21 | from pathlib import Path
22 | from typing import Self
23 | 
24 | import h5py
25 | import numpy as np
26 | from setuptools_scm import get_version
27 | 
28 | from . import swir_region
29 | from .l1b_io import L1BioRAD
30 | 
31 | # - global variables --------------------------------
32 | _MSG_ERR_IO_BAND_ = "spectral band of input and output products do not match"
33 | 
34 | 
35 | # - local functions --------------------------------
36 | 
37 | 
38 | # - class definition -------------------------------
39 | class L1Bpatch:
40 |     """Definition of class L1Bpatch.
41 | 
42 |     Parameters
43 |     ----------
44 |     l1b_product : str
45 |         name of the L1B product
46 |     data_dir : str, default='/tmp'
47 |         output directory for the patched L1B product
48 |     ckd_dir : str, default='/nfs/Tropomi/share/ckd'
49 |         location of the Tropomi CKD
50 | 
51 |     """
52 | 
53 |     def __init__(
54 |         self: L1Bpatch,
55 |         l1b_product: str,
56 |         data_dir: str = "/tmp",
57 |         ckd_dir: str = "/nfs/Tropomi/share/ckd",
58 |     ) -> None:
59 |         """Initialize access to a Tropomi offline L1b product."""
60 |         prod_type = Path(l1b_product).name[0:15]
61 |         if prod_type not in ("S5P_OFFL_L1B_RA", "S5P_RPRO_L1B_RA"):
62 |             raise TypeError(
63 |                 "only implemented for Tropomi L1b radiance products"
64 |             )
65 | 
66 |         # initialize private class-attributes
67 |         self.data_dir = Path(data_dir)
68 |         if not self.data_dir.is_dir():
69 |             self.data_dir.mkdir(mode=0o755)
70 |         self.ckd_dir = Path(ckd_dir)
71 |         self.l1b_product: Path = Path(l1b_product)
72 |         self.l1b_patched = self.data_dir / self.l1b_product.name.replace("_01_", "_99_")
73 |         if self.l1b_patched.is_file():
74 |             self.l1b_patched.unlink()
75 |         self.__patched_msm = []
76 | 
77 |     def __enter__(self: L1Bpatch) -> Self:
78 |         """Initiate the context manager."""
79 |         return self
80 | 
81 |     def __exit__(self: L1Bpatch, *args: object) -> bool:
82 |         """Exit the context manager."""
83 |         self.close()
84 |         return False  # any exception is raised by the with statement.
85 | 
86 |     def close(self: L1Bpatch) -> None:
87 |         """Close L1B product.
88 | 
89 |         Before closing the product, we make sure that the output product
90 |         describes what has been altered by the S/W, to keep any change
91 |         traceable.
92 | 
93 |         In case the L1b product is altered, the attributes listed below are
94 |         added to the group: '/METADATA/SRON_METADATA':
95 | 
96 |         - dateStamp ('now')
97 |         - Git-version of S/W
98 |         - list of patched datasets
99 |         - auxiliary datasets used by patch-routines
100 | 
101 |         """
102 |         if not self.l1b_patched.is_file():
103 |             return
104 | 
105 |         if not self.__patched_msm:
106 |             return
107 | 
108 |         with h5py.File(self.l1b_patched, "r+") as fid:
109 |             sgrp = fid.require_group("/METADATA/SRON_METADATA")
110 |             sgrp.attrs["dateStamp"] = datetime.utcnow().isoformat()
111 |             sgrp.attrs["git_tag"] = get_version(root="..", relative_to=__file__)
112 |             if "patched_datasets" not in sgrp:
113 |                 dtype = h5py.special_dtype(vlen=str)
114 |                 dset = sgrp.create_dataset(
115 |                     "patched_datasets",
116 |                     (len(self.__patched_msm),),
117 |                     maxshape=(None,),
118 |                     dtype=dtype,
119 |                 )
120 |                 dset[:] = np.asarray(self.__patched_msm)
121 |             else:
122 |                 dset = sgrp["patched_datasets"]
123 |                 dset.resize(dset.shape[0] + len(self.__patched_msm), axis=0)
124 |                 # fill all newly added slots, not just the last one
125 |                 dset[-len(self.__patched_msm) :] = np.asarray(self.__patched_msm)
126 | 
127 |     # --------------------------------------------------
128 |     def pixel_quality(self: L1Bpatch, dpqm: np.ndarray, threshold: float = 0.8) -> None:
129 |         """Patch SWIR pixel_quality.
130 | 
131 |         Patched dataset: 'quality_level' and 'spectral_channel_quality'
132 | 
133 |         Requires (naive approach):
134 | 
135 |         * read original dataset 'spectral_channel_quality'
136 |         * read pixel quality ckd
137 |         * adjust the second bit of each byte of spectral_channel_quality
138 |         * quality_level = int(100 * dpqm)
139 |         * write updated datasets to patched product
140 | 
141 |         Parameters
142 |         ----------
143 |         dpqm : array-like
144 |             SWIR pixel quality as a float value between 0 and 1
145 |         threshold : float, optional
146 |             threshold for good pixels, default 0.8
147 | 
148 |         Returns
149 |         -------
150 |         Nothing
151 | 
152 |         """
153 |         if not self.l1b_patched.is_file():
154 |             shutil.copy(self.l1b_product, self.l1b_patched)
155 | 
156 |         # read original data
157 |         with L1BioRAD(self.l1b_product) as l1b:
158 |             band = l1b.select("STANDARD_MODE")
159 |             quality_level = l1b.get_msm_data("quality_level")
160 |             print("quality_level", quality_level.dtype)
161 |             chan_quality = l1b.get_msm_data("spectral_channel_quality")
162 |             print("chan_quality", chan_quality.dtype)
163 | 
164 |         if band in ("7", "8"):
165 |             l2_dpqm = dpqm[swir_region.coords(mode="level2", band=band)]
166 |         else:
167 |             raise ValueError("only implemented for band 7 or 8")
168 | 
169 |         # patch dataset 'quality_level'
170 |         quality_level[...] = (100 * l2_dpqm).astype(np.uint8)  # broadcasting
171 | 
172 |         # patch dataset 'spectral_channel_quality'
173 |         buff = chan_quality & ~2  # set second bit to zero (all good)
174 |         buff[:, l2_dpqm < threshold] += 2  # flag bad pixels
175 |         chan_quality = buff.astype(np.uint8)
176 | 
177 |         # write patched dataset to new product
178 |         with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
179 |             res = l1b.select("STANDARD_MODE")
180 |             if res != band:
181 |                 raise ValueError(_MSG_ERR_IO_BAND_)
182 |             l1b.set_msm_data("quality_level", quality_level)
183 |             l1b.set_msm_data("spectral_channel_quality", chan_quality)
184 | 
185 |     def offset(self: L1Bpatch) -> None:
186 |         """Patch SWIR offset correction.
187 | 
188 |         Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
189 | 
190 |         Requires (naive approach):
191 | 
192 |         * reverse applied radiance calibration
193 |         * reverse applied stray-light correction
194 |         * reverse applied PRNU correction
195 |         * reverse applied dark-flux correction
196 |         * reverse applied offset correction
197 |         * apply (alternative) offset correction
198 |         * apply (alternative) dark-flux correction
199 |         * apply (alternative) PRNU correction
200 |         * apply (alternative) stray-light correction
201 |         * apply (alternative) radiance calibration
202 | 
203 |         Returns
204 |         -------
205 |         Nothing
206 | 
207 |         """
208 |         if not self.l1b_patched.is_file():
209 |             shutil.copy(self.l1b_product, self.l1b_patched)
210 | 
211 |         # read original data
212 |         with L1BioRAD(self.l1b_product) as l1b:
213 |             band = l1b.select("STANDARD_MODE")
214 |             data = l1b.get_msm_data("radiance")
215 | 
216 |         # read required CKD's
217 | 
218 |         # patch dataset 'radiance'
219 | 
220 |         # write patched dataset to new product
221 |         with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
222 |             res = l1b.select("STANDARD_MODE")
223 |             if res != band:
224 |                 raise ValueError(_MSG_ERR_IO_BAND_)
225 |             l1b.set_msm_data("radiance", data)
226 | 
227 |     def darkflux(self: L1Bpatch) -> None:
228 |         """Patch SWIR dark-flux correction.
229 | 
230 |         Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
231 | 
232 |         Requires (naive approach):
233 | 
234 |         * reverse applied radiance calibration
235 |         * reverse applied stray-light correction
236 |         * reverse applied PRNU correction
237 |         * reverse applied dark-flux correction
238 |         * apply (alternative) dark-flux correction
239 |         * apply (alternative) PRNU correction
240 |         * apply (alternative) stray-light correction
241 |         * apply (alternative) radiance calibration
242 | 
243 |         Returns
244 |         -------
245 |         Nothing
246 | 
247 |         """
248 |         if not self.l1b_patched.is_file():
249 |             shutil.copy(self.l1b_product, self.l1b_patched)
250 | 
251 |         # read original data
252 |         with L1BioRAD(self.l1b_product) as l1b:
253 |             band = l1b.select("STANDARD_MODE")
254 |             data = l1b.get_msm_data("radiance")
255 | 
256 |         # read required CKD's
257 | 
258 |         # patch dataset 'radiance'
259 | 
260 |         # write patched dataset to new product
261 |         with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
262 |             res = l1b.select("STANDARD_MODE")
263 |             if res != band:
264 |                 raise ValueError(_MSG_ERR_IO_BAND_)
265 |             l1b.set_msm_data("radiance", data)
266 | 
267 |     def prnu(self: L1Bpatch) -> None:
268 |         """Patch pixel response non-uniformity correction.
269 | 
270 |         Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
271 | 
272 |         Requires (naive approach):
273 | 
274 |         * reverse applied radiance calibration
275 |         * reverse applied stray-light correction
276 |         * reverse applied PRNU correction
277 |         * apply (alternative) PRNU correction
278 |         * apply (alternative) stray-light correction
279 |         * apply (alternative) radiance calibration
280 | 
281 |         Alternative: neglect the impact of stray-light, but apply a patch to
282 |         correct for spectral features.
283 | 
284 |         Returns
285 |         -------
286 |         None
287 | 
288 |         Notes
289 |         -----
290 |         It is assumed that for the PRNU correction the CKD has to be multiplied
291 |         with the pixel signals.
292 | 
293 |         """
294 |         if not self.l1b_patched.is_file():
295 |             shutil.copy(self.l1b_product, self.l1b_patched)
296 | 
297 |         # read original data
298 |         with L1BioRAD(self.l1b_product) as l1b:
299 |             band = l1b.select("STANDARD_MODE")
300 |             data = l1b.get_msm_data("radiance")
301 | 
302 |         # read required CKD's
303 | 
304 |         # patch dataset 'radiance'
305 | 
306 |         # write patched dataset to new product
307 |         with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
308 |             res = l1b.select("STANDARD_MODE")
309 |             if res != band:
310 |                 raise ValueError(_MSG_ERR_IO_BAND_)
311 |             l1b.set_msm_data("radiance", data)
312 | 
313 |     def relrad(self: L1Bpatch) -> None:
314 |         """Patch relative radiance calibration.
315 | 
316 |         Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
317 | 
318 |         Requires:
319 | 
320 |         * reverse applied radiance calibration
321 |         * apply alternative radiance calibration
322 | 
323 |         Returns
324 |         -------
325 |         Nothing
326 | 
327 |         """
328 |         if not self.l1b_patched.is_file():
329 |             shutil.copy(self.l1b_product, self.l1b_patched)
330 | 
331 |         # read original data
332 |         with L1BioRAD(self.l1b_product) as l1b:
333 |             band = l1b.select("STANDARD_MODE")
334 |             data = l1b.get_msm_data("radiance")
335 | 
336 |         # read required CKD's
337 | 
338 |         # patch dataset 'radiance'
339 | 
340 |         # write patched dataset to new product
341 |         with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
342 |             res = l1b.select("STANDARD_MODE")
343 |             if res != band:
344 |                 raise ValueError(_MSG_ERR_IO_BAND_)
345 |             l1b.set_msm_data("radiance", data)
346 | 
347 |     def absrad(self: L1Bpatch) -> None:
348 |         """Patch absolute radiance calibration.
349 | 
350 |         Patched dataset: 'radiance' ('radiance_error' and 'radiance_noise'?)
351 | 
352 |         Requires:
353 | 
354 |         * reverse applied irradiance calibration
355 |         * apply alternative irradiance calibration
356 | 
357 |         Returns
358 |         -------
359 |         Nothing
360 | 
361 |         """
362 |         if not self.l1b_patched.is_file():
363 |             shutil.copy(self.l1b_product, self.l1b_patched)
364 | 
365 |         # read original data
366 |         with L1BioRAD(self.l1b_product) as l1b:
367 |             band = l1b.select("STANDARD_MODE")
368 |             data = l1b.get_msm_data("radiance")
369 | 
370 |         # read required CKD's
371 | 
372 |         # patch dataset 'radiance'
373 | 
374 |         # write patched dataset to new product
375 |         with L1BioRAD(self.l1b_patched, readwrite=True) as l1b:
376 |             res = l1b.select("STANDARD_MODE")
377 |             if res != band:
378 |                 raise ValueError(_MSG_ERR_IO_BAND_)
379 |             l1b.set_msm_data("radiance", data)
380 | 
381 |     def check(self: L1Bpatch) -> None:
382 |         """Check patched dataset in L1B product."""
383 |         if not self.l1b_patched.is_file():
384 |             raise ValueError("patched product not found")
385 | 
386 |         with h5py.File(self.l1b_patched, "r+") as fid:
387 |             if "SRON_METADATA" not in fid["/METADATA"]:
388 |                 raise ValueError("no SRON metadata defined in L1B product")
389 |             sgrp = fid["/METADATA/SRON_METADATA"]
390 |             if "patched_datasets" not in sgrp:
391 |                 raise ValueError("no patched datasets in L1B product")
392 |             patched_datasets = sgrp["patched_datasets"][:]
393 | 
394 |         for ds_name in patched_datasets:
395 |             with L1BioRAD(self.l1b_product) as l1b:
396 |                 l1b.select("STANDARD_MODE")
397 |                 orig = l1b.get_msm_data(ds_name.split("/")[-1])
398 | 
399 |             with L1BioRAD(self.l1b_patched) as l1b:
400 |                 l1b.select("STANDARD_MODE")
401 |                 patch = l1b.get_msm_data(ds_name.split("/")[-1])
402 | 
403 |             if np.issubdtype(orig.dtype, np.integer):
404 |                 if np.array_equiv(orig, patch):
405 |                     print(ds_name.split("/")[-1], " equal True")
406 |                 else:
407 |                     print(
408 |                         f"{ds_name.split('/')[-1]}"
409 |                         f" equal {np.sum(orig == patch)}"
410 |                         f" differ {np.sum(orig != patch)}"
411 |                     )
412 |             else:
413 |                 print("test not yet defined")
--------------------------------------------------------------------------------
/src/pys5p/lib/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2020-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """Necessary but empty file."""
--------------------------------------------------------------------------------
/src/pys5p/lv2_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`LV2io`, class to access Tropomi level-2 products."""
11 | 
12 | from __future__ import annotations
13 | 
14 | __all__ = ["LV2io"]
15 | 
16 | from datetime import datetime, timedelta
17 | from typing import TYPE_CHECKING, Self
18 | 
19 | import h5py
20 | import numpy as np
21 | from moniplot.image_to_xarray import data_to_xr, h5_to_xr
22 | from netCDF4 import Dataset
23 | 
24 | if TYPE_CHECKING:
25 |     from pathlib import Path
26 | 
27 |     import xarray as xr
28 | 
29 | # - global parameters ------------------------------
30 | 
31 | 
32 | # - local functions --------------------------------
33 | 
34 | 
35 | # - class definition -------------------------------
36 | class LV2io:
37 |     """A class to read Tropomi Level-2 (offline) products.
38 | 
39 |     Parameters
40 |     ----------
41 |     lv2_product : Path
42 |         full path to S5P Tropomi level 2 product
43 | 
44 |     Notes
45 |     -----
46 |     The Python h5py module can read the operational netCDF4 products without
47 |     any problems; however, the SRON science products contain incompatible
48 |     attributes. This should be fixed when more up-to-date netCDF software is
49 |     used to generate the products. Currently, the Python netCDF4 module is
50 |     used to read the science products.
51 | 
52 |     """
53 | 
54 |     def __init__(self: LV2io, lv2_product: Path) -> None:
55 |         """Initialize access to an S5P_L2 product."""
56 |         if not lv2_product.is_file():
57 |             raise FileNotFoundError(f"{lv2_product.name} does not exist")
58 | 
59 |         # initialize class-attributes
60 |         self.filename = lv2_product
61 | 
62 |         # open LV2 product as HDF5 file
63 |         if self.science_product:
64 |             self.fid = Dataset(lv2_product, "r")
65 |             self.ground_pixel = self.fid["/instrument/ground_pixel"][:].max()
66 |             self.ground_pixel += 1
67 |             self.scanline = self.fid["/instrument/scanline"][:].max()
68 |             self.scanline += 1
69 |             # alternative set flag sparse
70 |             if self.fid["/instrument/scanline"].size % self.ground_pixel != 0:
71 |                 raise ValueError("not all scanlines are complete")
72 |         else:
73 |             self.fid = h5py.File(lv2_product, "r")
74 |             self.ground_pixel = self.fid["/PRODUCT/ground_pixel"].size
75 |             self.scanline = self.fid["/PRODUCT/scanline"].size
76 | 
77 |     def __iter__(self: LV2io) -> None:
78 |         """Allow iteration."""
79 |         for attr in sorted(self.__dict__):
80 |             if not attr.startswith("__"):
81 |                 yield attr
82 | 
83 |     def __enter__(self: LV2io) -> Self:
84 |         """Initiate the context manager."""
85 |         return self
86 | 
87 |     def __exit__(self: LV2io, *args: object) -> bool:
88 |         """Exit the context manager."""
89 |         self.close()
90 |         return False  # any exception is raised by the with statement.
91 | 
92 |     def close(self: LV2io) -> None:
93 |         """Close the product."""
94 |         if self.fid is not None:
95 |             self.fid.close()
96 | 
97 |     # ----- Class properties --------------------
98 |     @property
99 |     def science_product(self: LV2io) -> bool:
100 |         """Check if product is a science product."""
101 |         science_inst = b"Space Research Organisation Netherlands"
102 | 
103 |         res = False
104 |         with h5py.File(self.filename) as fid:
105 |             if "institution" in fid.attrs and fid.attrs["institution"] == science_inst:
106 |                 res = True
107 | 
108 |         return res
109 | 
110 |     @property
111 |     def orbit(self: LV2io) -> int:
112 |         """Return reference orbit number."""
113 |         if self.science_product:
114 |             return int(self.__nc_attr("orbit", "l1b_file"))
115 | 
116 |         return int(self.__h5_attr("orbit", None)[0])
117 | 
118 |     @property
119 |     def algorithm_version(self: LV2io) -> str | None:
120 |         """Return version of the level 2 algorithm."""
121 |         res = self.get_attr("algorithm_version")
122 | 
123 |         return res if res is not None else self.get_attr("version")
124 | 
125 |     @property
126 |     def processor_version(self: LV2io) -> str | None:
127 |         """Return version of the level 2 processor."""
128 |         res = self.get_attr("processor_version")
129 | 
130 |         return res if res is not None else self.get_attr("version")
131 | 
132 |     @property
133 |     def product_version(self: LV2io) -> str:
134 |         """Return version of the level 2 product."""
135 |         res = self.get_attr("product_version")
136 | 
137 |         return res if res is not None else self.get_attr("version")
138 | 
139 |     @property
140 |     def coverage_time(self: LV2io) -> tuple[str, str]:
141 |         """Return start and end of the measurement coverage time."""
142 |         return (
143 |             self.get_attr("time_coverage_start"),
144 |             self.get_attr("time_coverage_end"),
145 |         )
146 | 
147 |     @property
148 |     def creation_time(self: LV2io) -> str:
149 |         """Return creation date/time of the level 2 product."""
150 |         return self.get_attr("date_created")
151 | 
152 |     # ----- Attributes --------------------
153 |     def __h5_attr(
154 |         self: LV2io, attr_name: str, ds_name: str | None
155 |     ) -> np.ndarray | None:
156 |         """Read attributes from operational products
using hdf5.""" 157 | if ds_name is not None: 158 | dset = self.fid[f"/PRODUCT/{ds_name}"] 159 | if attr_name not in dset.attrs: 160 | return None 161 | 162 | attr = dset.attrs[attr_name] 163 | else: 164 | if attr_name not in self.fid.attrs: 165 | return None 166 | 167 | attr = self.fid.attrs[attr_name] 168 | 169 | if isinstance(attr, bytes): 170 | return attr.decode("ascii") 171 | 172 | return attr 173 | 174 | def __nc_attr(self: LV2io, attr_name: str, ds_name: str) -> np.ndarray | None: 175 | """Read attributes from science products using netCDF4.""" 176 | if ds_name is not None: 177 | for grp_name in ["target_product", "side_product", "instrument"]: 178 | if grp_name not in self.fid.groups: 179 | continue 180 | 181 | if ds_name not in self.fid[grp_name].variables: 182 | continue 183 | 184 | dset = self.fid[f"/{grp_name}/{ds_name}"] 185 | if attr_name in dset.ncattrs(): 186 | return dset.getncattr(attr_name) 187 | 188 | return None 189 | 190 | if attr_name not in self.fid.ncattrs(): 191 | return None 192 | 193 | return self.fid.getncattr(attr_name) 194 | 195 | def get_attr( 196 | self: LV2io, attr_name: str, ds_name: str | None = None 197 | ) -> np.ndarray | None: 198 | """Obtain value of an HDF5 file attribute or dataset attribute. 199 | 200 | Parameters 201 | ---------- 202 | attr_name : str 203 | name of the attribute 204 | ds_name : str, optional 205 | name of dataset, default is to read the product attributes 206 | 207 | """ 208 | if self.science_product: 209 | return self.__nc_attr(attr_name, ds_name) 210 | 211 | return self.__h5_attr(attr_name, ds_name) 212 | 213 | # ----- Time information --------------- 214 | @property 215 | def ref_time(self: LV2io) -> datetime | None: 216 | """Return reference start time of measurements.""" 217 | if self.science_product: 218 | return None 219 | 220 | return datetime(2010, 1, 1, 0, 0, 0) + timedelta( 221 | seconds=int(self.fid["/PRODUCT/time"][0]) 222 | ) 223 | 224 | def get_time(self: LV2io) -> np.ndarray | None: 225 | """Return start time of measurement per scan-line.""" 226 | if self.science_product: 227 | buff = self.get_dataset("time")[:: self.ground_pixel, :] 228 | return np.array([datetime(*x) for x in buff]) 229 | 230 | buff = self.fid["/PRODUCT/delta_time"][0, :] 231 | return np.array([self.ref_time + timedelta(seconds=x / 1e3) for x in buff]) 232 | 233 | # ----- Geolocation -------------------- 234 | def __h5_geo_data(self: LV2io, geo_dsets: str) -> dict: 235 | """Read geolocation datasets from operational products using HDF5.""" 236 | res = {} 237 | if geo_dsets is None: 238 | geo_dsets = "latitude,longitude" 239 | 240 | for key in geo_dsets.split(","): 241 | for grp_name in ["/PRODUCT", "/PRODUCT/SUPPORT_DATA/GEOLOCATIONS"]: 242 | if key in self.fid[grp_name]: 243 | res[key] = np.squeeze(self.fid[f"{grp_name}/{key}"]) 244 | continue 245 | 246 | return res 247 | 248 | def __nc_geo_data(self: LV2io, geo_dsets: str) -> dict: 249 | """Read geolocation datasets from science products using netCDF4.""" 250 | res = {} 251 | if geo_dsets is None: 252 | geo_dsets = "latitude_center,longitude_center" 253 | 254 | for key in geo_dsets.split(","): 255 | if key in self.fid["/instrument"].variables: 256 | ds_name = f"/instrument/{key}" 257 | res[key] = self.fid[ds_name][:].reshape( 258 | self.scanline, self.ground_pixel 259 | ) 260 | 261 | return res 262 | 263 | def get_geo_data(self: LV2io, geo_dsets: str | None = None) -> dict: 264 | """Return data of selected datasets from the GEOLOCATIONS group. 
266 | 
267 |         Parameters
268 |         ----------
269 |         geo_dsets : str, optional
270 |             Name(s) of datasets, comma separated. Default:
271 | 
272 |             * operational: 'latitude,longitude'
273 |             * science: 'latitude_center,longitude_center'
274 | 
275 |         Returns
276 |         -------
277 |         dict
278 |             dictionary with arrays of selected datasets
279 | 
280 |         """
281 |         if self.science_product:
282 |             return self.__nc_geo_data(geo_dsets)
283 | 
284 |         return self.__h5_geo_data(geo_dsets)
285 | 
286 |     # ----- Footprints --------------------
287 |     def __h5_geo_bounds(
288 |         self: LV2io,
289 |         extent: list[float],
290 |         data_sel: tuple[slice | int, ...],
291 |     ) -> tuple:
292 |         """Read latitude/longitude bounding box [HDF5]."""
293 |         if extent is not None:
294 |             if len(extent) != 4:
295 |                 raise ValueError("parameter extent must have 4 elements")
296 | 
297 |             lats = self.fid["/PRODUCT/latitude"][0, ...]
298 |             lons = self.fid["/PRODUCT/longitude"][0, ...]
299 | 
300 |             indx = (
301 |                 (lons >= extent[0])
302 |                 & (lons <= extent[1])
303 |                 & (lats >= extent[2])
304 |                 & (lats <= extent[3])
305 |             ).nonzero()
306 |             data_sel = np.s_[
307 |                 indx[0].min() : indx[0].max(), indx[1].min() : indx[1].max()
308 |             ]
309 | 
310 |         gid = self.fid["/PRODUCT/SUPPORT_DATA/GEOLOCATIONS"]
311 |         if data_sel is None:
312 |             lat_bounds = gid["latitude_bounds"][0, ...]
313 |             lon_bounds = gid["longitude_bounds"][0, ...]
314 |         else:
315 |             data_sel0 = (0, *data_sel, slice(None))
316 |             lat_bounds = gid["latitude_bounds"][data_sel0]
317 |             lon_bounds = gid["longitude_bounds"][data_sel0]
318 | 
319 |         return data_sel, lon_bounds, lat_bounds
320 | 
321 |     def __nc_geo_bounds(
322 |         self: LV2io,
323 |         extent: list[float],
324 |         data_sel: tuple[slice | int, ...],
325 |     ) -> tuple:
326 |         """Read latitude/longitude bounding box [netCDF4]."""
327 |         if extent is not None:
328 |             if len(extent) != 4:
329 |                 raise ValueError("parameter extent must have 4 elements")
330 | 
331 |             lats = self.fid["/instrument/latitude_center"][:].reshape(
332 |                 self.scanline, self.ground_pixel
333 |             )
334 |             lons = self.fid["/instrument/longitude_center"][:].reshape(
335 |                 self.scanline, self.ground_pixel
336 |             )
337 | 
338 |             indx = (
339 |                 (lons >= extent[0])
340 |                 & (lons <= extent[1])
341 |                 & (lats >= extent[2])
342 |                 & (lats <= extent[3])
343 |             ).nonzero()
344 |             data_sel = np.s_[
345 |                 indx[0].min() : indx[0].max(), indx[1].min() : indx[1].max()
346 |             ]
347 | 
348 |         gid = self.fid["/instrument"]
349 |         lat_bounds = gid["latitude_corners"][:].data.reshape(
350 |             self.scanline, self.ground_pixel, 4
351 |         )
352 |         lon_bounds = gid["longitude_corners"][:].data.reshape(
353 |             self.scanline, self.ground_pixel, 4
354 |         )
355 |         if data_sel is not None:
356 |             lat_bounds = lat_bounds[(*data_sel, slice(None))]
357 |             lon_bounds = lon_bounds[(*data_sel, slice(None))]
358 | 
359 |         return data_sel, lon_bounds, lat_bounds
360 | 
361 |     def get_geo_bounds(
362 |         self: LV2io,
363 |         extent: list[float] | None,
364 |         data_sel: tuple[slice | int, ...] | None,
365 |     ) -> np.ndarray | tuple:
366 |         """Return bounds of latitude/longitude as a mesh for plotting.
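        [Editor's sketch -- an addition] The method expands the per-pixel corner
        arrays of shape (scanline, ground_pixel, 4) into a corner mesh of shape
        (scanline+1, ground_pixel+1), the layout expected by e.g. matplotlib's
        pcolormesh; `lv2` is an opened LV2io instance:

            >>> mesh = lv2.get_geo_bounds(None, None)
            >>> mesh["longitude"].shape          # (scanline + 1, ground_pixel + 1)
            >>> # plt.pcolormesh(mesh["longitude"], mesh["latitude"], data)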
367 | 
368 |         Parameters
369 |         ----------
370 |         extent : list
371 |             select data to cover a region with geolocation defined by:
372 |             lon_min, lon_max, lat_min, lat_max and return numpy slice
373 |         data_sel : numpy slice
374 |             a 3-dimensional numpy slice: time, scan_line, ground_pixel
375 |             Note 'data_sel' will be overwritten when 'extent' is defined
376 | 
377 |         Returns
378 |         -------
379 |         data_sel : numpy slice
380 |             Select slice of data which covers geolocation defined by extent.
381 |             Only provided if extent is not None.
382 |         out : dictionary
383 |             With numpy arrays for latitude and longitude
384 | 
385 |         """
386 |         if self.science_product:
387 |             res = self.__nc_geo_bounds(extent, data_sel)
388 |         else:
389 |             res = self.__h5_geo_bounds(extent, data_sel)
390 |         data_sel, lon_bounds, lat_bounds = res
391 | 
392 |         res = {}
393 |         _sz = lon_bounds.shape
394 |         res["longitude"] = np.empty((_sz[0] + 1, _sz[1] + 1), dtype=float)
395 |         res["longitude"][:-1, :-1] = lon_bounds[:, :, 0]
396 |         res["longitude"][-1, :-1] = lon_bounds[-1, :, 1]
397 |         res["longitude"][:-1, -1] = lon_bounds[:, -1, 1]
398 |         res["longitude"][-1, -1] = lon_bounds[-1, -1, 2]
399 | 
400 |         res["latitude"] = np.empty((_sz[0] + 1, _sz[1] + 1), dtype=float)
401 |         res["latitude"][:-1, :-1] = lat_bounds[:, :, 0]
402 |         res["latitude"][-1, :-1] = lat_bounds[-1, :, 1]
403 |         res["latitude"][:-1, -1] = lat_bounds[:, -1, 1]
404 |         res["latitude"][-1, -1] = lat_bounds[-1, -1, 2]
405 | 
406 |         if extent is None:
407 |             return res
408 | 
409 |         return data_sel, res
410 | 
411 |     # ----- Datasets (numpy) --------------------
412 |     def __h5_dataset(
413 |         self: LV2io, name: str, data_sel: tuple[slice | int, ...], fill_as_nan: bool
414 |     ) -> np.ndarray:
415 |         """Read dataset from operational products using HDF5."""
416 |         fillvalue = float.fromhex("0x1.ep+122")
417 | 
418 |         if name not in self.fid["/PRODUCT"]:
419 |             raise ValueError(f"dataset {name} not found")
420 | 
421 |         dset = self.fid[f"/PRODUCT/{name}"]
422 |         if data_sel is None:
423 |             if dset.dtype == np.float32:
424 |                 res = dset.astype(float)[0, ...]
425 |             else:
426 |                 res = dset[0, ...]
427 |         else:
428 |             if dset.dtype == np.float32:
429 |                 res = dset.astype(float)[(0, *data_sel)]
430 |             else:
431 |                 res = dset[(0, *data_sel)]
432 | 
433 |         if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
434 |             res[(res == fillvalue)] = np.nan
435 | 
436 |         return res
437 | 
438 |     def __nc_dataset(
439 |         self: LV2io, name: str, data_sel: tuple[slice | int, ...], fill_as_nan: bool
440 |     ) -> np.ndarray:
441 |         """Read dataset from science products using netCDF4."""
442 |         if name in self.fid["/target_product"].variables:
443 |             group = "/target_product"
444 |         elif name in self.fid["/instrument"].variables:
445 |             group = "/instrument"
446 |         else:
447 |             raise ValueError(f"dataset {name} not found")
448 | 
449 |         dset = self.fid[f"{group}/{name}"]
450 |         if dset.size == self.scanline * self.ground_pixel:
451 |             res = dset[:].reshape(self.scanline, self.ground_pixel)
452 |         else:
453 |             res = dset[:]
454 |         if data_sel is not None:
455 |             res = res[data_sel]
456 | 
457 |         if fill_as_nan:
458 |             return res.filled(np.nan)
459 | 
460 |         return res.data
461 | 
462 |     def get_dataset(
463 |         self: LV2io,
464 |         name: str,
465 |         data_sel: tuple[slice | int, ...] | slice | None = None,
466 |         fill_as_nan: bool = True,
467 |     ) -> np.ndarray:
468 |         """Read level 2 dataset from PRODUCT group.
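        [Editor's sketch -- an addition] Usage example; 'nitrogendioxide_tropospheric_column'
        is the dataset name used in the operational NO2 products, other products use
        other names:

            >>> data_sel = np.s_[500:600, :]     # scanline, ground_pixel
            >>> no2 = lv2.get_dataset("nitrogendioxide_tropospheric_column", data_sel)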
468 | 
469 |         Parameters
470 |         ----------
471 |         name : string
472 |             name of dataset with level 2 data
473 |         data_sel : numpy slice
474 |             a 3-dimensional numpy slice: time, scan_line, ground_pixel
475 |         fill_as_nan : boolean
476 |             Replace (float) FillValues with Nan's, when True
477 | 
478 |         Returns
479 |         -------
480 |         numpy.ndarray
481 | 
482 |         """
483 |         if self.science_product:
484 |             return self.__nc_dataset(name, data_sel, fill_as_nan)
485 | 
486 |         return self.__h5_dataset(name, data_sel, fill_as_nan)
487 | 
488 |     # ----- Dataset (xarray) --------------------
489 |     def __h5_data_as_xds(
490 |         self: LV2io, name: str, data_sel: tuple[slice | int]
491 |     ) -> xr.DataArray:
492 |         """Read dataset from group PRODUCT using HDF5.
493 | 
494 |         Input: operational product
495 | 
496 |         Return: xarray.DataArray
497 |         """
498 |         if name not in self.fid["/PRODUCT"]:
499 |             raise ValueError(f"dataset {name} not found")
500 |         dset = self.fid[f"/PRODUCT/{name}"]
501 | 
502 |         # ToDo handle parameter mol_m2
503 |         return h5_to_xr(dset, np.s_[0, ...] if data_sel is None else (0, *data_sel)).squeeze()
504 | 
505 |     def __nc_data_as_xds(
506 |         self: LV2io, name: str, data_sel: tuple[slice | int]
507 |     ) -> xr.DataArray:
508 |         """Read dataset from group target_product or instrument using netCDF4.
509 | 
510 |         Input: science product
511 | 
512 |         Return: xarray.DataArray
513 |         """
514 |         # note: the group of the dataset is resolved by get_dataset
515 |         if name not in (
516 |             list(self.fid["/target_product"].variables)
517 |             + list(self.fid["/instrument"].variables)
518 |         ):
519 |             raise ValueError(f"dataset {name} not found")
520 | 
521 |         return data_to_xr(
522 |             self.get_dataset(name, data_sel),
523 |             dims=["scanline", "ground_pixel"],
524 |             name=name,
525 |             long_name=self.get_attr("long_name", name),
526 |             units=self.get_attr("units", name),
527 |         )
528 | 
529 |     def get_data_as_xds(
530 |         self: LV2io, name: str, data_sel: tuple[slice | int] | None = None
531 |     ) -> xr.DataArray:
532 |         """Read dataset from the PRODUCT or target_product group.
533 | 
534 |         Parameters
535 |         ----------
536 |         name : str
537 |             name of dataset with level 2 data
538 |         data_sel : numpy slice
539 |             a 3-dimensional numpy slice: time, scan_line, ground_pixel
540 | 
541 |         Returns
542 |         -------
543 |         xarray.DataArray
544 | 
545 |         """
546 |         if self.science_product:
547 |             return self.__nc_data_as_xds(name, data_sel)
548 | 
549 |         return self.__h5_data_as_xds(name, data_sel)
550 | 
--------------------------------------------------------------------------------
/src/pys5p/ocm_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`OCMio`, class to access on-ground calibration data."""
11 | 
12 | from __future__ import annotations
13 | 
14 | __all__ = ["OCMio"]
15 | 
16 | from datetime import datetime, timedelta
17 | from pathlib import Path, PurePosixPath
18 | from typing import Any, Self
19 | 
20 | import h5py
21 | import numpy as np
22 | from moniplot.biweight import Biweight
23 | 
24 | # - global parameters ------------------------------
25 | 
26 | 
27 | # - local functions --------------------------------
28 | def band2channel(
29 |     dict_a: dict, dict_b: dict, mode: list[str] | None = None
30 | ) -> np.ndarray | tuple[Any, Any]:
31 |     """Store data from a dictionary as returned by get_msm_data into an ndarray.
32 | 
33 |     Parameters
34 |     ----------
35 |     dict_a : dict
36 |         data of one spectral band
37 |     dict_b : dict
38 |         data of another spectral band
39 |     mode : list of str, any of 'combined', 'mean', 'median', 'biweight'
40 |         'combined'
41 |             will combine data using np.concatenate((data_a, data_b),
42 |                 axis=data_a.ndim-1)
43 | 
44 |         'mean'
45 |             is calculated using np.nanmean(data, axis=0)
46 | 
47 |         'median'
48 |             is calculated using np.nanmedian(data, axis=0)
49 | 
50 |         'biweight'
51 |             is calculated using Biweight(data, axis=0).median
52 | 
53 |     Returns
54 |     -------
55 |     numpy.ndarray
56 |         Data from dictionary stored in a numpy array
57 | 
58 |     Examples
59 |     --------
60 |     > data = band2channel(dict_a, dict_b, mode=['combined', 'median'])
61 | 
62 |     """
63 |     if mode is None:
64 |         mode = []
65 | 
66 |     data_a = None
67 |     for key in sorted(dict_a):
68 |         buff = dict_a[key][...]
69 | 
70 |         data_a = buff if data_a is None else np.vstack((data_a, buff))
71 | 
72 |     if data_a is not None:
73 |         if "mean" in mode:
74 |             data_a = np.nanmean(data_a, axis=0)
75 |         elif "median" in mode:
76 |             data_a = np.nanmedian(data_a, axis=0)
77 |         elif "biweight" in mode:
78 |             data_a = Biweight(data_a, axis=0).median
79 | 
80 |     if dict_b is None:
81 |         return data_a
82 | 
83 |     data_b = None
84 |     for key in sorted(dict_b):
85 |         buff = dict_b[key][...]
86 | 
87 |         data_b = buff if data_b is None else np.vstack((data_b, buff))
88 | 
89 |     if data_b is not None:
90 |         if "mean" in mode:
91 |             data_b = np.nanmean(data_b, axis=0)
92 |         elif "median" in mode:
93 |             data_b = np.nanmedian(data_b, axis=0)
94 |         elif "biweight" in mode:
95 |             data_b = Biweight(data_b, axis=0).median
96 | 
97 |     if "combined" in mode:
98 |         return np.concatenate((data_a, data_b), axis=data_a.ndim - 1)
99 | 
100 |     return data_a, data_b
101 | 
102 | 
103 | # - class definition -------------------------------
104 | class OCMio:
105 |     """A class to read Tropomi on-ground calibration products (Lx).
106 | 
107 |     Parameters
108 |     ----------
109 |     ocm_product : Path
110 |         Full path to on-ground calibration measurement
111 | 
112 |     """
113 | 
114 |     def __init__(self: OCMio, ocm_product: Path) -> None:
115 |         """Initialize access to an OCAL Lx product."""
116 |         if not ocm_product.is_file():
117 |             raise FileNotFoundError(f"{ocm_product.name} does not exist")
118 | 
119 |         # initialize class-attributes
120 |         self.__msm_path = None
121 |         self.band = None
122 |         self.filename = ocm_product
123 | 
124 |         # open OCM product as HDF5 file
125 |         self.fid = h5py.File(ocm_product, "r")
126 | 
127 |     def __iter__(self: OCMio) -> None:
128 |         """Allow iteration."""
129 |         for attr in sorted(self.__dict__):
130 |             if not attr.startswith("__"):
131 |                 yield attr
132 | 
133 |     # def __del__(self):
134 |     #     """
135 |     #     called when the object is destroyed
136 |     #     """
137 |     #     self.close()
138 | 
139 |     def __enter__(self: OCMio) -> Self:
140 |         """Initiate the context manager."""
141 |         return self
142 | 
143 |     def __exit__(self: OCMio, *args: object) -> bool:
144 |         """Exit the context manager."""
145 |         self.close()
146 |         return False  # any exception is raised by the with statement.
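    # A minimal usage sketch (file name, ICID and dataset name below are
    # hypothetical); the context manager guarantees that the HDF5 file is
    # closed, also when an exception is raised:
    #
    #     with OCMio(Path("ocal_lx_prod.h5")) as ocm:
    #         if ocm.select(ic_id=31524) > 0:
    #             res = ocm.get_msm_data("signal")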
147 | 
148 |     def close(self: OCMio) -> None:
149 |         """Close resources."""
150 |         self.band = None
151 |         if self.fid is not None:
152 |             self.fid.close()
153 |             self.fid = None
154 | 
155 |     # ---------- RETURN VERSION of the S/W ----------
156 |     # ---------- Functions that work before MSM selection ----------
157 |     def get_processor_version(self: OCMio) -> str:
158 |         """Return version of the L01b processor."""
159 |         res = self.fid.attrs["processor_version"]
160 |         if isinstance(res, bytes):
161 |             # pylint: disable=no-member
162 |             res = res.decode("ascii")
163 |         return res
164 | 
165 |     def get_coverage_time(self: OCMio) -> tuple[str, str]:
166 |         """Return start and end of the measurement coverage time."""
167 |         t_bgn = self.fid.attrs["time_coverage_start"]
168 |         if isinstance(t_bgn, bytes):
169 |             # pylint: disable=no-member
170 |             t_bgn = t_bgn.decode("ascii")
171 | 
172 |         t_end = self.fid.attrs["time_coverage_end"]
173 |         if isinstance(t_end, bytes):
174 |             # pylint: disable=no-member
175 |             t_end = t_end.decode("ascii")
176 |         return t_bgn, t_end
177 | 
178 |     def get_attr(self: OCMio, attr_name: str) -> np.ndarray | None:
179 |         """Obtain value of an HDF5 file attribute.
180 | 
181 |         Parameters
182 |         ----------
183 |         attr_name : string
184 |             name of the attribute
185 | 
186 |         """
187 |         if attr_name in self.fid.attrs:
188 |             return self.fid.attrs[attr_name]
189 | 
190 |         return None
191 | 
192 |     # ---------- Functions that only work after MSM selection ----------
193 |     def get_ref_time(self: OCMio) -> dict:
194 |         """Return reference start time of measurements."""
195 |         if not self.__msm_path:
196 |             return {}
197 | 
198 |         grp = self.fid[f"BAND{self.band}"]
199 |         res = {}
200 |         for msm in sorted(self.__msm_path):
201 |             sgrp = grp[str(PurePosixPath(msm, "GEODATA"))]
202 |             res[msm] = datetime(2010, 1, 1, 0, 0, 0)
203 |             res[msm] += timedelta(seconds=int(sgrp["time"][0]))
204 | 
205 |         return res
206 | 
207 |     def get_delta_time(self: OCMio) -> dict:
208 |         """Return offset from the reference start time of measurement."""
209 |         if not self.__msm_path:
210 |             return {}
211 | 
212 |         grp = self.fid[f"BAND{self.band}"]
213 |         res = {}
214 |         for msm in sorted(self.__msm_path):
215 |             sgrp = grp[str(PurePosixPath(msm, "GEODATA"))]
216 |             res[msm] = sgrp["delta_time"][:].astype(int)
217 | 
218 |         return res
219 | 
220 |     def get_instrument_settings(self: OCMio) -> dict:
221 |         """Return instrument settings of measurement."""
222 |         if not self.__msm_path:
223 |             return {}
224 | 
225 |         grp = self.fid[f"BAND{self.band}"]
226 |         res = {}
227 |         for msm in sorted(self.__msm_path):
228 |             sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
229 |             res[msm] = np.squeeze(sgrp["instrument_settings"])
230 | 
231 |         return res
232 | 
233 |     def get_gse_stimuli(self: OCMio) -> dict:
234 |         """Return GSE stimuli parameters."""
235 |         if not self.__msm_path:
236 |             return {}
237 | 
238 |         grp = self.fid[f"BAND{self.band}"]
239 |         res = {}
240 |         for msm in sorted(self.__msm_path):
241 |             sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
242 |             res[msm] = np.squeeze(sgrp["gse_stimuli"])
243 | 
244 |         return res
245 | 
246 |     def get_exposure_time(self: OCMio) -> float | None:
247 |         """Return the exact pixel exposure time of the measurements."""
248 |         if not self.__msm_path:
249 |             return None
250 | 
251 |         grp = self.fid[f"BAND{self.band}"]
252 |         msm = self.__msm_path[0]  # all measurement sets have the same ICID
253 |         sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
254 |         instr = np.squeeze(sgrp["instrument_settings"])
255 | 
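        # SWIR detector (bands 7 and 8): the exact pixel exposure time is
        # derived from the int_delay and int_hold settings, see pys5p.swir_texp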
256 |         if int(self.band) > 6:
257 |             return 1.25e-6 * (65540 - instr["int_delay"] + instr["int_hold"])
258 | 
259 |         return instr["exposure_time"]
260 | 
261 |     def get_housekeeping_data(self: OCMio) -> dict:
262 |         """Return housekeeping data of measurements."""
263 |         if not self.__msm_path:
264 |             return {}
265 | 
266 |         grp = self.fid[f"BAND{self.band}"]
267 |         res = {}
268 |         for msm in sorted(self.__msm_path):
269 |             sgrp = grp[str(PurePosixPath(msm, "INSTRUMENT"))]
270 |             res[msm] = np.squeeze(sgrp["housekeeping_data"])
271 | 
272 |         return res
273 | 
274 |     # -------------------------
275 |     def select(
276 |         self: OCMio, ic_id: int | None = None, *, msm_grp: str | None = None
277 |     ) -> int:
278 |         """Select a measurement as BAND%/ICID__GROUP_%.
279 | 
280 |         Parameters
281 |         ----------
282 |         ic_id : int
283 |             used as "BAND%/ICID_{}_GROUP_%".format(ic_id)
284 |         msm_grp : str
285 |             select measurement group with name msm_grp
286 | 
287 |         All measurement groups are shown when ic_id and msm_grp are None
288 | 
289 |         Returns
290 |         -------
291 |         scalar
292 |             Number of measurements found
293 | 
294 |         Notes
295 |         -----
296 |         Updated object attributes:
297 |          - band : the selected spectral band
298 | 
299 |         """
300 |         self.band = ""
301 |         self.__msm_path = []
302 |         for ii in "87654321":
303 |             if f"BAND{ii}" in self.fid:
304 |                 self.band = ii
305 |                 break
306 | 
307 |         if self.band:
308 |             gid = self.fid[f"BAND{self.band}"]
309 |             if msm_grp is not None and msm_grp in gid:
310 |                 self.__msm_path = [msm_grp]
311 |             elif ic_id is None:
312 |                 grp_name = "ICID_"
313 |                 for kk in gid:
314 |                     if kk.startswith(grp_name):
315 |                         print(kk)
316 |             else:
317 |                 grp_name = f"ICID_{ic_id:05}_GROUP"
318 |                 self.__msm_path = [s for s in gid if s.startswith(grp_name)]
319 | 
320 |         return len(self.__msm_path)
321 | 
322 |     # -------------------------
323 |     def get_msm_attr(self: OCMio, msm_dset: str, attr_name: str) -> str | None:
324 |         """Return attribute of measurement dataset "msm_dset".
325 | 
326 |         Parameters
327 |         ----------
328 |         msm_dset : str
329 |             name of measurement dataset
330 |         attr_name : str
331 |             name of the attribute
332 | 
333 |         Returns
334 |         -------
335 |         scalar or numpy.ndarray
336 |             value of attribute "attr_name"
337 | 
338 |         """
339 |         if not self.__msm_path:
340 |             return ""
341 | 
342 |         grp = self.fid[f"BAND{self.band}"]
343 |         for msm_path in self.__msm_path:
344 |             ds_path = str(PurePosixPath(msm_path, "OBSERVATIONS", msm_dset))
345 | 
346 |             if attr_name in grp[ds_path].attrs:
347 |                 attr = grp[ds_path].attrs[attr_name]
348 |                 if isinstance(attr, bytes):
349 |                     return attr.decode("ascii")
350 | 
351 |                 return attr
352 | 
353 |         return None
354 | 
355 |     # -------------------------
356 |     def get_msm_data(
357 |         self: OCMio,
358 |         msm_dset: str,
359 |         fill_as_nan: bool = True,
360 |         frames: list[int, int] | None = None,
361 |         columns: list[int, int] | None = None,
362 |     ) -> dict:
363 |         """Return data of measurement dataset `msm_dset`.
364 | 
365 |         Parameters
366 |         ----------
367 |         msm_dset : str
368 |             name of measurement dataset;
369 |             if msm_dset is None then the names of available datasets are shown
370 | 
371 |         fill_as_nan : boolean
372 |             replace (float) FillValues with Nan's
373 | 
374 |         frames : [i, j]
375 |             Slice data on the slowest axis (time) from index 'i' to 'j'
376 | 
377 |         columns : [i, j]
378 |             Slice data on the fastest axis (columns) from index 'i' to 'j'
379 | 
380 |         Returns
381 |         -------
382 |         dict
383 |             Python dictionary with names of msm_groups as keys
384 | 
385 |         """
386 |         fillvalue = float.fromhex("0x1.ep+122")
387 | 
388 |         if not self.__msm_path:
389 |             return {}
390 | 
391 |         # show HDF5 dataset names and return
392 |         grp = self.fid[f"BAND{self.band}"]
393 |         if msm_dset is None:
394 |             ds_path = str(PurePosixPath(self.__msm_path[0], "OBSERVATIONS"))
395 |             for kk in grp[ds_path]:
396 |                 print(kk)
397 |             return {}
398 | 
399 |         # skip row 257 from the SWIR detector
400 |         rows = None
401 |         if self.band in ("7", "8"):
402 |             rows = [0, -1]
403 | 
404 |         # combine data of all measurement groups in dictionary
405 |         res = {}
406 |         for msm_grp in sorted(self.__msm_path):
407 |             dset = grp[str(PurePosixPath(msm_grp, "OBSERVATIONS", msm_dset))]
408 |             data_sel = ()
409 |             for ii in range(dset.ndim):
410 |                 dim_name = PurePosixPath(dset.dims[ii][0].name).name
411 |                 if dim_name == "msmt_time":
412 |                     if frames is None:
413 |                         data_sel += (slice(None),)
414 |                     else:
415 |                         data_sel += (slice(*frames),)
416 |                 elif dim_name == "row":
417 |                     if rows is None:
418 |                         data_sel += (slice(None),)
419 |                     else:
420 |                         data_sel += (slice(*rows),)
421 |                 elif dim_name == "column":
422 |                     if columns is None:
423 |                         data_sel += (slice(None),)
424 |                     else:
425 |                         data_sel += (slice(*columns),)
426 |                 else:
427 |                     raise ValueError(f"unexpected dimension: {dim_name}")
428 | 
429 |             # read data
430 |             if dset.dtype == np.float32:
431 |                 data = np.squeeze(dset.astype(float)[data_sel])
432 |             else:
433 |                 data = np.squeeze(dset[data_sel])
434 | 
435 |             if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
436 |                 data[(data == fillvalue)] = np.nan
437 | 
438 |             # add data to dictionary
439 |             res[msm_grp] = data
440 | 
441 |         return res
442 | 
443 |     # -------------------------
444 |     def read_direct_msm(
445 |         self: OCMio,
446 |         msm_dset: str,
447 |         dest_sel: tuple[slice | int] | None = None,
448 |         dest_dtype: type[Any] | None = None,
449 |         fill_as_nan: bool = False,
450 |     ) -> dict | None:
451 |         """Return data of measurement dataset `msm_dset` (fast implementation).
452 | 
453 |         Parameters
454 |         ----------
455 |         msm_dset : string
456 |             Name of measurement dataset
457 |         dest_sel : numpy slice
458 |             Selection must be the output of numpy.s_[].
459 |         dest_dtype : numpy dtype
460 |             Perform type conversion
461 |         fill_as_nan : boolean
462 |             Replace (float) FillValues with Nan's, when True
463 | 
464 |         Returns
465 |         -------
466 |         dict
467 |             Python dictionary with names of msm_groups as keys
468 | 
469 |         """
470 |         fillvalue = float.fromhex("0x1.ep+122")
471 | 
472 |         if not self.__msm_path:
473 |             return None
474 | 
475 |         if dest_sel is None:
476 |             dest_sel = np.s_[...]
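            # note: np.s_[...] is an Ellipsis, i.e. the complete dataset is read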
477 | 
478 |         # combine data of all measurement groups in dictionary
479 |         res = {}
480 |         for msm_grp in sorted(self.__msm_path):
481 |             dset = self.fid[
482 |                 str(
483 |                     PurePosixPath(f"BAND{self.band}", msm_grp, "OBSERVATIONS", msm_dset)
484 |                 )
485 |             ]
486 | 
487 |             if dest_dtype is None:
488 |                 buff = dset[dest_sel]
489 |             else:
490 |                 buff = dset.astype(dest_dtype)[dest_sel]
491 | 
492 |             if fill_as_nan and dset.attrs["_FillValue"] == fillvalue:
493 |                 buff[(buff == fillvalue)] = np.nan
494 | 
495 |             # add data to dictionary
496 |             res[msm_grp] = buff
497 | 
498 |         return res
499 | 
--------------------------------------------------------------------------------
/src/pys5p/rls.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """Implementation of the Relative Least-Squares regression (RLS).
11 | 
12 | The RLS regression is used to find the linear dependence y(x) = c0 + c1 * x
13 | that best describes the data before and after correction, using absolute
14 | residuals y_i - (c0 + c1 * x_i) divided by the expected signals c1 * x_i in
15 | the least-squares sum. Offset c0 has an arbitrary size and should not affect
16 | the fit result. Weight factors are chosen such that the data points are
17 | effectively spread evenly over the whole range of x, making the result less
18 | sensitive to the actual spacing between the data points.
19 | """
20 | 
21 | from __future__ import annotations
22 | 
23 | __all__ = ["rls_fit", "rls_fit0"]
24 | 
25 | import numpy as np
26 | from numpy import ma
27 | 
28 | 
29 | def calc_ma_weights(xdata: np.ndarray, masked: np.ndarray) -> ma.MaskedArray:
30 |     """Generate weight factor per pixel.
31 | 
32 |     Notes
33 |     -----
34 |     It might be that np.apply_along_axis() is slightly faster, however, the
35 |     for-loop 'row in buff' is also very efficient when using the specially
36 |     designed MaskedArray 'buff' which we eventually use to store the weight
37 |     values per pixel.
38 | 
39 |     """
40 |     buff = ma.array(np.repeat([xdata], masked.shape[0], axis=0), mask=masked)
41 |     for row in buff:
42 |         valid = ma.compressed(row)
43 |         if len(valid) < 2:
44 |             continue
45 |         wght = np.concatenate(
46 |             (
47 |                 [2 * (valid[1] - valid[0])],
48 |                 valid[2:] - valid[0:-2],
49 |                 [2 * (valid[-1] - valid[-2])],
50 |             )
51 |         )
52 |         row[~row.mask] = wght
53 | 
54 |     return buff
55 | 
56 | 
57 | def rls_fit(xdata: np.ndarray, ydata: np.ndarray | ma.MaskedArray) -> tuple:
58 |     """Perform RLS regression finding linear dependence y(x) = c0 + c1 * x.
59 | 
60 |     Parameters
61 |     ----------
62 |     xdata : ndarray, shape (M,)
63 |         X-coordinates of the M sample points (xdata[i], ydata[..., i])
64 |         The array values have to be positive and increasing
65 |     ydata : MaskedArray or ndarray, shape (..., M)
66 |         Y-coordinates of the sample points
67 | 
68 |     Returns
69 |     -------
70 |     c0, c1, std_c0, std_c1 : tuple of ndarrays
71 |         coefficients of the linear dependence and their standard deviations
72 | 
73 |     Notes
74 |     -----
75 |     Calling an rls-function with MaskedArrays is much slower than with
76 |     plain ndarrays.
77 | 
78 |     The coefficients are set to NaN when the number of samples is less than 2.
79 | 
80 |     The standard deviations can only be calculated when the number of samples
81 |     is larger than two; otherwise the standard deviations are equal to zero.
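
    Examples
    --------
    A minimal sketch with noise-free synthetic data (shapes and values are
    illustrative); the fit should recover c0 = 1 and c1 = 2 for every pixel::

        > import numpy as np
        > xdata = np.array([1.0, 2.0, 4.0, 8.0])
        > ydata = np.tile(1.0 + 2.0 * xdata, (32, 1))
        > cc0, cc1, sc0, sc1 = rls_fit(xdata, ydata)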
82 | 
83 |     """
84 |     # pylint: disable=too-many-locals
85 |     if xdata.size < 2:
86 |         raise RuntimeError("too few sample points for a fit")
87 |     if xdata.size != ydata.shape[-1]:
88 |         raise RuntimeError("number of samples not equal for xdata, ydata")
89 | 
90 |     # perform all computations on 2 dimensional arrays
91 |     img_shape = ydata.shape[:-1]
92 |     yy1 = ydata.reshape(-1, xdata.size)
93 | 
94 |     # calculate weights
95 |     if ma.isMaskedArray(ydata):
96 |         wghts = calc_ma_weights(xdata, ma.getmaskarray(yy1))
97 |     else:
98 |         buff = np.concatenate(
99 |             (
100 |                 [2 * (xdata[1] - xdata[0])],
101 |                 xdata[2:] - xdata[0:-2],
102 |                 [2 * (xdata[-1] - xdata[-2])],
103 |             )
104 |         )
105 |         wghts = np.repeat([buff], yy1.shape[0], axis=0)
106 |     wx1 = wghts / xdata
107 |     wx2 = wghts / xdata**2  # is wx1 / xdata faster?
108 | 
109 |     # calculate the Q elements
110 |     q00 = wghts.sum(axis=1)
111 |     q01 = wx1.sum(axis=1)
112 |     q02 = wx2.sum(axis=1)
113 | 
114 |     q11 = (wx1 * yy1).sum(axis=1)
115 |     q12 = (wx2 * yy1).sum(axis=1)
116 |     q22 = (wx2 * yy1**2).sum(axis=1)
117 | 
118 |     # calculate the Z elements
119 |     zz1 = q00 * q02 - q01**2
120 |     zz2 = q00 * q12 - q01 * q11
121 |     zz3 = q02 * q11 - q01 * q12
122 | 
123 |     # calculate fit parameters and their uncertainties
124 |     num = yy1.count(axis=1) if ma.isMaskedArray(ydata) else len(xdata)
125 |     cc0 = zz2 / zz1
126 |     cc1 = zz3 / zz1
127 |     if ma.isMaskedArray(ydata):
128 |         chi2 = ma.abs(q22 - q12 * cc0 - q11 * cc1) / np.clip(num - 2, 1, None)
129 |         chi2[num <= 2] = 0
130 |         sc0 = ma.sqrt(q00 * chi2 / zz1)
131 |         sc1 = ma.sqrt(q02 * chi2 / zz1)
132 | 
133 |         return (
134 |             cc0.reshape(img_shape).filled(np.nan),
135 |             cc1.reshape(img_shape).filled(np.nan),
136 |             sc0.reshape(img_shape).filled(np.nan),
137 |             sc1.reshape(img_shape).filled(np.nan),
138 |         )
139 | 
140 |     # using only non-MaskedArray functions
141 |     chi2 = np.abs(q22 - q12 * cc0 - q11 * cc1) / np.clip(num - 2, 1, None)
142 |     chi2[num <= 2] = 0
143 |     sc0 = np.sqrt(q00 * chi2 / zz1)
144 |     sc1 = np.sqrt(q02 * chi2 / zz1)
145 | 
146 |     return (
147 |         cc0.reshape(img_shape),
148 |         cc1.reshape(img_shape),
149 |         sc0.reshape(img_shape),
150 |         sc1.reshape(img_shape),
151 |     )
152 | 
153 | 
154 | def rls_fit0(xdata: np.ndarray, ydata: np.ndarray | ma.MaskedArray) -> tuple:
155 |     """Perform RLS regression finding linear dependence y(x) = c1 * x.
156 | 
157 |     Parameters
158 |     ----------
159 |     xdata : ndarray, shape (M,)
160 |         X-coordinates of the M sample points (xdata[i], ydata[..., i])
161 |         The array values have to be positive and increasing
162 |     ydata : MaskedArray or ndarray, shape (..., M)
163 |         Y-coordinates of the sample points
164 | 
165 |     Returns
166 |     -------
167 |     c1, std_c1 : tuple of ndarrays
168 |         coefficients of the linear dependence and their standard deviations
169 | 
170 |     Notes
171 |     -----
172 |     The coefficients are set to NaN when the number of samples is less than 2.
173 | 
174 |     The standard deviations can only be calculated when the number of samples
175 |     is larger than two; otherwise the standard deviations are equal to zero.
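
    Examples
    --------
    A minimal sketch (values are illustrative); the fit should recover
    c1 = 0.5 for every pixel::

        > import numpy as np
        > xdata = np.array([1.0, 2.0, 4.0, 8.0])
        > ydata = np.tile(0.5 * xdata, (32, 1))
        > cc1, sc1 = rls_fit0(xdata, ydata)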
176 | 
177 |     """
178 |     if xdata.size < 2:
179 |         raise RuntimeError("too few points for a fit")
180 |     if xdata.size != ydata.shape[-1]:
181 |         raise RuntimeError("number of samples not equal for xdata, ydata")
182 | 
183 |     # perform all computations on 2 dimensional arrays
184 |     img_shape = ydata.shape[:-1]
185 |     yy1 = ydata.reshape(-1, xdata.size)
186 | 
187 |     # calculate weights
188 |     if ma.isMaskedArray(ydata):
189 |         wghts = calc_ma_weights(xdata, ma.getmaskarray(yy1))
190 |     else:
191 |         buff = np.concatenate(
192 |             (
193 |                 [2 * (xdata[1] - xdata[0])],
194 |                 xdata[2:] - xdata[0:-2],
195 |                 [2 * (xdata[-1] - xdata[-2])],
196 |             )
197 |         )
198 |         wghts = np.repeat([buff], yy1.shape[0], axis=0)
199 |     wx1 = wghts / xdata
200 |     wx2 = wghts / xdata**2
201 | 
202 |     # calculate the Q elements
203 |     q00 = wghts.sum(axis=1)
204 |     q11 = (wx1 * yy1).sum(axis=1)
205 |     q22 = (wx2 * yy1**2).sum(axis=1)
206 | 
207 |     # calculate fit parameter and its variance
208 |     num = yy1.count(axis=1) if ma.isMaskedArray(ydata) else len(xdata)
209 |     cc1 = q11 / q00
210 |     if ma.isMaskedArray(ydata):
211 |         cc1[num < 1] = ma.masked
212 |         chi2 = ma.abs(q22 - q00 * cc1**2) / np.clip(num - 1, 1, None)
213 |         chi2[num <= 1] = ma.masked
214 |         sc1 = ma.sqrt(chi2 / q00)
215 |         return (
216 |             cc1.reshape(img_shape).filled(np.nan),
217 |             sc1.reshape(img_shape).filled(np.nan),
218 |         )
219 | 
220 |     # using only non-MaskedArray functions
221 |     cc1[num < 1] = np.nan
222 |     chi2 = np.abs(q22 - q00 * cc1**2) / np.clip(num - 1, 1, None)
223 |     chi2[num <= 1] = np.nan
224 |     sc1 = np.sqrt(chi2 / q00)
225 |     return cc1.reshape(img_shape), sc1.reshape(img_shape)
--------------------------------------------------------------------------------
/src/pys5p/s5p_msm.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is part of pyS5p
3 | #
4 | # https://github.com/rmvanhees/pys5p.git
5 | #
6 | # Copyright (c) 2017-2025 SRON
7 | # All Rights Reserved
8 | #
9 | # License: BSD-3-Clause
10 | """`S5Pmsm`, class to read HDF5 datasets with their coordinates and attributes.
11 | 
12 | .. warning:: Deprecated, this module is no longer maintained.
13 | """
14 | 
15 | from __future__ import annotations
16 | 
17 | __all__ = ["S5Pmsm"]
18 | 
19 | from copy import deepcopy
20 | from pathlib import PurePath
21 | from typing import NamedTuple
22 | 
23 | import numpy as np
24 | from h5py import Dataset
25 | from moniplot.biweight import Biweight
26 | 
27 | # The class S5Pmsm reads HDF5 measurement data including its attributes and
28 | # dimensions. Initialization:
29 | #
30 | #  S5Pmsm attribute | hdf5 dataset           | Numpy array
31 | #  -------------------------------------------------------------------------
32 | #  name             | h5_dset.name           | 'value'
33 | #  value            | h5_dset.value['value'] | np.squeeze(data)
34 | #                   | or h5_dset.value       |
35 | #  error            | h5_dset.value['error'] | None
36 | #                   | or None                |
37 | #  coords           | h5_dset.dims           | [[['time',] 'row',] 'column']
38 | #  units            | attrs['units']         | None
39 | #  long_name        | attrs['long_name']     | ''
40 | #  fillvalue        | h5_dset.fillvalue      | None
41 | #  coverage         | None                   | None
42 | #
43 | # Limited to 3 dimensions
44 | 
45 | 
46 | # - local functions --------------------------------
47 | def pad_rows(arr1: np.ndarray, arr2: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
48 |     """Pad the array with the fewest rows with NaN's."""
49 |     if arr2.ndim == 2:
50 |         if arr1.shape[0] < arr2.shape[0]:
51 |             buff = arr1.copy()
52 |             arr1 = np.full_like(arr2, np.nan)
53 |             arr1[0 : buff.shape[0], :] = buff
54 |         elif arr1.shape[0] > arr2.shape[0]:
55 |             buff = arr2.copy()
56 |             arr2 = np.full_like(arr1, np.nan)
57 |             arr2[0 : buff.shape[0], :] = buff
58 |     else:
59 |         if arr1.shape[1] < arr2.shape[1]:
60 |             buff = arr1.copy()
61 |             arr1 = np.full_like(arr2, np.nan)
62 |             arr1[:, 0 : buff.shape[1], :] = buff
63 |         elif arr1.shape[1] > arr2.shape[1]:
64 |             buff = arr2.copy()
65 |             arr2 = np.full_like(arr1, np.nan)
66 |             arr2[:, 0 : buff.shape[1], :] = buff
67 | 
68 |     return arr1, arr2
69 | 
70 | 
71 | # - class definition -------------------------------
72 | class S5Pmsm:
73 |     r"""A class to hold an HDF5 dataset and its attributes.
74 | 
75 |     Parameters
76 |     ----------
77 |     dset : h5py.Dataset or ndarray
78 |         h5py dataset from which the data is read, data is used to
79 |         initialize S5Pmsm object
80 |     data_sel : numpy slice
81 |         a numpy slice generated for example `numpy.s\_`
82 |     datapoint : bool
83 |         to indicate that the dataset is a compound of type datapoint
84 | 
85 |     Notes
86 |     -----
87 |     The object holds the dataset values and attributes, including data,
88 |     fillvalue, coordinates, units, ...
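
    Examples
    --------
    A minimal sketch constructing an S5Pmsm from a plain ndarray (the
    HDF5-dataset route works the same, given an open h5py.File)::

        > import numpy as np
        > msm = S5Pmsm(np.arange(12.0).reshape(3, 4))
        > msm.set_units("V")
        > msm.coords._fields
        ('row', 'column')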
89 | 
90 |     """
91 | 
92 |     def __init__(
93 |         self: S5Pmsm,
94 |         dset: Dataset | np.ndarray,
95 |         data_sel: tuple[slice | int] | None = None,
96 |         datapoint: bool = False,
97 |     ) -> None:
98 |         """Read measurement data from a Tropomi OCAL, ICM, or L1B product."""
99 |         # initialize object
100 |         self.name = "value"
101 |         self.value = None
102 |         self.error = None
103 |         self.coords = None
104 |         self.coverage = None
105 |         self.units = None
106 |         self.long_name = ""
107 |         self.fillvalue = None
108 | 
109 |         if isinstance(dset, Dataset):
110 |             self.__from_h5_dset(dset, data_sel, datapoint)
111 |         else:
112 |             self.__from_ndarray(dset, data_sel)
113 | 
114 |     def __repr__(self: S5Pmsm) -> str:
115 |         """Display info on the S5Pmsm object."""
116 |         res = []
117 |         for key, value in self.__dict__.items():
118 |             if key.startswith("__"):
119 |                 continue
120 |             if isinstance(value, np.ndarray):
121 |                 res.append(f"{key}: {value.shape}")
122 |             else:
123 |                 res.append(f"{key}: {value}")
124 | 
125 |         return "\n".join(res)
126 | 
127 |     def coord_name(self: S5Pmsm, axis: int) -> str:
128 |         """Return name of coordinate."""
129 |         return self.coords._fields[axis]
130 | 
131 |     def coord_replace(self: S5Pmsm, key: str, dims: np.ndarray) -> NamedTuple:
132 |         """Change values of a coordinate."""
133 |         return self.coords._replace(**{key: dims})
134 | 
135 |     def __from_h5_dset(
136 |         self: S5Pmsm,
137 |         h5_dset: Dataset,
138 |         data_sel: tuple[slice | int] | None,
139 |         datapoint: bool,
140 |     ) -> None:
141 |         """Initialize S5Pmsm object from h5py dataset."""
142 |         self.name = PurePath(h5_dset.name).name
143 | 
144 |         # copy dataset values (and error) to object
145 |         if data_sel is None:
146 |             if datapoint:
147 |                 self.value = h5_dset["value"]
148 |                 self.error = h5_dset["error"]
149 |             else:
150 |                 self.value = h5_dset[...]
151 |         else:
152 |             # we need to keep all dimensions to get the dimensions
153 |             # of the output data right
154 |             if datapoint:
155 |                 self.value = h5_dset["value"][data_sel]
156 |                 self.error = h5_dset["error"][data_sel]
157 |                 if isinstance(data_sel, tuple):
158 |                     for ii, elmnt in enumerate(data_sel):
159 |                         if isinstance(elmnt, int | np.int64):
160 |                             self.value = np.expand_dims(self.value, axis=ii)
161 |                             self.error = np.expand_dims(self.error, axis=ii)
162 |             else:
163 |                 self.value = h5_dset[data_sel]
164 |                 if isinstance(data_sel, tuple):
165 |                     for ii, elmnt in enumerate(data_sel):
166 |                         if isinstance(elmnt, int | np.int64):
167 |                             self.value = np.expand_dims(self.value, axis=ii)
168 | 
169 |         # set default dimension names
170 |         if h5_dset.ndim == 1:
171 |             keys_default = ["column"]
172 |         elif h5_dset.ndim == 2:
173 |             keys_default = ["row", "column"]
174 |         elif h5_dset.ndim == 3:
175 |             keys_default = ["time", "row", "column"]
176 |         else:
177 |             raise ValueError("not implemented for ndim > 3")
178 | 
179 |         # copy all dimensions with size longer than 1
180 |         keys = []
181 |         dims = []
182 |         for ii in range(h5_dset.ndim):
183 |             if self.value.shape[ii] == 1:
184 |                 continue
185 | 
186 |             if len(h5_dset.dims[ii]) != 1:  # bug in some KNMI HDF5 files
187 |                 keys.append(keys_default[ii])
188 |                 dims.append(np.arange(self.value.shape[ii]))
189 |             elif self.value.shape[ii] == h5_dset.shape[ii]:
190 |                 buff = PurePath(h5_dset.dims[ii][0].name).name
191 |                 if len(buff.split()) > 1:
192 |                     buff = buff.split()[0]
193 |                 keys.append(buff)
194 |                 if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
195 |                     buff = h5_dset.dims[ii][0][:]
196 |                     if np.all(buff == 0):
197 |                         buff = np.arange(buff.size)
198 |                 else:  # bug in some KNMI HDF5 files
199 |                     buff = np.arange(h5_dset.shape[ii])
200 |                 dims.append(buff)
201 |             else:
202 |                 buff = PurePath(h5_dset.dims[ii][0].name).name
203 |                 if len(buff.split()) > 1:
204 |                     buff = buff.split()[0]
205 |                 keys.append(buff)
206 |                 if h5_dset.dims[ii][0][:].size == h5_dset.shape[ii]:
207 |                     buff = h5_dset.dims[ii][0][:]
208 |                     if np.all(buff == 0):
209 |                         buff = np.arange(buff.size)
210 |                 else:  # bug in some KNMI HDF5 files
211 |                     buff = np.arange(h5_dset.shape[ii])
212 | 
213 |                 if isinstance(data_sel, slice):
214 |                     dims.append(buff[data_sel])
215 |                 elif len(data_sel) == h5_dset.ndim:
216 |                     dims.append(buff[data_sel[ii]])
217 |                 elif not isinstance(data_sel, tuple):
218 |                     dims.append(buff[data_sel])
219 |                 elif ii >= len(data_sel):
220 |                     dims.append(buff[data_sel[-1]])
221 |                 else:
222 |                     dims.append(buff[data_sel[ii]])
223 | 
224 |         # add dimensions as a namedtuple
225 |         coords_namedtuple = NamedTuple("Coords", keys)
226 |         self.coords = coords_namedtuple._make(dims)
227 | 
228 |         # remove all dimensions with size equal 1 from value (and error)
229 |         self.value = np.squeeze(self.value)
230 |         if datapoint:
231 |             self.error = np.squeeze(self.error)
232 | 
233 |         # copy FillValue (same for value/error in a datapoint)
234 |         if datapoint:
235 |             self.fillvalue = h5_dset.fillvalue[0]
236 |         else:
237 |             self.fillvalue = h5_dset.fillvalue
238 | 
239 |         # copy its units
240 |         if "units" in h5_dset.attrs:
241 |             if isinstance(h5_dset.attrs["units"], np.ndarray):
242 |                 if h5_dset.attrs["units"].size == 1:
243 |                     self.units = h5_dset.attrs["units"][0]
244 |                     if isinstance(self.units, bytes):
245 |                         self.units = self.units.decode("ascii")
246 |                 else:
247 |                     self.units = h5_dset.attrs["units"]
248 |                     if isinstance(self.units[0], bytes):
249 |                         self.units = self.units.astype(str)
250 |             else:
251 |                 self.units = h5_dset.attrs["units"]
252 |                 if isinstance(self.units, bytes):
253 |                     self.units = self.units.decode("ascii")
254 | 
255 |         # copy its long_name
256 |         if "long_name" in h5_dset.attrs:
257 |             if isinstance(h5_dset.attrs["long_name"], bytes):
258 |                 self.long_name = h5_dset.attrs["long_name"].decode("ascii")
259 |             else:
260 |                 self.long_name = h5_dset.attrs["long_name"]
261 | 
262 |     def __from_ndarray(
263 |         self: S5Pmsm, data: np.ndarray, data_sel: tuple[slice | int] | None
264 |     ) -> None:
265 |         """Initialize S5Pmsm object from an ndarray."""
266 |         # copy dataset values (and error) to object
267 |         if data_sel is None:
268 |             self.value = np.squeeze(data)
269 |         else:
270 |             self.value = np.squeeze(data[data_sel])
271 | 
272 |         # define coordinates
273 |         dims = [np.arange(sz) for sz in self.value.shape]
274 |         try:
275 |             self.set_coords(dims, coords_name=None)
276 |         except Exception as exc:
277 |             raise RuntimeError("failed to set the coordinates") from exc
278 | 
279 |     def copy(self: S5Pmsm) -> S5Pmsm:
280 |         """Return a deep copy of the current object."""
281 |         return deepcopy(self)
282 | 
283 |     def set_coords(
284 |         self: S5Pmsm,
285 |         coords_data: list[np.ndarray],
286 |         coords_name: list[str] | None = None,
287 |     ) -> None:
288 |         """Set coordinates of data.
289 | 
290 |         Parameters
291 |         ----------
292 |         coords_data : list of ndarrays
293 |             list with coordinates data for each dimension
294 |         coords_name : list of strings
295 |             list with the names of each dimension
296 | 
297 |         """
298 |         if coords_name is None:
299 |             if len(coords_data) == 1:
300 |                 keys = ["column"]
301 |             elif len(coords_data) == 2:
302 |                 keys = ["row", "column"]
303 |             elif len(coords_data) == 3:
304 |                 keys = ["time", "row", "column"]
305 |             else:
306 |                 raise ValueError("not implemented for ndim > 3")
307 |         else:
308 |             keys = [coords_name] if isinstance(coords_name, str) else coords_name
309 | 
310 |         # add dimensions as a namedtuple
311 |         coords_namedtuple = NamedTuple("Coords", keys)
312 |         self.coords = coords_namedtuple._make(coords_data)
313 | 
314 |     def set_coverage(
315 |         self: S5Pmsm, coverage: tuple[str, str], force: bool = False
316 |     ) -> None:
317 |         """Set the coverage attribute, as (coverageStart, coverageEnd).
318 | 
319 |         Parameters
320 |         ----------
321 |         coverage : tuple[str, str]
322 |             new value for the coverage attribute
323 |         force : bool, default=False
324 |             overwrite when force is true
325 | 
326 |         Notes
327 |         -----
328 |         Both elements are expected to be datetime objects.
329 | 
330 |         """
331 |         if self.coverage is None or force:
332 |             self.coverage = coverage
333 | 
334 |     def set_units(self: S5Pmsm, units: str | None, force: bool = False) -> None:
335 |         """Set the units attribute, overwrite when force is true."""
336 |         if self.units is None or force:
337 |             self.units = units
338 | 
339 |     def set_fillvalue(self: S5Pmsm) -> None:
340 |         """Set fillvalue to KNMI undefined."""
341 |         if (
342 |             np.issubdtype(self.value.dtype, np.floating) and self.fillvalue is None
343 |         ) or self.fillvalue == 0.0:
344 |             self.fillvalue = float.fromhex("0x1.ep+122")
345 | 
346 |     def set_long_name(self: S5Pmsm, name: str, force: bool = False) -> None:
347 |         """Set the long_name attribute, overwrite when force is true."""
348 |         if force or not self.long_name:
349 |             self.long_name = name
350 | 
351 |     def fill_as_nan(self: S5Pmsm) -> None:
352 |         """Replace fillvalues in data with NaN's.
353 | 354 | Works only on datasets with HDF5 datatype 'float' or 'datapoints' 355 | """ 356 | if self.fillvalue == float.fromhex("0x1.ep+122"): 357 | self.value[(self.value == self.fillvalue)] = np.nan 358 | if self.error is not None: 359 | self.error[(self.error == self.fillvalue)] = np.nan 360 | 361 | def sort(self: S5Pmsm, axis: int = 0) -> None: 362 | """Sort data and its coordinate along a given axis. 363 | 364 | Parameters 365 | ---------- 366 | axis : int, default=0 367 | axis for which the array will be sorted. 368 | 369 | """ 370 | if not isinstance(axis, int): 371 | raise TypeError("axis not an integer") 372 | if not 0 <= axis < self.value.ndim: 373 | raise ValueError("axis out-of-range") 374 | 375 | indx = np.argsort(self.coords[axis][:]) 376 | self.coords[axis][:] = self.coords[axis][indx] 377 | 378 | if axis == 0: 379 | self.value = self.value[indx, ...] 380 | if self.error is not None: 381 | if isinstance(self.error, list): 382 | self.error = (self.error[0][indx, ...], self.error[1][indx, ...]) 383 | else: 384 | self.error = self.error[indx, :] 385 | elif axis == 1: 386 | self.value = self.value[:, indx, ...] 387 | if self.error is not None: 388 | if isinstance(self.error, list): 389 | self.error = (self.error[0][:, indx, :], self.error[1][:, indx, :]) 390 | else: 391 | self.error = self.error[:, indx, :] 392 | elif axis == 2: 393 | self.value = self.value[:, :, indx] 394 | if self.error is not None: 395 | if isinstance(self.error, list): 396 | self.error = (self.error[0][:, :, indx], self.error[1][:, :, indx]) 397 | else: 398 | self.error = self.error[:, :, indx] 399 | else: 400 | raise ValueError("S5Pmsm: implemented for ndim <= 3") 401 | 402 | def concatenate(self: S5Pmsm, msm: S5Pmsm, axis: int = 0) -> S5Pmsm: 403 | """Concatenate two measurement datasets, the current with another. 404 | 405 | Parameters 406 | ---------- 407 | msm : pys5p.S5Pmsm 408 | an S5Pmsm object 409 | axis : int, default=0 410 | The axis for which the array will be joined. 411 | 412 | Returns 413 | ------- 414 | The data of the new dataset is concatenated to the existing data along 415 | an existing axis. The affected coordinate is also extended. 416 | 417 | Note: 418 | - The arrays must have the same shape, except in the dimension 419 | corresponding to axis (the first, by default). 
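
        Examples
        --------
        A minimal sketch (synthetic data, shapes are illustrative) joining
        two measurements along the first axis::

            > msm = S5Pmsm(np.zeros((3, 4)))
            > msm = msm.concatenate(S5Pmsm(np.ones((2, 4))), axis=0)
            > msm.value.shape
            (5, 4)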
420 | 
421 |         """
422 |         if self.name != PurePath(msm.name).name:
423 |             raise TypeError("combining dataset with different name")
424 | 
425 |         # all but the last 2 dimensions have to be equal
426 |         if self.value.shape[:-2] != msm.value.shape[:-2]:
427 |             raise TypeError("all but the last 2 dimensions should be equal")
428 | 
429 |         if (self.error is None and msm.error is not None) or (
430 |             self.error is not None and msm.error is None
431 |         ):
432 |             raise RuntimeError("S5Pmsm: combining non-datapoint and datapoint")
433 | 
434 |         # concatenate the values
435 |         if axis == 0:
436 |             self.value = np.concatenate((self.value, msm.value), axis=axis)
437 |         elif axis == 1:
438 |             if self.value.shape[0] == msm.value.shape[0]:
439 |                 self.value = np.concatenate((self.value, msm.value), axis=axis)
440 |             else:
441 |                 self.value = np.concatenate(pad_rows(self.value, msm.value), axis=axis)
442 |         elif axis == 2:
443 |             if self.value.shape[1] == msm.value.shape[1]:
444 |                 self.value = np.concatenate((self.value, msm.value), axis=axis)
445 |             else:
446 |                 self.value = np.concatenate(pad_rows(self.value, msm.value), axis=axis)
447 |         else:
448 |             raise ValueError("S5Pmsm: implemented for ndim <= 3")
449 | 
450 |         # concatenate the errors
451 |         if self.error is not None and msm.error is not None:
452 |             if axis == 0:
453 |                 self.error = np.concatenate((self.error, msm.error), axis=axis)
454 |             elif axis == 1:
455 |                 if self.value.shape[0] == msm.value.shape[0]:
456 |                     self.error = np.concatenate((self.error, msm.error), axis=axis)
457 |                 else:
458 |                     self.error = np.concatenate(
459 |                         pad_rows(self.error, msm.error), axis=axis
460 |                     )
461 |             elif axis == 2:
462 |                 if self.value.shape[1] == msm.value.shape[1]:
463 |                     self.error = np.concatenate((self.error, msm.error), axis=axis)
464 |                 else:
465 |                     self.error = np.concatenate(
466 |                         pad_rows(self.error, msm.error), axis=axis
467 |                     )
468 | 
469 |         # now extend the coordinate along the concatenation axis
470 |         key = self.coord_name(axis)
471 |         if msm.coords[axis][0] == 0:
472 |             dims = np.concatenate(
473 |                 (self.coords[axis], len(self.coords[axis]) + msm.coords[axis])
474 |             )
475 |         else:
476 |             dims = np.concatenate((self.coords[axis], msm.coords[axis]))
477 |         self.coords = self.coord_replace(key, dims)
478 |         return self
479 | 
480 |     def nanpercentile(
481 |         self: S5Pmsm,
482 |         vperc: int | list[float],
483 |         data_sel: tuple[slice | int] | None = None,
484 |         axis: int = 0,
485 |         keepdims: bool = False,
486 |     ) -> S5Pmsm:
487 |         r"""Return percentile(s) of the data in the S5Pmsm.
488 | 
489 |         Parameters
490 |         ----------
491 |         vperc : int or list of float
492 |             one, two or three percentiles to compute; see the Returns
493 |             section for how 'value' and 'error' are replaced
494 |         data_sel : numpy slice
495 |             A numpy slice generated for example `numpy.s\_`. Can be used to skip
496 |             the first and/or last frame
497 |         axis : int, default=0
498 |             Axis or axes along which the medians are computed.
499 |         keepdims : bool, default=False
500 |             If this is set to True, the axes which are reduced are left in the
501 |             result as dimensions with size one. With this option, the result
502 |             will broadcast correctly against the original arr.
503 | 
504 |         Returns
505 |         -------
506 |         S5Pmsm object with the original data replaced by the percentiles along
507 |         one of the axes, see below. The coordinates are adjusted, accordingly.
508 | 
509 |         You should at least supply one percentile and at most three.
510 |         vperc is an instance of 'int' or len(vperc) == 1:
511 |             'value' is replaced by its (nan-)percentile vperc
512 |             'error' is unchanged
513 |         len(vperc) == 2:
514 |             'vperc' is sorted
515 |             'value' is replaced by its (nan-)median
516 |             'error' is replaced by percentile('value', (vperc[0], vperc[1]))
517 |         len(vperc) == 3:
518 |             'vperc' is sorted
519 |             'value' is replaced by percentile('value', vperc[1])
520 |             'error' is replaced by percentile('value', (vperc[0], vperc[2]))
521 | 
522 |         """
523 |         if isinstance(vperc, int):
524 |             vperc = (vperc,)
525 |         else:
526 |             if len(vperc) == 2:
527 |                 vperc += (50,)
528 |             # make sure that the values are sorted
529 |             vperc = tuple(sorted(vperc))
530 | 
531 |         if len(vperc) != 1 and len(vperc) != 3:
532 |             raise TypeError("dimension vperc must be 1 or 3")
533 | 
534 |         if data_sel is None:
535 |             if self.value.size <= 1 or self.value.ndim <= axis:
536 |                 return self
537 |             perc = np.nanpercentile(self.value, vperc, axis=axis, keepdims=keepdims)
538 |         else:
539 |             if self.value[data_sel].size <= 1 or self.value[data_sel].ndim <= axis:
540 |                 return self
541 |             perc = np.nanpercentile(
542 |                 self.value[data_sel], vperc, axis=axis, keepdims=keepdims
543 |             )
544 |         if len(vperc) == 3:
545 |             self.value = perc[1, ...]
546 |             self.error = [perc[0, ...], perc[2, ...]]
547 |         else:
548 |             self.value = perc[0, ...]
549 | 
550 |         # adjust the coordinates
551 |         if keepdims:
552 |             key = self.coord_name(axis)
553 |             if self.coords[axis][0] == 0:
554 |                 dims = [0]
555 |             else:
556 |                 dims = np.median(self.coords[axis], keepdims=keepdims)
557 |             self.coords = self.coord_replace(key, dims)
558 |         else:
559 |             keys = []
560 |             dims = []
561 |             for ii in range(self.value.ndim + 1):
562 |                 if ii != axis:
563 |                     keys.append(self.coord_name(ii))
564 |                     dims.append(self.coords[ii][:])
565 |             coords_namedtuple = NamedTuple("Coords", keys)
566 |             self.coords = coords_namedtuple._make(dims)
567 | 
568 |         return self
569 | 
570 |     def biweight(
571 |         self: S5Pmsm,
572 |         data_sel: tuple[slice | int] | None = None,
573 |         axis: int = 0,
574 |         keepdims: bool = False,
575 |     ) -> S5Pmsm:
576 |         r"""Reduce this S5Pmsm data by applying biweight along some dimension.
577 | 
578 |         Parameters
579 |         ----------
580 |         data_sel : numpy slice
581 |             A numpy slice generated for example `numpy.s\_`. Can be used to skip
582 |             the first and/or last frame
583 |         axis : int, default=0
584 |             Axis or axes along which the medians are computed.
585 |         keepdims : bool, default=False
586 |             If this is set to True, the axes which are reduced are left in the
587 |             result as dimensions with size one. With this option, the result
588 |             will broadcast correctly against the original arr.
589 | 
590 |         Returns
591 |         -------
592 |         S5Pmsm object with its data (value & error) replaced by its biweight
593 |         medians along one axis. The coordinates are adjusted, accordingly.
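
        Examples
        --------
        A minimal sketch (synthetic data), reducing the time axis to its
        biweight median and spread::

            > msm = S5Pmsm(np.ones((10, 256, 1000)))
            > msm = msm.biweight(axis=0)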
594 | 
595 |         """
596 |         if data_sel is None:
597 |             if self.error is not None:
598 |                 self.value = Biweight(self.value, axis=axis).median
599 |                 self.error = Biweight(self.error, axis=axis).median
600 |             else:
601 |                 biwght = Biweight(self.value, axis=axis)
602 |                 self.value = biwght.median
603 |                 self.error = biwght.spread
604 |         else:
605 |             if self.error is not None:
606 |                 self.value = Biweight(self.value[data_sel], axis=axis).median
607 |                 self.error = Biweight(self.error[data_sel], axis=axis).median
608 |             else:
609 |                 biwght = Biweight(self.value[data_sel], axis=axis)
610 |                 self.value = biwght.median
611 |                 self.error = biwght.spread
612 |         if keepdims:
613 |             self.value = np.expand_dims(self.value, axis=axis)
614 |             self.error = np.expand_dims(self.error, axis=axis)
615 | 
616 |         # adjust the coordinates
617 |         if keepdims:
618 |             key = self.coord_name(axis)
619 |             if self.coords[axis][0] == 0:
620 |                 dims = [0]
621 |             else:
622 |                 dims = np.median(self.coords[axis], keepdims=keepdims)
623 |             self.coords = self.coord_replace(key, dims)
624 |         else:
625 |             keys = []
626 |             dims = []
627 |             for ii in range(self.value.ndim + 1):
628 |                 if ii != axis:
629 |                     keys.append(self.coord_name(ii))
630 |                     dims.append(self.coords[ii][:])
631 |             coords_namedtuple = NamedTuple("Coords", keys)
632 |             self.coords = coords_namedtuple._make(dims)
633 | 
634 |         return self
635 | 
636 |     def nanmedian(
637 |         self: S5Pmsm,
638 |         data_sel: tuple[slice | int] | None = None,
639 |         axis: int = 0,
640 |         keepdims: bool = False,
641 |     ) -> S5Pmsm:
642 |         r"""Reduce this S5Pmsm data by applying median along some dimension.
643 | 
644 |         Parameters
645 |         ----------
646 |         data_sel : numpy slice, optional
647 |             A numpy slice generated for example `numpy.s\_`.
648 |             Can be used to skip the first and/or last frame
649 |         axis : int, default=0
650 |             Axis or axes along which the medians are computed.
651 |         keepdims : bool, default=False
652 |             If this is set to True, the axes which are reduced are left in the
653 |             result as dimensions with size one. With this option, the result
654 |             will broadcast correctly against the original arr.
655 | 
656 |         Returns
657 |         -------
658 |         S5Pmsm object with its data (value & error) replaced by its nanmedian
659 |         and standard deviation along one axis.
660 |         The coordinates are adjusted, accordingly.
661 | 
662 |         """
663 |         if data_sel is None:
664 |             if self.error is not None:
665 |                 self.error = np.nanmedian(self.error, axis=axis, keepdims=keepdims)
666 |             else:
667 |                 self.error = np.nanstd(self.value, ddof=1, axis=axis, keepdims=keepdims)
668 |             self.value = np.nanmedian(self.value, axis=axis, keepdims=keepdims)
669 |         else:
670 |             if self.error is not None:
671 |                 self.error = np.nanmedian(
672 |                     self.error[data_sel], axis=axis, keepdims=keepdims
673 |                 )
674 |             else:
675 |                 self.error = np.nanstd(
676 |                     self.value[data_sel], ddof=1, axis=axis, keepdims=keepdims
677 |                 )
678 |             self.value = np.nanmedian(
679 |                 self.value[data_sel], axis=axis, keepdims=keepdims
680 |             )
681 | 
682 |         # adjust the coordinates
683 |         if keepdims:
684 |             key = self.coord_name(axis)
685 |             if self.coords[axis][0] == 0:
686 |                 dims = [0]
687 |             else:
688 |                 dims = np.median(self.coords[axis], keepdims=keepdims)
689 |             self.coords = self.coord_replace(key, dims)
690 |         else:
691 |             keys = []
692 |             dims = []
693 |             for ii in range(self.value.ndim + 1):
694 |                 if ii != axis:
695 |                     keys.append(self.coord_name(ii))
696 |                     dims.append(self.coords[ii][:])
697 |             coords_namedtuple = NamedTuple("Coords", keys)
698 |             self.coords = coords_namedtuple._make(dims)
699 | 
700 |         return self
701 | 
702 |     def nanmean(
703 |         self: S5Pmsm,
704 |         data_sel: tuple[slice | int] | None = None,
705 |         axis: int = 0,
706 |         keepdims: bool = False,
707 |     ) -> S5Pmsm:
708 |         r"""Reduce this S5Pmsm data by applying mean along some dimension.
709 | 
710 |         Parameters
711 |         ----------
712 |         data_sel : numpy slice, optional
713 |             A numpy slice generated for example `numpy.s\_`.
714 |             Can be used to skip the first and/or last frame
715 |         axis : int, default=0
716 |             Axis or axes along which the means are computed.
717 |         keepdims : bool, default=False
718 |             If this is set to True, the axes which are reduced are left in the
719 |             result as dimensions with size one. With this option, the result
720 |             will broadcast correctly against the original arr.
721 | 
722 |         Returns
723 |         -------
724 |         S5Pmsm object with its data (value & error) replaced by its nanmean
725 |         and standard deviation along one axis.
726 |         The coordinates are adjusted, accordingly.
727 | 728 | """ 729 | if data_sel is None: 730 | if self.error is not None: 731 | self.error = np.nanmean(self.error, axis=axis, keepdims=keepdims) 732 | else: 733 | self.error = np.nanstd(self.value, ddof=1, axis=axis, keepdims=keepdims) 734 | self.value = np.nanmean(self.value, axis=axis, keepdims=keepdims) 735 | else: 736 | if self.error is not None: 737 | self.error = np.nanmean( 738 | self.error[data_sel], axis=axis, keepdims=keepdims 739 | ) 740 | else: 741 | self.error = np.nanstd( 742 | self.value[data_sel], ddof=1, axis=axis, keepdims=keepdims 743 | ) 744 | self.value = np.nanmean(self.value[data_sel], axis=axis, keepdims=keepdims) 745 | 746 | # adjust the coordinates 747 | if keepdims: 748 | key = self.coord_name(axis) 749 | if self.coords[axis][0] == 0: 750 | dims = [0] 751 | else: 752 | dims = np.mean(self.coords[axis], keepdims=keepdims) 753 | self.coords = self.coord_replace(key, dims) 754 | else: 755 | keys = [] 756 | dims = [] 757 | for ii in range(self.value.ndim + 1): 758 | if ii != axis: 759 | keys.append(self.coord_name(ii)) 760 | dims.append(self.coords[ii][:]) 761 | coords_namedtuple = NamedTuple("Coords", keys) 762 | self.coords = coords_namedtuple._make(dims) 763 | 764 | return self 765 | 766 | def transpose(self: S5Pmsm) -> S5Pmsm: 767 | """Transpose data and coordinates of an S5Pmsm object.""" 768 | if self.value.ndim <= 1: 769 | return self 770 | 771 | if self.error is not None: 772 | self.error = np.transpose(self.error) 773 | self.value = np.transpose(self.value) 774 | 775 | keys = [] 776 | dims = [] 777 | for ii in range(self.value.ndim): 778 | keys.append(self.coord_name(ii)) 779 | dims.append(self.coords[ii][:]) 780 | tmp = keys[1] 781 | keys[1] = keys[0] 782 | keys[0] = tmp 783 | tmp = dims[1] 784 | dims[1] = dims[0] 785 | dims[0] = tmp 786 | coords_namedtuple = NamedTuple("Coords", keys) 787 | self.coords = coords_namedtuple._make(dims) 788 | 789 | return self 790 | -------------------------------------------------------------------------------- /src/pys5p/swir_region.py: -------------------------------------------------------------------------------- 1 | # This file is part of pyS5p 2 | # 3 | # https://github.com/rmvanhees/pys5p.git 4 | # 5 | # Copyright (c) 2017-2025 SRON 6 | # All Rights Reserved 7 | # 8 | # License: BSD-3-Clause 9 | """Return the usable area on the SWIR detector. 10 | 11 | There are two definitions:: 12 | 13 | 'illuminated': 14 | Detector area illuminated by external sources, defined as 15 | a rectangular area where the signal is at least 50% of the 16 | maximum signal. Coordinates: rows [11:228], columns [16:991]. 17 | 18 | 'level2': 19 | A smaller area used in official SWIR level 1B (ir)radiance 20 | products. Coordinates: rows [12:227], columns [20:980]. 21 | 22 | Notes 23 | ----- 24 | Row 257 of the SWIR detector is neglected. 25 | 26 | """ 27 | 28 | __all__ = ["coords", "mask"] 29 | 30 | import numpy as np 31 | 32 | 33 | def coords(mode: str = "illuminated", band: str = "78") -> tuple[slice, slice]: 34 | """Return slice defining the illuminated region on the SWIR detector. 
35 | 
36 |     Parameters
37 |     ----------
38 |     mode : {'illuminated', 'level2'}, optional
39 |         default is 'illuminated'
40 |     band : str, optional
41 |         select band 7 or 8, default is both bands
42 | 
43 |     """
44 |     if mode == "level2":
45 |         if band == "7":
46 |             return np.s_[12:227, 20:500]
47 |         if band == "8":
48 |             return np.s_[12:227, :480]
49 |         # else
50 |         return np.s_[12:227, 20:980]
51 | 
52 |     if band == "7":
53 |         return np.s_[11:228, 16:500]
54 |     if band == "8":
55 |         return np.s_[11:228, :491]
56 |     # else
57 |     return np.s_[11:228, 16:991]
58 | 
59 | 
60 | def mask(mode: str = "illuminated", band: str = "78") -> np.ndarray:
61 |     """Return mask of the illuminated region.
62 | 
63 |     Parameters
64 |     ----------
65 |     mode : {'illuminated', 'level2'}, optional
66 |         default is 'illuminated'
67 |     band : str, optional
68 |         select band 7 or 8, default is both bands
69 | 
70 |     Notes
71 |     -----
72 |     Pixels within the illuminated region are set to True.
73 | 
74 |     """
75 |     if band in ("7", "8"):
76 |         res = np.full((256, 500), False)
77 |     else:
78 |         res = np.full((256, 1000), False)
79 | 
80 |     res[coords(mode, band)] = True
81 | 
82 |     return res
83 | 
--------------------------------------------------------------------------------
/src/pys5p/swir_texp.py:
--------------------------------------------------------------------------------
1 | # This file is part of pyS5p
2 | #
3 | # https://github.com/rmvanhees/pys5p.git
4 | #
5 | # Copyright (c) 2017-2025 SRON
6 | # All Rights Reserved
7 | #
8 | # License: BSD-3-Clause
9 | """Calculate the Tropomi SWIR exposure time from detector settings."""
10 | 
11 | 
12 | def swir_exp_time(int_delay: int, int_hold: int) -> float:
13 |     """Calculate the correct SWIR exposure time from detector settings.
14 | 
15 |     Parameters
16 |     ----------
17 |     int_delay : int
18 |         parameter int_delay from the instrument_settings
19 |     int_hold : int
20 |         parameter int_hold from the instrument_settings
21 | 
22 |     Returns
23 |     -------
24 |     float
25 |         exact (SWIR) pixel exposure time
26 | 
27 |     """
28 |     return 1.25e-6 * (65540 - int_delay + int_hold)
--------------------------------------------------------------------------------
/src/pys5p/version.py:
--------------------------------------------------------------------------------
1 | # This file is part of pyS5p
2 | #
3 | # https://github.com/rmvanhees/pys5p.git
4 | #
5 | # Copyright (c) 2017-2025 SRON
6 | # All Rights Reserved
7 | #
8 | # License: BSD-3-Clause
9 | """Provide access to the software version as obtained from git."""
10 | 
11 | __all__ = ["pys5p_version"]
12 | 
13 | from . import __version__
14 | 
15 | 
16 | def pys5p_version(full: bool = False, githash: bool = False) -> str:
17 |     """Return the software version as obtained from git.
18 | 
19 |     Examples
20 |     --------
21 |     Show the software version of the module pys5p::
22 | 
23 |         > from pys5p.version import pys5p_version
24 |         > pys5p_version()
25 |         '2.1.5'
26 | 
27 |     """
28 |     if full:
29 |         return __version__
30 | 
31 |     if githash:
32 |         return __version__.split("+g")[1].split(".")[0]
33 | 
34 |     return __version__.split("+")[0]
35 | 
--------------------------------------------------------------------------------