├── xbpch ├── util │ ├── __init__.py │ ├── diaginfo.py │ ├── cf.py │ └── gridspec.py ├── __init__.py ├── common.py ├── uff.py ├── grid.py ├── bpch.py └── core.py ├── MANIFEST.in ├── doc ├── dask_graphs │ ├── sample_read.png │ └── sample_normalized.png ├── example_plots │ ├── cartopy_example.png │ └── cartopy_seasonal_facet.png ├── environment.yml ├── reading.rst ├── Makefile ├── make.bat ├── index.rst ├── installation.rst ├── quick_start.rst ├── conf.py └── usage.rst ├── readthedocs.yml ├── ci ├── environment-py27.yml ├── environment-py35.yml └── environment-py36.yml ├── LICENSE ├── RELEASE_GUIDE.rst ├── .gitignore ├── setup.py ├── scripts └── bpch_to_nc └── README.rst /xbpch/util/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | recursive-include doc * 3 | prune doc/_build 4 | global-exclude .DS_Store -------------------------------------------------------------------------------- /doc/dask_graphs/sample_read.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/dask_graphs/sample_read.png -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | conda: 2 | file: doc/environment.yml 3 | python: 4 | version: 3 5 | setup_py_install: true -------------------------------------------------------------------------------- /doc/dask_graphs/sample_normalized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/dask_graphs/sample_normalized.png -------------------------------------------------------------------------------- /doc/example_plots/cartopy_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/example_plots/cartopy_example.png -------------------------------------------------------------------------------- /doc/example_plots/cartopy_seasonal_facet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/example_plots/cartopy_seasonal_facet.png -------------------------------------------------------------------------------- /xbpch/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | try: 3 | from . version import __version__ 4 | except: 5 | pass 6 | 7 | from . bpch import BPCHFile 8 | from . 
core import open_bpchdataset, open_mfbpchdataset -------------------------------------------------------------------------------- /doc/environment.yml: -------------------------------------------------------------------------------- 1 | name: xbpch 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.5 6 | - dask>=0.14 7 | - xarray>=0.9 8 | - pandas 9 | - ipython 10 | - future 11 | - cartopy 12 | - pyproj 13 | - matplotlib 14 | - numpydoc 15 | - sphinx 16 | -------------------------------------------------------------------------------- /ci/environment-py27.yml: -------------------------------------------------------------------------------- 1 | name: test_xbpch 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=2.7 6 | - cython 7 | - dask>=0.14 8 | - future 9 | - numpy 10 | - pytest 11 | - xarray>=0.9 12 | - pip: 13 | - codecov 14 | - pytest-cov -------------------------------------------------------------------------------- /ci/environment-py35.yml: -------------------------------------------------------------------------------- 1 | name: test_xbpch 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.5 7 | - cython 8 | - dask>=0.14 9 | - future 10 | - numpy 11 | - pytest 12 | - xarray>=0.12 13 | - pip: 14 | - codecov 15 | - pytest-cov -------------------------------------------------------------------------------- /ci/environment-py36.yml: -------------------------------------------------------------------------------- 1 | name: test_xbpch 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.6 7 | - cython 8 | - dask>=0.14 9 | - future 10 | - numpy 11 | - pytest 12 | - xarray>=0.12 13 | - pip: 14 | - codecov 15 | - pytest-cov -------------------------------------------------------------------------------- /doc/reading.rst: -------------------------------------------------------------------------------- 1 | 2 | Reading BPCH Files 3 | ================== 4 | 5 | **xbpch** provides three main utilities for reading bpch files, all of which 6 | are provided as top-level package imports. For most purposes, you should use 7 | ``open_bpchdataset()``, however a lower-level interface, ``BPCHFile()`` is also 8 | provided in case you would prefer manually processing the bpch contents. 9 | 10 | See :doc:`/usage` for more details. 11 | 12 | .. autofunction:: xbpch.open_bpchdataset 13 | 14 | .. autofunction:: xbpch.open_mfbpchdataset 15 | 16 | .. autoclass:: xbpch.BPCHFile 17 | :members: 18 | :private-members: 19 | :special-members: -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = xbpch 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=xbpch 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Daniel Rothenberg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | 2 | xbpch 3 | ===== 4 | 5 | .. image:: https://badge.fury.io/py/xbpch.svg 6 | :target: https://badge.fury.io/py/xbpch 7 | :alt: PyPI version 8 | .. image:: https://readthedocs.org/projects/xbpch/badge/?version=latest 9 | :target: http://xbpch.readthedocs.io/en/latest/?badge=latest 10 | :alt: Documentation Status 11 | .. image:: https://zenodo.org/badge/89022822.svg 12 | :target: https://zenodo.org/badge/latestdoi/89022822 13 | :alt: Zenodo DOI 14 | 15 | **xpbch** is a simple utility for reading the proprietary 16 | `binary punch format (bpch) outputs `_ 17 | used in versions of GEOS-Chem_ earlier than v11-02. The utility allows a user 18 | to load this data into an xarray_- and dask_-powered workflow without 19 | necessarily pre-processing the data using GAMAP or IDL. 
This opens the door 20 | to out-of-core and parallel processing of GEOS-Chem_ output. 21 | 22 | .. toctree:: 23 | :maxdepth: 2 24 | 25 | installation 26 | quick_start 27 | usage 28 | reading 29 | 30 | Recent Updates 31 | -------------- 32 | 33 | **v0.3.5 (May 19, 2019)** 34 | 35 | - Fixed incompatibility with xarray arising from backward incompatible changes with Python 2.7 36 | 37 | **v0.3.4 (January 20, 2019)** 38 | 39 | - Miscellaneous fixes for issues filed on GitHub Issue Tracker 40 | 41 | **v0.3.3 (March 18, 2018)** 42 | 43 | - Clean-up for xarray v0.10.2 compatibility 44 | - Tweak to more reliably infer and unpack 3D field shape (from Jenny Fisher) 45 | 46 | 47 | .. _dask: http://dask.pydata.org 48 | .. _xarray: http://xarray.pydata.org 49 | .. _GEOS-Chem: http://www.geos-chem.org 50 | 51 | License 52 | ------- 53 | 54 | Copyright (c) 2019 Daniel Rothenberg 55 | 56 | This work is licensed_ under a permissive MIT License. 57 | I acknowledge important contributions from Benoît Bovy, 58 | Gerrit Kuhlmann, and Christoph Keller. 59 | 60 | .. _licensed: http://github.com/darothen/xbpch/master/LICENSE 61 | -------------------------------------------------------------------------------- /RELEASE_GUIDE.rst: -------------------------------------------------------------------------------- 1 | Releasing a new version of xbpch 2 | ================================ 3 | 4 | So you've jut incorporated a new patch or feature into **xbpch** - congratulations! 5 | This short guide is intended to help you cut a new release of the package incorporating this new work. 6 | By the end of this process, all **xbpch** users should be able to easily upgrade their version of the code via *pip* or *conda*. 7 | 8 | 1. Upgrade your local repository to reflect the head on "master" 9 | 10 | $ git pull upstream master 11 | 12 | 2. Ensure that "doc/index.rst" has an entry under "Recent Changes" reflecting any new work you're including in this release 13 | 14 | 3. Open "setup.py" and increment the version number - in most cases, you'll probably increment the **MICRO** version, but for significant changes you'll probably want to reset **MICRO** to 0 and increment the **MINOR**; see `Semantic Versioning `_ for more information 15 | 16 | 4. Commit the documentation and version changes with a commit message indicating that this is a version release 17 | 18 | $ git commit -a -m "Release v0.X.Y" 19 | 20 | 5. Tag the release 21 | 22 | $ git tag -a v0.X.Y -m 'v0.X.Y' 23 | 24 | 6. Push the changes and version tag upstream to master 25 | 26 | $ git push upstream master 27 | $ git push upstream --tags 28 | 29 | 7. Via the project GitHub page, click the "releases" button and then "Draft a new release". Select v0.X.Y and create the release; you can add documentation notes if you would like, but historically we've maintained these via the official documentation. 30 | 31 | At this point, the automatic machinery from conda-forge and ReadTheDocs should just "work" and update the package appropriately at those places. 32 | You should keep an eye on the `conda-forge feedstock `_ to ensure that it builds a new release within a few hours. 33 | However, you'll manually need to cut a new release for PyPi. 34 | To do this: 35 | 36 | 1. Navigate to your repository directory and issue a command to build a wheel: 37 | 38 | $ python setup.py bdist_wheel sdist 39 | 40 | This should create the files "dist/xbpch-0.X.Y.tar.gz" and "dist/xbpch-0.X.Y-py3-none-any.whl" 41 | 42 | 2. 
Upload your new wheel via twine 43 | 44 | $ twine upload dist/xbpch-0.X.Y* 45 | 46 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ============ 4 | 5 | Requirements 6 | ------------ 7 | 8 | **xbpch** is written in pure Python (version >= 3.5), and leans on two important 9 | libraries: 10 | 11 | 1. xarray_ (version >= 0.9): a pandas-like toolkit for working with 12 | labeled, *n*-dimensional data 13 | 14 | 2. dask_ (version >= 0.14): a library for performing out-of-core, 15 | parallel computations on both tabular and array-like datasets 16 | 17 | The easiest way to install these libraries is to use the conda_ 18 | package manager:: 19 | 20 | $ conda install -c conda-forge xarray dask 21 | 22 | conda_ can be obtained as part of the Anaconda_ Python distribution 23 | from Continuum IO, although you do not need all of the packages it 24 | provides in order to use **xbpch**. Note that we recommend installing the latest 25 | versions from community-maintained `conda-forge `_ 26 | collection, since these usually contain bug-fixes and additional features. 27 | 28 | .. note:: 29 | 30 | Basic support for Python 2.7 is available in **xbpch** but it has not been 31 | tested, since the evolutionary GCPy package will only support Python 3. If, 32 | for some reason, you must use Python 2.7 and encounter problems, please 33 | reach out to us and we may be able to fix them. 34 | 35 | 36 | Installation via conda 37 | ---------------------- 38 | 39 | The preferred way to install **xbpch** is also via conda_:: 40 | 41 | $ conda install -c conda-forge xbpch 42 | 43 | 44 | Installation via pip 45 | -------------------- 46 | 47 | **xbpch** is available on `PyPI `_, and 48 | can be installed using setuptools:: 49 | 50 | $ pip install xbpch 51 | 52 | Installation from source 53 | ------------------------ 54 | 55 | If you're developing or contributing to **xbpch**, you may wish 56 | instead to install directly from a local copy of the source code. To do so, 57 | you must first clone the the master repository (or a fork) and install locally 58 | via pip:: 59 | 60 | $ git clone https://github.com/darothen/xbpch.git 61 | $ cd xbpch 62 | $ python setup.py install 63 | 64 | You will need to substitute in the path to your preferred repository/mirror 65 | of the source code. 66 | 67 | Note that you can also install directly from the source using setuptools:: 68 | 69 | $ pip install git+https://github.com/darothen/xbpch.git 70 | 71 | .. _Anaconda: https://www.continuum.io/downloads 72 | .. _conda: http://conda.pydata.org 73 | .. _dask: http://dask.pydata.org 74 | .. 
_xarray: http://xarray.pydata.org -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/pydev,emacs,python 3 | 4 | .idea 5 | xbpch/version.py 6 | WORKING_NOTES.md 7 | 8 | ### Emacs ### 9 | # -*- mode: gitignore; -*- 10 | *~ 11 | \#*\# 12 | /.emacs.desktop 13 | /.emacs.desktop.lock 14 | *.elc 15 | auto-save-list 16 | tramp 17 | .\#* 18 | 19 | # Org-mode 20 | .org-id-locations 21 | *_archive 22 | 23 | # flymake-mode 24 | *_flymake.* 25 | 26 | # eshell files 27 | /eshell/history 28 | /eshell/lastdir 29 | 30 | # elpa packages 31 | /elpa/ 32 | 33 | # reftex files 34 | *.rel 35 | 36 | # AUCTeX auto folder 37 | /auto/ 38 | 39 | # cask packages 40 | .cask/ 41 | dist/ 42 | 43 | # Flycheck 44 | flycheck_*.el 45 | 46 | # server auth directory 47 | /server/ 48 | 49 | # projectiles files 50 | .projectile 51 | 52 | # directory configuration 53 | .dir-locals.el 54 | 55 | ### pydev ### 56 | .pydevproject 57 | 58 | ### Python ### 59 | # Byte-compiled / optimized / DLL files 60 | __pycache__/ 61 | *.py[cod] 62 | *$py.class 63 | 64 | # C extensions 65 | *.so 66 | 67 | # Distribution / packaging 68 | .Python 69 | env/ 70 | build/ 71 | develop-eggs/ 72 | downloads/ 73 | eggs/ 74 | .eggs/ 75 | lib/ 76 | lib64/ 77 | parts/ 78 | sdist/ 79 | var/ 80 | wheels/ 81 | *.egg-info/ 82 | .installed.cfg 83 | *.egg 84 | 85 | # PyInstaller 86 | # Usually these files are written by a python script from a template 87 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 88 | *.manifest 89 | *.spec 90 | 91 | # Installer logs 92 | pip-log.txt 93 | pip-delete-this-directory.txt 94 | 95 | # Unit test / coverage reports 96 | htmlcov/ 97 | .tox/ 98 | .coverage 99 | .coverage.* 100 | .cache 101 | nosetests.xml 102 | coverage.xml 103 | *,cover 104 | .hypothesis/ 105 | 106 | # Translations 107 | *.mo 108 | *.pot 109 | 110 | # Django stuff: 111 | *.log 112 | local_settings.py 113 | 114 | # Flask stuff: 115 | instance/ 116 | .webassets-cache 117 | 118 | # Scrapy stuff: 119 | .scrapy 120 | 121 | # Sphinx documentation 122 | docs/_build/ 123 | 124 | # PyBuilder 125 | target/ 126 | 127 | # Jupyter Notebook 128 | .ipynb_checkpoints 129 | 130 | # pyenv 131 | .python-version 132 | 133 | # celery beat schedule file 134 | celerybeat-schedule 135 | 136 | # SageMath parsed files 137 | *.sage.py 138 | 139 | # dotenv 140 | .env 141 | 142 | # virtualenv 143 | .venv 144 | venv/ 145 | ENV/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # End of https://www.gitignore.io/api/pydev,emacs,python -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import warnings 5 | 6 | from setuptools import setup, find_packages 7 | 8 | from textwrap import dedent 9 | 10 | MAJOR = 0 11 | MINOR = 3 12 | MICRO = 5 13 | VERSION = "{}.{}.{}".format(MAJOR, MINOR, MICRO) 14 | DEV = False 15 | 16 | # Correct versioning with git info if DEV 17 | if DEV: 18 | import subprocess 19 | 20 | pipe = subprocess.Popen( 21 | ['git', "describe", "--always", "--match", "v[0-9]*"], 22 | stdout=subprocess.PIPE) 23 | so, err = pipe.communicate() 24 | 25 | if pipe.returncode != 0: 26 | # no 
git or something wrong with git (not in dir?) 27 | warnings.warn("WARNING: Couldn't identify git revision, using generic version string") 28 | VERSION += ".dev" 29 | else: 30 | git_rev = so.strip() 31 | git_rev = git_rev.decode('ascii') # necessary for Python >= 3 32 | 33 | VERSION += ".dev-{}".format(git_rev) 34 | 35 | DESCRIPTION = "xarray interface for bpch files" 36 | LONG_DESCRIPTION = """\ 37 | **xpbch** is a simple utility for reading the proprietary binary punch format 38 | (bpch) outputs used in versions of GEOS-Chem earlier than v11-02. The utility 39 | allows a user to load this data into an xarray/dask-powered workflow without 40 | necessarily pre-processing the data using GAMAP or IDL. 41 | """ 42 | 43 | DISTNAME = "xbpch" 44 | AUTHOR = "Daniel Rothenberg" 45 | AUTHOR_EMAIL = "darothen@mit.edu" 46 | URL = "https://github.com/darothen/xbpch" 47 | LICENSE = "MIT" 48 | DOWNLOAD_URL = ("https://github.com/darothen/xbpch/archive/v{}.tar.gz" 49 | .format(VERSION)) 50 | 51 | CLASSIFIERS = [ 52 | 'Development Status :: 5 - Production/Stable', 53 | 'License :: OSI Approved :: MIT License', 54 | 'Operating System :: OS Independent', 55 | 'Intended Audience :: Science/Research', 56 | 'Programming Language :: Python', 57 | 'Programming Language :: Python :: 2.7', 58 | 'Programming Language :: Python :: 3.4', 59 | 'Programming Language :: Python :: 3.5', 60 | 'Programming Language :: Python :: 3.6', 61 | 'Topic :: Scientific/Engineering', 62 | ] 63 | 64 | def _write_version_file(): 65 | 66 | fn = os.path.join(os.path.dirname(__file__), DISTNAME, 'version.py') 67 | 68 | version_str = dedent(""" 69 | __version__ = '{}' 70 | """) 71 | 72 | # Write version file 73 | with open(fn, 'w') as version_file: 74 | version_file.write(version_str.format(VERSION)) 75 | 76 | # Write version and install 77 | _write_version_file() 78 | 79 | setup( 80 | name = DISTNAME, 81 | author = AUTHOR, 82 | author_email = AUTHOR_EMAIL, 83 | maintainer = AUTHOR, 84 | maintainer_email = AUTHOR_EMAIL, 85 | description = DESCRIPTION, 86 | long_description = LONG_DESCRIPTION, 87 | license = LICENSE, 88 | url = URL, 89 | version = VERSION, 90 | download_url = DOWNLOAD_URL, 91 | 92 | packages = find_packages(), 93 | package_data = {}, 94 | scripts = [ 95 | 'scripts/bpch_to_nc', 96 | ], 97 | 98 | classifiers = CLASSIFIERS 99 | ) 100 | -------------------------------------------------------------------------------- /xbpch/common.py: -------------------------------------------------------------------------------- 1 | 2 | from datetime import datetime 3 | 4 | import numpy as np 5 | 6 | # physical or chemical constants 7 | C_MOLECULAR_WEIGHT = 12e-3 # molecular weight of C atoms (kg/mole) 8 | 9 | def broadcast_1d_array(arr, ndim, axis=1): 10 | """ 11 | Broadcast 1-d array `arr` to `ndim` dimensions on the first axis 12 | (`axis`=0) or on the last axis (`axis`=1). 13 | 14 | Useful for 'outer' calculations involving 1-d arrays that are related to 15 | different axes on a multidimensional grid. 16 | """ 17 | ext_arr = arr 18 | for i in range(ndim - 1): 19 | ext_arr = np.expand_dims(ext_arr, axis=axis) 20 | return ext_arr 21 | 22 | 23 | def get_timestamp(time=True, date=True, fmt=None): 24 | """ Return the current timestamp in machine local time. 25 | 26 | Parameters: 27 | ----------- 28 | time, date : Boolean 29 | Flag to include the time or date components, respectively, 30 | in the output. 31 | fmt : str, optional 32 | If passed, will override the time/date choice and use as 33 | the format string passed to `strftime`. 
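Examples
--------
Illustrative only; the actual value depends on the machine-local time at
the moment of the call.

>>> get_timestamp(time=False)     # doctest: +SKIP
'04-24-2017'
>>> get_timestamp(fmt="%Y%m%d")   # doctest: +SKIP
'20170424'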
34 | """ 35 | 36 | time_format = "%H:%M:%S" 37 | date_format = "%m-%d-%Y" 38 | 39 | if fmt is None: 40 | if time and date: 41 | fmt = time_format + " " + date_format 42 | elif time: 43 | fmt = time_format 44 | elif date: 45 | fmt = date_format 46 | else: 47 | raise ValueError("One of `date` or `time` must be True!") 48 | 49 | return datetime.now().strftime(fmt) 50 | 51 | 52 | def fix_attr_encoding(ds): 53 | """ This is a temporary hot-fix to handle the way metadata is encoded 54 | when we read data directly from bpch files. It removes the 'scale_factor' 55 | and 'units' attributes we encode with the data we ingest, converts the 56 | 'hydrocarbon' and 'chemical' attribute to a binary integer instead of a 57 | boolean, and removes the 'units' attribute from the "time" dimension since 58 | that too is implicitly encoded. 59 | 60 | In future versions of this library, when upstream issues in decoding 61 | data wrapped in dask arrays is fixed, this won't be necessary and will be 62 | removed. 63 | 64 | """ 65 | 66 | def _maybe_del_attr(da, attr): 67 | """ Possibly delete an attribute on a DataArray if it's present """ 68 | if attr in da.attrs: 69 | del da.attrs[attr] 70 | return da 71 | 72 | def _maybe_decode_attr(da, attr): 73 | # TODO: Fix this so that bools get written as attributes just fine 74 | """ Possibly coerce an attribute on a DataArray to an easier type 75 | to write to disk. """ 76 | # bool -> int 77 | if (attr in da.attrs) and (type(da.attrs[attr] == bool)): 78 | da.attrs[attr] = int(da.attrs[attr]) 79 | return da 80 | 81 | for v in ds.data_vars: 82 | da = ds[v] 83 | da = _maybe_del_attr(da, 'scale_factor') 84 | da = _maybe_del_attr(da, 'units') 85 | da = _maybe_decode_attr(da, 'hydrocarbon') 86 | da = _maybe_decode_attr(da, 'chemical') 87 | # Also delete attributes on time. 88 | if hasattr(ds, 'time'): 89 | times = ds.time 90 | times = _maybe_del_attr(times, 'units') 91 | 92 | return ds 93 | -------------------------------------------------------------------------------- /scripts/bpch_to_nc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Convert a BPCH file (or files) to a CF-compliant, NetCDF dataset. 4 | 5 | This script is a simple utility for opening (and optionally concatenating) BPCH 6 | files and then immediately writing them out to disk in NetCDF format. It's a 7 | thin wrapper designed to avoid having to drop into an interactive Python session 8 | to accomplish this task. 
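Example invocation (file and path names here are only illustrative)::

    $ bpch_to_nc ND49_20060101.bpch ND49_20060101.nc \
        -t tracerinfo.dat -d diaginfo.dat

Several bpch files may be listed before the output name, in which case they
are concatenated along the time dimension before being written to NetCDF.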
9 | 10 | """ 11 | 12 | import os, sys 13 | 14 | from xbpch import open_bpchdataset, open_mfbpchdataset 15 | from dask.diagnostics import ProgressBar 16 | 17 | from argparse import ArgumentParser, RawDescriptionHelpFormatter 18 | parser = ArgumentParser(description=__doc__, 19 | formatter_class=RawDescriptionHelpFormatter) 20 | parser.add_argument("bpch_files", type=str, nargs="+", 21 | help="Paths to BPCH file(s) to load (, concatenate)" 22 | " and write back to disk") 23 | parser.add_argument("output_nc", type=str, 24 | help="Name of output file to write") 25 | parser.add_argument("-t", "--tracerinfo", metavar="tracerinfo.dat", 26 | type=str, default="tracerinfo.dat", 27 | help="Path to tracerinfo.dat, if not in current directory") 28 | parser.add_argument("-d", "--diaginfo", metavar="diaginfo.dat", 29 | type=str, default="diaginfo.dat", 30 | help="Path to diaginfo.dat, if not in current directory") 31 | 32 | def _maybe_del_attr(da, attr): 33 | """ Possibly delete an attribute on a DataArray if it's present """ 34 | if attr in da.attrs: 35 | del da.attrs[attr] 36 | 37 | return da 38 | 39 | 40 | def _maybe_decode_attr(da, attr): 41 | # TODO: Fix this so that bools get written as attributes just fine 42 | """ Possibly coerce an attribute on a DataArray to an easier type 43 | to write to disk. """ 44 | # bool -> int 45 | if (attr in da.attrs) and (type(da.attrs[attr] == bool)): 46 | da.attrs[attr] = int(da.attrs[attr]) 47 | 48 | return da 49 | 50 | 51 | if __name__ == "__main__": 52 | 53 | args = parser.parse_args() 54 | 55 | # Check if the output already exists; if so, exit immediately 56 | if os.path.exists(args.output_nc): 57 | print("ERROR: Can't write to output file that already exists.") 58 | sys.exit(1) 59 | 60 | # Check that all input files exist 61 | bad_files = [fn for fn in args.bpch_files if not os.path.exists(fn)] 62 | if bad_files: 63 | print("ERROR: Couldn't find the following input files:") 64 | for fn in bad_files: 65 | print(" " + fn) 66 | sys.exit(1) 67 | 68 | # Else, we should be good to read in and concatenate 69 | open_kws = { 70 | "tracerinfo_file": args.tracerinfo, 71 | "diaginfo_file": args.diaginfo, 72 | "memmap": True, "dask": True 73 | } 74 | 75 | print("\nReading in file(s)...") 76 | if len(args.bpch_files) == 1: 77 | ds = open_bpchdataset(args.bpch_files[0], **open_kws) 78 | else: 79 | ds = open_mfbpchdataset(args.bpch_files, **open_kws) 80 | 81 | # This block of code is hack to fix the encoding of attributes 82 | # on the DataArrays in this Dataset. They are being 83 | # set at a very low level when we read in the data, and manually 84 | # specifying the encoding doesn't work. 85 | # However, deleting them from the attributes dict 86 | # doesn't end up removing them from the final output file - they get 87 | # written just fine. 88 | print("\nDecoding variables...") 89 | for v in ds.data_vars: 90 | da = ds[v] 91 | da = _maybe_del_attr(da, 'scale_factor') 92 | da = _maybe_del_attr(da, 'units') 93 | da = _maybe_decode_attr(da, 'hydrocarbon') 94 | da = _maybe_decode_attr(da, 'chemical') 95 | # Also delete attributes on time. 
96 | if hasattr(ds, 'time'): 97 | times = ds.time 98 | times = _maybe_del_attr(times, 'units') 99 | 100 | print("\nWriting to " + args.output_nc + " ...") 101 | with ProgressBar(): 102 | ds.to_netcdf(args.output_nc) 103 | 104 | print("\n done!") 105 | 106 | -------------------------------------------------------------------------------- /xbpch/util/diaginfo.py: -------------------------------------------------------------------------------- 1 | 2 | from collections import namedtuple 3 | from warnings import warn 4 | 5 | import os 6 | import pandas as pd 7 | 8 | from .. common import C_MOLECULAR_WEIGHT 9 | 10 | #: Info for parsing diagnostic records 11 | diag_rec = namedtuple("diag_rec", 12 | ["name", "width", "type", "default", "read_only", "desc"]) 13 | diag_recs = [ 14 | diag_rec('offset', 8, int, 0, True, 15 | "Offset (constant to add to tracer numbers in order to" 16 | " distinguish between diff categories, as stored in" 17 | " tracerinfo.dat)"), 18 | diag_rec("-0", 1, str, ' ', True, None), 19 | diag_rec('name', 40, str, None, True, "Name of the category"), 20 | diag_rec('description', 100, str, None, True, "Description of category"), 21 | diag_rec("-1", 1, str, ' ', True, None) 22 | ] 23 | 24 | #: Info for parsing tracer records 25 | tracer_rec = diag_rec 26 | tracer_recs = [ 27 | tracer_rec('name', 8, str, None, True, "Tracer name"), 28 | tracer_rec("-0", 1, str, ' ', True, None), 29 | tracer_rec('full_name', 30, str, None, True, "Full tracer name"), 30 | tracer_rec('molwt', 10, float, 1., True, "Molecular weight (kg/mole)"), 31 | tracer_rec('C', 3, int, 1, True, "# moles C/moles tracer for HCs"), 32 | tracer_rec('tracer', 9, int, None, True, "Tracer number"), 33 | tracer_rec('scale', 10, float, 1e9, True, "Standard scale factor to convert to" 34 | " given units"), 35 | tracer_rec("-1", 1, str, ' ', True, None), 36 | tracer_rec('unit', 40, str, 'ppbv', True, "Unit string"), 37 | ] 38 | 39 | def get_diaginfo(diaginfo_file): 40 | """ 41 | Read an output's diaginfo.dat file and parse into a DataFrame for 42 | use in selecting and parsing categories. 43 | 44 | Parameters 45 | ---------- 46 | diaginfo_file : str 47 | Path to diaginfo.dat 48 | 49 | Returns 50 | ------- 51 | DataFrame containing the category information. 52 | 53 | """ 54 | 55 | widths = [rec.width for rec in diag_recs] 56 | col_names = [rec.name for rec in diag_recs] 57 | dtypes = [rec.type for rec in diag_recs] 58 | usecols = [name for name in col_names if not name.startswith('-')] 59 | 60 | diag_df = pd.read_fwf(diaginfo_file, widths=widths, names=col_names, 61 | dtypes=dtypes, comment="#", header=None, 62 | usecols=usecols) 63 | diag_desc = {diag.name: diag.desc for diag in diag_recs 64 | if not diag.name.startswith('-')} 65 | 66 | return diag_df, diag_desc 67 | 68 | 69 | def get_tracerinfo(tracerinfo_file): 70 | """ 71 | Read an output's tracerinfo.dat file and parse into a DataFrame for 72 | use in selecting and parsing categories. 73 | 74 | Parameters 75 | ---------- 76 | tracerinfo_file : str 77 | Path to tracerinfo.dat 78 | 79 | Returns 80 | ------- 81 | DataFrame containing the tracer information. 
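Examples
--------
The function returns both the parsed table and a dict describing each
column; the path below is illustrative.

>>> tracer_df, tracer_desc = get_tracerinfo("tracerinfo.dat")  # doctest: +SKIP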
82 | 83 | """ 84 | 85 | widths = [rec.width for rec in tracer_recs] 86 | col_names = [rec.name for rec in tracer_recs] 87 | dtypes = [rec.type for rec in tracer_recs] 88 | usecols = [name for name in col_names if not name.startswith('-')] 89 | 90 | tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names, 91 | dtypes=dtypes, comment="#", header=None, 92 | usecols=usecols) 93 | 94 | # Check an edge case related to a bug in GEOS-Chem v12.0.3 which 95 | # erroneously dropped short/long tracer names in certain tracerinfo.dat outputs. 96 | # What we do here is figure out which rows were erroneously processed (they'll 97 | # have NaNs in them) and raise a warning if there are any 98 | na_free = tracer_df.dropna(subset=['tracer', 'scale']) 99 | only_na = tracer_df[~tracer_df.index.isin(na_free.index)] 100 | if len(only_na) > 0: 101 | warn("At least one row in {} wasn't decoded correctly; we strongly" 102 | " recommend you manually check that file to see that all" 103 | " tracers are properly recorded." 104 | .format(tracerinfo_file)) 105 | 106 | tracer_desc = {tracer.name: tracer.desc for tracer in tracer_recs 107 | if not tracer.name.startswith('-')} 108 | 109 | # Process some of the information about which variables are hydrocarbons 110 | # and chemical tracers versus other diagnostics. 111 | def _assign_hydrocarbon(row): 112 | if row['C'] != 1: 113 | row['hydrocarbon'] = True 114 | row['molwt'] = C_MOLECULAR_WEIGHT 115 | else: 116 | row['hydrocarbon'] = False 117 | return row 118 | 119 | tracer_df = ( 120 | tracer_df 121 | .apply(_assign_hydrocarbon, axis=1) 122 | .assign(chemical=lambda x: x['molwt'].astype(bool)) 123 | ) 124 | 125 | return tracer_df, tracer_desc -------------------------------------------------------------------------------- /doc/quick_start.rst: -------------------------------------------------------------------------------- 1 | .. _quick start: 2 | 3 | Quick Start 4 | =========== 5 | 6 | Assuming you're already familiar with xarray_, it's easy to dive right in to 7 | begin reading bpch data. If you don't have any GEOS-Chem_ data handy to test 8 | with, I've archived 9 | `a sample dataset here `_ 10 | here consisting of 14 days of hourly, ND49 output - good for diagnosing 11 | surface air quality statistics. 12 | 13 | Download the data and extract it to some directory:: 14 | 15 | $ wget https://ndownloader.figshare.com/files/8251094 16 | $ tar -xvzf sample_nd49.tar.gz 17 | 18 | You should now see 14 ``bpch`` files in your directory, and two ``.dat`` files. 19 | 20 | The whole point of **xbpch** is to read these data files natively into an 21 | `xarray.Dataset `_. 22 | You can do this with the :py:func:`xbpch.open_bpchdataset` method: 23 | 24 | .. ipython:: python 25 | :verbatim: 26 | 27 | import xbpch 28 | fn = "ND49_20060102_ref_e2006_m2010.bpch" 29 | ds = xbpch.open_bpchdataset(fn) 30 | 31 | If we print the dataset back out, we'll get a familiar representation: 32 | 33 | .. parsed-literal:: 34 | 35 | 36 | Dimensions: (lat: 91, lev: 47, lon: 144, nv: 2, time: 24) 37 | Coordinates: 38 | * lev (lev) float64 0.9925 0.9775 0.9624 0.9473 0.9322 0.9171 ... 39 | * lon (lon) float64 -180.0 -177.5 -175.0 -172.5 -170.0 -167.5 ... 40 | * lat (lat) float64 -89.5 -88.0 -86.0 -84.0 -82.0 -80.0 -78.0 ... 41 | * time (time) datetime64[ns] 2006-01-01T01:00:00 ... 42 | * nv (nv) int64 0 1 43 | Data variables: 44 | IJ_AVG_S_NO (time, lon, lat) float32 1.16601e-12 1.1599e-12 ... 45 | time_bnds (time, nv) datetime64[ns] 2006-01-01T01:00:00 ... 
46 | IJ_AVG_S_O3 (time, lon, lat) float32 9.25816e-09 9.25042e-09 ... 47 | IJ_AVG_S_SO4 (time, lon, lat) float32 1.41706e-10 1.4142e-10 ... 48 | IJ_AVG_S_NH4 (time, lon, lat) float32 1.16908e-11 1.16658e-11 ... 49 | IJ_AVG_S_NIT (time, lon, lat) float32 9.99837e-31 9.99897e-31 ... 50 | IJ_AVG_S_BCPI (time, lon, lat) float32 2.46206e-12 2.45698e-12 ... 51 | IJ_AVG_S_OCPI (time, lon, lat) float32 2.65303e-11 2.6476e-11 ... 52 | IJ_AVG_S_BCPO (time, lon, lat) float32 4.19881e-19 4.18213e-19 ... 53 | IJ_AVG_S_OCPO (time, lon, lat) float32 2.49109e-22 2.53752e-22 ... 54 | IJ_AVG_S_DST1 (time, lon, lat) float32 7.11484e-12 7.10209e-12 ... 55 | IJ_AVG_S_DST2 (time, lon, lat) float32 1.55181e-11 1.54779e-11 ... 56 | IJ_AVG_S_SALA (time, lon, lat) float32 3.70387e-11 3.69923e-11 ... 57 | OD_MAP_S_AOD (time, lon, lat) float32 0.292372 0.325568 0.358368 ... 58 | OD_MAP_S_DSTAOD (time, lon, lat) float32 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 59 | Attributes: 60 | modelname: GEOS5_47L 61 | halfpolar: 1 62 | res: (2.5, 2.0) 63 | center180: 1 64 | tracerinfo: tracerinfo.dat 65 | diaginfo: diaginfo.dat 66 | filetitle: b'GEOS-CHEM DIAG49 instantaneous timeseries' 67 | source: ND49_20060101_ref_e2006_m2010.bpch 68 | filetype: b'CTM bin 02' 69 | Conventions: CF1.6 70 | 71 | You can then proceed to process the data using the conventional routines 72 | you'd use in any xarray_-powered workflow. 73 | 74 | In the sample dataset highlighted here, the 14 days of hourly output are 75 | split across 14 files - one for each day's worth of data. **xbpch** 76 | provides a second method, :py:func:`xbpch.open_mfbpchdataset`, for reading in 77 | multiple-file datasets like these, and automatically concatenating them 78 | on the *time* record dimension: 79 | 80 | .. ipython:: python 81 | :verbatim: 82 | 83 | import xbpch 84 | 85 | from glob import glob 86 | 87 | # List all the bpch files in the current directory 88 | fns = glob("ND49_*.bpch") 89 | 90 | # Helper function to extract spatial mean O3 from each file 91 | def _preprocess(ds): 92 | return ds[['IJ_AVG_S_O3', ]].mean(['lon', 'lat']) 93 | 94 | ds = xbpch.open_mfbpchdataset( 95 | fns, preprocess=_preprocess, dask=True, memmap=True 96 | ) 97 | 98 | Again, printing yields the expected results: 99 | 100 | .. parsed-literal:: 101 | 102 | 103 | Dimensions: (time: 336) 104 | Coordinates: 105 | * time (time) datetime64[ns] 2006-01-01T01:00:00 ... 106 | Data variables: 107 | IJ_AVG_S_O3 (time) float32 2.5524e-08 2.55541e-08 2.55588e-08 ... 108 | 109 | Finally, if you don't want to drop into a Python interpreter but just want 110 | to quickly convert your binary data to NetCDF, you can run the utility 111 | script `bpch_to_nc` which is shipped with this library:: 112 | 113 | $ bpch_to_nc /path/to/my/data.bpch /path/to/my/output.nc 114 | 115 | Reading in file(s)... 116 | 117 | Decoding variables... 118 | 119 | Writing to /path/to/my/output.nc ... 120 | syncing 121 | [####################################] | 100% Completed | 52.1s 122 | 123 | .. _GEOS-Chem: http://www.geos-chem.org 124 | .. _dask: http://dask.pydata.org 125 | .. _xarray: http://xarray.pydata.org 126 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # xbpch documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Apr 24 17:47:50 2017. 
6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | 21 | import os 22 | import sys 23 | import xbpch 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.todo', 38 | 'sphinx.ext.coverage', 39 | 'sphinx.ext.mathjax', 40 | 'numpydoc', 41 | 'IPython.sphinxext.ipython_directive', 42 | 'IPython.sphinxext.ipython_console_highlighting', 43 | ] 44 | 45 | # Extensions arguments 46 | autosummary_generate = True 47 | numpydoc_class_members_toctree = True 48 | numpydoc_show_class_members = False 49 | 50 | # Add any paths that contain templates here, relative to this directory. 51 | templates_path = ['_templates'] 52 | 53 | # The suffix(es) of source filenames. 54 | # You can specify multiple suffix as a list of string: 55 | # 56 | # source_suffix = ['.rst', '.md'] 57 | source_suffix = '.rst' 58 | 59 | # The master toctree document. 60 | master_doc = 'index' 61 | 62 | # General information about the project. 63 | project = 'xbpch' 64 | copyright = '2017, Daniel Rothenberg' 65 | author = 'Daniel Rothenberg' 66 | 67 | # The version info for the project you're documenting, acts as replacement for 68 | # |version| and |release|, also used in various other places throughout the 69 | # built documents. 70 | # 71 | # The short X.Y version. 72 | version = '0.2.0' 73 | # The full version, including alpha/beta/rc tags. 74 | release = '0.2.0' 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = None 82 | 83 | # List of patterns, relative to source directory, that match files and 84 | # directories to ignore when looking for source files. 85 | # This patterns also effect to html_static_path and html_extra_path 86 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 87 | 88 | # The name of the Pygments (syntax highlighting) style to use. 89 | pygments_style = 'sphinx' 90 | 91 | # If true, `todo` and `todoList` produce output, else they produce nothing. 92 | todo_include_todos = False 93 | 94 | 95 | # -- Options for HTML output ---------------------------------------------- 96 | 97 | # Customization for building on RTD 98 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 99 | if not on_rtd: # only import and set the theme if we're building docs locally 100 | import sphinx_rtd_theme 101 | html_theme = 'sphinx_rtd_theme' 102 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 103 | 104 | # The theme to use for HTML and HTML Help pages. 
See the documentation for 105 | # a list of builtin themes. 106 | # 107 | # html_theme = 'default' 108 | 109 | 110 | # Theme options are theme-specific and customize the look and feel of a theme 111 | # further. For a list of options available for each theme, see the 112 | # documentation. 113 | # 114 | # html_theme_options = {} 115 | 116 | # Add any paths that contain custom static files (such as style sheets) here, 117 | # relative to this directory. They are copied after the builtin static files, 118 | # so a file named "default.css" will overwrite the builtin "default.css". 119 | html_static_path = ['_static'] 120 | 121 | 122 | # -- Options for HTMLHelp output ------------------------------------------ 123 | 124 | # Output file base name for HTML help builder. 125 | htmlhelp_basename = 'xbpchdoc' 126 | 127 | 128 | # -- Options for LaTeX output --------------------------------------------- 129 | 130 | latex_elements = { 131 | # The paper size ('letterpaper' or 'a4paper'). 132 | # 133 | # 'papersize': 'letterpaper', 134 | 135 | # The font size ('10pt', '11pt' or '12pt'). 136 | # 137 | # 'pointsize': '10pt', 138 | 139 | # Additional stuff for the LaTeX preamble. 140 | # 141 | # 'preamble': '', 142 | 143 | # Latex figure (float) alignment 144 | # 145 | # 'figure_align': 'htbp', 146 | } 147 | 148 | # Grouping the document tree into LaTeX files. List of tuples 149 | # (source start file, target name, title, 150 | # author, documentclass [howto, manual, or own class]). 151 | latex_documents = [ 152 | (master_doc, 'xbpch.tex', 'xbpch Documentation', 153 | 'Daniel Rothenberg', 'manual'), 154 | ] 155 | 156 | 157 | # -- Options for manual page output --------------------------------------- 158 | 159 | # One entry per manual page. List of tuples 160 | # (source start file, name, description, authors, manual section). 161 | man_pages = [ 162 | (master_doc, 'xbpch', 'xbpch Documentation', 163 | [author], 1) 164 | ] 165 | 166 | 167 | # -- Options for Texinfo output ------------------------------------------- 168 | 169 | # Grouping the document tree into Texinfo files. List of tuples 170 | # (source start file, target name, title, author, 171 | # dir menu entry, description, category) 172 | texinfo_documents = [ 173 | (master_doc, 'xbpch', 'xbpch Documentation', 174 | author, 'xbpch', 'One line description of project.', 175 | 'Miscellaneous'), 176 | ] 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /xbpch/uff.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for reading unformatted Fortran binary files 3 | 4 | Reproduced from PyGChem 5 | 6 | Copyright (C) 2012-2014 Gerrit Kuhlmann, Benoît Bovy 7 | see https://github.com/benbovy/PyGChem/blob/master/LICENSE.txt for more details 8 | 9 | """ 10 | 11 | from __future__ import division 12 | from __future__ import unicode_literals 13 | from __future__ import print_function 14 | from __future__ import absolute_import 15 | 16 | from future import standard_library 17 | standard_library.install_aliases() 18 | from builtins import * 19 | from builtins import zip 20 | from builtins import str 21 | from past.builtins import basestring 22 | from past.utils import old_div 23 | import struct 24 | import io 25 | 26 | 27 | _FIX_ERROR = ("Pre- and suffix of line do not match. This can happen, if the" 28 | " `endian` is incorrect.") 29 | 30 | 31 | class FortranFile(io.FileIO): 32 | """ 33 | A class for reading and writing unformatted binary Fortran files. 
34 | 35 | Parameters 36 | ---------- 37 | filename : string 38 | filename 39 | mode : {'rb', 'wb'} 40 | mode of the file: 'rb' (reading binary, default) or 'wb' 41 | (writing binary). 42 | endian : {'@', '<', '>'} 43 | byte order, size and alignment of the data in the file. 44 | '@' native, '<' little-endian, and '>' big-endian (default). 45 | 46 | Notes 47 | ----- 48 | Fortran writes data as "lines" when using the PRINT or WRITE statements. 49 | Each line consists of: 50 | - a prefix (4 byte integer gives the size of the data) 51 | - the real data 52 | - a suffix (same as prefix). 53 | 54 | This class can be used to read and write these "lines", in a similar 55 | way as reading "real lines" in a text file. A format can be given, 56 | while reading or writing to pack or unpack data into a binary 57 | format, using the 'struct' module from the Python standard library. 58 | 59 | See Documentation of Python's struct module for details on endians and 60 | format strings: https://docs.python.org/library/struct.html 61 | """ 62 | 63 | def __init__(self, filename, mode='rb', endian='>'): 64 | self.endian = endian 65 | super(FortranFile, self).__init__(filename, mode) 66 | 67 | def _fix(self, fmt='i'): 68 | """ 69 | Read pre- or suffix of line at current position with given 70 | format `fmt` (default 'i'). 71 | """ 72 | fmt = self.endian + fmt 73 | fix = self.read(struct.calcsize(fmt)) 74 | if fix: 75 | return struct.unpack(fmt, fix)[0] 76 | else: 77 | raise EOFError 78 | 79 | def readline(self, fmt=None): 80 | """ 81 | Return next unformatted "line". If format is given, unpack content, 82 | otherwise return byte string. 83 | """ 84 | prefix_size = self._fix() 85 | 86 | if fmt is None: 87 | content = self.read(prefix_size) 88 | else: 89 | fmt = self.endian + fmt 90 | fmt = _replace_star(fmt, prefix_size) 91 | content = struct.unpack(fmt, self.read(prefix_size)) 92 | 93 | try: 94 | suffix_size = self._fix() 95 | except EOFError: 96 | # when endian is invalid and prefix_size > total file size 97 | suffix_size = -1 98 | 99 | if prefix_size != suffix_size: 100 | raise IOError(_FIX_ERROR) 101 | 102 | return content 103 | 104 | def readlines(self): 105 | """ 106 | Return list strings, each a line from the file. 107 | """ 108 | return [line for line in self] 109 | 110 | def skipline(self): 111 | """ 112 | Skip the next line and returns position and size of line. 113 | Raises IOError if pre- and suffix of line do not match. 114 | """ 115 | position = self.tell() 116 | prefix = self._fix() 117 | self.seek(prefix, 1) # skip content 118 | suffix = self._fix() 119 | 120 | if prefix != suffix: 121 | raise IOError(_FIX_ERROR) 122 | 123 | return position, prefix 124 | 125 | def writeline(self, fmt, *args): 126 | """ 127 | Write `line` (list of objects) with given `fmt` to file. The 128 | `line` will be chained if object is iterable (except for 129 | basestrings). 130 | """ 131 | fmt = self.endian + fmt 132 | size = struct.calcsize(fmt) 133 | 134 | fix = struct.pack(self.endian + 'i', size) 135 | line = struct.pack(fmt, *args) 136 | 137 | self.write(fix) 138 | self.write(line) 139 | self.write(fix) 140 | 141 | def writelines(self, lines, fmt): 142 | """ 143 | Write `lines` with given `format`. 
144 | """ 145 | if isinstance(fmt, basestring): 146 | fmt = [fmt] * len(lines) 147 | for f, line in zip(fmt, lines): 148 | self.writeline(f, line, self.endian) 149 | 150 | def __iter__(self): 151 | return self 152 | 153 | def next(self, fmt=None): 154 | try: 155 | return self.readline(fmt) 156 | except EOFError: 157 | raise StopIteration 158 | 159 | 160 | def _replace_star(fmt, size): 161 | """ 162 | Replace the `*` placeholder in a format string (fmt), so that 163 | struct.calcsize(fmt) is equal to the given `size` using the format 164 | following the placeholder. 165 | 166 | Raises `ValueError` if number of `*` is larger than 1. If no `*` 167 | in `fmt`, returns `fmt` without checking its size! 168 | 169 | Examples 170 | -------- 171 | >>> _replace_star('ii*fi', 40) 172 | 'ii7fi' 173 | """ 174 | n_stars = fmt.count('*') 175 | 176 | if n_stars > 1: 177 | raise ValueError("More than one `*` in format (%s)." % fmt) 178 | 179 | if n_stars: 180 | i = fmt.find('*') 181 | s = struct.calcsize(fmt.replace(fmt[i:i + 2], '')) 182 | n = old_div((size - s), struct.calcsize(fmt[i + 1])) 183 | 184 | fmt = fmt.replace('*', str(n)) 185 | 186 | return fmt 187 | -------------------------------------------------------------------------------- /xbpch/util/cf.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides the capability to interpret CTM metadata according 3 | to the 'NetCDF Climate and Forecast (CF) Metadata Conventions' 4 | 5 | References: 6 | 7 | [CF] NetCDF Climate and Forecast (CF) Metadata conventions, Version 1.6, 8 | December, 2011. 9 | """ 10 | 11 | import datetime 12 | 13 | from xarray.core.variable import as_variable, Variable 14 | 15 | #: CTM timestamp definitions 16 | CTM_TIME_UNIT_STR = 'hours since 1985-01-01 00:00:00' 17 | CTM_TIME_REF_DT = datetime.datetime(1985, 1, 1) 18 | 19 | 20 | def tau2time(tau, reference=CTM_TIME_REF_DT): 21 | """ 22 | Convert given hours since reference (default: 01.01.1985 00:00) 23 | into a datetime object. 24 | """ 25 | return reference + datetime.timedelta(hours=tau) 26 | 27 | 28 | def time2tau(time, reference=CTM_TIME_REF_DT): 29 | """ 30 | Convert a datetime object into given hours since reference 31 | (default: 01.01.1985 00:00). 32 | """ 33 | return (time - reference).total_seconds() / 3600.0 34 | 35 | 36 | #: Mapping for unit names: CTM -> udunits2 37 | UNITS_MAP_CTM2CF = ( 38 | ('molec CO2', 'count'), 39 | ('molec', 'count'), 40 | ('atoms S', 'count'), 41 | ('atoms C', 'count'), 42 | ('ppbC', 'ppb'), # prefix or suffix required (nb. of carbon atoms) 43 | ('kg C', 'kg'), # prefix or suffix required (?) 44 | ('molC', 'mol'), # prefix or suffix required ? TODO: 45 | ('gC', 'g'), # prefix or suffix required ? 46 | ('kg S', 'kg'), 47 | ('kg OH', 'kg'), 48 | ('kg NO3', 'kg'), 49 | ('kg H2O2', 'kg'), 50 | ('unitless', '1'), 51 | ('unitles', '1'), # typo found in tracerinfo or diaginfo 52 | ('v/v', '1'), 53 | ('level', '1'), # allowed in CF1.6 but not compatible with udunits2 54 | ('Eta', '1'), 55 | ('Fraction', '1'), 56 | ('fraction', '1'), 57 | ('ratio', '1'), 58 | ('factor', '1'), 59 | ('none', '1'), 60 | ('[percentage]', '%'), 61 | ('deg C', 'Celsius'), 62 | ('C', 'Celsius'), 63 | ('mm/da', 'mm/day'), # typo in tracerinfo.dat 4/17/12 64 | ('kg/m2/', 'kg/m2')) # ?? (tracerinfo.dat 6801 (line 1075) 65 | 66 | 67 | def get_cfcompliant_units(units, prefix='', suffix=''): 68 | """ 69 | Get equivalent units that are compatible with the udunits2 library 70 | (thus CF-compliant). 
71 | 72 | Parameters 73 | ---------- 74 | units : string 75 | A string representation of the units. 76 | prefix : string 77 | Will be added at the beginning of the returned string 78 | (must be a valid udunits2 expression). 79 | suffix : string 80 | Will be added at the end of the returned string 81 | (must be a valid udunits2 expression). 82 | 83 | Returns 84 | ------- 85 | A string representation of the conforming units. 86 | 87 | References 88 | ---------- 89 | The udunits2 package : http://www.unidata.ucar.edu/software/udunits/ 90 | 91 | Notes 92 | ----- 93 | This function only relies on the table stored in :attr:`UNITS_MAP_CTM2CF`. 94 | Therefore, the units string returned by this function is not certified to 95 | be compatible with udunits2. 96 | 97 | Examples 98 | -------- 99 | >>> get_cfcompliant_units('molec/cm2') 100 | 'count/cm2' 101 | >>> get_cfcompliant_units('v/v') 102 | '1' 103 | >>> get_cfcompliant_units('ppbC', prefix='3') 104 | '3ppb 105 | 106 | """ 107 | compliant_units = units 108 | 109 | for gcunits, udunits in UNITS_MAP_CTM2CF: 110 | compliant_units = str.replace(compliant_units, gcunits, udunits) 111 | 112 | return prefix + compliant_units + suffix 113 | 114 | 115 | VARNAME_MAP_CHAR = ( 116 | ('$', 'S'), 117 | (':', '_'), 118 | ('=', '_'), 119 | ('-', '_'), 120 | ) 121 | # TODO: Variables like BXHGHT_S_N(AIR) should have *(AIR) replaced with 122 | # just *_AIR 123 | def get_valid_varname(varname): 124 | """ 125 | Replace characters (e.g., ':', '$', '=', '-') of a variable name, which 126 | may cause problems when using with (CF-)netCDF based packages. 127 | 128 | Parameters 129 | ---------- 130 | varname : string 131 | variable name. 132 | 133 | Notes 134 | ----- 135 | Characters replacement is based on the table stored in 136 | :attr:`VARNAME_MAP_CHAR`. 137 | 138 | """ 139 | vname = varname 140 | for s, r in VARNAME_MAP_CHAR: 141 | vname = vname.replace(s, r) 142 | 143 | return vname 144 | 145 | 146 | def enforce_cf_variable(var, mask_and_scale=True): 147 | """ Given a Variable constructed from GEOS-Chem output, enforce 148 | CF-compliant metadata and formatting. 149 | 150 | Until a bug with lazily-loaded data and masking/scaling is resolved in 151 | xarray, you have the option to manually mask and scale the data here. 152 | 153 | Parameters 154 | ---------- 155 | var : xarray.Variable 156 | A variable holding information decoded from GEOS-Chem output. 157 | mask_and_scale : bool 158 | Flag to scale and mask the data given the unit conversions provided 159 | 160 | Returns 161 | ------- 162 | out : xarray.Variable 163 | The original variable processed to conform to CF standards 164 | 165 | .. note:: 166 | 167 | This method borrows heavily from the ideas in ``xarray.decode_cf_variable`` 168 | 169 | """ 170 | var = as_variable(var) 171 | data = var._data # avoid loading by accessing _data instead of data 172 | dims = var.dims 173 | attrs = var.attrs.copy() 174 | encoding = var.encoding.copy() 175 | orig_dtype = data.dtype 176 | 177 | # Process masking/scaling coordinates. We only expect a "scale" value 178 | # for the units with this output. 179 | if 'scale' in attrs: 180 | scale = attrs.pop('scale') 181 | attrs['scale_factor'] = scale 182 | encoding['scale_factor'] = scale 183 | 184 | # TODO: Once the xr.decode_cf bug is fixed, we won't need to manually 185 | # handle masking/scaling 186 | if mask_and_scale: 187 | data = scale*data 188 | 189 | # Process units 190 | # TODO: How do we want to handle parts-per-* units? 
These are not part of 191 | # the udunits standard, and the CF conventions suggest using units 192 | # like 1e-6 for parts-per-million. But we potentially mix mass and 193 | # volume/molar mixing ratios in GEOS-Chem output, so we need a way 194 | # to handle that edge case. 195 | if 'unit' in attrs: 196 | unit = attrs.pop('unit') 197 | unit = get_cfcompliant_units(unit) 198 | attrs['units'] = unit 199 | 200 | # TODO: Once the xr.decode_cf bug is fixed, we won't need to manually 201 | # handle masking/scaling 202 | return Variable(dims, data, attrs, encoding=encoding) 203 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | xbpch: xarray interface for bpch files 2 | ====================================== 3 | 4 | .. image:: https://badge.fury.io/py/xbpch.svg 5 | :target: https://badge.fury.io/py/xbpch 6 | :alt: PyPI version 7 | .. image:: https://readthedocs.org/projects/xbpch/badge/?version=latest 8 | :target: http://xbpch.readthedocs.io/en/latest/?badge=latest 9 | :alt: Documentation Status 10 | .. image:: https://zenodo.org/badge/89022822.svg 11 | :target: https://zenodo.org/badge/latestdoi/89022822 12 | :alt: Zenodo DOI 13 | 14 | **xpbch** is a simple utility for reading the proprietary 15 | `binary punch format (bpch) outputs `_ used in versions 16 | of GEOS-Chem_ earlier than v11-02. The utility allows a user to load this 17 | data into an xarray_- and dask_-powered workflow without necessarily 18 | pre-processing the data using GAMAP_ or IDL. 19 | 20 | This package is maintained as part of a broader, community effort to 21 | tackle `big data problems in geoscience `_. 22 | 23 | What's the Deal? 24 | ---------------- 25 | 26 | The `contemporary scientific Python software stack `_ 27 | provides free, powerful tools for nearly all of your data processing, analysis, 28 | and visualization needs. These tools are `well supported `_ 29 | by a large community of heavily invested users and developers from academia, 30 | government, and industry. They are also developed (mostly) as part of community-based, 31 | open-source, and user-driven projects. 32 | 33 | For nearly any application you might have in the geosciences, you can start using 34 | this powerful, free software stack *today* with minimal friction. However, 35 | one friction point that has tripped up adoption by GEOS-Chem users is that it 36 | is difficult to work with legacy bpch-format diagnostics files. **xbpch** 37 | solves this problem by providing a convenient and performant way to read 38 | these files into a modern Python-based analysis or workflow. 39 | 40 | Furthermore, **xbpch** is 100% future-proof. In two years, when your GEOS-Chem 41 | simulations are writing NetCDF diagnostics, you won't need to change more than a 42 | single line of code in any of your scripts using **xbpch**. All you'll need to do 43 | is swap out **xbpch**'s function for reading data and instead defer to it's parent 44 | package (xarray). It will *literally* take less than 10 keystrokes to make this 45 | change in your code. Plus - you'll be backwards compatible with any legacy 46 | output you need to analyze. 47 | 48 | So give **xbpch** a try, and let me know what issues you run in to! If we solve 49 | them once today, they'll be solved in perpetuity, which means more time for you 50 | to do science and less time to worry about processing data. 
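As a sketch of the "less than 10 keystrokes" swap described above (the file
names and the variable chosen are placeholders, not part of the package):

.. code:: python

    from xbpch import open_bpchdataset

    # Today: read legacy bpch diagnostics
    ds = open_bpchdataset("my_geos_chem_output.bpch")

    # Later, once your runs write NetCDF diagnostics, only this line changes:
    #
    #     import xarray as xr
    #     ds = xr.open_dataset("my_geos_chem_output.nc")

    # Everything downstream of the read stays the same
    o3_mean = ds["IJ_AVG_S_O3"].mean("time")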
51 | 52 | 53 | Installation 54 | ------------ 55 | 56 | Requirements 57 | ^^^^^^^^^^^^ 58 | 59 | **xbpch** is only intended for use with Python 3, although with some 60 | modifications it would likely work with Python 2.7 (`Pull Requests are 61 | welcome! `_). As the package 62 | description implies, it requires up-to-date copies of xarray_ 63 | (>= version 0.9) and dask_ (>= version 0.14). The best way to install 64 | these packages is by using the conda_ package management system, or 65 | the `Anaconda Python distribution `_. 66 | 67 | To install **xbpch** and its dependencies using conda, execute from a terminal:: 68 | 69 | $ conda install -c conda-forge xbpch xarray dask 70 | 71 | Alternatively, you can install **xbpch** `from PyPI `_:: 73 | 74 | $ pip install xbpch 75 | 76 | You can also install **xbpch** from its source. To do this, you 77 | can either clone the source directory and manually install:: 78 | 79 | $ git clone https://github.com/darothen/xbpch.git 80 | $ cd xbpch 81 | $ python setup.py install 82 | 83 | or, you can install via pip directly from git:: 84 | 85 | $ pip install git+https://github.com/darothen/xbpch.git 86 | 87 | Please note that if you locally clone the repository from GitHub but do not 88 | explicitly install the package using ``setup.py``, the file ``xbpch/version.py`` 89 | will not get written properly and you will not be able to use the package. 90 | We strongly recommend you install the package using traditional techniques to 91 | ensure that all dependencies are properly added to your environment. 92 | 93 | Quick Start 94 | ----------- 95 | 96 | If you're already familiar with loading and manipulating data with 97 | xarray_, then it's easy to dive right into **xbpch**. Navigate to a 98 | directory on disk which contains your ``.bpch`` output, as well as 99 | ``tracerinfo.dat`` and ``diaginfo.dat``, and execute from a Python 100 | interpreter: 101 | 102 | .. code:: python 103 | 104 | from xbpch import open_bpchdataset 105 | fn = "my_geos_chem_output.bpch" 106 | ds = open_bpchdataset(fn) 107 | 108 | After a few seconds (depending on your hard-drive speed) you should be 109 | able to interact with ``ds`` just as you would any *xarray.Dataset* 110 | object. 111 | 112 | Caveats and Future Notes 113 | ------------------------ 114 | 115 | **xbpch** should work for most simple workflows, especially if you need 116 | a quick-and-dirty way to ingest legacy GEOS-Chem_ output. It is **not** 117 | tested against the majority of output grids, including data for the Hg 118 | model or nested models. Grid information (at least for the vertical) is 119 | hard-coded and may not be accurate for the most recent versions of 120 | GEOS-Chem_. 121 | 122 | Most importantly, **xbpch** does not yet solve the problem of manually 123 | scanning bpch files before producing a dataset on disk. Because the bpch 124 | format does not encode metadata about *what its contents actually are*, 125 | we must manually process this from any output file we wish to load. For 126 | the time being, we do **not** short-circuit this process because we 127 | cannot necessarily predict file position offsets in the bpch files we 128 | read. In the future, I hope to come up with an elegant solution to 129 | this problem. 130 | 131 | Acknowledgments 132 | --------------- 133 | 134 | This utility packages together a few pre-existing toolkits which 135 | have been floating around the Python-GEOS-Chem community.
In particular, 136 | I would like to acknowledge the following pieces of software which I have 137 | built this utility around: 138 | 139 | - `PyGChem `_ by 140 | `Benoit Bovy `_ 141 | - `gchem `_ by 142 | `Gerrit Kuhlmann `_ 143 | 144 | Furthermore, the strategies used to load and process binary output on disk 145 | through xarray_\'s ``DataStore`` API is heavily inspired by `Ryan 146 | Abernathey's `_ package `xmitgcm 147 | `_. 148 | 149 | 150 | License 151 | ------- 152 | 153 | Copyright (c) 2017 `Daniel Rothenberg`_ 154 | 155 | This work is licensed_ under a permissive MIT License. I acknowledge 156 | important contributions from Benoît Bovy, Gerrit Kuhlmann, and Christoph 157 | Keller in the form of prior work which helped create the foundation for 158 | this package. 159 | 160 | Contact 161 | ------- 162 | 163 | `Daniel Rothenberg`_ - darothen@mit.edu 164 | 165 | .. _`Daniel Rothenberg`: http://github.com/darothen 166 | .. _conda: http://conda.pydata.org/docs/ 167 | .. _dask: http://dask.pydata.org/ 168 | .. _GAMAP: http://acmg.seas.harvard.edu/gamap/ 169 | .. _licensed: LICENSE 170 | .. _GEOS-Chem: http://www.geos-chem.org 171 | .. _xarray: http://xarray.pydata.org/ 172 | 173 | 174 | -------------------------------------------------------------------------------- /xbpch/grid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities and information for re-constructing GEOS-Chem horizontal and vertical 3 | grids. 4 | """ 5 | 6 | import numpy as np 7 | 8 | from collections import OrderedDict 9 | 10 | from .common import broadcast_1d_array 11 | from .util.gridspec import _get_model_info, prof_altitude 12 | 13 | #: Hard-coded dimension variables to use with any Dataset read in 14 | BASE_DIMENSIONS = OrderedDict( 15 | lon=dict( 16 | dims=['lon', ], 17 | attrs={ 18 | 'standard_name': 'longitude', 19 | 'axis': 'X', 20 | } 21 | ), 22 | lat=dict( 23 | dims=['lat', ], 24 | attrs={ 25 | 'standard_name': 'latitude', 26 | 'axis': 'Y', 27 | }, 28 | ), 29 | time=dict(dims=['time', ], attrs={}), 30 | nv=dict(dims=['nv', ], attrs={}), 31 | ) 32 | 33 | 34 | #: CF/COARDS recommended dimension order; non-spatiotemporal dimensions 35 | #: should precede these. 36 | DIM_ORDER_PRIORITY = ['time', 'lev', 'lat', 'lon'] 37 | 38 | 39 | class CTMGrid(object): 40 | """ 41 | Set-up the grid of a CTM (2)3D model. 42 | 43 | Parameters 44 | ---------- 45 | model_name : string 46 | Name of the model. If it is one of the supported models, 47 | (see :class:`CTMGrid`.supported_models), it is better to use 48 | :class:`CTMGrid`.from_model or :class:`CTMGrid`.copy_from_model 49 | to set-up the grid with appropriate parameter values. 50 | resolution : (float, float) 51 | Horizontal grid resolution (lon, lat) or (DI, DJ) [degrees] 52 | (default: (5, 4)) 53 | halfpolar : bool 54 | Indicates whether polar grid boxes span half (True) or same (False) 55 | latitude as all other boxes (default: True) 56 | center180 : bool 57 | True if lon grid is centered at 180 degrees (default: True) 58 | hybrid : bool 59 | indicates whether the model is a sigma-pressure hybrid (True) or 60 | pure sigma (False) level model (default: True). 61 | Nlayers : int or None 62 | Number of vertical model layers. This number must correspond to the 63 | number of layers in the model output files and is used in 64 | conjunction with Ptop to convert sigma levels into pressure 65 | altitudes. Set value to None if the model has no vertical 66 | layer (2D) (default: None). 
67 | Ntrop : int or None 68 | Number of layers in the troposphere (default: None) 69 | Psurf : float 70 | Average surface pressure [hPa] (default: 1013.15) 71 | Ptop : float 72 | Pressure at model top [hPa] (default: 0.01) 73 | description : string 74 | Model grid description 75 | model_family : string 76 | Model family (e.g., 'GEOS' for 'GEOS5') 77 | 78 | Other Parameters 79 | ---------------- 80 | Ap, Bp : 1-d array_like 81 | Parameters for computing ETA coordinates of the vertical grid 82 | levels, if hybrid (Ap [hPa] ; Bp [unitless]). 83 | csig, esig : 1-d array_like 84 | Pre-defined sigma coordinates the centers and the bottom edges of 85 | the vertical grid, if pure sigma. 86 | 87 | Attributes 88 | ---------- 89 | Attributes are the same than the parameters above, except `model_name` 90 | which becomes :attr:`model`. 91 | 92 | """ 93 | 94 | def __init__(self, model_name, resolution=(5, 4), halfpolar=True, 95 | center180=True, hybrid=True, Nlayers=None, Ntrop=None, 96 | Psurf=1013.25, Ptop=0.01, description='', model_family='', 97 | **kwargs): 98 | 99 | self.model = model_name 100 | self.description = description 101 | self.model_family = model_family 102 | self.resolution = resolution 103 | self.halfpolar = bool(halfpolar) 104 | self.center180 = bool(center180) 105 | self.hybrid = bool(hybrid) 106 | self.Ap = None 107 | self.Bp = None 108 | self.esig = None 109 | self.csig = None 110 | try: 111 | self.Nlayers = int(Nlayers) 112 | self.Ntrop = int(Ntrop) 113 | except TypeError: 114 | self.Nlayers = Nlayers 115 | self.Ntrop = Ntrop 116 | self.Psurf = Psurf 117 | self.Ptop = Ptop 118 | 119 | self._lonlat_edges = None 120 | self._lonlat_centers = None 121 | self._eta_edges = None 122 | self._eta_centers = None 123 | self._sigma_edges = None 124 | self._sigma_centers = None 125 | self._pressure_edges = None 126 | self._pressure_centers = None 127 | self._altitude_edges = None 128 | self._altitude_centers = None 129 | 130 | for k, v in kwargs.items(): 131 | self.__setattr__(k, v) 132 | 133 | # Pre-compute grid info / coordinates 134 | layers = self.get_layers() 135 | for k, v in layers.items(): 136 | self.__setattr__(k, v) 137 | lonlats = self.get_lonlat() 138 | for k, v in lonlats.items(): 139 | self.__setattr__(k, v) 140 | 141 | 142 | @classmethod 143 | def from_model(cls, model_name, **kwargs): 144 | """ 145 | Define a grid using the specifications of a given model. 146 | 147 | Parameters 148 | ---------- 149 | model_name : string 150 | Name the model (see :func:`get_supported_models` for available 151 | model names). 152 | Supports multiple formats (e.g., 'GEOS5', 'GEOS-5' or 'GEOS_5'). 153 | **kwargs : string 154 | Parameters that override the model or default grid 155 | settings (See Other Parameters below). 156 | 157 | Returns 158 | ------- 159 | A :class:`CTMGrid` object. 160 | 161 | Other Parameters 162 | ---------------- 163 | resolution : (float, float) 164 | Horizontal grid resolution (lon, lat) or (DI, DJ) [degrees] 165 | Psurf : float 166 | Average surface pressure [hPa] (default: 1013.15) 167 | 168 | Notes 169 | ----- 170 | Regridded vertical models may have several valid names (e.g., 171 | 'GEOS5_47L' and 'GEOS5_REDUCED' refer to the same model). 
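Examples
--------
A minimal, hypothetical set-up; any supported model name and resolution
may be substituted here:

>>> grid = CTMGrid.from_model('GEOS5_47L', resolution=(2.5, 2))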
172 | 173 | """ 174 | settings = _get_model_info(model_name) 175 | model = settings.pop('model_name') 176 | for k, v in list(kwargs.items()): 177 | if k in ('resolution', 'Psurf'): 178 | settings[k] = v 179 | 180 | return cls(model, **settings) 181 | 182 | @classmethod 183 | def copy_from_model(cls, model_name, reference, **kwargs): 184 | """ 185 | Set-up a user-defined grid using specifications of a reference 186 | grid model. 187 | 188 | Parameters 189 | ---------- 190 | model_name : string 191 | name of the user-defined grid model. 192 | reference : string or :class:`CTMGrid` instance 193 | Name of the reference model (see :func:`get_supported_models`), 194 | or a :class:`CTMGrid` object from which grid set-up is copied. 195 | **kwargs 196 | Any set-up parameter which will override the settings of the 197 | reference model (see :class:`CTMGrid` parameters). 198 | 199 | Returns 200 | ------- 201 | A :class:`CTMGrid` object. 202 | 203 | """ 204 | if isinstance(reference, cls): 205 | settings = reference.__dict__.copy() 206 | settings.pop('model') 207 | else: 208 | settings = _get_model_info(reference) 209 | settings.pop('model_name') 210 | 211 | settings.update(kwargs) 212 | settings['reference'] = reference 213 | 214 | return cls(model_name, **settings) 215 | 216 | 217 | def get_layers(self, Psurf=1013.25, Ptop=0.01, **kwargs): 218 | """ 219 | Compute scalars or coordinates associated to the vertical layers. 220 | 221 | Parameters 222 | ---------- 223 | grid_spec : CTMGrid object 224 | CTMGrid containing the information necessary to re-construct grid 225 | levels for a given model coordinate system. 226 | 227 | Returns 228 | ------- 229 | dictionary of vertical grid components, including eta (unitless), 230 | sigma (unitless), pressure (hPa), and altitude (km) on both layer centers 231 | and edges, ordered from bottom-to-top. 232 | 233 | Notes 234 | ----- 235 | For pure sigma grids, sigma coordinates are given by the esig (edges) and 236 | csig (centers). 237 | 238 | For both pure sigma and hybrid grids, pressures at layers edges L are 239 | calculated as follows: 240 | 241 | .. math:: P_e(L) = A_p(L) + B_p(L) * (P_{surf} - C_p) 242 | 243 | where 244 | 245 | :math:`P_{surf}`, :math:`P_{top}` 246 | Air pressures at the surface and the top of the modeled atmosphere 247 | (:attr:`Psurf` and :attr:`Ptop` attributes of the :class:`CTMGrid` 248 | instance). 249 | :math:`A_p(L)`, :math:`Bp(L)` 250 | Specified in the grid set-up (`Ap` and `Bp` attributes) for hybrid 251 | grids, or respectively equals :math:`P_{top}` and :attr:`esig` 252 | attribute for pure sigma grids. 253 | :math:`Cp(L)` 254 | equals :math:`P_{top}` for pure sigma grids or equals 0 for hybrid 255 | grids. 256 | 257 | Pressures at grid centers are averages of pressures at grid edges: 258 | 259 | .. math:: P_c(L) = (P_e(L) + P_e(L+1)) / 2 260 | 261 | For hybrid grids, ETA coordinates of grid edges and grid centers are 262 | given by; 263 | 264 | .. math:: ETA_{e}(L) = (P_e(L) - P_{top}) / (P_{surf} - P_{top}) 265 | .. math:: ETA_{c}(L) = (P_c(L) - P_{top}) / (P_{surf} - P_{top}) 266 | 267 | Altitude values are fit using a 5th-degree polynomial; see 268 | `gridspec.prof_altitude` for more details. 
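Examples
--------
A sketch of the edge/center pressure relationships above, using made-up
hybrid coefficients purely for illustration (real Ap/Bp values come from
the model's grid specification):

>>> import numpy as np
>>> Ap = np.array([0.0, 50.0, 100.0])   # hypothetical Ap [hPa]
>>> Bp = np.array([1.0, 0.95, 0.88])    # hypothetical Bp [unitless]
>>> Pe = Ap + Bp * (1013.25 - 0.)       # hybrid grid: Cp = 0
>>> Pc = 0.5 * (Pe[:-1] + Pe[1:])       # centers average adjacent edges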
269 | 270 | """ 271 | 272 | Psurf = np.asarray(Psurf) 273 | output_ndims = Psurf.ndim + 1 274 | if output_ndims > 3: 275 | raise ValueError("`Psurf` argument must be a float or an array" 276 | " with <= 2 dimensions (or None)") 277 | 278 | # Compute all variables: takes not much memory, fast 279 | # and better for code reading 280 | SIGe = None 281 | SIGc = None 282 | ETAe = None 283 | ETAc = None 284 | 285 | if self.hybrid: 286 | try: 287 | Ap = broadcast_1d_array(self.Ap, output_ndims) 288 | Bp = broadcast_1d_array(self.Bp, output_ndims) 289 | except KeyError: 290 | raise ValueError("Impossible to compute vertical levels," 291 | " data is missing (Ap, Bp)") 292 | Cp = 0. 293 | else: 294 | try: 295 | Bp = SIGe = broadcast_1d_array(self.esig, output_ndims) 296 | SIGc = broadcast_1d_array(self.csig, output_ndims) 297 | except KeyError: 298 | raise ValueError("Impossible to compute vertical levels," 299 | " data is missing (esig, csig)") 300 | Ap = Cp = Ptop 301 | 302 | Pe = Ap + Bp * (Psurf - Cp) 303 | Pc = 0.5 * (Pe[0:-1] + Pe[1:]) 304 | 305 | if self.hybrid: 306 | ETAe = (Pe - Ptop)/(Psurf - Ptop) 307 | ETAc = (Pc - Ptop)/(Psurf - Ptop) 308 | else: 309 | SIGe = SIGe * np.ones_like(Psurf) 310 | SIGc = SIGc * np.ones_like(Psurf) 311 | 312 | Ze = prof_altitude(Pe, **kwargs) 313 | Zc = prof_altitude(Pc, **kwargs) 314 | 315 | all_vars = {'eta_edges': ETAe, 316 | 'eta_centers': ETAc, 317 | 'sigma_edges': SIGe, 318 | 'sigma_centers': SIGc, 319 | 'pressure_edges': Pe, 320 | 'pressure_centers': Pc, 321 | 'altitude_edges': Ze, 322 | 'altitude_centers': Zc} 323 | 324 | return all_vars 325 | 326 | 327 | def get_lonlat(self): 328 | """ 329 | Calculate longitude-latitude grid for a specified resolution and 330 | configuration / ordering. 331 | 332 | Parameters 333 | ---------- 334 | rlon, rlat : float 335 | Resolution (in degrees) of longitude and latitude grids. 336 | halfpolar : bool (default=True) 337 | Polar grid boxes span half of rlat relative to the other grid cells. 338 | center180 : bool (default=True) 339 | Longitude grid should be centered at 180 degrees. 340 | 341 | """ 342 | 343 | rlon, rlat = self.resolution 344 | 345 | # Compute number of grid cells in each direction 346 | Nlon = int(360. / rlon) 347 | Nlat = int(180. / rlat) + self.halfpolar 348 | 349 | # Compute grid cell edges 350 | elon = np.arange(Nlon + 1) * rlon - np.array(180.) 351 | elon -= rlon / 2. * self.center180 352 | elat = np.arange(Nlat + 1) * rlat - np.array(90.) 353 | elat -= rlat / 2. * self.halfpolar 354 | elat[0] = -90. 355 | elat[-1] = 90. 356 | 357 | # Compute grid cell centers 358 | clon = (elon - (rlon / 2.))[1:] 359 | clat = np.arange(Nlat) * rlat - np.array(90.) 360 | 361 | # Fix grid boundaries if halfpolar 362 | if self.halfpolar: 363 | clat[0] = (elat[0] + elat[1]) / 2. 364 | clat[-1] = -clat[0] 365 | else: 366 | clat += (elat[1] - elat[0]) / 2. 367 | 368 | return { 369 | "lon_centers": clon, "lat_centers": clat, 370 | "lon_edges": elon, "lat_edges": elat 371 | } 372 | 373 | 374 | def get_grid_spec(model_name): 375 | """ 376 | Pass-through to look-up the grid specifications for a given GEOS-Chem 377 | configuration. 378 | 379 | Parameters 380 | ---------- 381 | model_name : str 382 | Name of the model; variations in naming format are permissible, e.g. 383 | "GEOS5" can be requested as "GEOS-5" or "GEOS_5". 384 | resolution : tuple of floats 385 | Longitude x latitude resolution of the model. 386 | 387 | Returns 388 | ------- 389 | grid_spec : dict 390 | Critical grid information as items in a dictionary. 
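Examples
--------
A hypothetical look-up (the returned keys depend on the model definitions
in `MODELS`):

>>> spec = get_grid_spec('GEOS5')   # 'GEOS-5' or 'GEOS_5' also work
>>> isinstance(spec, dict)
True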
391 | 392 | """ 393 | return _get_model_info(model_name) 394 | -------------------------------------------------------------------------------- /xbpch/bpch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility classes and tools for handling data contained in bpch files 3 | 4 | """ 5 | 6 | from dask import delayed 7 | import dask.array as da 8 | import numpy as np 9 | import os 10 | 11 | from collections import OrderedDict 12 | 13 | from . uff import FortranFile 14 | from . util import cf 15 | from . util.diaginfo import get_diaginfo, get_tracerinfo 16 | 17 | #: Default datatype for legacy bpch output 18 | DEFAULT_DTYPE = 'f4' 19 | 20 | class BPCHDataBundle(object): 21 | """ A single slice of a single variable inside a bpch file, and all 22 | of its critical accompanying metadata. """ 23 | 24 | __slots__ = ('_shape', 'dtype', 'endian', 'filename', 'file_position', 25 | 'time', 'metadata', '_data', '_mmap', '_dask') 26 | 27 | def __init__(self, shape, endian, filename, file_position, time, 28 | metadata, data=None, dtype=None, 29 | use_mmap=False, dask_delayed=False): 30 | self._shape = shape 31 | self.dtype = dtype 32 | self.endian = endian 33 | self.filename = filename 34 | self.file_position = file_position 35 | self.time = time 36 | self.metadata = metadata 37 | 38 | if dtype is None: 39 | self.dtype = np.dtype(self.endian + DEFAULT_DTYPE) 40 | else: 41 | self.dtype = dtype 42 | 43 | # Note that data is initially prescribed as None, but we keep a hook 44 | # here so that we can inject payloads at load time, if we want 45 | # (for instance, to avoid reading/memmapping through a file) 46 | self._data = data 47 | self._mmap = use_mmap 48 | self._dask = dask_delayed 49 | 50 | @property 51 | def shape(self): 52 | return self._shape 53 | 54 | @property 55 | def ndim(self): 56 | return len(self.shape) 57 | 58 | @property 59 | def array(self): 60 | return self.data 61 | 62 | @property 63 | def data(self): 64 | if self._data is None: 65 | self._data = self._read() 66 | return self._data 67 | 68 | def _read(self): 69 | """ Helper function to load the data referenced by this bundle. 
""" 70 | if self._dask: 71 | d = da.from_delayed( 72 | delayed(read_from_bpch, )( 73 | self.filename, self.file_position, self.shape, 74 | self.dtype, self.endian, use_mmap=self._mmap 75 | ), 76 | self.shape, self.dtype 77 | ) 78 | else: 79 | d = read_from_bpch( 80 | self.filename, self.file_position, self.shape, 81 | self.dtype, self.endian, use_mmap=self._mmap 82 | ) 83 | 84 | return d 85 | 86 | 87 | class BPCHFile(object): 88 | """ A file object for representing BPCH data on disk 89 | 90 | Attributes 91 | ---------- 92 | fp : FortranFile 93 | A pointer to the open unformatted Fortran binary output (the original 94 | bpch file) 95 | var_data, var_attrs : dict 96 | Containers of `BPCHDataBundle`s and dicts, respectively, holding 97 | the accessor functions to the raw bpch data and their associated 98 | metadata 99 | 100 | """ 101 | 102 | def __init__(self, filename, mode='rb', endian='>', 103 | diaginfo_file='', tracerinfo_file='', eager=False, 104 | use_mmap=False, dask_delayed=False): 105 | """ Load a BPCHFile 106 | 107 | Parameters 108 | ---------- 109 | filename : str 110 | Path to the bpch file on disk 111 | mode : str 112 | Mode string to pass to the file opener; this is currently fixed to 113 | "rb" and all other values will be rejected 114 | endian : str {">", "<", ":"} 115 | Endian-ness of the Fortran output file 116 | {tracerinfo, diaginfo}_file : str 117 | Path to the tracerinfo.dat and diaginfo.dat files containing 118 | metadata pertaining to the output in the bpch file being read. 119 | eager : bool 120 | Flag to immediately read variable data; if "False", then nothing 121 | will be read from the file and you'll need to do so manually 122 | use_mmap : bool 123 | Use memory-mapping to read data from file 124 | dask_delayed : bool 125 | Use dask to create delayed references to the data-reading functions 126 | """ 127 | 128 | self.mode = mode 129 | if not mode.startswith('r'): 130 | raise ValueError("Currently only know how to 'r(b)'ead bpch files.") 131 | 132 | self.filename = filename 133 | self.fsize = os.path.getsize(self.filename) 134 | self.endian = endian 135 | 136 | # Open a pointer to the file 137 | self.fp = FortranFile(self.filename, self.mode, self.endian) 138 | 139 | dir_path = os.path.abspath(os.path.dirname(filename)) 140 | if not dir_path: 141 | dir_path = os.getcwd() 142 | if not tracerinfo_file: 143 | tracerinfo_file = os.path.join(dir_path, "tracerinfo.dat") 144 | if not os.path.exists(tracerinfo_file): 145 | tracerinfo_file = '' 146 | self.tracerinfo_file = tracerinfo_file 147 | if not diaginfo_file: 148 | diaginfo_file = os.path.join(dir_path, "diaginfo.dat") 149 | if not os.path.exists(diaginfo_file): 150 | diaginfo_file = '' 151 | self.diaginfo_file = diaginfo_file 152 | 153 | # Container to record file metadata 154 | self._attributes = OrderedDict() 155 | 156 | # Don't necessarily need to save diag/tracer_dict yet 157 | self.diaginfo_df, _ = get_diaginfo(self.diaginfo_file) 158 | self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file) 159 | 160 | # Container for bundles contained in the output file. 161 | self.var_data = {} 162 | self.var_attrs = {} 163 | 164 | # Critical information for accessing file contents 165 | self._header_pos = None 166 | 167 | # Data loading strategy 168 | self.use_mmap = use_mmap 169 | self.dask_delayed = dask_delayed 170 | 171 | # Control eager versus deferring reading 172 | self.eager = eager 173 | if (mode.startswith('r') and self.eager): 174 | self._read() 175 | 176 | def close(self): 177 | """ Close this bpch file. 
178 | 179 | """ 180 | 181 | if not self.fp.closed: 182 | for v in list(self.var_data): 183 | del self.var_data[v] 184 | 185 | self.fp.close() 186 | 187 | def __enter__(self): 188 | return self 189 | 190 | def __exit__(self, type, value, traceback): 191 | self.close() 192 | 193 | def _read(self): 194 | """ Parse the entire bpch file on disk and set up easy access to meta- 195 | and data blocks. 196 | 197 | """ 198 | 199 | self._read_metadata() 200 | self._read_header() 201 | self._read_var_data() 202 | 203 | def _read_metadata(self): 204 | """ Read the main metadata packaged within a bpch file, indicating 205 | the output filetype and its title. 206 | 207 | """ 208 | 209 | filetype = self.fp.readline().strip() 210 | filetitle = self.fp.readline().strip() 211 | # Decode to UTF string, if possible 212 | try: 213 | filetype = str(filetype, 'utf-8') 214 | filetitle = str(filetitle, 'utf-8') 215 | except: 216 | # TODO: Handle this edge-case of converting file metadata more elegantly. 217 | pass 218 | 219 | self.__setattr__('filetype', filetype) 220 | self.__setattr__('filetitle', filetitle) 221 | 222 | def _read_header(self): 223 | """ Process the header information (data model / grid spec) """ 224 | 225 | self._header_pos = self.fp.tell() 226 | 227 | line = self.fp.readline('20sffii') 228 | modelname, res0, res1, halfpolar, center180 = line 229 | self._attributes.update({ 230 | "modelname": str(modelname, 'utf-8').strip(), 231 | "halfpolar": halfpolar, 232 | "center180": center180, 233 | "res": (res0, res1) 234 | }) 235 | self.__setattr__('modelname', modelname) 236 | self.__setattr__('res', (res0, res1)) 237 | self.__setattr__('halfpolar', halfpolar) 238 | self.__setattr__('center180', center180) 239 | 240 | # Re-wind the file 241 | self.fp.seek(self._header_pos) 242 | 243 | 244 | def _read_var_data(self): 245 | """ Iterate over the block of this bpch file and return handlers 246 | in the form of `BPCHDataBundle`s for access to the data contained 247 | therein. 
248 | 249 | """ 250 | 251 | var_bundles = OrderedDict() 252 | var_attrs = OrderedDict() 253 | 254 | n_vars = 0 255 | 256 | while self.fp.tell() < self.fsize: 257 | 258 | var_attr = OrderedDict() 259 | 260 | # read first and second header lines 261 | line = self.fp.readline('20sffii') 262 | modelname, res0, res1, halfpolar, center180 = line 263 | 264 | line = self.fp.readline('40si40sdd40s7i') 265 | category_name, number, unit, tau0, tau1, reserved = line[:6] 266 | dim0, dim1, dim2, dim3, dim4, dim5, skip = line[6:] 267 | var_attr['number'] = number 268 | 269 | # Decode byte-strings to utf-8 270 | category_name = str(category_name, 'utf-8') 271 | var_attr['category'] = category_name.strip() 272 | unit = str(unit, 'utf-8') 273 | 274 | # get additional metadata from tracerinfo / diaginfo 275 | try: 276 | cat_df = self.diaginfo_df[ 277 | self.diaginfo_df.name == category_name.strip() 278 | ] 279 | # TODO: Safer logic for handling case where more than one 280 | # tracer metadata match was made 281 | # if len(cat_df > 1): 282 | # raise ValueError( 283 | # "More than one category matching {} found in " 284 | # "diaginfo.dat".format( 285 | # category_name.strip() 286 | # ) 287 | # ) 288 | # Safe now to select the only row in the DataFrame 289 | cat = cat_df.T.squeeze() 290 | 291 | tracer_num = int(cat.offset) + int(number) 292 | diag_df = self.tracerinfo_df[ 293 | self.tracerinfo_df.tracer == tracer_num 294 | ] 295 | # TODO: Safer logic for handling case where more than one 296 | # tracer metadata match was made 297 | # if len(diag_df > 1): 298 | # raise ValueError( 299 | # "More than one tracer matching {:d} found in " 300 | # "tracerinfo.dat".format(tracer_num) 301 | # ) 302 | # Safe now to select only row in the DataFrame 303 | diag = diag_df.T.squeeze() 304 | diag_attr = diag.to_dict() 305 | 306 | if not unit.strip(): # unit may be empty in bpch 307 | unit = diag_attr['unit'] # but not in tracerinfo 308 | var_attr.update(diag_attr) 309 | except: 310 | diag = {'name': '', 'scale': 1} 311 | var_attr.update(diag) 312 | var_attr['unit'] = unit 313 | 314 | vname = diag['name'] 315 | fullname = category_name.strip() + "_" + vname 316 | 317 | # parse metadata, get data or set a data proxy 318 | if dim2 == 1: 319 | data_shape = (dim0, dim1) # 2D field 320 | else: 321 | data_shape = (dim0, dim1, dim2) 322 | var_attr['original_shape'] = data_shape 323 | 324 | # Add proxy time dimension to shape 325 | data_shape = tuple([1, ] + list(data_shape)) 326 | origin = (dim3, dim4, dim5) 327 | var_attr['origin'] = origin 328 | 329 | timelo, timehi = cf.tau2time(tau0), cf.tau2time(tau1) 330 | 331 | pos = self.fp.tell() 332 | # Note that we don't pass a dtype, and assume everything is 333 | # single-fp floats with the correct endian, as hard-coded 334 | var_bundle = BPCHDataBundle( 335 | data_shape, self.endian, self.filename, pos, [timelo, timehi], 336 | metadata=var_attr, 337 | use_mmap=self.use_mmap, dask_delayed=self.dask_delayed 338 | ) 339 | self.fp.skipline() 340 | 341 | # Save the data as a "bundle" for concatenating in the final step 342 | if fullname in var_bundles: 343 | var_bundles[fullname].append(var_bundle) 344 | else: 345 | var_bundles[fullname] = [var_bundle, ] 346 | var_attrs[fullname] = var_attr 347 | n_vars += 1 348 | 349 | self.var_data = var_bundles 350 | self.var_attrs = var_attrs 351 | 352 | 353 | def read_from_bpch(filename, file_position, shape, dtype, endian, 354 | use_mmap=False): 355 | """ Read a chunk of data from a bpch output file. 
356 | 357 | Parameters 358 | ---------- 359 | filename : str 360 | Path to file on disk containing the data 361 | file_position : int 362 | Position (bytes) where desired data chunk begins 363 | shape : tuple of ints 364 | Resultant (n-dimensional) shape of requested data; the chunk 365 | will be read sequentially from disk and then re-shaped 366 | dtype : dtype 367 | Dtype of data; for best results, pass a dtype which includes 368 | an endian indicator, e.g. `dtype = np.dtype('>f4')` 369 | endian : str 370 | Endianness of data; should be consistent with `dtype` 371 | use_mmap : bool 372 | Memory map the chunk of data to the file on disk, else read 373 | immediately 374 | 375 | Returns 376 | ------- 377 | Array with shape `shape` and dtype `dtype` containing the requested 378 | chunk of data from `filename`. 379 | 380 | """ 381 | offset = file_position + 4 382 | if use_mmap: 383 | d = np.memmap(filename, dtype=dtype, mode='r', shape=shape, 384 | offset=offset, order='F') 385 | else: 386 | with FortranFile(filename, 'rb', endian) as ff: 387 | ff.seek(file_position) 388 | d = np.array(ff.readline('*f')) 389 | d = d.reshape(shape, order='F') 390 | 391 | # As a sanity check, *be sure* that the resulting data block has the 392 | # correct shape, and fail early if it doesn't. 393 | if (d.shape != shape): 394 | raise IOError("Data chunk read from {} does not have the right shape," 395 | " (expected {} but got {})" 396 | .format(filename, shape, d.shape)) 397 | 398 | return d 399 | -------------------------------------------------------------------------------- /doc/usage.rst: -------------------------------------------------------------------------------- 1 | 2 | Usage and Examples 3 | ================== 4 | 5 | Reading Output 6 | -------------- 7 | 8 | The routines for reading bpch files from disk into ``xarray.Dataset``\s is 9 | based mostly on the ``xarray.open_dataset`` method. However, to handle 10 | some of the idiosyncrasies of GEOS-Chem output, our implementation of 11 | :py:func:`~xbpch.open_bpchdataset` has a few additional arguments to know 12 | about. 13 | 14 | Main :py:func:`~xbpch.open_bpchdataset` Arguments 15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 16 | 17 | The majority of the time, you'll want to load/read data via xarray, using the 18 | method :py:func:`~xbpch.open_bpchdataset`, as shown in the :ref:`quick start`. 19 | This routine fundamentally requires three arguments: 20 | 21 | - ``filename``: the full path to the output file you want to load 22 | - ``tracerinfo_file``: the full path to the file *tracerinfo.dat*, which 23 | contains the names and indices of each tracer output by GEOS-Chem 24 | - ``diaginfo_file``: the full path to the file *diaginfo.dat*, which contains 25 | the listing of categories and their tracer number offsets in the tracer 26 | output index. 27 | 28 | If you don't pass a value for either ``tracerinfo_file`` or ``diaginfo_file``, 29 | **xbpch** will look for them in the current directory, assuming the Default 30 | naming scheme. However, if it *still* can't find a file, it'll raise an error 31 | (we do not assume to know what is in your output!) 32 | 33 | In many simulations, GEOS-Chem will write multiple timesteps of a large number 34 | of fields to a single output file. This can result in outputs on the order of 35 | 10's of GB! If you know for certain that you only want a specific tracer or 36 | category of tracers, you can supply a list of their names to either ``fields`` 37 | or ``categories``. 
38 | 39 | For instance, using the `v11-01 diagnostics `_ 40 | for reference, we can load in any tracer with the name "O3" by passing 41 | 42 | .. ipython:: python 43 | :verbatim: 44 | 45 | import xbpch 46 | o3_data = xbpch.open_bpchdataset("my_data.bpch", fields=['O3', ]) 47 | 48 | Alternatively, we can load all the tracers associated with a given category 49 | by specifying the ``categories`` argument. To grab all the saved 2D meteorology 50 | fields, this would entail 51 | 52 | .. ipython:: python 53 | :verbatim: 54 | 55 | met_data = xbpch.open_bpchdataset( 56 | "my_data.bpch", categories=["DAO-FLDS", ] 57 | ) 58 | 59 | 60 | What Works and Doesn't Work 61 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 62 | 63 | **xbpch** should work with most standard GEOS-Chem outputs going back to at 64 | least v9-02. It has been tested against some of these standard outputs, but 65 | not exhaustively. If you have an idiosyncratic GEOS-Chem output (e.g. from a 66 | specialized version of the model with custom tracers or a new grid), please 67 | give **xbpch** a try, and if it fails, post `an Issue on our GitHub page `_ 68 | to let us know. 69 | 70 | The following configurations have been tested and vetted: 71 | 72 | - Standard output on standard grids 73 | - ND49 output on standard grids 74 | - ND49 output on nested North America grid (should work for all nested grids) 75 | 76 | 77 | Eager vs Lazy Loading 78 | ^^^^^^^^^^^^^^^^^^^^^ 79 | 80 | One of the main advantages of using **xbpch** is that it allows you to access 81 | data without immediately needing to read it all from disk. On a modern 82 | analysis cluster, this isn't a problem, but if you want to process output 83 | on your laptop, you can quickly run into situations where all of your data 84 | won't fit in memory. In those situations, you have to tediously block your 85 | analysis algorithms/pipeline. 86 | 87 | .. note:: 88 | 89 | Even though you may request lazily-loaded data, **xbpch** still needs 90 | to read your input file to parse its contents. This requires iterating 91 | line-by-line through the input file, so it may take some time (about 92 | 10 seconds to read a 6GB file on my late-2016 MacBook Pro). 93 | Unfortunately, if we don't do this, we can't infer the tracers or their 94 | distribution over multiple timesteps contained in the input file. 95 | 96 | The keyword arguments ``memmap`` and ``dask`` control how data is read from 97 | your bpch files. 98 | 99 | ``memmap`` 100 | if enabled, the data for each timestep and variable will be 101 | accessed through a memory-map into the input file 102 | ``dask`` 103 | if enabled, the function to read each timestep for each variable 104 | will be wrapped in a ``dask.delayed`` object, initiating a task graph 105 | for accessing the data 106 | 107 | .. warning:: 108 | 109 | Opening a dataset using ``memmap=True`` and ``dask=False`` *will not work*. 110 | Each memory-mapped array counts as an open file, which will quickly add up 111 | and hit your operating system's limit on simultaneously open files. 112 | 113 | If ``dask=True`` is used to open a dataset, then all of the data in the bpch 114 | file is represented by ``dask.array``\s, and all operations are lazy. That is, 115 | they are not evaluated until the user explicitly instructs them to be, and 116 | instead a graph representing your computation is constructed. 117 | 118 | 119 | Chunking 120 | ^^^^^^^^ 121 | 122 | When data is loaded with the ``dask`` flag enabled, all the operations 123 | necessary to create contiguous chunks of data are deferred.
Because of the way 124 | data is written to bpch files by GEOS-Chem, these deferred actions are all 125 | based on single timesteps of data for each variable by default. Thus, in the 126 | parlance of dask, all the data is implicitly chunked on the **time** dimension. 127 | 128 | When dask encounters chunked calculations, it will automatically attempt 129 | to parallelize them across all the cores available on your machine, and will 130 | attempt to limit the amount of data held in-memory at any given time. 131 | 132 | To illustrate this, consider a monthly history dataset ``ds`` loaded via 133 | :py:func:`~xbpch.open_bpchdataset`. The initial task graph representing this 134 | data may look something like: 135 | 136 | .. figure:: dask_graphs/sample_read.png 137 | :scale: 100% 138 | :alt: Monthly history dask read/concat graph 139 | 140 | Tasks for reading and processing monthly output for a single variable in 141 | a year-long bpch output file 142 | 143 | This graph illustrates that dask is expected to process 12 chunks of data - one 144 | for each month (timestep) in the dataset. The graph shows the operations for 145 | reading the data, casting it to the correct data type, and re-scaling, which are 146 | applied automatically by **xbpch** and xarray. 147 | 148 | At this point, the data has only been processed in such a way that it fits 149 | the numpy.ndarray memory model, and thus can be used to construct xarray 150 | objects. A trivial calculation on this data may be to normalize the timeseries 151 | of data in each grid cell to have zero mean and unit variance. For any 152 | ``xarray.DataArray`` we could write this operation as 153 | 154 | .. ipython:: python 155 | :verbatim: 156 | 157 | da_normal = (da - da.mean('time'))/da.std('time') 158 | 159 | which produces the computational graph 160 | 161 | .. figure:: dask_graphs/sample_normalized.png 162 | :scale: 100% 163 | :alt: Normalization calculation on monthly data 164 | 165 | Computational graph for normalizing monthly data 166 | 167 | A second key function of ``dask`` is to analyze and parse these computational 168 | graphs into a simplified form. In practice, the resulting graph will be 169 | much simpler, which can dramatically speed up your analysis. For instance, if 170 | you sub-sample the variables and timesteps used in your analysis, **xbpch** 171 | (through dask) will avoid reading extra, unused data from the input files you passed 172 | it. 173 | 174 | .. note:: 175 | 176 | Sometimes it's advantageous to re-chunk a dataset (see 177 | `here `_ for a discussion on 178 | when this may be the case). This is easily accomplished through xarray, or 179 | can be done directly on the ``dask.array``\s containing your data if you 180 | have a more complex analysis to perform. 181 | 182 | 183 | Finally, it's important to know that the computational graphs that dask 184 | produces are never evaluated until you explicitly call ``.load()`` on a dask 185 | array or xarray Data{Array,set}. Different computations or uses for your data 186 | might imply an automatic ``load()``; for instance, if you use the plotting 187 | wrapper built into xarray, it will (necessarily) eagerly load your data. If you'd 188 | like to monitor the progress of a very long analysis built through 189 | **xbpch**/xarray/dask, you can use the built-in diagnostic tools from dask: 190 | 191 | .. ipython:: python 192 | :verbatim: 193 | 194 | from dask.diagnostics import ProgressBar 195 | 196 | # Construct some analysis 197 | my_ds = ...
198 | 199 | # Eagerly compute the results 200 | with ProgressBar() as pb: 201 | my_ds.load() 202 | 203 | .. parsed-literal:: 204 | [####################################] | 100% Completed | 10.2s 205 | 206 | Geographic Visualization 207 | ------------------------ 208 | 209 | One easy application of **xbpch** is for the visualization of your data. 210 | For cartographic or geographic plots, we recommend using the cartopy_ package 211 | maintained by the UK Met Office. 212 | 213 | Plotting on a cartopy_ map is straightforward. Suppose we have a Dataset ``ds`` 214 | read from a bpch file. We can first compute an analysis of interest - say, 215 | the difference between mean fields for summer versus winter: 216 | 217 | .. ipython:: python 218 | :verbatim: 219 | 220 | ds_seas = ds.groupby("time.season").mean('time') 221 | diff = ds_seas.sel(season='DJF') - ds_seas.sel(season='JJA') 222 | 223 | .. parsed-literal:: 224 | 225 | 226 | Dimensions: (lat: 91, lev: 47, lon: 144, nv: 2) 227 | Coordinates: 228 | * lev (lev) float64 0.9925 0.9775 0.9624 0.9473 0.9322 0.9171 ... 229 | * lon (lon) float64 -180.0 -177.5 -175.0 -172.5 -170.0 -167.5 ... 230 | * lat (lat) float64 -89.5 -88.0 -86.0 -84.0 -82.0 -80.0 -78.0 ... 231 | * nv (nv) int64 0 1 232 | Data variables: 233 | ANTHSRCE_O3 (lon, lat) float32 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 234 | IJ_AVG_S_O3 (lon, lat, lev) float32 -23.1014 -23.2715 -23.4614 -23.5216 ... 235 | 236 | Plotting a portion of this dataset on a cartopy_ map is straightforward. First, 237 | we create a figure and add an axes with the map projection information 238 | encoded: 239 | 240 | .. ipython:: python 241 | :verbatim: 242 | 243 | import matplotlib.pyplot as plt 244 | import cartopy.crs as ccrs 245 | 246 | fig = plt.figure() 247 | ax = fig.add_subplot(111, projection=ccrs.PlateCarree(), aspect='auto') 248 | 249 | Then, we can plot our data as normal. cartopy_ has a few helper functions which 250 | we can use to add basic geographic elements such as coastlines and borders to 251 | the plot. 252 | 253 | .. ipython:: python 254 | :verbatim: 255 | 256 | import cartopy.feature as cfeature 257 | 258 | # Select some data to plot 259 | da = diff.isel(lev=0).IJ_AVG_S_O3 260 | 261 | im = ax.contourf(da.lon.values, da.lat.values, da.values.T) 262 | cb = fig.colorbar(im, ax=ax, orientation='horizontal') 263 | ax.add_feature(cfeature.COASTLINE) 264 | ax.add_feature(cfeature.BORDERS) 265 | 266 | .. figure:: example_plots/cartopy_example.png 267 | :scale: 100% 268 | :alt: cartopy plot 269 | 270 | Example of a simple plot with cartopy_ 271 | 272 | Alternatively, we can use `xarray's matplotlib wrappers `_ 273 | to automate some of this plotting for us. For instance, we can quickly make 274 | a faceted plot of our seasonal data (including with a cartopy_ axis) with 275 | just a few lines of code: 276 | 277 | .. ipython:: python 278 | :verbatim: 279 | 280 | # Select some data to plot 281 | da = ds_seas.isel(lev=0).IJ_AVG_S_O3 282 | da = da - ds.isel(lev=0).IJ_AVG_S_O3.mean('time') 283 | 284 | g = da.plot.imshow('lon', 'lat', col='season', col_wrap=2, 285 | subplot_kws=dict(projection=ccrs.Robinson()), transform=ccrs.PlateCarree()) 286 | for ax in g.axes.flatten(): 287 | ax.add_feature(cfeature.COASTLINE) 288 | 289 | .. figure:: example_plots/cartopy_seasonal_facet.png 290 | :scale: 100% 291 | :alt: cartopy plot 292 | 293 | Faceting over a non-coordinate dimension using xarray's built-in plotting 294 | tools. 295 | 296 | There's a lot going on in this code sample: 297 | 298 | 1. 
First, we take the seasonal mean data we previously computed. 299 | 2. Subtract out the annual mean from each seasonal mean. 300 | 3. Use `imshow `_ 301 | to plot each grid cell in our dataset. 302 | 303 | - We tell the plotting function to use ``"lon"`` and ``"lat"`` as the keys 304 | to access the x/y data for the dataset 305 | - We further instruct xarray to facet over the ``"season"`` coordinate, and 306 | include two columns per row in the resulting facet grid 307 | - We pass a dictionary of keyword arguments to ``subplot_kws``, which is used 308 | when creating each subplot in our facet grid. In this case, we tell each 309 | subplot to use a Robinson map projection 310 | - We pass a final keyword argument, ``transform``, which is passed to each 311 | invocation of ``imshow()`` on the facet grid; this tells cartopy_ how to 312 | map from the projection data to our actual data. Here, a ``ccrs.PlateCarree()`` 313 | is a standard, equally-spaced latitude-longitude grid 314 | 4. Iterate over each axis in the facet grid, and add our coastlines to it. 315 | 316 | .. _cartopy: http://scitools.org.uk/cartopy/docs/v0.13/index.html 317 | 318 | 319 | Timeseries Analysis 320 | ------------------- 321 | 322 | Another application that **xbpch**/xarray makes easy is timeseries analysis. 323 | For example, consider the timeseries of ND49 output from the :ref:`quick start`. 324 | A classic timeseries analysis in atmospheric chemistry is computing the daily 325 | maximum 8-hour average for a given tracer. The core of this computation can be 326 | achieved in just a few lines of code via xarray: 327 | 328 | .. ipython:: python 329 | :verbatim: 330 | 331 | o3 = ds.IJ_AVG_S_O3 332 | mda8_o3 = ( 333 | o3.rolling(time=8, min_periods=6).mean() 334 | .resample("D", "time", how='max') 335 | ) 336 | 337 | This code is highly performant; the ``.rolling()`` operation is farmed out to 338 | a high-performance C library (`bottleneck `_) 339 | and all operations are applied by broadcasting over the time dimension. 340 | 341 | .. note:: 342 | 343 | bottleneck does not work with dask arrays, so you will need to eagerly 344 | ``.load()`` the data into memory if it hasn't already been done. Future 345 | versions of xarray will wrap functionality in dask to perform these 346 | operations in parallel, but this is a work in progress. 347 | 348 | 349 | Save to NetCDF 350 | -------------- 351 | 352 | Without any extra work, datasets read in via **xbpch** can easily be serialized 353 | back to disk in NetCDF format: 354 | 355 | .. ipython:: python 356 | :verbatim: 357 | 358 | ds.to_netcdf("my_bpch_data.nc") 359 | 360 | They can then be read back in via xarray: 361 | 362 | .. ipython:: python 363 | :verbatim: 364 | 365 | import xarray as xr 366 | ds = xr.open_dataset("my_bpch_data.nc") 367 | 368 | .. note:: 369 | 370 | As of v0.2.0, immediately writing to netcdf may not work due to the way variable 371 | units and scaling factors are encoded when they are read into **xbpch**. This 372 | will be fixed once some upstream issues with xarray are patched. If you run into 373 | the following ``ValueError``:: 374 | 375 | ValueError: Failed hard to prevent overwriting key 'scale_factor' 376 | 377 | then before you save it, process it with the :meth:`xbpch.common.fix_attr_encoding()` 378 | method: 379 | 380 | .. 
ipython:: python 381 | :verbatim: 382 | 383 | my_ds = xbpch.common.fix_attr_encoding(my_ds) 384 | 385 | my_ds.to_netcdf("my_data.nc") 386 | -------------------------------------------------------------------------------- /xbpch/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | API for reading BPCH files via xarray 3 | 4 | """ 5 | from __future__ import print_function, division 6 | 7 | from glob import glob 8 | import os 9 | import numpy as np 10 | import xarray as xr 11 | import warnings 12 | 13 | import dask.array as da 14 | 15 | from collections import OrderedDict 16 | 17 | from xarray.backends.common import AbstractDataStore 18 | from xarray.core.utils import Frozen 19 | 20 | from . bpch import BPCHFile 21 | from . common import get_timestamp 22 | from . grid import BASE_DIMENSIONS, CTMGrid 23 | from . util import cf 24 | from . version import __version__ as ver 25 | 26 | 27 | def open_bpchdataset(filename, fields=[], categories=[], 28 | tracerinfo_file='tracerinfo.dat', 29 | diaginfo_file='diaginfo.dat', 30 | endian=">", decode_cf=True, 31 | memmap=True, dask=True, return_store=False): 32 | """ Open a GEOS-Chem BPCH file output as an xarray Dataset. 33 | 34 | Parameters 35 | ---------- 36 | filename : string 37 | Path to the output file to read in. 38 | {tracerinfo,diaginfo}_file : string, optional 39 | Path to the metadata "info" .dat files which are used to decipher 40 | the metadata corresponding to each variable in the output dataset. 41 | If not provided, will look for them in the current directory or 42 | fall back on a generic set. 43 | fields : list, optional 44 | List of a subset of variable names to return. This can substantially 45 | improve read performance. Note that the field here is just the tracer 46 | name - not the category, e.g. 'O3' instead of 'IJ-AVG-$_O3'. 47 | categories : list, optional 48 | List a subset of variable categories to look through. This can 49 | substantially improve read performance. 50 | endian : {'=', '>', '<'}, optional 51 | Endianness of file on disk. By default, "big endian" (">") is assumed. 52 | decode_cf : bool 53 | Enforce CF conventions for variable names, units, and other metadata 54 | default_dtype : numpy.dtype, optional 55 | Default datatype for variables encoded in file on disk (single-precision 56 | float by default). 57 | memmap : bool 58 | Flag indicating that data should be memory-mapped from disk instead of 59 | eagerly loaded into memory 60 | dask : bool 61 | Flag indicating that data reading should be deferred (delayed) to 62 | construct a task-graph for later execution 63 | return_store : bool 64 | Also return the underlying DataStore to the user 65 | 66 | Returns 67 | ------- 68 | ds : xarray.Dataset 69 | Dataset containing the requested fields (or the entire file), with data 70 | contained in proxy containers for access later. 
71 | store : xarray.AbstractDataStore 72 | Underlying DataStore which handles the loading and processing of 73 | bpch files on disk 74 | 75 | """ 76 | 77 | store = BPCHDataStore( 78 | filename, fields=fields, categories=categories, 79 | tracerinfo_file=tracerinfo_file, 80 | diaginfo_file=diaginfo_file, endian=endian, 81 | use_mmap=memmap, dask_delayed=dask 82 | ) 83 | ds = xr.Dataset.load_store(store) 84 | 85 | # Handle CF corrections 86 | if decode_cf: 87 | decoded_vars = OrderedDict() 88 | rename_dict = {} 89 | for v in ds.variables: 90 | cf_name = cf.get_valid_varname(v) 91 | rename_dict[v] = cf_name 92 | new_var = cf.enforce_cf_variable(ds[v]) 93 | decoded_vars[cf_name] = new_var 94 | ds = xr.Dataset(decoded_vars, attrs=ds.attrs.copy()) 95 | 96 | # ds.rename(rename_dict, inplace=True) 97 | 98 | # TODO: There's a bug with xr.decode_cf which eagerly loads data. 99 | # Re-enable this once that bug is fixed 100 | # Note that we do not need to decode the times because we explicitly 101 | # kept track of them as we parsed the data. 102 | # ds = xr.decode_cf(ds, decode_times=False) 103 | 104 | # Set attributes for CF conventions 105 | ts = get_timestamp() 106 | ds.attrs.update(dict( 107 | Conventions='CF1.6', 108 | source=filename, 109 | tracerinfo=tracerinfo_file, 110 | diaginfo=diaginfo_file, 111 | filetype=store._bpch.filetype, 112 | filetitle=store._bpch.filetitle, 113 | history=( 114 | "{}: Processed/loaded by xbpch-{} from {}" 115 | .format(ts, ver, filename) 116 | ), 117 | )) 118 | 119 | # # Record what the file object underlying the store which we culled this 120 | # # Dataset from is so that we can clean it up later 121 | # ds._file_obj = store._bpch 122 | try: 123 | # xarray 0.17 + 124 | ds.set_close(store.close) 125 | except AttributeError: 126 | ds._file_obj = store._bpch 127 | 128 | # To immediately load the data from the BPCHDataProxy paylods, need 129 | # to execute ds.data_vars for some reason... 130 | if return_store: 131 | return ds, store 132 | else: 133 | return ds 134 | 135 | 136 | def open_mfbpchdataset(paths, concat_dim='time', compat='no_conflicts', 137 | preprocess=None, lock=None, **kwargs): 138 | """ Open multiple bpch files as a single dataset. 139 | 140 | You must have dask installed for this to work, as this greatly 141 | simplifies issues relating to multi-file I/O. 142 | 143 | Also, please note that this is not a very performant routine. I/O is still 144 | limited by the fact that we need to manually scan/read through each bpch 145 | file so that we can figure out what its contents are, since that metadata 146 | isn't saved anywhere. So this routine will actually sequentially load 147 | Datasets for each bpch file, then concatenate them along the "time" axis. 148 | You may wish to simply process each file individually, coerce to NetCDF, 149 | and then ingest through xarray as normal. 150 | 151 | Parameters 152 | ---------- 153 | paths : list of strs 154 | Filenames to load; order doesn't matter as they will be 155 | lexicographically sorted before we read in the data 156 | concat_dim : str, default='time' 157 | Dimension to concatenate Datasets over. We default to "time" since this 158 | is how GEOS-Chem splits output files 159 | compat : str (optional) 160 | String indicating how to compare variables of the same name for 161 | potential conflicts when merging: 162 | 163 | - 'broadcast_equals': all values must be equal when variables are 164 | broadcast against each other to ensure common dimensions. 165 | - 'equals': all values and dimensions must be the same. 
166 | - 'identical': all values, dimensions and attributes must be the 167 | same. 168 | - 'no_conflicts': only values which are not null in both datasets 169 | must be equal. The returned dataset then contains the combination 170 | of all non-null values. 171 | preprocess : callable (optional) 172 | A pre-processing function to apply to each Dataset prior to 173 | concatenation 174 | lock : False, True, or threading.Lock (optional) 175 | Passed to :py:func:`dask.array.from_array`. By default, xarray 176 | employs a per-variable lock when reading data from NetCDF files, 177 | but this model has not yet been extended or implemented for bpch files 178 | and so this is not actually used. However, it is likely necessary 179 | before dask's multi-threaded backend can be used 180 | **kwargs : optional 181 | Additional arguments to pass to :py:func:`xbpch.open_bpchdataset`. 182 | 183 | """ 184 | try: 185 | from xarray.backends.api import _MultiFileCloser 186 | except ImportError: 187 | pass 188 | 189 | # TODO: Include file locks? 190 | 191 | # Check for dask 192 | dask = kwargs.pop('dask', False) 193 | if not dask: 194 | raise ValueError("Reading multiple files without dask is not supported") 195 | kwargs['dask'] = True 196 | 197 | # Add th 198 | 199 | if isinstance(paths, str): 200 | paths = sorted(glob(paths)) 201 | if not paths: 202 | raise IOError("No paths to files were passed into open_mfbpchdataset") 203 | 204 | datasets = [open_bpchdataset(filename, **kwargs) 205 | for filename in paths] 206 | 207 | if preprocess is not None: 208 | datasets = [preprocess(ds) for ds in datasets] 209 | 210 | # Concatenate over time 211 | combined = xr.combine_nested(datasets, compat=compat, concat_dim=concat_dim) 212 | 213 | try: 214 | # xarray 0.17 + 215 | combined.set_close(lambda : [ds.close() for ds in datasets]) 216 | except AttributeError: 217 | combined._file_obj = _MultiFileCloser([ds._file_obj for ds in datasets]) 218 | 219 | combined.attrs = datasets[0].attrs 220 | ts = get_timestamp() 221 | fns_str = " ".join(paths) 222 | combined.attrs['history'] = ( 223 | "{}: Processed/loaded by xbpch-{} from {}" 224 | .format(ts, ver, fns_str) 225 | ) 226 | 227 | return combined 228 | 229 | 230 | class BPCHDataStore(AbstractDataStore): 231 | """ Store for reading data from binary punch files. 232 | 233 | Note that this is intended as a backend only; to open and read a given 234 | bpch file, use :meth:`open_bpchdataset`. 235 | 236 | Examples of other extensions using the core DataStore API can be found at: 237 | 238 | - https://github.com/pydata/xarray/blob/master/xarray/conventions.py 239 | - https://github.com/xgcm/xmitgcm/blob/master/xmitgcm/mds_store.py 240 | 241 | """ 242 | 243 | def __init__(self, filename, fields=[], categories=[], fix_cf=False, 244 | mode='r', endian='>', 245 | diaginfo_file='', tracerinfo_file='', 246 | use_mmap=False, dask_delayed=False): 247 | 248 | # Track the metadata accompanying this dataset. 
249 | dir_path = os.path.abspath(os.path.dirname(filename)) 250 | if not dir_path: 251 | dir_path = os.getcwd() 252 | if not tracerinfo_file: 253 | tracerinfo_file = os.path.join(dir_path, 'tracerinfo.dat') 254 | if not os.path.exists(tracerinfo_file): 255 | tracerinfo_file = '' 256 | self.tracerinfo_file = tracerinfo_file 257 | if not diaginfo_file: 258 | diaginfo_file = os.path.join(dir_path, 'diaginfo.dat') 259 | if not os.path.exists(diaginfo_file): 260 | diaginfo_file = '' 261 | self.diaginfo_file = diaginfo_file 262 | 263 | self.filename = filename 264 | self.fsize = os.path.getsize(self.filename) 265 | self.mode = mode 266 | if not mode.startswith('r'): 267 | raise ValueError("Currently only know how to 'r(b)'ead bpch files.") 268 | 269 | # Check endianness flag 270 | if endian not in ['>', '<', '=']: 271 | raise ValueError("Invalid byte order (endian={})".format(endian)) 272 | self.endian = endian 273 | 274 | # Open the raw output file, but don't yet read all the data 275 | self._mmap = use_mmap 276 | self._dask = dask_delayed 277 | self._bpch = BPCHFile(self.filename, self.mode, self.endian, 278 | tracerinfo_file=tracerinfo_file, 279 | diaginfo_file=diaginfo_file, 280 | eager=False, use_mmap=self._mmap, 281 | dask_delayed=self._dask) 282 | self.fields = fields 283 | self.categories = categories 284 | 285 | # Peek into the raw output file and read the header and metadata 286 | # so that we can get a head start at building the output grid 287 | self._bpch._read_metadata() 288 | self._bpch._read_header() 289 | 290 | # Parse the binary file and prepare to add variables to the DataStore 291 | self._bpch._read_var_data() 292 | 293 | # Create storage dicts for variables and attributes, to be used later 294 | # when xarray needs to access the data 295 | self._variables = OrderedDict() 296 | self._attributes = OrderedDict() 297 | self._attributes.update(self._bpch._attributes) 298 | self._dimensions = [d for d in BASE_DIMENSIONS] 299 | 300 | # Begin constructing the coordinate dimensions shared by the 301 | # output dataset variables 302 | dim_coords = {} 303 | self.ctm_info = CTMGrid.from_model( 304 | self._attributes['modelname'], resolution=self._attributes['res'] 305 | ) 306 | 307 | # Add vertical dimensions 308 | self._dimensions.append( 309 | dict(dims=['lev', ], attrs={'axis': 'Z'}) 310 | ) 311 | self._dimensions.append( 312 | dict(dims=['lev_trop', ], attrs={'axis': 'Z'}) 313 | ) 314 | self._dimensions.append( 315 | dict(dims=['lev_edge', ], attrs={'axis': 'Z'}) 316 | ) 317 | eta_centers = self.ctm_info.eta_centers 318 | sigma_centers = self.ctm_info.sigma_centers 319 | 320 | # Add time dimensions 321 | self._dimensions.append( 322 | dict(dims=['time', ], attrs={'axis': 'T', 'long_name': 'time', 323 | 'standard_name': 'time'}) 324 | ) 325 | 326 | # Add lat/lon dimensions 327 | self._dimensions.append( 328 | dict(dims=['lon', ], attrs={ 329 | 'axis': 'X', 'long_name': 'longitude coordinate', 330 | 'standard_name': 'longitude' 331 | }) 332 | ) 333 | self._dimensions.append( 334 | dict(dims=['lat', ], attrs={ 335 | 'axis': 'y', 'long_name': 'latitude coordinate', 336 | 'standard_name': 'latitude' 337 | }) 338 | ) 339 | 340 | if eta_centers is not None: 341 | lev_vals = eta_centers 342 | lev_attrs = { 343 | 'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate', 344 | 'axis': 'Z' 345 | } 346 | else: 347 | lev_vals = sigma_centers 348 | lev_attrs = { 349 | 'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate', 350 | 'axis': 'Z' 351 | } 352 | self._variables['lev'] = 
xr.Variable(['lev', ], lev_vals, lev_attrs) 353 | 354 | ## Latitude / Longitude 355 | # TODO: Add lon/lat bounds 356 | 357 | # Detect if we're on a nested grid; in that case, we'll have a displaced 358 | # origin set in the variable attributes we previously read 359 | ref_key = list(self._bpch.var_attrs.keys())[0] 360 | ref_attrs = self._bpch.var_attrs[ref_key] 361 | self.is_nested = (ref_attrs['origin'] != (1, 1, 1)) 362 | 363 | lon_centers = self.ctm_info.lon_centers 364 | lat_centers = self.ctm_info.lat_centers 365 | 366 | if self.is_nested: 367 | ix, iy, _ = ref_attrs['origin'] 368 | nx, ny, *_ = ref_attrs['original_shape'] 369 | # Correct i{x,y} for IDL->Python indexing (1-indexed -> 0-indexed) 370 | ix -= 1 371 | iy -= 1 372 | lon_centers = lon_centers[ix:ix+nx] 373 | lat_centers = lat_centers[iy:iy+ny] 374 | 375 | self._variables['lon'] = xr.Variable( 376 | ['lon'], lon_centers, 377 | {'long_name': 'longitude', 'units': 'degrees_east'} 378 | ) 379 | self._variables['lat'] = xr.Variable( 380 | ['lat'], lat_centers, 381 | {'long_name': 'latitude', 'units': 'degrees_north'} 382 | ) 383 | # TODO: Fix longitudes if ctm_grid.center180 384 | 385 | # Add variables from the parsed BPCH file to our DataStore 386 | for vname in list(self._bpch.var_data.keys()): 387 | 388 | var_data = self._bpch.var_data[vname] 389 | var_attr = self._bpch.var_attrs[vname] 390 | 391 | if fields and (var_attr['name'] not in fields): 392 | continue 393 | if categories and (var_attr['category'] not in categories): 394 | continue 395 | 396 | # Process dimensions 397 | dims = ['time', 'lon', 'lat', ] 398 | dshape = var_attr['original_shape'] 399 | if len(dshape) == 3: 400 | # Process the vertical coordinate. A few things can happen here: 401 | # 1) We have cell-centered values on the "Nlayer" grid; we can take these variables and map them to 'lev' 402 | # 2) We have edge value on an "Nlayer" + 1 grid; we can take these and use them with 'lev_edge' 403 | # 3) We have troposphere values on "Ntrop"; we can take these and use them with 'lev_trop', but we won't have coordinate information yet 404 | # All other cases we do not handle yet; this includes the aircraft emissions and a few other things. Note that tracer sources do not have a vertical coord to worry about! 405 | nlev = dshape[-1] 406 | grid_nlev = self.ctm_info.Nlayers 407 | grid_ntrop = self.ctm_info.Ntrop 408 | try: 409 | if nlev == grid_nlev: 410 | dims.append('lev') 411 | elif nlev == grid_nlev + 1: 412 | dims.append('lev_edge') 413 | elif nlev == grid_ntrop: 414 | dims.append('lev_trop') 415 | else: 416 | continue 417 | except AttributeError: 418 | warnings.warn("Couldn't resolve grid_spec vertical layout") 419 | continue 420 | 421 | # xarray Variables are thin wrappers for numpy.ndarrays, or really 422 | # any object that extends the ndarray interface. A critical part of 423 | # the original ndarray interface is that the underlying data has to 424 | # be contiguous in memory. We can enforce this to happen by 425 | # concatenating each bundle in the variable data bundles we read 426 | # from the bpch file 427 | data = self._concat([v.data for v in var_data]) 428 | 429 | # Is the variable time-invariant? If it is, kill the time dim. 430 | # Here, we mean it only as one sample in the dataset. 
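# A single time sample means the variable is effectively time-invariant:
# 'time' is dropped from its dims and the length-one leading axis is
# squeezed away, so the variable is stored as (lon, lat[, lev]).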
431 | if data.shape[0] == 1: 432 | dims = dims[1:] 433 | data = data.squeeze() 434 | 435 | # Create a variable containing this data 436 | var = xr.Variable(dims, data, var_attr) 437 | 438 | # Shuffle dims for CF/COARDS compliance if requested 439 | # TODO: For this to work, we have to force a load of the data. 440 | # Is there a way to re-write BPCHDataProxy so that that's not 441 | # necessary? 442 | # Actually, we can't even force a load because var.data is a 443 | # numpy.ndarray. Weird. 444 | # if fix_dims: 445 | # target_dims = [d for d in DIM_ORDER_PRIORITY if d in dims] 446 | # var = var.transpose(*target_dims) 447 | 448 | self._variables[vname] = var 449 | 450 | # Try to add a time dimension 451 | # TODO: Time units? 452 | if (len(var_data) > 1) and 'time' not in self._variables: 453 | time_bnds = np.asarray([v.time for v in var_data]) 454 | times = time_bnds[:, 0] 455 | 456 | self._variables['time'] = xr.Variable( 457 | ['time', ], times, 458 | {'bounds': 'time_bnds', 'units': cf.CTM_TIME_UNIT_STR} 459 | ) 460 | self._variables['time_bnds'] = xr.Variable( 461 | ['time', 'nv'], time_bnds, 462 | {'units': cf.CTM_TIME_UNIT_STR} 463 | ) 464 | self._variables['nv'] = xr.Variable(['nv', ], [0, 1]) 465 | 466 | # Create the dimension variables; we have a lot of options 467 | # here with regards to the vertical coordinate. For now, 468 | # we'll just use the sigma or eta coordinates. 469 | # Useful CF info: http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_atmosphere_hybrid_sigma_pressure_coordinate 470 | # self._variables['Ap'] = 471 | # self._variables['Bp'] = 472 | # self._variables['altitude'] = 473 | 474 | # Time dimensions 475 | # self._times = self.ds.times 476 | # self._time_bnds = self.ds.time_bnds 477 | 478 | 479 | def _concat(self, *args, **kwargs): 480 | if self._dask: 481 | return da.concatenate(*args, **kwargs) 482 | else: 483 | return np.concatenate(*args, **kwargs) 484 | 485 | def get_variables(self): 486 | return self._variables 487 | 488 | def get_attrs(self): 489 | return Frozen(self._attributes) 490 | 491 | def get_dimensions(self): 492 | return Frozen(self._dimensions) 493 | 494 | def close(self): 495 | self._bpch.close() 496 | for var in list(self._variables): 497 | del self._variables[var] 498 | 499 | def __exit__(self, type, value, traceback): 500 | self.close() 501 | -------------------------------------------------------------------------------- /xbpch/util/gridspec.py: -------------------------------------------------------------------------------- 1 | """ 2 | Specification of various grid models used by GEOS-Chem (GEOS, MERRA, etc.). 3 | 4 | `MODELS` defines the default grid set-up (dict) for several models: 5 | - Model names (keys) should be uppercase. 6 | - A model can inherit grid specifications from another model, using the 7 | key 'reference' (useful for model groups, similar models or 8 | multiple model names). 9 | - A model defines a family of models if the value 10 | of 'reference' is set to None (e.g., 'GEOS'). 11 | - When a key is redefined (i.e., present in both a model and its inherited 12 | model), it overrides the specification of the inherited model. 13 | 14 | Model names and grid specifications are best retrieved using 15 | :func:`_get_supported_models` and :func:`_get_model_info`. 16 | 17 | `CSIG_`, `ESIG_`, `Ap_` and `Bp_` can be used to compute the vertical grid 18 | levels for the specified models.
19 | 20 | This implementation is based on Benoit Bovy's PyGChem code, and uses his 21 | hand-curated grid level definitions as a starting point. See PyGChem/license.txt 22 | for more details on the provenance of this code. 23 | 24 | """ 25 | 26 | import re 27 | import itertools 28 | 29 | import numpy as np 30 | 31 | # pre-defined sigma coordinates 32 | CSIG_GEOS1 = np.array([ 33 | 0.993936, 0.971301, 0.929925, 0.874137, 0.807833, 34 | 0.734480, 0.657114, 0.578390, 0.500500, 0.424750, 35 | 0.352000, 0.283750, 0.222750, 0.172150, 0.132200, 36 | 0.100050, 0.073000, 0.049750, 0.029000, 0.009500 37 | ]) 38 | 39 | ESIG_GEOS1 = np.array([ 40 | 1.000000, 0.987871, 0.954730, 0.905120, 0.843153, 41 | 0.772512, 0.696448, 0.617779, 0.539000, 0.462000, 42 | 0.387500, 0.316500, 0.251000, 0.194500, 0.149800, 43 | 0.114600, 0.085500, 0.060500, 0.039000, 0.019000, 44 | 0.000000 45 | ]) 46 | 47 | CSIG_GEOS_STRAT = np.array([ 48 | 0.993935, 0.971300, 0.929925, 0.875060, 0.812500, 49 | 0.745000, 0.674500, 0.604500, 0.536500, 0.471500, 50 | 0.410000, 0.352500, 0.301500, 0.257977, 0.220273, 51 | 0.187044, 0.157881, 0.132807, 0.111722, 0.094035, 52 | 0.079233, 0.066873, 0.056574, 0.044794, 0.028825, 53 | 0.009979 54 | ]) 55 | 56 | ESIG_GEOS_STRAT = np.array([ 57 | 1.000000, 0.987871, 0.954730, 0.905120, 0.845000, 58 | 0.780000, 0.710000, 0.639000, 0.570000, 0.503000, 59 | 0.440000, 0.380000, 0.325000, 0.278000, 0.237954, 60 | 0.202593, 0.171495, 0.144267, 0.121347, 0.102098, 61 | 0.085972, 0.072493, 0.061252, 0.051896, 0.037692, 62 | 0.019958, 0.000000 63 | ]) 64 | 65 | CSIG_GEOS_STRAT_46L = np.array([ 66 | 0.993935, 0.971300, 0.929925, 0.875060, 0.812500, 67 | 0.745000, 0.674500, 0.604500, 0.536500, 0.471500, 68 | 0.410000, 0.352500, 0.301500, 0.257977, 0.220273, 69 | 0.187044, 0.157881, 0.132807, 0.111722, 0.094035, 70 | 0.079233, 0.066873, 0.056574, 0.048012, 0.040910, 71 | 0.034927, 0.029792, 0.025395, 0.021663, 0.018439, 72 | 0.015571, 0.013036, 0.010808, 0.008864, 0.007181, 73 | 0.005737, 0.004510, 0.003480, 0.002625, 0.001928, 74 | 0.001369, 0.000929, 0.000593, 0.000344, 0.000167, 75 | 0.000047 76 | ]) 77 | 78 | ESIG_GEOS_STRAT_46L = np.array([ 79 | 1.000000, 0.987871, 0.954730, 0.905120, 0.845000, 80 | 0.780000, 0.710000, 0.639000, 0.570000, 0.503000, 81 | 0.440000, 0.380000, 0.325000, 0.278000, 0.237954, 82 | 0.202593, 0.171495, 0.144267, 0.121347, 0.102098, 83 | 0.085972, 0.072493, 0.061252, 0.051896, 0.044128, 84 | 0.037692, 0.032162, 0.027422, 0.023367, 0.019958, 85 | 0.016919, 0.014223, 0.011848, 0.009767, 0.007960, 86 | 0.006402, 0.005072, 0.003948, 0.003011, 0.002240, 87 | 0.001616, 0.001121, 0.000737, 0.000449, 0.000239, 88 | 0.000094, 0.000000 89 | ]) 90 | 91 | CSIG_GEOS2 = np.array([ 92 | 9.985475e-01, 9.942475e-01, 9.871500e-01, 9.772000e-01, 93 | 9.642500e-01, 9.481150e-01, 9.285650e-01, 9.053219e-01, 94 | 8.781569e-01, 8.469350e-01, 8.116350e-01, 7.724569e-01, 95 | 7.299198e-01, 6.847475e-01, 6.377244e-01, 5.896341e-01, 96 | 5.412270e-01, 4.932176e-01, 4.462150e-01, 4.007400e-01, 97 | 3.572600e-01, 3.161750e-01, 2.779150e-01, 2.429000e-01, 98 | 2.114000e-01, 1.834250e-01, 1.587150e-01, 1.369425e-01, 99 | 1.178165e-01, 1.010651e-01, 8.644427e-02, 7.372377e-02, 100 | 6.269240e-02, 5.314686e-02, 4.489815e-02, 3.779315e-02, 101 | 3.171021e-02, 2.329529e-02, 1.512403e-02, 9.817761e-03, 102 | 6.371968e-03, 4.134332e-03, 2.681253e-03, 1.737650e-03, 103 | 1.124892e-03, 7.269780e-04, 6.706442e-05 104 | ]) 105 | 106 | ESIG_GEOS2 = np.array([ 107 | 1.000000e+00, 9.970951e-01, 9.914000e-01, 
9.829000e-01, 108 | 9.715000e-01, 9.570000e-01, 9.392300e-01, 9.179000e-01, 109 | 8.927438e-01, 8.635700e-01, 8.303000e-01, 7.929700e-01, 110 | 7.519437e-01, 7.078959e-01, 6.615992e-01, 6.138495e-01, 111 | 5.654188e-01, 5.170351e-01, 4.694000e-01, 4.230300e-01, 112 | 3.784500e-01, 3.360700e-01, 2.962800e-01, 2.595500e-01, 113 | 2.262500e-01, 1.965500e-01, 1.703000e-01, 1.471300e-01, 114 | 1.267550e-01, 1.088781e-01, 9.325208e-02, 7.963646e-02, 115 | 6.781108e-02, 5.757372e-02, 4.872000e-02, 4.107631e-02, 116 | 3.451000e-02, 2.891042e-02, 1.877039e-02, 1.218564e-02, 117 | 7.909625e-03, 5.132859e-03, 3.329678e-03, 2.158725e-03, 118 | 1.398330e-03, 9.045439e-04, 5.838880e-04, 0.000000e+00 119 | ]) 120 | 121 | CSIG_GEOS2_70L = np.array([ 122 | 0.998548, 0.994248, 0.987150, 0.977200, 0.964250, 123 | 0.948115, 0.928565, 0.905322, 0.878157, 0.846935, 124 | 0.811635, 0.772457, 0.729920, 0.684748, 0.637724, 125 | 0.589634, 0.541227, 0.493218, 0.446215, 0.400740, 126 | 0.357260, 0.316175, 0.277915, 0.242900, 0.211400, 127 | 0.183425, 0.158715, 0.136943, 0.117817, 0.101065, 128 | 0.086444, 0.073724, 0.062692, 0.053147, 0.044898, 129 | 0.037793, 0.031710, 0.026527, 0.022123, 0.018394, 130 | 0.015247, 0.012600, 0.010381, 0.008526, 0.006982, 131 | 0.005699, 0.004638, 0.003763, 0.003043, 0.002453, 132 | 0.001971, 0.001579, 0.001261, 0.001003, 0.000795, 133 | 0.000628, 0.000494, 0.000386, 0.000300, 0.000232, 134 | 0.000179, 0.000136, 0.000103, 0.000077, 0.000057, 135 | 0.000041, 0.000028, 0.000018, 0.000010, 0.000003 136 | ]) 137 | 138 | ESIG_GEOS2_70L = np.array([ 139 | 1.000000, 0.997095, 0.991400, 0.982900, 0.971500, 140 | 0.957000, 0.939230, 0.917900, 0.892744, 0.863570, 141 | 0.830300, 0.792970, 0.751944, 0.707896, 0.661599, 142 | 0.613850, 0.565419, 0.517035, 0.469400, 0.423030, 143 | 0.378450, 0.336070, 0.296280, 0.259550, 0.226250, 144 | 0.196550, 0.170300, 0.147130, 0.126755, 0.108878, 145 | 0.093252, 0.079636, 0.067811, 0.057574, 0.048720, 146 | 0.041076, 0.034510, 0.028910, 0.024144, 0.020102, 147 | 0.016686, 0.013808, 0.011392, 0.009370, 0.007683, 148 | 0.006280, 0.005118, 0.004158, 0.003367, 0.002719, 149 | 0.002188, 0.001755, 0.001403, 0.001118, 0.000888, 150 | 0.000702, 0.000553, 0.000434, 0.000338, 0.000262, 151 | 0.000202, 0.000155, 0.000118, 0.000089, 0.000066, 152 | 0.000048, 0.000034, 0.000023, 0.000014, 0.000006, 153 | 0.000000 154 | ]) 155 | 156 | CSIG_GEOS3 = np.array([ 157 | 0.998548, 0.994148, 0.986350, 0.974300, 158 | 0.956950, 0.933150, 0.901750, 0.861500, 159 | 0.811000, 0.750600, 0.682900, 0.610850, 160 | 0.537050, 0.463900, 0.393650, 0.328275, 161 | 0.269500, 0.218295, 0.174820, 0.138840, 162 | 0.109790, 0.0866900, 0.0684150, 0.0539800, 163 | 0.0425750, 0.0335700, 0.0264650, 0.0208550, 164 | 0.0164300, 0.0129425, 0.0101900, 0.00800750, 165 | 0.00627000, 0.00489000, 0.00379000, 0.00291500, 166 | 0.00221500, 0.00167000, 0.00125000, 0.000912500, 167 | 0.000652500, 0.000455000, 0.00030750, 0.000200000, 168 | 0.000123500, 6.97500e-05, 3.25900e-05, 8.84000e-06 169 | ]) 170 | 171 | ESIG_GEOS3 = np.array([ 172 | 1.000000, 0.997095, 0.991200, 0.981500, 173 | 0.967100, 0.946800, 0.919500, 0.884000, 174 | 0.839000, 0.783000, 0.718200, 0.647600, 175 | 0.574100, 0.500000, 0.427800, 0.359500, 176 | 0.297050, 0.241950, 0.194640, 0.155000, 177 | 0.122680, 0.0969000, 0.0764800, 0.0603500, 178 | 0.0476100, 0.0375400, 0.0296000, 0.0233300, 179 | 0.0183800, 0.0144800, 0.0114050, 0.00897500, 180 | 0.00704000, 0.00550000, 0.00428000, 0.00330000, 181 | 0.00253000, 0.00190000, 0.00144000, 
0.00106000, 182 | 0.000765000, 0.000540000, 0.000370000, 0.000245000, 183 | 0.000155000, 9.20000e-05, 4.75000e-05, 1.76800e-05, 184 | 0.00000 185 | ]) 186 | 187 | CSIG_GEOS3_30L = np.array([ 188 | 0.998548, 0.994148, 0.986350, 0.974300, 189 | 0.956950, 0.933150, 0.901750, 0.861500, 190 | 0.811000, 0.750600, 0.682900, 0.610850, 191 | 0.537050, 0.463900, 0.393650, 0.328275, 192 | 0.269500, 0.218295, 0.174820, 0.138840, 193 | 0.109790, 0.0866900, 0.0620450, 0.0386050, 194 | 0.0239900, 0.0127100, 0.00478500, 0.00164750, 195 | 0.000460000, 7.75000e-05 196 | ]) 197 | 198 | ESIG_GEOS3_30L = np.array([ 199 | 1.000000, 0.997095, 0.991200, 0.981500, 200 | 0.967100, 0.946800, 0.919500, 0.884000, 201 | 0.839000, 0.783000, 0.718200, 0.647600, 202 | 0.574100, 0.500000, 0.427800, 0.359500, 203 | 0.297050, 0.241950, 0.194640, 0.155000, 204 | 0.122680, 0.0969000, 0.0764800, 0.0476100, 205 | 0.0296000, 0.0183800, 0.00704000, 0.00253000, 206 | 0.000765000, 0.000155000, 0.00000 207 | ]) 208 | 209 | # pre-defined parameter values for computing ETA vertical levels: 210 | # A [hPa] ; B [unitless] 211 | Ap_GEOS4 = np.array([ 212 | 0.000000, 0.000000, 12.704939, 35.465965, 213 | 66.098427, 101.671654, 138.744400, 173.403183, 214 | 198.737839, 215.417526, 223.884689, 224.362869, 215 | 216.864929, 201.192093, 176.929993, 150.393005, 216 | 127.837006, 108.663429, 92.365662, 78.512299, 217 | 66.603378, 56.387939, 47.643932, 40.175419, 218 | 33.809956, 28.367815, 23.730362, 19.791553, 219 | 16.457071, 13.643393, 11.276889, 9.292943, 220 | 7.619839, 6.216800, 5.046805, 4.076567, 221 | 3.276433, 2.620212, 2.084972, 1.650792, 222 | 1.300508, 1.019442, 0.795134, 0.616779, 223 | 0.475806, 0.365041, 0.278526, 0.211349, 224 | 0.159495, 0.119703, 0.089345, 0.066000, 225 | 0.047585, 0.032700, 0.020000, 0.010000 226 | ]) 227 | 228 | Bp_GEOS4 = np.array([ 229 | 1.000000, 0.985110, 0.943290, 0.867830, 230 | 0.764920, 0.642710, 0.510460, 0.378440, 231 | 0.270330, 0.183300, 0.115030, 0.063720, 232 | 0.028010, 0.006960, 0.000000, 0.000000, 233 | 0.000000, 0.000000, 0.000000, 0.000000, 234 | 0.000000, 0.000000, 0.000000, 0.000000, 235 | 0.000000, 0.000000, 0.000000, 0.000000, 236 | 0.000000, 0.000000, 0.000000, 0.000000, 237 | 0.000000, 0.000000, 0.000000, 0.000000, 238 | 0.000000, 0.000000, 0.000000, 0.000000, 239 | 0.000000, 0.000000, 0.000000, 0.000000, 240 | 0.000000, 0.000000, 0.000000, 0.000000, 241 | 0.000000, 0.000000, 0.000000, 0.000000, 242 | 0.000000, 0.000000, 0.000000, 0.000000 243 | ]) 244 | 245 | Ap_GEOS4_REDUCED = np.array([ 246 | 0.000000, 0.000000, 12.704939, 35.465965, 247 | 66.098427, 101.671654, 138.744400, 173.403183, 248 | 198.737839, 215.417526, 223.884689, 224.362869, 249 | 216.864929, 201.192093, 176.929993, 150.393005, 250 | 127.837006, 108.663429, 92.365662, 78.512299, 251 | 56.387939, 40.175419, 28.367815, 19.791553, 252 | 9.292943, 4.076567, 1.650792, 0.616779, 253 | 0.211349, 0.066000, 0.010000]) 254 | 255 | Bp_GEOS4_REDUCED = np.array([ 256 | 1.000000, 0.985110, 0.943290, 0.867830, 257 | 0.764920, 0.642710, 0.510460, 0.378440, 258 | 0.270330, 0.183300, 0.115030, 0.063720, 259 | 0.028010, 0.006960, 0.000000, 0.000000, 260 | 0.000000, 0.000000, 0.000000, 0.000000, 261 | 0.000000, 0.000000, 0.000000, 0.000000, 262 | 0.000000, 0.000000, 0.000000, 0.000000, 263 | 0.000000, 0.000000, 0.000000 264 | ]) 265 | 266 | Ap_GEOS5 = np.array([ 267 | 0.00000000e+00, 4.80482600e-02, 6.59375200e+00, 268 | 1.31348000e+01, 1.96131100e+01, 2.60920100e+01, 269 | 3.25708100e+01, 3.89820100e+01, 4.53390100e+01, 270 | 
5.16961100e+01, 5.80532100e+01, 6.43626400e+01, 271 | 7.06219800e+01, 7.88342200e+01, 8.90999200e+01, 272 | 9.93652100e+01, 1.09181700e+02, 1.18958600e+02, 273 | 1.28695900e+02, 1.42910000e+02, 1.56260000e+02, 274 | 1.69609000e+02, 1.81619000e+02, 1.93097000e+02, 275 | 2.03259000e+02, 2.12150000e+02, 2.18776000e+02, 276 | 2.23898000e+02, 2.24363000e+02, 2.16865000e+02, 277 | 2.01192000e+02, 1.76930000e+02, 1.50393000e+02, 278 | 1.27837000e+02, 1.08663000e+02, 9.23657200e+01, 279 | 7.85123100e+01, 6.66034100e+01, 5.63879100e+01, 280 | 4.76439100e+01, 4.01754100e+01, 3.38100100e+01, 281 | 2.83678100e+01, 2.37304100e+01, 1.97916000e+01, 282 | 1.64571000e+01, 1.36434000e+01, 1.12769000e+01, 283 | 9.29294200e+00, 7.61984200e+00, 6.21680100e+00, 284 | 5.04680100e+00, 4.07657100e+00, 3.27643100e+00, 285 | 2.62021100e+00, 2.08497000e+00, 1.65079000e+00, 286 | 1.30051000e+00, 1.01944000e+00, 7.95134100e-01, 287 | 6.16779100e-01, 4.75806100e-01, 3.65041100e-01, 288 | 2.78526100e-01, 2.11349000e-01, 1.59495000e-01, 289 | 1.19703000e-01, 8.93450200e-02, 6.60000100e-02, 290 | 4.75850100e-02, 3.27000000e-02, 2.00000000e-02, 291 | 1.00000000e-02 292 | ]) 293 | 294 | Bp_GEOS5 = np.array([ 295 | 1.00000000e+00, 9.84952000e-01, 9.63406000e-01, 296 | 9.41865000e-01, 9.20387000e-01, 8.98908000e-01, 297 | 8.77429000e-01, 8.56018000e-01, 8.34660900e-01, 298 | 8.13303900e-01, 7.91946900e-01, 7.70637500e-01, 299 | 7.49378200e-01, 7.21166000e-01, 6.85899900e-01, 300 | 6.50634900e-01, 6.15818400e-01, 5.81041500e-01, 301 | 5.46304200e-01, 4.94590200e-01, 4.43740200e-01, 302 | 3.92891100e-01, 3.43381100e-01, 2.94403100e-01, 303 | 2.46741100e-01, 2.00350100e-01, 1.56224100e-01, 304 | 1.13602100e-01, 6.37200600e-02, 2.80100400e-02, 305 | 6.96002500e-03, 8.17541300e-09, 0.00000000e+00, 306 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 307 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 308 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 309 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 310 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 311 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 312 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 313 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 314 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 315 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 316 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 317 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 318 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 319 | 0.00000000e+00 320 | ]) 321 | 322 | Ap_GEOS5_REDUCED = np.array([ 323 | 0.00000000e+00, 4.80482600e-02, 6.59375200e+00, 324 | 1.31348000e+01, 1.96131100e+01, 2.60920100e+01, 325 | 3.25708100e+01, 3.89820100e+01, 4.53390100e+01, 326 | 5.16961100e+01, 5.80532100e+01, 6.43626400e+01, 327 | 7.06219800e+01, 7.88342200e+01, 8.90999200e+01, 328 | 9.93652100e+01, 1.09181700e+02, 1.18958600e+02, 329 | 1.28695900e+02, 1.42910000e+02, 1.56260000e+02, 330 | 1.69609000e+02, 1.81619000e+02, 1.93097000e+02, 331 | 2.03259000e+02, 2.12150000e+02, 2.18776000e+02, 332 | 2.23898000e+02, 2.24363000e+02, 2.16865000e+02, 333 | 2.01192000e+02, 1.76930000e+02, 1.50393000e+02, 334 | 1.27837000e+02, 1.08663000e+02, 9.23657200e+01, 335 | 7.85123100e+01, 5.63879100e+01, 4.01754100e+01, 336 | 2.83678100e+01, 1.97916000e+01, 9.29294200e+00, 337 | 4.07657100e+00, 1.65079000e+00, 6.16779100e-01, 338 | 2.11349000e-01, 6.60000100e-02, 1.00000000e-02 339 | ]) 340 | 341 | Bp_GEOS5_REDUCED = np.array([ 342 | 1.00000000e+00, 9.84952000e-01, 9.63406000e-01, 343 | 9.41865000e-01, 
9.20387000e-01, 8.98908000e-01, 344 | 8.77429000e-01, 8.56018000e-01, 8.34660900e-01, 345 | 8.13303900e-01, 7.91946900e-01, 7.70637500e-01, 346 | 7.49378200e-01, 7.21166000e-01, 6.85899900e-01, 347 | 6.50634900e-01, 6.15818400e-01, 5.81041500e-01, 348 | 5.46304200e-01, 4.94590200e-01, 4.43740200e-01, 349 | 3.92891100e-01, 3.43381100e-01, 2.94403100e-01, 350 | 2.46741100e-01, 2.00350100e-01, 1.56224100e-01, 351 | 1.13602100e-01, 6.37200600e-02, 2.80100400e-02, 352 | 6.96002500e-03, 8.17541300e-09, 0.00000000e+00, 353 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 354 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 355 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 356 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 357 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00 358 | ]) 359 | 360 | 361 | MODELS = { 362 | 'GEOS': {'reference': None, 363 | 'description': 'GEOS model family', 364 | 'resolution': (5, 4), 365 | 'Ptop': 1e-2, 366 | 'halfpolar': True, 367 | 'center180': True}, 368 | 'GENERIC': {'reference': None, 369 | 'description': 'GENERIC grids', 370 | 'resolution': (1, 1), 371 | 'Nlayers': None, 372 | 'Ntrop': None, 373 | 'Ptop': 1e-2, 374 | 'halfpolar': False, 375 | 'center180': False, 376 | 'hybrid': False}, 377 | 'GEOS1': {'reference': 'GEOS', 378 | 'description': 'GEOS-1 pure sigma', 379 | 'Nlayers': 20, 380 | 'Ntrop': 16, 381 | 'hybrid': False, 382 | 'csig': CSIG_GEOS1, 383 | 'esig': ESIG_GEOS1}, 384 | 'GEOS_STRAT': {'reference': 'GEOS', 385 | 'description': 'GEOS-STRAT pure sigma vertically' 386 | ' regridded', 387 | 'Nlayers': 26, 388 | 'Ntrop': 19, 389 | 'Ptop': 1e-4, 390 | 'hybrid': False, 391 | 'csig': CSIG_GEOS_STRAT, 392 | 'esig': ESIG_GEOS_STRAT}, 393 | 'GEOS_STRAT_46L': {'reference': 'GEOS_STRAT', 394 | 'description': 'GEOS-STRAT pure sigma' 395 | ' original resolution', 396 | 'Nlayers': 46, 397 | 'csig': CSIG_GEOS_STRAT_46L, 398 | 'esig': ESIG_GEOS_STRAT_46L}, 399 | 'GEOS2': {'reference': 'GEOS', 400 | 'description': 'GEOS-2 pure sigma', 401 | 'Nlayers': 47, 402 | 'Ntrop': 32, 403 | 'hybrid': False, 404 | 'csig': CSIG_GEOS2, 405 | 'esig': ESIG_GEOS2}, 406 | 'GEOS2_70L': {'reference': 'GEOS2', 407 | 'description': 'GEOS-2 pure sigma' 408 | ' original resolution', 409 | 'Nlayers': 70, 410 | 'csig': CSIG_GEOS2_70L, 411 | 'esig': ESIG_GEOS2_70L}, 412 | 'GEOS3': {'reference': 'GEOS', 413 | 'description': 'GEOS-3 pure sigma', 414 | 'Nlayers': 48, 415 | 'Ntrop': 20, 416 | 'hybrid': False, 417 | 'csig': CSIG_GEOS3, 418 | 'esig': ESIG_GEOS3}, 419 | 'GEOS3_30L': {'reference': 'GEOS3', 420 | 'description': 'GEOS-3 pure sigma reduced', 421 | 'Nlayers': 30, 422 | 'csig': CSIG_GEOS3_30L, 423 | 'esig': ESIG_GEOS3_30L}, 424 | 'GEOS3_REDUCED': {'reference': 'GEOS3_30L'}, 425 | 'GEOS4': {'reference': 'GEOS', 426 | 'description': 'GEOS-4 hybrid', 427 | 'Nlayers': 55, 428 | 'Ntrop': 17, 429 | 'hybrid': True, 430 | 'Ap': Ap_GEOS4, 431 | 'Bp': Bp_GEOS4}, 432 | 'FVDAS': {'reference': 'GEOS4'}, 433 | 'GEOS4_30L': {'reference': 'GEOS4', 434 | 'description': 'GEOS-4 hybrid reduced', 435 | 'Nlayers': 30, 436 | 'Ap': Ap_GEOS4_REDUCED, 437 | 'Bp': Bp_GEOS4_REDUCED}, 438 | 'GEOS4_REDUCED': {'reference': 'GEOS4_30L'}, 439 | 'GEOS5': {'reference': 'GEOS', 440 | 'description': 'GEOS-5.2.0 hybrid', 441 | 'Nlayers': 72, 442 | 'Ntrop': 38, 443 | 'hybrid': True, 444 | 'Ap': Ap_GEOS5, 445 | 'Bp': Bp_GEOS5}, 446 | 'GEOS5_NATIVE': {'reference': 'GEOS5'}, 447 | 'GEOS5_47L': {'reference': 'GEOS5', 448 | 'description': 'GEOS-5.2.0 hybrid reduced', 449 | 'Nlayers': 47, 450 | 'Ap': Ap_GEOS5_REDUCED, 451 | 
'Bp': Bp_GEOS5_REDUCED}, 452 | 'GEOS5_REDUCED': {'reference': 'GEOS5_47L'}, 453 | 'GEOS57': {'reference': 'GEOS5', 454 | 'description': 'GEOS-5.7.x hybrid'}, 455 | 'GEOS57_NATIVE': {'reference': 'GEOS57'}, 456 | 'GEOS57_47L': {'reference': 'GEOS5_47L', 457 | 'description': 'GEOS-5.7.x hybrid reduced'}, 458 | 'GEOS57_REDUCED': {'reference': 'GEOS57_47L'}, 459 | 'GEOSFP': {'reference': 'GEOS57', 460 | 'description': 'GEOS-5.11.x (FP) hybrid'}, 461 | 'GEOSFP_NATIVE': {'reference': 'GEOSFP'}, 462 | 'GEOSFP_47L': {'reference': 'GEOS57_47L', 463 | 'description': 'GEOS-5.11.x (FP) hybrid reduced'}, 464 | 'GEOSFP_REDUCED': {'reference': 'GEOSFP_47L'}, 465 | 'MERRA': {'reference': 'GEOS5', 466 | 'description': 'MERRA hybrid'}, 467 | 'MERRA_NATIVE': {'reference': 'MERRA'}, 468 | 'MERRA_47L': {'reference': 'GEOS5_47L', 469 | 'description': 'MERRA hybrid reduced'}, 470 | 'MERRA_REDUCED': {'reference': 'MERRA_47L'}, 471 | 'MERRA2': {'reference': 'GEOSFP', 472 | 'description': 'MERRA2 hybrid'}, 473 | 'MERRA2_47L': {'reference': 'GEOS5_47L'}, 474 | } 475 | 476 | 477 | def prof_altitude(pressure, p_coef=(-0.028389, -0.0493698, 0.485718, 0.278656, 478 | -17.5703, 48.0926)): 479 | """ 480 | Return altitude for given pressure. 481 | 482 | This function evaluates a polynomial at log10(pressure) values. 483 | 484 | Parameters 485 | ---------- 486 | pressure : array-like 487 | pressure values [hPa]. 488 | p_coef : array-like 489 | coefficients of the polynomial (default values are for the US 490 | Standard Atmosphere). 491 | 492 | Returns 493 | ------- 494 | altitude : array-like 495 | altitude values [km] (same shape as the pressure input array). 496 | 497 | See Also 498 | -------- 499 | prof_pressure : Returns pressure for 500 | given altitude. 501 | prof_temperature : Returns air temperature for 502 | given altitude. 503 | 504 | Notes 505 | ----- 506 | Default coefficient values represent a 5th degree polynomial which had 507 | been fitted to USSA data from 0-100 km. Accuracy is on the order of 1% for 508 | 0-100 km and 0.5% below 30 km. This function, with default values, may thus 509 | produce bad results with pressure less than about 3e-4 hPa. 510 | 511 | Examples 512 | -------- 513 | >>> prof_altitude([1000, 800, 600]) 514 | array([ 0.1065092 , 1.95627858, 4.2060627 ]) 515 | 516 | """ 517 | pressure = np.asarray(pressure) 518 | altitude = np.polyval(p_coef, np.log10(pressure.flatten())) 519 | return altitude.reshape(pressure.shape) 520 | 521 | 522 | def prof_pressure(altitude, z_coef=(1.94170e-9, -5.14580e-7, 4.57018e-5, 523 | -1.55620e-3, -4.61994e-2, 2.99955)): 524 | """ 525 | Return pressure for given altitude. 526 | 527 | This function evaluates a polynomial at altitude values. 528 | 529 | Parameters 530 | ---------- 531 | altitude : array-like 532 | altitude values [km]. 533 | z_coef : array-like 534 | coefficients of the polynomial (default values are for the US 535 | Standard Atmosphere). 536 | 537 | Returns 538 | ------- 539 | pressure : array-like 540 | pressure values [hPa] (same shape as the altitude input array). 541 | 542 | See Also 543 | -------- 544 | prof_altitude : Returns altitude for 545 | given pressure. 546 | prof_temperature : Returns air temperature for 547 | given altitude. 548 | 549 | Notes 550 | ----- 551 | Default coefficient values represent a 5th degree polynomial which had 552 | been fitted to USSA data from 0-100 km. Accuracy is on the order of 1% for 553 | 0-100 km and 0.5% below 30 km.
This function, with default values, may thus 554 | produce bad results with altitude > 100 km. 555 | 556 | Examples 557 | -------- 558 | >>> prof_pressure([0, 10, 20]) 559 | array([ 998.96437334, 264.658697 , 55.28114631]) 560 | 561 | """ 562 | altitude = np.asarray(altitude) 563 | pressure = np.power(10, np.polyval(z_coef, altitude.flatten())) 564 | return pressure.reshape(altitude.shape) 565 | 566 | 567 | def _get_supported_models(): 568 | """ 569 | Returns a tuple of the names of the models for which grid specifications 570 | are available. 571 | """ 572 | return tuple(MODELS.keys()) 573 | 574 | 575 | def _find_references(model_name, references=None): 576 | """ 577 | Iterate over model references for `model_name` 578 | and return a list of parent model specifications (including those of 579 | `model_name`, ordered from parent to child). 580 | """ 581 | references = references or [] 582 | references.append(model_name) 583 | 584 | ref = MODELS[model_name].get('reference') 585 | if ref is not None: 586 | _find_references(ref, references) 587 | 588 | parent_models = [m for m in references] 589 | parent_models.reverse() 590 | 591 | return parent_models 592 | 593 | 594 | def _get_model_info(model_name): 595 | """ 596 | Get the grid specifications for a given model. 597 | 598 | Parameters 599 | ---------- 600 | model_name : string 601 | Name of the model. Supports multiple formats 602 | (e.g., 'GEOS5', 'GEOS-5' or 'GEOS_5'). 603 | 604 | Returns 605 | ------- 606 | specifications : dict 607 | Grid specifications as a dictionary. 608 | 609 | Raises 610 | ------ 611 | ValueError 612 | If the model is not supported (see `models`) or if the given 613 | `model_name` corresponds to several entries in the list of 614 | supported models. 615 | 616 | """ 617 | # trying to get as much as possible a valid model name from the given 618 | # `model_name`, using regular expressions. 619 | split_name = re.split(r'[\-_\s]', model_name.strip().upper()) 620 | sep_chars = ('', ' ', '-', '_') 621 | gen_seps = itertools.combinations_with_replacement( 622 | sep_chars, len(split_name) - 1 623 | ) 624 | test_names = ("".join((n for n in itertools.chain(*list(zip(split_name, 625 | s + ('',)))))) 626 | for s in gen_seps) 627 | match_names = list([name for name in test_names if name 628 | in _get_supported_models()]) 629 | 630 | if not len(match_names): 631 | raise ValueError("Model '{0}' is not supported".format(model_name)) 632 | elif len(match_names) > 1: 633 | raise ValueError("Multiple matched models for given model name '{0}'" 634 | .format(model_name)) 635 | 636 | valid_model_name = match_names[0] 637 | parent_models = _find_references(valid_model_name) 638 | 639 | model_spec = dict() 640 | for m in parent_models: 641 | model_spec.update(MODELS[m]) 642 | model_spec.pop('reference') 643 | model_spec['model_family'] = parent_models[0] 644 | model_spec['model_name'] = valid_model_name 645 | 646 | return model_spec 647 | --------------------------------------------------------------------------------
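To close, a minimal usage sketch tying the multi-file reader in core.py to the grid helpers in gridspec.py. The glob pattern and the 1013.25 hPa surface pressure are illustrative assumptions rather than values taken from the package; open_mfbpchdataset refuses to run without dask, tracerinfo.dat and diaginfo.dat are discovered automatically when they sit next to the bpch files, and the hybrid edge-pressure relation p_edge = Ap + Bp * p_surface is the conventional GEOS-Chem formulation, not a function exported by xbpch.

import xbpch
from xbpch.util.gridspec import _get_model_info, prof_altitude

# Read a run's timeseries output into a single dataset, concatenated along
# 'time' (multi-file reads require dask).
ds = xbpch.open_mfbpchdataset("output/ts*.bpch", dask=True)  # hypothetical glob

# Look up the vertical grid for the model that produced the output and
# reconstruct approximate level-edge pressures/altitudes for a
# 1013.25 hPa surface.
spec = _get_model_info("GEOS5_47L")
p_edge = spec["Ap"] + spec["Bp"] * 1013.25    # hPa
z_edge = prof_altitude(p_edge)                # km, US Standard Atmosphere fit

print(ds)
print(spec["Nlayers"], z_edge.min(), z_edge.max())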