├── xbpch ├── util │ ├── __init__.py │ ├── diaginfo.py │ ├── cf.py │ └── gridspec.py ├── __init__.py ├── common.py ├── uff.py ├── grid.py ├── bpch.py └── core.py ├── MANIFEST.in ├── doc ├── dask_graphs │ ├── sample_read.png │ └── sample_normalized.png ├── example_plots │ ├── cartopy_example.png │ └── cartopy_seasonal_facet.png ├── environment.yml ├── reading.rst ├── Makefile ├── make.bat ├── index.rst ├── installation.rst ├── quick_start.rst ├── conf.py └── usage.rst ├── readthedocs.yml ├── ci ├── environment-py27.yml ├── environment-py35.yml └── environment-py36.yml ├── LICENSE ├── RELEASE_GUIDE.rst ├── .gitignore ├── setup.py ├── scripts └── bpch_to_nc └── README.rst /xbpch/util/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | recursive-include doc * 3 | prune doc/_build 4 | global-exclude .DS_Store -------------------------------------------------------------------------------- /doc/dask_graphs/sample_read.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/dask_graphs/sample_read.png -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | conda: 2 | file: doc/environment.yml 3 | python: 4 | version: 3 5 | setup_py_install: true -------------------------------------------------------------------------------- /doc/dask_graphs/sample_normalized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/dask_graphs/sample_normalized.png -------------------------------------------------------------------------------- /doc/example_plots/cartopy_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/example_plots/cartopy_example.png -------------------------------------------------------------------------------- /doc/example_plots/cartopy_seasonal_facet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darothen/xbpch/HEAD/doc/example_plots/cartopy_seasonal_facet.png -------------------------------------------------------------------------------- /xbpch/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | try: 3 | from . version import __version__ 4 | except: 5 | pass 6 | 7 | from . bpch import BPCHFile 8 | from . 
core import open_bpchdataset, open_mfbpchdataset -------------------------------------------------------------------------------- /doc/environment.yml: -------------------------------------------------------------------------------- 1 | name: xbpch 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.5 6 | - dask>=0.14 7 | - xarray>=0.9 8 | - pandas 9 | - ipython 10 | - future 11 | - cartopy 12 | - pyproj 13 | - matplotlib 14 | - numpydoc 15 | - sphinx 16 | -------------------------------------------------------------------------------- /ci/environment-py27.yml: -------------------------------------------------------------------------------- 1 | name: test_xbpch 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=2.7 6 | - cython 7 | - dask>=0.14 8 | - future 9 | - numpy 10 | - pytest 11 | - xarray>=0.9 12 | - pip: 13 | - codecov 14 | - pytest-cov -------------------------------------------------------------------------------- /ci/environment-py35.yml: -------------------------------------------------------------------------------- 1 | name: test_xbpch 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.5 7 | - cython 8 | - dask>=0.14 9 | - future 10 | - numpy 11 | - pytest 12 | - xarray>=0.12 13 | - pip: 14 | - codecov 15 | - pytest-cov -------------------------------------------------------------------------------- /ci/environment-py36.yml: -------------------------------------------------------------------------------- 1 | name: test_xbpch 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.6 7 | - cython 8 | - dask>=0.14 9 | - future 10 | - numpy 11 | - pytest 12 | - xarray>=0.12 13 | - pip: 14 | - codecov 15 | - pytest-cov -------------------------------------------------------------------------------- /doc/reading.rst: -------------------------------------------------------------------------------- 1 | 2 | Reading BPCH Files 3 | ================== 4 | 5 | **xbpch** provides three main utilities for reading bpch files, all of which 6 | are provided as top-level package imports. For most purposes, you should use 7 | ``open_bpchdataset()``, however a lower-level interface, ``BPCHFile()`` is also 8 | provided in case you would prefer manually processing the bpch contents. 9 | 10 | See :doc:`/usage` for more details. 11 | 12 | .. autofunction:: xbpch.open_bpchdataset 13 | 14 | .. autofunction:: xbpch.open_mfbpchdataset 15 | 16 | .. autoclass:: xbpch.BPCHFile 17 | :members: 18 | :private-members: 19 | :special-members: -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = xbpch 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=xbpch 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Daniel Rothenberg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | 2 | xbpch 3 | ===== 4 | 5 | .. image:: https://badge.fury.io/py/xbpch.svg 6 | :target: https://badge.fury.io/py/xbpch 7 | :alt: PyPI version 8 | .. image:: https://readthedocs.org/projects/xbpch/badge/?version=latest 9 | :target: http://xbpch.readthedocs.io/en/latest/?badge=latest 10 | :alt: Documentation Status 11 | .. image:: https://zenodo.org/badge/89022822.svg 12 | :target: https://zenodo.org/badge/latestdoi/89022822 13 | :alt: Zenodo DOI 14 | 15 | **xpbch** is a simple utility for reading the proprietary 16 | `binary punch format (bpch) outputs `_ 17 | used in versions of GEOS-Chem_ earlier than v11-02. The utility allows a user 18 | to load this data into an xarray_- and dask_-powered workflow without 19 | necessarily pre-processing the data using GAMAP or IDL. 
This opens the door 20 | to out-of-core and parallel processing of GEOS-Chem_ output. 21 | 22 | .. toctree:: 23 | :maxdepth: 2 24 | 25 | installation 26 | quick_start 27 | usage 28 | reading 29 | 30 | Recent Updates 31 | -------------- 32 | 33 | **v0.3.5 (May 19, 2019)** 34 | 35 | - Fixed incompatibility with xarray arising from backward incompatible changes with Python 2.7 36 | 37 | **v0.3.4 (January 20, 2019)** 38 | 39 | - Miscellaneous fixes for issues filed on GitHub Issue Tracker 40 | 41 | **v0.3.3 (March 18, 2018)** 42 | 43 | - Clean-up for xarray v0.10.2 compatibility 44 | - Tweak to more reliably infer and unpack 3D field shape (from Jenny Fisher) 45 | 46 | 47 | .. _dask: http://dask.pydata.org 48 | .. _xarray: http://xarray.pydata.org 49 | .. _GEOS-Chem: http://www.geos-chem.org 50 | 51 | License 52 | ------- 53 | 54 | Copyright (c) 2019 Daniel Rothenberg 55 | 56 | This work is licensed_ under a permissive MIT License. 57 | I acknowledge important contributions from Benoît Bovy, 58 | Gerrit Kuhlmann, and Christoph Keller. 59 | 60 | .. _licensed: http://github.com/darothen/xbpch/master/LICENSE 61 | -------------------------------------------------------------------------------- /RELEASE_GUIDE.rst: -------------------------------------------------------------------------------- 1 | Releasing a new version of xbpch 2 | ================================ 3 | 4 | So you've jut incorporated a new patch or feature into **xbpch** - congratulations! 5 | This short guide is intended to help you cut a new release of the package incorporating this new work. 6 | By the end of this process, all **xbpch** users should be able to easily upgrade their version of the code via *pip* or *conda*. 7 | 8 | 1. Upgrade your local repository to reflect the head on "master" 9 | 10 | $ git pull upstream master 11 | 12 | 2. Ensure that "doc/index.rst" has an entry under "Recent Changes" reflecting any new work you're including in this release 13 | 14 | 3. Open "setup.py" and increment the version number - in most cases, you'll probably increment the **MICRO** version, but for significant changes you'll probably want to reset **MICRO** to 0 and increment the **MINOR**; see `Semantic Versioning `_ for more information 15 | 16 | 4. Commit the documentation and version changes with a commit message indicating that this is a version release 17 | 18 | $ git commit -a -m "Release v0.X.Y" 19 | 20 | 5. Tag the release 21 | 22 | $ git tag -a v0.X.Y -m 'v0.X.Y' 23 | 24 | 6. Push the changes and version tag upstream to master 25 | 26 | $ git push upstream master 27 | $ git push upstream --tags 28 | 29 | 7. Via the project GitHub page, click the "releases" button and then "Draft a new release". Select v0.X.Y and create the release; you can add documentation notes if you would like, but historically we've maintained these via the official documentation. 30 | 31 | At this point, the automatic machinery from conda-forge and ReadTheDocs should just "work" and update the package appropriately at those places. 32 | You should keep an eye on the `conda-forge feedstock `_ to ensure that it builds a new release within a few hours. 33 | However, you'll manually need to cut a new release for PyPi. 34 | To do this: 35 | 36 | 1. Navigate to your repository directory and issue a command to build a wheel: 37 | 38 | $ python setup.py bdist_wheel sdist 39 | 40 | This should create the files "dist/xbpch-0.X.Y.tar.gz" and "dist/xbpch-0.X.Y-py3-none-any.whl" 41 | 42 | 2. 
Upload your new wheel via twine 43 | 44 | $ twine upload dist/xbpch-0.X.Y* 45 | 46 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ============ 4 | 5 | Requirements 6 | ------------ 7 | 8 | **xbpch** is written in pure Python (version >= 3.5), and leans on two important 9 | libraries: 10 | 11 | 1. xarray_ (version >= 0.9): a pandas-like toolkit for working with 12 | labeled, *n*-dimensional data 13 | 14 | 2. dask_ (version >= 0.14): a library for performing out-of-core, 15 | parallel computations on both tabular and array-like datasets 16 | 17 | The easiest way to install these libraries is to use the conda_ 18 | package manager:: 19 | 20 | $ conda install -c conda-forge xarray dask 21 | 22 | conda_ can be obtained as part of the Anaconda_ Python distribution 23 | from Continuum IO, although you do not need all of the packages it 24 | provides in order to use **xbpch**. Note that we recommend installing the latest 25 | versions from community-maintained `conda-forge `_ 26 | collection, since these usually contain bug-fixes and additional features. 27 | 28 | .. note:: 29 | 30 | Basic support for Python 2.7 is available in **xbpch** but it has not been 31 | tested, since the evolutionary GCPy package will only support Python 3. If, 32 | for some reason, you must use Python 2.7 and encounter problems, please 33 | reach out to us and we may be able to fix them. 34 | 35 | 36 | Installation via conda 37 | ---------------------- 38 | 39 | The preferred way to install **xbpch** is also via conda_:: 40 | 41 | $ conda install -c conda-forge xbpch 42 | 43 | 44 | Installation via pip 45 | -------------------- 46 | 47 | **xbpch** is available on `PyPI `_, and 48 | can be installed using setuptools:: 49 | 50 | $ pip install xbpch 51 | 52 | Installation from source 53 | ------------------------ 54 | 55 | If you're developing or contributing to **xbpch**, you may wish 56 | instead to install directly from a local copy of the source code. To do so, 57 | you must first clone the the master repository (or a fork) and install locally 58 | via pip:: 59 | 60 | $ git clone https://github.com/darothen/xbpch.git 61 | $ cd xbpch 62 | $ python setup.py install 63 | 64 | You will need to substitute in the path to your preferred repository/mirror 65 | of the source code. 66 | 67 | Note that you can also install directly from the source using setuptools:: 68 | 69 | $ pip install git+https://github.com/darothen/xbpch.git 70 | 71 | .. _Anaconda: https://www.continuum.io/downloads 72 | .. _conda: http://conda.pydata.org 73 | .. _dask: http://dask.pydata.org 74 | .. 
_xarray: http://xarray.pydata.org -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/pydev,emacs,python 3 | 4 | .idea 5 | xbpch/version.py 6 | WORKING_NOTES.md 7 | 8 | ### Emacs ### 9 | # -*- mode: gitignore; -*- 10 | *~ 11 | \#*\# 12 | /.emacs.desktop 13 | /.emacs.desktop.lock 14 | *.elc 15 | auto-save-list 16 | tramp 17 | .\#* 18 | 19 | # Org-mode 20 | .org-id-locations 21 | *_archive 22 | 23 | # flymake-mode 24 | *_flymake.* 25 | 26 | # eshell files 27 | /eshell/history 28 | /eshell/lastdir 29 | 30 | # elpa packages 31 | /elpa/ 32 | 33 | # reftex files 34 | *.rel 35 | 36 | # AUCTeX auto folder 37 | /auto/ 38 | 39 | # cask packages 40 | .cask/ 41 | dist/ 42 | 43 | # Flycheck 44 | flycheck_*.el 45 | 46 | # server auth directory 47 | /server/ 48 | 49 | # projectiles files 50 | .projectile 51 | 52 | # directory configuration 53 | .dir-locals.el 54 | 55 | ### pydev ### 56 | .pydevproject 57 | 58 | ### Python ### 59 | # Byte-compiled / optimized / DLL files 60 | __pycache__/ 61 | *.py[cod] 62 | *$py.class 63 | 64 | # C extensions 65 | *.so 66 | 67 | # Distribution / packaging 68 | .Python 69 | env/ 70 | build/ 71 | develop-eggs/ 72 | downloads/ 73 | eggs/ 74 | .eggs/ 75 | lib/ 76 | lib64/ 77 | parts/ 78 | sdist/ 79 | var/ 80 | wheels/ 81 | *.egg-info/ 82 | .installed.cfg 83 | *.egg 84 | 85 | # PyInstaller 86 | # Usually these files are written by a python script from a template 87 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 88 | *.manifest 89 | *.spec 90 | 91 | # Installer logs 92 | pip-log.txt 93 | pip-delete-this-directory.txt 94 | 95 | # Unit test / coverage reports 96 | htmlcov/ 97 | .tox/ 98 | .coverage 99 | .coverage.* 100 | .cache 101 | nosetests.xml 102 | coverage.xml 103 | *,cover 104 | .hypothesis/ 105 | 106 | # Translations 107 | *.mo 108 | *.pot 109 | 110 | # Django stuff: 111 | *.log 112 | local_settings.py 113 | 114 | # Flask stuff: 115 | instance/ 116 | .webassets-cache 117 | 118 | # Scrapy stuff: 119 | .scrapy 120 | 121 | # Sphinx documentation 122 | docs/_build/ 123 | 124 | # PyBuilder 125 | target/ 126 | 127 | # Jupyter Notebook 128 | .ipynb_checkpoints 129 | 130 | # pyenv 131 | .python-version 132 | 133 | # celery beat schedule file 134 | celerybeat-schedule 135 | 136 | # SageMath parsed files 137 | *.sage.py 138 | 139 | # dotenv 140 | .env 141 | 142 | # virtualenv 143 | .venv 144 | venv/ 145 | ENV/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # End of https://www.gitignore.io/api/pydev,emacs,python -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import warnings 5 | 6 | from setuptools import setup, find_packages 7 | 8 | from textwrap import dedent 9 | 10 | MAJOR = 0 11 | MINOR = 3 12 | MICRO = 5 13 | VERSION = "{}.{}.{}".format(MAJOR, MINOR, MICRO) 14 | DEV = False 15 | 16 | # Correct versioning with git info if DEV 17 | if DEV: 18 | import subprocess 19 | 20 | pipe = subprocess.Popen( 21 | ['git', "describe", "--always", "--match", "v[0-9]*"], 22 | stdout=subprocess.PIPE) 23 | so, err = pipe.communicate() 24 | 25 | if pipe.returncode != 0: 26 | # no 
git or something wrong with git (not in dir?) 27 | warnings.warn("WARNING: Couldn't identify git revision, using generic version string") 28 | VERSION += ".dev" 29 | else: 30 | git_rev = so.strip() 31 | git_rev = git_rev.decode('ascii') # necessary for Python >= 3 32 | 33 | VERSION += ".dev-{}".format(git_rev) 34 | 35 | DESCRIPTION = "xarray interface for bpch files" 36 | LONG_DESCRIPTION = """\ 37 | **xpbch** is a simple utility for reading the proprietary binary punch format 38 | (bpch) outputs used in versions of GEOS-Chem earlier than v11-02. The utility 39 | allows a user to load this data into an xarray/dask-powered workflow without 40 | necessarily pre-processing the data using GAMAP or IDL. 41 | """ 42 | 43 | DISTNAME = "xbpch" 44 | AUTHOR = "Daniel Rothenberg" 45 | AUTHOR_EMAIL = "darothen@mit.edu" 46 | URL = "https://github.com/darothen/xbpch" 47 | LICENSE = "MIT" 48 | DOWNLOAD_URL = ("https://github.com/darothen/xbpch/archive/v{}.tar.gz" 49 | .format(VERSION)) 50 | 51 | CLASSIFIERS = [ 52 | 'Development Status :: 5 - Production/Stable', 53 | 'License :: OSI Approved :: MIT License', 54 | 'Operating System :: OS Independent', 55 | 'Intended Audience :: Science/Research', 56 | 'Programming Language :: Python', 57 | 'Programming Language :: Python :: 2.7', 58 | 'Programming Language :: Python :: 3.4', 59 | 'Programming Language :: Python :: 3.5', 60 | 'Programming Language :: Python :: 3.6', 61 | 'Topic :: Scientific/Engineering', 62 | ] 63 | 64 | def _write_version_file(): 65 | 66 | fn = os.path.join(os.path.dirname(__file__), DISTNAME, 'version.py') 67 | 68 | version_str = dedent(""" 69 | __version__ = '{}' 70 | """) 71 | 72 | # Write version file 73 | with open(fn, 'w') as version_file: 74 | version_file.write(version_str.format(VERSION)) 75 | 76 | # Write version and install 77 | _write_version_file() 78 | 79 | setup( 80 | name = DISTNAME, 81 | author = AUTHOR, 82 | author_email = AUTHOR_EMAIL, 83 | maintainer = AUTHOR, 84 | maintainer_email = AUTHOR_EMAIL, 85 | description = DESCRIPTION, 86 | long_description = LONG_DESCRIPTION, 87 | license = LICENSE, 88 | url = URL, 89 | version = VERSION, 90 | download_url = DOWNLOAD_URL, 91 | 92 | packages = find_packages(), 93 | package_data = {}, 94 | scripts = [ 95 | 'scripts/bpch_to_nc', 96 | ], 97 | 98 | classifiers = CLASSIFIERS 99 | ) 100 | -------------------------------------------------------------------------------- /xbpch/common.py: -------------------------------------------------------------------------------- 1 | 2 | from datetime import datetime 3 | 4 | import numpy as np 5 | 6 | # physical or chemical constants 7 | C_MOLECULAR_WEIGHT = 12e-3 # molecular weight of C atoms (kg/mole) 8 | 9 | def broadcast_1d_array(arr, ndim, axis=1): 10 | """ 11 | Broadcast 1-d array `arr` to `ndim` dimensions on the first axis 12 | (`axis`=0) or on the last axis (`axis`=1). 13 | 14 | Useful for 'outer' calculations involving 1-d arrays that are related to 15 | different axes on a multidimensional grid. 16 | """ 17 | ext_arr = arr 18 | for i in range(ndim - 1): 19 | ext_arr = np.expand_dims(ext_arr, axis=axis) 20 | return ext_arr 21 | 22 | 23 | def get_timestamp(time=True, date=True, fmt=None): 24 | """ Return the current timestamp in machine local time. 25 | 26 | Parameters: 27 | ----------- 28 | time, date : Boolean 29 | Flag to include the time or date components, respectively, 30 | in the output. 31 | fmt : str, optional 32 | If passed, will override the time/date choice and use as 33 | the format string passed to `strftime`. 
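Examples
--------
Illustrative only; the actual value depends on the machine-local time at
the moment of the call.

>>> get_timestamp(time=False)     # doctest: +SKIP
'04-24-2017'
>>> get_timestamp(fmt="%Y%m%d")   # doctest: +SKIP
'20170424'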
34 | """ 35 | 36 | time_format = "%H:%M:%S" 37 | date_format = "%m-%d-%Y" 38 | 39 | if fmt is None: 40 | if time and date: 41 | fmt = time_format + " " + date_format 42 | elif time: 43 | fmt = time_format 44 | elif date: 45 | fmt = date_format 46 | else: 47 | raise ValueError("One of `date` or `time` must be True!") 48 | 49 | return datetime.now().strftime(fmt) 50 | 51 | 52 | def fix_attr_encoding(ds): 53 | """ This is a temporary hot-fix to handle the way metadata is encoded 54 | when we read data directly from bpch files. It removes the 'scale_factor' 55 | and 'units' attributes we encode with the data we ingest, converts the 56 | 'hydrocarbon' and 'chemical' attribute to a binary integer instead of a 57 | boolean, and removes the 'units' attribute from the "time" dimension since 58 | that too is implicitly encoded. 59 | 60 | In future versions of this library, when upstream issues in decoding 61 | data wrapped in dask arrays is fixed, this won't be necessary and will be 62 | removed. 63 | 64 | """ 65 | 66 | def _maybe_del_attr(da, attr): 67 | """ Possibly delete an attribute on a DataArray if it's present """ 68 | if attr in da.attrs: 69 | del da.attrs[attr] 70 | return da 71 | 72 | def _maybe_decode_attr(da, attr): 73 | # TODO: Fix this so that bools get written as attributes just fine 74 | """ Possibly coerce an attribute on a DataArray to an easier type 75 | to write to disk. """ 76 | # bool -> int 77 | if (attr in da.attrs) and (type(da.attrs[attr] == bool)): 78 | da.attrs[attr] = int(da.attrs[attr]) 79 | return da 80 | 81 | for v in ds.data_vars: 82 | da = ds[v] 83 | da = _maybe_del_attr(da, 'scale_factor') 84 | da = _maybe_del_attr(da, 'units') 85 | da = _maybe_decode_attr(da, 'hydrocarbon') 86 | da = _maybe_decode_attr(da, 'chemical') 87 | # Also delete attributes on time. 88 | if hasattr(ds, 'time'): 89 | times = ds.time 90 | times = _maybe_del_attr(times, 'units') 91 | 92 | return ds 93 | -------------------------------------------------------------------------------- /scripts/bpch_to_nc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Convert a BPCH file (or files) to a CF-compliant, NetCDF dataset. 4 | 5 | This script is a simple utility for opening (and optionally concatenating) BPCH 6 | files and then immediately writing them out to disk in NetCDF format. It's a 7 | thin wrapper designed to avoid having to drop into an interactive Python session 8 | to accomplish this task. 
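Example invocation (file and path names here are only illustrative)::

    $ bpch_to_nc ND49_20060101.bpch ND49_20060101.nc \
        -t tracerinfo.dat -d diaginfo.dat

Several bpch files may be listed before the output name, in which case they
are concatenated along the time dimension before being written to NetCDF.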
9 | 10 | """ 11 | 12 | import os, sys 13 | 14 | from xbpch import open_bpchdataset, open_mfbpchdataset 15 | from dask.diagnostics import ProgressBar 16 | 17 | from argparse import ArgumentParser, RawDescriptionHelpFormatter 18 | parser = ArgumentParser(description=__doc__, 19 | formatter_class=RawDescriptionHelpFormatter) 20 | parser.add_argument("bpch_files", type=str, nargs="+", 21 | help="Paths to BPCH file(s) to load (, concatenate)" 22 | " and write back to disk") 23 | parser.add_argument("output_nc", type=str, 24 | help="Name of output file to write") 25 | parser.add_argument("-t", "--tracerinfo", metavar="tracerinfo.dat", 26 | type=str, default="tracerinfo.dat", 27 | help="Path to tracerinfo.dat, if not in current directory") 28 | parser.add_argument("-d", "--diaginfo", metavar="diaginfo.dat", 29 | type=str, default="diaginfo.dat", 30 | help="Path to diaginfo.dat, if not in current directory") 31 | 32 | def _maybe_del_attr(da, attr): 33 | """ Possibly delete an attribute on a DataArray if it's present """ 34 | if attr in da.attrs: 35 | del da.attrs[attr] 36 | 37 | return da 38 | 39 | 40 | def _maybe_decode_attr(da, attr): 41 | # TODO: Fix this so that bools get written as attributes just fine 42 | """ Possibly coerce an attribute on a DataArray to an easier type 43 | to write to disk. """ 44 | # bool -> int 45 | if (attr in da.attrs) and (type(da.attrs[attr] == bool)): 46 | da.attrs[attr] = int(da.attrs[attr]) 47 | 48 | return da 49 | 50 | 51 | if __name__ == "__main__": 52 | 53 | args = parser.parse_args() 54 | 55 | # Check if the output already exists; if so, exit immediately 56 | if os.path.exists(args.output_nc): 57 | print("ERROR: Can't write to output file that already exists.") 58 | sys.exit(1) 59 | 60 | # Check that all input files exist 61 | bad_files = [fn for fn in args.bpch_files if not os.path.exists(fn)] 62 | if bad_files: 63 | print("ERROR: Couldn't find the following input files:") 64 | for fn in bad_files: 65 | print(" " + fn) 66 | sys.exit(1) 67 | 68 | # Else, we should be good to read in and concatenate 69 | open_kws = { 70 | "tracerinfo_file": args.tracerinfo, 71 | "diaginfo_file": args.diaginfo, 72 | "memmap": True, "dask": True 73 | } 74 | 75 | print("\nReading in file(s)...") 76 | if len(args.bpch_files) == 1: 77 | ds = open_bpchdataset(args.bpch_files[0], **open_kws) 78 | else: 79 | ds = open_mfbpchdataset(args.bpch_files, **open_kws) 80 | 81 | # This block of code is hack to fix the encoding of attributes 82 | # on the DataArrays in this Dataset. They are being 83 | # set at a very low level when we read in the data, and manually 84 | # specifying the encoding doesn't work. 85 | # However, deleting them from the attributes dict 86 | # doesn't end up removing them from the final output file - they get 87 | # written just fine. 88 | print("\nDecoding variables...") 89 | for v in ds.data_vars: 90 | da = ds[v] 91 | da = _maybe_del_attr(da, 'scale_factor') 92 | da = _maybe_del_attr(da, 'units') 93 | da = _maybe_decode_attr(da, 'hydrocarbon') 94 | da = _maybe_decode_attr(da, 'chemical') 95 | # Also delete attributes on time. 
96 | if hasattr(ds, 'time'): 97 | times = ds.time 98 | times = _maybe_del_attr(times, 'units') 99 | 100 | print("\nWriting to " + args.output_nc + " ...") 101 | with ProgressBar(): 102 | ds.to_netcdf(args.output_nc) 103 | 104 | print("\n done!") 105 | 106 | -------------------------------------------------------------------------------- /xbpch/util/diaginfo.py: -------------------------------------------------------------------------------- 1 | 2 | from collections import namedtuple 3 | from warnings import warn 4 | 5 | import os 6 | import pandas as pd 7 | 8 | from .. common import C_MOLECULAR_WEIGHT 9 | 10 | #: Info for parsing diagnostic records 11 | diag_rec = namedtuple("diag_rec", 12 | ["name", "width", "type", "default", "read_only", "desc"]) 13 | diag_recs = [ 14 | diag_rec('offset', 8, int, 0, True, 15 | "Offset (constant to add to tracer numbers in order to" 16 | " distinguish between diff categories, as stored in" 17 | " tracerinfo.dat)"), 18 | diag_rec("-0", 1, str, ' ', True, None), 19 | diag_rec('name', 40, str, None, True, "Name of the category"), 20 | diag_rec('description', 100, str, None, True, "Description of category"), 21 | diag_rec("-1", 1, str, ' ', True, None) 22 | ] 23 | 24 | #: Info for parsing tracer records 25 | tracer_rec = diag_rec 26 | tracer_recs = [ 27 | tracer_rec('name', 8, str, None, True, "Tracer name"), 28 | tracer_rec("-0", 1, str, ' ', True, None), 29 | tracer_rec('full_name', 30, str, None, True, "Full tracer name"), 30 | tracer_rec('molwt', 10, float, 1., True, "Molecular weight (kg/mole)"), 31 | tracer_rec('C', 3, int, 1, True, "# moles C/moles tracer for HCs"), 32 | tracer_rec('tracer', 9, int, None, True, "Tracer number"), 33 | tracer_rec('scale', 10, float, 1e9, True, "Standard scale factor to convert to" 34 | " given units"), 35 | tracer_rec("-1", 1, str, ' ', True, None), 36 | tracer_rec('unit', 40, str, 'ppbv', True, "Unit string"), 37 | ] 38 | 39 | def get_diaginfo(diaginfo_file): 40 | """ 41 | Read an output's diaginfo.dat file and parse into a DataFrame for 42 | use in selecting and parsing categories. 43 | 44 | Parameters 45 | ---------- 46 | diaginfo_file : str 47 | Path to diaginfo.dat 48 | 49 | Returns 50 | ------- 51 | DataFrame containing the category information. 52 | 53 | """ 54 | 55 | widths = [rec.width for rec in diag_recs] 56 | col_names = [rec.name for rec in diag_recs] 57 | dtypes = [rec.type for rec in diag_recs] 58 | usecols = [name for name in col_names if not name.startswith('-')] 59 | 60 | diag_df = pd.read_fwf(diaginfo_file, widths=widths, names=col_names, 61 | dtypes=dtypes, comment="#", header=None, 62 | usecols=usecols) 63 | diag_desc = {diag.name: diag.desc for diag in diag_recs 64 | if not diag.name.startswith('-')} 65 | 66 | return diag_df, diag_desc 67 | 68 | 69 | def get_tracerinfo(tracerinfo_file): 70 | """ 71 | Read an output's tracerinfo.dat file and parse into a DataFrame for 72 | use in selecting and parsing categories. 73 | 74 | Parameters 75 | ---------- 76 | tracerinfo_file : str 77 | Path to tracerinfo.dat 78 | 79 | Returns 80 | ------- 81 | DataFrame containing the tracer information. 
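Examples
--------
The function returns both the parsed table and a dict describing each
column; the path below is illustrative.

>>> tracer_df, tracer_desc = get_tracerinfo("tracerinfo.dat")  # doctest: +SKIP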
82 | 83 | """ 84 | 85 | widths = [rec.width for rec in tracer_recs] 86 | col_names = [rec.name for rec in tracer_recs] 87 | dtypes = [rec.type for rec in tracer_recs] 88 | usecols = [name for name in col_names if not name.startswith('-')] 89 | 90 | tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names, 91 | dtypes=dtypes, comment="#", header=None, 92 | usecols=usecols) 93 | 94 | # Check an edge case related to a bug in GEOS-Chem v12.0.3 which 95 | # erroneously dropped short/long tracer names in certain tracerinfo.dat outputs. 96 | # What we do here is figure out which rows were erroneously processed (they'll 97 | # have NaNs in them) and raise a warning if there are any 98 | na_free = tracer_df.dropna(subset=['tracer', 'scale']) 99 | only_na = tracer_df[~tracer_df.index.isin(na_free.index)] 100 | if len(only_na) > 0: 101 | warn("At least one row in {} wasn't decoded correctly; we strongly" 102 | " recommend you manually check that file to see that all" 103 | " tracers are properly recorded." 104 | .format(tracerinfo_file)) 105 | 106 | tracer_desc = {tracer.name: tracer.desc for tracer in tracer_recs 107 | if not tracer.name.startswith('-')} 108 | 109 | # Process some of the information about which variables are hydrocarbons 110 | # and chemical tracers versus other diagnostics. 111 | def _assign_hydrocarbon(row): 112 | if row['C'] != 1: 113 | row['hydrocarbon'] = True 114 | row['molwt'] = C_MOLECULAR_WEIGHT 115 | else: 116 | row['hydrocarbon'] = False 117 | return row 118 | 119 | tracer_df = ( 120 | tracer_df 121 | .apply(_assign_hydrocarbon, axis=1) 122 | .assign(chemical=lambda x: x['molwt'].astype(bool)) 123 | ) 124 | 125 | return tracer_df, tracer_desc -------------------------------------------------------------------------------- /doc/quick_start.rst: -------------------------------------------------------------------------------- 1 | .. _quick start: 2 | 3 | Quick Start 4 | =========== 5 | 6 | Assuming you're already familiar with xarray_, it's easy to dive right in to 7 | begin reading bpch data. If you don't have any GEOS-Chem_ data handy to test 8 | with, I've archived 9 | `a sample dataset here `_ 10 | here consisting of 14 days of hourly, ND49 output - good for diagnosing 11 | surface air quality statistics. 12 | 13 | Download the data and extract it to some directory:: 14 | 15 | $ wget https://ndownloader.figshare.com/files/8251094 16 | $ tar -xvzf sample_nd49.tar.gz 17 | 18 | You should now see 14 ``bpch`` files in your directory, and two ``.dat`` files. 19 | 20 | The whole point of **xbpch** is to read these data files natively into an 21 | `xarray.Dataset `_. 22 | You can do this with the :py:func:`xbpch.open_bpchdataset` method: 23 | 24 | .. ipython:: python 25 | :verbatim: 26 | 27 | import xbpch 28 | fn = "ND49_20060102_ref_e2006_m2010.bpch" 29 | ds = xbpch.open_bpchdataset(fn) 30 | 31 | If we print the dataset back out, we'll get a familiar representation: 32 | 33 | .. parsed-literal:: 34 | 35 | 36 | Dimensions: (lat: 91, lev: 47, lon: 144, nv: 2, time: 24) 37 | Coordinates: 38 | * lev (lev) float64 0.9925 0.9775 0.9624 0.9473 0.9322 0.9171 ... 39 | * lon (lon) float64 -180.0 -177.5 -175.0 -172.5 -170.0 -167.5 ... 40 | * lat (lat) float64 -89.5 -88.0 -86.0 -84.0 -82.0 -80.0 -78.0 ... 41 | * time (time) datetime64[ns] 2006-01-01T01:00:00 ... 42 | * nv (nv) int64 0 1 43 | Data variables: 44 | IJ_AVG_S_NO (time, lon, lat) float32 1.16601e-12 1.1599e-12 ... 45 | time_bnds (time, nv) datetime64[ns] 2006-01-01T01:00:00 ... 
46 | IJ_AVG_S_O3 (time, lon, lat) float32 9.25816e-09 9.25042e-09 ... 47 | IJ_AVG_S_SO4 (time, lon, lat) float32 1.41706e-10 1.4142e-10 ... 48 | IJ_AVG_S_NH4 (time, lon, lat) float32 1.16908e-11 1.16658e-11 ... 49 | IJ_AVG_S_NIT (time, lon, lat) float32 9.99837e-31 9.99897e-31 ... 50 | IJ_AVG_S_BCPI (time, lon, lat) float32 2.46206e-12 2.45698e-12 ... 51 | IJ_AVG_S_OCPI (time, lon, lat) float32 2.65303e-11 2.6476e-11 ... 52 | IJ_AVG_S_BCPO (time, lon, lat) float32 4.19881e-19 4.18213e-19 ... 53 | IJ_AVG_S_OCPO (time, lon, lat) float32 2.49109e-22 2.53752e-22 ... 54 | IJ_AVG_S_DST1 (time, lon, lat) float32 7.11484e-12 7.10209e-12 ... 55 | IJ_AVG_S_DST2 (time, lon, lat) float32 1.55181e-11 1.54779e-11 ... 56 | IJ_AVG_S_SALA (time, lon, lat) float32 3.70387e-11 3.69923e-11 ... 57 | OD_MAP_S_AOD (time, lon, lat) float32 0.292372 0.325568 0.358368 ... 58 | OD_MAP_S_DSTAOD (time, lon, lat) float32 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 59 | Attributes: 60 | modelname: GEOS5_47L 61 | halfpolar: 1 62 | res: (2.5, 2.0) 63 | center180: 1 64 | tracerinfo: tracerinfo.dat 65 | diaginfo: diaginfo.dat 66 | filetitle: b'GEOS-CHEM DIAG49 instantaneous timeseries' 67 | source: ND49_20060101_ref_e2006_m2010.bpch 68 | filetype: b'CTM bin 02' 69 | Conventions: CF1.6 70 | 71 | You can then proceed to process the data using the conventional routines 72 | you'd use in any xarray_-powered workflow. 73 | 74 | In the sample dataset highlighted here, the 14 days of hourly output are 75 | split across 14 files - one for each day's worth of data. **xbpch** 76 | provides a second method, :py:func:`xbpch.open_mfbpchdataset`, for reading in 77 | multiple-file datasets like these, and automatically concatenating them 78 | on the *time* record dimension: 79 | 80 | .. ipython:: python 81 | :verbatim: 82 | 83 | import xbpch 84 | 85 | from glob import glob 86 | 87 | # List all the bpch files in the current directory 88 | fns = glob("ND49_*.bpch") 89 | 90 | # Helper function to extract spatial mean O3 from each file 91 | def _preprocess(ds): 92 | return ds[['IJ_AVG_S_O3', ]].mean(['lon', 'lat']) 93 | 94 | ds = xbpch.open_mfbpchdataset( 95 | fns, preprocess=_preprocess, dask=True, memmap=True 96 | ) 97 | 98 | Again, printing yields the expected results: 99 | 100 | .. parsed-literal:: 101 | 102 | 103 | Dimensions: (time: 336) 104 | Coordinates: 105 | * time (time) datetime64[ns] 2006-01-01T01:00:00 ... 106 | Data variables: 107 | IJ_AVG_S_O3 (time) float32 2.5524e-08 2.55541e-08 2.55588e-08 ... 108 | 109 | Finally, if you don't want to drop into a Python interpreter but just want 110 | to quickly convert your binary data to NetCDF, you can run the utility 111 | script `bpch_to_nc` which is shipped with this library:: 112 | 113 | $ bpch_to_nc /path/to/my/data.bpch /path/to/my/output.nc 114 | 115 | Reading in file(s)... 116 | 117 | Decoding variables... 118 | 119 | Writing to /path/to/my/output.nc ... 120 | syncing 121 | [####################################] | 100% Completed | 52.1s 122 | 123 | .. _GEOS-Chem: http://www.geos-chem.org 124 | .. _dask: http://dask.pydata.org 125 | .. _xarray: http://xarray.pydata.org 126 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # xbpch documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Apr 24 17:47:50 2017. 
6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | 21 | import os 22 | import sys 23 | import xbpch 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.todo', 38 | 'sphinx.ext.coverage', 39 | 'sphinx.ext.mathjax', 40 | 'numpydoc', 41 | 'IPython.sphinxext.ipython_directive', 42 | 'IPython.sphinxext.ipython_console_highlighting', 43 | ] 44 | 45 | # Extensions arguments 46 | autosummary_generate = True 47 | numpydoc_class_members_toctree = True 48 | numpydoc_show_class_members = False 49 | 50 | # Add any paths that contain templates here, relative to this directory. 51 | templates_path = ['_templates'] 52 | 53 | # The suffix(es) of source filenames. 54 | # You can specify multiple suffix as a list of string: 55 | # 56 | # source_suffix = ['.rst', '.md'] 57 | source_suffix = '.rst' 58 | 59 | # The master toctree document. 60 | master_doc = 'index' 61 | 62 | # General information about the project. 63 | project = 'xbpch' 64 | copyright = '2017, Daniel Rothenberg' 65 | author = 'Daniel Rothenberg' 66 | 67 | # The version info for the project you're documenting, acts as replacement for 68 | # |version| and |release|, also used in various other places throughout the 69 | # built documents. 70 | # 71 | # The short X.Y version. 72 | version = '0.2.0' 73 | # The full version, including alpha/beta/rc tags. 74 | release = '0.2.0' 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = None 82 | 83 | # List of patterns, relative to source directory, that match files and 84 | # directories to ignore when looking for source files. 85 | # This patterns also effect to html_static_path and html_extra_path 86 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 87 | 88 | # The name of the Pygments (syntax highlighting) style to use. 89 | pygments_style = 'sphinx' 90 | 91 | # If true, `todo` and `todoList` produce output, else they produce nothing. 92 | todo_include_todos = False 93 | 94 | 95 | # -- Options for HTML output ---------------------------------------------- 96 | 97 | # Customization for building on RTD 98 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 99 | if not on_rtd: # only import and set the theme if we're building docs locally 100 | import sphinx_rtd_theme 101 | html_theme = 'sphinx_rtd_theme' 102 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 103 | 104 | # The theme to use for HTML and HTML Help pages. 
See the documentation for 105 | # a list of builtin themes. 106 | # 107 | # html_theme = 'default' 108 | 109 | 110 | # Theme options are theme-specific and customize the look and feel of a theme 111 | # further. For a list of options available for each theme, see the 112 | # documentation. 113 | # 114 | # html_theme_options = {} 115 | 116 | # Add any paths that contain custom static files (such as style sheets) here, 117 | # relative to this directory. They are copied after the builtin static files, 118 | # so a file named "default.css" will overwrite the builtin "default.css". 119 | html_static_path = ['_static'] 120 | 121 | 122 | # -- Options for HTMLHelp output ------------------------------------------ 123 | 124 | # Output file base name for HTML help builder. 125 | htmlhelp_basename = 'xbpchdoc' 126 | 127 | 128 | # -- Options for LaTeX output --------------------------------------------- 129 | 130 | latex_elements = { 131 | # The paper size ('letterpaper' or 'a4paper'). 132 | # 133 | # 'papersize': 'letterpaper', 134 | 135 | # The font size ('10pt', '11pt' or '12pt'). 136 | # 137 | # 'pointsize': '10pt', 138 | 139 | # Additional stuff for the LaTeX preamble. 140 | # 141 | # 'preamble': '', 142 | 143 | # Latex figure (float) alignment 144 | # 145 | # 'figure_align': 'htbp', 146 | } 147 | 148 | # Grouping the document tree into LaTeX files. List of tuples 149 | # (source start file, target name, title, 150 | # author, documentclass [howto, manual, or own class]). 151 | latex_documents = [ 152 | (master_doc, 'xbpch.tex', 'xbpch Documentation', 153 | 'Daniel Rothenberg', 'manual'), 154 | ] 155 | 156 | 157 | # -- Options for manual page output --------------------------------------- 158 | 159 | # One entry per manual page. List of tuples 160 | # (source start file, name, description, authors, manual section). 161 | man_pages = [ 162 | (master_doc, 'xbpch', 'xbpch Documentation', 163 | [author], 1) 164 | ] 165 | 166 | 167 | # -- Options for Texinfo output ------------------------------------------- 168 | 169 | # Grouping the document tree into Texinfo files. List of tuples 170 | # (source start file, target name, title, author, 171 | # dir menu entry, description, category) 172 | texinfo_documents = [ 173 | (master_doc, 'xbpch', 'xbpch Documentation', 174 | author, 'xbpch', 'One line description of project.', 175 | 'Miscellaneous'), 176 | ] 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /xbpch/uff.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for reading unformatted Fortran binary files 3 | 4 | Reproduced from PyGChem 5 | 6 | Copyright (C) 2012-2014 Gerrit Kuhlmann, Benoît Bovy 7 | see https://github.com/benbovy/PyGChem/blob/master/LICENSE.txt for more details 8 | 9 | """ 10 | 11 | from __future__ import division 12 | from __future__ import unicode_literals 13 | from __future__ import print_function 14 | from __future__ import absolute_import 15 | 16 | from future import standard_library 17 | standard_library.install_aliases() 18 | from builtins import * 19 | from builtins import zip 20 | from builtins import str 21 | from past.builtins import basestring 22 | from past.utils import old_div 23 | import struct 24 | import io 25 | 26 | 27 | _FIX_ERROR = ("Pre- and suffix of line do not match. This can happen, if the" 28 | " `endian` is incorrect.") 29 | 30 | 31 | class FortranFile(io.FileIO): 32 | """ 33 | A class for reading and writing unformatted binary Fortran files. 
34 | 35 | Parameters 36 | ---------- 37 | filename : string 38 | filename 39 | mode : {'rb', 'wb'} 40 | mode of the file: 'rb' (reading binary, default) or 'wb' 41 | (writing binary). 42 | endian : {'@', '<', '>'} 43 | byte order, size and alignment of the data in the file. 44 | '@' native, '<' little-endian, and '>' big-endian (default). 45 | 46 | Notes 47 | ----- 48 | Fortran writes data as "lines" when using the PRINT or WRITE statements. 49 | Each line consists of: 50 | - a prefix (4 byte integer gives the size of the data) 51 | - the real data 52 | - a suffix (same as prefix). 53 | 54 | This class can be used to read and write these "lines", in a similar 55 | way as reading "real lines" in a text file. A format can be given, 56 | while reading or writing to pack or unpack data into a binary 57 | format, using the 'struct' module from the Python standard library. 58 | 59 | See Documentation of Python's struct module for details on endians and 60 | format strings: https://docs.python.org/library/struct.html 61 | """ 62 | 63 | def __init__(self, filename, mode='rb', endian='>'): 64 | self.endian = endian 65 | super(FortranFile, self).__init__(filename, mode) 66 | 67 | def _fix(self, fmt='i'): 68 | """ 69 | Read pre- or suffix of line at current position with given 70 | format `fmt` (default 'i'). 71 | """ 72 | fmt = self.endian + fmt 73 | fix = self.read(struct.calcsize(fmt)) 74 | if fix: 75 | return struct.unpack(fmt, fix)[0] 76 | else: 77 | raise EOFError 78 | 79 | def readline(self, fmt=None): 80 | """ 81 | Return next unformatted "line". If format is given, unpack content, 82 | otherwise return byte string. 83 | """ 84 | prefix_size = self._fix() 85 | 86 | if fmt is None: 87 | content = self.read(prefix_size) 88 | else: 89 | fmt = self.endian + fmt 90 | fmt = _replace_star(fmt, prefix_size) 91 | content = struct.unpack(fmt, self.read(prefix_size)) 92 | 93 | try: 94 | suffix_size = self._fix() 95 | except EOFError: 96 | # when endian is invalid and prefix_size > total file size 97 | suffix_size = -1 98 | 99 | if prefix_size != suffix_size: 100 | raise IOError(_FIX_ERROR) 101 | 102 | return content 103 | 104 | def readlines(self): 105 | """ 106 | Return list strings, each a line from the file. 107 | """ 108 | return [line for line in self] 109 | 110 | def skipline(self): 111 | """ 112 | Skip the next line and returns position and size of line. 113 | Raises IOError if pre- and suffix of line do not match. 114 | """ 115 | position = self.tell() 116 | prefix = self._fix() 117 | self.seek(prefix, 1) # skip content 118 | suffix = self._fix() 119 | 120 | if prefix != suffix: 121 | raise IOError(_FIX_ERROR) 122 | 123 | return position, prefix 124 | 125 | def writeline(self, fmt, *args): 126 | """ 127 | Write `line` (list of objects) with given `fmt` to file. The 128 | `line` will be chained if object is iterable (except for 129 | basestrings). 130 | """ 131 | fmt = self.endian + fmt 132 | size = struct.calcsize(fmt) 133 | 134 | fix = struct.pack(self.endian + 'i', size) 135 | line = struct.pack(fmt, *args) 136 | 137 | self.write(fix) 138 | self.write(line) 139 | self.write(fix) 140 | 141 | def writelines(self, lines, fmt): 142 | """ 143 | Write `lines` with given `format`. 
144 | """ 145 | if isinstance(fmt, basestring): 146 | fmt = [fmt] * len(lines) 147 | for f, line in zip(fmt, lines): 148 | self.writeline(f, line, self.endian) 149 | 150 | def __iter__(self): 151 | return self 152 | 153 | def next(self, fmt=None): 154 | try: 155 | return self.readline(fmt) 156 | except EOFError: 157 | raise StopIteration 158 | 159 | 160 | def _replace_star(fmt, size): 161 | """ 162 | Replace the `*` placeholder in a format string (fmt), so that 163 | struct.calcsize(fmt) is equal to the given `size` using the format 164 | following the placeholder. 165 | 166 | Raises `ValueError` if number of `*` is larger than 1. If no `*` 167 | in `fmt`, returns `fmt` without checking its size! 168 | 169 | Examples 170 | -------- 171 | >>> _replace_star('ii*fi', 40) 172 | 'ii7fi' 173 | """ 174 | n_stars = fmt.count('*') 175 | 176 | if n_stars > 1: 177 | raise ValueError("More than one `*` in format (%s)." % fmt) 178 | 179 | if n_stars: 180 | i = fmt.find('*') 181 | s = struct.calcsize(fmt.replace(fmt[i:i + 2], '')) 182 | n = old_div((size - s), struct.calcsize(fmt[i + 1])) 183 | 184 | fmt = fmt.replace('*', str(n)) 185 | 186 | return fmt 187 | -------------------------------------------------------------------------------- /xbpch/util/cf.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides the capability to interpret CTM metadata according 3 | to the 'NetCDF Climate and Forecast (CF) Metadata Conventions' 4 | 5 | References: 6 | 7 | [CF] NetCDF Climate and Forecast (CF) Metadata conventions, Version 1.6, 8 | December, 2011. 9 | """ 10 | 11 | import datetime 12 | 13 | from xarray.core.variable import as_variable, Variable 14 | 15 | #: CTM timestamp definitions 16 | CTM_TIME_UNIT_STR = 'hours since 1985-01-01 00:00:00' 17 | CTM_TIME_REF_DT = datetime.datetime(1985, 1, 1) 18 | 19 | 20 | def tau2time(tau, reference=CTM_TIME_REF_DT): 21 | """ 22 | Convert given hours since reference (default: 01.01.1985 00:00) 23 | into a datetime object. 24 | """ 25 | return reference + datetime.timedelta(hours=tau) 26 | 27 | 28 | def time2tau(time, reference=CTM_TIME_REF_DT): 29 | """ 30 | Convert a datetime object into given hours since reference 31 | (default: 01.01.1985 00:00). 32 | """ 33 | return (time - reference).total_seconds() / 3600.0 34 | 35 | 36 | #: Mapping for unit names: CTM -> udunits2 37 | UNITS_MAP_CTM2CF = ( 38 | ('molec CO2', 'count'), 39 | ('molec', 'count'), 40 | ('atoms S', 'count'), 41 | ('atoms C', 'count'), 42 | ('ppbC', 'ppb'), # prefix or suffix required (nb. of carbon atoms) 43 | ('kg C', 'kg'), # prefix or suffix required (?) 44 | ('molC', 'mol'), # prefix or suffix required ? TODO: 45 | ('gC', 'g'), # prefix or suffix required ? 46 | ('kg S', 'kg'), 47 | ('kg OH', 'kg'), 48 | ('kg NO3', 'kg'), 49 | ('kg H2O2', 'kg'), 50 | ('unitless', '1'), 51 | ('unitles', '1'), # typo found in tracerinfo or diaginfo 52 | ('v/v', '1'), 53 | ('level', '1'), # allowed in CF1.6 but not compatible with udunits2 54 | ('Eta', '1'), 55 | ('Fraction', '1'), 56 | ('fraction', '1'), 57 | ('ratio', '1'), 58 | ('factor', '1'), 59 | ('none', '1'), 60 | ('[percentage]', '%'), 61 | ('deg C', 'Celsius'), 62 | ('C', 'Celsius'), 63 | ('mm/da', 'mm/day'), # typo in tracerinfo.dat 4/17/12 64 | ('kg/m2/', 'kg/m2')) # ?? (tracerinfo.dat 6801 (line 1075) 65 | 66 | 67 | def get_cfcompliant_units(units, prefix='', suffix=''): 68 | """ 69 | Get equivalent units that are compatible with the udunits2 library 70 | (thus CF-compliant). 
71 | 72 | Parameters 73 | ---------- 74 | units : string 75 | A string representation of the units. 76 | prefix : string 77 | Will be added at the beginning of the returned string 78 | (must be a valid udunits2 expression). 79 | suffix : string 80 | Will be added at the end of the returned string 81 | (must be a valid udunits2 expression). 82 | 83 | Returns 84 | ------- 85 | A string representation of the conforming units. 86 | 87 | References 88 | ---------- 89 | The udunits2 package : http://www.unidata.ucar.edu/software/udunits/ 90 | 91 | Notes 92 | ----- 93 | This function only relies on the table stored in :attr:`UNITS_MAP_CTM2CF`. 94 | Therefore, the units string returned by this function is not certified to 95 | be compatible with udunits2. 96 | 97 | Examples 98 | -------- 99 | >>> get_cfcompliant_units('molec/cm2') 100 | 'count/cm2' 101 | >>> get_cfcompliant_units('v/v') 102 | '1' 103 | >>> get_cfcompliant_units('ppbC', prefix='3') 104 | '3ppb 105 | 106 | """ 107 | compliant_units = units 108 | 109 | for gcunits, udunits in UNITS_MAP_CTM2CF: 110 | compliant_units = str.replace(compliant_units, gcunits, udunits) 111 | 112 | return prefix + compliant_units + suffix 113 | 114 | 115 | VARNAME_MAP_CHAR = ( 116 | ('$', 'S'), 117 | (':', '_'), 118 | ('=', '_'), 119 | ('-', '_'), 120 | ) 121 | # TODO: Variables like BXHGHT_S_N(AIR) should have *(AIR) replaced with 122 | # just *_AIR 123 | def get_valid_varname(varname): 124 | """ 125 | Replace characters (e.g., ':', '$', '=', '-') of a variable name, which 126 | may cause problems when using with (CF-)netCDF based packages. 127 | 128 | Parameters 129 | ---------- 130 | varname : string 131 | variable name. 132 | 133 | Notes 134 | ----- 135 | Characters replacement is based on the table stored in 136 | :attr:`VARNAME_MAP_CHAR`. 137 | 138 | """ 139 | vname = varname 140 | for s, r in VARNAME_MAP_CHAR: 141 | vname = vname.replace(s, r) 142 | 143 | return vname 144 | 145 | 146 | def enforce_cf_variable(var, mask_and_scale=True): 147 | """ Given a Variable constructed from GEOS-Chem output, enforce 148 | CF-compliant metadata and formatting. 149 | 150 | Until a bug with lazily-loaded data and masking/scaling is resolved in 151 | xarray, you have the option to manually mask and scale the data here. 152 | 153 | Parameters 154 | ---------- 155 | var : xarray.Variable 156 | A variable holding information decoded from GEOS-Chem output. 157 | mask_and_scale : bool 158 | Flag to scale and mask the data given the unit conversions provided 159 | 160 | Returns 161 | ------- 162 | out : xarray.Variable 163 | The original variable processed to conform to CF standards 164 | 165 | .. note:: 166 | 167 | This method borrows heavily from the ideas in ``xarray.decode_cf_variable`` 168 | 169 | """ 170 | var = as_variable(var) 171 | data = var._data # avoid loading by accessing _data instead of data 172 | dims = var.dims 173 | attrs = var.attrs.copy() 174 | encoding = var.encoding.copy() 175 | orig_dtype = data.dtype 176 | 177 | # Process masking/scaling coordinates. We only expect a "scale" value 178 | # for the units with this output. 179 | if 'scale' in attrs: 180 | scale = attrs.pop('scale') 181 | attrs['scale_factor'] = scale 182 | encoding['scale_factor'] = scale 183 | 184 | # TODO: Once the xr.decode_cf bug is fixed, we won't need to manually 185 | # handle masking/scaling 186 | if mask_and_scale: 187 | data = scale*data 188 | 189 | # Process units 190 | # TODO: How do we want to handle parts-per-* units? 
These are not part of 191 | # the udunits standard, and the CF conventions suggest using units 192 | # like 1e-6 for parts-per-million. But we potentially mix mass and 193 | # volume/molar mixing ratios in GEOS-Chem output, so we need a way 194 | # to handle that edge case. 195 | if 'unit' in attrs: 196 | unit = attrs.pop('unit') 197 | unit = get_cfcompliant_units(unit) 198 | attrs['units'] = unit 199 | 200 | # TODO: Once the xr.decode_cf bug is fixed, we won't need to manually 201 | # handle masking/scaling 202 | return Variable(dims, data, attrs, encoding=encoding) 203 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | xbpch: xarray interface for bpch files 2 | ====================================== 3 | 4 | .. image:: https://badge.fury.io/py/xbpch.svg 5 | :target: https://badge.fury.io/py/xbpch 6 | :alt: PyPI version 7 | .. image:: https://readthedocs.org/projects/xbpch/badge/?version=latest 8 | :target: http://xbpch.readthedocs.io/en/latest/?badge=latest 9 | :alt: Documentation Status 10 | .. image:: https://zenodo.org/badge/89022822.svg 11 | :target: https://zenodo.org/badge/latestdoi/89022822 12 | :alt: Zenodo DOI 13 | 14 | **xpbch** is a simple utility for reading the proprietary 15 | `binary punch format (bpch) outputs `_ used in versions 16 | of GEOS-Chem_ earlier than v11-02. The utility allows a user to load this 17 | data into an xarray_- and dask_-powered workflow without necessarily 18 | pre-processing the data using GAMAP_ or IDL. 19 | 20 | This package is maintained as part of a broader, community effort to 21 | tackle `big data problems in geoscience `_. 22 | 23 | What's the Deal? 24 | ---------------- 25 | 26 | The `contemporary scientific Python software stack `_ 27 | provides free, powerful tools for nearly all of your data processing, analysis, 28 | and visualization needs. These tools are `well supported `_ 29 | by a large community of heavily invested users and developers from academia, 30 | government, and industry. They are also developed (mostly) as part of community-based, 31 | open-source, and user-driven projects. 32 | 33 | For nearly any application you might have in the geosciences, you can start using 34 | this powerful, free software stack *today* with minimal friction. However, 35 | one friction point that has tripped up adoption by GEOS-Chem users is that it 36 | is difficult to work with legacy bpch-format diagnostics files. **xbpch** 37 | solves this problem by providing a convenient and performant way to read 38 | these files into a modern Python-based analysis or workflow. 39 | 40 | Furthermore, **xbpch** is 100% future-proof. In two years, when your GEOS-Chem 41 | simulations are writing NetCDF diagnostics, you won't need to change more than a 42 | single line of code in any of your scripts using **xbpch**. All you'll need to do 43 | is swap out **xbpch**'s function for reading data and instead defer to it's parent 44 | package (xarray). It will *literally* take less than 10 keystrokes to make this 45 | change in your code. Plus - you'll be backwards compatible with any legacy 46 | output you need to analyze. 47 | 48 | So give **xbpch** a try, and let me know what issues you run in to! If we solve 49 | them once today, they'll be solved in perpetuity, which means more time for you 50 | to do science and less time to worry about processing data. 
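As a sketch of the "less than 10 keystrokes" swap described above (the file
names and the variable chosen are placeholders, not part of the package):

.. code:: python

    from xbpch import open_bpchdataset

    # Today: read legacy bpch diagnostics
    ds = open_bpchdataset("my_geos_chem_output.bpch")

    # Later, once your runs write NetCDF diagnostics, only this line changes:
    #
    #     import xarray as xr
    #     ds = xr.open_dataset("my_geos_chem_output.nc")

    # Everything downstream of the read stays the same
    o3_mean = ds["IJ_AVG_S_O3"].mean("time")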
51 | 52 | 53 | Installation 54 | ------------ 55 | 56 | Requirements 57 | ^^^^^^^^^^^^ 58 | 59 | **xbpch** is only intended for use with Python 3, although with some 60 | modifications it would likely work with Python 2.7 (`Pull Requests are 61 | welcome! `_). As the package 62 | description implies, it requires up-to-date copies of xarray_ 63 | (>= version 0.9) and dask_ (>= version 0.14). The best way to install 64 | these packages is by using the conda_ package management system, or 65 | the `Anaconda Python distribution `_. 66 | 67 | To install **xbpch** and its dependencies using conda, execute from a terminal:: 68 | 69 | $ conda install -c conda-forge xbpch xarray dask 70 | 71 | Alternatively, you can install **xbpch** `from PyPI `_:: 73 | 74 | $ pip install xbpch 75 | 76 | You can also install **xbpch** from its source. To do this, you 77 | can either clone the source directory and manually install:: 78 | 79 | $ git clone https://github.com/darothen/xbpch.git 80 | $ cd xbpch 81 | $ python setup.py install 82 | 83 | or, you can install via pip directly from git:: 84 | 85 | $ pip install git+https://github.com/darothen/xbpch.git 86 | 87 | Please note that if you locally clone the repository from GitHub but do not 88 | explicitly install the package using ``setup.py``, the file ``xbpch/version.py`` 89 | will not get written properly and you will not be able to use the package. 90 | We strongly recommend you install the package using traditional techniques to 91 | ensure that all dependencies are properly added to your environment. 92 | 93 | Quick Start 94 | ----------- 95 | 96 | If you're already familiar with loading and manipulating data with 97 | xarray_, then it's easy to dive right into **xbpch**. Navigate to a 98 | directory on disk which contains your ``.bpch`` output, as well as 99 | ``tracerinfo.dat`` and ``diaginfo.dat``, and execute from a Python 100 | interpreter: 101 | 102 | .. code:: python 103 | 104 | from xbpch import open_bpchdataset 105 | fn = "my_geos_chem_output.bpch" 106 | ds = open_bpchdataset(fn) 107 | 108 | After a few seconds (depending on your hard-drive speed) you should be 109 | able to interact with ``ds`` just as you would any *xarray.Dataset* 110 | object. 111 | 112 | Caveats and Future Notes 113 | ------------------------ 114 | 115 | **xbpch** should work for most simple workflows, especially if you need 116 | a quick-and-dirty way to ingest legacy GEOS-Chem_ output. It is **not** 117 | tested against the majority of output grids, including data for the Hg 118 | model or nested models. Grid information (at least for the vertical) is 119 | hard-coded and may not be accurate for the most recent versions of 120 | GEOS-Chem_. 121 | 122 | Most importantly, **xbpch** does not yet solve the problem of manually 123 | scanning bpch files before producing a dataset on disk. Because the bpch 124 | format does not encode metadata about *what its contents actually are*, 125 | we must manually process this from any output file we wish to load. For 126 | the time being, we do **not** short-circuit this process because we 127 | cannot necessarily predict file position offsets in the bpch files we 128 | read. In the future, I hope to come up with an elegant solution to 129 | this problem. 130 | 131 | Acknowledgments 132 | --------------- 133 | 134 | This utility packages together a few pre-existing toolkits which 135 | have been floating around the Python-GEOS-Chem community.
In particular, 136 | I would like to acknowledge the following pieces of software which I have 137 | built this utility around: 138 | 139 | - `PyGChem `_ by 140 | `Benoit Bovy `_ 141 | - `gchem `_ by 142 | `Gerrit Kuhlmann `_ 143 | 144 | Furthermore, the strategies used to load and process binary output on disk 145 | through xarray_\'s ``DataStore`` API is heavily inspired by `Ryan 146 | Abernathey's `_ package `xmitgcm 147 | `_. 148 | 149 | 150 | License 151 | ------- 152 | 153 | Copyright (c) 2017 `Daniel Rothenberg`_ 154 | 155 | This work is licensed_ under a permissive MIT License. I acknowledge 156 | important contributions from Benoît Bovy, Gerrit Kuhlmann, and Christoph 157 | Keller in the form of prior work which helped create the foundation for 158 | this package. 159 | 160 | Contact 161 | ------- 162 | 163 | `Daniel Rothenberg`_ - darothen@mit.edu 164 | 165 | .. _`Daniel Rothenberg`: http://github.com/darothen 166 | .. _conda: http://conda.pydata.org/docs/ 167 | .. _dask: http://dask.pydata.org/ 168 | .. _GAMAP: http://acmg.seas.harvard.edu/gamap/ 169 | .. _licensed: LICENSE 170 | .. _GEOS-Chem: http://www.geos-chem.org 171 | .. _xarray: http://xarray.pydata.org/ 172 | 173 | 174 | -------------------------------------------------------------------------------- /xbpch/grid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities and information for re-constructing GEOS-Chem horizontal and vertical 3 | grids. 4 | """ 5 | 6 | import numpy as np 7 | 8 | from collections import OrderedDict 9 | 10 | from .common import broadcast_1d_array 11 | from .util.gridspec import _get_model_info, prof_altitude 12 | 13 | #: Hard-coded dimension variables to use with any Dataset read in 14 | BASE_DIMENSIONS = OrderedDict( 15 | lon=dict( 16 | dims=['lon', ], 17 | attrs={ 18 | 'standard_name': 'longitude', 19 | 'axis': 'X', 20 | } 21 | ), 22 | lat=dict( 23 | dims=['lat', ], 24 | attrs={ 25 | 'standard_name': 'latitude', 26 | 'axis': 'Y', 27 | }, 28 | ), 29 | time=dict(dims=['time', ], attrs={}), 30 | nv=dict(dims=['nv', ], attrs={}), 31 | ) 32 | 33 | 34 | #: CF/COARDS recommended dimension order; non-spatiotemporal dimensions 35 | #: should precede these. 36 | DIM_ORDER_PRIORITY = ['time', 'lev', 'lat', 'lon'] 37 | 38 | 39 | class CTMGrid(object): 40 | """ 41 | Set-up the grid of a CTM (2)3D model. 42 | 43 | Parameters 44 | ---------- 45 | model_name : string 46 | Name of the model. If it is one of the supported models, 47 | (see :class:`CTMGrid`.supported_models), it is better to use 48 | :class:`CTMGrid`.from_model or :class:`CTMGrid`.copy_from_model 49 | to set-up the grid with appropriate parameter values. 50 | resolution : (float, float) 51 | Horizontal grid resolution (lon, lat) or (DI, DJ) [degrees] 52 | (default: (5, 4)) 53 | halfpolar : bool 54 | Indicates whether polar grid boxes span half (True) or same (False) 55 | latitude as all other boxes (default: True) 56 | center180 : bool 57 | True if lon grid is centered at 180 degrees (default: True) 58 | hybrid : bool 59 | indicates whether the model is a sigma-pressure hybrid (True) or 60 | pure sigma (False) level model (default: True). 61 | Nlayers : int or None 62 | Number of vertical model layers. This number must correspond to the 63 | number of layers in the model output files and is used in 64 | conjunction with Ptop to convert sigma levels into pressure 65 | altitudes. Set value to None if the model has no vertical 66 | layer (2D) (default: None). 
67 | Ntrop : int or None 68 | Number of layers in the troposphere (default: None) 69 | Psurf : float 70 | Average surface pressure [hPa] (default: 1013.15) 71 | Ptop : float 72 | Pressure at model top [hPa] (default: 0.01) 73 | description : string 74 | Model grid description 75 | model_family : string 76 | Model family (e.g., 'GEOS' for 'GEOS5') 77 | 78 | Other Parameters 79 | ---------------- 80 | Ap, Bp : 1-d array_like 81 | Parameters for computing ETA coordinates of the vertical grid 82 | levels, if hybrid (Ap [hPa] ; Bp [unitless]). 83 | csig, esig : 1-d array_like 84 | Pre-defined sigma coordinates the centers and the bottom edges of 85 | the vertical grid, if pure sigma. 86 | 87 | Attributes 88 | ---------- 89 | Attributes are the same than the parameters above, except `model_name` 90 | which becomes :attr:`model`. 91 | 92 | """ 93 | 94 | def __init__(self, model_name, resolution=(5, 4), halfpolar=True, 95 | center180=True, hybrid=True, Nlayers=None, Ntrop=None, 96 | Psurf=1013.25, Ptop=0.01, description='', model_family='', 97 | **kwargs): 98 | 99 | self.model = model_name 100 | self.description = description 101 | self.model_family = model_family 102 | self.resolution = resolution 103 | self.halfpolar = bool(halfpolar) 104 | self.center180 = bool(center180) 105 | self.hybrid = bool(hybrid) 106 | self.Ap = None 107 | self.Bp = None 108 | self.esig = None 109 | self.csig = None 110 | try: 111 | self.Nlayers = int(Nlayers) 112 | self.Ntrop = int(Ntrop) 113 | except TypeError: 114 | self.Nlayers = Nlayers 115 | self.Ntrop = Ntrop 116 | self.Psurf = Psurf 117 | self.Ptop = Ptop 118 | 119 | self._lonlat_edges = None 120 | self._lonlat_centers = None 121 | self._eta_edges = None 122 | self._eta_centers = None 123 | self._sigma_edges = None 124 | self._sigma_centers = None 125 | self._pressure_edges = None 126 | self._pressure_centers = None 127 | self._altitude_edges = None 128 | self._altitude_centers = None 129 | 130 | for k, v in kwargs.items(): 131 | self.__setattr__(k, v) 132 | 133 | # Pre-compute grid info / coordinates 134 | layers = self.get_layers() 135 | for k, v in layers.items(): 136 | self.__setattr__(k, v) 137 | lonlats = self.get_lonlat() 138 | for k, v in lonlats.items(): 139 | self.__setattr__(k, v) 140 | 141 | 142 | @classmethod 143 | def from_model(cls, model_name, **kwargs): 144 | """ 145 | Define a grid using the specifications of a given model. 146 | 147 | Parameters 148 | ---------- 149 | model_name : string 150 | Name the model (see :func:`get_supported_models` for available 151 | model names). 152 | Supports multiple formats (e.g., 'GEOS5', 'GEOS-5' or 'GEOS_5'). 153 | **kwargs : string 154 | Parameters that override the model or default grid 155 | settings (See Other Parameters below). 156 | 157 | Returns 158 | ------- 159 | A :class:`CTMGrid` object. 160 | 161 | Other Parameters 162 | ---------------- 163 | resolution : (float, float) 164 | Horizontal grid resolution (lon, lat) or (DI, DJ) [degrees] 165 | Psurf : float 166 | Average surface pressure [hPa] (default: 1013.15) 167 | 168 | Notes 169 | ----- 170 | Regridded vertical models may have several valid names (e.g., 171 | 'GEOS5_47L' and 'GEOS5_REDUCED' refer to the same model). 
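Examples
--------
A minimal, hypothetical set-up; any supported model name and resolution
may be substituted here:

>>> grid = CTMGrid.from_model('GEOS5_47L', resolution=(2.5, 2))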
172 | 173 | """ 174 | settings = _get_model_info(model_name) 175 | model = settings.pop('model_name') 176 | for k, v in list(kwargs.items()): 177 | if k in ('resolution', 'Psurf'): 178 | settings[k] = v 179 | 180 | return cls(model, **settings) 181 | 182 | @classmethod 183 | def copy_from_model(cls, model_name, reference, **kwargs): 184 | """ 185 | Set-up a user-defined grid using specifications of a reference 186 | grid model. 187 | 188 | Parameters 189 | ---------- 190 | model_name : string 191 | name of the user-defined grid model. 192 | reference : string or :class:`CTMGrid` instance 193 | Name of the reference model (see :func:`get_supported_models`), 194 | or a :class:`CTMGrid` object from which grid set-up is copied. 195 | **kwargs 196 | Any set-up parameter which will override the settings of the 197 | reference model (see :class:`CTMGrid` parameters). 198 | 199 | Returns 200 | ------- 201 | A :class:`CTMGrid` object. 202 | 203 | """ 204 | if isinstance(reference, cls): 205 | settings = reference.__dict__.copy() 206 | settings.pop('model') 207 | else: 208 | settings = _get_model_info(reference) 209 | settings.pop('model_name') 210 | 211 | settings.update(kwargs) 212 | settings['reference'] = reference 213 | 214 | return cls(model_name, **settings) 215 | 216 | 217 | def get_layers(self, Psurf=1013.25, Ptop=0.01, **kwargs): 218 | """ 219 | Compute scalars or coordinates associated to the vertical layers. 220 | 221 | Parameters 222 | ---------- 223 | grid_spec : CTMGrid object 224 | CTMGrid containing the information necessary to re-construct grid 225 | levels for a given model coordinate system. 226 | 227 | Returns 228 | ------- 229 | dictionary of vertical grid components, including eta (unitless), 230 | sigma (unitless), pressure (hPa), and altitude (km) on both layer centers 231 | and edges, ordered from bottom-to-top. 232 | 233 | Notes 234 | ----- 235 | For pure sigma grids, sigma coordinates are given by the esig (edges) and 236 | csig (centers). 237 | 238 | For both pure sigma and hybrid grids, pressures at layers edges L are 239 | calculated as follows: 240 | 241 | .. math:: P_e(L) = A_p(L) + B_p(L) * (P_{surf} - C_p) 242 | 243 | where 244 | 245 | :math:`P_{surf}`, :math:`P_{top}` 246 | Air pressures at the surface and the top of the modeled atmosphere 247 | (:attr:`Psurf` and :attr:`Ptop` attributes of the :class:`CTMGrid` 248 | instance). 249 | :math:`A_p(L)`, :math:`Bp(L)` 250 | Specified in the grid set-up (`Ap` and `Bp` attributes) for hybrid 251 | grids, or respectively equals :math:`P_{top}` and :attr:`esig` 252 | attribute for pure sigma grids. 253 | :math:`Cp(L)` 254 | equals :math:`P_{top}` for pure sigma grids or equals 0 for hybrid 255 | grids. 256 | 257 | Pressures at grid centers are averages of pressures at grid edges: 258 | 259 | .. math:: P_c(L) = (P_e(L) + P_e(L+1)) / 2 260 | 261 | For hybrid grids, ETA coordinates of grid edges and grid centers are 262 | given by; 263 | 264 | .. math:: ETA_{e}(L) = (P_e(L) - P_{top}) / (P_{surf} - P_{top}) 265 | .. math:: ETA_{c}(L) = (P_c(L) - P_{top}) / (P_{surf} - P_{top}) 266 | 267 | Altitude values are fit using a 5th-degree polynomial; see 268 | `gridspec.prof_altitude` for more details. 
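Examples
--------
A sketch of the edge/center pressure relationships above, using made-up
hybrid coefficients purely for illustration (real Ap/Bp values come from
the model's grid specification):

>>> import numpy as np
>>> Ap = np.array([0.0, 50.0, 100.0])   # hypothetical Ap [hPa]
>>> Bp = np.array([1.0, 0.95, 0.88])    # hypothetical Bp [unitless]
>>> Pe = Ap + Bp * (1013.25 - 0.)       # hybrid grid: Cp = 0
>>> Pc = 0.5 * (Pe[:-1] + Pe[1:])       # centers average adjacent edges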
269 | 270 | """ 271 | 272 | Psurf = np.asarray(Psurf) 273 | output_ndims = Psurf.ndim + 1 274 | if output_ndims > 3: 275 | raise ValueError("`Psurf` argument must be a float or an array" 276 | " with <= 2 dimensions (or None)") 277 | 278 | # Compute all variables: takes not much memory, fast 279 | # and better for code reading 280 | SIGe = None 281 | SIGc = None 282 | ETAe = None 283 | ETAc = None 284 | 285 | if self.hybrid: 286 | try: 287 | Ap = broadcast_1d_array(self.Ap, output_ndims) 288 | Bp = broadcast_1d_array(self.Bp, output_ndims) 289 | except KeyError: 290 | raise ValueError("Impossible to compute vertical levels," 291 | " data is missing (Ap, Bp)") 292 | Cp = 0. 293 | else: 294 | try: 295 | Bp = SIGe = broadcast_1d_array(self.esig, output_ndims) 296 | SIGc = broadcast_1d_array(self.csig, output_ndims) 297 | except KeyError: 298 | raise ValueError("Impossible to compute vertical levels," 299 | " data is missing (esig, csig)") 300 | Ap = Cp = Ptop 301 | 302 | Pe = Ap + Bp * (Psurf - Cp) 303 | Pc = 0.5 * (Pe[0:-1] + Pe[1:]) 304 | 305 | if self.hybrid: 306 | ETAe = (Pe - Ptop)/(Psurf - Ptop) 307 | ETAc = (Pc - Ptop)/(Psurf - Ptop) 308 | else: 309 | SIGe = SIGe * np.ones_like(Psurf) 310 | SIGc = SIGc * np.ones_like(Psurf) 311 | 312 | Ze = prof_altitude(Pe, **kwargs) 313 | Zc = prof_altitude(Pc, **kwargs) 314 | 315 | all_vars = {'eta_edges': ETAe, 316 | 'eta_centers': ETAc, 317 | 'sigma_edges': SIGe, 318 | 'sigma_centers': SIGc, 319 | 'pressure_edges': Pe, 320 | 'pressure_centers': Pc, 321 | 'altitude_edges': Ze, 322 | 'altitude_centers': Zc} 323 | 324 | return all_vars 325 | 326 | 327 | def get_lonlat(self): 328 | """ 329 | Calculate longitude-latitude grid for a specified resolution and 330 | configuration / ordering. 331 | 332 | Parameters 333 | ---------- 334 | rlon, rlat : float 335 | Resolution (in degrees) of longitude and latitude grids. 336 | halfpolar : bool (default=True) 337 | Polar grid boxes span half of rlat relative to the other grid cells. 338 | center180 : bool (default=True) 339 | Longitude grid should be centered at 180 degrees. 340 | 341 | """ 342 | 343 | rlon, rlat = self.resolution 344 | 345 | # Compute number of grid cells in each direction 346 | Nlon = int(360. / rlon) 347 | Nlat = int(180. / rlat) + self.halfpolar 348 | 349 | # Compute grid cell edges 350 | elon = np.arange(Nlon + 1) * rlon - np.array(180.) 351 | elon -= rlon / 2. * self.center180 352 | elat = np.arange(Nlat + 1) * rlat - np.array(90.) 353 | elat -= rlat / 2. * self.halfpolar 354 | elat[0] = -90. 355 | elat[-1] = 90. 356 | 357 | # Compute grid cell centers 358 | clon = (elon - (rlon / 2.))[1:] 359 | clat = np.arange(Nlat) * rlat - np.array(90.) 360 | 361 | # Fix grid boundaries if halfpolar 362 | if self.halfpolar: 363 | clat[0] = (elat[0] + elat[1]) / 2. 364 | clat[-1] = -clat[0] 365 | else: 366 | clat += (elat[1] - elat[0]) / 2. 367 | 368 | return { 369 | "lon_centers": clon, "lat_centers": clat, 370 | "lon_edges": elon, "lat_edges": elat 371 | } 372 | 373 | 374 | def get_grid_spec(model_name): 375 | """ 376 | Pass-through to look-up the grid specifications for a given GEOS-Chem 377 | configuration. 378 | 379 | Parameters 380 | ---------- 381 | model_name : str 382 | Name of the model; variations in naming format are permissible, e.g. 383 | "GEOS5" can be requested as "GEOS-5" or "GEOS_5". 384 | resolution : tuple of floats 385 | Longitude x latitude resolution of the model. 386 | 387 | Returns 388 | ------- 389 | grid_spec : dict 390 | Critical grid information as items in a dictionary. 
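Examples
--------
A hypothetical look-up (the returned keys depend on the model definitions
in `MODELS`):

>>> spec = get_grid_spec('GEOS5')   # 'GEOS-5' or 'GEOS_5' also work
>>> isinstance(spec, dict)
True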
391 | 392 | """ 393 | return _get_model_info(model_name) 394 | -------------------------------------------------------------------------------- /xbpch/bpch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility classes and tools for handling data contained in bpch files 3 | 4 | """ 5 | 6 | from dask import delayed 7 | import dask.array as da 8 | import numpy as np 9 | import os 10 | 11 | from collections import OrderedDict 12 | 13 | from . uff import FortranFile 14 | from . util import cf 15 | from . util.diaginfo import get_diaginfo, get_tracerinfo 16 | 17 | #: Default datatype for legacy bpch output 18 | DEFAULT_DTYPE = 'f4' 19 | 20 | class BPCHDataBundle(object): 21 | """ A single slice of a single variable inside a bpch file, and all 22 | of its critical accompanying metadata. """ 23 | 24 | __slots__ = ('_shape', 'dtype', 'endian', 'filename', 'file_position', 25 | 'time', 'metadata', '_data', '_mmap', '_dask') 26 | 27 | def __init__(self, shape, endian, filename, file_position, time, 28 | metadata, data=None, dtype=None, 29 | use_mmap=False, dask_delayed=False): 30 | self._shape = shape 31 | self.dtype = dtype 32 | self.endian = endian 33 | self.filename = filename 34 | self.file_position = file_position 35 | self.time = time 36 | self.metadata = metadata 37 | 38 | if dtype is None: 39 | self.dtype = np.dtype(self.endian + DEFAULT_DTYPE) 40 | else: 41 | self.dtype = dtype 42 | 43 | # Note that data is initially prescribed as None, but we keep a hook 44 | # here so that we can inject payloads at load time, if we want 45 | # (for instance, to avoid reading/memmapping through a file) 46 | self._data = data 47 | self._mmap = use_mmap 48 | self._dask = dask_delayed 49 | 50 | @property 51 | def shape(self): 52 | return self._shape 53 | 54 | @property 55 | def ndim(self): 56 | return len(self.shape) 57 | 58 | @property 59 | def array(self): 60 | return self.data 61 | 62 | @property 63 | def data(self): 64 | if self._data is None: 65 | self._data = self._read() 66 | return self._data 67 | 68 | def _read(self): 69 | """ Helper function to load the data referenced by this bundle. 
""" 70 | if self._dask: 71 | d = da.from_delayed( 72 | delayed(read_from_bpch, )( 73 | self.filename, self.file_position, self.shape, 74 | self.dtype, self.endian, use_mmap=self._mmap 75 | ), 76 | self.shape, self.dtype 77 | ) 78 | else: 79 | d = read_from_bpch( 80 | self.filename, self.file_position, self.shape, 81 | self.dtype, self.endian, use_mmap=self._mmap 82 | ) 83 | 84 | return d 85 | 86 | 87 | class BPCHFile(object): 88 | """ A file object for representing BPCH data on disk 89 | 90 | Attributes 91 | ---------- 92 | fp : FortranFile 93 | A pointer to the open unformatted Fortran binary output (the original 94 | bpch file) 95 | var_data, var_attrs : dict 96 | Containers of `BPCHDataBundle`s and dicts, respectively, holding 97 | the accessor functions to the raw bpch data and their associated 98 | metadata 99 | 100 | """ 101 | 102 | def __init__(self, filename, mode='rb', endian='>', 103 | diaginfo_file='', tracerinfo_file='', eager=False, 104 | use_mmap=False, dask_delayed=False): 105 | """ Load a BPCHFile 106 | 107 | Parameters 108 | ---------- 109 | filename : str 110 | Path to the bpch file on disk 111 | mode : str 112 | Mode string to pass to the file opener; this is currently fixed to 113 | "rb" and all other values will be rejected 114 | endian : str {">", "<", ":"} 115 | Endian-ness of the Fortran output file 116 | {tracerinfo, diaginfo}_file : str 117 | Path to the tracerinfo.dat and diaginfo.dat files containing 118 | metadata pertaining to the output in the bpch file being read. 119 | eager : bool 120 | Flag to immediately read variable data; if "False", then nothing 121 | will be read from the file and you'll need to do so manually 122 | use_mmap : bool 123 | Use memory-mapping to read data from file 124 | dask_delayed : bool 125 | Use dask to create delayed references to the data-reading functions 126 | """ 127 | 128 | self.mode = mode 129 | if not mode.startswith('r'): 130 | raise ValueError("Currently only know how to 'r(b)'ead bpch files.") 131 | 132 | self.filename = filename 133 | self.fsize = os.path.getsize(self.filename) 134 | self.endian = endian 135 | 136 | # Open a pointer to the file 137 | self.fp = FortranFile(self.filename, self.mode, self.endian) 138 | 139 | dir_path = os.path.abspath(os.path.dirname(filename)) 140 | if not dir_path: 141 | dir_path = os.getcwd() 142 | if not tracerinfo_file: 143 | tracerinfo_file = os.path.join(dir_path, "tracerinfo.dat") 144 | if not os.path.exists(tracerinfo_file): 145 | tracerinfo_file = '' 146 | self.tracerinfo_file = tracerinfo_file 147 | if not diaginfo_file: 148 | diaginfo_file = os.path.join(dir_path, "diaginfo.dat") 149 | if not os.path.exists(diaginfo_file): 150 | diaginfo_file = '' 151 | self.diaginfo_file = diaginfo_file 152 | 153 | # Container to record file metadata 154 | self._attributes = OrderedDict() 155 | 156 | # Don't necessarily need to save diag/tracer_dict yet 157 | self.diaginfo_df, _ = get_diaginfo(self.diaginfo_file) 158 | self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file) 159 | 160 | # Container for bundles contained in the output file. 161 | self.var_data = {} 162 | self.var_attrs = {} 163 | 164 | # Critical information for accessing file contents 165 | self._header_pos = None 166 | 167 | # Data loading strategy 168 | self.use_mmap = use_mmap 169 | self.dask_delayed = dask_delayed 170 | 171 | # Control eager versus deferring reading 172 | self.eager = eager 173 | if (mode.startswith('r') and self.eager): 174 | self._read() 175 | 176 | def close(self): 177 | """ Close this bpch file. 
178 | 179 | """ 180 | 181 | if not self.fp.closed: 182 | for v in list(self.var_data): 183 | del self.var_data[v] 184 | 185 | self.fp.close() 186 | 187 | def __enter__(self): 188 | return self 189 | 190 | def __exit__(self, type, value, traceback): 191 | self.close() 192 | 193 | def _read(self): 194 | """ Parse the entire bpch file on disk and set up easy access to meta- 195 | and data blocks. 196 | 197 | """ 198 | 199 | self._read_metadata() 200 | self._read_header() 201 | self._read_var_data() 202 | 203 | def _read_metadata(self): 204 | """ Read the main metadata packaged within a bpch file, indicating 205 | the output filetype and its title. 206 | 207 | """ 208 | 209 | filetype = self.fp.readline().strip() 210 | filetitle = self.fp.readline().strip() 211 | # Decode to UTF string, if possible 212 | try: 213 | filetype = str(filetype, 'utf-8') 214 | filetitle = str(filetitle, 'utf-8') 215 | except: 216 | # TODO: Handle this edge-case of converting file metadata more elegantly. 217 | pass 218 | 219 | self.__setattr__('filetype', filetype) 220 | self.__setattr__('filetitle', filetitle) 221 | 222 | def _read_header(self): 223 | """ Process the header information (data model / grid spec) """ 224 | 225 | self._header_pos = self.fp.tell() 226 | 227 | line = self.fp.readline('20sffii') 228 | modelname, res0, res1, halfpolar, center180 = line 229 | self._attributes.update({ 230 | "modelname": str(modelname, 'utf-8').strip(), 231 | "halfpolar": halfpolar, 232 | "center180": center180, 233 | "res": (res0, res1) 234 | }) 235 | self.__setattr__('modelname', modelname) 236 | self.__setattr__('res', (res0, res1)) 237 | self.__setattr__('halfpolar', halfpolar) 238 | self.__setattr__('center180', center180) 239 | 240 | # Re-wind the file 241 | self.fp.seek(self._header_pos) 242 | 243 | 244 | def _read_var_data(self): 245 | """ Iterate over the block of this bpch file and return handlers 246 | in the form of `BPCHDataBundle`s for access to the data contained 247 | therein. 
248 | 249 | """ 250 | 251 | var_bundles = OrderedDict() 252 | var_attrs = OrderedDict() 253 | 254 | n_vars = 0 255 | 256 | while self.fp.tell() < self.fsize: 257 | 258 | var_attr = OrderedDict() 259 | 260 | # read first and second header lines 261 | line = self.fp.readline('20sffii') 262 | modelname, res0, res1, halfpolar, center180 = line 263 | 264 | line = self.fp.readline('40si40sdd40s7i') 265 | category_name, number, unit, tau0, tau1, reserved = line[:6] 266 | dim0, dim1, dim2, dim3, dim4, dim5, skip = line[6:] 267 | var_attr['number'] = number 268 | 269 | # Decode byte-strings to utf-8 270 | category_name = str(category_name, 'utf-8') 271 | var_attr['category'] = category_name.strip() 272 | unit = str(unit, 'utf-8') 273 | 274 | # get additional metadata from tracerinfo / diaginfo 275 | try: 276 | cat_df = self.diaginfo_df[ 277 | self.diaginfo_df.name == category_name.strip() 278 | ] 279 | # TODO: Safer logic for handling case where more than one 280 | # tracer metadata match was made 281 | # if len(cat_df > 1): 282 | # raise ValueError( 283 | # "More than one category matching {} found in " 284 | # "diaginfo.dat".format( 285 | # category_name.strip() 286 | # ) 287 | # ) 288 | # Safe now to select the only row in the DataFrame 289 | cat = cat_df.T.squeeze() 290 | 291 | tracer_num = int(cat.offset) + int(number) 292 | diag_df = self.tracerinfo_df[ 293 | self.tracerinfo_df.tracer == tracer_num 294 | ] 295 | # TODO: Safer logic for handling case where more than one 296 | # tracer metadata match was made 297 | # if len(diag_df > 1): 298 | # raise ValueError( 299 | # "More than one tracer matching {:d} found in " 300 | # "tracerinfo.dat".format(tracer_num) 301 | # ) 302 | # Safe now to select only row in the DataFrame 303 | diag = diag_df.T.squeeze() 304 | diag_attr = diag.to_dict() 305 | 306 | if not unit.strip(): # unit may be empty in bpch 307 | unit = diag_attr['unit'] # but not in tracerinfo 308 | var_attr.update(diag_attr) 309 | except: 310 | diag = {'name': '', 'scale': 1} 311 | var_attr.update(diag) 312 | var_attr['unit'] = unit 313 | 314 | vname = diag['name'] 315 | fullname = category_name.strip() + "_" + vname 316 | 317 | # parse metadata, get data or set a data proxy 318 | if dim2 == 1: 319 | data_shape = (dim0, dim1) # 2D field 320 | else: 321 | data_shape = (dim0, dim1, dim2) 322 | var_attr['original_shape'] = data_shape 323 | 324 | # Add proxy time dimension to shape 325 | data_shape = tuple([1, ] + list(data_shape)) 326 | origin = (dim3, dim4, dim5) 327 | var_attr['origin'] = origin 328 | 329 | timelo, timehi = cf.tau2time(tau0), cf.tau2time(tau1) 330 | 331 | pos = self.fp.tell() 332 | # Note that we don't pass a dtype, and assume everything is 333 | # single-fp floats with the correct endian, as hard-coded 334 | var_bundle = BPCHDataBundle( 335 | data_shape, self.endian, self.filename, pos, [timelo, timehi], 336 | metadata=var_attr, 337 | use_mmap=self.use_mmap, dask_delayed=self.dask_delayed 338 | ) 339 | self.fp.skipline() 340 | 341 | # Save the data as a "bundle" for concatenating in the final step 342 | if fullname in var_bundles: 343 | var_bundles[fullname].append(var_bundle) 344 | else: 345 | var_bundles[fullname] = [var_bundle, ] 346 | var_attrs[fullname] = var_attr 347 | n_vars += 1 348 | 349 | self.var_data = var_bundles 350 | self.var_attrs = var_attrs 351 | 352 | 353 | def read_from_bpch(filename, file_position, shape, dtype, endian, 354 | use_mmap=False): 355 | """ Read a chunk of data from a bpch output file. 
356 | 357 | Parameters 358 | ---------- 359 | filename : str 360 | Path to file on disk containing the data 361 | file_position : int 362 | Position (bytes) where desired data chunk begins 363 | shape : tuple of ints 364 | Resultant (n-dimensional) shape of requested data; the chunk 365 | will be read sequentially from disk and then re-shaped 366 | dtype : dtype 367 | Dtype of data; for best results, pass a dtype which includes 368 | an endian indicator, e.g. `dtype = np.dtype('>f4')` 369 | endian : str 370 | Endianness of data; should be consistent with `dtype` 371 | use_mmap : bool 372 | Memory map the chunk of data to the file on disk, else read 373 | immediately 374 | 375 | Returns 376 | ------- 377 | Array with shape `shape` and dtype `dtype` containing the requested 378 | chunk of data from `filename`. 379 | 380 | """ 381 | offset = file_position + 4 382 | if use_mmap: 383 | d = np.memmap(filename, dtype=dtype, mode='r', shape=shape, 384 | offset=offset, order='F') 385 | else: 386 | with FortranFile(filename, 'rb', endian) as ff: 387 | ff.seek(file_position) 388 | d = np.array(ff.readline('*f')) 389 | d = d.reshape(shape, order='F') 390 | 391 | # As a sanity check, *be sure* that the resulting data block has the 392 | # correct shape, and fail early if it doesn't. 393 | if (d.shape != shape): 394 | raise IOError("Data chunk read from {} does not have the right shape," 395 | " (expected {} but got {})" 396 | .format(filename, shape, d.shape)) 397 | 398 | return d 399 | -------------------------------------------------------------------------------- /doc/usage.rst: -------------------------------------------------------------------------------- 1 | 2 | Usage and Examples 3 | ================== 4 | 5 | Reading Output 6 | -------------- 7 | 8 | The routines for reading bpch files from disk into ``xarray.Dataset``\s is 9 | based mostly on the ``xarray.open_dataset`` method. However, to handle 10 | some of the idiosyncrasies of GEOS-Chem output, our implementation of 11 | :py:func:`~xbpch.open_bpchdataset` has a few additional arguments to know 12 | about. 13 | 14 | Main :py:func:`~xbpch.open_bpchdataset` Arguments 15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 16 | 17 | The majority of the time, you'll want to load/read data via xarray, using the 18 | method :py:func:`~xbpch.open_bpchdataset`, as shown in the :ref:`quick start`. 19 | This routine fundamentally requires three arguments: 20 | 21 | - ``filename``: the full path to the output file you want to load 22 | - ``tracerinfo_file``: the full path to the file *tracerinfo.dat*, which 23 | contains the names and indices of each tracer output by GEOS-Chem 24 | - ``diaginfo_file``: the full path to the file *diaginfo.dat*, which contains 25 | the listing of categories and their tracer number offsets in the tracer 26 | output index. 27 | 28 | If you don't pass a value for either ``tracerinfo_file`` or ``diaginfo_file``, 29 | **xbpch** will look for them in the current directory, assuming the Default 30 | naming scheme. However, if it *still* can't find a file, it'll raise an error 31 | (we do not assume to know what is in your output!) 32 | 33 | In many simulations, GEOS-Chem will write multiple timesteps of a large number 34 | of fields to a single output file. This can result in outputs on the order of 35 | 10's of GB! If you know for certain that you only want a specific tracer or 36 | category of tracers, you can supply a list of their names to either ``fields`` 37 | or ``categories``. 
38 | 39 | For instance, using the `v11-01 diagnostics `_ 40 | for reference, we can load in any tracer with the name "O3" by passing 41 | 42 | .. ipython:: python 43 | :verbatim: 44 | 45 | import xbpch 46 | o3_data = xbpch.open_bpchdataset("my_data.bpch", fields=['O3', ]) 47 | 48 | Alternatively, we can load all the tracers associated with a given category 49 | by specifying the ``categories`` argument. To grab all the saved 2D meteorology 50 | fields, this would entail 51 | 52 | .. ipython:: python 53 | :verbatim: 54 | 55 | met_data = xbpch.open_bpchdataset( 56 | "my_data.bpch", categories=["DAO-FLDS", ] 57 | ) 58 | 59 | 60 | What Works and Doesn't Work 61 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 62 | 63 | **xbpch** should work with most standard GEOS-Chem outputs going back to at 64 | least v9-02. It has been tested against some of these standard outputs, but 65 | not exhaustively. If you have an idiosyncratic GEOS-Chem output (e.g. from a 66 | specialized version of the model with custom tracers or a new grid), please 67 | give **xbpch** a try, and if it fails, post `an Issue on our GitHub page `_ 68 | to let us know. 69 | 70 | The following configurations have been tested and vetted: 71 | 72 | - Standard output on standard grids 73 | - ND49 output on standard grids 74 | - ND49 output on nested North America grid (should work for all nested grids) 75 | 76 | 77 | Eager vs Lazy Loading 78 | ^^^^^^^^^^^^^^^^^^^^^ 79 | 80 | One of the main advantages of using **xbpch** is that it allows you to access 81 | data without immediately needing to read it all from disk. On a modern 82 | analysis cluster, this isn't a problem, but if you want to process output 83 | on your laptop, you can quickly run into situations where all of your data 84 | won't fit in memory. In those situations, you have to tediously block your 85 | analysis algorithms/pipeline. 86 | 87 | .. note:: 88 | 89 | Even though you may request lazily-loaded data, **xbpch** still needs 90 | to read your input file to parse its contents. This requires iterating 91 | line-by-line through the input file, so it may take some time (about 92 | 10 seconds to read a 6GB file on my late-2016 MacBook Pro). 93 | Unfortunately, if we don't do this, we can't infer the tracers or their 94 | distribution over multiple timesteps contained in the input file. 95 | 96 | The keyword arguments ``memmap`` and ``dask`` control how data is read from 97 | your bpch files. 98 | 99 | ``memmap`` 100 | if enabled, the data for each timestep and variable will be 101 | accessed through a memory-map into the input file 102 | ``dask`` 103 | if enabled, the function to read each timestep for each variable 104 | will be wrapped in a ``dask.delayed`` object, initiating a task graph 105 | for accessing the data 106 | 107 | .. warning:: 108 | 109 | Opening a dataset using ``memmap=True`` and ``dask=False`` *will not work*. 110 | Each memory-mapped array counts as an open file, which will quickly add up 111 | and hit your operating system's limit on simultaneously open files. 112 | 113 | If ``dask=True`` is used to open a dataset, then all of the data in the bpch 114 | file is represented by ``dask.array``\s, and all operations are lazy. That is, 115 | they are not evaluated until the user explicitly instructs them to be, and 116 | instead a graph representing your computation is constructed. 117 | 118 | 119 | Chunking 120 | ^^^^^^^^ 121 | 122 | When data is loaded with the ``dask`` flag enabled, all the operations 123 | necessary to create contiguous chunks of data are deferred.
Because of the way 124 | data is written to bpch files by GEOS-Chem, these deferred actions are all 125 | based on single timesteps of data for each variable by default. Thus, in the 126 | parlance of dask, all the data is implicitly chunked on the **time** dimension. 127 | 128 | When dask encounters chunked calculations, it will automatically attempt 129 | to parallelize them across all the cores available on your machine, and will 130 | attempt to limit the amount of data held in-memory at any given time. 131 | 132 | To illustrate this, consider a monthly history dataset ``ds`` loaded via 133 | :py:func:`~xbpch.open_bpchdataset`. The initial task graph representing this 134 | data may look something like: 135 | 136 | .. figure:: dask_graphs/sample_read.png 137 | :scale: 100% 138 | :alt: Monthly history dask read/concat graph 139 | 140 | Tasks for reading and processing monthly output for a single variable in 141 | a year-long bpch output file 142 | 143 | This graph illustrates that dask is expected to process 12 chunks of data - one 144 | for each month (timestep) in the dataset. The graph shows the operations for 145 | reading the data, casting it to the correct data type, and re-scaling, which are 146 | applied automatically by **xbpch** and xarray. 147 | 148 | At this point, the data has only been processed in such a way that it fits 149 | the numpy.ndarray memory model, and thus can be used to construct xarray 150 | objects. A trivial calculation on this data may be to normalize the timeseries 151 | of data in each grid cell to have zero mean and unit variance. For any 152 | ``xarray.DataArray`` we could write this operation as 153 | 154 | .. ipython:: python 155 | :verbatim: 156 | 157 | da_normal = (da - da.mean('time'))/da.std('time') 158 | 159 | which produces the computational graph 160 | 161 | .. figure:: dask_graphs/sample_normalized.png 162 | :scale: 100% 163 | :alt: Normalization calculation on monthly data 164 | 165 | Computational graph for normalizing monthly data 166 | 167 | A second key function of ``dask`` is to analyze and parse these computational 168 | graphs into a simplified form. In practice, the resulting graph will be 169 | much simpler, which can dramatically speed up your analysis. For instance, if 170 | you sub-sample the variables and timesteps used in your analysis, **xbpch** 171 | (through dask) will avoid reading extra, unused data from the input files you passed 172 | it. 173 | 174 | .. note:: 175 | 176 | Sometimes it's advantageous to re-chunk a dataset (see 177 | `here `_ for a discussion on 178 | when this may be the case). This is easily accomplished through xarray, or 179 | can be done directly on the ``dask.array``\s containing your data if you 180 | have a more complex analysis to perform. 181 | 182 | 183 | Finally, it's important to know that the computational graphs that dask 184 | produces are never evaluated until you explicitly call ``.load()`` on a dask 185 | array or xarray Data{Array,set}. Different computations or uses for your data 186 | might imply an automatic ``load()``; for instance, if you use the plotting 187 | wrapper built into xarray, it will (necessarily) eagerly load your data. If you'd 188 | like to monitor the progress of a very long analysis built through 189 | **xbpch**/xarray/dask, you can use the built-in diagnostic tools from dask: 190 | 191 | .. ipython:: python 192 | :verbatim: 193 | 194 | from dask.diagnostics import ProgressBar 195 | 196 | # Construct some analysis 197 | my_ds = ...
198 | 199 | # Eagerly compute the results 200 | with ProgressBar() as pb: 201 | my_ds.load() 202 | 203 | .. parsed-literal:: 204 | [####################################] | 100% Completed | 10.2s 205 | 206 | Geographic Visualization 207 | ------------------------ 208 | 209 | One easy application of **xbpch** is for the visualization of your data. 210 | For cartographic or geographic plots, we recommend using the cartopy_ package 211 | maintained by the UK Met Office. 212 | 213 | Plotting on a cartopy_ map is straightforward. Suppose we have a Dataset ``ds`` 214 | read from a bpch file. We can first compute an analysis of interest - say, 215 | the difference between mean fields for summer versus winter: 216 | 217 | .. ipython:: python 218 | :verbatim: 219 | 220 | ds_seas = ds.groupby("time.season").mean('time') 221 | diff = ds_seas.sel(season='DJF') - ds_seas.sel(season='JJA') 222 | 223 | .. parsed-literal:: 224 | 225 | 226 | Dimensions: (lat: 91, lev: 47, lon: 144, nv: 2) 227 | Coordinates: 228 | * lev (lev) float64 0.9925 0.9775 0.9624 0.9473 0.9322 0.9171 ... 229 | * lon (lon) float64 -180.0 -177.5 -175.0 -172.5 -170.0 -167.5 ... 230 | * lat (lat) float64 -89.5 -88.0 -86.0 -84.0 -82.0 -80.0 -78.0 ... 231 | * nv (nv) int64 0 1 232 | Data variables: 233 | ANTHSRCE_O3 (lon, lat) float32 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 234 | IJ_AVG_S_O3 (lon, lat, lev) float32 -23.1014 -23.2715 -23.4614 -23.5216 ... 235 | 236 | Plotting a portion of this dataset on a cartopy_ map is straightforward. First, 237 | we create a figure and add an axes with the map projection information 238 | encoded: 239 | 240 | .. ipython:: python 241 | :verbatim: 242 | 243 | import matplotlib.pyplot as plt 244 | import cartopy.crs as ccrs 245 | 246 | fig = plt.figure() 247 | ax = fig.add_subplot(111, projection=ccrs.PlateCarree(), aspect='auto') 248 | 249 | Then, we can plot our data as normal. cartopy_ has a few helper functions which 250 | we can use to add basic geographic elements such as coastlines and borders to 251 | the plot. 252 | 253 | .. ipython:: python 254 | :verbatim: 255 | 256 | import cartopy.feature as cfeature 257 | 258 | # Select some data to plot 259 | da = diff.isel(lev=0).IJ_AVG_S_O3 260 | 261 | im = ax.contourf(da.lon.values, da.lat.values, da.values.T) 262 | cb = fig.colorbar(im, ax=ax, orientation='horizontal') 263 | ax.add_feature(cfeature.COASTLINE) 264 | ax.add_feature(cfeature.BORDERS) 265 | 266 | .. figure:: example_plots/cartopy_example.png 267 | :scale: 100% 268 | :alt: cartopy plot 269 | 270 | Example of a simple plot with cartopy_ 271 | 272 | Alternatively, we can use `xarray's matplotlib wrappers `_ 273 | to automate some of this plotting for us. For instance, we can quickly make 274 | a faceted plot of our seasonal data (including with a cartopy_ axis) with 275 | just a few lines of code: 276 | 277 | .. ipython:: python 278 | :verbatim: 279 | 280 | # Select some data to plot 281 | da = ds_seas.isel(lev=0).IJ_AVG_S_O3 282 | da = da - ds.isel(lev=0).IJ_AVG_S_O3.mean('time') 283 | 284 | g = da.plot.imshow('lon', 'lat', col='season', col_wrap=2, 285 | subplot_kws=dict(projection=ccrs.Robinson()), transform=ccrs.PlateCarree()) 286 | for ax in g.axes.flatten(): 287 | ax.add_feature(cfeature.COASTLINE) 288 | 289 | .. figure:: example_plots/cartopy_seasonal_facet.png 290 | :scale: 100% 291 | :alt: cartopy plot 292 | 293 | Faceting over a non-coordinate dimension using xarray's built-in plotting 294 | tools. 295 | 296 | There's a lot going on in this code sample: 297 | 298 | 1. 
First, we take the seasonal mean data we previously computed. 299 | 2. Subtract out the annual mean from each seasonal mean. 300 | 3. Use `imshow `_ 301 | to plot each grid cell in our dataset. 302 | 303 | - We tell the plotting function to use ``"lon"`` and ``"lat"`` as the keys 304 | to access the x/y data for the dataset 305 | - We further instruct xarray to facet over the ``"season"`` coordinate, and 306 | include two columns per row in the resulting facet grid 307 | - We pass a dictionary of keyword arguments to ``subplot_kws``, which is used 308 | when creating each subplot in our facet grid. In this case, we tell each 309 | subplot to use a Robinson map projection 310 | - We pass a final keyword argument, ``transform``, which is passed to each 311 | invocation of ``imshow()`` on the facet grid; this tells cartopy_ how to 312 | map from the projection data to our actual data. Here, a ``ccrs.PlateCarree()`` 313 | is a standard, equally-spaced latitude-longitude grid 314 | 4. Iterate over each axis in the facet grid, and add our coastlines to it. 315 | 316 | .. _cartopy: http://scitools.org.uk/cartopy/docs/v0.13/index.html 317 | 318 | 319 | Timeseries Analysis 320 | ------------------- 321 | 322 | Another application that **xbpch**/xarray makes easy is timeseries analysis. 323 | For example, consider the timeseries of ND49 output from the :ref:`quick start`. 324 | A classic timeseries analysis in atmospheric chemistry is computing the daily 325 | maximum 8-hour average for a given tracer. The core of this computation can be 326 | achieved in just a few lines of code via xarray: 327 | 328 | .. ipython:: python 329 | :verbatim: 330 | 331 | o3 = ds.IJ_AVG_S_O3 332 | mda8_o3 = ( 333 | o3.rolling(time=8, min_periods=6).mean() 334 | .resample("D", "time", how='max') 335 | ) 336 | 337 | This code is highly performant; the ``.rolling()`` operation is farmed out to 338 | a high-performance C library (`bottleneck `_) 339 | and all operations are applied by broadcasting over the time dimension. 340 | 341 | .. note:: 342 | 343 | bottleneck does not work with dask arrays, so you will need to eagerly 344 | ``.load()`` the data into memory if it hasn't already been done. Future 345 | versions of xarray will wrap functionality in dask to perform these 346 | operations in parallel, but this is a work in progress. 347 | 348 | 349 | Save to NetCDF 350 | -------------- 351 | 352 | Without any extra work, datasets read in via **xbpch** can easily be serialized 353 | back to disk in NetCDF format: 354 | 355 | .. ipython:: python 356 | :verbatim: 357 | 358 | ds.to_netcdf("my_bpch_data.nc") 359 | 360 | They can then be read back in via xarray: 361 | 362 | .. ipython:: python 363 | :verbatim: 364 | 365 | import xarray as xr 366 | ds = xr.open_dataset("my_bpch_data.nc") 367 | 368 | .. note:: 369 | 370 | As of v0.2.0, immediately writing to netcdf may not work due to the way variable 371 | units and scaling factors are encoded when they are read into **xbpch**. This 372 | will be fixed once some upstream issues with xarray are patched. If you run into 373 | the following ``ValueError``:: 374 | 375 | ValueError: Failed hard to prevent overwriting key 'scale_factor' 376 | 377 | then before you save it, process it with the :meth:`xbpch.common.fix_attr_encoding()` 378 | method: 379 | 380 | .. 
ipython:: python 381 | :verbatim: 382 | 383 | my_ds = xbpch.common.fix_attr_encoding(my_ds) 384 | 385 | my_ds.to_netcdf("my_data.nc") 386 | -------------------------------------------------------------------------------- /xbpch/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | API for reading BPCH files via xarray 3 | 4 | """ 5 | from __future__ import print_function, division 6 | 7 | from glob import glob 8 | import os 9 | import numpy as np 10 | import xarray as xr 11 | import warnings 12 | 13 | import dask.array as da 14 | 15 | from collections import OrderedDict 16 | 17 | from xarray.backends.common import AbstractDataStore 18 | from xarray.core.utils import Frozen 19 | 20 | from . bpch import BPCHFile 21 | from . common import get_timestamp 22 | from . grid import BASE_DIMENSIONS, CTMGrid 23 | from . util import cf 24 | from . version import __version__ as ver 25 | 26 | 27 | def open_bpchdataset(filename, fields=[], categories=[], 28 | tracerinfo_file='tracerinfo.dat', 29 | diaginfo_file='diaginfo.dat', 30 | endian=">", decode_cf=True, 31 | memmap=True, dask=True, return_store=False): 32 | """ Open a GEOS-Chem BPCH file output as an xarray Dataset. 33 | 34 | Parameters 35 | ---------- 36 | filename : string 37 | Path to the output file to read in. 38 | {tracerinfo,diaginfo}_file : string, optional 39 | Path to the metadata "info" .dat files which are used to decipher 40 | the metadata corresponding to each variable in the output dataset. 41 | If not provided, will look for them in the current directory or 42 | fall back on a generic set. 43 | fields : list, optional 44 | List of a subset of variable names to return. This can substantially 45 | improve read performance. Note that the field here is just the tracer 46 | name - not the category, e.g. 'O3' instead of 'IJ-AVG-$_O3'. 47 | categories : list, optional 48 | List a subset of variable categories to look through. This can 49 | substantially improve read performance. 50 | endian : {'=', '>', '<'}, optional 51 | Endianness of file on disk. By default, "big endian" (">") is assumed. 52 | decode_cf : bool 53 | Enforce CF conventions for variable names, units, and other metadata 54 | default_dtype : numpy.dtype, optional 55 | Default datatype for variables encoded in file on disk (single-precision 56 | float by default). 57 | memmap : bool 58 | Flag indicating that data should be memory-mapped from disk instead of 59 | eagerly loaded into memory 60 | dask : bool 61 | Flag indicating that data reading should be deferred (delayed) to 62 | construct a task-graph for later execution 63 | return_store : bool 64 | Also return the underlying DataStore to the user 65 | 66 | Returns 67 | ------- 68 | ds : xarray.Dataset 69 | Dataset containing the requested fields (or the entire file), with data 70 | contained in proxy containers for access later. 
71 | store : xarray.AbstractDataStore 72 | Underlying DataStore which handles the loading and processing of 73 | bpch files on disk 74 | 75 | """ 76 | 77 | store = BPCHDataStore( 78 | filename, fields=fields, categories=categories, 79 | tracerinfo_file=tracerinfo_file, 80 | diaginfo_file=diaginfo_file, endian=endian, 81 | use_mmap=memmap, dask_delayed=dask 82 | ) 83 | ds = xr.Dataset.load_store(store) 84 | 85 | # Handle CF corrections 86 | if decode_cf: 87 | decoded_vars = OrderedDict() 88 | rename_dict = {} 89 | for v in ds.variables: 90 | cf_name = cf.get_valid_varname(v) 91 | rename_dict[v] = cf_name 92 | new_var = cf.enforce_cf_variable(ds[v]) 93 | decoded_vars[cf_name] = new_var 94 | ds = xr.Dataset(decoded_vars, attrs=ds.attrs.copy()) 95 | 96 | # ds.rename(rename_dict, inplace=True) 97 | 98 | # TODO: There's a bug with xr.decode_cf which eagerly loads data. 99 | # Re-enable this once that bug is fixed 100 | # Note that we do not need to decode the times because we explicitly 101 | # kept track of them as we parsed the data. 102 | # ds = xr.decode_cf(ds, decode_times=False) 103 | 104 | # Set attributes for CF conventions 105 | ts = get_timestamp() 106 | ds.attrs.update(dict( 107 | Conventions='CF1.6', 108 | source=filename, 109 | tracerinfo=tracerinfo_file, 110 | diaginfo=diaginfo_file, 111 | filetype=store._bpch.filetype, 112 | filetitle=store._bpch.filetitle, 113 | history=( 114 | "{}: Processed/loaded by xbpch-{} from {}" 115 | .format(ts, ver, filename) 116 | ), 117 | )) 118 | 119 | # # Record what the file object underlying the store which we culled this 120 | # # Dataset from is so that we can clean it up later 121 | # ds._file_obj = store._bpch 122 | try: 123 | # xarray 0.17 + 124 | ds.set_close(store.close) 125 | except AttributeError: 126 | ds._file_obj = store._bpch 127 | 128 | # To immediately load the data from the BPCHDataProxy paylods, need 129 | # to execute ds.data_vars for some reason... 130 | if return_store: 131 | return ds, store 132 | else: 133 | return ds 134 | 135 | 136 | def open_mfbpchdataset(paths, concat_dim='time', compat='no_conflicts', 137 | preprocess=None, lock=None, **kwargs): 138 | """ Open multiple bpch files as a single dataset. 139 | 140 | You must have dask installed for this to work, as this greatly 141 | simplifies issues relating to multi-file I/O. 142 | 143 | Also, please note that this is not a very performant routine. I/O is still 144 | limited by the fact that we need to manually scan/read through each bpch 145 | file so that we can figure out what its contents are, since that metadata 146 | isn't saved anywhere. So this routine will actually sequentially load 147 | Datasets for each bpch file, then concatenate them along the "time" axis. 148 | You may wish to simply process each file individually, coerce to NetCDF, 149 | and then ingest through xarray as normal. 150 | 151 | Parameters 152 | ---------- 153 | paths : list of strs 154 | Filenames to load; order doesn't matter as they will be 155 | lexicographically sorted before we read in the data 156 | concat_dim : str, default='time' 157 | Dimension to concatenate Datasets over. We default to "time" since this 158 | is how GEOS-Chem splits output files 159 | compat : str (optional) 160 | String indicating how to compare variables of the same name for 161 | potential conflicts when merging: 162 | 163 | - 'broadcast_equals': all values must be equal when variables are 164 | broadcast against each other to ensure common dimensions. 165 | - 'equals': all values and dimensions must be the same. 
166 | - 'identical': all values, dimensions and attributes must be the 167 | same. 168 | - 'no_conflicts': only values which are not null in both datasets 169 | must be equal. The returned dataset then contains the combination 170 | of all non-null values. 171 | preprocess : callable (optional) 172 | A pre-processing function to apply to each Dataset prior to 173 | concatenation 174 | lock : False, True, or threading.Lock (optional) 175 | Passed to :py:func:`dask.array.from_array`. By default, xarray 176 | employs a per-variable lock when reading data from NetCDF files, 177 | but this model has not yet been extended or implemented for bpch files 178 | and so this is not actually used. However, it is likely necessary 179 | before dask's multi-threaded backend can be used 180 | **kwargs : optional 181 | Additional arguments to pass to :py:func:`xbpch.open_bpchdataset`. 182 | 183 | """ 184 | try: 185 | from xarray.backends.api import _MultiFileCloser 186 | except ImportError: 187 | pass 188 | 189 | # TODO: Include file locks? 190 | 191 | # Check for dask 192 | dask = kwargs.pop('dask', False) 193 | if not dask: 194 | raise ValueError("Reading multiple files without dask is not supported") 195 | kwargs['dask'] = True 196 | 197 | # Add th 198 | 199 | if isinstance(paths, str): 200 | paths = sorted(glob(paths)) 201 | if not paths: 202 | raise IOError("No paths to files were passed into open_mfbpchdataset") 203 | 204 | datasets = [open_bpchdataset(filename, **kwargs) 205 | for filename in paths] 206 | 207 | if preprocess is not None: 208 | datasets = [preprocess(ds) for ds in datasets] 209 | 210 | # Concatenate over time 211 | combined = xr.combine_nested(datasets, compat=compat, concat_dim=concat_dim) 212 | 213 | try: 214 | # xarray 0.17 + 215 | combined.set_close(lambda : [ds.close() for ds in datasets]) 216 | except AttributeError: 217 | combined._file_obj = _MultiFileCloser([ds._file_obj for ds in datasets]) 218 | 219 | combined.attrs = datasets[0].attrs 220 | ts = get_timestamp() 221 | fns_str = " ".join(paths) 222 | combined.attrs['history'] = ( 223 | "{}: Processed/loaded by xbpch-{} from {}" 224 | .format(ts, ver, fns_str) 225 | ) 226 | 227 | return combined 228 | 229 | 230 | class BPCHDataStore(AbstractDataStore): 231 | """ Store for reading data from binary punch files. 232 | 233 | Note that this is intended as a backend only; to open and read a given 234 | bpch file, use :meth:`open_bpchdataset`. 235 | 236 | Examples of other extensions using the core DataStore API can be found at: 237 | 238 | - https://github.com/pydata/xarray/blob/master/xarray/conventions.py 239 | - https://github.com/xgcm/xmitgcm/blob/master/xmitgcm/mds_store.py 240 | 241 | """ 242 | 243 | def __init__(self, filename, fields=[], categories=[], fix_cf=False, 244 | mode='r', endian='>', 245 | diaginfo_file='', tracerinfo_file='', 246 | use_mmap=False, dask_delayed=False): 247 | 248 | # Track the metadata accompanying this dataset. 
249 | dir_path = os.path.abspath(os.path.dirname(filename)) 250 | if not dir_path: 251 | dir_path = os.getcwd() 252 | if not tracerinfo_file: 253 | tracerinfo_file = os.path.join(dir_path, 'tracerinfo.dat') 254 | if not os.path.exists(tracerinfo_file): 255 | tracerinfo_file = '' 256 | self.tracerinfo_file = tracerinfo_file 257 | if not diaginfo_file: 258 | diaginfo_file = os.path.join(dir_path, 'diaginfo.dat') 259 | if not os.path.exists(diaginfo_file): 260 | diaginfo_file = '' 261 | self.diaginfo_file = diaginfo_file 262 | 263 | self.filename = filename 264 | self.fsize = os.path.getsize(self.filename) 265 | self.mode = mode 266 | if not mode.startswith('r'): 267 | raise ValueError("Currently only know how to 'r(b)'ead bpch files.") 268 | 269 | # Check endianness flag 270 | if endian not in ['>', '<', '=']: 271 | raise ValueError("Invalid byte order (endian={})".format(endian)) 272 | self.endian = endian 273 | 274 | # Open the raw output file, but don't yet read all the data 275 | self._mmap = use_mmap 276 | self._dask = dask_delayed 277 | self._bpch = BPCHFile(self.filename, self.mode, self.endian, 278 | tracerinfo_file=tracerinfo_file, 279 | diaginfo_file=diaginfo_file, 280 | eager=False, use_mmap=self._mmap, 281 | dask_delayed=self._dask) 282 | self.fields = fields 283 | self.categories = categories 284 | 285 | # Peek into the raw output file and read the header and metadata 286 | # so that we can get a head start at building the output grid 287 | self._bpch._read_metadata() 288 | self._bpch._read_header() 289 | 290 | # Parse the binary file and prepare to add variables to the DataStore 291 | self._bpch._read_var_data() 292 | 293 | # Create storage dicts for variables and attributes, to be used later 294 | # when xarray needs to access the data 295 | self._variables = OrderedDict() 296 | self._attributes = OrderedDict() 297 | self._attributes.update(self._bpch._attributes) 298 | self._dimensions = [d for d in BASE_DIMENSIONS] 299 | 300 | # Begin constructing the coordinate dimensions shared by the 301 | # output dataset variables 302 | dim_coords = {} 303 | self.ctm_info = CTMGrid.from_model( 304 | self._attributes['modelname'], resolution=self._attributes['res'] 305 | ) 306 | 307 | # Add vertical dimensions 308 | self._dimensions.append( 309 | dict(dims=['lev', ], attrs={'axis': 'Z'}) 310 | ) 311 | self._dimensions.append( 312 | dict(dims=['lev_trop', ], attrs={'axis': 'Z'}) 313 | ) 314 | self._dimensions.append( 315 | dict(dims=['lev_edge', ], attrs={'axis': 'Z'}) 316 | ) 317 | eta_centers = self.ctm_info.eta_centers 318 | sigma_centers = self.ctm_info.sigma_centers 319 | 320 | # Add time dimensions 321 | self._dimensions.append( 322 | dict(dims=['time', ], attrs={'axis': 'T', 'long_name': 'time', 323 | 'standard_name': 'time'}) 324 | ) 325 | 326 | # Add lat/lon dimensions 327 | self._dimensions.append( 328 | dict(dims=['lon', ], attrs={ 329 | 'axis': 'X', 'long_name': 'longitude coordinate', 330 | 'standard_name': 'longitude' 331 | }) 332 | ) 333 | self._dimensions.append( 334 | dict(dims=['lat', ], attrs={ 335 | 'axis': 'y', 'long_name': 'latitude coordinate', 336 | 'standard_name': 'latitude' 337 | }) 338 | ) 339 | 340 | if eta_centers is not None: 341 | lev_vals = eta_centers 342 | lev_attrs = { 343 | 'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate', 344 | 'axis': 'Z' 345 | } 346 | else: 347 | lev_vals = sigma_centers 348 | lev_attrs = { 349 | 'standard_name': 'atmosphere_hybrid_sigma_pressure_coordinate', 350 | 'axis': 'Z' 351 | } 352 | self._variables['lev'] = 
xr.Variable(['lev', ], lev_vals, lev_attrs) 353 | 354 | ## Latitude / Longitude 355 | # TODO: Add lon/lat bounds 356 | 357 | # Detect if we're on a nested grid; in that case, we'll have a displaced 358 | # origin set in the variable attributes we previously read 359 | ref_key = list(self._bpch.var_attrs.keys())[0] 360 | ref_attrs = self._bpch.var_attrs[ref_key] 361 | self.is_nested = (ref_attrs['origin'] != (1, 1, 1)) 362 | 363 | lon_centers = self.ctm_info.lon_centers 364 | lat_centers = self.ctm_info.lat_centers 365 | 366 | if self.is_nested: 367 | ix, iy, _ = ref_attrs['origin'] 368 | nx, ny, *_ = ref_attrs['original_shape'] 369 | # Correct i{x,y} for IDL->Python indexing (1-indexed -> 0-indexed) 370 | ix -= 1 371 | iy -= 1 372 | lon_centers = lon_centers[ix:ix+nx] 373 | lat_centers = lat_centers[iy:iy+ny] 374 | 375 | self._variables['lon'] = xr.Variable( 376 | ['lon'], lon_centers, 377 | {'long_name': 'longitude', 'units': 'degrees_east'} 378 | ) 379 | self._variables['lat'] = xr.Variable( 380 | ['lat'], lat_centers, 381 | {'long_name': 'latitude', 'units': 'degrees_north'} 382 | ) 383 | # TODO: Fix longitudes if ctm_grid.center180 384 | 385 | # Add variables from the parsed BPCH file to our DataStore 386 | for vname in list(self._bpch.var_data.keys()): 387 | 388 | var_data = self._bpch.var_data[vname] 389 | var_attr = self._bpch.var_attrs[vname] 390 | 391 | if fields and (var_attr['name'] not in fields): 392 | continue 393 | if categories and (var_attr['category'] not in categories): 394 | continue 395 | 396 | # Process dimensions 397 | dims = ['time', 'lon', 'lat', ] 398 | dshape = var_attr['original_shape'] 399 | if len(dshape) == 3: 400 | # Process the vertical coordinate. A few things can happen here: 401 | # 1) We have cell-centered values on the "Nlayer" grid; we can take these variables and map them to 'lev' 402 | # 2) We have edge value on an "Nlayer" + 1 grid; we can take these and use them with 'lev_edge' 403 | # 3) We have troposphere values on "Ntrop"; we can take these and use them with 'lev_trop', but we won't have coordinate information yet 404 | # All other cases we do not handle yet; this includes the aircraft emissions and a few other things. Note that tracer sources do not have a vertical coord to worry about! 405 | nlev = dshape[-1] 406 | grid_nlev = self.ctm_info.Nlayers 407 | grid_ntrop = self.ctm_info.Ntrop 408 | try: 409 | if nlev == grid_nlev: 410 | dims.append('lev') 411 | elif nlev == grid_nlev + 1: 412 | dims.append('lev_edge') 413 | elif nlev == grid_ntrop: 414 | dims.append('lev_trop') 415 | else: 416 | continue 417 | except AttributeError: 418 | warnings.warn("Couldn't resolve grid_spec vertical layout") 419 | continue 420 | 421 | # xarray Variables are thin wrappers for numpy.ndarrays, or really 422 | # any object that extends the ndarray interface. A critical part of 423 | # the original ndarray interface is that the underlying data has to 424 | # be contiguous in memory. We can enforce this to happen by 425 | # concatenating each bundle in the variable data bundles we read 426 | # from the bpch file 427 | data = self._concat([v.data for v in var_data]) 428 | 429 | # Is the variable time-invariant? If it is, kill the time dim. 430 | # Here, we mean it only as one sample in the dataset. 
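# A single time sample means the variable is effectively time-invariant:
# 'time' is dropped from its dims and the length-one leading axis is
# squeezed away, so the variable is stored as (lon, lat[, lev]).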
431 | if data.shape[0] == 1: 432 | dims = dims[1:] 433 | data = data.squeeze() 434 | 435 | # Create a variable containing this data 436 | var = xr.Variable(dims, data, var_attr) 437 | 438 | # Shuffle dims for CF/COARDS compliance if requested 439 | # TODO: For this to work, we have to force a load of the data. 440 | # Is there a way to re-write BPCHDataProxy so that that's not 441 | # necessary? 442 | # Actually, we can't even force a load because var.data is a 443 | # numpy.ndarray. Weird. 444 | # if fix_dims: 445 | # target_dims = [d for d in DIM_ORDER_PRIORITY if d in dims] 446 | # var = var.transpose(*target_dims) 447 | 448 | self._variables[vname] = var 449 | 450 | # Try to add a time dimension 451 | # TODO: Time units? 452 | if (len(var_data) > 1) and 'time' not in self._variables: 453 | time_bnds = np.asarray([v.time for v in var_data]) 454 | times = time_bnds[:, 0] 455 | 456 | self._variables['time'] = xr.Variable( 457 | ['time', ], times, 458 | {'bounds': 'time_bnds', 'units': cf.CTM_TIME_UNIT_STR} 459 | ) 460 | self._variables['time_bnds'] = xr.Variable( 461 | ['time', 'nv'], time_bnds, 462 | {'units': cf.CTM_TIME_UNIT_STR} 463 | ) 464 | self._variables['nv'] = xr.Variable(['nv', ], [0, 1]) 465 | 466 | # Create the dimension variables; we have a lot of options 467 | # here with regards to the vertical coordinate. For now, 468 | # we'll just use the sigma or eta coordinates. 469 | # Useful CF info: http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_atmosphere_hybrid_sigma_pressure_coordinate 470 | # self._variables['Ap'] = 471 | # self._variables['Bp'] = 472 | # self._variables['altitude'] = 473 | 474 | # Time dimensions 475 | # self._times = self.ds.times 476 | # self._time_bnds = self.ds.time_bnds 477 | 478 | 479 | def _concat(self, *args, **kwargs): 480 | if self._dask: 481 | return da.concatenate(*args, **kwargs) 482 | else: 483 | return np.concatenate(*args, **kwargs) 484 | 485 | def get_variables(self): 486 | return self._variables 487 | 488 | def get_attrs(self): 489 | return Frozen(self._attributes) 490 | 491 | def get_dimensions(self): 492 | return Frozen(self._dimensions) 493 | 494 | def close(self): 495 | self._bpch.close() 496 | for var in list(self._variables): 497 | del self._variables[var] 498 | 499 | def __exit__(self, type, value, traceback): 500 | self.close() 501 | -------------------------------------------------------------------------------- /xbpch/util/gridspec.py: -------------------------------------------------------------------------------- 1 | """ 2 | Specification of various grid models used by GEOS-Chem (GEOS, MERRA, etc.). 3 | 4 | `MODELS` defines the default grid set-up (dict) for several models: 5 | - Model names (keys) should be uppercase. 6 | - A model can inherit grid specifications from another model, using the 7 | key 'reference' (useful for model groups, similar models or 8 | multiple model names). 9 | - A model defines a family of models if the value 10 | of 'reference' is set to None (e.g., 'GEOS'). 11 | - When a key is redefined (i.e., present in both a model and its inherited 12 | model), it overrides the specification of the inherited model. 13 | 14 | Model names and grid specifications are best retrieved using 15 | :func:`_get_supported_models` and :func:`_get_model_info`. 16 | 17 | `CSIG_`, `ESIG_`, `Ap_` and `Bp_` can be used to compute the vertical grid 18 | levels for the specified models.
19 | 20 | This implementation is based on Benoit Bovy's PyGChem code, and uses his 21 | hand-curated grid level definitions as a starting point. See PyGChem/license.txt 22 | for more details on the provenance of this code. 23 | 24 | """ 25 | 26 | import re 27 | import itertools 28 | 29 | import numpy as np 30 | 31 | # pre-defined sigma coordinates 32 | CSIG_GEOS1 = np.array([ 33 | 0.993936, 0.971301, 0.929925, 0.874137, 0.807833, 34 | 0.734480, 0.657114, 0.578390, 0.500500, 0.424750, 35 | 0.352000, 0.283750, 0.222750, 0.172150, 0.132200, 36 | 0.100050, 0.073000, 0.049750, 0.029000, 0.009500 37 | ]) 38 | 39 | ESIG_GEOS1 = np.array([ 40 | 1.000000, 0.987871, 0.954730, 0.905120, 0.843153, 41 | 0.772512, 0.696448, 0.617779, 0.539000, 0.462000, 42 | 0.387500, 0.316500, 0.251000, 0.194500, 0.149800, 43 | 0.114600, 0.085500, 0.060500, 0.039000, 0.019000, 44 | 0.000000 45 | ]) 46 | 47 | CSIG_GEOS_STRAT = np.array([ 48 | 0.993935, 0.971300, 0.929925, 0.875060, 0.812500, 49 | 0.745000, 0.674500, 0.604500, 0.536500, 0.471500, 50 | 0.410000, 0.352500, 0.301500, 0.257977, 0.220273, 51 | 0.187044, 0.157881, 0.132807, 0.111722, 0.094035, 52 | 0.079233, 0.066873, 0.056574, 0.044794, 0.028825, 53 | 0.009979 54 | ]) 55 | 56 | ESIG_GEOS_STRAT = np.array([ 57 | 1.000000, 0.987871, 0.954730, 0.905120, 0.845000, 58 | 0.780000, 0.710000, 0.639000, 0.570000, 0.503000, 59 | 0.440000, 0.380000, 0.325000, 0.278000, 0.237954, 60 | 0.202593, 0.171495, 0.144267, 0.121347, 0.102098, 61 | 0.085972, 0.072493, 0.061252, 0.051896, 0.037692, 62 | 0.019958, 0.000000 63 | ]) 64 | 65 | CSIG_GEOS_STRAT_46L = np.array([ 66 | 0.993935, 0.971300, 0.929925, 0.875060, 0.812500, 67 | 0.745000, 0.674500, 0.604500, 0.536500, 0.471500, 68 | 0.410000, 0.352500, 0.301500, 0.257977, 0.220273, 69 | 0.187044, 0.157881, 0.132807, 0.111722, 0.094035, 70 | 0.079233, 0.066873, 0.056574, 0.048012, 0.040910, 71 | 0.034927, 0.029792, 0.025395, 0.021663, 0.018439, 72 | 0.015571, 0.013036, 0.010808, 0.008864, 0.007181, 73 | 0.005737, 0.004510, 0.003480, 0.002625, 0.001928, 74 | 0.001369, 0.000929, 0.000593, 0.000344, 0.000167, 75 | 0.000047 76 | ]) 77 | 78 | ESIG_GEOS_STRAT_46L = np.array([ 79 | 1.000000, 0.987871, 0.954730, 0.905120, 0.845000, 80 | 0.780000, 0.710000, 0.639000, 0.570000, 0.503000, 81 | 0.440000, 0.380000, 0.325000, 0.278000, 0.237954, 82 | 0.202593, 0.171495, 0.144267, 0.121347, 0.102098, 83 | 0.085972, 0.072493, 0.061252, 0.051896, 0.044128, 84 | 0.037692, 0.032162, 0.027422, 0.023367, 0.019958, 85 | 0.016919, 0.014223, 0.011848, 0.009767, 0.007960, 86 | 0.006402, 0.005072, 0.003948, 0.003011, 0.002240, 87 | 0.001616, 0.001121, 0.000737, 0.000449, 0.000239, 88 | 0.000094, 0.000000 89 | ]) 90 | 91 | CSIG_GEOS2 = np.array([ 92 | 9.985475e-01, 9.942475e-01, 9.871500e-01, 9.772000e-01, 93 | 9.642500e-01, 9.481150e-01, 9.285650e-01, 9.053219e-01, 94 | 8.781569e-01, 8.469350e-01, 8.116350e-01, 7.724569e-01, 95 | 7.299198e-01, 6.847475e-01, 6.377244e-01, 5.896341e-01, 96 | 5.412270e-01, 4.932176e-01, 4.462150e-01, 4.007400e-01, 97 | 3.572600e-01, 3.161750e-01, 2.779150e-01, 2.429000e-01, 98 | 2.114000e-01, 1.834250e-01, 1.587150e-01, 1.369425e-01, 99 | 1.178165e-01, 1.010651e-01, 8.644427e-02, 7.372377e-02, 100 | 6.269240e-02, 5.314686e-02, 4.489815e-02, 3.779315e-02, 101 | 3.171021e-02, 2.329529e-02, 1.512403e-02, 9.817761e-03, 102 | 6.371968e-03, 4.134332e-03, 2.681253e-03, 1.737650e-03, 103 | 1.124892e-03, 7.269780e-04, 6.706442e-05 104 | ]) 105 | 106 | ESIG_GEOS2 = np.array([ 107 | 1.000000e+00, 9.970951e-01, 9.914000e-01, 
9.829000e-01, 108 | 9.715000e-01, 9.570000e-01, 9.392300e-01, 9.179000e-01, 109 | 8.927438e-01, 8.635700e-01, 8.303000e-01, 7.929700e-01, 110 | 7.519437e-01, 7.078959e-01, 6.615992e-01, 6.138495e-01, 111 | 5.654188e-01, 5.170351e-01, 4.694000e-01, 4.230300e-01, 112 | 3.784500e-01, 3.360700e-01, 2.962800e-01, 2.595500e-01, 113 | 2.262500e-01, 1.965500e-01, 1.703000e-01, 1.471300e-01, 114 | 1.267550e-01, 1.088781e-01, 9.325208e-02, 7.963646e-02, 115 | 6.781108e-02, 5.757372e-02, 4.872000e-02, 4.107631e-02, 116 | 3.451000e-02, 2.891042e-02, 1.877039e-02, 1.218564e-02, 117 | 7.909625e-03, 5.132859e-03, 3.329678e-03, 2.158725e-03, 118 | 1.398330e-03, 9.045439e-04, 5.838880e-04, 0.000000e+00 119 | ]) 120 | 121 | CSIG_GEOS2_70L = np.array([ 122 | 0.998548, 0.994248, 0.987150, 0.977200, 0.964250, 123 | 0.948115, 0.928565, 0.905322, 0.878157, 0.846935, 124 | 0.811635, 0.772457, 0.729920, 0.684748, 0.637724, 125 | 0.589634, 0.541227, 0.493218, 0.446215, 0.400740, 126 | 0.357260, 0.316175, 0.277915, 0.242900, 0.211400, 127 | 0.183425, 0.158715, 0.136943, 0.117817, 0.101065, 128 | 0.086444, 0.073724, 0.062692, 0.053147, 0.044898, 129 | 0.037793, 0.031710, 0.026527, 0.022123, 0.018394, 130 | 0.015247, 0.012600, 0.010381, 0.008526, 0.006982, 131 | 0.005699, 0.004638, 0.003763, 0.003043, 0.002453, 132 | 0.001971, 0.001579, 0.001261, 0.001003, 0.000795, 133 | 0.000628, 0.000494, 0.000386, 0.000300, 0.000232, 134 | 0.000179, 0.000136, 0.000103, 0.000077, 0.000057, 135 | 0.000041, 0.000028, 0.000018, 0.000010, 0.000003 136 | ]) 137 | 138 | ESIG_GEOS2_70L = np.array([ 139 | 1.000000, 0.997095, 0.991400, 0.982900, 0.971500, 140 | 0.957000, 0.939230, 0.917900, 0.892744, 0.863570, 141 | 0.830300, 0.792970, 0.751944, 0.707896, 0.661599, 142 | 0.613850, 0.565419, 0.517035, 0.469400, 0.423030, 143 | 0.378450, 0.336070, 0.296280, 0.259550, 0.226250, 144 | 0.196550, 0.170300, 0.147130, 0.126755, 0.108878, 145 | 0.093252, 0.079636, 0.067811, 0.057574, 0.048720, 146 | 0.041076, 0.034510, 0.028910, 0.024144, 0.020102, 147 | 0.016686, 0.013808, 0.011392, 0.009370, 0.007683, 148 | 0.006280, 0.005118, 0.004158, 0.003367, 0.002719, 149 | 0.002188, 0.001755, 0.001403, 0.001118, 0.000888, 150 | 0.000702, 0.000553, 0.000434, 0.000338, 0.000262, 151 | 0.000202, 0.000155, 0.000118, 0.000089, 0.000066, 152 | 0.000048, 0.000034, 0.000023, 0.000014, 0.000006, 153 | 0.000000 154 | ]) 155 | 156 | CSIG_GEOS3 = np.array([ 157 | 0.998548, 0.994148, 0.986350, 0.974300, 158 | 0.956950, 0.933150, 0.901750, 0.861500, 159 | 0.811000, 0.750600, 0.682900, 0.610850, 160 | 0.537050, 0.463900, 0.393650, 0.328275, 161 | 0.269500, 0.218295, 0.174820, 0.138840, 162 | 0.109790, 0.0866900, 0.0684150, 0.0539800, 163 | 0.0425750, 0.0335700, 0.0264650, 0.0208550, 164 | 0.0164300, 0.0129425, 0.0101900, 0.00800750, 165 | 0.00627000, 0.00489000, 0.00379000, 0.00291500, 166 | 0.00221500, 0.00167000, 0.00125000, 0.000912500, 167 | 0.000652500, 0.000455000, 0.00030750, 0.000200000, 168 | 0.000123500, 6.97500e-05, 3.25900e-05, 8.84000e-06 169 | ]) 170 | 171 | ESIG_GEOS3 = np.array([ 172 | 1.000000, 0.997095, 0.991200, 0.981500, 173 | 0.967100, 0.946800, 0.919500, 0.884000, 174 | 0.839000, 0.783000, 0.718200, 0.647600, 175 | 0.574100, 0.500000, 0.427800, 0.359500, 176 | 0.297050, 0.241950, 0.194640, 0.155000, 177 | 0.122680, 0.0969000, 0.0764800, 0.0603500, 178 | 0.0476100, 0.0375400, 0.0296000, 0.0233300, 179 | 0.0183800, 0.0144800, 0.0114050, 0.00897500, 180 | 0.00704000, 0.00550000, 0.00428000, 0.00330000, 181 | 0.00253000, 0.00190000, 0.00144000, 
0.00106000, 182 | 0.000765000, 0.000540000, 0.000370000, 0.000245000, 183 | 0.000155000, 9.20000e-05, 4.75000e-05, 1.76800e-05, 184 | 0.00000 185 | ]) 186 | 187 | CSIG_GEOS3_30L = np.array([ 188 | 0.998548, 0.994148, 0.986350, 0.974300, 189 | 0.956950, 0.933150, 0.901750, 0.861500, 190 | 0.811000, 0.750600, 0.682900, 0.610850, 191 | 0.537050, 0.463900, 0.393650, 0.328275, 192 | 0.269500, 0.218295, 0.174820, 0.138840, 193 | 0.109790, 0.0866900, 0.0620450, 0.0386050, 194 | 0.0239900, 0.0127100, 0.00478500, 0.00164750, 195 | 0.000460000, 7.75000e-05 196 | ]) 197 | 198 | ESIG_GEOS3_30L = np.array([ 199 | 1.000000, 0.997095, 0.991200, 0.981500, 200 | 0.967100, 0.946800, 0.919500, 0.884000, 201 | 0.839000, 0.783000, 0.718200, 0.647600, 202 | 0.574100, 0.500000, 0.427800, 0.359500, 203 | 0.297050, 0.241950, 0.194640, 0.155000, 204 | 0.122680, 0.0969000, 0.0764800, 0.0476100, 205 | 0.0296000, 0.0183800, 0.00704000, 0.00253000, 206 | 0.000765000, 0.000155000, 0.00000 207 | ]) 208 | 209 | # pre-defined parameter values for computing ETA vertical levels: 210 | # A [hPa] ; B [unitless] 211 | Ap_GEOS4 = np.array([ 212 | 0.000000, 0.000000, 12.704939, 35.465965, 213 | 66.098427, 101.671654, 138.744400, 173.403183, 214 | 198.737839, 215.417526, 223.884689, 224.362869, 215 | 216.864929, 201.192093, 176.929993, 150.393005, 216 | 127.837006, 108.663429, 92.365662, 78.512299, 217 | 66.603378, 56.387939, 47.643932, 40.175419, 218 | 33.809956, 28.367815, 23.730362, 19.791553, 219 | 16.457071, 13.643393, 11.276889, 9.292943, 220 | 7.619839, 6.216800, 5.046805, 4.076567, 221 | 3.276433, 2.620212, 2.084972, 1.650792, 222 | 1.300508, 1.019442, 0.795134, 0.616779, 223 | 0.475806, 0.365041, 0.278526, 0.211349, 224 | 0.159495, 0.119703, 0.089345, 0.066000, 225 | 0.047585, 0.032700, 0.020000, 0.010000 226 | ]) 227 | 228 | Bp_GEOS4 = np.array([ 229 | 1.000000, 0.985110, 0.943290, 0.867830, 230 | 0.764920, 0.642710, 0.510460, 0.378440, 231 | 0.270330, 0.183300, 0.115030, 0.063720, 232 | 0.028010, 0.006960, 0.000000, 0.000000, 233 | 0.000000, 0.000000, 0.000000, 0.000000, 234 | 0.000000, 0.000000, 0.000000, 0.000000, 235 | 0.000000, 0.000000, 0.000000, 0.000000, 236 | 0.000000, 0.000000, 0.000000, 0.000000, 237 | 0.000000, 0.000000, 0.000000, 0.000000, 238 | 0.000000, 0.000000, 0.000000, 0.000000, 239 | 0.000000, 0.000000, 0.000000, 0.000000, 240 | 0.000000, 0.000000, 0.000000, 0.000000, 241 | 0.000000, 0.000000, 0.000000, 0.000000, 242 | 0.000000, 0.000000, 0.000000, 0.000000 243 | ]) 244 | 245 | Ap_GEOS4_REDUCED = np.array([ 246 | 0.000000, 0.000000, 12.704939, 35.465965, 247 | 66.098427, 101.671654, 138.744400, 173.403183, 248 | 198.737839, 215.417526, 223.884689, 224.362869, 249 | 216.864929, 201.192093, 176.929993, 150.393005, 250 | 127.837006, 108.663429, 92.365662, 78.512299, 251 | 56.387939, 40.175419, 28.367815, 19.791553, 252 | 9.292943, 4.076567, 1.650792, 0.616779, 253 | 0.211349, 0.066000, 0.010000]) 254 | 255 | Bp_GEOS4_REDUCED = np.array([ 256 | 1.000000, 0.985110, 0.943290, 0.867830, 257 | 0.764920, 0.642710, 0.510460, 0.378440, 258 | 0.270330, 0.183300, 0.115030, 0.063720, 259 | 0.028010, 0.006960, 0.000000, 0.000000, 260 | 0.000000, 0.000000, 0.000000, 0.000000, 261 | 0.000000, 0.000000, 0.000000, 0.000000, 262 | 0.000000, 0.000000, 0.000000, 0.000000, 263 | 0.000000, 0.000000, 0.000000 264 | ]) 265 | 266 | Ap_GEOS5 = np.array([ 267 | 0.00000000e+00, 4.80482600e-02, 6.59375200e+00, 268 | 1.31348000e+01, 1.96131100e+01, 2.60920100e+01, 269 | 3.25708100e+01, 3.89820100e+01, 4.53390100e+01, 270 | 
5.16961100e+01, 5.80532100e+01, 6.43626400e+01, 271 | 7.06219800e+01, 7.88342200e+01, 8.90999200e+01, 272 | 9.93652100e+01, 1.09181700e+02, 1.18958600e+02, 273 | 1.28695900e+02, 1.42910000e+02, 1.56260000e+02, 274 | 1.69609000e+02, 1.81619000e+02, 1.93097000e+02, 275 | 2.03259000e+02, 2.12150000e+02, 2.18776000e+02, 276 | 2.23898000e+02, 2.24363000e+02, 2.16865000e+02, 277 | 2.01192000e+02, 1.76930000e+02, 1.50393000e+02, 278 | 1.27837000e+02, 1.08663000e+02, 9.23657200e+01, 279 | 7.85123100e+01, 6.66034100e+01, 5.63879100e+01, 280 | 4.76439100e+01, 4.01754100e+01, 3.38100100e+01, 281 | 2.83678100e+01, 2.37304100e+01, 1.97916000e+01, 282 | 1.64571000e+01, 1.36434000e+01, 1.12769000e+01, 283 | 9.29294200e+00, 7.61984200e+00, 6.21680100e+00, 284 | 5.04680100e+00, 4.07657100e+00, 3.27643100e+00, 285 | 2.62021100e+00, 2.08497000e+00, 1.65079000e+00, 286 | 1.30051000e+00, 1.01944000e+00, 7.95134100e-01, 287 | 6.16779100e-01, 4.75806100e-01, 3.65041100e-01, 288 | 2.78526100e-01, 2.11349000e-01, 1.59495000e-01, 289 | 1.19703000e-01, 8.93450200e-02, 6.60000100e-02, 290 | 4.75850100e-02, 3.27000000e-02, 2.00000000e-02, 291 | 1.00000000e-02 292 | ]) 293 | 294 | Bp_GEOS5 = np.array([ 295 | 1.00000000e+00, 9.84952000e-01, 9.63406000e-01, 296 | 9.41865000e-01, 9.20387000e-01, 8.98908000e-01, 297 | 8.77429000e-01, 8.56018000e-01, 8.34660900e-01, 298 | 8.13303900e-01, 7.91946900e-01, 7.70637500e-01, 299 | 7.49378200e-01, 7.21166000e-01, 6.85899900e-01, 300 | 6.50634900e-01, 6.15818400e-01, 5.81041500e-01, 301 | 5.46304200e-01, 4.94590200e-01, 4.43740200e-01, 302 | 3.92891100e-01, 3.43381100e-01, 2.94403100e-01, 303 | 2.46741100e-01, 2.00350100e-01, 1.56224100e-01, 304 | 1.13602100e-01, 6.37200600e-02, 2.80100400e-02, 305 | 6.96002500e-03, 8.17541300e-09, 0.00000000e+00, 306 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 307 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 308 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 309 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 310 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 311 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 312 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 313 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 314 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 315 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 316 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 317 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 318 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 319 | 0.00000000e+00 320 | ]) 321 | 322 | Ap_GEOS5_REDUCED = np.array([ 323 | 0.00000000e+00, 4.80482600e-02, 6.59375200e+00, 324 | 1.31348000e+01, 1.96131100e+01, 2.60920100e+01, 325 | 3.25708100e+01, 3.89820100e+01, 4.53390100e+01, 326 | 5.16961100e+01, 5.80532100e+01, 6.43626400e+01, 327 | 7.06219800e+01, 7.88342200e+01, 8.90999200e+01, 328 | 9.93652100e+01, 1.09181700e+02, 1.18958600e+02, 329 | 1.28695900e+02, 1.42910000e+02, 1.56260000e+02, 330 | 1.69609000e+02, 1.81619000e+02, 1.93097000e+02, 331 | 2.03259000e+02, 2.12150000e+02, 2.18776000e+02, 332 | 2.23898000e+02, 2.24363000e+02, 2.16865000e+02, 333 | 2.01192000e+02, 1.76930000e+02, 1.50393000e+02, 334 | 1.27837000e+02, 1.08663000e+02, 9.23657200e+01, 335 | 7.85123100e+01, 5.63879100e+01, 4.01754100e+01, 336 | 2.83678100e+01, 1.97916000e+01, 9.29294200e+00, 337 | 4.07657100e+00, 1.65079000e+00, 6.16779100e-01, 338 | 2.11349000e-01, 6.60000100e-02, 1.00000000e-02 339 | ]) 340 | 341 | Bp_GEOS5_REDUCED = np.array([ 342 | 1.00000000e+00, 9.84952000e-01, 9.63406000e-01, 343 | 9.41865000e-01, 
9.20387000e-01, 8.98908000e-01, 344 | 8.77429000e-01, 8.56018000e-01, 8.34660900e-01, 345 | 8.13303900e-01, 7.91946900e-01, 7.70637500e-01, 346 | 7.49378200e-01, 7.21166000e-01, 6.85899900e-01, 347 | 6.50634900e-01, 6.15818400e-01, 5.81041500e-01, 348 | 5.46304200e-01, 4.94590200e-01, 4.43740200e-01, 349 | 3.92891100e-01, 3.43381100e-01, 2.94403100e-01, 350 | 2.46741100e-01, 2.00350100e-01, 1.56224100e-01, 351 | 1.13602100e-01, 6.37200600e-02, 2.80100400e-02, 352 | 6.96002500e-03, 8.17541300e-09, 0.00000000e+00, 353 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 354 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 355 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 356 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 357 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00 358 | ]) 359 | 360 | 361 | MODELS = { 362 | 'GEOS': {'reference': None, 363 | 'description': 'GEOS model family', 364 | 'resolution': (5, 4), 365 | 'Ptop': 1e-2, 366 | 'halfpolar': True, 367 | 'center180': True}, 368 | 'GENERIC': {'reference': None, 369 | 'description': 'GENERIC grids', 370 | 'resolution': (1, 1), 371 | 'Nlayers': None, 372 | 'Ntrop': None, 373 | 'Ptop': 1e-2, 374 | 'halfpolar': False, 375 | 'center180': False, 376 | 'hybrid': False}, 377 | 'GEOS1': {'reference': 'GEOS', 378 | 'description': 'GEOS-1 pure sigma', 379 | 'Nlayers': 20, 380 | 'Ntrop': 16, 381 | 'hybrid': False, 382 | 'csig': CSIG_GEOS1, 383 | 'esig': ESIG_GEOS1}, 384 | 'GEOS_STRAT': {'reference': 'GEOS', 385 | 'description': 'GEOS-STRAT pure sigma vertically' 386 | ' regridded', 387 | 'Nlayers': 26, 388 | 'Ntrop': 19, 389 | 'Ptop': 1e-4, 390 | 'hybrid': False, 391 | 'csig': CSIG_GEOS_STRAT, 392 | 'esig': ESIG_GEOS_STRAT}, 393 | 'GEOS_STRAT_46L': {'reference': 'GEOS_STRAT', 394 | 'description': 'GEOS-STRAT pure sigma' 395 | ' original resolution', 396 | 'Nlayers': 46, 397 | 'csig': CSIG_GEOS_STRAT_46L, 398 | 'esig': ESIG_GEOS_STRAT_46L}, 399 | 'GEOS2': {'reference': 'GEOS', 400 | 'description': 'GEOS-2 pure sigma', 401 | 'Nlayers': 47, 402 | 'Ntrop': 32, 403 | 'hybrid': False, 404 | 'csig': CSIG_GEOS2, 405 | 'esig': ESIG_GEOS2}, 406 | 'GEOS2_70L': {'reference': 'GEOS2', 407 | 'description': 'GEOS-2 pure sigma' 408 | ' original resolution', 409 | 'Nlayers': 70, 410 | 'csig': CSIG_GEOS2_70L, 411 | 'esig': ESIG_GEOS2_70L}, 412 | 'GEOS3': {'reference': 'GEOS', 413 | 'description': 'GEOS-3 pure sigma', 414 | 'Nlayers': 48, 415 | 'Ntrop': 20, 416 | 'hybrid': False, 417 | 'csig': CSIG_GEOS3, 418 | 'esig': ESIG_GEOS3}, 419 | 'GEOS3_30L': {'reference': 'GEOS3', 420 | 'description': 'GEOS-3 pure sigma reduced', 421 | 'Nlayers': 30, 422 | 'csig': CSIG_GEOS3_30L, 423 | 'esig': ESIG_GEOS3_30L}, 424 | 'GEOS3_REDUCED': {'reference': 'GEOS3_30L'}, 425 | 'GEOS4': {'reference': 'GEOS', 426 | 'description': 'GEOS-4 hybrid', 427 | 'Nlayers': 55, 428 | 'Ntrop': 17, 429 | 'hybrid': True, 430 | 'Ap': Ap_GEOS4, 431 | 'Bp': Bp_GEOS4}, 432 | 'FVDAS': {'reference': 'GEOS4'}, 433 | 'GEOS4_30L': {'reference': 'GEOS4', 434 | 'description': 'GEOS-4 hybrid reduced', 435 | 'Nlayers': 30, 436 | 'Ap': Ap_GEOS4_REDUCED, 437 | 'Bp': Bp_GEOS4_REDUCED}, 438 | 'GEOS4_REDUCED': {'reference': 'GEOS4_30L'}, 439 | 'GEOS5': {'reference': 'GEOS', 440 | 'description': 'GEOS-5.2.0 hybrid', 441 | 'Nlayers': 72, 442 | 'Ntrop': 38, 443 | 'hybrid': True, 444 | 'Ap': Ap_GEOS5, 445 | 'Bp': Bp_GEOS5}, 446 | 'GEOS5_NATIVE': {'reference': 'GEOS5'}, 447 | 'GEOS5_47L': {'reference': 'GEOS5', 448 | 'description': 'GEOS-5.2.0 hybrid reduced', 449 | 'Nlayers': 47, 450 | 'Ap': Ap_GEOS5_REDUCED, 451 | 
'Bp': Bp_GEOS5_REDUCED}, 452 | 'GEOS5_REDUCED': {'reference': 'GEOS5_47L'}, 453 | 'GEOS57': {'reference': 'GEOS5', 454 | 'description': 'GEOS-5.7.x hybrid'}, 455 | 'GEOS57_NATIVE': {'reference': 'GEOS57'}, 456 | 'GEOS57_47L': {'reference': 'GEOS5_47L', 457 | 'description': 'GEOS-5.7.x hybrid reduced'}, 458 | 'GEOS57_REDUCED': {'reference': 'GEOS57_47L'}, 459 | 'GEOSFP': {'reference': 'GEOS57', 460 | 'description': 'GEOS-5.11.x (FP) hybrid'}, 461 | 'GEOSFP_NATIVE': {'reference': 'GEOSFP'}, 462 | 'GEOSFP_47L': {'reference': 'GEOS57_47L', 463 | 'description': 'GEOS-5.11.x (FP) hybrid reduced'}, 464 | 'GEOSFP_REDUCED': {'reference': 'GEOSFP_47L'}, 465 | 'MERRA': {'reference': 'GEOS5', 466 | 'description': 'MERRA hybrid'}, 467 | 'MERRA_NATIVE': {'reference': 'MERRA'}, 468 | 'MERRA_47L': {'reference': 'GEOS5_47L', 469 | 'description': 'MERRA hybrid reduced'}, 470 | 'MERRA_REDUCED': {'reference': 'MERRA_47L'}, 471 | 'MERRA2': {'reference': 'GEOSFP', 472 | 'description': 'MERRA2 hybrid'}, 473 | 'MERRA2_47L': {'reference': 'GEOS5_47L'}, 474 | } 475 | 476 | 477 | def prof_altitude(pressure, p_coef=(-0.028389, -0.0493698, 0.485718, 0.278656, 478 | -17.5703, 48.0926)): 479 | """ 480 | Return altitude for given pressure. 481 | 482 | This function evaluates a polynomial at log10(pressure) values. 483 | 484 | Parameters 485 | ---------- 486 | pressure : array-like 487 | pressure values [hPa]. 488 | p_coef : array-like 489 | coefficients of the polynomial (default values are for the US 490 | Standard Atmosphere). 491 | 492 | Returns 493 | ------- 494 | altitude : array-like 495 | altitude values [km] (same shape as the pressure input array). 496 | 497 | See Also 498 | -------- 499 | prof_pressure : Returns pressure for 500 | given altitude. 501 | prof_temperature : Returns air temperature for 502 | given altitude. 503 | 504 | Notes 505 | ----- 506 | Default coefficient values represent a 5th degree polynomial which had 507 | been fitted to USSA data from 0-100 km. Accuracy is on the order of 1% for 508 | 0-100 km and 0.5% below 30 km. This function, with default values, may thus 509 | produce bad results with pressure less than about 3e-4 hPa. 510 | 511 | Examples 512 | -------- 513 | >>> prof_altitude([1000, 800, 600]) 514 | array([ 0.1065092 , 1.95627858, 4.2060627 ]) 515 | 516 | """ 517 | pressure = np.asarray(pressure) 518 | altitude = np.polyval(p_coef, np.log10(pressure.flatten())) 519 | return altitude.reshape(pressure.shape) 520 | 521 | 522 | def prof_pressure(altitude, z_coef=(1.94170e-9, -5.14580e-7, 4.57018e-5, 523 | -1.55620e-3, -4.61994e-2, 2.99955)): 524 | """ 525 | Return pressure for given altitude. 526 | 527 | This function evaluates a polynomial at altitude values. 528 | 529 | Parameters 530 | ---------- 531 | altitude : array-like 532 | altitude values [km]. 533 | z_coef : array-like 534 | coefficients of the polynomial (default values are for the US 535 | Standard Atmosphere). 536 | 537 | Returns 538 | ------- 539 | pressure : array-like 540 | pressure values [hPa] (same shape as the altitude input array). 541 | 542 | See Also 543 | -------- 544 | prof_altitude : Returns altitude for 545 | given pressure. 546 | prof_temperature : Returns air temperature for 547 | given altitude. 548 | 549 | Notes 550 | ----- 551 | Default coefficient values represent a 5th degree polynomial which had 552 | been fitted to USSA data from 0-100 km. Accuracy is on the order of 1% for 553 | 0-100 km and 0.5% below 30 km.
This function, with default values, may thus 554 | produce bad results with altitude > 100 km. 555 | 556 | Examples 557 | -------- 558 | >>> prof_pressure([0, 10, 20]) 559 | array([ 998.96437334, 264.658697 , 55.28114631]) 560 | 561 | """ 562 | altitude = np.asarray(altitude) 563 | pressure = np.power(10, np.polyval(z_coef, altitude.flatten())) 564 | return pressure.reshape(altitude.shape) 565 | 566 | 567 | def _get_supported_models(): 568 | """ 569 | Returns a tuple of the names of the models for which grid specifications 570 | are available. 571 | """ 572 | return tuple(MODELS.keys()) 573 | 574 | 575 | def _find_references(model_name, references=None): 576 | """ 577 | Iterate over model references for `model_name` 578 | and return a list of parent model specifications (including those of 579 | `model_name`, ordered from parent to child). 580 | """ 581 | references = references or [] 582 | references.append(model_name) 583 | 584 | ref = MODELS[model_name].get('reference') 585 | if ref is not None: 586 | _find_references(ref, references) 587 | 588 | parent_models = [m for m in references] 589 | parent_models.reverse() 590 | 591 | return parent_models 592 | 593 | 594 | def _get_model_info(model_name): 595 | """ 596 | Get the grid specifications for a given model. 597 | 598 | Parameters 599 | ---------- 600 | model_name : string 601 | Name of the model. Supports multiple formats 602 | (e.g., 'GEOS5', 'GEOS-5' or 'GEOS_5'). 603 | 604 | Returns 605 | ------- 606 | specifications : dict 607 | Grid specifications as a dictionary. 608 | 609 | Raises 610 | ------ 611 | ValueError 612 | If the model is not supported (see `models`) or if the given 613 | `model_name` corresponds to several entries in the list of 614 | supported models. 615 | 616 | """ 617 | # trying to get as much as possible a valid model name from the given 618 | # `model_name`, using regular expressions. 619 | split_name = re.split(r'[\-_\s]', model_name.strip().upper()) 620 | sep_chars = ('', ' ', '-', '_') 621 | gen_seps = itertools.combinations_with_replacement( 622 | sep_chars, len(split_name) - 1 623 | ) 624 | test_names = ("".join((n for n in itertools.chain(*list(zip(split_name, 625 | s + ('',)))))) 626 | for s in gen_seps) 627 | match_names = list([name for name in test_names if name 628 | in _get_supported_models()]) 629 | 630 | if not len(match_names): 631 | raise ValueError("Model '{0}' is not supported".format(model_name)) 632 | elif len(match_names) > 1: 633 | raise ValueError("Multiple matched models for given model name '{0}'" 634 | .format(model_name)) 635 | 636 | valid_model_name = match_names[0] 637 | parent_models = _find_references(valid_model_name) 638 | 639 | model_spec = dict() 640 | for m in parent_models: 641 | model_spec.update(MODELS[m]) 642 | model_spec.pop('reference') 643 | model_spec['model_family'] = parent_models[0] 644 | model_spec['model_name'] = valid_model_name 645 | 646 | return model_spec 647 | --------------------------------------------------------------------------------
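To close, a minimal usage sketch tying the multi-file reader in core.py to the grid helpers in gridspec.py. The glob pattern and the 1013.25 hPa surface pressure are illustrative assumptions rather than values taken from the package; open_mfbpchdataset refuses to run without dask, tracerinfo.dat and diaginfo.dat are discovered automatically when they sit next to the bpch files, and the hybrid edge-pressure relation p_edge = Ap + Bp * p_surface is the conventional GEOS-Chem formulation, not a function exported by xbpch.

import xbpch
from xbpch.util.gridspec import _get_model_info, prof_altitude

# Read a run's timeseries output into a single dataset, concatenated along
# 'time' (multi-file reads require dask).
ds = xbpch.open_mfbpchdataset("output/ts*.bpch", dask=True)  # hypothetical glob

# Look up the vertical grid for the model that produced the output and
# reconstruct approximate level-edge pressures/altitudes for a
# 1013.25 hPa surface.
spec = _get_model_info("GEOS5_47L")
p_edge = spec["Ap"] + spec["Bp"] * 1013.25    # hPa
z_edge = prof_altitude(p_edge)                # km, US Standard Atmosphere fit

print(ds)
print(spec["Nlayers"], z_edge.min(), z_edge.max())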