├── .coveragerc
├── .github
│   └── workflows
│       └── ci.yaml
├── .gitignore
├── .pep8speaks.yml
├── .travis.yml
├── README.md
├── doc
│   ├── .gitignore
│   ├── Makefile
│   ├── conf.py
│   ├── evaluation.rst
│   ├── examples.rst
│   ├── examples
│   │   ├── ex_01_end_to_end.ipynb
│   │   ├── ex_02_docker_jupyter.ipynb
│   │   ├── ex_03_cycle_simulation.ipynb
│   │   └── ex_04_ensembles.ipynb
│   ├── index.rst
│   ├── installation.rst
│   ├── model_api.rst
│   ├── output.rst
│   ├── requirements.txt
│   ├── source
│   │   ├── modules.rst
│   │   ├── wrfhydropy.core.rst
│   │   └── wrfhydropy.rst
│   ├── utilities.rst
│   └── what-and-why.rst
├── readthedocs.yml
├── requirements.txt
├── setup.py
├── whp_test_env.yml
└── wrfhydropy
    ├── __init__.py
    ├── core
    │   ├── __init__.py
    │   ├── collection.py
    │   ├── cycle.py
    │   ├── domain.py
    │   ├── ensemble.py
    │   ├── ensemble_tools.py
    │   ├── evaluation.py
    │   ├── ioutils.py
    │   ├── job.py
    │   ├── model.py
    │   ├── namelist.py
    │   ├── outputdiffs.py
    │   ├── schedulers.py
    │   ├── simulation.py
    │   └── teams.py
    ├── data
    │   └── flood_thresholds_to_nc_w_qc.py
    ├── tests
    │   ├── .coveragerc
    │   ├── .gitignore
    │   ├── __init__.py
    │   ├── conftest.py
    │   ├── data
    │   │   ├── .gitignore
    │   │   ├── __init__.py
    │   │   ├── collection_data_download.py
    │   │   ├── collection_data_recipe.py
    │   │   ├── evaluation_answer_reprs.py
    │   │   ├── gdrive_download.py
    │   │   ├── nan_na_data
    │   │   │   ├── fill_value.nc
    │   │   │   ├── nan_fill.nc
    │   │   │   ├── nan_value.nc
    │   │   │   └── value_value.nc
    │   │   ├── nan_na_files_recipe.py
    │   │   └── nodefile_pbs_example_copy.txt
    │   ├── test_collection.py
    │   ├── test_cycle.py
    │   ├── test_domain.py
    │   ├── test_ensemble.py
    │   ├── test_evaluation.py
    │   ├── test_ioutils.py
    │   ├── test_job.py
    │   ├── test_model.py
    │   ├── test_namelist.py
    │   ├── test_outputdiffs.py
    │   ├── test_schedulers_pbs.py
    │   ├── test_simulation.py
    │   └── test_utils.py
    └── util
        ├── __init__.py
        ├── xrcmp.py
        └── xrnan.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 | */data/*
4 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 | push:
4 | branches:
5 | - "*"
6 | pull_request:
7 | branches:
8 | - "*"
9 |
10 | jobs:
11 |
12 | wrfhydropy_setup:
13 | name: standard installation
14 | runs-on: ubuntu-latest
15 | strategy:
16 | fail-fast: false
17 | defaults:
18 | run:
19 | shell: bash
20 | steps:
21 | - name: Checkout repo
22 | uses: actions/checkout@v4
23 |
24 | #- name: Set environment variables
25 | # run: |
26 |
27 | - name: Setup Python
28 | uses: actions/setup-python@v5
29 | with:
30 | python-version: "3.11"
31 |
32 | - name: Upgrade pip and install build and twine
33 | run: |
34 | python -m pip install --upgrade pip
35 | pip install wheel build twine
36 |
37 | - name: Base installation
38 | run: |
39 | pip --verbose install .
40 |
41 | # - name: Print pyhmn version
42 | # run: |
43 | # python -c "import wrfhydropy; print(wrfhydropy.__version__)"
44 |
45 | # - name: Build wrfhydropy, check dist outputs
46 | # run: |
47 | # python -m build
48 | # twine check --strict dist/*
49 |
50 | # wrfhydropy_lint:
51 | # name: linting
52 | # runs-on: ubuntu-latest
53 | # strategy:
54 | # fail-fast: false
55 | # defaults:
56 | # run:
57 | # shell: bash
58 | # steps:
59 | # - name: Checkout repo
60 | # uses: actions/checkout@v3
61 |
62 | # - name: Setup Python
63 | # uses: actions/setup-python@v5
64 | # with:
65 | # python-version: 3.10
66 |
67 | # - name: Install dependencies
68 | # run: |
69 | # pip install wheel
70 | # pip install -r ./ci/requirements/environment.txt
71 |
72 | # - name: Version info
73 | # run: |
74 | # pip -V
75 | # pip list
76 |
77 | # - name: Run isort
78 | # run: |
79 | # echo "if isort check fails update isort using"
80 | # echo " pip install isort --upgrade"
81 | # echo "and run"
82 | # echo " isort ./wrfhydropy ./autotest"
83 | # echo "and then commit the changes."
84 | # isort --check --diff ./wrfhydropy
85 |
86 | # - name: Run black
87 | # run: |
88 | # echo "if black check fails update black using"
89 | # echo " pip install black --upgrade"
90 | # echo "and run"
91 | # echo " black ./wrfhydropy ./autotest"
92 | # echo "and then commit the changes."
93 | # black --check --diff ./wrfhydropy
94 |
95 | # - name: Run flake8
96 | # run: |
97 | # flake8 --count --show-source --exit-zero ./wrfhydropy ./autotest
98 |
99 | # - name: Run pylint
100 | # run: |
101 | # pylint --jobs=2 --errors-only --exit-zero ./wrfhydropy ./autotest
102 |
103 | test:
104 | name: ${{ matrix.os}} py${{ matrix.python-version }}
105 | runs-on: ${{ matrix.os }}
106 | defaults:
107 | run:
108 | shell: bash -l {0}
109 | strategy:
110 | fail-fast: false
111 | matrix:
112 | # os: [ "ubuntu-latest", "macos-latest", "windows-latest" ]
113 | # for debugging purposes run github actions only on ubuntu-latest until its passing
114 | os: [ "ubuntu-latest" ]
115 | python-version: [ "3.11" ]
116 | steps:
117 | - name: Checkout repo
118 | uses: actions/checkout@v4
119 |
120 | - name: Set environment variables
121 | run: |
122 | echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV
123 |
124 | # - name: Setup gfortran
125 | # uses: awvwgk/setup-fortran@main
126 | # with:
127 | # compiler: gcc
128 | # version: 11
129 |
130 | # - name: Setup Python
131 | # uses: actions/setup-python@v5
132 | # with:
133 | # python-version: ${{ matrix.python-version }}
134 | # architecture: x64
135 |
136 | - name: Install Dependencies via Micromamba
137 | if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest'
138 | uses: mamba-org/setup-micromamba@v1
139 | with:
140 | environment-file: ./whp_test_env.yml
141 | cache-downloads: true
142 | cache-environment: true
143 | create-args: >-
144 | python=${{ matrix.python-version }} nccmp
145 |
146 | - name: Install Dependencies via Micromamba
147 | if: matrix.os == 'windows-latest'
148 | uses: mamba-org/setup-micromamba@v1
149 | with:
150 | environment-file: ./whp_test_env.yml
151 | cache-downloads: true
152 | cache-environment: true
153 | create-args: >-
154 | python=${{ matrix.python-version }}
155 |
156 | - name: Install nccmp on Windows
157 | if: matrix.os == 'windows-latest'
158 | run: |
159 | curl -kL https://downloads.sourceforge.net/project/nccmp/windows/x86_64/nccmp-1.8.2.0-msys2-x86_64.zip -o nccmp-1.8.2.0-msys2-x86_64.zip
160 | unzip nccmp-1.8.2.0-msys2-x86_64.zip
161 | echo "${PWD}/usr/local/bin" >> $GITHUB_PATH
162 |
163 |
164 | # - name: Install nccmp on Ubuntu or MacOS
165 | # uses: mamba-org/setup-micromamba@v1
166 | # with:
167 | # cache-downloads: true
168 | # cache-environment: true
169 | # create-args: nccmp
170 |
171 | - name: Install wrfhydropy
172 | run: |
173 | pip install .
174 |
175 | - name: Version info
176 | run: |
177 | pip -V
178 | pip list
179 |
180 | - name: Run tests
181 | working-directory: wrfhydropy/tests
182 | run: pytest
183 | -vv
184 | --durations=0
185 | --cov=wrfhydropy
186 | --cov-report=xml
187 | --junitxml=pytest.xml
188 | # -n=auto
189 |
190 | # - name: Upload test results
191 | # if: always()
192 | # uses: actions/upload-artifact@v2
193 | # with:
194 | # name: Test results for ${{ runner.os }}-${{ matrix.python-version }}
195 | # path: ./wrfhydropy/tests/pytest.xml
196 |
197 | # - name: Upload code coverage to Codecov
198 | # uses: codecov/codecov-action@v2.1.0
199 | # with:
200 | # file: ./autotest/coverage.xml
201 | # # flags: unittests
202 | # env_vars: RUNNER_OS,PYTHON_VERSION
203 | # # name: codecov-umbrella
204 | # fail_ci_if_error: false
205 | # version: "v0.1.15"
206 |
--------------------------------------------------------------------------------
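The "Run tests" step above can be approximated locally. A minimal sketch, assuming the `whp` environment from `whp_test_env.yml` is active and `nccmp` is available on the PATH (e.g. from conda-forge):

```bash
# From a local checkout of wrf_hydro_py, mirror the CI test step.
pip install .
cd wrfhydropy/tests
pytest -vv --durations=0 --cov=wrfhydropy --cov-report=xml --junitxml=pytest.xml
```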
/.gitignore:
--------------------------------------------------------------------------------
1 | # fortran compiled files
2 | *.mod
3 | *.o
4 |
5 | *.gz
6 | *.tar
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 | .pytest_cache/
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | env/
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # pyenv
82 | .python-version
83 |
84 | # celery beat schedule file
85 | celerybeat-schedule
86 |
87 | # SageMath parsed files
88 | *.sage.py
89 |
90 | # dotenv
91 | .env
92 |
93 | # virtualenv
94 | .venv
95 | venv/
96 | ENV/
97 |
98 | # Spyder project settings
99 | .spyderproject
100 | .spyproject
101 |
102 | # Rope project settings
103 | .ropeproject
104 |
105 | # mkdocs documentation
106 | /site
107 |
108 | # mypy
109 | .mypy_cache/
110 |
111 | #macstuff
112 | .DS_Store
113 |
114 | #Pycharm stuff
115 | /.idea
116 |
117 | #Emacs
118 | *~
119 |
120 |
121 |
--------------------------------------------------------------------------------
/.pep8speaks.yml:
--------------------------------------------------------------------------------
1 | # File : .pep8speaks.yml
2 |
3 | scanner:
4 | diff_only: False # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
5 | linter: pycodestyle # Other option is flake8
6 |
7 | pycodestyle: # Same as scanner.linter value. Other option is flake8
8 | max-line-length: 100 # Default is 79 in PEP 8
9 |
10 | no_blank_comment: False # If True, no comment is made on PR without any errors.
11 | descending_issues_order: False # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file
12 |
13 | message: # Customize the comment made by the bot
14 | opened: # Messages when a new PR is submitted
15 | header: "Hello @{name}! Thanks for opening this PR. "
16 | # The keyword {name} is converted into the author's username
17 |     footer: "Local linting (style checking) can be performed using [pycodestyle](https://github.com/PyCQA/pycodestyle). General guidelines can be found at the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)"
18 | # The messages can be written as they would over GitHub
19 | updated: # Messages when new commits are added to the PR
20 | header: "Hello @{name}! Thanks for updating this PR. "
21 |     footer: ""  # Why comment the link to the style guide every time? :)
22 | no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "
23 |
--------------------------------------------------------------------------------
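The same check that pep8speaks applies to pull requests can be run locally with pycodestyle, mirroring the `max-line-length` set above (a sketch; the repository does not prescribe an exact invocation):

```bash
pip install pycodestyle
pycodestyle --max-line-length=100 wrfhydropy
```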
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | sudo: true
3 |
4 | notifications:
5 | email: false
6 | python:
7 | - "3.8"
8 | before_install:
9 | - sudo add-apt-repository ppa:remik-ziemlinski/nccmp -y
10 | - sudo apt-get update
11 | - sudo apt-get install -y --allow-unauthenticated nccmp
12 | - pip install --upgrade pytest pytest-cov
13 | - pip install --upgrade coveralls
14 | - pip install -r requirements.txt
15 | - python setup.py install
16 | script:
17 | - pytest -v --cov=wrfhydropy
18 | after_success:
19 | - coveralls
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # WRF-HYDRO-PY
2 |
3 | [](https://github.com/NCAR/wrf_hydro_py/blob/main/.github/workflows/ci.yaml)
4 | [](https://coveralls.io/github/NCAR/wrf_hydro_py?branch=master)
5 | [](https://pypi.python.org/pypi/wrfhydropy)
6 | [](https://github.com/NCAR/wrf_hydro_py/releases/latest)
7 | [](https://wrfhydropy.readthedocs.io/en/latest/?badge=latest)
8 |
9 |
10 |
11 | 
12 |
13 |
14 | **IMPORTANT:** This package is in the very early stages of development and the package API may change at any time. It is not recommended that this package be used for significant work until version 0.1.
15 |
16 | ## Description
17 | *wrfhydropy* provides an end-to-end python interface to support reproducible research and construction of workflows involving the
18 | WRF-Hydro model. See the docs for an extended description of [what-and-why wrfhydropy](https://wrfhydropy.readthedocs.io/en/latest/what-and-why.html).
19 |
20 | ## Documentation
21 | Documentation is available on-line through `help()` and via [readthedocs](https://wrfhydropy.readthedocs.io/en/latest/index.html). Documentation is a work in progress, please feel free to help improve the documentation or to make an issue when the docs are inaccurate!
22 |
23 | ## Contributing standards
24 | Failure to adhere to contributing standards may result in your Pull Request being rejected.
25 |
26 | ### pep8speaks
27 | All pull requests are linted automatically by pep8speaks, which reports the results as a comment on the pull request. The pep8speaks configuration is specified in .pep8speaks.yml. All pull requests must satisfy pep8speaks.
28 | Local linting can be performed after a `pip install` of [pycodestyle](https://github.com/PyCQA/pycodestyle). Pep8speaks linting reports also update with updated pull requests.
29 |
30 | ### Additional Style Guidelines
31 | * Max line length: 100 chars.
32 | * docstrings: [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)
33 | * All other guidance follows [Google style guide](https://google.github.io/styleguide/pyguide.html)
34 | * General advice: [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)
35 |
36 | ### Testing
37 | All pull requests must pass automated testing (via TravisCI). Testing can be performed locally by running `pytest` in the `wrfhydropy/tests` directory. Currently, this testing relies on the [`nccmp`](https://gitlab.com/remikz/nccmp) binary for comparing netcdf files. A docker container can be supplied for testing on request (and documentation will subsequently be placed here).
38 |
39 | ### Coverage
40 | Testing concludes by submitting a request to [coveralls](https://coveralls.io/), which automatically reports changes in code coverage from the testing. Coverage should be maximized with every pull request. That is, all new functions or classes must be accompanied by comprehensive additional unit/integration tests in the `wrf_hydro_py/wrfhydropy/tests` directory. Coverage can be run locally by `pip` installing [`coverage`](https://pypi.org/project/coverage/) and [`pytest-cov`](https://pypi.org/project/pytest-cov/) and using a process similar to the following:
41 | ```
42 | cd wrfhydropy/tests/
43 | pytest --cov=wrfhydropy
44 | coverage html -d coverage_html
45 | chrome coverage_html/index.html # or your browser of choice
46 | ```
47 |
--------------------------------------------------------------------------------
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 | source/
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = wrfhydropy
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
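A local documentation build is a short sketch (assuming the packages in `doc/requirements.txt`, plus sphinx itself, are installed); the catch-all target above forwards `html` to `sphinx-build -M html`, so output lands in `doc/_build/html`:

```bash
pip install sphinx -r doc/requirements.txt
cd doc
make html
```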
/doc/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/stable/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | import datetime
16 | import os
17 | import sys
18 | sys.path.insert(0, os.path.abspath('.'))
19 | sys.path.insert(0, os.path.abspath('../'))
20 |
21 | # -- Project information -----------------------------------------------------
22 |
23 | project = 'wrfhydropy'
24 | copyright = '2018-%s, wrfhydropy Developers' % datetime.datetime.now().year
25 |
26 | # The short X.Y version
27 | version = ''
28 | # The full version, including alpha/beta/rc tags
29 | release = '0.0.3'
30 |
31 |
32 | # -- General configuration ---------------------------------------------------
33 |
34 | # If your documentation needs a minimal Sphinx version, state it here.
35 | #
36 | # needs_sphinx = '1.0'
37 |
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 | 'sphinx.ext.autodoc',
43 | 'sphinx.ext.viewcode',
44 | 'sphinx.ext.napoleon',
45 | 'nbsphinx',
46 | "sphinx.ext.autosummary",
47 | "sphinx.ext.intersphinx",
48 | # "sphinx.ext.extlinks",
49 | "sphinx.ext.mathjax",
50 | "numpydoc",
51 | "IPython.sphinxext.ipython_directive",
52 | "IPython.sphinxext.ipython_console_highlighting",
53 | ]
54 |
55 | nbsphinx_timeout = 600
56 | #nbsphinx_execute = "always"
57 | #nbsphinx_allow_errors = True
58 | #nbsphinx_prolog = """
59 | #{% set docname = env.doc2path(env.docname, base=None) %}
60 | #You can run this notebook in a `live session `_ |Binder| or view it `on Github `_.
61 | #.. |Binder| image:: https://mybinder.org/badge.svg
62 | # :target: https://mybinder.org/v2/gh/pydata/xarray/master?urlpath=lab/tree/doc/{{ docname }}
63 | #"""
64 |
65 | autosummary_generate = True
66 |
67 | napoleon_google_docstring = True
68 | napoleon_use_param = False
69 | napoleon_use_ivar = True
70 |
71 | # Add any paths that contain templates here, relative to this directory.
72 | templates_path = ['_templates']
73 |
74 | # The suffix(es) of source filenames.
75 | # You can specify multiple suffix as a list of string:
76 | #
77 | # source_suffix = ['.rst', '.md']
78 | source_suffix = '.rst'
79 |
80 | # The master toctree document.
81 | master_doc = 'index'
82 |
83 | # The language for content autogenerated by Sphinx. Refer to documentation
84 | # for a list of supported languages.
85 | #
86 | # This is also used if you do content translation via gettext catalogs.
87 | # Usually you set "language" from the command line for these cases.
88 | language = 'en'
89 |
90 | # List of patterns, relative to source directory, that match files and
91 | # directories to ignore when looking for source files.
92 | # This pattern also affects html_static_path and html_extra_path .
93 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
94 |
95 | # The name of the Pygments (syntax highlighting) style to use.
96 | pygments_style = 'sphinx'
97 |
98 |
99 | # -- Options for HTML output -------------------------------------------------
100 |
101 | # The theme to use for HTML and HTML Help pages. See the documentation for
102 | # a list of builtin themes.
103 | #
104 | html_theme = 'sphinx_rtd_theme'
105 |
106 | # Theme options are theme-specific and customize the look and feel of a theme
107 | # further. For a list of options available for each theme, see the
108 | # documentation.
109 | #
110 | # html_theme_options = {}
111 |
112 | # Add any paths that contain custom static files (such as style sheets) here,
113 | # relative to this directory. They are copied after the builtin static files,
114 | # so a file named "default.css" will overwrite the builtin "default.css".
115 | html_static_path = ['_static']
116 |
117 | # Custom sidebar templates, must be a dictionary that maps document names
118 | # to template names.
119 | #
120 | # The default sidebars (for documents that don't match any pattern) are
121 | # defined by theme itself. Builtin themes are using these templates by
122 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
123 | # 'searchbox.html']``.
124 | #
125 | # html_sidebars = {}
126 |
127 |
128 | # -- Options for HTMLHelp output ---------------------------------------------
129 |
130 | # Output file base name for HTML help builder.
131 | htmlhelp_basename = 'wrfhydropydoc'
132 |
133 |
134 | # -- Options for LaTeX output ------------------------------------------------
135 |
136 | latex_elements = {
137 | # The paper size ('letterpaper' or 'a4paper').
138 | #
139 | # 'papersize': 'letterpaper',
140 |
141 | # The font size ('10pt', '11pt' or '12pt').
142 | #
143 | # 'pointsize': '10pt',
144 |
145 | # Additional stuff for the LaTeX preamble.
146 | #
147 | # 'preamble': '',
148 |
149 | # Latex figure (float) alignment
150 | #
151 | # 'figure_align': 'htbp',
152 | }
153 |
154 | # Grouping the document tree into LaTeX files. List of tuples
155 | # (source start file, target name, title,
156 | # author, documentclass [howto, manual, or own class]).
157 | latex_documents = [
158 | (master_doc, 'wrfhydropy.tex', 'wrfhydropy Documentation',
159 | 'wrfhydropy Developers', 'manual'),
160 | ]
161 |
162 |
163 | # -- Options for manual page output ------------------------------------------
164 |
165 | # One entry per manual page. List of tuples
166 | # (source start file, name, description, authors, manual section).
167 | man_pages = [
168 | (master_doc, 'wrfhydropy', 'wrfhydropy Documentation', 1)
169 | ]
170 |
171 |
172 | # -- Options for Texinfo output ----------------------------------------------
173 |
174 | # Grouping the document tree into Texinfo files. List of tuples
175 | # (source start file, target name, title, author,
176 | # dir menu entry, description, category)
177 | texinfo_documents = [
178 | (master_doc, 'wrfhydropy', 'wrfhydropy Documentation',
179 | 'wrfhydropy', 'One line description of project.',
180 | 'Miscellaneous'),
181 | ]
182 |
183 |
184 | # -- Extension configuration -------------------------------------------------
185 |
--------------------------------------------------------------------------------
/doc/evaluation.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: wrfhydropy
2 |
3 | #############
4 | Evaluation
5 | #############
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | Evaluation
11 | Evaluation.brier
12 | Evaluation.contingency
13 | Evaluation.crps
14 | Evaluation.event
15 | Evaluation.gof
16 |
--------------------------------------------------------------------------------
/doc/examples.rst:
--------------------------------------------------------------------------------
1 | Examples
2 | ========
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | examples/ex_01_end_to_end
8 | examples/ex_02_docker_jupyter
9 | examples/ex_03_cycle_simulation
10 | examples/ex_04_ensembles
11 |
--------------------------------------------------------------------------------
/doc/examples/ex_02_docker_jupyter.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Running wrfhydropy through docker\n",
8 |     "The first example shows running `wrfhydropy` from the Cheyenne machine. This is similar to being on a linux environment where model compilation is not a problem. \n",
9 | "\n",
10 |     "For developers on laptops and other non-linux use cases, `docker` can be used as shown here. The prerequisites are: \n",
11 | "* Docker installed\n",
12 | "* Local version of `wrf_hydro_py` repository\n",
13 | "* Local version of `wrf_hydro_nwm_public` repository\n",
14 | "\n",
15 | "On the host machine:\n",
16 | "```bash\n",
17 | "docker pull wrfhydro/dev:conda\n",
18 | "docker run -it -p 8899:8888 \\\n",
19 | " -v /Users/jamesmcc/WRF_Hydro/wrf_hydro_nwm_public:/wrf_hydro_nwm_public \\\n",
20 | " -v /Users/jamesmcc/WRF_Hydro/wrf_hydro_py:/wrf_hydro_py \\\n",
21 | " wrfhydro/dev:conda\n",
22 | "```\n",
23 |     "You will need to customize the paths to `wrf_hydro_py` and `wrf_hydro_nwm_public` on the host side of the volume mounts. Leaving the docker side of the volume mounts unchanged will streamline the code below. Note that we are using different ports on host:docker.\n",
24 | "\n",
25 | "\n",
26 |     "The above command will drop you into the docker container. In docker, first install the mounted `wrfhydropy` (this install is lost when docker exits, but the source modifications remain on the mounted drive, so it simply has to be reinstalled each session):\n",
27 | "\n",
28 | "```\n",
29 | "cd /wrf_hydro_py\n",
30 | "python setup.py develop\n",
31 | "```\n",
32 | "\n",
33 | "then, to start jupyter lab, execute: \n",
34 | "```bash\n",
35 | "cd doc/examples\n",
36 | "jupyter-lab --ip 0.0.0.0 --no-browser --allow-root\n",
37 | "```\n",
38 | "\n",
39 |     "This will start jupyter lab in docker and print a URL with a token embedded, like this (don't use this one): \n",
40 | "\n",
41 | "```\n",
42 | "http://(ac61502766bc or 127.0.0.1):8888/?token=a824b4cdb345e944d3754f1d5a97d2aedb4b003b2e76e625\n",
43 | "```\n",
44 | "To connect to jupyter lab on the host, transform the above URL to the following: \n",
45 | "\n",
46 | "```\n",
47 | "http://localhost:8899/?token=a824b4cdb345e944d3754f1d5a97d2aedb4b003b2e76e625\n",
48 | "```\n",
49 | "(keeping the token the same) and paste it in the browser on your local machine. Note that the port on the local host is not the port indicated by jupyter lab, it's the one we selected in docker.\n",
50 | "\n",
51 | "Finally, select the `wrfhydropy` example notebook you want to run!"
52 | ]
53 | }
54 | ],
55 | "metadata": {
56 | "kernelspec": {
57 | "display_name": "Python 3",
58 | "language": "python",
59 | "name": "python3"
60 | },
61 | "language_info": {
62 | "codemirror_mode": {
63 | "name": "ipython",
64 | "version": 3
65 | },
66 | "file_extension": ".py",
67 | "mimetype": "text/x-python",
68 | "name": "python",
69 | "nbconvert_exporter": "python",
70 | "pygments_lexer": "ipython3",
71 | "version": "3.7.1"
72 | }
73 | },
74 | "nbformat": 4,
75 | "nbformat_minor": 2
76 | }
77 |
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | .. wrfhydropy documentation master file, created by
2 | sphinx-quickstart on Wed Jun 13 08:51:59 2018.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | wrfhydropy: An API for the WRF-Hydro model and more.
7 | ====================================================
8 |
9 | Documentation
10 | -------------
11 |
12 | **Getting Started**
13 |
14 | * :doc:`what-and-why`
15 | * :doc:`installation`
16 | * :doc:`examples`
17 |
18 | .. toctree::
19 | :maxdepth: 2
20 | :hidden:
21 | :caption: Getting Started
22 |
23 | what-and-why
24 | installation
25 | examples
26 |
27 |
28 | **Reference**
29 |
30 | * :doc:`model_api`
31 | * :doc:`output`
32 | * :doc:`utilities`
33 |
34 | .. toctree::
35 | :maxdepth: 2
36 | :hidden:
37 | :caption: Reference
38 |
39 | model_api
40 | output
41 | evaluation
42 | utilities
43 |
44 |
45 | **Help & Index**
46 |
47 | * :ref:`genindex`
48 | * :ref:`modindex`
49 | * :ref:`search`
50 |
51 | .. toctree::
52 | :maxdepth: 1
53 | :hidden:
54 | :caption: Help & Index
55 |
56 | search
57 |
--------------------------------------------------------------------------------
/doc/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | Dependencies
5 | ------------
6 |
7 | Instructions
8 | ------------
9 | The easiest way::
10 |
11 | $ pip install wrfhydropy
12 |
13 | Development installation::
14 |
15 | $ git clone https://github.com/yourhandle/wrf_hydro_py.git
16 | $ cd wrf_hydro_py
17 | $ python setup.py develop
18 |
19 |
--------------------------------------------------------------------------------
/doc/model_api.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: wrfhydropy
2 |
3 | #############
4 | Model API
5 | #############
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | Model
11 | Domain
12 | Job
13 | PBSCheyenne
14 | Simulation
15 | EnsembleSimulation
16 | CycleSimulation
17 | parallel_teams_run
18 |
--------------------------------------------------------------------------------
/doc/output.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: wrfhydropy
2 |
3 | #############
4 | Output
5 | #############
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | open_whp_dataset
11 |
12 |
--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
1 | ipython
2 | nbsphinx
3 | numpydoc
4 | sphinx-rtd-theme
5 | pandoc
6 |
--------------------------------------------------------------------------------
/doc/source/modules.rst:
--------------------------------------------------------------------------------
1 | wrfhydropy
2 | ==========
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | wrfhydropy
8 |
--------------------------------------------------------------------------------
/doc/source/wrfhydropy.core.rst:
--------------------------------------------------------------------------------
1 | wrfhydropy.core package
2 | =======================
3 |
4 | Submodules
5 | ----------
6 |
7 | wrfhydropy.core.domain module
8 | -----------------------------
9 |
10 | .. automodule:: wrfhydropy.core.domain
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | wrfhydropy.core.ensemble module
16 | -------------------------------
17 |
18 | .. automodule:: wrfhydropy.core.ensemble
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | wrfhydropy.core.ensemble\_tools module
24 | --------------------------------------
25 |
26 | .. automodule:: wrfhydropy.core.ensemble_tools
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 |
31 | wrfhydropy.core.ioutils module
32 | ------------------------------
33 |
34 | .. automodule:: wrfhydropy.core.ioutils
35 | :members:
36 | :undoc-members:
37 | :show-inheritance:
38 |
39 | wrfhydropy.core.job module
40 | --------------------------
41 |
42 | .. automodule:: wrfhydropy.core.job
43 | :members:
44 | :undoc-members:
45 | :show-inheritance:
46 |
47 | wrfhydropy.core.model module
48 | ----------------------------
49 |
50 | .. automodule:: wrfhydropy.core.model
51 | :members:
52 | :undoc-members:
53 | :show-inheritance:
54 |
55 | wrfhydropy.core.namelist module
56 | -------------------------------
57 |
58 | .. automodule:: wrfhydropy.core.namelist
59 | :members:
60 | :undoc-members:
61 | :show-inheritance:
62 |
63 | wrfhydropy.core.outputdiffs module
64 | ----------------------------------
65 |
66 | .. automodule:: wrfhydropy.core.outputdiffs
67 | :members:
68 | :undoc-members:
69 | :show-inheritance:
70 |
71 | wrfhydropy.core.schedulers module
72 | ---------------------------------
73 |
74 | .. automodule:: wrfhydropy.core.schedulers
75 | :members:
76 | :undoc-members:
77 | :show-inheritance:
78 |
79 | wrfhydropy.core.simulation module
80 | ---------------------------------
81 |
82 | .. automodule:: wrfhydropy.core.simulation
83 | :members:
84 | :undoc-members:
85 | :show-inheritance:
86 |
87 |
88 | Module contents
89 | ---------------
90 |
91 | .. automodule:: wrfhydropy.core
92 | :members:
93 | :undoc-members:
94 | :show-inheritance:
95 |
--------------------------------------------------------------------------------
/doc/source/wrfhydropy.rst:
--------------------------------------------------------------------------------
1 | wrfhydropy package
2 | ==================
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 |
9 | wrfhydropy.core
10 | wrfhydropy.tests
11 |
12 | Module contents
13 | ---------------
14 |
15 | .. automodule:: wrfhydropy
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
--------------------------------------------------------------------------------
/doc/utilities.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: wrfhydropy
2 |
3 | #############
4 | Utilities
5 | #############
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | xrnan
11 | xrcmp
12 | diff_namelist
13 |
--------------------------------------------------------------------------------
/doc/what-and-why.rst:
--------------------------------------------------------------------------------
1 | Overview: What and why wrfhydropy?
2 | ==================================
3 |
4 | What is wrfhydropy?
5 | -------------------
6 |
7 | **wrfhydropy** provides an end-to-end python interface to support
8 | reproducible research and construction of workflows involving the
9 | WRF-Hydro model.
10 |
11 | **wrfhydropy**:
12 | * Is a Python API for the WRF-Hydro modelling system.
13 | * Provides tools for working with WRF-Hydro input (preparation)
14 | and output (analysis), largely based on xarray_.
15 | * Is tested_ and coverage_ is calculated.
16 |
17 | The package provides fine-grained control over the model and its
18 | inputs and outputs. Generally, high-level workflows are not found here
19 | **but should be and can easily be built from wrfhydropy**.
20 |
21 | **wrfhydropy** facilitates all aspects of working with WRF-Hydro including:
22 | * compiling
23 | * setting up experiments (manipulating input files and namelists)
24 | * running and scheduling jobs
25 | * collecting output
26 | * analysis (input and output)
27 | * sharing and reproducing results (jupyter notebooks)
28 |
29 | The wrfhydropy package is **user supported and community contributed**. That
30 | means you can help add to and improve it!
31 |
32 |
33 | Why wrfhydropy?
34 | ---------------
35 | The WRF-Hydro model was not originally built with many applications or workflows
36 | in mind. Without significant investment in rewriting the code, a scripting
37 | language is needed to adapt the FORTRAN model API to something suited to other
38 | purposes. Python is a good choice for this secondary API language for a variety of
39 | reasons (widely adopted, multi-platform, great packages for scientific analysis,
40 | etc ...). Python therefore provides a mechanism for developing a better (for many
41 | purposes) model interface than is afforded by the underlying model. For this reason,
42 | a few conceptualizations in wrfhydropy are formalized differently than in FORTRAN.
43 | These are summarized in `Key concepts`_. The model API as developed in python may begin
44 | to make its way back to the underlying FORTRAN code with time.
45 |
46 | wrfhydropy was initially developed to handle the WRF-Hydro model testing
47 | (`wrf_hydro_nwm_public/tests `_)
48 | and, in particular, the need to be able to
49 | easily swap domains while holding model options constant. Another early
50 | application was the construction and execution of ensembles and ensemble
51 | forecasts. The examples_ included in this documentation will grow to show other
52 | applications of the package.
53 |
54 |
55 | Limitations of wrfhydropy
56 | -------------------------
57 |
58 | The wrfhydropy package does many things but also has limitations
59 | which are worth acknowledging up-front. The development of wrfhydropy has
60 | mostly emerged to support testing and other applications of the NWM. While
61 | wrfhydropy supports other modes of running WRF-Hydro, the further away from
62 | the NWM you get, the less likely it is that wrfhydropy will support your needs. This
63 | guidance is highly dependent on the differences from the NWM. If the differences
64 | are contained in the namelists only, you are likely not going to have issues. But
65 | attempting to use the Noah model instead of NoahMP, for example, will
66 | simply not work. wrfhydropy is open to changes/enhancements to support your needs,
67 | but may require you to implement *and test* them to get them into the master branch.
68 |
69 | wrfhydropy does not provide an in-memory connection between WRF-Hydro and Python.
70 | The API is implemented through system calls (Python's subprocess) and all information
71 | between Python and the model passes through disk. There is no magic in wrfhydropy,
72 | just convenience: you still need a system and environment in which WRF-Hydro can be
73 | compiled and run. (Such as our `development docker container`_.)
74 |
75 |
76 | Key concepts
77 | ------------
78 |
79 | Here we summarize a few concepts in wrfhydropy which differ from how WRF-Hydro is generally
80 | used. Links are provided to examples.
81 |
82 |
83 | Object Oriented API
84 | ###################
85 | The wrfhydropy model API follows an object oriented approach. Composition
86 | of objects is a theme of the design. That is: core building blocks are put
87 | together to form more complicated objects. The separation of concerns of these
88 | objects is important (and sometimes challenging), but often rewarding.
89 |
90 | Upper case means a class (and will link to the class definition).
91 | Lower case means an instance of a class (not linked).
92 | The left arrow means object composition, also known as a "has a" relationship.
93 |
94 | Core objects:
95 | * Domain
96 | * Model
97 | * Job
98 | * Scheduler
99 |
100 | Higher-level objects:
101 | * Simulation <- domain, model, job [, scheduler]
102 | * Ensemble <- simulation, job [, scheduler]
103 | * Cycle <- simulation|ensemble, job [, scheduler]
104 |
105 | The first example in the documentation,
106 | `End-to-end overview of wrfhydropy: Simulation evaluation`_
107 | details the core objects, their initialization and their composition into
108 | a Simulation object.
109 |
110 |
111 | Namelists: Model and domain sides
112 | #################################
113 | Namelists are treated by wrfhydropy in a completely different way
114 | than WRF-Hydro model users experience them. The input namelists to the model,
115 | namelist.hrldas and hydro.namelist, are each split into two pieces, the model-side
116 | and domain-side options. The new namelist files collect many different potential
117 | namelists using named configurations. The motivation for this and the details are
118 | explained in depth in `namelist section`_ of the first example of the documentation.
119 |
120 |
121 | Jobs:
122 | #####
123 | The notion of a Job is formalized by wrfhydropy and can be a bit surprising to
124 | WRF-Hydro users. Jobs are essentially model time and frequency interventions into the
125 | model namelists. Each job has a different call to the executable and a subdirectory
126 | of the run directory dedicated to its provenance and its artifacts. Details are
127 | provided in the `Job section`_ of the first example of the documentation.
128 |
129 |
130 | .. _xarray: http://xarray.pydata.org/en/stable/
131 | .. _tested: https://github.com/NCAR/wrf_hydro_py/tree/master/wrfhydropy/tests
132 | .. _coverage: https://coveralls.io/github/NCAR/wrf_hydro_py
133 | .. _examples: https://wrfhydropy.readthedocs.io/en/latest/examples.html
134 | .. _`development docker container`: https://hub.docker.com/r/wrfhydro/dev
135 | .. _`End-to-end overview of wrfhydropy: Simulation evaluation`: https://wrfhydropy.readthedocs.io/en/latest/examples/ex_01_end_to_end.html
136 | .. _`namelist section`: https://wrfhydropy.readthedocs.io/en/latest/examples/ex_01_end_to_end.html#2.-Namelists-and-configurations-in-wrfhydropy
137 | .. _`Job section`: https://wrfhydropy.readthedocs.io/en/latest/examples/ex_01_end_to_end.html#7.-Job-object
--------------------------------------------------------------------------------
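The object composition described under "Object Oriented API" above can be sketched in code. This is a minimal illustration only: the constructor arguments below are placeholders and assumptions (the Domain arguments follow the docstring in `wrfhydropy/core/domain.py`; the Model and Job arguments and the `add` calls reflect typical usage and are not taken from this document), so consult the end-to-end example notebook for the authoritative signatures.

```python
import wrfhydropy

# Core objects (the paths shown are hypothetical).
domain = wrfhydropy.Domain(
    domain_top_dir='/path/to/example_case',  # parent dir containing all domain files
    domain_config='nwm',                     # one of the named configurations
)
model = wrfhydropy.Model('/path/to/wrf_hydro_nwm_public/src')  # argument is an assumption
job = wrfhydropy.Job('job_0')                                  # argument is an assumption

# Higher-level object: a Simulation "has a" domain, model, and job(s).
sim = wrfhydropy.Simulation()
sim.add(domain)
sim.add(model)
sim.add(job)
```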
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | # Required
5 | version: 2
6 |
7 | # Build documentation in the doc/ directory with Sphinx
8 | sphinx:
9 | configuration: doc/conf.py
10 |
11 | # Docker image used for build
12 | build:
13 | os: ubuntu-22.04
14 | tools:
15 | python: "3.10"
16 |
17 | # Optionally build your docs in additional formats such as PDF and ePub
18 | formats: []
19 |
20 | # Optionally set the version of Python and requirements required to build your docs
21 | python:
22 | install:
23 | - requirements: doc/requirements.txt
24 | - requirements: requirements.txt
25 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boltons>=23.1.1
2 | bs4>=0.0.1
3 | dask[bag]>=2.14.0
4 | deepdiff>=6.2.3
5 | f90nml>=1.2
6 | importlib-metadata==4.13.0
7 | netCDF4>=1.5.3
8 | numpy>=1.23.5
9 | pandas>=1.3.5
10 | properscoring==0.1
11 | pytest<=7.4.4
12 | pytest-html>=3.0.0
13 | pytest-datadir-ng>=1.1.1
14 | pytest-lazy-fixture>=0.6.3
15 | requests>=2.23.0
16 | spotpy>=1.6.0
17 | urllib3>=2.0.2
18 | xarray>=0.19
19 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | with open("README.md", "r", encoding="utf-8") as fh:
4 | long_description = fh.read()
5 |
6 | setup(
7 | name='wrfhydropy',
8 | version='0.0.21',
9 | packages=find_packages(),
10 | package_data={'wrfhydropy': ['core/data/*']},
11 | url='https://github.com/NCAR/wrf_hydro_py',
12 | license='MIT',
13 | install_requires=[
14 | 'boltons>=23.1.1',
15 | 'bs4>=0.0.1',
16 | 'dask[bag]>=2.14.0',
17 | 'deepdiff>=6.2.3',
18 | 'f90nml>=1.2',
19 | 'importlib-metadata==4.13.0',
20 | 'netCDF4>=1.5.3',
21 | 'numpy>=1.23.5',
22 | 'pandas>=1.3.5',
23 | 'properscoring==0.1',
24 | 'pytest<=7.4.4',
25 | 'pytest-html>=3.0.0',
26 | 'pytest-datadir-ng>=1.1.1',
27 | 'pytest-lazy-fixture>=0.6.3',
28 | 'requests>=2.23.0',
29 | 'spotpy>=1.6.0',
30 | 'urllib3>=2.0.2',
31 | 'xarray>=0.19'
32 | ],
33 | author='WRF-Hydro Team',
34 | author_email='@ucar.edu',
35 | description='API for the WRF-Hydro model',
36 | long_description=long_description,
37 | long_description_content_type="text/markdown",
38 | python_requires=">=3.7",
39 | )
40 |
--------------------------------------------------------------------------------
/whp_test_env.yml:
--------------------------------------------------------------------------------
1 | name: whp
2 | channels:
3 | - conda-forge
4 | - nodefaults
5 | dependencies:
6 | - boltons>=23.1.1
7 | - bs4>=0.0.1
8 | - dask[bag]>=2.14.0
9 | - deepdiff==6.3.0
10 | - f90nml>=1.2
11 | - netCDF4>=1.5.3
12 | - numpy>=1.23.5
13 | - pandas>=1.0.3
14 | - pathlib>=1.0.1
15 | - properscoring==0.1
16 | - pytest<=7.4.4
17 | - pytest-html>=3.0.0
18 | - pytest-lazy-fixture>=0.6.3
19 | - requests>=2.23.0
20 | - spotpy>=1.6.0
21 | - xarray>=0.19
22 | - pip
23 | - pip:
24 | - click != 8.1.0
25 | - black < 23.1.0
26 | - isort
27 | - flake8
28 | - pylint
29 | - pytest-datadir-ng>=1.1.1
30 | - pytest-cov
31 | - pytest-env
32 | - pytest-order
33 | - pytest-xdist
34 | - pyyaml
35 |
--------------------------------------------------------------------------------
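The CI workflow builds this environment with micromamba; a local equivalent with conda is sketched below (micromamba works the same way via `micromamba create -f`):

```bash
conda env create -f whp_test_env.yml   # creates the "whp" environment defined above
conda activate whp
pip install .                          # then install wrfhydropy itself into it
```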
/wrfhydropy/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import ioutils
2 | from .core import namelist
3 | from .core import outputdiffs
4 | from .core import schedulers
5 | from .core.collection import open_whp_dataset
6 | from .core.cycle import *
7 | # from .core.cycle import CycleSimulation
8 | from .core.domain import *
9 | from .core.ensemble import *
10 | # from .core.ensemble import EnsembleSimulation
11 | from .core.evaluation import Evaluation
12 | from .core.job import Job
13 | from .core.model import Model
14 | from .core.namelist import diff_namelist
15 | from .core.schedulers import PBSCheyenne
16 | from .core.simulation import Simulation
17 | from .core.teams import parallel_teams_run
18 | from .util.xrcmp import xrcmp
19 | from .util.xrnan import xrnan
20 |
--------------------------------------------------------------------------------
/wrfhydropy/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/core/__init__.py
--------------------------------------------------------------------------------
/wrfhydropy/core/collection.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import dask
3 | import dask.bag
4 | from datetime import datetime
5 | import itertools
6 | from multiprocessing.pool import Pool
7 | import numpy as np
8 | import pathlib
9 | from wrfhydropy.core.ioutils import timesince
10 | import xarray as xr
11 |
12 |
13 | def is_not_none(x):
14 | return x is not None
15 |
16 |
17 | def group_lead_time(ds: xr.Dataset) -> int:
18 | return ds.lead_time.item(0)
19 |
20 |
21 | def group_member_lead_time(ds: xr.Dataset) -> str:
22 | return str(ds.member.item(0)) + '-' + str(ds.lead_time.item(0))
23 |
24 |
25 | def group_member(ds: xr.Dataset) -> int:
26 | return ds.member.item(0)
27 |
28 |
29 | def group_identity(ds: xr.Dataset) -> int:
30 | return 1
31 |
32 |
33 | def merge_reference_time(ds_list: list) -> xr.Dataset:
34 | return xr.concat(ds_list, dim='reference_time', coords='minimal')
35 |
36 |
37 | def merge_member(ds_list: list) -> xr.Dataset:
38 | return xr.concat(ds_list, dim='member', coords='minimal')
39 |
40 |
41 | def merge_lead_time(ds_list: list) -> xr.Dataset:
42 | return xr.concat(ds_list, dim='lead_time', coords='minimal')
43 |
44 |
45 | def merge_time(ds_list: list) -> xr.Dataset:
46 | return xr.concat(ds_list, dim='time', coords='minimal')
47 |
48 |
49 | def preprocess_whp_data(
50 | path,
51 | isel: dict = None,
52 | drop_variables: list = None
53 | ) -> xr.Dataset:
54 | try:
55 | ds = xr.open_dataset(path)
56 | except OSError:
57 | print("Skipping file, unable to open: ", path)
58 | return None
59 |
60 | if drop_variables is not None:
61 | to_drop = set(ds.variables).intersection(set(drop_variables))
62 | if to_drop != set():
63 | ds = ds.drop_vars(to_drop)
64 |
65 | # Exception for RESTART.YYMMDDHHMM_DOMAIN1 files
66 | if 'RESTART.' in str(path):
67 | time = datetime.strptime(ds.Times.values[0].decode('utf-8'), '%Y-%m-%d_%H:%M:%S')
68 | ds = ds.squeeze('Time')
69 | ds = ds.drop_vars(['Times'])
70 | ds = ds.assign_coords(time=time)
71 |
72 | # Exception for HYDRO_RST.YY-MM-DD_HH:MM:SS_DOMAIN1 files
73 | if 'HYDRO_RST.' in str(path):
74 | time = datetime.strptime(ds.attrs['Restart_Time'], '%Y-%m-%d_%H:%M:%S')
75 | ds = ds.assign_coords(time=time)
76 |
77 | filename_parent = pathlib.Path(path).parent
78 | filename_grandparent = pathlib.Path(path).parent.parent
79 |
80 | # Member preprocess
81 | # Assumption is that parent dir is member_mmm
82 | # member = None
83 | if 'member' in filename_parent.name:
84 | # This is a double check that this convention is because of wrf_hydro_py
85 | assert filename_parent.parent.joinpath('WrfHydroEns.pkl').exists()
86 | member = int(filename_parent.name.split('_')[-1])
87 | ds.coords['member'] = member
88 |
89 | # Lead time preprocess
90 | # Assumption is that parent dir is cast_yymmddHH
91 | if 'cast_' in filename_parent.name or 'cast_' in filename_grandparent.name:
92 | # Exception for cast HYDRO_RST.YY-MM-DD_HH:MM:SS_DOMAIN1 and
93 | # RESTART.YYMMDDHHMM_DOMAIN1 files
94 | if 'HYDRO_RST.' in str(path) or 'RESTART' in str(path):
95 | cast_fmt = 'cast_%Y%m%d%H'
96 | if 'cast_' in filename_parent.name:
97 | # This is a double check that this convention is because of wrf_hydro_py
98 | assert filename_parent.parent.joinpath('WrfHydroCycle.pkl').exists()
99 | ds.coords['reference_time'] = datetime.strptime(filename_parent.name, cast_fmt)
100 | elif 'cast_' in filename_grandparent.name:
101 | # This is a double check that this convention is because of wrf_hydro_py
102 | assert filename_grandparent.parent.joinpath('WrfHydroCycle.pkl').exists()
103 | ds.coords['reference_time'] = \
104 | datetime.strptime(filename_grandparent.name, cast_fmt)
105 | ds.coords['lead_time'] = np.array(
106 | ds.time.values - ds.reference_time.values,
107 | dtype='timedelta64[ns]'
108 | )
109 | ds = ds.drop_vars('time')
110 |
111 | # Could create a valid time variable here, but I'm guessing it's more efficient
112 | # after all the data are collected.
113 | # ds['valid_time'] = np.datetime64(int(ds.lead_time) + int(ds.reference_time), 'ns')
114 |
115 | else:
116 | if 'reference_time' in ds.variables:
117 | ds = ds.drop_vars('reference_time')
118 |
119 | # Spatial subsetting
120 | if isel is not None:
121 | ds = ds.isel(isel)
122 |
123 | return ds
124 |
125 |
126 | def open_whp_dataset_inner(
127 | paths: list,
128 | chunks: dict = None,
129 | attrs_keep: list = ['featureType', 'proj4',
130 | 'station_dimension', 'esri_pe_string',
131 | 'Conventions', 'model_version'],
132 | isel: dict = None,
133 | drop_variables: list = None,
134 | npartitions: int = None,
135 | profile: int = False
136 | ) -> xr.Dataset:
137 |
138 | if profile:
139 | then = timesince()
140 |
141 |     # This is totally arbitrary but seems to work ok.
142 | # if npartitions is None:
143 | # npartitions = dask.config.get('pool')._processes * 4
144 | # This choice does not seem to work well or at all, error?
145 | # npartitions = len(sorted(paths))
146 | paths_bag = dask.bag.from_sequence(paths, npartitions=npartitions)
147 |
148 | if profile:
149 | then = timesince(then)
150 | print('after paths_bag')
151 |
152 | ds_list = paths_bag.map(
153 | preprocess_whp_data,
154 | isel=isel,
155 | drop_variables=drop_variables
156 | ).filter(is_not_none).compute()
157 |
158 | if len(ds_list) == 0:
159 | return None
160 |
161 | if profile:
162 | then = timesince(then)
163 | print("after ds_list preprocess/filter")
164 |
165 | # Group by and merge by choices
166 | have_members = 'member' in ds_list[0].coords
167 | have_lead_time = 'lead_time' in ds_list[0].coords
168 | if have_lead_time:
169 | if have_members:
170 | group_list = [group_member_lead_time, group_lead_time]
171 | merge_list = [merge_reference_time, merge_member]
172 | else:
173 | group_list = [group_lead_time]
174 | merge_list = [merge_reference_time]
175 | else:
176 | if have_members:
177 | group_list = [group_member]
178 | merge_list = [merge_time]
179 | else:
180 | group_list = [group_identity]
181 | merge_list = [merge_time]
182 |
183 | for group, merge in zip(group_list, merge_list):
184 |
185 | if profile:
186 | then = timesince(then)
187 | print('before sort')
188 |
189 | the_sort = sorted(ds_list, key=group)
190 |
191 | if profile:
192 | then = timesince(then)
193 | print('after sort, before group')
194 |
195 | ds_groups = [list(it) for k, it in itertools.groupby(the_sort, group)]
196 |
197 | if profile:
198 | then = timesince(then)
199 | print('after group, before merge')
200 |
201 | # npartitons = len(ds_groups)
202 | group_bag = dask.bag.from_sequence(ds_groups, npartitions=npartitions)
203 | ds_list = group_bag.map(merge).compute()
204 |
205 | if profile:
206 | then = timesince(then)
207 | print('after merge')
208 |
209 | del group_bag, ds_groups, the_sort
210 |
211 | if have_lead_time:
212 | nwm_dataset = merge_lead_time(ds_list)
213 | elif have_members:
214 | nwm_dataset = merge_member(ds_list)
215 | else:
216 | nwm_dataset = ds_list[0]
217 |
218 | del ds_list
219 |
220 | # Impose some order.
221 | if have_members:
222 | nwm_dataset = nwm_dataset.sortby(['member'])
223 | if have_lead_time:
224 | nwm_dataset = nwm_dataset.sortby(['reference_time', 'lead_time'])
225 |
226 | # Create a valid_time variable. I'm estimating that doing it here is more efficient
227 | # than adding more data to the collection processes.
228 | def calc_valid_time(ref, lead):
229 | return np.datetime64(int(ref) + int(lead), 'ns')
230 | if have_lead_time:
231 | nwm_dataset['valid_time'] = xr.apply_ufunc(
232 | calc_valid_time,
233 | nwm_dataset['reference_time'],
234 | nwm_dataset['lead_time'],
235 | vectorize=True
236 | ).transpose() # Not sure this is consistently anti-transposed.
237 |
238 |     # Xarray sets nan as the fill value when there is none. Don't allow that...
239 | for key, val in nwm_dataset.variables.items():
240 | if '_FillValue' not in nwm_dataset[key].encoding:
241 | nwm_dataset[key].encoding.update({'_FillValue': None})
242 |
243 | # Clean up attributes
244 | new_attrs = collections.OrderedDict()
245 | if attrs_keep is not None:
246 | for key, value in nwm_dataset.attrs.items():
247 | if key in attrs_keep:
248 | new_attrs[key] = nwm_dataset.attrs[key]
249 |
250 | nwm_dataset.attrs = new_attrs
251 |
252 | # Break into chunked dask array
253 | if chunks is not None:
254 | nwm_dataset = nwm_dataset.chunk(chunks=chunks)
255 |
256 | # I submitted a PR fix to xarray.
257 | # I will leave this here until the PR is merged.
258 | # Workaround/prevent https://github.com/pydata/xarray/issues/1849
259 | # for v in nwm_dataset.variables.values():
260 | # try:
261 | # del v.encoding["contiguous"]
262 | # except KeyError: # no problem
263 | # pass
264 |
265 | return nwm_dataset
266 |
267 |
268 | def open_whp_dataset_orig(
269 | paths: list,
270 | chunks: dict = None,
271 | attrs_keep: list = ['featureType', 'proj4',
272 | 'station_dimension', 'esri_pe_string',
273 | 'Conventions', 'model_version'],
274 | isel: dict = None,
275 | drop_variables: list = None,
276 | npartitions: int = None,
277 | profile: int = False,
278 | n_cores: int = 1
279 | ) -> xr.Dataset:
280 |
281 | import sys
282 | import os
283 |
284 | # print('n_cores', str(n_cores))
285 | the_pool = Pool(n_cores)
286 | with dask.config.set(scheduler='processes', pool=the_pool):
287 | whp_ds = open_whp_dataset_inner(
288 | paths,
289 | chunks,
290 | attrs_keep,
291 | isel,
292 | drop_variables,
293 | npartitions,
294 | profile
295 | )
296 | the_pool.close()
297 | return whp_ds
298 |
299 |
300 | def open_whp_dataset(
301 | paths: list,
302 | file_chunk_size: int = None,
303 | chunks: dict = None,
304 | attrs_keep: list = ['featureType', 'proj4',
305 | 'station_dimension', 'esri_pe_string',
306 | 'Conventions', 'model_version'],
307 | isel: dict = None,
308 | drop_variables: list = None,
309 | npartitions: int = None,
310 | profile: int = False,
311 | n_cores: int = 1,
312 | write_cumulative_file: pathlib.Path = None
313 | ) -> xr.Dataset:
314 |
315 | import sys
316 | import os
317 | import math
318 | import multiprocessing
319 | import pickle
320 |
321 | n_files = len(paths)
322 | print('n_files', str(n_files))
323 |
324 |     # remove path to file if file doesn't exist
325 |     # (iterate over a copy: removing items from a list while iterating over it skips entries)
326 |     for p in [pp for pp in paths if not os.path.exists(pp)]:
327 |         print("removing file since it doesn't exist:", str(p))
328 |         paths.remove(p)
329 |
330 | if file_chunk_size is None:
331 | file_chunk_size = n_files
332 |
333 | if file_chunk_size >= n_files:
334 | the_pool = Pool(n_cores)
335 | with dask.config.set(scheduler='processes', pool=the_pool):
336 | whp_ds = open_whp_dataset_inner(
337 | paths=paths,
338 | chunks=chunks,
339 | attrs_keep=attrs_keep,
340 | isel=isel,
341 | drop_variables=drop_variables,
342 | npartitions=npartitions,
343 | profile=profile
344 | )
345 | the_pool.close()
346 |
347 | else:
348 |
349 | n_file_chunks = math.ceil(n_files / file_chunk_size)
350 | start_list = [file_chunk_size * ii for ii in range(n_file_chunks)]
351 | end_list = [file_chunk_size * (ii + 1) - 1 for ii in range(n_file_chunks)]
352 | # adsf
353 |
354 | whp_ds = None
355 | for start_ind, end_ind in zip(start_list, end_list):
356 | the_pool = Pool(n_cores)
357 | with dask.config.set(scheduler='processes', pool=the_pool):
358 | ds_chunk = open_whp_dataset_inner(
359 | paths=paths[start_ind:(end_ind+1)],
360 | chunks=chunks,
361 | attrs_keep=attrs_keep,
362 | isel=isel,
363 | drop_variables=drop_variables,
364 | npartitions=npartitions,
365 | profile=profile
366 | )
367 | the_pool.close()
368 |
369 | if ds_chunk is not None:
370 | if whp_ds is None:
371 | whp_ds = ds_chunk
372 | else:
373 | whp_ds = xr.merge([whp_ds, ds_chunk])
374 | if write_cumulative_file is not None:
375 | if not write_cumulative_file.parent.exists():
376 | write_cumulative_file.parent.mkdir()
377 | whp_ds.to_netcdf(write_cumulative_file)
378 | cumulative_files_file = write_cumulative_file.parent / (
379 | write_cumulative_file.stem + '.files.pkl')
380 | pickle.dump(
381 | paths[0:end_ind],
382 | open(str(cumulative_files_file), 'wb'))
383 |
384 | return whp_ds
385 |
--------------------------------------------------------------------------------
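
A usage note on the chunked reader above: open_whp_dataset merges many WRF-Hydro output files into a single xarray Dataset, optionally in batches of file_chunk_size files, each batch read by a multiprocessing pool of n_cores workers. A minimal sketch follows; the run directory, file glob, and core count are illustrative, and the top-level import is an assumption (the function is defined in the module shown above).

    import pathlib
    from wrfhydropy import open_whp_dataset  # assumed export of the function defined above

    # Hypothetical run directory containing CHRTOUT streamflow output files.
    run_dir = pathlib.Path('/path/to/run_dir')
    chrtout_files = sorted(run_dir.glob('*.CHRTOUT_DOMAIN1'))

    # Read 24 files per batch with 4 worker processes; batches are merged as they complete.
    ds = open_whp_dataset(
        paths=chrtout_files,
        file_chunk_size=24,
        n_cores=4)
    print(ds)
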
/wrfhydropy/core/domain.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import shutil
3 |
4 | from .ioutils import \
5 | WrfHydroStatic, \
6 | WrfHydroTs
7 | from .namelist import JSONNamelist
8 |
9 |
10 | class Domain(object):
11 | """Class for a WRF-Hydro domain, which constitutes all domain-specific files needed for a
12 | setup.
13 | """
14 |
15 | def __init__(self,
16 | domain_top_dir: str,
17 | domain_config: str,
18 | compatible_version: str = None,
19 | hydro_namelist_patch_file: str = 'hydro_namelist_patches.json',
20 | hrldas_namelist_patch_file: str = 'hrldas_namelist_patches.json'
21 | ):
22 | """Instantiate a Domain object
23 | Args:
24 | domain_top_dir: Parent directory containing all domain directories and files.
25 | domain_config: The domain configuration to use, options are 'NWM',
26 | 'Gridded', or 'Reach'
27 | compatible_version: String indicating the compatible model version, required if no
28 | .version file included in domain_top_dir.
29 | hydro_namelist_patch_file: Filename of json file containing namelist patches for
30 | hydro namelist
31 | hrldas_namelist_patch_file: Filename of json file containing namelist patches for
32 | hrldas namelist
33 | """
34 |
35 | # Instantiate arguments to object
36 | # Make file paths
37 | self.domain_top_dir = pathlib.Path(domain_top_dir).absolute()
38 |         """pathlib.Path: Absolute path to the parent directory containing all domain files."""
39 |
40 | self.domain_config = domain_config.lower()
41 | """str: Specified configuration for which the domain is to be used, e.g. 'NWM_ana'"""
42 |
43 | self.compatible_version = compatible_version
44 | """str: Source-code version for which the domain is to be used."""
45 |
46 | # Check .version file if compatible_version not specified
47 | if self.compatible_version is None:
48 | try:
49 | with self.domain_top_dir.joinpath('.version').open() as f:
50 | self.compatible_version = f.read()
51 | except FileNotFoundError:
52 | raise FileNotFoundError('file .version not found in directory ' +
53 | str(self.domain_top_dir) + ' and compatible_version not '
54 | 'specified')
55 |
56 | # Load namelist patches
57 | hydro_namelist_patch_file = self.domain_top_dir.joinpath(hydro_namelist_patch_file)
58 | hrldas_namelist_patch_file = self.domain_top_dir.joinpath(hrldas_namelist_patch_file)
59 |
60 | self.hydro_namelist_patches = JSONNamelist(str(hydro_namelist_patch_file))
61 | """Namelist: Domain-specific hydro namelist settings."""
62 | self.hydro_namelist_patches = self.hydro_namelist_patches.get_config(self.domain_config)
63 |
64 | self.hrldas_namelist_patches = JSONNamelist(str(hrldas_namelist_patch_file))
65 | """Namelist: Domain-specific hrldas namelist settings."""
66 | self.hrldas_namelist_patches = self.hrldas_namelist_patches.get_config(self.domain_config)
67 |
68 | self.hydro_files = list()
69 | """list: Files specified in hydro_nlist section of the domain namelist patches"""
70 | self.nudging_files = list()
71 | """list: Files specified in nudging_nlist section of the domain namelist patches"""
72 | self.lsm_files = list()
73 | """list: Files specified in noahlsm_offline section of the domain namelist patches"""
74 |
75 | self.nudging_dir = None
76 | """pathlib.Path: path to the nudging obs directory"""
77 |
78 | self.forcing_dir = None
79 | """pathlib.Path: path to the forcing directory"""
80 |
81 | ###
82 |
83 | # Create file paths from hydro namelist
84 | domain_hydro_nlist = self.hydro_namelist_patches['hydro_nlist']
85 |
86 | for key, value in domain_hydro_nlist.items():
87 | file_path = self.domain_top_dir.joinpath(str(value))
88 | if file_path.is_file() is True:
89 | if file_path.suffix == '.nc':
90 | self.hydro_files.append(WrfHydroStatic(file_path))
91 | else:
92 | self.hydro_files.append(file_path)
93 |
94 | # Create file paths from nudging namelist
95 | domain_nudging_nlist = self.hydro_namelist_patches['nudging_nlist']
96 |
97 | for key, value in domain_nudging_nlist.items():
98 | file_path = self.domain_top_dir.joinpath(str(value))
99 | if file_path.is_file() is True:
100 | if file_path.suffix == '.nc':
101 | self.nudging_files.append(WrfHydroStatic(file_path))
102 | else:
103 | self.nudging_files.append(file_path)
104 | if key == 'timeslicepath' and value != '':
105 | self.nudging_dir = file_path
106 | self.nudging_files.append(WrfHydroTs(file_path.glob('*')))
107 |
108 | # Create file paths from lsm namelist
109 | domain_lsm_nlist = \
110 | self.hrldas_namelist_patches["noahlsm_offline"]
111 |
112 | for key, value in domain_lsm_nlist.items():
113 | file_path = self.domain_top_dir.joinpath(str(value))
114 |
115 | if file_path.is_file() is True:
116 | if file_path.suffix == '.nc':
117 | self.lsm_files.append(WrfHydroStatic(file_path))
118 | else:
119 | self.lsm_files.append(file_path)
120 |
121 | if key == 'indir':
122 | self.forcing_dir = file_path
123 |
124 | self.forcing_data = WrfHydroTs(self.forcing_dir.glob('*'))
125 |
126 | def copy_files(self, dest_dir: str, symlink: bool = True):
127 | """Copy domain files to a new directory
128 | Args:
129 |             dest_dir: The destination directory for domain files
130 | symlink: Symlink domain files instead of copy
131 | """
132 |
133 | # Convert dir to pathlib.Path
134 | dest_dir = pathlib.Path(dest_dir)
135 |
136 | # Make directory if it does not exist.
137 | if not dest_dir.is_dir():
138 | dest_dir.mkdir(parents=True)
139 |
140 | # Create symlinks/copies
141 | # Symlink/copy in forcing
142 | from_dir = self.forcing_dir
143 | to_dir = dest_dir.joinpath(from_dir.name)
144 | if symlink:
145 | to_dir.symlink_to(from_dir, target_is_directory=True)
146 | else:
147 | shutil.copytree(str(from_dir), str(to_dir))
148 |
149 | # create DOMAIN directory and symlink in files
150 | # Symlink in hydro_files
151 | for from_path in self.hydro_files:
152 | # Get new file path for run directory, relative to the top-level domain directory
153 | # This is needed to ensure the path matches the domain namelist
154 | try:
155 | relative_path = from_path.relative_to(self.domain_top_dir)
156 | except ValueError:
157 | pass
158 | else:
159 | to_path = dest_dir.joinpath(relative_path)
160 | if to_path.parent.is_dir() is False:
161 | to_path.parent.mkdir(parents=True)
162 | if symlink:
163 | to_path.symlink_to(from_path)
164 | else:
165 | shutil.copy(str(from_path), str(to_path))
166 |
167 | # Symlink in nudging files
168 |
169 | # handling nudging obs files
170 | # Users may signal "None" by the null string (''), treat them the same.
171 |         if not (self.nudging_dir is None or self.nudging_dir == ''):
172 | from_dir = self.nudging_dir
173 | try:
174 | to_dir = dest_dir.joinpath(from_dir.relative_to(self.domain_top_dir))
175 | except ValueError:
176 | pass
177 | else:
178 | if symlink:
179 | to_dir.symlink_to(from_dir, target_is_directory=True)
180 | else:
181 |                     shutil.copytree(str(from_dir), str(to_dir))
182 |
183 | for from_path in self.nudging_files:
184 | # Get new file path for run directory, relative to the top-level domain directory
185 | # This is needed to ensure the path matches the domain namelist
186 | if type(from_path) is not WrfHydroTs:
187 | try:
188 | relative_path = from_path.relative_to(self.domain_top_dir)
189 | except ValueError:
190 | pass
191 | else:
192 | to_path = dest_dir.joinpath(relative_path)
193 | if to_path.parent.is_dir() is False:
194 | to_path.parent.mkdir(parents=True)
195 | if symlink:
196 | to_path.symlink_to(from_path)
197 | else:
198 | shutil.copy(str(from_path), str(to_path))
199 |
200 | # Symlink in lsm files
201 | for from_path in self.lsm_files:
202 | # Get new file path for run directory, relative to the top-level domain directory
203 | # This is needed to ensure the path matches the domain namelist
204 | try:
205 | relative_path = from_path.relative_to(self.domain_top_dir)
206 | except ValueError:
207 | pass
208 | else:
209 | to_path = dest_dir.joinpath(relative_path)
210 | if to_path.parent.is_dir() is False:
211 | to_path.parent.mkdir(parents=True)
212 | if symlink:
213 | to_path.symlink_to(from_path)
214 | else:
215 | shutil.copy(str(from_path), str(to_path))
216 |
217 | model_files = [*self.hydro_files,
218 | *self.nudging_files,
219 | *self.lsm_files]
220 | for ff in model_files:
221 | if type(ff) is not WrfHydroTs:
222 | if 'RESTART' in str(ff.name):
223 | to_path = dest_dir.joinpath(ff.name).absolute()
224 | if symlink:
225 | to_path.symlink_to(ff)
226 | else:
227 | shutil.copy(str(ff), str(to_path))
228 | if 'HYDRO_RST' in str(ff.name):
229 | to_path = dest_dir.joinpath(ff.name).absolute()
230 | if symlink:
231 | to_path.symlink_to(ff)
232 | else:
233 | shutil.copy(str(ff), str(to_path))
234 | if 'nudgingLastObs' in str(ff.name):
235 | to_path = dest_dir.joinpath(ff.name).absolute()
236 | if symlink:
237 | to_path.symlink_to(ff)
238 | else:
239 | shutil.copy(str(ff), str(to_path))
240 |
--------------------------------------------------------------------------------
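
A minimal sketch of constructing a Domain and staging its files into a run directory. The paths and configuration are placeholders and assume a prepared domain directory containing the namelist patch JSON files and either a .version file or an explicit compatible_version.

    from wrfhydropy.core.domain import Domain

    domain = Domain(
        domain_top_dir='/path/to/example_case',
        domain_config='nwm',
        compatible_version='v5.2.0')

    # Symlink (or copy, with symlink=False) the domain files into a new run directory.
    domain.copy_files('/path/to/run_dir', symlink=True)
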
/wrfhydropy/core/ensemble_tools.py:
--------------------------------------------------------------------------------
1 | from boltons.iterutils import remap
2 | import copy
3 | import datetime
4 | from deepdiff.diff import DeepDiff
5 | import os
6 | import pathlib
7 | import sys
8 |
9 |
10 | def is_sub_obj(obj):
11 |     """Test if an object has a __dict__ (may not be the best definition of an object,
12 | but it works for classes in wrfhydropy)."""
13 |
14 | # If a dict, dont use __dict__
15 | if isinstance(obj, dict):
16 | return False
17 |
18 | try:
19 | _ = obj.__dict__
20 | except AttributeError:
21 | return False
22 | return True
23 |
24 |
25 | def get_sub_objs(obj):
26 | """Identify which attributes of an object are objects with __dicts__."""
27 | sub_obj_dict = {kk: is_sub_obj(obj[kk]) for (kk, vv) in obj.items()}
28 | return list(remap(sub_obj_dict, lambda p, k, v: v).keys())
29 |
30 |
31 | def dictify(obj):
32 |     """Recursively transform the sub-__dict__s of a deepcopy of an object into dicts,
33 |     allowing dictionary traversal of the copied object."""
34 | the_dict = copy.deepcopy(obj.__dict__)
35 | sub_dicts = get_sub_objs(the_dict)
36 | for ss in sub_dicts:
37 | the_dict[ss] = dictify(the_dict[ss])
38 | return the_dict
39 |
40 |
41 | class DeepDiffEq(DeepDiff):
42 | """Extend Deep Diff to handle __eq__ for specified types."""
43 | def __init__(self,
44 | t1,
45 | t2,
46 | eq_types,
47 | ignore_order=False,
48 | report_repetition=False,
49 | significant_digits=None,
50 | exclude_paths=set(),
51 | # exclude_regex_paths=set(),
52 | exclude_types=set(),
53 | # include_string_type_changes=False,
54 | verbose_level=1,
55 | view='text',
56 | **kwargs):
57 |
58 | # Must set this first for some reason.
59 | self.eq_types = set(eq_types)
60 |
61 |         super().__init__(t1,
62 |                          t2,
63 |                          ignore_order=ignore_order,
64 |                          report_repetition=report_repetition,
65 |                          significant_digits=significant_digits,
66 |                          exclude_paths=exclude_paths,
67 |                          # exclude_regex_paths=exclude_regex_paths,
68 |                          exclude_types=exclude_types,
69 |                          # include_string_type_changes=include_string_type_changes,
70 |                          verbose_level=verbose_level,
71 |                          view=view,
72 |                          **kwargs)
73 |
74 | # Have to force override __diff_obj.
75 | def _DeepDiff__diff_obj(self, level, parents_ids=frozenset({}),
76 | is_namedtuple=False):
77 | """Difference of 2 objects using their __eq__ if requested"""
78 |
79 | if type(level.t1) in self.eq_types:
80 | if level.t1 == level.t2:
81 | return
82 | else:
83 | self._DeepDiff__report_result('values_changed', level)
84 | return
85 |
86 | super(DeepDiffEq, self)._DeepDiff__diff_obj(
87 | level,
88 | parents_ids=frozenset({}),
89 | is_namedtuple=False
90 | )
91 |
92 |
93 | # def get_ens_file_last_restart_datetime(run_dir):
94 | # """Use the filesystem to probe the current ensemble time."""
95 | # run_dir = pathlib.Path(run_dir)
96 | # mem_dirs = sorted(run_dir.glob("member_*"))
97 | # hydro_last = [sorted(mm.glob('HYDRO_RST.*'))[-1].name for mm in mem_dirs]
98 | # if not all([hydro_last[0] == hh for hh in hydro_last]):
99 | # raise ValueError("Not all ensemble members at the same time (HYDRO_RST files).")
100 | # if len(sorted(mem_dirs[0].glob('RESTART.*'))):
101 | # lsm_last = [sorted(mm.glob('RESTART.*'))[-1] for mm in mem_dirs]
102 | # if not all([lsm_last[0] == ll for ll in lsm_last]):
103 | # raise ValueError("Not all ensemble members at the same time (RESTART files).")
104 |
105 | # ens_time = datetime.datetime.strptime(
106 | # str(hydro_last[0]).split('_RST.')[-1],
107 | # '%Y-%m-%d_%H:%M_DOMAIN1'
108 | # )
109 | # return ens_time
110 |
111 |
112 | def get_ens_dotfile_end_datetime(run_dir):
113 |     """Use the .model_end_time files to get the current ensemble time."""
114 | run_dir = pathlib.Path(run_dir)
115 | mem_dirs = sorted(run_dir.glob("member_*"))
116 |
117 | def read_dot_file(file):
118 | with open(file) as f:
119 | content = f.readline()
120 | return datetime.datetime.strptime(content, '%Y-%m-%d %H:%M:%S')
121 |
122 | end_times = [read_dot_file(mm / '.model_end_time') for mm in mem_dirs]
123 | if not all([end_times[0] == ee for ee in end_times]):
124 |         raise ValueError("Not all ensemble members at the same time (.model_end_time files).")
125 |
126 | return end_times[0]
127 |
128 |
129 | def mute():
130 |     """An initializer for multiprocessing.Pool to keep the processes quiet."""
131 | sys.stdout = open(os.devnull, 'w')
132 | sys.stderr = open(os.devnull, 'w')
133 |
--------------------------------------------------------------------------------
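
dictify above underpins object comparison in wrfhydropy: it deep-copies an object and recursively converts any nested attribute that carries a __dict__ into a plain dict. A small self-contained sketch on made-up classes:

    from wrfhydropy.core.ensemble_tools import dictify

    class Inner:
        def __init__(self):
            self.x = 1

    class Outer:
        def __init__(self):
            self.inner = Inner()
            self.name = 'outer'

    # Nested objects become nested dicts on a deep copy of the original object.
    print(dictify(Outer()))
    # {'inner': {'x': 1}, 'name': 'outer'}
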
/wrfhydropy/core/model.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import pathlib
4 | import pickle
5 | import shlex
6 | import shutil
7 | import subprocess
8 | import uuid
9 | import warnings
10 |
11 | from .namelist import JSONNamelist
12 |
13 |
14 | def get_git_revision_hash(the_dir: str) -> str:
15 | """Get the last git revision hash from a directory if directory is a git repository
16 | Args:
17 | the_dir: String for the directory path
18 | Returns:
19 | String with the git hash if a git repo or message if not
20 | """
21 |
22 | the_dir = pathlib.Path(the_dir)
23 |
24 | # First test if this is even a git repo. (Have to allow for this unless the wrfhydropy
25 | # testing brings in the wrf_hydro_code as a repo with a .git file.)
26 | dir_is_repo = subprocess.run(["git", "branch"],
27 | stderr=subprocess.STDOUT,
28 | stdout=open(os.devnull, 'w'),
29 | cwd=str(the_dir.absolute()))
30 | if dir_is_repo.returncode != 0:
31 | return 'could_not_get_hash'
32 |
33 | dirty = subprocess.run(['git', 'diff-index', 'HEAD'], # --quiet seems to give the wrong result.
34 | stdout=subprocess.PIPE,
35 | stderr=subprocess.PIPE,
36 | cwd=str(the_dir.absolute())).returncode
37 | the_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=str(the_dir.absolute()))
38 | the_hash = the_hash.decode('utf-8').split()[0]
39 | if dirty:
40 | the_hash += '--DIRTY--'
41 | return the_hash
42 |
43 |
44 | class Model(object):
45 |     """Class for a WRF-Hydro model, which constitutes the model source code and compiled binary.
46 | """
47 |
48 | def __init__(
49 | self,
50 | source_dir: str,
51 | model_config: str,
52 | hydro_namelist_config_file: str=None,
53 | hrldas_namelist_config_file: str=None,
54 | compile_options_config_file: str=None,
55 | compiler: str = 'gfort',
56 | pre_compile_cmd: str = None,
57 | compile_options: dict = None
58 | ):
59 |
60 | """Instantiate a Model object.
61 | Args:
62 | source_dir: Directory containing the source code, e.g.
63 | 'wrf_hydro_nwm/src'.
64 | model_config: The configuration of the model. Used to match a model to a domain
65 |                 configuration. Must be a key in both the *_namelists.json in the source directory
66 | and the *_namelist_patches.json in the domain directory.
67 |             pre_compile_cmd: Optional command string to be executed prior to model
68 |                 compilation, e.g. to load modules. It is run in the same shell
69 |                 invocation as ./configure and the compile script (see the compile
70 |                 method below).
71 | hydro_namelist_config_file: Path to a hydro namelist config file external to the model
72 | repository. Default(None) implies using the model src/hydro_namelists.json.
73 | hrldas_namelist_config_file: As for hydro_namelist_config_file, but for hrldas namelist.
74 | compile_options_config_file: As for hydro_namelist_config_file, but for compile options.
75 | compiler: The compiler to use, must be one of 'pgi','gfort',
76 | 'ifort', or 'luna'.
77 | compile_options: Changes to default compile-time options.
78 | """
79 |
80 | # Instantiate all attributes and methods
81 | # Attributes set by init args
82 | self.source_dir = pathlib.Path(source_dir)
83 | """pathlib.Path: pathlib.Path object for source code directory."""
84 |
85 | self.model_config = model_config.lower()
86 | """str: Specified configuration for which the model is to be used, e.g. 'nwm_ana'"""
87 |
88 | self.compiler = compiler
89 | """str: The compiler chosen at compile time."""
90 |
91 | self.pre_compile_cmd = pre_compile_cmd
92 | """str: Command string to be executed prior to model compilation, e.g. to load modules"""
93 |
94 | self.compile_options = dict()
95 | """dict: Compile-time options. Defaults are loaded from json file stored with source
96 | code."""
97 |
98 |         # Set namelist config file defaults while allowing None to be passed.
99 | self.hydro_namelist_config_file = hydro_namelist_config_file
100 | """Namelist: Hydro namelist file specified for model config"""
101 | self.hrldas_namelist_config_file = hrldas_namelist_config_file
102 | """Namelist: HRLDAS namelist file specified for model config."""
103 | self.compile_options_config_file = compile_options_config_file
104 | """Namelist: Compile options file specified for model config."""
105 |
106 | default_hydro_namelist_config_file = 'hydro_namelists.json'
107 | default_hrldas_namelist_config_file = 'hrldas_namelists.json'
108 | default_compile_options_config_file = 'compile_options.json'
109 |
110 | if self.hydro_namelist_config_file is None:
111 | self.hydro_namelist_config_file = default_hydro_namelist_config_file
112 | if self.hrldas_namelist_config_file is None:
113 | self.hrldas_namelist_config_file = default_hrldas_namelist_config_file
114 | if self.compile_options_config_file is None:
115 | self.compile_options_config_file = default_compile_options_config_file
116 |
117 | # Load master namelists
118 | self.hydro_namelists = JSONNamelist(
119 | str(self.source_dir.joinpath(self.hydro_namelist_config_file))
120 | )
121 | """Namelist: Hydro namelist for specified model config"""
122 | self.hydro_namelists = self.hydro_namelists.get_config(self.model_config)
123 |
124 | self.hrldas_namelists = JSONNamelist(
125 | str(self.source_dir.joinpath(self.hrldas_namelist_config_file))
126 | )
127 | """Namelist: HRLDAS namelist for specified model config"""
128 | self.hrldas_namelists = self.hrldas_namelists.get_config(self.model_config)
129 |
130 | # Attributes set by other methods
131 | self.compile_dir = None
132 | """pathlib.Path: pathlib.Path object pointing to the compile directory."""
133 |
134 | self.git_hash = self._get_githash()
135 |         """str: The git revision hash if self.source_dir is a git repository"""
136 |
137 | self.version = None
138 | """str: Source code version from .version file stored with the source code."""
139 |
140 | self.compile_dir = None
141 | """pathlib.Path: pathlib.Path object pointing to the compile directory."""
142 |
143 | self.configure_log = None
144 | """CompletedProcess: The subprocess object generated at configure."""
145 |
146 | self.compile_log = None
147 | """CompletedProcess: The subprocess object generated at compile."""
148 |
149 | self.object_id = None
150 | """str: A unique id to join object to compile directory."""
151 |
152 | self.table_files = list()
153 | """list: pathlib.Paths to *.TBL files generated at compile-time."""
154 |
155 | self.wrf_hydro_exe = None
156 | """pathlib.Path: pathlib.Path to wrf_hydro.exe file generated at compile-time."""
157 |
158 | # Set attributes
159 | # Get code version
160 | with self.source_dir.joinpath('.version').open() as f:
161 | self.version = f.read()
162 |
163 | # Load compile options
164 | self.compile_options = JSONNamelist(
165 | str(self.source_dir.joinpath(self.compile_options_config_file))
166 | )
167 |         """Namelist: Compile options for specified model config"""
168 | self.compile_options = self.compile_options.get_config(self.model_config)
169 |
170 | # "compile_options" is the argument to __init__
171 | if compile_options is not None:
172 | self.compile_options.update(compile_options)
173 |
174 | # Add compiler and compile options as attributes and update if needed
175 | self.compiler = compiler
176 |
177 | def compile(self,
178 | compile_dir: pathlib.Path) -> str:
179 | """Compiles WRF-Hydro using specified compiler and compile options.
180 | Args:
181 |             compile_dir: A non-existent directory to use for compilation.
182 | Returns:
183 | Success of compilation and compile directory used. Sets additional
184 | attributes to WrfHydroModel
185 | """
186 |
187 | self.compile_dir = pathlib.Path(compile_dir).absolute()
188 |
189 | self.modules = subprocess.run('module list', shell=True, stderr=subprocess.PIPE).stderr
190 |
191 | # check compile directory.
192 | if not self.compile_dir.is_dir():
193 | warnings.warn(str(self.compile_dir.absolute()) + ' directory does not exist, creating')
194 | self.compile_dir.mkdir(parents=True)
195 |
196 | # Remove run directory if it exists in the source_dir
197 | source_compile_dir = self.source_dir.joinpath('Run')
198 | if source_compile_dir.is_dir():
199 | shutil.rmtree(str(source_compile_dir.absolute()))
200 |
201 | # Get directory for setEnvar
202 | compile_options_file = self.source_dir.joinpath('compile_options.sh')
203 |
204 | # Write setEnvar file
205 | with compile_options_file.open(mode='w') as file:
206 | for option, value in self.compile_options.items():
207 | file.write("export {}={}\n".format(option, value))
208 |
209 | # Compile
210 | # Create compile command for machine spec
211 | compile_cmd = '/bin/bash -c "'
212 | if self.pre_compile_cmd is not None:
213 | compile_cmd += self.pre_compile_cmd + '; '
214 | compile_cmd += './configure ' + self.compiler + '; '
215 | compile_cmd += './compile_offline_NoahMP.sh '
216 | compile_cmd += str(compile_options_file.absolute())
217 | compile_cmd += '"'
218 | compile_cmd = shlex.split(compile_cmd)
219 |
220 | self.compile_log = subprocess.run(
221 | compile_cmd,
222 | stdout=subprocess.PIPE,
223 | stderr=subprocess.PIPE,
224 | cwd=str(self.source_dir.absolute())
225 | )
226 |
227 |         # Add in unique ID file to match this object to prevent associating
228 | # this directory with another object
229 | self.object_id = str(uuid.uuid4())
230 |
231 | with self.compile_dir.joinpath('.uid').open(mode='w') as f:
232 | f.write(self.object_id)
233 |
234 | if self.compile_log.returncode == 0:
235 | # Open permissions on compiled files
236 | subprocess.run(['chmod', '-R', '755', str(self.source_dir.joinpath('Run'))])
237 |
238 |             # WRF-Hydro always puts files in the source directory under a new directory called 'Run'
239 |             # Copy files to the specified compile directory if it's not the same as the
240 | # source code directory
241 | if len(self.table_files) == 0:
242 | self.table_files = list(self.source_dir.joinpath('Run').glob('*.TBL'))
243 |
244 | shutil.copyfile(str(self.source_dir.joinpath('Run').joinpath('wrf_hydro.exe')),
245 | str(self.compile_dir.joinpath('wrf_hydro.exe')))
246 |
247 | # Remove old files
248 | # shutil.rmtree(str(self.source_dir.joinpath('Run')))
249 |
250 | # Open permissions on copied compiled files
251 | subprocess.run(['chmod', '-R', '755', str(self.compile_dir)])
252 |
253 | # Get file lists as attributes
254 | # Get list of table file paths
255 |
256 | # Get wrf_hydro.exe file path
257 | self.wrf_hydro_exe = self.compile_dir.joinpath('wrf_hydro.exe')
258 |
259 | # Save the object out to the compile directory
260 | with self.compile_dir.joinpath('WrfHydroModel.pkl').open(mode='wb') as f:
261 | pickle.dump(self, f, 2)
262 |
263 | print('Model successfully compiled into ' + str(self.compile_dir.absolute()))
264 | else:
265 | # Save the object out to the compile directory
266 | with self.compile_dir.joinpath('WrfHydroModel.pkl').open(mode='wb') as f:
267 | pickle.dump(self, f, 2)
268 | raise ValueError('Model did not successfully compile.' +
269 | self.compile_log.stderr.decode('utf-8'))
270 |
271 | def copy_files(self, dest_dir: str, symlink: bool = True):
272 | """Copy domain files to new directory
273 | Args:
274 | dest_dir: The destination directory for files
275 | symlink: Symlink files instead of copy
276 | """
277 |
278 | # Convert dir to pathlib.Path
279 | dest_dir = pathlib.Path(dest_dir)
280 |
281 | # Make directory if it does not exist.
282 | if not dest_dir.is_dir():
283 | dest_dir.mkdir(parents=True)
284 |
285 | # Symlink/copy in exe
286 | from_file = self.wrf_hydro_exe
287 | to_file = dest_dir.joinpath(from_file.name)
288 | if symlink:
289 | to_file.symlink_to(from_file)
290 | else:
291 | shutil.copy(str(from_file), str(to_file))
292 |
293 | def _get_githash(self) -> str:
294 | """Private method to get the git hash if source_dir is a git repository
295 | Returns:
296 | git hash string
297 | """
298 | return get_git_revision_hash(self.source_dir)
299 |
--------------------------------------------------------------------------------
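
A minimal sketch of compiling WRF-Hydro with the Model class above. The source path, configuration, and compile directory are placeholders; the source tree is assumed to contain the .version, *_namelists.json, and compile_options.json files that the constructor reads.

    from wrfhydropy.core.model import Model

    model = Model(
        source_dir='/path/to/wrf_hydro_source/trunk/NDHMS',
        model_config='nwm_ana',
        compiler='gfort')

    # Compiles in the source tree, then stages wrf_hydro.exe and a pickled
    # Model object into the (new) compile directory.
    model.compile(compile_dir='/path/to/compile_dir')
    print(model.wrf_hydro_exe)
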
/wrfhydropy/core/namelist.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import deepdiff
3 | import f90nml
4 | import json
5 | from typing import Union
6 | import warnings
7 |
8 |
9 | def load_namelist(nml_path: str) -> dict:
10 | """Load a F90 namelist into a wrfhydropy.Namelist object
11 | Args:
12 | nml_path: String containing path to F90 namelist
13 | Returns:
14 | dict interpretation of namelist
15 | """
16 | nml_dict = Namelist(json.loads(json.dumps(f90nml.read(nml_path), sort_keys=True)))
17 | return nml_dict
18 |
19 |
20 | class JSONNamelist(object):
21 |     """Class for a WRF-Hydro JSON namelist containing one or more configurations"""
22 | def __init__(
23 | self,
24 | file_path: str):
25 | """Instantiate a Namelist object.
26 | Args:
27 | file_path: Path to the namelist file to open, can be a json or fortran90 namelist.
28 | """
29 | self._json_namelist = json.load(open(file_path, mode='r'))
30 | self.configs = self._json_namelist.keys()
31 |
32 | def get_config(self, config: str):
33 | """Get a namelist for a given configuration. This works internally by grabbing the base
34 | namelist and updating with the config-specific changes.
35 | Args:
36 | config: The configuration to retrieve
37 | """
38 |
39 |         # This if/else statement is to make the compile options files
40 | # backwards-compatible. Should be left in through v2.1 (that makes sure v2.0 is covered).
41 | if 'base' in self._json_namelist.keys():
42 | base_namelist = copy.deepcopy(self._json_namelist['base'])
43 | config_patches = copy.deepcopy(self._json_namelist[config])
44 | # Update the base namelist with the config patches
45 | config_namelist = dict_merge(base_namelist, config_patches)
46 |
47 | else:
48 | # One can pass any "nwm_*" config to get the compile options.
49 | # if that specific config is not there, "nwm" config is used
50 | # for the compile options with a warning.
51 | if config not in self._json_namelist.keys():
52 | if 'nwm' in config and 'nwm' in self._json_namelist.keys():
53 |                     warnings.warn(
54 |                         "The compile configuration 'nwm' is inferred from the"
55 |                         " configuration passed: " + config)
56 |                     config = 'nwm'
57 | config_namelist = copy.deepcopy(self._json_namelist[config])
58 |
59 | return Namelist(config_namelist)
60 |
61 |
62 | class Namelist(dict):
63 | """Class for a WRF-Hydro namelist"""
64 |
65 | def write(self, path: str, mode='x'):
66 | """Write a namelist to file as a fortran-compatible namelist
67 | Args:
68 | path: The file path
69 | """
70 | with open(str(path), mode=mode) as nml_file:
71 | f90nml.write(self, nml_file)
72 |
73 | def patch(self, patch: dict):
74 | """Recursively patch a namelist with key values from another namelist
75 | Args:
76 | patch: A Namelist or dict object containing the patches
77 | """
78 | patched_namelist = dict_merge(copy.deepcopy(self),
79 | copy.deepcopy(patch))
80 | return patched_namelist
81 |
82 |
83 | def dict_merge(dct: dict, merge_dct: dict) -> dict:
84 |     """ Recursive dict merge. Inspired by :meth:`dict.update`, instead of
85 | updating only top-level keys, dict_merge recurses down into dicts nested
86 | to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
87 | ``dct``.
88 | Args:
89 | dct: dict onto which the merge is executed
90 | merge_dct: dct merged into dct
91 | Returns:
92 | The merged dict
93 | """
94 |
95 | for key, value in merge_dct.items():
96 | if key in dct.keys() and type(value) is dict:
97 | dict_merge(dct[key], merge_dct[key])
98 | else:
99 | dct[key] = merge_dct[key]
100 |
101 |     return dct
102 |
103 |
104 | def diff_namelist(
105 | old_namelist: Union[Namelist, str],
106 | new_namelist: Union[Namelist, str], **kwargs) -> dict:
107 | """Diff two Namelist objects or fortran 90 namelist files and return a dictionary of
108 | differences.
109 |
110 | Args:
111 | old_namelist: String containing path to the first namelist file, referred to as 'old' in
112 | outputs.
113 | new_namelist: String containing path to the second namelist file, referred to as 'new' in
114 | outputs.
115 | **kwargs: Additional arguments passed onto deepdiff.DeepDiff method
116 | Returns:
117 | The differences between the two namelists
118 | """
119 |
120 | # If supplied as strings try and read in from file path
121 | if type(old_namelist) == str:
122 | old_namelist = load_namelist(old_namelist)
123 | if type(new_namelist) == str:
124 | new_namelist = load_namelist(new_namelist)
125 |
126 | # Diff the namelists
127 | differences = deepdiff.DeepDiff(old_namelist, new_namelist, ignore_order=True, **kwargs)
128 | differences_dict = dict(differences)
129 |     return differences_dict
130 |
--------------------------------------------------------------------------------
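
dict_merge (and Namelist.patch, which wraps it) recursively overlays a patch dict onto a base dict, so only the keys present in the patch change while sibling keys survive. A small sketch with made-up namelist settings:

    from wrfhydropy.core.namelist import Namelist

    base = Namelist({
        'hydro_nlist': {'restart_file': 'HYDRO_RST.nc', 'out_dt': 60},
        'nudging_nlist': {'timeslicepath': ''}})

    patched = base.patch({'hydro_nlist': {'out_dt': 15}})

    print(patched['hydro_nlist'])    # {'restart_file': 'HYDRO_RST.nc', 'out_dt': 15}
    print(patched['nudging_nlist'])  # {'timeslicepath': ''}  (untouched)
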
/wrfhydropy/core/schedulers.py:
--------------------------------------------------------------------------------
1 | # Note: All other imports for individual schedulers should be done in the respective scheduler
2 | # class functions so that imports can be isolated to relevant schedulers
3 |
4 | from abc import ABC, abstractmethod
5 |
6 |
7 | class Scheduler(ABC):
8 | def __init__(self):
9 | super().__init__()
10 |
11 | @abstractmethod
12 | def schedule(self, jobs):
13 | pass
14 |
15 |
16 | class PBSCheyenne(Scheduler):
17 |
18 | """A Scheduler object compatible with PBS on the NCAR Cheyenne system."""
19 | def __init__(
20 | self,
21 | account: str,
22 | nproc: int,
23 | nnodes: int,
24 | mem: int = None,
25 | ppn: int = None,
26 | queue: str = 'regular',
27 | walltime: str = "12:00:00",
28 | email_who: str = None,
29 | email_when: str = 'abe',
30 | custom: dict = {}
31 | ):
32 |         """Initialize a PBSCheyenne object.
33 | Args:
34 | account: The account string
35 | nproc: Number of processors to request
36 | nnodes: Number of nodes to request
37 | ppn: Number of processors per node
38 | mem: Memory in GB usage/request on node (109 for fat nodes).
39 | email_who: Email address for PBS notifications
40 | email_when: PBS email frequency options. Options include 'a' for on abort,
41 | 'b' for before each job, and 'e' for after each job.
42 |             queue: The queue to use, options are 'regular', 'premium', and 'share'
43 | walltime: The wall clock time in HH:MM:SS format, max time is 12:00:00
44 | """
45 |
46 | # Declare attributes.
47 | # property construction
48 | self._sim_dir = None
49 | self._nproc = nproc
50 | self._nnodes = nnodes
51 | self._ppn = ppn
52 |
53 | # Scheduler options dict
54 | # TODO: Make this more elegant than hard coding for maintenance sake
55 | self.scheduler_opts = {
56 | 'account': account,
57 | 'email_when': email_when,
58 | 'email_who': email_who,
59 | 'queue': queue,
60 | 'walltime': walltime,
61 | 'mem': mem,
62 | 'custom': custom
63 | }
64 |
65 | def schedule(self, jobs: list):
66 |         """Schedule one or more jobs using the scheduler.
67 | Args:
68 | jobs: list of jobs to schedule
69 | """
70 | import subprocess
71 | import shlex
72 | import pathlib
73 | import os
74 |
75 | current_dir = pathlib.Path(os.curdir)
76 |
77 | # TODO: Find a way to protect the job order so that once someone executes schedule...
78 | # they can't change the order, may not be an issue except for if scheduling fails
79 | # somewhere
80 |
81 | self._write_job_pbs(jobs=jobs)
82 |
83 | # Make lists to store pbs scripts and pbs job ids to get previous dependency
84 | pbs_jids = []
85 | pbs_scripts = []
86 |
87 | qsub_str = "/bin/bash -c '"
88 | for job_num, option in enumerate(jobs):
89 |
90 | # This gets the pbs script name and pbs jid for submission
91 |             # the pbs jid is stored in a list so that the previous jid can be retrieved for
92 | # dependency
93 | job_id = jobs[job_num].job_id
94 | pbs_scripts.append(str(jobs[job_num].job_dir) + "/job_" + job_id + ".pbs")
95 | pbs_jids.append("job_" + job_id)
96 |
97 | # If first job, schedule using hold
98 | if job_num == 0:
99 | qsub_str += pbs_jids[job_num] + "=`qsub -h " + pbs_scripts[job_num] + "`;"
100 | # Else schedule using job dependency on previous pbs jid
101 | else:
102 | qsub_str += pbs_jids[job_num] + "=`qsub -W depend=afterok:${" + pbs_jids[
103 | job_num-1] + "} " + pbs_scripts[job_num] + "`;"
104 |
105 | qsub_str += "qrls ${" + pbs_jids[0] + "};"
106 | qsub_str += "'"
107 |
108 | # Just for debugging purposes
109 | print("qsub_str: ", qsub_str)
110 | # This stacks up dependent jobs in PBS in the same order as the job list
111 | subprocess.run(shlex.split(qsub_str),
112 | cwd=str(current_dir))
113 |
114 | def _write_job_pbs(self, jobs):
115 | """Private method to write bash PBS scripts for submitting each job """
116 | import copy
117 | import sys
118 |
119 |         # Get the current python executable to handle virtual environments in the scheduler
120 | python_path = sys.executable
121 |
122 | for job in jobs:
123 | # Copy the job because the exe cmd is edited below
124 | job = copy.deepcopy(job)
125 | custom = self.scheduler_opts['custom']
126 |
127 | # Write PBS script
128 | jobstr = ""
129 | jobstr += "#!/bin/sh\n"
130 | jobstr += "#PBS -N {0}\n".format(job.job_id)
131 | jobstr += "#PBS -A {0}\n".format(self.scheduler_opts['account'])
132 | jobstr += "#PBS -q {0}\n".format(self.scheduler_opts['queue'])
133 |
134 | if self.scheduler_opts['email_who'] is not None:
135 | jobstr += "#PBS -M {0}\n".format(self.scheduler_opts['email_who'])
136 | jobstr += "#PBS -m {0}\n".format(self.scheduler_opts['email_when'])
137 | jobstr += "\n"
138 |
139 | if '-l' not in custom or (
140 | '-l' in custom and 'walltime' not in custom['-l']):
141 | jobstr += "#PBS -l walltime={0}\n".format(self.scheduler_opts['walltime'])
142 |
143 | if '-l' not in custom or (
144 | '-l' in custom and 'select' not in custom['-l']):
145 | prcstr = "select={0}:ncpus={1}:mpiprocs={1}"
146 | prcstr = prcstr.format(self.nnodes, self.ppn)
147 | if self.scheduler_opts['mem'] is not None:
148 | prcstr = prcstr + ":mem={0}GB"
149 | prcstr = prcstr.format(self.scheduler_opts['mem'])
150 |
151 | jobstr += "#PBS -l " + prcstr + "\n"
152 | jobstr += "\n"
153 |
154 | if '-l' in custom:
155 | jobstr += "#PBS -l " + custom['-l'] + "\n"
156 | jobstr += "\n"
157 |
158 | jobstr += "# Not using PBS standard error and out files to capture model output\n"
159 | jobstr += "# but these files might catch output and errors from the scheduler.\n"
160 | jobstr += "#PBS -o {0}\n".format(job.job_dir)
161 | jobstr += "#PBS -e {0}\n".format(job.job_dir)
162 | jobstr += "\n"
163 |
164 | # End PBS Header
165 |
166 | # if job.modules:
167 | # jobstr += 'module purge\n'
168 | # jobstr += 'module load {0}\n'.format(job.modules)
169 | # jobstr += "\n"
170 |
171 | jobstr += "# CISL suggests users set TMPDIR when running batch jobs on Cheyenne.\n"
172 | jobstr += "export TMPDIR=/glade/scratch/$USER/temp\n"
173 | jobstr += "mkdir -p $TMPDIR\n"
174 | jobstr += "\n"
175 |
176 | if self.scheduler_opts['queue'] == 'share':
177 | jobstr += "export MPI_USE_ARRAY=false\n"
178 |
179 | jobstr += "{0} run_job.py --job_id {1}\n".format(python_path, job.job_id)
180 | jobstr += "exit $?\n"
181 |
182 | pbs_file = job.job_dir.joinpath("job_" + job.job_id + ".pbs")
183 | with pbs_file.open(mode='w') as f:
184 | f.write(jobstr)
185 |
186 | # Write the python run script for the job
187 | if '{nproc}' in job._exe_cmd:
188 | # If the job exe uses "nproc" then apply the schedulers value.
189 | job._exe_cmd = job._exe_cmd.format(**{'nproc': self.nproc})
190 | else:
191 | # regression tests use "{0}" format, try that here too
192 | job._exe_cmd = job._exe_cmd.format(self.nproc)
193 |
194 | job._write_run_script()
195 |
196 | def _solve_nodes_cores(self):
197 | """Private method to solve the number of nodes and cores if not all three specified"""
198 |
199 | import math
200 |
201 | if not self._nproc and self._nnodes and self._ppn:
202 | self._nproc = self._nnodes * self._ppn
203 | if not self._nnodes and self._nproc and self._ppn:
204 | self._nnodes = math.ceil(self._nproc / self._ppn)
205 | if not self._ppn and self._nnodes and self._nproc:
206 | self._ppn = math.ceil(self._nproc / self._nnodes)
207 |
208 | if None in [self._nproc, self._nnodes, self._ppn]:
209 | raise ValueError("Not enough information to solve all of nproc, nnodes, ppn.")
210 |
211 | @property
212 | def nproc(self):
213 | self._solve_nodes_cores()
214 | return self._nproc
215 |
216 | @nproc.setter
217 | def nproc(self, value):
218 | self._nproc = value
219 |
220 | @property
221 | def nnodes(self):
222 | self._solve_nodes_cores()
223 | return self._nnodes
224 |
225 | @nnodes.setter
226 | def nnodes(self, value):
227 | self._nnodes = value
228 |
229 | @property
230 | def ppn(self):
231 | self._solve_nodes_cores()
232 | return self._ppn
233 |
234 | @ppn.setter
235 | def ppn(self, value):
236 | self._ppn = value
237 |
--------------------------------------------------------------------------------
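
The nproc/nnodes/ppn properties above solve for whichever of the three values is missing via _solve_nodes_cores. A short sketch; the account string is a placeholder:

    from wrfhydropy.core.schedulers import PBSCheyenne

    # Request 72 cores at 36 cores per node; the node count is solved on access.
    sched = PBSCheyenne(
        account='PLACEHOLDER_ACCOUNT',
        nproc=72,
        nnodes=None,
        ppn=36,
        queue='regular',
        walltime='06:00:00')

    print(sched.nnodes)  # 2 == ceil(nproc / ppn)
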
/wrfhydropy/core/teams.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import math
4 | import operator
5 | import os
6 | import pathlib
7 | import pickle
8 | from pprint import pprint
9 | import wrfhydropy
10 |
11 |
12 | def set_cycle_ens_sim_jobs(ens_obj, job):
13 | members = ens_obj.members
14 | for mem in members:
15 | # Currently these are always strings, never in memory.
16 | if isinstance(mem, str):
17 | pkl_file = ens_obj._compose_dir / (mem + '/WrfHydroSim.pkl')
18 | sim = pickle.load(pkl_file.open('rb'))
19 | sim.jobs[0]._entry_cmd = job._entry_cmd
20 | sim.jobs[0]._exe_cmd = job._exe_cmd
21 | sim.jobs[0]._exit_cmd = job._exit_cmd
22 | sim.pickle(pkl_file)
23 |
24 |
25 | def get_cycle_ens_sim_job_exits(cycle_obj):
26 | members = cycle_obj.members
27 | statuses = {}
28 | for mem in members:
29 | pkl_file = cycle_obj._compose_dir / (mem + '/WrfHydroSim.pkl')
30 | sim = pickle.load(pkl_file.open('rb'))
31 | statuses.update({pkl_file: sim.jobs[0].exit_status})
32 | success = all([value == 0 for key, value in statuses.items()])
33 | if success:
34 | return 0
35 | else:
36 | return 1
37 |
38 |
39 | def parallel_teams_run(arg_dict):
40 | """
41 | Parallelizable function to run simulations across nodes.
42 | On the master node, python runs multiprocessing. Each separate process
43 | is a "team" of simulations to run. Multiprocessing makes MPI calls with
44 |     a specific syntax to run the MPI executable on specific (potentially
45 | other) nodes. This provides 2 levels of parallelism.
46 |
47 | This function is called (in parallel) once for each team by
48 | multiprocessing. Each team runs its set of simulations sequentially but
49 | each simulation it runs is parallel via MPI. (In the case of
50 | ensemble-cycles each team runs an ensemble but the ensemble runs its
51 | members sequentially.)
52 |
53 | Input:
54 | arg_dict:
55 | arg_dict == {
56 | 'obj_name' : string, either "member" or "cast" (or some other
57 | object), matches the object name used in the
58 | team_dict below (first argument)
59 |             'compose_dir': <pathlib.Path of the directory in which the
60 |                          ensemble or cycle was composed, containing the
61 |                          member/cast run directories>,
62 |             'team_dict' : <dict describing the work assigned to this
63 |                          team, see team_dict below>
64 |         }
65 |     where:
66 |         team_dict == {
67 |             object_name: key/name is either 'members' or 'casts', the value
68 |                          is a <list of the member/cast run directories (or
69 |                          objects) assigned to this team>,
70 |             'nodes' : <list of the node names on which this team runs its
71 |                          simulations>,
72 |             'exe_cmd' : <the MPI invocation string, described below>,
73 |             'env' : <optional dict of environment variables passed
74 |                          to the model run when it is executed,
75 |                          may be None>
76 |         }
77 |
78 |     The 'exe_cmd' is the form of invocation for the MPI distribution to
79 |         be used. For example, for OpenMPI this is
80 |         exe_cmd: 'mpirun --host {nodelist} -np {nproc} {cmd}'
81 |         The variables in brackets are expanded internally: the wrfhydropy
82 |         convention of './wrf_hydro.exe' is substituted for {cmd},
83 |         {nproc} is the length of the list passed in the nodes argument,
84 |         and {nodelist} is the comma-separated join of the entries in
85 |         that list; both are filled in by parallel_teams_run when each
86 |         team is run.
87 |
88 | The "entry_cmd" and "exit_cmd" ARE TAKEN FROM THE JOB object.
89 | 1) can be semicolon-separated commands
90 | 2) where these are run depends on MPI. OpenMPI, for example, handles
91 | these on the same processor set as the model runs.
92 |
93 | Notes:
94 | Currently this is working/tested with openmpi and intel mpi.
95 |         MPT requires the MPI_SHEPHERD env variable and its performance is not
96 | satisfactory so far.
97 | """
98 |
99 | obj_name = arg_dict['obj_name']
100 | compose_dir = arg_dict['compose_dir']
101 | team_dict = arg_dict['team_dict']
102 |
103 | exit_statuses = {}
104 | for obj in team_dict[obj_name]:
105 | if type(obj) is str:
106 | os.chdir(str(pathlib.Path(compose_dir) / obj))
107 | else:
108 | os.chdir(str(pathlib.Path(compose_dir) / obj.run_dir))
109 |
110 | # The cycle ensemble has an extra level of ensemble between the casts and the sims.
111 | # An ensemble and a non-ensemble-cycle have sim objects at this level
112 | have_cycle_ens = False
113 | object_pkl_file = pathlib.Path("WrfHydroSim.pkl")
114 | if not object_pkl_file.exists():
115 | # But a cycle ensemble will have ensembles at this level....
116 | have_cycle_ens = True
117 | object_pkl_file = pathlib.Path("WrfHydroEns.pkl")
118 | if not object_pkl_file.exists():
119 | raise FileNotFoundError(
120 | "No appropriate pickle object for running " + obj_name + ".")
121 |
122 | object_pkl = pickle.load(open(object_pkl_file, "rb"))
123 | job = object_pkl.jobs[0]
124 |
125 | if job._entry_cmd is not None:
126 | entry_cmds = job._entry_cmd.split(';')
127 | new_entry_cmd = []
128 | for cmd in entry_cmds:
129 | if 'mpirun' not in cmd:
130 | new_entry_cmd.append(
131 | # Switch out the ./wrf_hydro.exe cmd with each command.
132 | team_dict['exe_cmd'].format(
133 | **{
134 | 'cmd': cmd,
135 | 'nodelist': team_dict['nodes'][0], # only use one task
136 | 'nproc': 1
137 | }
138 | )
139 | )
140 | else:
141 | new_entry_cmd.append(cmd)
142 | job._entry_cmd = '; '.join(new_entry_cmd)
143 |
144 | if job._exit_cmd is not None:
145 | exit_cmds = job._exit_cmd.split(';')
146 | new_exit_cmd = []
147 | for cmd in exit_cmds:
148 | if 'mpirun' not in cmd:
149 | new_exit_cmd.append(
150 | # Switch out the ./wrf_hydro.exe cmd with each command.
151 | team_dict['exe_cmd'].format(
152 | **{
153 | 'cmd': cmd,
154 | 'nodelist': team_dict['nodes'][0], # only use one task
155 | 'nproc': 1
156 | }
157 | )
158 | )
159 | else:
160 | new_exit_cmd.append(cmd)
161 | job._exit_cmd = '; '.join(new_exit_cmd)
162 |
163 | job._exe_cmd = team_dict['exe_cmd'].format(
164 | **{
165 | 'cmd': './wrf_hydro.exe',
166 | 'nodelist': ','.join(team_dict['nodes']),
167 | 'nproc': len(team_dict['nodes'])
168 | }
169 | )
170 |
171 | # This will write the cmd to be executed into the member dir.
172 | # with open('team_run_cmd', 'w') as opened_file:
173 | # opened_file.write(job._exe_cmd)
174 |
175 | object_pkl.pickle(object_pkl_file)
176 | if have_cycle_ens:
177 |             # An ensemble-cycle needs the job components set on the simulations.
178 |             # This object is actually an ensemble...
179 | set_cycle_ens_sim_jobs(object_pkl, job)
180 |
181 | object_pkl.run(env=team_dict['env'])
182 |
183 | if have_cycle_ens:
184 |             # For an ensemble-cycle, collect exit statuses from the underlying simulations.
185 | exit_status = get_cycle_ens_sim_job_exits(object_pkl)
186 | else:
187 | exit_status = object_pkl.jobs[0].exit_status
188 |
189 | exit_statuses.update({obj: exit_status})
190 | return exit_statuses
191 |
192 |
193 | def assign_teams(
194 | obj,
195 | teams_exe_cmd: str,
196 | teams_exe_cmd_nproc: int,
197 | teams_node_file: dict = None,
198 | scheduler: str = 'pbs',
199 | env: dict = None
200 | ) -> dict:
201 | """
202 | Assign teams for parallel runs across nodes.
203 | Inputs:
204 |     obj: The ensemble or cycle object, containing lists of members or casts
205 | to be run.
206 | teams_exe_cmd: str, The mpi-specific syntax needed. For example
207 | 'mpirun --host {nodelist} -np {nproc} {cmd}'
208 | teams_exe_cmd_nproc: int, The number of cores per model/wrf_hydro
209 | simulation to be run.
210 | teams_node_file: [str, pathlib.Path] = None,
211 | Optional file that acts like a node file.
212 | It is not currently implemented but the key specifies the scheduler
213 | format that the file follows. An example pbs node file is in
214 |         tests/data and this argument is used here to test without a scheduler.
215 |     env: dict = None, optional environment to pass to the run.
216 | Outputs:
217 | dict: the teams_dict to be used by parallel_teams_run. See requirements
218 | above.
219 | """
220 | if 'casts' in dir(obj):
221 | object_list = obj.casts
222 | object_name = 'casts'
223 | elif 'members' in dir(obj):
224 | object_list = obj.members
225 | object_name = 'members'
226 |
227 | n_runs = len(object_list)
228 |
229 |     if scheduler == 'pbs':
230 |
231 | if teams_node_file is None:
232 | teams_node_file = os.environ.get('PBS_NODEFILE')
233 |
234 | pbs_nodes = []
235 | # TODO: comment the target format here.
236 | with open(teams_node_file, 'r') as infile:
237 | for line in infile:
238 | pbs_nodes.append(line.rstrip())
239 |
240 | n_total_processors = len(pbs_nodes) # less may be used.
241 | n_teams = min(math.floor(len(pbs_nodes) / teams_exe_cmd_nproc), n_runs)
242 | pbs_nodes_counts = dict(collections.Counter(pbs_nodes))
243 | if n_teams == 0:
244 | raise ValueError("teams_exe_cmd_nproc > total number of cores available")
245 | if (n_teams > 1 and
246 | any([ teams_exe_cmd_nproc > val for val in pbs_nodes_counts.values()])):
247 | raise ValueError("teams_exe_cmd_nproc > number of cores/node: "
248 | 'teams does not currently function in this capacity.')
249 |
250 |         # Map the objects on to the teams (this seems overly complicated;
251 |         # should probably consider using pandas).
252 | teams_dict = {}
253 |
254 | # If the cast/ensemble is still in memory, this looks different.
255 | if isinstance(object_list[0], wrfhydropy.Simulation):
256 | object_dirs = [oo.run_dir for oo in object_list]
257 | else:
258 | object_dirs = object_list
259 |
260 | object_teams = [the_object % n_teams for the_object in range(n_runs)]
261 | object_team_seq = [[dir, team] for dir, team in zip(object_dirs, object_teams)]
262 | object_team_seq.sort(key=operator.itemgetter(1))
263 | team_groups = itertools.groupby(object_team_seq, operator.itemgetter(1))
264 | team_objects = [[item[0] for item in data] for (key, data) in team_groups]
265 |
266 | # Map the nodes on to the teams
267 |         # Homogenization step here to avoid communication across nodes...
268 | # Sorting necessary for testing.
269 | unique_nodes = sorted([node for node in list(set(pbs_nodes))])
270 | print("\n*** Team " + object_name + ' ***')
271 | print("Running on nodes: " + ', '.join(unique_nodes))
272 | del pbs_nodes
273 | pbs_nodes = []
274 |
275 | # This is a proposal for cross-node execution setup that seems to work
276 | # but it crashes.
277 | # if any([ teams_exe_cmd_nproc > val for val in pbs_nodes_counts.values()]):
278 | # pbs_nodes_avail = [ nn.split('.')[0] for nn in pbs_nodes_in]
279 | # # copy.deepcopy(pbs_nodes_in)
280 | # for i_team in range(n_teams):
281 | # the_team_nodes = []
282 | # for ii in range(teams_exe_cmd_nproc):
283 | # the_team_nodes += [pbs_nodes_avail.pop(0)]
284 | # pbs_nodes += [the_team_nodes]
285 | # team_nodes = pbs_nodes
286 | # else:
287 |
288 | for i_team in range(n_teams):
289 | pbs_nodes = pbs_nodes + (
290 | [unique_nodes[i_team % len(unique_nodes)]] * teams_exe_cmd_nproc)
291 | node_teams = [the_node // teams_exe_cmd_nproc for the_node in range(len(pbs_nodes))]
292 | node_team_seq = [[node, team] for node, team in zip(pbs_nodes, node_teams)]
293 |
294 | node_team_seq.sort(key=operator.itemgetter(1))
295 | team_groups = itertools.groupby(node_team_seq, operator.itemgetter(1))
296 | team_nodes = [[item[0] for item in data] for (key, data) in team_groups]
297 | # End else
298 |
299 | # Get the entry and exit commands from the job on the first cast/member
300 | # Foolery for in/out of memory
301 | if isinstance(object_list[0], str):
302 | # An ensemble and a non-ensemble-cycle have sim objects at this level
303 | pkl_file = obj._compose_dir / (object_list[0] + '/WrfHydroSim.pkl')
304 | if not pkl_file.exists():
305 | # But a cycle ensemble will have ensembles at this level....
306 | pkl_file = obj._compose_dir / (object_list[0] + '/WrfHydroEns.pkl')
307 | if not pkl_file.exists():
308 | raise FileNotFoundError(
309 | "No appropriate pickle object for running " + object_name + ".")
310 | jobs = pickle.load(pkl_file.open('rb')).jobs
311 | else:
312 | jobs = object_list[0].jobs
313 | if len(jobs) > 1:
314 | raise ValueError('Teams runs only support single job simulations')
315 | entry_cmd = jobs[0]._entry_cmd
316 |         exit_cmd = jobs[0]._exit_cmd
317 |
318 | # Assign teams!
319 | for team in range(n_teams):
320 | teams_dict.update({
321 | team: {
322 | object_name: team_objects[team],
323 | 'nodes': team_nodes[team],
324 | 'entry_cmd': entry_cmd,
325 | 'exit_cmd': exit_cmd,
326 | 'exe_cmd': teams_exe_cmd,
327 | 'env': env
328 | }
329 | })
330 |
331 |     print('\nPBS_NODEFILE present: ')
332 | print(' ' + str(len(unique_nodes)) + ' nodes with')
333 | print(' ' + str(n_total_processors) + ' TOTAL processors requested.')
334 |
335 | print('\nTeams parallelization:')
336 | print(' ' + str(n_runs) + ' total ' + object_name)
337 | print(' ' + str(n_teams) + ' concurrent teams using')
338 | print(' ' + str(teams_exe_cmd_nproc) + ' processors each.')
339 |
340 | print('\nTeams dict:')
341 | pprint(teams_dict)
342 | print('\n')
343 |
344 | return teams_dict
345 |
--------------------------------------------------------------------------------
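
assign_teams builds the teams_dict consumed by parallel_teams_run, and each team is then driven by its own process. A hedged sketch of how the two fit together: the ensemble object ens is assumed to have been composed already, and the MPI command and node file are illustrative (the example node file ships with the test data).

    import multiprocessing
    from wrfhydropy.core.teams import assign_teams, parallel_teams_run

    teams_dict = assign_teams(
        ens,  # a previously composed ensemble (or cycle) object, not shown here
        teams_exe_cmd='mpirun --host {nodelist} -np {nproc} {cmd}',
        teams_exe_cmd_nproc=2,
        teams_node_file='wrfhydropy/tests/data/nodefile_pbs_example_copy.txt',
        scheduler='pbs')

    # One process per team; each team then runs its members sequentially.
    arg_dicts = [
        {'obj_name': 'members', 'compose_dir': ens._compose_dir, 'team_dict': team}
        for team in teams_dict.values()]
    with multiprocessing.Pool(len(teams_dict)) as pool:
        exit_statuses = pool.map(parallel_teams_run, arg_dicts)
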
/wrfhydropy/data/flood_thresholds_to_nc_w_qc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pathlib
4 | import wrfhydropy
5 | import xarray as xr
6 |
7 | wrf_hydro_py_dir = pathlib.Path(wrfhydropy.__file__).parent
8 | thresh_file = wrf_hydro_py_dir / 'data/flood_thresholds.txt'
9 |
10 | # -------------------------------------------------------
11 | # Load the text file
12 | thresh_df = pd.read_table(
13 | thresh_file,
14 | sep=' ',
15 | na_values='NA',
16 | dtype={'site_no': 'str'})
17 |
18 | # Pretty up
19 | # move "site_no" to "gage"
20 | thresh_df = thresh_df.reset_index().rename(columns={'site_no': 'gage'}).drop(columns='index')
21 |
22 |
23 | # -------------------------------------------------------
24 | # QC
25 | # 1. Duplicate feature_ids dropped.
26 | dup_feats = thresh_df[thresh_df.duplicated(subset='feature_id')].feature_id.to_list()
27 | # 3 duplicated features...
28 | thresh_df[thresh_df['feature_id'].isin(dup_feats)].sort_values(by='feature_id')
29 | # For now, just drop the duplicated. This serendipitously selects the
30 | # rows I would manually choose.
31 | thresh_df = thresh_df.drop_duplicates(subset="feature_id")
32 |
33 | # 2. No duplicated gages
34 | dup_gages = thresh_df[thresh_df.duplicated(subset='gage')].gage.to_list()
35 |
36 | # 3. There are 28 positive longitudes (31 before removing the 3 duplicated features)
37 | (thresh_df[thresh_df['lon'] > 0])
38 | thresh_df.loc[thresh_df['lon'] > 0, 'lon'] = -1 * abs(thresh_df.loc[thresh_df['lon'] > 0, 'lon'])
39 | # Not removing... for now.
40 |
41 | # 4. There are no negative latitudes
42 | len(thresh_df[thresh_df['lat'] < 0])
43 |
44 | # 5. Check the consistency of the various levels.
45 | # Have basically found that "record" is a wild card... or at least I don't understand it.
46 | ge_dict = {
47 | 'minor': {'action'},
48 | 'moderate': {'minor', 'action'},
49 | 'major': {'moderate', 'minor', 'action'},
50 | # 'record': {'major', 'moderate', 'minor', 'action'}
51 | }
52 |
53 |
54 | def check_thresh_orders(row):
55 | errors = []
56 | for var in ['stage', 'flow']:
57 | for thresh, thresh_below in ge_dict.items():
58 | var_thresh = thresh + '_' + var
59 | for below in thresh_below:
60 | var_thresh_below = below + '_' + var
61 | if np.isnan(row[var_thresh_below]) or np.isnan(row[var_thresh]):
62 | continue
63 | if row[var_thresh_below] > row[var_thresh]:
64 | errors += [var_thresh_below + ' > ' + var_thresh]
65 | if errors == []:
66 | return(None)
67 | else:
68 | return(errors)
69 |
70 |
71 | results = {}
72 | for gage, row in thresh_df.iterrows():
73 | results[gage] = check_thresh_orders(row)
74 |
75 | # remove the good=none results
76 | results2 = {gage: result for gage, result in results.items() if result is not None}
77 |
78 | # Only two gages with this contradiction
79 | funky_gages = list(results2.keys())
80 | funky_ones = thresh_df[thresh_df.index.isin(funky_gages)].sort_values(by='feature_id')
81 |
82 | with pd.option_context('display.max_rows', None, 'display.max_columns', None):
83 | print(funky_ones)
84 |
85 | # Just set the conflicting thresholds to none by hand!
86 | thresh_df.loc[thresh_df.gage == '07159750', 'action_stage'] = np.NaN
87 | thresh_df.loc[thresh_df.gage == '11156500', 'action_flow'] = np.NaN
88 |
89 | funky_ones = thresh_df[thresh_df.index.isin(funky_gages)].sort_values(by='feature_id')
90 | with pd.option_context('display.max_rows', None, 'display.max_columns', None):
91 | print(funky_ones)
92 |
93 | # QC Done
94 |
95 | # -------------------------------------------------------
96 | # Write it out
97 |
98 | thresh_ds_write = thresh_df.set_index('gage').to_xarray()
99 |
100 | # Convert cfs to cms
101 | cfs_to_cms = 0.0280
102 | thresh_flows = ['action_flow', 'minor_flow', 'moderate_flow', 'major_flow', 'record_flow']
103 | for col in thresh_flows:
104 | thresh_ds_write[col] = thresh_ds_write[col] * cfs_to_cms
105 | thresh_ds_write[col].attrs['units'] = 'm^3/s'
106 | thresh_ds_write[col].encoding = {'dtype': 'float32'}
107 |
108 | # Convert to m
109 | ft_to_m = 0.3048
110 | thresh_stages = ['action_stage', 'minor_stage', 'moderate_stage', 'major_stage', 'record_stage']
111 | for col in thresh_stages:
112 | thresh_ds_write[col] = thresh_ds_write[col] * ft_to_m
113 | thresh_ds_write[col].attrs['units'] = 'meters'
114 | thresh_ds_write[col].encoding = {'dtype': 'float32'}
115 |
116 | # Save this to a netcdf file.
117 | thresh_nc_file = wrf_hydro_py_dir / 'data/flood_thresholds_metric_units.nc'
118 | thresh_ds_write.to_netcdf(thresh_nc_file)
119 |
--------------------------------------------------------------------------------
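
check_thresh_orders above flags rows where a lower flood category exceeds a higher one. A toy check on a hand-built row, assuming check_thresh_orders from the script above is already defined in the session (the values are made up):

    import numpy as np
    import pandas as pd

    row = pd.Series({
        'action_stage': 3.0, 'minor_stage': 2.0,   # action above minor: inconsistent
        'moderate_stage': np.nan, 'major_stage': 6.0,
        'action_flow': 100.0, 'minor_flow': 200.0,
        'moderate_flow': 300.0, 'major_flow': 400.0})

    print(check_thresh_orders(row))  # ['action_stage > minor_stage']
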
/wrfhydropy/tests/.coveragerc:
--------------------------------------------------------------------------------
1 | ../../.coveragerc
--------------------------------------------------------------------------------
/wrfhydropy/tests/.gitignore:
--------------------------------------------------------------------------------
1 | coverage_html/
--------------------------------------------------------------------------------
/wrfhydropy/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/__init__.py
--------------------------------------------------------------------------------
/wrfhydropy/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pathlib
3 | import subprocess
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 | import xarray as xr
8 |
9 | from wrfhydropy.core.domain import Domain
10 | from wrfhydropy.core.job import Job
11 | from wrfhydropy.core.model import Model
12 | from wrfhydropy.core.schedulers import PBSCheyenne
13 |
14 |
15 | @pytest.fixture(scope='function')
16 | def ds_1d():
17 | # Create a dummy dataset
18 | vals_1d = np.random.randn(3)
19 | time = pd.to_datetime('1984-10-14')
20 | location = ['loc1', 'loc2', 'loc3']
21 |
22 | ds_1d = xr.Dataset({'var1': (('location'), vals_1d)},
23 | {'Time': time, 'location': location})
24 | ds_1d.var1.encoding['_FillValue'] = False
25 |
26 | return ds_1d
27 |
28 |
29 | @pytest.fixture(scope='function')
30 | def ds_1d_has_nans():
31 | # Create a dummy dataset
32 | vals_1d = np.random.randn(3)
33 | time = pd.to_datetime('1984-10-14')
34 | location = ['loc1', 'loc2', 'loc3']
35 |
36 | ds_1d = xr.Dataset({'var1': (('location'), vals_1d)},
37 | {'Time': time, 'location': location})
38 |
39 | return ds_1d
40 |
41 |
42 | @pytest.fixture(scope='function')
43 | def ds_2d():
44 | x = [10, 11, 12]
45 | y = [101, 102, 103]
46 | vals_2d = np.random.randn(3, 3)
47 |
48 | time = pd.to_datetime('1984-10-14')
49 |
50 | ds_2d = xr.Dataset(
51 | {'var1': (('x', 'y'), vals_2d)},
52 | {'Time': time, 'x': x, 'y': y})
53 | ds_2d.var1.encoding['_FillValue'] = False
54 |
55 | return ds_2d
56 |
57 |
58 | @pytest.fixture(scope='function')
59 | def ds_timeseries():
60 | # Create a dummy dataset
61 | vals_ts = np.random.randn(3, 3)
62 | time = pd.to_datetime(['1984-10-14 00:00:00', '1984-10-14 01:00:00', '1984-10-14 02:00:00'])
63 | location = ['loc1', 'loc2', 'loc3']
64 |
65 | ds_ts = xr.Dataset({'var1': (('location', 'Time'), vals_ts)},
66 | {'Time': time,
67 | 'location': location})
68 |
69 | return ds_ts
70 |
71 |
72 | @pytest.fixture(scope='function')
73 | def domain_dir(tmpdir, ds_1d):
74 | domain_top_dir_path = pathlib.Path(tmpdir).joinpath('example_case')
75 | domain_dir_path = domain_top_dir_path.joinpath('NWM/DOMAIN')
76 | restart_dir_path = domain_top_dir_path.joinpath('NWM/RESTART')
77 | forcing_dir_path = domain_top_dir_path.joinpath('FORCING')
78 |
79 | domain_top_dir_path.mkdir(parents=True)
80 | domain_dir_path.mkdir(parents=True)
81 | restart_dir_path.mkdir(parents=True)
82 | forcing_dir_path.mkdir(parents=True)
83 |
84 | # Make a list of DOMAIN filenames to create
85 | domain_file_names = ['Fulldom_hires.nc',
86 | 'Route_Link.nc',
87 | 'soil_properties.nc',
88 | 'GEOGRID_LDASOUT_Spatial_Metadata.nc',
89 | 'geo_em.d01.nc',
90 | 'spatialweights.nc',
91 | 'GWBUCKPARM.nc',
92 | 'hydro2dtbl.nc',
93 | 'wrfinput_d01.nc',
94 | 'LAKEPARM.nc',
95 | 'nudgingParams.nc']
96 | for file in domain_file_names:
97 | file_path = domain_dir_path.joinpath(file)
98 | ds_1d.to_netcdf(str(file_path))
99 |
100 | # Make restart files
101 | restart_file_names = ['HYDRO_RST.2011-08-26_00:00_DOMAIN1',
102 | 'nudgingLastObs.2011-08-26_00:00:00.nc',
103 | 'RESTART.2011082600_DOMAIN1']
104 |
105 | for file in restart_file_names:
106 | file_path = restart_dir_path.joinpath(file)
107 | ds_1d.to_netcdf(str(file_path))
108 |
109 | # Make forcing files
110 | forcing_file_names = ['2011082600.LDASIN_DOMAIN1',
111 | '2011082601.LDASIN_DOMAIN1',
112 | '2011082602.LDASIN_DOMAIN1']
113 |
114 | for file in forcing_file_names:
115 | file_path = forcing_dir_path.joinpath(file)
116 | ds_1d.to_netcdf(str(file_path))
117 |
118 | # Make namelist patch files
119 | hrldas_namelist = {
120 | "base": {
121 | "noahlsm_offline": {
122 | "hrldas_setup_file": "./NWM/DOMAIN/wrfinput_d01.nc",
123 | "restart_filename_requested": "./NWM/RESTART/RESTART.2011082600_DOMAIN1",
124 | "indir": "./FORCING",
125 | },
126 | "wrf_hydro_offline": {
127 | "forc_typ": 1
128 | }
129 | },
130 | "nwm_ana": {
131 | "noahlsm_offline": {},
132 | "wrf_hydro_offline": {
133 | "forc_typ": 1
134 | }
135 | }
136 | }
137 |
138 | hydro_namelist = {
139 | "base": {
140 | "hydro_nlist": {
141 | "geo_static_flnm": "./NWM/DOMAIN/geo_em.d01.nc",
142 | "restart_file": "./NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1",
143 | "aggfactrt": 4,
144 | "udmp_opt": 1,
145 | },
146 | "nudging_nlist": {
147 | "nudginglastobsfile": "./NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc"
148 | }
149 | },
150 |
151 | "nwm_ana": {
152 | "hydro_nlist": {},
153 | "nudging_nlist": {}
154 | }
155 | }
156 |
157 | json.dump(
158 | hrldas_namelist,
159 | domain_top_dir_path.joinpath('hrldas_namelist_patches.json').open('w')
160 | )
161 |
162 | json.dump(
163 | hydro_namelist,
164 | domain_top_dir_path.joinpath('hydro_namelist_patches.json').open('w')
165 | )
166 |
167 | return domain_top_dir_path
168 |
169 |
170 | @pytest.fixture(scope='function')
171 | def model_dir(tmpdir):
172 |
173 | model_dir_path = pathlib.Path(tmpdir).joinpath('wrf_hydro_nwm_public/src')
174 | model_dir_path.mkdir(parents=True)
175 |
176 | # Make namelist files
177 | hrldas_namelist = {
178 | "base": {
179 | "noahlsm_offline": {
180 | "btr_option": 1,
181 | "canopy_stomatal_resistance_option": 1,
182 | 'restart_frequency_hours': 24,
183 | 'output_timestep': 86400
184 | },
185 | "wrf_hydro_offline": {
186 | "forc_typ": "NULL_specified_in_domain.json"
187 | }
188 | },
189 | "nwm_ana": {
190 | "noahlsm_offline": {},
191 | "wrf_hydro_offline": {}
192 | }
193 | }
194 |
195 | hydro_namelist = {
196 | "base": {
197 | "hydro_nlist": {
198 | "channel_option": 2,
199 | "chanobs_domain": 0,
200 | "chanrtswcrt": 1,
201 | "chrtout_domain": 1,
202 | 'rst_dt': 1440,
203 | 'out_dt': 1440
204 | },
205 | "nudging_nlist": {
206 | "maxagepairsbiaspersist": 3,
207 | "minnumpairsbiaspersist": 1,
208 | }
209 | },
210 |
211 | "nwm_ana": {
212 | "hydro_nlist": {},
213 | "nudging_nlist": {}
214 | }
215 | }
216 |
217 | json.dump(
218 | hrldas_namelist,
219 | model_dir_path.joinpath('hrldas_namelists.json').open('w')
220 | )
221 |
222 | json.dump(
223 | hydro_namelist,
224 | model_dir_path.joinpath('hydro_namelists.json').open('w')
225 | )
226 |
227 | compile_options = {
228 | "nwm": {
229 | "WRF_HYDRO": 1,
230 | "HYDRO_D": 0,
231 | "SPATIAL_SOIL": 1,
232 | "WRF_HYDRO_RAPID": 0,
233 | "WRFIO_NCD_LARGE_FILE_SUPPORT": 1,
234 | "NCEP_WCOSS": 0,
235 | "WRF_HYDRO_NUDGING": 1
236 | }
237 | }
238 |
239 | json.dump(
240 | compile_options,
241 | model_dir_path.joinpath('compile_options.json').open('w')
242 | )
243 |
244 | with model_dir_path.joinpath('.version').open('w') as f:
245 | f.write('v5.1.0')
246 |
247 | with model_dir_path.joinpath('configure').open('w') as f:
248 | f.write('# dummy configure \n')
249 |
250 |     # Arguments passed to wrf_hydro.exe are echoed to diag_hydro.00000.
251 | dummy_compile = (
252 | "#!/bin/bash \n"
253 | "# dummy compile \n"
254 | "mkdir Run \n"
255 | "echo '#!/bin/bash \n"
256 | "echo $@ > diag_hydro.00000\n"
257 | "echo \'The model finished successfully.......\' >> diag_hydro.00000\n"
258 | "exit 0' > Run/wrf_hydro.exe\n"
259 | "touch Run/DUMMY.TBL \n"
260 | )
261 | with model_dir_path.joinpath('./compile_offline_NoahMP.sh').open('w') as f:
262 | f.write(dummy_compile)
263 |
264 | subprocess.run(['chmod', '-R', '755', str(model_dir_path)])
265 |
266 | return model_dir_path
267 |
268 |
269 | @pytest.fixture(scope='function')
270 | def compile_dir(tmpdir):
271 | compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir')
272 | compile_dir.mkdir(parents=True)
273 |
274 | # Set table files and exe file attributes
275 | table_files = [compile_dir.joinpath('file1.tbl'), compile_dir.joinpath('file2.tbl')]
276 | wrf_hydro_exe = compile_dir.joinpath('wrf_hydro.exe')
277 |
278 | # Make fake run directory with files that would have been produced at compile
279 | with wrf_hydro_exe.open('w') as f:
280 | f.write('#dummy exe file')
281 |
282 | for file in table_files:
283 | with file.open('w') as f:
284 | f.write('#dummy table file')
285 |
286 | return compile_dir
287 |
288 |
289 | @pytest.fixture(scope='function')
290 | def model(model_dir):
291 | model = Model(
292 | source_dir=model_dir,
293 | model_config='nwm_ana'
294 | )
295 | return model
296 |
297 |
298 | @pytest.fixture(scope='function')
299 | def domain(domain_dir):
300 | domain = Domain(
301 | domain_top_dir=domain_dir,
302 | domain_config='nwm_ana',
303 | compatible_version='v5.1.0'
304 | )
305 | return domain
306 |
307 |
308 | @pytest.fixture(scope='function')
309 | # TODO: this should be changed to job_cold_start
310 | def job():
311 | job = Job(
312 | job_id='test_job_1',
313 | model_start_time='1984-10-14',
314 | model_end_time='2017-01-04',
315 | restart=False,
316 | exe_cmd='./wrf_hydro.exe',
317 | entry_cmd='bogus entry cmd',
318 | exit_cmd='bogus exit cmd'
319 | )
320 | return job
321 |
322 |
323 | @pytest.fixture(scope='function')
324 | def job_restart():
325 | job = Job(
326 | job_id='test_job_1',
327 | model_start_time='1984-10-14',
328 | model_end_time='2017-01-04',
329 | restart=True,
330 | restart_file_time='2013-10-13',
331 | exe_cmd='./wrf_hydro.exe',
332 | entry_cmd='bogus entry cmd',
333 | exit_cmd='bogus exit cmd'
334 | )
335 | return job
336 |
337 |
338 | @pytest.fixture(scope='function')
339 | def scheduler():
340 | scheduler = PBSCheyenne(account='fake_acct',
341 | email_who='elmo',
342 | email_when='abe',
343 | nproc=216,
344 | nnodes=6,
345 | ppn=None,
346 | queue='regular',
347 | walltime="12:00:00")
348 | return scheduler
349 |
350 |
351 | @pytest.fixture(scope='function')
352 | def sim_output(tmpdir, ds_1d, ds_1d_has_nans, ds_2d):
353 |
354 | tmpdir = pathlib.Path(tmpdir)
355 | sim_out_dir = tmpdir.joinpath('sim_out')
356 |
357 | sim_out_dir.mkdir(parents=True)
358 |
359 | # Make a list of DOMAIN filenames to create
360 | file_names = [
361 | 'CHRTOUT_DOMAIN1_TEST',
362 |         'CHRTOUT_GRID1_TEST',
363 | 'CHANOBS_TEST',
364 | 'LAKEOUT_TEST',
365 | 'HYDRO_RST_TEST',
366 | 'RESTART_TEST',
367 | 'nudgingLastObs_TEST',
368 | '.RTOUT_',
369 | 'LDASOUT'
370 | ]
371 |
372 | for counter in range(3):
373 | for file in file_names:
374 | filename = file + '_' + str(counter)
375 | file_path = sim_out_dir.joinpath(filename)
376 | ds_2d.to_netcdf(str(file_path))
377 |
378 | for counter in range(3):
379 | filename = 'GWOUT_' + str(counter)
380 | file_path = sim_out_dir.joinpath(filename)
381 | ds_1d_has_nans.to_netcdf(str(file_path))
382 |
383 | return sim_out_dir
384 |
--------------------------------------------------------------------------------
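Note (illustrative, not part of the repository): a minimal sketch of how the fixtures above compose inside a test. The test name and assertion are hypothetical; the Simulation/add pattern mirrors the recipes elsewhere in the repo.

# Hypothetical test showing how the conftest fixtures are typically consumed.
import wrfhydropy


def test_simulation_compose_sketch(domain, model, job):
    sim = wrfhydropy.Simulation()
    sim.add(domain)  # Domain built from the dummy files in domain_dir
    sim.add(model)   # Model built from the dummy source tree in model_dir
    sim.add(job)     # the cold-start Job fixture
    # compose()/run() would normally be called from a working directory
    # (e.g. a tmpdir); omitted here to keep the sketch minimal.
    assert isinstance(sim, wrfhydropy.Simulation)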
/wrfhydropy/tests/data/.gitignore:
--------------------------------------------------------------------------------
1 | collection_data/
2 | collection_data.tar.gz
--------------------------------------------------------------------------------
/wrfhydropy/tests/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/__init__.py
--------------------------------------------------------------------------------
/wrfhydropy/tests/data/collection_data_download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pathlib
3 | import shutil
4 | from .gdrive_download import download_file_from_google_drive, untar
5 | from wrfhydropy.core.ioutils import md5
6 |
7 |
8 | def download(version='latest'):
9 | orig_dir = pathlib.Path('.').resolve()
10 | data_dir = os.path.dirname(os.path.realpath(__file__))
11 | os.chdir(data_dir)
12 | id_md5_dict = {
13 | 'latest': {
14 | 'id': '1VrWVve8fhYobDg2xDrgHfiAi7VBDmV9T',
15 | 'md5': '51847a29eaeea0377bfece7ea662500e'
16 | }
17 | }
18 | id = id_md5_dict[version]['id']
19 | the_md5 = id_md5_dict[version]['md5']
20 | target_name = pathlib.Path('collection_data.tar.gz')
21 | if target_name.exists() and md5(target_name) == the_md5:
22 | if not pathlib.Path('collection_data').exists():
23 | untar(str(target_name))
24 | os.chdir(orig_dir)
25 | return None
26 | if target_name.exists():
27 | target_name.unlink()
28 | if pathlib.Path('collection_data').exists():
29 | shutil.rmtree('collection_data')
30 | download_file_from_google_drive(id, str(target_name))
31 | untar(str(target_name))
32 | os.chdir(orig_dir)
33 |
34 |
35 | if __name__ == "__main__":
36 | download()
37 |
--------------------------------------------------------------------------------
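Note (illustrative): this helper is imported and called by the test modules below; the sketch restates that usage and where the data land.

# Mirrors the usage in test_collection.py / test_evaluation.py,
# as called from within the wrfhydropy/tests package.
from .data import collection_data_download

# Fetches collection_data.tar.gz next to the data module (skipped when the
# md5 already matches) and extracts it to a collection_data/ directory there.
collection_data_download.download()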
/wrfhydropy/tests/data/collection_data_recipe.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import datetime
5 | import os
6 | import pathlib
7 | import pickle
8 | import pywrfhydro
9 | import sys
10 | import wrfhydropy
11 | import xarray as xa
12 |
13 | # Configuration
14 | scratch_dir = pathlib.Path('/glade/scratch/jamesmcc/')
15 | work_dir = pathlib.Path('/glade/work/jamesmcc/')
16 | home_dir = pathlib.Path('/glade/u/home/jamesmcc/')
17 |
18 | experiment_dir = scratch_dir / 'ens_cycle_example'
19 |
20 | domain_dir = experiment_dir / 'croton_NY'
21 | model_dir = home_dir / 'WRF_Hydro/wrf_hydro_nwm_public'
22 | compile_dir = experiment_dir / 'compile'
23 |
24 | configuration = 'nwm_ana'
25 |
26 | ens_routelink_dir = experiment_dir / 'routelink_ens'
27 | sim_dir = experiment_dir / 'sim'
28 | ens_dir = experiment_dir / "ens_sim"
29 | ens_ana_dir = experiment_dir / "ens_ana"
30 |
31 |
32 | # ## Data
33 |
34 | # Set up the experiment directory and pull the croton domain:
35 | if not experiment_dir.exists():
36 | os.mkdir(experiment_dir)
37 |
38 | # This will hang/fail on a cheyenne compute node...
39 | if not domain_dir.exists():
40 | file_id = "1xFYB--zm9f8bFHESzgP5X5i7sZryQzJe"
41 | download_script = model_dir / 'tests/local/utils/gdrive_download.py'
42 | function_name = "download_file_from_google_drive"
43 | sys.path.insert(0, str(download_script.parent))
44 | download_file_from_google_drive = getattr(
45 | __import__(str(download_script.stem), fromlist=[function_name]),
46 | function_name
47 | )
48 | download_file_from_google_drive(file_id, str(experiment_dir / 'croton_NY.tar.gz'))
49 |
50 | get_ipython().run_cell_magic(
51 | 'bash',
52 | '',
53 | 'cd /glade/scratch/jamesmcc/ens_cycle_example/ ;\n' +
54 | 'tar xzf croton_NY.tar.gz ;\n' +
55 | 'mv example_case croton_NY'
56 | )
57 |
58 |
59 | # ## Building Blocks
60 | # ### Domain
61 |
62 | domain = wrfhydropy.Domain(
63 | domain_top_dir=domain_dir,
64 | domain_config=configuration
65 | )
66 |
67 |
68 | # ### Model
69 |
70 | model = wrfhydropy.Model(
71 | source_dir=model_dir / 'src',
72 | model_config=configuration,
73 | compiler='ifort'
74 | )
75 |
76 |
77 | model_pkl = compile_dir / 'WrfHydroModel.pkl'
78 | if not model_pkl.exists():
79 | model.compile(compile_dir)
80 | else:
81 | model = pickle.load(model_pkl.open('rb'))
82 |
83 |
84 | # ### Job
85 |
86 | model_start_time = datetime.datetime(2018, 8, 1, 0)
87 | model_end_time = model_start_time + datetime.timedelta(hours=2)
88 | job = wrfhydropy.Job(
89 | job_id='flo_sim',
90 | model_start_time=model_start_time,
91 | model_end_time=model_end_time,
92 | output_freq_hr=1,
93 | restart_freq_hr=1,
94 | exe_cmd='mpirun -np 1 ./wrf_hydro.exe'
95 | )
96 |
97 |
98 | # ### Simulation
99 |
100 | sim = wrfhydropy.Simulation()
101 | sim.add(domain)
102 | sim.add(model)
103 | sim.add(job)
104 |
105 | # ### Ensemble
106 |
107 | ens = wrfhydropy.EnsembleSimulation()
108 | ens.add(sim)
109 | ens.add(job)
110 | ens.replicate_member(3)
111 |
112 |
113 | # #### Routelink ensemble
114 |
115 | rl_file = domain_dir / 'NWM/DOMAIN/Route_Link.nc'
116 | routelink = xa.open_dataset(rl_file)
117 | mannings_n = routelink['n']
118 |
119 | if not ens_routelink_dir.exists():
120 | ens_routelink_dir.mkdir(parents=True)
121 | deltas = [.3, 1.0, 1.7]
122 | for delta in deltas:
123 | out_file = ens_routelink_dir / ('Route_Link_edit_' + str(delta) + '.nc')
124 | values_dict = {'n': mannings_n + delta}
125 | result = pywrfhydro.routelink_edit(values_df=values_dict, in_file=rl_file, out_file=out_file)
126 | print(result)
127 | routelink_files = [str(ff) for ff in sorted(ens_routelink_dir.glob("Route_Link*.nc"))]
128 | print(routelink_files)
129 | ens.set_member_diffs(
130 | att_tuple=('base_hydro_namelist', 'hydro_nlist', 'route_link_f'),
131 | values=routelink_files
132 | )
133 |
134 | ens.member_diffs
135 |
136 |
137 | # ## Ensemble Cycle
138 |
139 | init_times = [
140 | datetime.datetime(2011, 8, 26, 0),
141 | datetime.datetime(2011, 8, 26, 1),
142 | datetime.datetime(2011, 8, 26, 2),
143 | datetime.datetime(2011, 8, 26, 3)
144 | ]
145 | n_members = len(ens)
146 | # Look back units are in hours, not casts.
147 | restart_dirs = [['.'] * n_members, [-1] * n_members, ['-1'] * n_members, ['-1'] * n_members]
148 |
149 | ens_ana = wrfhydropy.CycleSimulation(
150 | init_times=init_times,
151 | restart_dirs=restart_dirs,
152 | ncores=1
153 | )
154 |
155 | ens_ana.add(ens)
156 | ens_ana.add(job)
157 |
158 | if not ens_ana_dir.exists():
159 | os.mkdir(ens_ana_dir)
160 | os.chdir(ens_ana_dir)
161 | ens_ana.compose()
162 | return_code = ens_ana.run(n_concurrent=1)
163 |
164 | print(return_code)
165 |
166 |
167 | # ## Wrap up
168 | # Clean up unnecessary items in the experiment directory. Then package it up.
169 |
170 | # Resolve all symlinks to be relative symlinks.
171 | top = experiment_dir.resolve()
172 | files = top.glob('**/*')
173 | links = [ff for ff in files if ff.is_symlink()]
174 | for ll in links:
175 | target = os.path.relpath(str(ll.resolve()), start=str(ll.parent))
176 | ll.unlink()
177 | ll.symlink_to(target)
178 |
179 | get_ipython().run_cell_magic(
180 | 'bash',
181 | '',
182 | 'cd /glade/scratch/jamesmcc/ens_cycle_example/\nrm croton_NY.tar.gz\n'
183 | 'rm compile/wrf_hydro.exe\n'
184 | 'rm ens_ana/cast_201108260*/member_*/HYDRO_RST.2011-08-26_00:00_DOMAIN1\n'
185 | 'rm ens_ana/cast_201108260*/member_*/RESTART.2011082600_DOMAIN1\n'
186 | 'rm ens_ana/cast_201108260*/member_*/nudgingLastObs.2011-08-26_00:00:00.nc\n'
187 | 'cd croton_NY\n'
188 | 'rm -rf Gridded Gridded_no_lakes/ Reach/ supplemental/\n'
189 | 'rm USGS_obs.csv Readme.txt study_map.PNG\n'
190 | 'rm hydro_namelist_patches.json hrldas_namelist_patches.json\n'
191 | 'rm example_case hydro_namelist_patches.json~ hrldas_namelist_patches.json~ \n'
192 | 'cd NWM\n'
193 | 'rm -rf DOMAIN_LR/ RESTART_LR/ referenceSim/\n'
194 | 'rm hydro.namelist namelist.hrldas \n'
195 | 'cd nudgingTimeSliceObs\n'
196 | 'rm 2011-09*.usgsTimeSlice.ncdf 2011-08-3*.usgsTimeSlice.ncdf \n'
197 | 'rm 2011-08-2[7-9]*.usgsTimeSlice.ncdf \n'
198 | 'rm 2011-08-26_[1-2]*.usgsTimeSlice.ncdf 2011-08-26_0[6-9]*.usgsTimeSlice.ncdf \n'
199 | 'rm 2011-08-25*.usgsTimeSlice.ncdf\n'
200 | 'cd ../../FORCING/\n'
201 | 'rm 201109*LDASIN_DOMAIN1 2011083*.LDASIN_DOMAIN1 2011082[7-9]*.LDASIN_DOMAIN1\n'
202 | 'rm 20110826[1-2]*.LDASIN_DOMAIN1 201108260[6-9]*.LDASIN_DOMAIN1\n'
203 | 'rm 2011082600.LDASIN_DOMAIN1'
204 | )
205 |
206 | get_ipython().run_cell_magic(
207 | 'bash',
208 | '',
209 | 'cd /glade/scratch/jamesmcc/\n'
210 | 'mv ens_cycle_example collection_data\n'
211 | 'tar czf collection_data.tar.gz collection_data'
212 | )
213 |
--------------------------------------------------------------------------------
/wrfhydropy/tests/data/gdrive_download.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | import requests
3 | import tarfile
4 |
5 |
6 | def download_file_from_google_drive(id, destination):
7 | print('downloading google drive file id ' + id + ' to ' + destination)
8 | URL = "https://docs.google.com/uc?export=download"
9 |
10 | session = requests.Session()
11 |
12 | response = session.get(URL, params={'id': id}, stream=True)
13 | token = get_confirm_token(response)
14 |
15 | if token:
16 | params = {'id': id, 'confirm': token}
17 | response = session.get(URL, params=params, stream=True)
18 |
19 | save_response_content(response, destination)
20 |
21 |
22 | def get_confirm_token(response):
23 | for key, value in response.cookies.items():
24 | if key.startswith('download_warning'):
25 | return value
26 |
27 | return None
28 |
29 |
30 | def save_response_content(response, destination):
31 | CHUNK_SIZE = 32768
32 |
33 | with open(destination, "wb") as f:
34 | for chunk in response.iter_content(CHUNK_SIZE):
35 | if chunk: # filter out keep-alive new chunks
36 | f.write(chunk)
37 |
38 |
39 | def untar(fname):
40 | if (fname.endswith("tar.gz")):
41 | tar = tarfile.open(fname, "r:gz")
42 | tar.extractall()
43 | tar.close()
44 | elif (fname.endswith("tar")):
45 | tar = tarfile.open(fname, "r:")
46 | tar.extractall()
47 | tar.close()
48 |
49 |
50 | def main():
51 |
52 | parser = ArgumentParser()
53 | parser.add_argument("--file_id",
54 | dest="file_id",
55 | help="Google drive file ID. Get from shareable link")
56 | parser.add_argument("--dest_file",
57 | dest="dest_file",
58 | help="Full path including filename for downloaded file.")
59 |
60 | args = parser.parse_args()
61 | file_id = args.file_id
62 | dest_file = args.dest_file
63 |
64 | download_file_from_google_drive(file_id, dest_file)
65 |
66 |
67 | if __name__ == "__main__":
68 | main()
69 |
--------------------------------------------------------------------------------
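Note (illustrative): the script can be used either from the command line (via the argparse flags above) or imported; the file id and destination below are placeholders, not real values.

# Command line form (placeholders for id/path):
#   python gdrive_download.py --file_id <shareable_link_file_id> --dest_file ./archive.tar.gz
# Or imported directly from the same directory:
from gdrive_download import download_file_from_google_drive, untar

download_file_from_google_drive('<shareable_link_file_id>', './archive.tar.gz')
untar('./archive.tar.gz')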
/wrfhydropy/tests/data/nan_na_data/fill_value.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/fill_value.nc
--------------------------------------------------------------------------------
/wrfhydropy/tests/data/nan_na_data/nan_fill.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/nan_fill.nc
--------------------------------------------------------------------------------
/wrfhydropy/tests/data/nan_na_data/nan_value.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/nan_value.nc
--------------------------------------------------------------------------------
/wrfhydropy/tests/data/nan_na_data/value_value.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/value_value.nc
--------------------------------------------------------------------------------
/wrfhydropy/tests/data/nan_na_files_recipe.py:
--------------------------------------------------------------------------------
1 | # Create the data necessary for testing if netcdf files contain nans
2 | # as distinct from the fill value.
3 | # This creates the following files
4 | # nan_na_data/fill_value.nc
5 | # nan_na_data/nan_fill.nc
6 | # nan_na_data/nan_value.nc
7 | # nan_na_data/value_value.nc
8 | # and runs the basic test to show that these are the right files for the job.
9 | # These files could be created on the fly by the tests, but it's six of one, half a dozen of the other.
10 |
11 | import xarray as xr
12 | import numpy as np
13 |
14 | the_nan = float('nan')
15 | the_fill = -9999.0
16 | the_value = 0.0
17 |
18 | all_combos = {
19 | 'value_value': [the_value, the_value],
20 | 'nan_value': [the_nan, the_value],
21 | 'fill_value': [the_fill, the_value],
22 | 'nan_fill': [the_nan, the_fill]
23 | }
24 |
25 | for name, value in all_combos.items():
26 | ds = xr.Dataset()
27 | da = xr.DataArray(
28 | np.array(value),
29 | coords=[np.array([0, 1])],
30 | dims='dim'
31 | )
32 | ds['some_var'] = da # np.array(value)
33 | ds.encoding = {'_FillValue': the_fill}
34 | ds.reset_coords('some_var')
35 | the_file = 'nan_na_data/' + name + '.nc'
36 | ds.to_netcdf(the_file)
37 | # This is just an xarray based check.
38 | ds_in = xr.open_dataset(the_file, mask_and_scale=False)
39 | print('')
40 | print(name)
41 | print(ds_in)
42 |
--------------------------------------------------------------------------------
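Note (illustrative): the point of these four files is that NaN and the fill value must remain distinguishable. A minimal sketch of reading them back without masking follows; the paths and fill value match the recipe above.

# Read the files back without masking so NaN and _FillValue stay distinct.
import numpy as np
import xarray as xr

for name in ['value_value', 'nan_value', 'fill_value', 'nan_fill']:
    ds = xr.open_dataset('nan_na_data/' + name + '.nc', mask_and_scale=False)
    vals = ds['some_var'].values
    print(name, 'has NaN:', bool(np.isnan(vals).any()),
          '| has fill:', bool((vals == -9999.0).any()))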
/wrfhydropy/tests/data/nodefile_pbs_example_copy.txt:
--------------------------------------------------------------------------------
1 | r10i1n1.ib0.cheyenne.ucar.edu
2 | r10i1n1.ib0.cheyenne.ucar.edu
3 | r10i1n2.ib0.cheyenne.ucar.edu
4 | r10i1n2.ib0.cheyenne.ucar.edu
5 | r10i1n3.ib0.cheyenne.ucar.edu
6 | r10i1n3.ib0.cheyenne.ucar.edu
7 |
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_collection.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pathlib
3 | import pytest
4 | import shutil
5 | import xarray as xr
6 | from wrfhydropy import open_whp_dataset
7 | from .data import collection_data_download
8 |
9 | test_dir = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
10 | # The collection_data gets wiped...
11 | answer_dir = test_dir / 'data/collection_data/test_answers'
12 |
13 | os.chdir(str(test_dir))
14 | # The data are found here.
15 | collection_data_download.download()
16 |
17 | # Issues raised by these tests
18 | # https://github.com/NCAR/wrf_hydro_nwm_public/issues/301
19 | # Make an issue: The restart files should have reference time and time just like the other files.
20 |
21 | # TODO: Test multiple versions (current and previous)
22 | version_file = test_dir.joinpath('data/collection_data/croton_NY/.version')
23 | version = version_file.open('r').read().split('-')[0]
24 |
25 | # Simulation
26 | # Make a sim dir to a single simulation.
27 | sim_dir = test_dir / 'data/collection_data/simulation'
28 | if sim_dir.is_symlink():
29 | sim_dir.unlink()
30 | sim_dir.symlink_to(test_dir / 'data/collection_data/ens_ana/cast_2011082600/member_000')
31 |
32 |
33 | @pytest.mark.parametrize(
34 | ['file_glob', 'ans_file', 'n_cores'],
35 | [
36 | ('*CHRTOUT_DOMAIN1', version + '/simulation/CHRTOUT.nc', 1),
37 | ('*LAKEOUT_DOMAIN1', version + '/simulation/LAKEOUT.nc', 1),
38 | ('*CHANOBS_DOMAIN1', version + '/simulation/CHANOBS.nc', 1),
39 | ('*GWOUT_DOMAIN1', version + '/simulation/GWOUT.nc', 1),
40 | ('*[0-9].RTOUT_DOMAIN1', version + '/simulation/RTOUT.nc', 2),
41 | ('*LDASOUT_DOMAIN1', version + '/simulation/LDASOUT.nc', 3),
42 | ('*LSMOUT_DOMAIN', version + '/simulation/LSMOUT.nc', 2),
43 | ('RESTART.*_DOMAIN1', version + '/simulation/RESTART.nc', 2),
44 | ('HYDRO_RST.*_DOMAIN1', version + '/simulation/HYDRO_RST.nc', 3),
45 | ],
46 | ids=[
47 | 'simulation-CHRTOUT_DOMAIN1',
48 | 'simulation-LAKEOUT_DOMAIN1',
49 | 'simulation-CHANOBS_DOMAIN1',
50 | 'simulation-GWOUT_DOMAIN1',
51 | 'simulation-RTOUT_DOMAIN1',
52 | 'simulation-LDASOUT_DOMAIN1',
53 | 'simulation-LSMOUT_DOMAIN',
54 | 'simulation-RESTART.*_DOMAIN1',
55 | 'simulation-HYDRO_RST.*_DOMAIN1'
56 | ]
57 | )
58 | def test_collect_simulation(
59 | file_glob,
60 | ans_file,
61 | n_cores
62 | ):
63 | sim_path = test_dir.joinpath(sim_dir)
64 | files = sorted(sim_path.glob(file_glob))
65 | sim_ds = open_whp_dataset(files, n_cores=n_cores)
66 | ans = xr.open_dataset(answer_dir / ans_file)
67 | xr.testing.assert_equal(sim_ds, ans)
68 |
69 |
70 | # Cycle
71 | # Make a cycle dir and set it up from the ensemble cycle.
72 | cycle_dir = test_dir / 'data/collection_data/cycle'
73 | # delete the directory here.
74 | if cycle_dir.exists():
75 | shutil.rmtree(str(cycle_dir))
76 | cycle_dir.mkdir()
77 | os.chdir(str(cycle_dir))
78 | cycle_dir.joinpath('WrfHydroCycle.pkl').symlink_to(
79 | test_dir.joinpath('data/collection_data/ens_ana/WrfHydroCycle.pkl')
80 | )
81 | for cast in test_dir.joinpath('data/collection_data/ens_ana').glob('cast_*'):
82 | cast_name = pathlib.Path(cast.name)
83 | cast_name.symlink_to(cast.joinpath('member_000'))
84 |
85 |
86 | @pytest.mark.parametrize(
87 | ['file_glob', 'ans_file', 'n_cores'],
88 | [
89 | ('*/*CHRTOUT_DOMAIN1', version + '/cycle/CHRTOUT.nc', 1),
90 | ('*/*LAKEOUT_DOMAIN1', version + '/cycle/LAKEOUT.nc', 1),
91 | ('*/*CHANOBS_DOMAIN1', version + '/cycle/CHANOBS.nc', 1),
92 | ('*/*GWOUT_DOMAIN1', version + '/cycle/GWOUT.nc', 1),
93 | ('*/*[0-9].RTOUT_DOMAIN1', version + '/cycle/RTOUT.nc', 2),
94 | ('*/*LDASOUT_DOMAIN1', version + '/cycle/LDASOUT.nc', 3),
95 | ('*/*LSMOUT_DOMAIN', version + '/cycle/LSMOUT.nc', 2),
96 | ('*/RESTART.*DOMAIN1', version + '/cycle/RESTART.nc', 3),
97 | ('*/HYDRO_RST.*DOMAIN1', version + '/cycle/HYDRO_RST.nc', 3),
98 | ],
99 | ids=[
100 | 'cycle-CHRTOUT_DOMAIN1',
101 | 'cycle-LAKEOUT_DOMAIN1',
102 | 'cycle-CHANOBS_DOMAIN1',
103 | 'cycle-GWOUT_DOMAIN1',
104 | 'cycle-RTOUT_DOMAIN1',
105 | 'cycle-LDASOUT_DOMAIN1',
106 | 'cycle-LSMOUT_DOMAIN',
107 | 'cycle-RESTART.*_DOMAIN1',
108 | 'cycle-HYDRO_RST.*_DOMAIN1'
109 | ]
110 | )
111 | def test_collect_cycle(
112 | file_glob,
113 | ans_file,
114 | n_cores
115 | ):
116 | cycle_path = test_dir.joinpath(cycle_dir)
117 | files = sorted(cycle_path.glob(file_glob))
118 | cycle_ds = open_whp_dataset(files, n_cores=n_cores)
119 | ans = xr.open_dataset(answer_dir / ans_file)
120 | xr.testing.assert_equal(cycle_ds, ans)
121 |
122 |
123 | # Ensemble
124 | # Make an ensemble dir and set it up from the ensemble cycle.
125 | ens_dir = test_dir / 'data/collection_data/ensemble'
126 | # delete the directory here.
127 | if ens_dir.is_symlink():
128 | ens_dir.unlink()
129 | ens_dir.symlink_to(test_dir / 'data/collection_data/ens_ana/cast_2011082600')
130 |
131 |
132 | @pytest.mark.parametrize(
133 | ['file_glob', 'ans_file', 'n_cores'],
134 | [
135 | ('*/*CHRTOUT_DOMAIN1', version + '/ensemble/CHRTOUT.nc', 1),
136 | ('*/*LAKEOUT_DOMAIN1', version + '/ensemble/LAKEOUT.nc', 1),
137 | ('*/*CHANOBS_DOMAIN1', version + '/ensemble/CHANOBS.nc', 1),
138 | ('*/*GWOUT_DOMAIN1', version + '/ensemble/GWOUT.nc', 1),
139 | ('*/*[0-9].RTOUT_DOMAIN1', version + '/ensemble/RTOUT.nc', 2),
140 | ('*/*LDASOUT_DOMAIN1', version + '/ensemble/LDASOUT.nc', 3),
141 |         ('*/*LSMOUT_DOMAIN', version + '/ensemble/LSMOUT.nc', 2),
142 | ('*/RESTART.*_DOMAIN1', version + '/ensemble/RESTART.nc', 3),
143 | ('*/HYDRO_RST.*_DOMAIN1', version + '/ensemble/HYDRO_RST.nc', 3),
144 | ],
145 | ids=[
146 | 'ensemble-CHRTOUT_DOMAIN1',
147 | 'ensemble-LAKEOUT_DOMAIN1',
148 | 'ensemble-CHANOBS_DOMAIN1',
149 | 'ensemble-GWOUT_DOMAIN1',
150 | 'ensemble-RTOUT_DOMAIN1',
151 | 'ensemble-LDASOUT_DOMAIN1',
152 | 'ensemble-LSMOUT_DOMAIN',
153 | 'ensemble-RESTART.*_DOMAIN1',
154 | 'ensemble-HYDRO_RST.*_DOMAIN1'
155 | ]
156 | )
157 | def test_collect_ensemble(
158 | file_glob,
159 | ans_file,
160 | n_cores
161 | ):
162 | ens_path = test_dir.joinpath(ens_dir)
163 | files = sorted(ens_path.glob(file_glob))
164 | ens_ds = open_whp_dataset(files, n_cores=n_cores)
165 | ans = xr.open_dataset(answer_dir / ans_file)
166 | xr.testing.assert_equal(ens_ds, ans)
167 |
168 |
169 | # Ensemble Cycle
170 | @pytest.mark.parametrize(
171 | ['file_glob', 'ans_file', 'n_cores'],
172 | [
173 | (
174 | '*/*/*CHRTOUT_DOMAIN1',
175 | version + '/ensemble_cycle/CHRTOUT.nc',
176 | 1
177 | ),
178 | (
179 | '*/*/*LAKEOUT_DOMAIN1',
180 | version + '/ensemble_cycle/LAKEOUT.nc',
181 | 2
182 | ),
183 | (
184 | '*/*/*CHANOBS_DOMAIN1',
185 | version + '/ensemble_cycle/CHANOBS.nc',
186 | 1
187 | ),
188 | (
189 | '*/*/*GWOUT_DOMAIN1',
190 | version + '/ensemble_cycle/GWOUT.nc',
191 | 1
192 | ),
193 | (
194 | '*/*/*[0-9].RTOUT_DOMAIN1',
195 | version + '/ensemble_cycle/RTOUT.nc',
196 | 1),
197 | (
198 | '*/*/*LDASOUT_DOMAIN1',
199 | version + '/ensemble_cycle/LDASOUT.nc',
200 | 3
201 | ),
202 | (
203 | '*/*/*LSMOUT_DOMAIN',
204 | version + '/ensemble_cycle/LSMOUT.nc',
205 | 2
206 | ),
207 | (
208 | '*/*/RESTART.*_DOMAIN1',
209 | version + '/ensemble_cycle/RESTART.nc',
210 | 3
211 | ),
212 | (
213 | '*/*/HYDRO_RST.*_DOMAIN1',
214 | version + '/ensemble_cycle/HYDRO_RST.nc',
215 | 3
216 | ),
217 | ],
218 | ids=[
219 | 'ensemble_cycle-CHRTOUT_DOMAIN1',
220 | 'ensemble_cycle-LAKEOUT_DOMAIN1',
221 | 'ensemble_cycle-CHANOBS_DOMAIN1',
222 | 'ensemble_cycle-GWOUT_DOMAIN1',
223 | 'ensemble_cycle-RTOUT_DOMAIN1',
224 | 'ensemble_cycle-LDASOUT_DOMAIN1',
225 | 'ensemble_cycle-LSMOUT_DOMAIN',
226 | 'ensemble_cycle-RESTART.*_DOMAIN1',
227 | 'ensemble_cycle-HYDRO_RST.*_DOMAIN1'
228 | ]
229 | )
230 | def test_collect_ensemble_cycle(
231 | file_glob,
232 | ans_file,
233 | n_cores
234 | ):
235 | ens_cycle_path = test_dir.joinpath('data/collection_data/ens_ana')
236 | files = sorted(ens_cycle_path.glob(file_glob))
237 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores)
238 | ans = xr.open_dataset(answer_dir / ans_file)
239 | xr.testing.assert_equal(ens_cycle_ds, ans)
240 |
241 | # Test that hierarchical collects are identical
242 | # Speed up this super slow one...
243 | file_chunk_size = 1
244 | if file_glob == '*/*/*LDASOUT_DOMAIN1':
245 | file_chunk_size = 50
246 | ens_cycle_ds_chunk = open_whp_dataset(
247 | files, n_cores=n_cores, file_chunk_size=file_chunk_size)
248 | xr.testing.assert_equal(ens_cycle_ds_chunk, ens_cycle_ds)
249 |
250 |
251 | # Missing/bogus files.
252 | # Do this for ensemble cycle as that's the most complicated relationship to the missing file.
253 | miss_ens_cycle_dir = test_dir / 'data/collection_data/miss_ens_cycle'
254 | if miss_ens_cycle_dir.exists():
255 | shutil.rmtree(str(miss_ens_cycle_dir))
256 | miss_ens_cycle_dir.mkdir()
257 | os.chdir(str(miss_ens_cycle_dir))
258 | orig_dir = test_dir / 'data/collection_data/ens_ana/'
259 | casts = sorted(orig_dir.glob('cast_*'))
260 | pkl_file = sorted(orig_dir.glob("*.pkl"))[0]
261 | pathlib.Path(pkl_file.name).symlink_to(pkl_file)
262 | for cc in casts:
263 | pathlib.Path(cc.name).symlink_to(cc)
264 | # Break the last one.
265 | pathlib.Path(cc.name).unlink()
266 | pathlib.Path(cc.name).mkdir()
267 | os.chdir(cc.name)
268 | member_dirs = \
269 | sorted((test_dir / ('data/collection_data/ens_ana/' + cc.name)).glob('member_*'))
270 | for mm in member_dirs:
271 | pathlib.Path(mm.name).symlink_to(mm)
272 | # Break the last one.
273 | pathlib.Path(mm.name).unlink()
274 | pathlib.Path(mm.name).mkdir()
275 | orig_ens_dir = test_dir / ('data/collection_data/ens_ana/' + cc.name)
276 | orig_sim_dir = orig_ens_dir / mm.name
277 | pkl_file = sorted(orig_ens_dir.glob("*.pkl"))[0]
278 | pathlib.Path(pkl_file.name).symlink_to(pkl_file)
279 | os.chdir(mm.name)
280 | chrtout_files = sorted(orig_sim_dir.glob('*CHRTOUT*'))
281 | for cc in chrtout_files:
282 | pathlib.Path(cc.name).symlink_to(cc)
283 | pathlib.Path(cc.name).unlink()
284 | pathlib.Path(cc.name).symlink_to('/foo/bar')
285 |
286 |
287 | @pytest.mark.parametrize(
288 | ['file_glob', 'ans_file', 'n_cores'],
289 | [
290 | (
291 | '*/*/*CHRTOUT_DOMAIN1',
292 | version + '/missing_ens_cycle/CHRTOUT.nc',
293 | 1
294 | ),
295 | (
296 | '*/*/RESTART.*_DOMAIN1',
297 | version + '/missing_ens_cycle/RESTART.nc',
298 | 2
299 | ),
300 | (
301 | '*/*/HYDRO_RST.*_DOMAIN1',
302 | version + '/missing_ens_cycle/HYDRO_RST.nc',
303 | 3
304 | )
305 | ],
306 | ids=[
307 | 'missing_ens_cycle-CHRTOUT_DOMAIN1',
308 | 'missing_ens_cycle-RESTART.*_DOMAIN1',
309 | 'missing_ens_cycle-HYDRO_RST.*_DOMAIN1'
310 | ]
311 | )
312 | def test_collect_missing_ens_cycle(
313 | file_glob,
314 | ans_file,
315 | n_cores
316 | ):
317 | miss_ens_cycle_path = test_dir.joinpath(miss_ens_cycle_dir)
318 | files = sorted(miss_ens_cycle_path.glob(file_glob))
319 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores)
320 | # There is a bit of tricky encoding to deal with NaN in strings in netcdf
321 | # and type conversions
322 | if 'crs' in ens_cycle_ds.variables:
323 | ens_cycle_ds['crs'] = ens_cycle_ds['crs'].astype('S8')
324 | ens_cycle_ds['crs'].encoding['_FillValue'] = 'nan'
325 | # This is mostly because int32 is changed to float64 bc of nans
326 | for vv in ens_cycle_ds.variables:
327 | if 'time' not in vv:
328 | ens_cycle_ds[vv].encoding['dtype'] = ens_cycle_ds[vv].dtype
329 |
330 | ans = xr.open_dataset(answer_dir / ans_file)
331 | xr.testing.assert_equal(ens_cycle_ds, ans)
332 |
333 | ens_cycle_ds_chunk = open_whp_dataset(files, n_cores=n_cores, file_chunk_size=1)
334 | if 'crs' in ens_cycle_ds.variables:
335 | ens_cycle_ds_chunk['crs'] = ens_cycle_ds_chunk['crs'].astype('S8')
336 | xr.testing.assert_equal(ens_cycle_ds_chunk, ens_cycle_ds)
337 |
338 |
339 | # Exercise profile and chunking.
340 | @pytest.mark.parametrize(
341 | ['file_glob', 'ans_file', 'n_cores'],
342 | [
343 | ('*CHRTOUT_DOMAIN1', version + '/profile_chunking/CHRTOUT.nc', 1)
344 | ],
345 | ids=[
346 | 'profile_chunking-CHRTOUT_DOMAIN1'
347 | ]
348 | )
349 | def test_collect_profile_chunking(
350 | file_glob,
351 | ans_file,
352 | n_cores
353 | ):
354 | sim_path = test_dir.joinpath(sim_dir)
355 | files = sorted(sim_path.glob(file_glob))
356 | sim_ds = open_whp_dataset(files, n_cores=n_cores, profile=True, chunks=15)
357 | ans = xr.open_dataset(answer_dir / ans_file)
358 | xr.testing.assert_equal(sim_ds, ans)
359 |
360 | # if file_chunk_size > and chunk is not None there is an error.
361 | sim_ds_chunk = open_whp_dataset(
362 | files, n_cores=n_cores, profile=True, chunks=15, file_chunk_size=1)
363 | xr.testing.assert_equal(sim_ds_chunk, ans)
364 |
365 |
366 | # Test spatial index selection
367 | # Ensemble Cycle
368 | @pytest.mark.parametrize(
369 | ['file_glob', 'ans_file', 'n_cores', 'isel'],
370 | [
371 | (
372 | '*/*/*CHRTOUT_DOMAIN1',
373 | version + '/ensemble_cycle_isel/CHRTOUT.nc',
374 | 1,
375 | {'feature_id': [1, 2]}
376 | ),
377 | (
378 | '*/*/RESTART.*_DOMAIN1',
379 | version + '/ensemble_cycle_isel/RESTART.nc',
380 | 3,
381 | {'snow_layers': [1, 2], 'west_east': [0, 1, 2]}
382 | ),
383 | (
384 | '*/*/HYDRO_RST.*_DOMAIN1',
385 | version + '/ensemble_cycle_isel/HYDRO_RST.nc',
386 | 3,
387 | {'links': [0], 'lakes':[0], 'iy':[0, 1]}
388 | ),
389 | ],
390 | ids=[
391 | 'ensemble_cycle_isel-CHRTOUT_DOMAIN1',
392 | 'ensemble_cycle_isel-RESTART.*_DOMAIN1',
393 | 'ensemble_cycle_isel-HYDRO_RST.*_DOMAIN1'
394 | ]
395 | )
396 | def test_collect_ensemble_cycle_isel(
397 | file_glob,
398 | ans_file,
399 | n_cores,
400 | isel
401 | ):
402 | ens_cycle_path = test_dir.joinpath('data/collection_data/ens_ana')
403 | files = sorted(ens_cycle_path.glob(file_glob))
404 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores, isel=isel)
405 | ans = xr.open_dataset(answer_dir / ans_file)
406 | xr.testing.assert_equal(ens_cycle_ds, ans)
407 |
408 | ens_cycle_ds_chunk = open_whp_dataset(files, n_cores=n_cores, isel=isel, file_chunk_size=2)
409 | xr.testing.assert_equal(ens_cycle_ds_chunk, ans)
410 |
411 |
412 | # Test dropping/keeping variables
413 | # Ensemble Cycle
414 | @pytest.mark.parametrize(
415 | ['file_glob', 'ans_file', 'n_cores', 'drop_vars'],
416 | [
417 | (
418 | '*/*/*CHRTOUT_DOMAIN1',
419 | version + '/ensemble_cycle_drop_vars/CHRTOUT.nc',
420 | 1,
421 | ['Head', 'crs']
422 | ),
423 | (
424 | '*/*/RESTART.*_DOMAIN1',
425 | version + '/ensemble_cycle_drop_vars/RESTART.nc',
426 | 3,
427 | ['SOIL_T', 'SNOW_T', 'SMC', 'SH2O', 'ZSNSO']
428 | ),
429 | (
430 | '*/*/HYDRO_RST.*_DOMAIN1',
431 | version + '/ensemble_cycle_drop_vars/HYDRO_RST.nc',
432 | 3,
433 | ['z_gwsubbas', 'resht', 'sfcheadsubrt']
434 | ),
435 | ],
436 | ids=[
437 | 'ensemble_cycle_drop_vars-CHRTOUT_DOMAIN1',
438 | 'ensemble_cycle_drop_vars-RESTART.*_DOMAIN1',
439 | 'ensemble_cycle_drop_vars-HYDRO_RST.*_DOMAIN1'
440 | ]
441 | )
442 | def test_collect_ensemble_cycle_drop_vars(
443 | file_glob,
444 | ans_file,
445 | n_cores,
446 | drop_vars
447 | ):
448 | ens_cycle_path = test_dir.joinpath('data/collection_data/ens_ana')
449 | files = sorted(ens_cycle_path.glob(file_glob))
450 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores, drop_variables=drop_vars)
451 | ans = xr.open_dataset(answer_dir / ans_file)
452 | xr.testing.assert_equal(ens_cycle_ds, ans)
453 |
454 | ens_cycle_ds_chunk = open_whp_dataset(
455 | files, n_cores=n_cores, drop_variables=drop_vars, file_chunk_size=1)
456 | xr.testing.assert_equal(ens_cycle_ds_chunk, ans)
457 |
--------------------------------------------------------------------------------
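Note (illustrative): a condensed sketch of the open_whp_dataset options these tests exercise. The path and index values are placeholders, and combining the options in a single call is an assumption based on their separate use in the tests above.

# Hypothetical direct use of the collection API exercised above.
import pathlib
from wrfhydropy import open_whp_dataset

run_dir = pathlib.Path('data/collection_data/simulation')  # placeholder path
files = sorted(run_dir.glob('*CHRTOUT_DOMAIN1'))
ds = open_whp_dataset(
    files,
    n_cores=2,                    # parallelize the per-file reads
    isel={'feature_id': [1, 2]},  # spatial index selection at read time
    drop_variables=['crs'],       # skip variables while collecting
    file_chunk_size=10,           # collect hierarchically in chunks of files
)
print(ds)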
/wrfhydropy/tests/test_domain.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 |
3 | from wrfhydropy import Domain, WrfHydroStatic, WrfHydroTs
4 |
5 |
6 | def test_domain_init(domain_dir):
7 | domain = Domain(domain_top_dir=domain_dir,
8 | domain_config='nwm_ana',
9 | compatible_version='v5.0.1')
10 | assert type(domain) == Domain
11 |
12 | def test_domain_namelists(domain_dir):
13 | domain = Domain(domain_top_dir=domain_dir,
14 | domain_config='nwm_ana',
15 | compatible_version='v5.0.1')
16 |
17 | # Check namelist configuration
18 | assert domain.hydro_namelist_patches == {
19 | 'hydro_nlist':
20 | {'geo_static_flnm': './NWM/DOMAIN/geo_em.d01.nc',
21 | 'restart_file': './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1',
22 | 'aggfactrt': 4,
23 | 'udmp_opt': 1},
24 | 'nudging_nlist': {
25 | 'nudginglastobsfile': './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc'}
26 | }, 'hydro_namelist JSONNamelist did not return expected dictionary ' \
27 | 'for config nwm_ana'
28 |
29 | assert domain.hrldas_namelist_patches == {
30 | 'noahlsm_offline':
31 | {'hrldas_setup_file': './NWM/DOMAIN/wrfinput_d01.nc',
32 | 'restart_filename_requested': './NWM/RESTART/RESTART.2011082600_DOMAIN1',
33 | 'indir': './FORCING'},
34 | 'wrf_hydro_offline': {'forc_typ': 1}}, 'hrldas_namelist JSONNamelist did not return ' \
35 | 'expected dictionary for config nwm_ana'
36 |
37 | def test_domain_filepaths(domain_dir):
38 | domain = Domain(domain_top_dir=domain_dir,
39 | domain_config='nwm_ana',
40 | compatible_version='v5.0.1')
41 | assert type(domain.hydro_files) == list and type(domain.hydro_files[0]) == WrfHydroStatic, \
42 | 'hydro files not imported correctly'
43 | assert type(domain.lsm_files) == list and type(domain.lsm_files[0]) == WrfHydroStatic, \
44 | 'lsm files not imported correctly'
45 | assert type(domain.forcing_data) == WrfHydroTs and len(domain.forcing_data) == 3, \
46 | 'forcing files not imported correctly'
47 |
48 | def test_domain_copyfiles(tmpdir,domain_dir):
49 | domain = Domain(domain_top_dir=str(domain_dir),
50 | domain_config='nwm_ana',
51 | compatible_version='v5.0.1')
52 | tmpdir = pathlib.Path(tmpdir)
53 | copy_dir = tmpdir.joinpath('domain_copy_test')
54 | domain.copy_files(str(copy_dir))
55 |
56 | namelist_files = []
57 | for item in domain.hydro_files:
58 | # Make relative for ease of comparison
59 | relative_path = item.absolute().relative_to(domain_dir.absolute())
60 | namelist_files.append(str(relative_path))
61 | for item in domain.lsm_files:
62 | relative_path = item.absolute().relative_to(domain_dir.absolute())
63 | namelist_files.append(str(relative_path))
64 | for item in domain.nudging_files:
65 | relative_path = item.absolute().relative_to(domain_dir.absolute())
66 | namelist_files.append(str(relative_path))
67 |
68 | copied_files = []
69 | for file in list(copy_dir.rglob('*')):
70 | # Get path as relative so that can be compared to namelist paths
71 | relative_path = file.absolute().relative_to(copy_dir.absolute())
72 | copied_files.append(str(relative_path))
73 |
74 | # Manually check that FORCING got copied, rglob is ignoring contents of symlinked dir
75 | assert 'FORCING' in copied_files, 'Forcing data not copied'
76 |
77 | # Check the rest of the files
78 | for file in namelist_files:
79 | if file not in ['FORCING']:
80 | assert file in copied_files, 'file ' + file + ' was not copied successfully'
81 |
82 | # Check the special case of RESTARTS which should be symlinked into main dir
83 | restart_file_patterns = ['*RESTART*','*HYDRO_RST*','*nudgingLastObs*']
84 | for file_pattern in restart_file_patterns:
85 | assert len(list(copy_dir.glob(file_pattern))) == 1, \
86 | 'restart file ' + file_pattern + ' not copied'
87 |
88 |
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_evaluation.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import datetime
3 | import math
4 | import numpy as np
5 | import os
6 | import pathlib
7 | import pandas as pd
8 | import pytest
9 | import warnings
10 | import xarray as xr
11 |
12 | from io import StringIO
13 | from pandas.testing import assert_frame_equal
14 | from wrfhydropy import Evaluation, open_whp_dataset
15 | from .data import collection_data_download
16 | from .data.evaluation_answer_reprs import *
17 |
18 | # Testing helper functions for data frames. Serialization is a PITA.
19 | float_form = '.2f'
20 |
21 |
22 | def assert_frame_close(df1, df2):
23 | assert_frame_equal(df1, df2, check_exact=False)
24 |
25 |
26 | def str_to_frame(string: str):
27 | return(pd.read_csv(StringIO(string)))
28 |
29 |
30 | def frame_to_str(frame: pd.DataFrame):
31 | return(frame.to_csv(float_format='%' + float_form))
32 |
33 |
34 | def round_trip_df_serial(frame: pd.DataFrame):
35 | return(str_to_frame(frame_to_str(frame)))
36 |
37 |
38 | pd.options.display.float_format = ('{:' + float_form + '}').format
39 |
40 | test_dir = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
41 |
42 | # Get the full reprs
43 | pd.set_option('display.max_rows', None)
44 | pd.set_option('display.max_columns', None)
45 |
46 | # The data are found here. Uses the same data as collection.
47 | os.chdir(str(test_dir))
48 | collection_data_download.download()
49 |
50 | engine = ['pd', 'xr']
51 |
52 |
53 | @pytest.mark.parametrize(
54 | ['mod_dir', 'mod_glob'],
55 | [
56 | (test_dir / 'data/collection_data/simulation', '*CHRTOUT_DOMAIN1'),
57 | ],
58 | ids=[
59 | 'init-simulation',
60 | ]
61 | )
62 | def test_init(mod_dir, mod_glob):
63 |
64 | files = sorted(mod_dir.glob(mod_glob))
65 | mod = open_whp_dataset(files)
66 | mod_df = mod.streamflow.to_dataframe()
67 | obs_df = mod_df
68 | streamflow_eval = Evaluation(mod_df, obs_df)
69 | assert type(streamflow_eval) == Evaluation
70 |
71 |
72 | # Should there be a "stage" part of collection_data_download? I
73 | # would have to look more closely at test_collection. The
74 | # following is certainly repeated code.
75 | sim_dir = test_dir / 'data/collection_data/simulation'
76 | if sim_dir.exists():
77 | sim_dir.unlink()
78 | sim_dir.symlink_to(test_dir / 'data/collection_data/ens_ana/cast_2011082600/member_000')
79 |
80 |
81 | @pytest.mark.parametrize('engine', engine)
82 | @pytest.mark.parametrize('group_by_in', [None, 'space'])
83 | @pytest.mark.parametrize(
84 | ['transform', 'transform_key'],
85 | [(lambda x: x, 'identity'),
86 | (lambda x: [ii for ii in range(len(x))], 'index')],
87 | ids=['lambda_identity', 'lambda_index'])
88 | @pytest.mark.parametrize(
89 | ['mod_dir', 'mod_glob', 'indices_dict', 'join_on', 'variable', 'expected_key'],
90 | [
91 | (test_dir / 'data/collection_data/simulation',
92 | '*CHRTOUT_DOMAIN1',
93 | {'feature_id': [1, 39, 56, 34]},
94 | ['time', 'feature_id'],
95 | 'streamflow',
96 | '*CHRTOUT_DOMAIN1'),
97 | (test_dir / 'data/collection_data/simulation',
98 | '*LDASOUT_DOMAIN1',
99 | {'x': [1, 3, 5], 'y': [2, 4, 6], 'soil_layers_stag': [2]},
100 | ['time', 'x', 'y', 'soil_layers_stag'],
101 | 'SOIL_M',
102 | '*LDASOUT_DOMAIN1'),
103 | ],
104 | ids=[
105 | 'gof-simulation-CHRTOUT',
106 | 'gof-simulation-LSMOUT',
107 | ]
108 | )
109 | def test_gof_perfect(
110 | engine,
111 | mod_dir,
112 | mod_glob,
113 | indices_dict,
114 | join_on,
115 | variable,
116 | group_by_in,
117 | transform,
118 | transform_key,
119 | expected_key
120 | ):
121 | # Keep this variable agnostic
122 | files = sorted(mod_dir.glob(mod_glob))
123 | mod = open_whp_dataset(files).isel(indices_dict)
124 |
125 | if group_by_in is None:
126 | group_by_key = ''
127 | group_by = None
128 | elif group_by_in == 'space':
129 | group_by_key = '-' + group_by_in
130 | group_by = copy.deepcopy(join_on)
131 | group_by.remove('time')
132 | else:
133 | raise ValueError("not a valid grouping for this test: ", group_by)
134 |
135 | expected_answer_key = expected_key + group_by_key + '_' + transform_key
136 | # expected = gof_answer_reprs[expected_answer_key]
137 | expected = str_to_frame(gof_answer_reprs[expected_answer_key])
138 |
139 | if engine == 'pd':
140 | mod_df = mod[variable].to_dataframe().rename(
141 | columns={variable: 'modeled'})
142 | obs_df = mod[variable].to_dataframe().rename(
143 | columns={variable: 'observed'})
144 | mod_df.modeled = transform(mod_df.modeled)
145 | the_eval = Evaluation(mod_df, obs_df, join_on=join_on)
146 | gof = the_eval.gof(group_by=group_by)
147 | assert_frame_close(round_trip_df_serial(gof), expected)
148 |
149 | elif engine == 'xr':
150 | if group_by_in is not None:
151 | pytest.skip("Currently not grouping using xarray.")
152 | mod_ds = mod.rename({variable: 'modeled'})['modeled']
153 | obs_ds = mod.rename({variable: 'observed'})['observed']
154 | new_data = np.array(transform(mod_ds.to_dataframe().modeled)).reshape(mod_ds.shape)
155 | mod_ds.values = new_data
156 | # mod_ds = xr.DataArray(new_data, dims=mod_ds.dims, coords=mod_ds.coords)
157 | the_eval = Evaluation(mod_ds, obs_ds, join_on=join_on)
158 | gof = the_eval.gof(group_by=group_by).to_dataframe()
159 | # assert repr(gof) == expected
160 | assert_frame_close(round_trip_df_serial(gof), expected)
161 |
162 |
163 | @pytest.mark.parametrize('engine', engine)
164 | @pytest.mark.parametrize('the_stat', ['crps', 'brier'])
165 | def test_crps_brier_basic(
166 | the_stat,
167 | engine
168 | ):
169 |
170 | # The input data for the test
171 | ens0 = np.linspace(-5, 5, num=1000)
172 | ens1 = np.linspace(-500, 500, num=1000)
173 | obs = 0.0000
174 |
175 |     # Wow, I must be a dunce, this is way too much work.
176 | t0 = datetime.datetime(2000, 1, 1)
177 | t1 = datetime.datetime(2000, 1, 2)
178 | modeled = pd.DataFrame(
179 | np.array([ens0, ens1]).transpose(),
180 | columns=[t0, t1]
181 | )
182 | modeled.index.name = 'member'
183 | modeled = modeled.reset_index()
184 | modeled = modeled.melt(
185 | id_vars=['member'],
186 | var_name='time',
187 | value_name='modeled'
188 | ).set_index(['time', 'member'])
189 | observed = modeled.rename(columns={'modeled': 'observed'}) * obs
190 |
191 | if engine == 'xr':
192 |         pytest.skip("Currently not using xarray for brier and crps.")
193 | modeled = modeled.to_xarray()['modeled']
194 | observed = observed.to_xarray()['observed']
195 |
196 | the_eval = Evaluation(modeled, observed)
197 |
198 | if the_stat == 'crps':
199 | # Generate the answer
200 | # import properscoring as ps
201 | # answer = np.array([ps.crps_ensemble(obs, mod) for mod in [ens0, ens1]])
202 | answer = pd.DataFrame(
203 | {'time': [t0, t1],
204 | 'crps': np.array([0.83416917, 83.41691692])}
205 | ).set_index('time')
206 | crps = the_eval.crps()
207 | assert_frame_close(crps, answer)
208 |
209 | elif the_stat == 'brier':
210 | threshold = 1
211 | # Generate the answer
212 | # import properscoring as ps
213 | # answer = np.array([ps.threshold_brier_score(obs, mod, threshold=threshold)
214 | # for mod in [ens0, ens1]])
215 | # answer = pd.DataFrame(
216 | # {'time': [t0, t1],
217 | # 'crps': np.array([ 0.83416917, 83.41691692])}
218 | # ).set_index('time')
219 | answer = np.array([0.16, 0.249001])
220 | brier = the_eval.brier(threshold)
221 | assert np.isclose(brier, answer).all()
222 |
223 |
224 | # Inputs for contingency and event stat calculations.
225 | # Answers are in data/evaluation_answer_reprs.py
226 | base_dum_time = datetime.datetime(2000, 1, 1)
227 | dumtime = [base_dum_time + datetime.timedelta(hours=dd) for dd in range(4)]
228 |
229 | # Easy to read and interpret inputs and grouped output.
230 | contingency_known_data_input = pd.DataFrame({
231 | # hits #mix # misses # false pos # corr_neg
232 | 'mod': [1, 1, 1, 1, 1, -1, 1, -1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1],
233 | 'obs': [1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1],
234 | 'tsh': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
235 | 'loc': (['hits']*4)+ (['mix']*4)+ (['miss']*4)+ (['false_pos']*4)+ (['corr_neg']*4),
236 | 'time': dumtime + dumtime + dumtime + dumtime + dumtime,
237 | }).set_index(['loc', 'time'])
238 |
239 | # A threshold that varies across the group on which the calculation is made.
240 | contingency_known_data_input_2 = pd.DataFrame({
241 | # hits #mix # misses # false pos # corr_neg
242 | 'mod': [1, 11, 111, 1, 1, 1, 111, 1, 0, 10, 110, -1, 1, 11, 111, 2, 0, 2, 10, 17],
243 | 'obs': [1, 11, 111, 1, 1, 11, 1, 1, 2, 11, 111, 1, 0, 10, 110, 1, 0, 2, 10, 13],
244 | 'tsh': [0, 10, 110, 0, 0, 10, 110, 10, 1, 10, 110, 0, 0, 10, 110, 1, 1, 3, 11, 20],
245 | 'loc': (['hits']*4)+ (['mix']*4)+ (['miss']*4)+ (['false_pos']*4)+ (['corr_neg']*4),
246 | 'time': dumtime + dumtime + dumtime + dumtime + dumtime,
247 | }).set_index(['loc', 'time'])
248 |
249 | # TODO: test NaNs in the data
250 |
251 |
252 | # @pytest.mark.parametrize('engine', engine)
253 | @pytest.mark.parametrize(
254 | 'input_data',
255 | [contingency_known_data_input, contingency_known_data_input_2])
256 | def test_contingency_known_data(input_data):
257 | known_data = input_data.to_xarray().set_coords("tsh")
258 | mod = known_data.mod.drop('tsh')
259 | obs = known_data.obs
260 | result = mod.eval.obs(obs).contingency(threshold='tsh', group_by='loc')
261 | result = round_trip_df_serial(result)
262 | expected = str_to_frame(contingency_known_data_answer)
263 | assert_frame_close(result, expected)
264 |
265 |
266 | # @pytest.mark.parametrize('engine', engine)
267 | @pytest.mark.parametrize(
268 | 'input_data',
269 | [contingency_known_data_input, contingency_known_data_input_2])
270 | def test_contingency_missing_columns(input_data):
271 | known_data = input_data.to_xarray().set_coords("tsh")
272 | mod = known_data.mod.drop('tsh')
273 | obs = known_data.obs
274 | result = mod.eval.obs(obs).contingency(threshold='tsh', group_by='loc')
275 | result = round_trip_df_serial(result)
276 | expected = str_to_frame(contingency_known_data_answer)
277 | assert_frame_close(result, expected)
278 |
279 |
280 | @pytest.mark.parametrize(
281 | 'input_data',
282 | [contingency_known_data_input, contingency_known_data_input_2])
283 | def test_event_known_data(input_data):
284 | known_data = input_data.to_xarray().set_coords("tsh")
285 | mod = known_data.mod.drop('tsh')
286 | obs = known_data.obs
287 | result = mod.eval.obs(obs).event(threshold='tsh', group_by='loc')
288 | result = round_trip_df_serial(result)
289 | expected = str_to_frame(event_known_data_answer)
290 | assert_frame_close(result, expected)
291 |
--------------------------------------------------------------------------------
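Note (illustrative): a compact sketch of the Evaluation entry point these tests cover, using a tiny synthetic modeled/observed pair rather than repository data. The join_on argument follows the usage in test_gof_perfect; default arguments to gof() are assumed.

# Hypothetical minimal Evaluation usage with identical modeled and observed data.
import pandas as pd
from wrfhydropy import Evaluation

times = pd.date_range('2000-01-01', periods=4, freq='h')
mod_df = pd.DataFrame({'time': times,
                       'modeled': [1.0, 2.0, 3.0, 4.0]}).set_index('time')
obs_df = mod_df.rename(columns={'modeled': 'observed'})

the_eval = Evaluation(mod_df, obs_df, join_on=['time'])
print(the_eval.gof())  # goodness-of-fit stats; perfect scores for identical inputs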
/wrfhydropy/tests/test_ioutils.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import datetime
3 | import numpy as np
4 | import pandas as pd
5 | import pathlib
6 | import pytest
7 | import re
8 | import requests
9 | import warnings
10 | import xarray as xr
11 |
12 | from wrfhydropy.core.ioutils import \
13 | open_wh_dataset, WrfHydroTs, WrfHydroStatic, check_input_files, nwm_forcing_to_ldasin
14 |
15 | from wrfhydropy.core.namelist import JSONNamelist
16 |
17 |
18 | @pytest.fixture(scope='function')
19 | def ds_timeseries(tmpdir):
20 | ts_dir = pathlib.Path(tmpdir).joinpath('timeseries_data')
21 | ts_dir.mkdir(parents=True)
22 |
23 | # Create a dummy dataset
24 | with warnings.catch_warnings():
25 | warnings.simplefilter("ignore")
26 | vals_ts = np.array([np.log(-1.0), 2.0, 3.0], dtype='float')
27 |
28 | reference_times = pd.to_datetime([
29 | '1984-10-14 00:00:00',
30 | '1984-10-14 01:00:00',
31 | '1984-10-14 02:00:00'
32 | ])
33 | times = pd.to_datetime([
34 | '1984-10-14 01:00:00',
35 | '1984-10-14 02:00:00',
36 | '1984-10-14 03:00:00'
37 | ])
38 | location = ['loc1', 'loc2', 'loc3']
39 |
40 | for idx in enumerate(times):
41 | idx = idx[0]
42 | time_array = [times[idx]]
43 | ref_time_array = [reference_times[idx]]
44 | ds_ts = xr.Dataset({'var1': ('location', vals_ts)},
45 | {'time': time_array,
46 | 'reference_time': ref_time_array,
47 | 'location': location})
48 | filename = 'timeseries_' + str(idx) + '.nc'
49 | ds_ts.to_netcdf(ts_dir.joinpath(filename))
50 | return ts_dir
51 |
52 |
53 | def test_open_wh_dataset_no_forecast(ds_timeseries):
54 | ds_paths = sorted(ds_timeseries.rglob('*.nc'))
55 | the_ds = open_wh_dataset(
56 | paths=ds_paths,
57 | chunks=None,
58 | forecast=False
59 | )
60 |
61 | the_ref_times = np.array(
62 | ['1970-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
63 | assert (the_ds['reference_time'].values == the_ref_times).all()
64 |
65 | the_ds['time'].values.sort()
66 | assert np.all(the_ds['time'].values == np.array(['1984-10-14T01:00:00.000000000',
67 | '1984-10-14T02:00:00.000000000',
68 | '1984-10-14T03:00:00.000000000'],
69 | dtype='datetime64[ns]'))
70 |
71 |
72 | def test_open_wh_dataset_forecast(ds_timeseries):
73 | ds_paths = list(ds_timeseries.rglob('*.nc'))
74 | the_ds = open_wh_dataset(
75 | paths=ds_paths,
76 | chunks=None,
77 | forecast=True
78 | )
79 |
80 | the_ds['reference_time'].values.sort()
81 | assert np.all(the_ds['reference_time'].values == np.array(['1984-10-14T00:00:00.000000000',
82 | '1984-10-14T01:00:00.000000000',
83 | '1984-10-14T02:00:00.000000000'],
84 | dtype='datetime64[ns]'))
85 |
86 | the_ds['time'].values.sort()
87 | assert np.all(the_ds['time'].values == np.array(['1984-10-14T01:00:00.000000000',
88 | '1984-10-14T02:00:00.000000000',
89 | '1984-10-14T03:00:00.000000000'],
90 | dtype='datetime64[ns]'))
91 | # print(the_ds)
92 | # print(the_ds['var1'].values)
93 | # assert np.all(the_ds['var1'].values == np.array([[[1.0,2.0,3.0]]], dtype='int'))
94 |
95 |
96 | def test_wrfhydrots(ds_timeseries):
97 | ts_obj = WrfHydroTs(list(ds_timeseries.rglob('*.nc')))
98 |
99 | ts_obj_open = ts_obj.open()
100 |
101 | assert type(ts_obj_open) == xr.core.dataset.Dataset
102 | assert type(ts_obj.check_nans()) == dict
103 |
104 |
105 | def test_wrfhydrostatic(ds_timeseries):
106 |
107 | static_obj = WrfHydroStatic(list(ds_timeseries.rglob('*.nc'))[0])
108 |
109 | static_obj_open = static_obj.open()
110 |
111 | assert type(static_obj_open) == xr.core.dataset.Dataset
112 | assert type(static_obj.check_nans()) == dict
113 |
114 |
115 | def test_check_input_files(domain_dir):
116 | hrldas_namelist = JSONNamelist(domain_dir.joinpath('hrldas_namelist_patches.json'))
117 | hrldas_namelist = hrldas_namelist.get_config('nwm_ana')
118 | hydro_namelist = JSONNamelist(domain_dir.joinpath('hydro_namelist_patches.json'))
119 | hydro_namelist = hydro_namelist.get_config('nwm_ana')
120 |
121 | input_file_check = check_input_files(hrldas_namelist=hrldas_namelist,
122 | hydro_namelist=hydro_namelist,
123 | sim_dir=domain_dir)
124 | assert input_file_check is None
125 |
126 |     # Alter one file to cause check_input_files to fail
127 | hydro_namelist['hydro_nlist']['geo_static_flnm'] = 'no_such_file'
128 |
129 | with pytest.raises(ValueError) as excinfo:
130 | check_input_files(hrldas_namelist=hrldas_namelist,
131 | hydro_namelist=hydro_namelist,
132 | sim_dir=domain_dir)
133 |
134 | assert str(excinfo.value) == 'The namelist file geo_static_flnm = no_such_file does not exist'
135 |
136 |
137 | def test_nwm_forcing_to_ldasin(tmpdir):
138 | tmpdir = pathlib.Path(tmpdir)
139 |
140 | def url_index_anchor_regex(url, regex=''):
141 | page = requests.get(url).text
142 | soup = BeautifulSoup(page, 'html.parser')
143 | anchors = [url + '/' + node.get('href') for
144 | node in soup.find_all('a') if re.search(regex, node.get('href'))]
145 | return anchors
146 |
147 | nwm_yesterday = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1))
148 | nwm_yesterday = nwm_yesterday.strftime("nwm.%Y%m%d")
149 | prod_url = 'http://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/' + nwm_yesterday
150 | para_url = 'http://para.nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/para/' + nwm_yesterday
151 | version_dict = {
152 | # 'para': para_url,
153 | 'prod': prod_url}
154 |
155 | for version_name, model_version in version_dict.items():
156 |
157 | forcing_dirs = url_index_anchor_regex(model_version, r'^forcing_analysis_assim/$')
158 | for forcing_range in forcing_dirs:
159 |
160 | forcing_files = url_index_anchor_regex(forcing_range, r'\.nc$')
161 | for file in forcing_files:
162 | the_split = file.split('/')
163 | the_base = '/'.join(file.split('/')[(the_split.index(version_name)+1):])
164 | the_file = tmpdir.joinpath(version_name).joinpath(the_base)
165 | the_file.parent.mkdir(exist_ok=True, parents=True)
166 | the_file.touch()
167 |
168 | # The argument to nwm_forcing_dir is a list of "nwm.YYYYMMDD" dirs.
169 | ldasin_dir_list = tmpdir.joinpath(
170 | 'ldasin_' + version_name + '_from_list/' + pathlib.Path(forcing_range).name
171 | )
172 | ldasin_dir_list.mkdir(parents=True)
173 | nwm_forcing_to_ldasin(
174 | nwm_forcing_dir=[tmpdir.joinpath(version_name).joinpath(nwm_yesterday)],
175 | ldasin_dir=ldasin_dir_list,
176 | range=pathlib.Path(forcing_range).name
177 | )
178 | ldasin_list_files = sorted(ldasin_dir_list.glob('*/*'))
179 | assert len(ldasin_list_files) == len(forcing_files)
180 |
181 | # The argument to nwm_forcing_dir is a path which contains "nwm.YYYYMMDD" dirs.
182 | ldasin_dir = tmpdir.joinpath(
183 | 'ldasin_' + version_name + '/' + pathlib.Path(forcing_range).name
184 | )
185 | ldasin_dir.mkdir(parents=True)
186 | nwm_forcing_to_ldasin(
187 | nwm_forcing_dir=tmpdir.joinpath(version_name),
188 | ldasin_dir=ldasin_dir,
189 | range=pathlib.Path(forcing_range).name
190 | )
191 | ldasin_files = sorted(ldasin_dir.glob('*/*'))
192 | assert len(ldasin_files) == len(forcing_files)
193 |
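# --- Illustrative sketch, not part of the test file above ---
# A minimal call to open_wh_dataset as exercised by the tests above;
# 'timeseries_data' is a hypothetical placeholder directory of netCDF output.
# With forecast=True each file's reference_time is preserved; with
# forecast=False reference_time collapses to a single placeholder value.
import pathlib
from wrfhydropy.core.ioutils import open_wh_dataset

paths = sorted(pathlib.Path('timeseries_data').rglob('*.nc'))
ds = open_wh_dataset(paths=paths, chunks=None, forecast=True)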
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_model.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import warnings
3 |
4 | from wrfhydropy import Model
5 |
6 |
7 | def test_model_init(model_dir):
8 | model = Model(source_dir=model_dir,
9 | model_config='nwm_ana')
10 | assert type(model) == Model
11 |
12 | def test_model_setenvar(model_dir,tmpdir):
13 | model = Model(source_dir=model_dir,
14 | model_config='nwm_ana')
15 |
16 | assert model.compile_options == {
17 | "WRF_HYDRO": 1,
18 | "HYDRO_D": 0,
19 | "SPATIAL_SOIL": 1,
20 | "WRF_HYDRO_RAPID": 0,
21 | "WRFIO_NCD_LARGE_FILE_SUPPORT": 1,
22 | "NCEP_WCOSS": 0,
23 | "WRF_HYDRO_NUDGING": 1
24 | }
25 |
26 | compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir_setenvar')
27 |
28 |     # Compile will fail so trap the exception and check compile artifacts instead
29 | try:
30 | with warnings.catch_warnings():
31 | warnings.simplefilter("ignore")
32 | model.compile(compile_dir=compile_dir)
33 | except:
34 | pass
35 |
36 | with model_dir.joinpath('compile_options.sh').open('r') as f:
37 | assert f.read() == 'export WRF_HYDRO=1\n' \
38 | 'export HYDRO_D=0\n' \
39 | 'export SPATIAL_SOIL=1\n' \
40 | 'export WRF_HYDRO_RAPID=0\n' \
41 | 'export WRFIO_NCD_LARGE_FILE_SUPPORT=1\n' \
42 | 'export NCEP_WCOSS=0\n' \
43 | 'export WRF_HYDRO_NUDGING=1\n'
44 |
45 | #model_dir=pathlib.Path('test')
46 | def test_model_compile(model_dir,tmpdir):
47 | model = Model(source_dir=model_dir,
48 | model_config='nwm_ana')
49 |
50 | compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir_compile')
51 |
52 | # Compile will fail so trap exception and check compile artifacts instead
53 | try:
54 | with warnings.catch_warnings():
55 | warnings.simplefilter("ignore")
56 | model.compile(compile_dir=compile_dir)
57 | except:
58 | pass
59 |
60 | assert model.compile_log.returncode == 0
61 |
62 | def test_model_copyfiles(model_dir, tmpdir, compile_dir):
63 |
64 | model = Model(source_dir=model_dir,
65 | model_config='nwm_ana')
66 |
67 | # compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir_compile')
68 | # compile_dir.mkdir(parents=True)
69 | copy_dir = pathlib.Path(tmpdir).joinpath('compile_dir_copy')
70 | copy_dir.mkdir(parents=True)
71 |
72 | # Set table files and exe file attributes
73 | model.table_files = [compile_dir.joinpath('file1.tbl'),compile_dir.joinpath('file2.tbl')]
74 | model.wrf_hydro_exe = compile_dir.joinpath('wrf_hydro.exe')
75 |
76 | # Make fake run directory with files that would have been produced at compile
77 | with model.wrf_hydro_exe.open('w') as f:
78 | f.write('#dummy exe file')
79 |
80 | for file in model.table_files:
81 | with file.open('w') as f:
82 | f.write('#dummy table file')
83 |
84 | model.copy_files(str(copy_dir))
85 |
86 | actual_files_list = list(copy_dir.glob('*'))
87 | expected_files_list = list()
88 | for file in model.table_files:
89 | expected_files_list.append(file.name)
90 | expected_files_list.append(model.wrf_hydro_exe.name)
91 |
92 | for file in actual_files_list:
93 | assert file.name in expected_files_list
94 |
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_namelist.py:
--------------------------------------------------------------------------------
1 | from wrfhydropy import namelist
2 | import copy
3 | import json
4 |
5 | # Make some test dicts
6 | main_dict = {'key_1': 'value_1',
7 | 'key_2': 1,
8 | 'sub_dict1': {
9 | 'subdict1_key1': 'sub_value1',
10 | 'subdict1_key2': 2,
11 | },
12 | 'sub_dict2': {
13 | 'subdict2_key1': 1}
14 | }
15 |
16 | patch_dict = {
17 | 'sub_dict1': {
18 | 'subdict1_key1': 'patched_value'
19 | },
20 | 'key_2': 'patched_value'
21 | }
22 |
23 | # Make some test namelists
24 | main_nl = namelist.Namelist(main_dict)
25 | patch_nl = namelist.Namelist(patch_dict)
26 |
27 | def test_namelist_patch():
28 | patched_nl = main_nl.patch(patch_nl)
29 |
30 | assert patched_nl == {'key_1': 'value_1',
31 | 'key_2': 'patched_value',
32 | 'sub_dict1': {'subdict1_key1': 'patched_value', 'subdict1_key2': 2},
33 | 'sub_dict2': {'subdict2_key1': 1}}
34 |
35 | def test_namelist_write_read(tmpdir):
36 | file_path = tmpdir + '/test_nml_write_f90'
37 |     # Note that for the f90nml write method the value of the first key of the dict must itself be a dict
38 | write_nml = namelist.Namelist({'nml1':main_nl})
39 | write_nml.write(str(file_path))
40 |
41 | read_nl = namelist.load_namelist(str(file_path))
42 |
43 | assert write_nml == read_nl, 'written namelist does not match read namelist'
44 |
45 |
46 | def test_namelist_diff():
47 | main_nl_altered = copy.deepcopy(main_nl)
48 | del main_nl_altered['key_1']
49 | main_nl_altered['sub_dict2']['subdict2_key1'] = 'altered_key1'
50 |
51 | nl_diffs = namelist.diff_namelist(main_nl,main_nl_altered)
52 |
53 | assert nl_diffs == {'type_changes':
54 | {"root['sub_dict2']['subdict2_key1']": {'old_type': int,
55 | 'new_type': str,
56 | 'old_value': 1,
57 | 'new_value': 'altered_key1'}
58 | },
59 | 'dictionary_item_removed': {"root['key_1']"}
60 | }
61 |
62 |
63 | def test_namelist_dictmerge():
64 | patched_dict = namelist.dict_merge(main_dict,patch_dict)
65 | assert patched_dict == {'key_1': 'value_1',
66 | 'key_2': 'patched_value',
67 | 'sub_dict1':
68 | {'subdict1_key1': 'patched_value', 'subdict1_key2': 2},
69 | 'sub_dict2': {'subdict2_key1': 1}
70 | }
71 |
72 | def test_namelist_jsonnamelist(tmpdir):
73 | file_path = tmpdir + '/test_json.json'
74 |
75 |
76 | json_string = json.loads('{"base":{"key1":1,"key2":"value2"},"a_config":{'
77 | '"key2":"config_value2"}}')
78 | json.dump(json_string,open(file_path,'w'))
79 |
80 | json_nl = namelist.JSONNamelist(file_path)
81 | json_nl_config = json_nl.get_config('a_config')
82 |
83 | assert json_nl_config == {'key1': 1, 'key2': 'config_value2'}
84 | assert type(json_nl_config) == namelist.Namelist
85 |
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_outputdiffs.py:
--------------------------------------------------------------------------------
1 | from wrfhydropy.core.outputdiffs import compare_ncfiles, OutputDataDiffs, OutputMetaDataDiffs
2 | from wrfhydropy.core.simulation import SimulationOutput
3 | import os
4 |
5 | def test_outputdiffs_compare_ncfiles(sim_output):
6 |
7 | chrtout = list(sim_output.glob('*CHRTOUT_DOMAIN1*'))
8 | gwout = list(sim_output.glob('*GWOUT*'))
9 |
10 | assert compare_ncfiles(chrtout,chrtout) == [None,None,None]
11 | assert compare_ncfiles(chrtout,gwout) != [None,None,None]
12 |
13 |
14 | def test_outputdiffs_outputdatadiffs(sim_output):
15 |
16 | output=SimulationOutput()
17 | output.collect_output(sim_dir=sim_output)
18 |
19 | output_diffs = OutputDataDiffs(output,output)
20 | print(output_diffs.diff_counts)
21 | assert output_diffs.diff_counts == {
22 | 'channel_rt': 0, 'channel_rt_grid': 0, 'chanobs': 0,
23 | 'lakeout': 0, 'gwout': 0, 'restart_hydro': 0,
24 | 'restart_lsm': 0, 'restart_nudging': 0,
25 | 'ldasout': 0, 'rtout': 0
26 | }
27 |
28 |
29 | def test_outputdiffs_outputmetadatadiffs(sim_output):
30 |
31 | output=SimulationOutput()
32 | output.collect_output(sim_dir=sim_output)
33 |
34 | output_diffs = OutputMetaDataDiffs(output,output)
35 |
36 | assert output_diffs.diff_counts == {
37 | 'channel_rt': 0, 'chanobs': 0, 'lakeout': 0, 'gwout': 3,
38 | 'rtout': 0, 'ldasout': 0, 'restart_hydro': 0,
39 | 'restart_lsm': 0, 'restart_nudging': 0
40 | }
41 |
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_schedulers_pbs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pathlib
3 | import pytest
4 |
5 | from wrfhydropy.core.schedulers import PBSCheyenne
6 | from wrfhydropy.core.job import Job
7 |
8 |
9 | @pytest.fixture(scope='function')
10 | def scheduler_regular():
11 | scheduler = PBSCheyenne(
12 | account='fake_acct',
13 | email_who='elmo',
14 | email_when='abe',
15 | nproc=216,
16 | nnodes=6,
17 | ppn=None,
18 | queue='regular',
19 | walltime="12:00:00")
20 | return scheduler
21 |
22 |
23 | @pytest.fixture(scope='function')
24 | def scheduler_shared():
25 | scheduler = PBSCheyenne(
26 | account='fake_acct',
27 | email_who='elmo',
28 | email_when='abe',
29 | nproc=216,
30 | nnodes=6,
31 | ppn=None,
32 | queue='shared',
33 | walltime="12:00:00")
34 | return scheduler
35 |
36 |
37 | def test_schedulers_pbs_solve_nodes(scheduler_regular):
38 |
39 | assert scheduler_regular.ppn == 36
40 |
41 | scheduler_regular.nproc = None
42 | scheduler_regular.nnodes = 5
43 | assert scheduler_regular.ppn == 36
44 | assert scheduler_regular.nnodes == 5
45 | assert scheduler_regular.nproc == 180
46 |
47 |
48 | expected_script_list = [
49 | '#!/bin/sh\n' ,
50 | '#PBS -N test_job_1\n' ,
51 | '#PBS -A fake_acct\n' ,
52 | '#PBS -q regular\n' ,
53 | '#PBS -M elmo\n' ,
54 | '#PBS -m abe\n' ,
55 | '\n' ,
56 | '#PBS -l walltime=12:00:00\n' ,
57 | '#PBS -l select=6:ncpus=36:mpiprocs=36\n' ,
58 | '\n' ,
59 | '# Not using PBS standard error and out files to capture model output\n' ,
60 | '# but these files might catch output and errors from the scheduler.\n' ,
61 | '#PBS -o job_test_job_1\n' ,
62 | '#PBS -e job_test_job_1\n' ,
63 | '\n' ,
64 | '# CISL suggests users set TMPDIR when running batch jobs on Cheyenne.\n' ,
65 | 'export TMPDIR=/glade/scratch/$USER/temp\n' ,
66 | 'mkdir -p $TMPDIR\n' ]
67 | # Beyond here in the script there is a system/user dependent path and exit line,
68 | # drop these and only compare on the length of this string.
69 |
70 | custom_none = {}
71 | expected_script = ''.join(expected_script_list)
72 |
73 | custom_l = {'-l': 'select=1:ncpus=36:mpiprocs=36:mem=109GB+1:ncpus=36:mpiprocs=36'}
74 | expected_script_custom_l = expected_script_list
75 | expected_script_custom_l[8] = '#PBS -l ' + custom_l['-l'] + '\n'
76 | expected_script_custom_l = ''.join(expected_script_custom_l)
77 |
78 |
79 | @pytest.mark.parametrize(
80 | ['sched', 'custom', 'expected'],
81 | [
82 | (pytest.lazy_fixture("scheduler_regular"), custom_none, expected_script),
83 | (pytest.lazy_fixture("scheduler_regular"), custom_l, expected_script_custom_l),
84 | ],
85 | ids=['no_custom', 'custom_l']
86 | )
87 | def test_schedulers_pbs_writescript(tmpdir, sched, custom, expected):
88 | job = Job(
89 | job_id='test_job_1',
90 | model_start_time='1984-10-14',
91 | model_end_time='2017-01-04',
92 | restart=False,
93 | exe_cmd='bogus exe cmd',
94 | entry_cmd='bogus entry cmd',
95 | exit_cmd='bogus exit cmd')
96 |
97 | sched.scheduler_opts['custom'] = custom
98 |
99 | os.chdir(tmpdir)
100 | job.job_dir.mkdir() # WHY IS THIS NOW NECESSARY?
101 |
102 | sched._write_job_pbs([job, job])
103 | script_path = job.job_dir.joinpath('job_' + job.job_id + '.pbs')
104 | with script_path.open(mode='r') as f:
105 | job_script = f.read()
106 |
107 |     # Only compare up to the length of the expected script; the trailing lines vary by system
108 | len_expected = len(expected)
109 | assert job_script[0:len_expected] == expected
110 |
111 |
112 | def test_schedulers_pbs_schedule(scheduler_regular,capfd):
113 | job = Job(job_id='test_job_1',
114 | model_start_time='1984-10-14',
115 | model_end_time='2017-01-04',
116 | restart=False,
117 | exe_cmd='bogus exe cmd',
118 | entry_cmd='bogus entry cmd',
119 | exit_cmd='bogus exit cmd')
120 |
121 | try:
122 | scheduler_regular.schedule([job, job])
123 | out, err = capfd.readouterr()
124 | print(out)
125 | except:
126 | out, err = capfd.readouterr()
127 | pass
128 | assert out == "qsub_str: /bin/bash -c 'job_test_job_1=`qsub -h job_test_job_1/job_test_job_1.pbs`;" \
129 | "job_test_job_1=`qsub -W depend=afterok:${job_test_job_1} " \
130 | "job_test_job_1/job_test_job_1.pbs`;qrls ${job_test_job_1};'" \
131 | '\n'
132 |
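# --- Illustrative sketch, not part of the test file above ---
# The proc/node arithmetic exercised in test_schedulers_pbs_solve_nodes, spelled
# out: Cheyenne nodes provide 36 cores, so ppn is solved from nproc/nnodes and
# nproc from nnodes * ppn.
from wrfhydropy.core.schedulers import PBSCheyenne

sched = PBSCheyenne(account='fake_acct', email_who='elmo', email_when='abe',
                    nproc=216, nnodes=6, ppn=None,
                    queue='regular', walltime='12:00:00')
assert sched.ppn == 36       # 216 procs / 6 nodes
sched.nproc = None
sched.nnodes = 5
assert sched.nproc == 180    # 5 nodes * 36 procs per node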
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_simulation.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import deepdiff
3 | import os
4 | import pathlib
5 | import pickle
6 | import pytest
7 |
8 | from wrfhydropy.core.simulation import Simulation, SimulationOutput
9 | from wrfhydropy.core.ioutils import WrfHydroTs
10 | from wrfhydropy.core.ensemble_tools import DeepDiffEq
11 | from wrfhydropy.core.outputdiffs import check_unprocessed_diffs
12 |
13 |
14 | def test_simulation_add_model_domain(model, domain):
15 | sim = Simulation()
16 | sim.add(model)
17 | sim.add(domain)
18 |
19 | assert sim.base_hydro_namelist == \
20 | {'hydro_nlist':
21 | {
22 | 'channel_option': 2,
23 | 'chanobs_domain': 0,
24 | 'chanrtswcrt': 1,
25 | 'chrtout_domain': 1,
26 | 'geo_static_flnm': './NWM/DOMAIN/geo_em.d01.nc',
27 | 'restart_file': './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1',
28 | 'aggfactrt': 4,
29 | 'udmp_opt': 1,
30 | 'out_dt': 1440,
31 | 'rst_dt': 1440
32 | },
33 | 'nudging_nlist': {
34 | 'maxagepairsbiaspersist': 3,
35 | 'minnumpairsbiaspersist': 1,
36 | 'nudginglastobsfile':
37 | './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc'
38 | }
39 | }
40 |
41 | assert sim.base_hrldas_namelist == \
42 | {'noahlsm_offline':
43 | {
44 | 'btr_option': 1,
45 | 'canopy_stomatal_resistance_option': 1,
46 | 'hrldas_setup_file': './NWM/DOMAIN/wrfinput_d01.nc',
47 | 'restart_filename_requested':
48 | './NWM/RESTART/RESTART.2011082600_DOMAIN1',
49 | 'indir': './FORCING',
50 | 'output_timestep': 86400,
51 | 'restart_frequency_hours': 24
52 | },
53 | 'wrf_hydro_offline': {'forc_typ': 1}
54 | }
55 |
56 |
57 | def test_simulation_add_job(model, domain, job):
58 | sim = Simulation()
59 | with pytest.raises(Exception) as e_info:
60 | sim.add(job)
61 |
62 | sim.add(model)
63 | sim.add(domain)
64 | sim.add(job)
65 |
66 |
67 | def test_simulation_compose(model, domain, job, capfd, tmpdir):
68 |
69 | sim = Simulation()
70 | sim.add(model)
71 | sim.add(domain)
72 | sim.add(job)
73 |
74 | # copy before compose
75 | sim_opts = copy.deepcopy(sim)
76 | sim_tbls = copy.deepcopy(sim)
77 |
78 | compose_dir = pathlib.Path(tmpdir).joinpath('sim_compose')
79 | os.mkdir(str(compose_dir))
80 | os.chdir(str(compose_dir))
81 |
82 | sim.compose()
83 |
84 |     # Doing this three times suggests factoring out a helper function.
85 | # This compose exercises the options to compose. Gives the same result.
86 | compose_dir_opts = pathlib.Path(tmpdir).joinpath('sim_compose_opts')
87 | os.mkdir(str(compose_dir_opts))
88 | os.chdir(str(compose_dir_opts))
89 |
90 | sim_opts.compose(
91 | symlink_domain=False,
92 | force=True,
93 | check_nlst_warn=True
94 | )
95 |
96 | actual_files = list(compose_dir.rglob('./*'))
97 | domain_files = domain.domain_top_dir.rglob('*')
98 | expected_files = [
99 | 'namelist.hrldas',
100 | 'hydro.namelist',
101 | 'job_test_job_1',
102 | '.uid',
103 | 'NWM',
104 | 'WrfHydroModel.pkl',
105 | 'FORCING',
106 | 'DUMMY.TBL',
107 | 'wrf_hydro.exe'
108 | ]
109 |
110 | for file in domain_files:
111 | expected_files.append(file.name)
112 |
113 | for file in actual_files:
114 | assert file.name in expected_files
115 |
116 | assert sim.model.table_files == sim_opts.model.table_files
117 | assert [str(ff.name) for ff in sim.model.table_files] == ['DUMMY.TBL']
118 |
119 | # These composes result in alternative, user selected table files.
120 | # Do it before and after model.compile()
121 | sim_tbls_postcompile = copy.deepcopy(sim_tbls)
122 |
123 | dummy_user_tbl = pathlib.Path(tmpdir).joinpath('DUMMY_USER.TBL')
124 | with dummy_user_tbl.open('w') as f:
125 | f.write('# dummy TBL \n')
126 |
127 | compose_dir_tbls = pathlib.Path(tmpdir).joinpath('sim_compose_tbls')
128 | os.mkdir(str(compose_dir_tbls))
129 | os.chdir(str(compose_dir_tbls))
130 | # before compile
131 | sim_tbls.model.table_files = [dummy_user_tbl]
132 | sim_tbls.compose()
133 |
134 | compose_dir_tbls_postcompile = pathlib.Path(tmpdir).joinpath('sim_compose_tbls_postcompile')
135 | compile_dir_tbls_postcompile = pathlib.Path(tmpdir).joinpath('sim_compile_tbls_postcompile')
136 | os.mkdir(str(compose_dir_tbls_postcompile))
137 | os.chdir(str(compose_dir_tbls_postcompile))
138 | sim_tbls_postcompile.model.compile(compile_dir_tbls_postcompile)
139 | sim_tbls_postcompile.model.table_files = [dummy_user_tbl]
140 | sim_tbls_postcompile.compose()
141 |
142 | assert sim_tbls.model.table_files == sim_tbls_postcompile.model.table_files
143 | assert sim_tbls.model.table_files == [dummy_user_tbl]
144 |
145 | actual_files = list(compose_dir_tbls.rglob('./*'))
146 | domain_files = domain.domain_top_dir.rglob('*')
147 | expected_files = [
148 | 'namelist.hrldas',
149 | 'hydro.namelist',
150 | 'job_test_job_1',
151 | '.uid',
152 | 'NWM',
153 | 'WrfHydroModel.pkl',
154 | 'FORCING',
155 | 'DUMMY_USER.TBL',
156 | 'wrf_hydro.exe'
157 | ]
158 |
159 | for file in domain_files:
160 | expected_files.append(file.name)
161 |
162 | for file in actual_files:
163 | assert file.name in expected_files
164 |
165 |
166 | def test_simulation_run_no_scheduler(model, domain, job, tmpdir, capfd):
167 | sim = Simulation()
168 | sim.add(model)
169 | sim.add(domain)
170 | sim.add(job)
171 |
172 | compose_dir = pathlib.Path(tmpdir).joinpath('sim_run_no_sched')
173 | os.mkdir(str(compose_dir))
174 | os.chdir(str(compose_dir))
175 |
176 | sim.compose()
177 | sim.run()
178 | assert sim.jobs[0].exit_status == 0, \
179 | "The job did not exit successfully."
180 |
181 |
182 | def test_simulation_collect(sim_output):
183 | sim = Simulation()
184 | sim.collect(sim_dir=sim_output)
185 | assert sim.output is not None
186 | assert type(sim.output) is SimulationOutput
187 |
188 |
189 | def test_simulation_output_checknans(sim_output):
190 | output = SimulationOutput()
191 | output.collect_output(sim_dir=sim_output)
192 | public_atts = [att for att in dir(output) if not att.startswith('__')]
193 | for att in public_atts:
194 | assert getattr(output, att) is not None
195 | assert output.check_output_nans() is None
196 |
197 |
198 | def test_simulation_pickle(model, domain, job, tmpdir):
199 | sim = Simulation()
200 | sim.add(model)
201 | sim.add(domain)
202 | sim.add(job)
203 | pickle_path = pathlib.Path(tmpdir).joinpath('Sim.pkl')
204 | sim.pickle(pickle_path)
205 | sim0 = copy.deepcopy(sim)
206 | del sim
207 | sim = pickle.load(pickle_path.open(mode='rb'))
208 |
209 | sim_diff = deepdiff.DeepDiff(sim, sim0)
210 | unprocessed_diffs = sim_diff.pop('unprocessed', [])
211 | if unprocessed_diffs:
212 | check_unprocessed_diffs(unprocessed_diffs)
213 | assert sim_diff == {}
214 |
215 |
216 | def test_simulation_sub_obj_pickle(model, domain, job, tmpdir):
217 | sim = Simulation()
218 | sim.add(model)
219 | sim.add(domain)
220 | sim.add(job)
221 |
222 | os.chdir(tmpdir)
223 | domain_path = pathlib.Path(tmpdir).joinpath('WrfHydroDomain.pkl')
224 | model_path = pathlib.Path(tmpdir).joinpath('WrfHydroModel.pkl')
225 | sim.pickle_sub_objs()
226 | assert sim.domain.resolve() == domain_path
227 | assert sim.model.resolve() == model_path
228 |
229 | sim.restore_sub_objs()
230 | domain_diff = deepdiff.DeepDiff(sim.domain, domain)
231 | unprocessed_diffs = domain_diff.pop('unprocessed', [])
232 | if unprocessed_diffs:
233 | check_unprocessed_diffs(unprocessed_diffs)
234 | assert domain_diff == {}
235 |
236 | model_diff = deepdiff.DeepDiff(sim.model, model)
237 | unprocessed_diffs = model_diff.pop('unprocessed', [])
238 | if unprocessed_diffs:
239 | check_unprocessed_diffs(unprocessed_diffs)
240 | assert model_diff == {}
241 |
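# --- Illustrative sketch, not part of the test file above ---
# The minimal compose-and-run workflow exercised above; `model`, `domain`, and
# `job` stand for the pytest fixtures used by these tests, and the run is
# assumed to start from an empty working directory.
from wrfhydropy.core.simulation import Simulation

sim = Simulation()
sim.add(model)
sim.add(domain)
sim.add(job)
sim.compose()   # writes namelists, table files, the exe, and job directories
sim.run()
assert sim.jobs[0].exit_status == 0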
--------------------------------------------------------------------------------
/wrfhydropy/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pathlib
3 | import pytest
4 |
5 | from wrfhydropy.util.xrcmp import xrcmp
6 | from wrfhydropy.util.xrnan import xrnan
7 |
8 | test_dir = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
9 | collection_data_dir = test_dir / 'data/collection_data/simulation'
10 | nan_na_data_dir = test_dir / 'data/nan_na_data'
11 |
12 |
13 | @pytest.mark.parametrize(
14 | ['filename'],
15 | [
16 | ('201108260100.CHANOBS_DOMAIN1',),
17 | ('201108260100.CHRTOUT_DOMAIN1',),
18 | ('201108260100.GWOUT_DOMAIN1',),
19 | ('201108260100.LAKEOUT_DOMAIN1',),
20 | ('201108260100.LDASOUT_DOMAIN1',),
21 | ('201108260100.LSMOUT_DOMAIN',),
22 | ('201108260100.RTOUT_DOMAIN1',),
23 | ('HYDRO_RST.2011-08-26_01:00_DOMAIN1',),
24 | ('nudgingLastObs.2011-08-26_01:00:00.nc',),
25 | ('RESTART.2011082601_DOMAIN1',),
26 | ],
27 | ids=[
28 | 'xrcmp-equals-CHANOBS',
29 | 'xrcmp-equals-CHRTOUT',
30 | 'xrcmp-equals-GWOUT',
31 | 'xrcmp-equals-LAKEOUT',
32 | 'xrcmp-equals-LDASOUT',
33 | 'xrcmp-equals-LSMOUT',
34 | 'xrcmp-equals-RTOUT',
35 | 'xrcmp-equals-HYDRO_RST',
36 | 'xrcmp-equals-nudginglastobs',
37 | 'xrcmp-equals-RESTART',
38 | ]
39 | )
40 | def test_xrcmp_eq(filename, tmpdir):
41 | file_path = test_dir.joinpath(collection_data_dir)
42 | the_file = file_path.joinpath(filename)
43 | log_file = pathlib.Path(tmpdir).joinpath('log.txt')
44 | result = xrcmp(the_file, the_file, log_file)
45 | assert result == 0
46 |
47 |
48 | @pytest.mark.parametrize(
49 | ['filename1', 'filename2'],
50 | [
51 | ('201108260100.CHANOBS_DOMAIN1', '201108260200.CHANOBS_DOMAIN1'),
52 | ('201108260100.CHRTOUT_DOMAIN1', '201108260200.CHRTOUT_DOMAIN1'),
53 | ('201108260100.GWOUT_DOMAIN1', '201108260200.GWOUT_DOMAIN1'),
54 | ('201108260100.LAKEOUT_DOMAIN1', '201108260200.LAKEOUT_DOMAIN1'),
55 | # ('201108260100.LDASOUT_DOMAIN1', '201108260200.LDASOUT_DOMAIN1'),
56 | # ('201108260100.LSMOUT_DOMAIN', '201108260200.LSMOUT_DOMAIN'),
57 | # ('201108260100.RTOUT_DOMAIN1', '201108260200.RTOUT_DOMAIN1'),
58 | ('HYDRO_RST.2011-08-26_01:00_DOMAIN1', 'HYDRO_RST.2011-08-26_02:00_DOMAIN1'),
59 | # ('nudgingLastObs.2011-08-26_01:00:00.nc', 'nudgingLastObs.2011-08-26_02:00:00.nc'),
60 | # ('RESTART.2011082601_DOMAIN1', 'RESTART.2011082602_DOMAIN1'),
61 | ],
62 | ids=[
63 | 'xrcmp-unequal-CHANOBS',
64 | 'xrcmp-unequal-CHRTOUT',
65 | 'xrcmp-unequal-GWOUT',
66 | 'xrcmp-unequal-LAKEOUT',
67 | # 'xrcmp-unequal-LDASOUT',
68 | # 'xrcmp-unequal-LSMOUT',
69 | # 'xrcmp-unequal-RTOUT',
70 | 'xrcmp-unequal-HYDRO_RST',
71 | # 'xrcmp-unequal-nudginglastobs', # identical data is the problem
72 | # 'xrcmp-unequal-RESTART',
73 | ]
74 | )
75 | def test_xrcmp_uneq(filename1, filename2, tmpdir):
76 | file_path = test_dir.joinpath(collection_data_dir)
77 | the_file1 = file_path.joinpath(filename1)
78 | the_file2 = file_path.joinpath(filename2)
79 | log_file = pathlib.Path(tmpdir).joinpath('log.txt')
80 | result = xrcmp(the_file1, the_file2, log_file)
81 | assert result == 1
82 |
83 |
84 | @pytest.mark.parametrize(
85 | ['filename', 'expected'],
86 | [
87 | ('201108260200.CHANOBS_DOMAIN1', None),
88 | ('201108260200.CHRTOUT_DOMAIN1', None),
89 | ('201108260200.GWOUT_DOMAIN1', None),
90 | ('201108260200.LAKEOUT_DOMAIN1', None),
91 | ('201108260200.LDASOUT_DOMAIN1', None),
92 | ('201108260200.LSMOUT_DOMAIN', None),
93 | ('201108260200.RTOUT_DOMAIN1', None),
94 | ('HYDRO_RST.2011-08-26_02:00_DOMAIN1', None),
95 | ('nudgingLastObs.2011-08-26_02:00:00.nc', None),
96 | ('RESTART.2011082602_DOMAIN1', None),
97 | ],
98 | ids=[
99 | 'xrnan-CHANOBS',
100 | 'xrnan-CHRTOUT',
101 | 'xrnan-GWOUT',
102 | 'xrnan-LAKEOUT',
103 | 'xrnan-LDASOUT',
104 | 'xrnan-LSMOUT',
105 | 'xrnan-RTOUT',
106 | 'xrnan-HYDRO_RST',
107 | 'xrnan-nudginglastobs',
108 | 'xrnan-RESTART',
109 | ]
110 | )
111 | def test_xrnan_none(filename, expected, tmpdir):
112 |     # This test may be extraneous:
113 |     # currently the only real data on hand contains no NaNs.
114 | file_path = test_dir.joinpath(collection_data_dir)
115 | the_file = file_path.joinpath(filename)
116 | result = xrnan(the_file)
117 | assert result is expected
118 |
119 |
120 | @pytest.mark.parametrize(
121 | ['filename', 'expected'],
122 | [
123 | ('fill_value.nc', 'None'),
124 | ('nan_fill.nc', "{'vars': ['some_var']}"),
125 | ('nan_value.nc', "{'vars': ['some_var']}"),
126 | ('value_value.nc', 'None'),
127 | ],
128 | ids=[
129 | 'xrnan-fill_value',
130 | 'xrnan-nan_fill',
131 | 'xrnan-nan_value',
132 | 'xrnan-value_value',
133 | ]
134 | )
135 | def test_xrnan_matrix(filename, expected, tmpdir):
136 | file_path = test_dir.joinpath(nan_na_data_dir)
137 | the_file = file_path.joinpath(filename)
138 | result = xrnan(the_file)
139 | assert repr(result) == expected
140 |
--------------------------------------------------------------------------------
/wrfhydropy/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/util/__init__.py
--------------------------------------------------------------------------------
/wrfhydropy/util/xrcmp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Example Usage
4 | # ipython --pdb xrcmp.py -- \
5 | # --candidate conus_test/201806012300.RTOUT_DOMAIN1 \
6 | # --reference conus_test/201806020000.RTOUT_DOMAIN1 \
7 | # --n_cores 8 \
8 | # --log_file log.txt
9 |
10 | import math
11 | from multiprocessing import Pool
12 | import pathlib
13 | import sys
14 | # import time
15 | import xarray as xr
16 |
17 |
18 | # A dictionary of chunks for various variables for CONUS testing
19 | # These are for the larger fields which need some control
20 | conus_chunks_dict = {
21 | # RTOUT variables to control
22 | 'SOIL_M': {}, # with {} maxes out at < 18% memory when files do NOT match
23 | # HYDRO_RST variables: None currently
24 | }
25 |
26 |
27 | # # A decorator/closure to check timings.
28 | # def stopwatch(the_func):
29 | # def the_closure(*args, **kw):
30 | # ts = time.time()
31 | # result = the_func(*args, **kw)
32 | # te = time.time()
33 | # print('Timing: ' + the_func.__name__ + ' took ', round(te - ts, 2),' seconds.')
34 | # return result
35 | # return the_closure
36 |
37 |
38 | def calc_stats(arg_tuple):
39 | key = arg_tuple[0]
40 | can_file = arg_tuple[1]
41 | ref_file = arg_tuple[2]
42 | chunks = arg_tuple[3]
43 | exclude_vars = arg_tuple[4]
44 |
45 | # ignore excluded vars
46 | if key in exclude_vars:
47 | return None
48 |
49 | if chunks is None:
50 | chunks = {} # default is no chunks
51 | if key in conus_chunks_dict:
52 | chunks = conus_chunks_dict[key]
53 |
54 | can_ds = xr.open_dataset(can_file, chunks=chunks, mask_and_scale=False)
55 | ref_ds = xr.open_dataset(ref_file, chunks=chunks, mask_and_scale=False)
56 |
57 | # Check for variables in reference and not in candidate?
58 | # Check for variables in candidate and not in reference?
59 |
60 | if can_ds[key].equals(ref_ds[key]):
61 | return None
62 |
63 | else:
64 | cc = can_ds[key]
65 | rr = ref_ds[key]
66 |
67 | if '|S' in str(cc.dtype):
68 |
69 | # Deal with strings
70 | nz_xr = cc.where(cc != rr, drop=True)
71 | if len(nz_xr) == 0:
72 | return None
73 | else:
74 | the_count = nz_xr.count().load().item(0)
75 | inf = float('inf')
76 | result = {
77 | 'Variable': key,
78 | 'Count': the_count,
79 | 'Sum': inf,
80 | 'Min': inf,
81 | 'Max': inf,
82 | 'Range': inf,
83 | 'Mean': inf,
84 | 'StdDev': inf
85 | }
86 | return result
87 |
88 | else:
89 | # All non-string types
90 | cc = cc.astype(float)
91 | rr = rr.astype(float)
92 |
93 |             # THIS NEEDS TO BE REMOVED AFTER TESTING IS COMPLETE
94 | # FOR convenience of comparing two files at different times.
95 | # if 'time' in rr.coords:
96 | # rr['time'] = cc.time
97 | # if key == 'time':
98 | # rr.values = cc.values
99 |
100 | diff_da = cc - rr
101 | diff_xr = xr.DataArray(diff_da.compute())
102 | # TODO: This threshold should be type dependent
103 | nz_xr = diff_xr.where(abs(diff_xr) > 0.000000, drop=True)
104 | if len(nz_xr) == 0:
105 | return None
106 |
107 | the_count = nz_xr.count().load().item(0)
108 | the_sum = nz_xr.sum().load().item(0)
109 | the_min = nz_xr.min().load().item(0)
110 | the_max = nz_xr.max().load().item(0)
111 | the_range = the_max - the_min
112 | the_mean = the_sum / the_count
113 | the_z = (nz_xr - the_mean)
114 | the_std = math.sqrt((the_z * the_z).sum() / the_count)
115 | del the_z
116 |
117 | result = {
118 | 'Variable': key,
119 | 'Count': the_count,
120 | 'Sum': the_sum,
121 | 'Min': the_min,
122 | 'Max': the_max,
123 | 'Range': the_range,
124 | 'Mean': the_mean,
125 | 'StdDev': the_std
126 | }
127 | return result
128 |
129 |
130 | # @stopwatch
131 | def xrcmp(
132 | can_file: str,
133 | ref_file: str,
134 | log_file: str,
135 | n_cores: int = 1,
136 | chunks={},
137 | exclude_vars: list = [],
138 | ) -> int:
139 |
140 | if exclude_vars is None:
141 | exclude_vars = []
142 |
143 | # Delete log file first
144 | # Should write a log file that says nothing yet determined?
145 | log_file = pathlib.Path(log_file)
146 | if log_file.exists():
147 | log_file.unlink()
148 |
149 |     # Don't chunk; this is just a metadata read.
150 | can_ds = xr.open_dataset(can_file)
151 | ref_ds = xr.open_dataset(ref_file)
152 |
153 | # May need to check that they have the same vars.
154 | can_vars = set([kk for kk in can_ds.variables.keys()])
155 | ref_vars = set([kk for kk in ref_ds.variables.keys()])
156 | have_same_variables = can_vars.difference(ref_vars) == set([])
157 | can_ds.close() # These are likely critical to the success
158 | ref_ds.close() # of multiprocessing
159 |
160 | # TODO: Check that the meta data matches
161 |
162 | # This is quick if not true
163 | # ds_equal = can_ds.equals(re_ds)
164 | # if not ds_equal:
165 |
166 | if n_cores == 1:
167 | all_stats_list = []
168 | for key, val in can_ds.items():
169 | result = calc_stats(
170 | (key, can_file, ref_file, chunks, exclude_vars))
171 | all_stats_list.append(result)
172 | else:
173 | the_args = [
174 | (key, can_file, ref_file, chunks, exclude_vars) for key in can_ds.keys()]
175 | with Pool(n_cores) as pool:
176 | all_stats_list = pool.map(calc_stats, the_args)
177 |
178 | all_stats = {item['Variable']: item for item in all_stats_list if item is not None}
179 |
180 | diff_var_names = sorted(all_stats.keys())
181 | if not diff_var_names:
182 | with open(log_file, 'w') as opened_file:
183 | opened_file.write("Files are identical\n")
184 | return 0
185 |
186 | # Formatting:
187 |
188 | # The goal is to print something like this which is what nccmp outputs.
189 | # channel_rt
190 | # Variable Group Count Sum ... Max Range Mean StdDev
191 | # 0 streamflow / 162 0.003022 ... 0.003832 0.004315 0.000019 0.000361
192 | # 1 nudge / 4 -0.001094 ... 0.000093 0.001272 -0.000274 0.000605
193 | # 2 q_lateral / 170 0.000345 ... 0.000700 0.001145 0.000002 0.000086
194 | # 3 velocity / 165 0.010788 ... 0.005488 0.006231 0.000065 0.000503
195 | # 4 Head / 177 0.002717 ... 0.002662 0.003292 0.000015 0.000258
196 |
197 | stat_names = sorted(all_stats[diff_var_names[0]].keys())
198 | stat_lens = {} # the length/width of each column/stat
199 | n_dec = 3 # number of decimals for floats
200 | n_dec_p = n_dec + 1 # plus the decimal point
201 |
202 |     # The format for each type, where full_len specifies the width of the field.
203 | type_fmt = {
204 | str: '{{:{full_len}}}',
205 | int: '{{:{full_len}}}',
206 | float: '{{:{full_len}.' + str(n_dec) + 'f}}'
207 | }
208 |
209 | # Now solve the full_len field widths for all stats. Do this by
210 |     # just formatting each as its type and finding the max (best way
211 | # to handle negatives). For floats, take the integer part to find
212 | # its length to the left of the decimal.
213 | for stat_name in stat_names:
214 | all_lens = []
215 | for key, val in all_stats.items():
216 | the_val = val[stat_name]
217 | the_type = type(the_val)
218 | the_fmt0 = type_fmt[the_type]
219 | if the_type is str:
220 | full_len = len(the_val)
221 | elif not math.isfinite(the_val):
222 | full_len = len(str(the_val))
223 | else:
224 | full_len = len(str(int(the_val)))
225 | if the_type is float:
226 | full_len = full_len + n_dec_p
227 | the_fmt = the_fmt0.format(**{'full_len': full_len})
228 | the_string = the_fmt.format(*[the_val])
229 | all_lens.append(len(the_string))
230 |
231 | stat_lens[stat_name] = max(all_lens)
232 |
233 | header_string = (
234 | '{Variable:>' + str(stat_lens['Variable']) + '} '
235 | '{Count:>' + str(stat_lens['Count']) + '} '
236 | '{Sum:>' + str(stat_lens['Sum']) + '} '
237 | '{Min:>' + str(stat_lens['Min']) + '} '
238 | '{Max:>' + str(stat_lens['Max']) + '} '
239 | '{Range:>' + str(stat_lens['Range']) + '} '
240 | '{Mean:>' + str(stat_lens['Mean']) + '} '
241 | '{StdDev:>' + str(stat_lens['StdDev']) + '} \n'
242 | )
243 |
244 | var_string = (
245 | '{Variable:>' + str(stat_lens['Variable']) + '} '
246 | '{Count:>' + str(stat_lens['Count']) + '} '
247 | '{Sum:>' + str(stat_lens['Sum']) + '.' + str(n_dec) + 'f} '
248 | '{Min:>' + str(stat_lens['Min']) + '.' + str(n_dec) + 'f} '
249 | '{Max:>' + str(stat_lens['Max']) + '.' + str(n_dec) + 'f} '
250 | '{Range:>' + str(stat_lens['Range']) + '.' + str(n_dec) + 'f} '
251 | '{Mean:>' + str(stat_lens['Mean']) + '.' + str(n_dec) + 'f} '
252 | '{StdDev:>' + str(stat_lens['StdDev']) + '.' + str(n_dec) + 'f} \n'
253 | )
254 |
255 | header_dict = {name: name for name in stat_names}
256 | the_header = header_string.format(**header_dict)
257 |
258 | with open(log_file, 'w') as opened_file:
259 | opened_file.write(the_header)
260 | for key in all_stats.keys():
261 | opened_file.write(var_string.format(**all_stats[key]))
262 |
263 | return 1
264 |
265 |
266 | def parse_arguments():
267 |
268 | import argparse
269 | parser = argparse.ArgumentParser()
270 | parser.add_argument(
271 | "--candidate", metavar="FILE", type=str, required=True,
272 | help="Candidate file to compare."
273 | )
274 | parser.add_argument(
275 | "--reference", metavar="FILE", type=str, required=True,
276 | help="Reference file to compare."
277 | )
278 | parser.add_argument(
279 | "--log_file", metavar="FILE", type=str, required=True,
280 | help="File to log potential differences to. "
281 | "Existing file is clobbered."
282 | )
283 | parser.add_argument(
284 | "--n_cores", metavar="n_cores", type=int, required=False,
285 | default=1,
286 | help="The number of processors to use."
287 | )
288 | parser.add_argument(
289 | "--chunks", metavar="chunks", type=int, required=False,
290 | default=1,
291 | help="Chunks as integer."
292 | )
293 | args = parser.parse_args()
294 | can_file = args.candidate
295 | ref_file = args.reference
296 | log_file = args.log_file
297 | chunks = args.chunks
298 | n_cores = args.n_cores
299 |
300 | if chunks == 1:
301 | chunks = {} # No chunking
302 | elif chunks == 0:
303 | chunks = None # This will use the conus_chunks_dict
304 |
305 | return can_file, ref_file, log_file, chunks, n_cores
306 |
307 |
308 | if __name__ == "__main__":
309 |
310 | can_file, ref_file, log_file, chunks, n_cores = parse_arguments()
311 | ret = xrcmp(
312 | can_file=can_file,
313 | ref_file=ref_file,
314 | log_file=log_file,
315 | n_cores=n_cores,
316 | chunks=chunks
317 | )
318 | sys.exit(ret)
319 |
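# --- Illustrative sketch, not part of the repository file above ---
# A minimal programmatic call to xrcmp, mirroring the CLI example at the top of
# the module; 'candidate.nc', 'reference.nc', and 'log.txt' are hypothetical
# placeholder paths.
from wrfhydropy.util.xrcmp import xrcmp

ret = xrcmp(can_file='candidate.nc', ref_file='reference.nc',
            log_file='log.txt', n_cores=1)
# ret is 0 when the files match, 1 when any variable differs
# (per-variable difference stats are written to log_file).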
--------------------------------------------------------------------------------
/wrfhydropy/util/xrnan.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import Pool
2 | import pathlib
3 | import sys
4 | from typing import Union
5 | import xarray as xr
6 |
7 |
8 | def check_nans(arg_dict):
9 | var_name = arg_dict['var_name']
10 | if 'path' in arg_dict.keys():
11 | path = arg_dict['path']
12 | ds = xr.open_dataset(path, mask_and_scale=False)
13 | else:
14 | ds = arg_dict['ds']
15 | if ds[var_name].isnull().any().values:
16 | return var_name
17 | else:
18 | return None
19 |
20 |
21 | def xrnan(
22 | dataset_or_path: Union[str, pathlib.Path, xr.Dataset],
23 | log_file: str = None,
24 | exclude_vars: list = [],
25 | chunks=None,
26 | n_cores: int = 1
27 | ) -> Union[dict, None]:
28 |     # Open the dataset if a file path was passed; use xr.Dataset inputs directly.
29 | if not isinstance(dataset_or_path, xr.Dataset):
30 | ds = xr.open_dataset(str(dataset_or_path), mask_and_scale=False, chunks=chunks)
31 | else:
32 | ds = dataset_or_path
33 |
34 | # Looping on variables is much faster for small applications and parallelizable
35 | # for larger ones.
36 | if n_cores < 2 or isinstance(dataset_or_path, xr.Dataset):
37 | nan_vars = []
38 | for var_name in ds.variables:
39 | nan_vars.append(check_nans({'var_name': var_name, 'ds': ds}))
40 | ds.close()
41 | else:
42 | # The following ds.close() is CRITICAL to the correct results being returned by
43 | # multiprocessing
44 | ds.close()
45 | the_args = [{'var_name': var_name, 'path': dataset_or_path}
46 | for var_name in ds.variables]
47 | with Pool(n_cores) as pool:
48 | nan_vars = pool.map(check_nans, the_args)
49 |
50 | nan_vars_2 = [var for var in nan_vars if var is not None]
51 |
52 | if len(nan_vars_2) == 0:
53 | return None
54 | else:
55 | for var in nan_vars_2:
56 |             print(str(dataset_or_path),
57 |                   ': variable "' + var + '" contains NaNs')
58 | return {'vars': nan_vars_2}
59 |
60 |
61 | def parse_arguments():
62 |
63 | import argparse
64 | parser = argparse.ArgumentParser()
65 | parser.add_argument(
66 | "--path", metavar="FILE", type=str, required=True,
67 | help="File to check for NaNs."
68 | )
69 | parser.add_argument(
70 | "--log_file", metavar="FILE", type=str, required=True,
71 | help="File to log potential differences to. "
72 | "Existing file is clobbered."
73 | )
74 | args = parser.parse_args()
75 | path = args.path
76 | log_file = args.log_file
77 | return path, log_file
78 |
79 |
80 | if __name__ == "__main__":
81 |
82 | path, log_file = parse_arguments()
83 | ret = xrnan(path, log_file=log_file)
84 | if ret is None:
85 | exit_code = 0
86 | else:
87 | exit_code = 1
88 | sys.exit(exit_code)
89 |
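# --- Illustrative sketch, not part of the repository file above ---
# A minimal programmatic call to xrnan; 'some_output.nc' is a hypothetical
# placeholder path.
from wrfhydropy.util.xrnan import xrnan

result = xrnan('some_output.nc')
# result is None when no variable contains NaNs,
# otherwise a dict such as {'vars': ['some_var']}.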
--------------------------------------------------------------------------------