├── .coveragerc ├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── .pep8speaks.yml ├── .travis.yml ├── README.md ├── doc ├── .gitignore ├── Makefile ├── conf.py ├── evaluation.rst ├── examples.rst ├── examples │ ├── ex_01_end_to_end.ipynb │ ├── ex_02_docker_jupyter.ipynb │ ├── ex_03_cycle_simulation.ipynb │ └── ex_04_ensembles.ipynb ├── index.rst ├── installation.rst ├── model_api.rst ├── output.rst ├── requirements.txt ├── source │ ├── modules.rst │ ├── wrfhydropy.core.rst │ └── wrfhydropy.rst ├── utilities.rst └── what-and-why.rst ├── readthedocs.yml ├── requirements.txt ├── setup.py ├── whp_test_env.yml └── wrfhydropy ├── __init__.py ├── core ├── __init__.py ├── collection.py ├── cycle.py ├── domain.py ├── ensemble.py ├── ensemble_tools.py ├── evaluation.py ├── ioutils.py ├── job.py ├── model.py ├── namelist.py ├── outputdiffs.py ├── schedulers.py ├── simulation.py └── teams.py ├── data └── flood_thresholds_to_nc_w_qc.py ├── tests ├── .coveragerc ├── .gitignore ├── __init__.py ├── conftest.py ├── data │ ├── .gitignore │ ├── __init__.py │ ├── collection_data_download.py │ ├── collection_data_recipe.py │ ├── evaluation_answer_reprs.py │ ├── gdrive_download.py │ ├── nan_na_data │ │ ├── fill_value.nc │ │ ├── nan_fill.nc │ │ ├── nan_value.nc │ │ └── value_value.nc │ ├── nan_na_files_recipe.py │ └── nodefile_pbs_example_copy.txt ├── test_collection.py ├── test_cycle.py ├── test_domain.py ├── test_ensemble.py ├── test_evaluation.py ├── test_ioutils.py ├── test_job.py ├── test_model.py ├── test_namelist.py ├── test_outputdiffs.py ├── test_schedulers_pbs.py ├── test_simulation.py └── test_utils.py └── util ├── __init__.py ├── xrcmp.py └── xrnan.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */data/* 4 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - "*" 6 | pull_request: 7 | branches: 8 | - "*" 9 | 10 | jobs: 11 | 12 | wrfhydropy_setup: 13 | name: standard installation 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | defaults: 18 | run: 19 | shell: bash 20 | steps: 21 | - name: Checkout repo 22 | uses: actions/checkout@v4 23 | 24 | #- name: Set environment variables 25 | # run: | 26 | 27 | - name: Setup Python 28 | uses: actions/setup-python@v5 29 | with: 30 | python-version: "3.11" 31 | 32 | - name: Upgrade pip and install build and twine 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install wheel build twine 36 | 37 | - name: Base installation 38 | run: | 39 | pip --verbose install . 
40 | 41 | # - name: Print pyhmn version 42 | # run: | 43 | # python -c "import wrfhydropy; print(wrfhydropy.__version__)" 44 | 45 | # - name: Build wrfhydropy, check dist outputs 46 | # run: | 47 | # python -m build 48 | # twine check --strict dist/* 49 | 50 | # wrfhydropy_lint: 51 | # name: linting 52 | # runs-on: ubuntu-latest 53 | # strategy: 54 | # fail-fast: false 55 | # defaults: 56 | # run: 57 | # shell: bash 58 | # steps: 59 | # - name: Checkout repo 60 | # uses: actions/checkout@v3 61 | 62 | # - name: Setup Python 63 | # uses: actions/setup-python@v5 64 | # with: 65 | # python-version: 3.10 66 | 67 | # - name: Install dependencies 68 | # run: | 69 | # pip install wheel 70 | # pip install -r ./ci/requirements/environment.txt 71 | 72 | # - name: Version info 73 | # run: | 74 | # pip -V 75 | # pip list 76 | 77 | # - name: Run isort 78 | # run: | 79 | # echo "if isort check fails update isort using" 80 | # echo " pip install isort --upgrade" 81 | # echo "and run" 82 | # echo " isort ./wrfhydropy ./autotest" 83 | # echo "and then commit the changes." 84 | # isort --check --diff ./wrfhydropy 85 | 86 | # - name: Run black 87 | # run: | 88 | # echo "if black check fails update black using" 89 | # echo " pip install black --upgrade" 90 | # echo "and run" 91 | # echo " black ./wrfhydropy ./autotest" 92 | # echo "and then commit the changes." 93 | # black --check --diff ./wrfhydropy 94 | 95 | # - name: Run flake8 96 | # run: | 97 | # flake8 --count --show-source --exit-zero ./wrfhydropy ./autotest 98 | 99 | # - name: Run pylint 100 | # run: | 101 | # pylint --jobs=2 --errors-only --exit-zero ./wrfhydropy ./autotest 102 | 103 | test: 104 | name: ${{ matrix.os}} py${{ matrix.python-version }} 105 | runs-on: ${{ matrix.os }} 106 | defaults: 107 | run: 108 | shell: bash -l {0} 109 | strategy: 110 | fail-fast: false 111 | matrix: 112 | # os: [ "ubuntu-latest", "macos-latest", "windows-latest" ] 113 | # for debugging purposes run github actions only on ubuntu-latest until its passing 114 | os: [ "ubuntu-latest" ] 115 | python-version: [ "3.11" ] 116 | steps: 117 | - name: Checkout repo 118 | uses: actions/checkout@v4 119 | 120 | - name: Set environment variables 121 | run: | 122 | echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV 123 | 124 | # - name: Setup gfortran 125 | # uses: awvwgk/setup-fortran@main 126 | # with: 127 | # compiler: gcc 128 | # version: 11 129 | 130 | # - name: Setup Python 131 | # uses: actions/setup-python@v5 132 | # with: 133 | # python-version: ${{ matrix.python-version }} 134 | # architecture: x64 135 | 136 | - name: Install Dependencies via Micromamba 137 | if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' 138 | uses: mamba-org/setup-micromamba@v1 139 | with: 140 | environment-file: ./whp_test_env.yml 141 | cache-downloads: true 142 | cache-environment: true 143 | create-args: >- 144 | python=${{ matrix.python-version }} nccmp 145 | 146 | - name: Install Dependencies via Micromamba 147 | if: matrix.os == 'windows-latest' 148 | uses: mamba-org/setup-micromamba@v1 149 | with: 150 | environment-file: ./whp_test_env.yml 151 | cache-downloads: true 152 | cache-environment: true 153 | create-args: >- 154 | python=${{ matrix.python-version }} 155 | 156 | - name: Install nccmp on Windows 157 | if: matrix.os == 'windows-latest' 158 | run: | 159 | curl -kL https://downloads.sourceforge.net/project/nccmp/windows/x86_64/nccmp-1.8.2.0-msys2-x86_64.zip -o nccmp-1.8.2.0-msys2-x86_64.zip 160 | unzip nccmp-1.8.2.0-msys2-x86_64.zip 161 | echo 
"${PWD}/usr/local/bin" >> $GITHUB_PATH 162 | 163 | 164 | # - name: Install nccmp on Ubuntu or MacOS 165 | # uses: mamba-org/setup-micromamba@v1 166 | # with: 167 | # cache-downloads: true 168 | # cache-environment: true 169 | # create-args: nccmp 170 | 171 | - name: Install wrfhydropy 172 | run: | 173 | pip install . 174 | 175 | - name: Version info 176 | run: | 177 | pip -V 178 | pip list 179 | 180 | - name: Run tests 181 | working-directory: wrfhydropy/tests 182 | run: pytest 183 | -vv 184 | --durations=0 185 | --cov=wrfhydropy 186 | --cov-report=xml 187 | --junitxml=pytest.xml 188 | # -n=auto 189 | 190 | # - name: Upload test results 191 | # if: always() 192 | # uses: actions/upload-artifact@v2 193 | # with: 194 | # name: Test results for ${{ runner.os }}-${{ matrix.python-version }} 195 | # path: ./wrfhydropy/tests/pytest.xml 196 | 197 | # - name: Upload code coverage to Codecov 198 | # uses: codecov/codecov-action@v2.1.0 199 | # with: 200 | # file: ./autotest/coverage.xml 201 | # # flags: unittests 202 | # env_vars: RUNNER_OS,PYTHON_VERSION 203 | # # name: codecov-umbrella 204 | # fail_ci_if_error: false 205 | # version: "v0.1.15" 206 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # fortran compiled files 2 | *.mod 3 | *.o 4 | 5 | *.gz 6 | *.tar 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | .pytest_cache/ 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | .venv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | #macstuff 112 | .DS_Store 113 | 114 | #Pycharm stuff 115 | /.idea 116 | 117 | #Emacs 118 | *~ 119 | 120 | 121 | -------------------------------------------------------------------------------- /.pep8speaks.yml: -------------------------------------------------------------------------------- 1 | # File : .pep8speaks.yml 2 | 3 | scanner: 4 | diff_only: False # If False, the entire file touched by the Pull Request is scanned for errors. 
If True, only the diff is scanned. 5 | linter: pycodestyle # Other option is flake8 6 | 7 | pycodestyle: # Same as scanner.linter value. Other option is flake8 8 | max-line-length: 100 # Default is 79 in PEP 8 9 | 10 | no_blank_comment: False # If True, no comment is made on PR without any errors. 11 | descending_issues_order: False # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file 12 | 13 | message: # Customize the comment made by the bot 14 | opened: # Messages when a new PR is submitted 15 | header: "Hello @{name}! Thanks for opening this PR. " 16 | # The keyword {name} is converted into the author's username 17 | footer: "Local linting (style checking) can be performed using [pycodestyle](https://github.com/PyCQA/pycodestyle). General guidelines can be found at the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)" 18 | # The messages can be written as they would over GitHub 19 | updated: # Messages when new commits are added to the PR 20 | header: "Hello @{name}! Thanks for updating this PR. " 21 | footer: "" # Why comment the link to the style guide every time? :) 22 | no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: " 23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: true 3 | 4 | notifications: 5 | email: false 6 | python: 7 | - "3.8" 8 | before_install: 9 | - sudo add-apt-repository ppa:remik-ziemlinski/nccmp -y 10 | - sudo apt-get update 11 | - sudo apt-get install -y --allow-unauthenticated nccmp 12 | - pip install --upgrade pytest pytest-cov 13 | - pip install --upgrade coveralls 14 | - pip install -r requirements.txt 15 | - python setup.py install 16 | script: 17 | - pytest -v --cov=wrfhydropy 18 | after_success: 19 | - coveralls 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WRF-HYDRO-PY 2 | 3 | [![Build Status](https://github.com/NCAR/wrf_hydro_py/actions/workflows/ci.yaml/badge.svg)](https://github.com/NCAR/wrf_hydro_py/blob/main/.github/workflows/ci.yaml) 4 | [![Coverage Status](https://coveralls.io/repos/github/NCAR/wrf_hydro_py/badge.svg?branch=master&service=github)](https://coveralls.io/github/NCAR/wrf_hydro_py?branch=master) 5 | [![PyPI](https://img.shields.io/pypi/v/wrfhydropy.svg)](https://pypi.python.org/pypi/wrfhydropy) 6 | [![GitHub release](https://img.shields.io/github/release/NCAR/wrf_hydro_py.svg)](https://github.com/NCAR/wrf_hydro_py/releases/latest) 7 | [![Documentation Status](https://readthedocs.org/projects/wrfhydropy/badge/?version=latest)](https://wrfhydropy.readthedocs.io/en/latest/?badge=latest) 8 | 9 | 10 | 11 | ![](https://ral.ucar.edu/sites/default/files/public/wrf_hydro_symbol_logo_2017_09_150pxby63px.png) 12 | 13 | 14 | **IMPORTANT:** This package is in the very early stages of development and the package API may change at any time. It is not recommended that this package be used for significant work until version 0.1. 15 | 16 | ## Description 17 | *wrfhydropy* provides an end-to-end python interface to support reproducible research and construction of workflows involving the 18 | WRF-Hydro model. See the docs for an extended description of [what-and-why wrfhydropy](https://wrfhydropy.readthedocs.io/en/latest/what-and-why.html). 
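The sketch below shows roughly how the pieces fit together: a Simulation is composed from a Model, a Domain, and one or more Jobs (see the what-and-why docs). The paths, configuration names, and some argument details here are illustrative assumptions rather than a tested recipe; see the end-to-end example notebook in the documentation for a working walk-through.

```python
import datetime
import os
import pathlib

import wrfhydropy

# Core objects: a Model (source code + compile options), a Domain (all
# domain-specific files), and a Job (model start/end times and the run command).
model = wrfhydropy.Model(
    source_dir='wrf_hydro_nwm_public/src',   # illustrative path to WRF-Hydro source
    model_config='nwm_ana',
    compiler='gfort',
)
domain = wrfhydropy.Domain(
    domain_top_dir='example_case',            # illustrative path to a domain directory
    domain_config='nwm_ana',
)
job = wrfhydropy.Job(
    job_id='run_1',
    model_start_time=datetime.datetime(2018, 8, 1),
    model_end_time=datetime.datetime(2018, 8, 2),
    exe_cmd='mpirun -np 2 ./wrf_hydro.exe',
)

# Compose the objects into a Simulation and run it in a dedicated run directory.
sim = wrfhydropy.Simulation()
sim.add(model)
sim.add(domain)
sim.add(job)

run_dir = pathlib.Path('sim_run_dir')
run_dir.mkdir(exist_ok=True)
os.chdir(run_dir)
sim.compose()   # compiles the model and stages domain files / namelists
sim.run()
```

A scheduler (e.g. `PBSCheyenne`) can also be added to a simulation to submit jobs on HPC systems.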
19 | 20 | ## Documentation 21 | Documentation is available on-line through `help()` and via [readthedocs](https://wrfhydropy.readthedocs.io/en/latest/index.html). Documentation is a work in progress; please feel free to help improve the documentation or to open an issue when the docs are inaccurate! 22 | 23 | ## Contributing standards 24 | Failure to adhere to contributing standards may result in your Pull Request being rejected. 25 | 26 | ### pep8speaks 27 | All pull requests will be linted automatically by pep8speaks and reported as a comment on the pull request. The pep8speaks configuration is specified in .pep8speaks.yml. All pull requests must satisfy pep8speaks. 28 | Local linting can be performed after a `pip install` of [pycodestyle](https://github.com/PyCQA/pycodestyle). Pep8speaks linting reports also update with updated pull requests. 29 | 30 | ### Additional Style Guidelines 31 | * Max line length: 100 chars. 32 | * docstrings: [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) 33 | * All other guidance follows [Google style guide](https://google.github.io/styleguide/pyguide.html) 34 | * General advice: [Hitchhiker's guide to code style](https://goo.gl/hqbW4r) 35 | 36 | ### Testing 37 | All pull requests must pass automated testing (via the GitHub Actions CI workflow). Testing can be performed locally by running `pytest` in the `wrfhydropy/tests` directory. Currently, this testing relies on the [`nccmp`](https://gitlab.com/remikz/nccmp) binary for comparing netcdf files. A docker container can be supplied for testing on request (and documentation will subsequently be placed here). 38 | 39 | ### Coverage 40 | Testing concludes by submitting a request to [coveralls](https://coveralls.io/). This will automatically report changes in code coverage from the testing. Coverage should be maximized with every pull request. That is, all new functions or classes must be accompanied by comprehensive additional unit/integration tests in the `wrf_hydro_py/wrfhydropy/tests` directory. Running coverage locally can be achieved by `pip` installing [`coverage`](https://pypi.org/project/coverage/) and [`pytest-cov`](https://pypi.org/project/pytest-cov/) following a process similar to the following: 41 | ``` 42 | cd wrfhydropy/tests/ 43 | pytest --cov=wrfhydropy 44 | coverage html -d coverage_html 45 | chrome coverage_html/index.html # or your browser of choice 46 | ``` 47 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | source/ -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = wrfhydropy 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/stable/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import datetime 16 | import os 17 | import sys 18 | sys.path.insert(0, os.path.abspath('.')) 19 | sys.path.insert(0, os.path.abspath('../')) 20 | 21 | # -- Project information ----------------------------------------------------- 22 | 23 | project = 'wrfhydropy' 24 | copyright = '2018-%s, wrfhydropy Devlopers' % datetime.datetime.now().year 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '0.0.3' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.viewcode', 44 | 'sphinx.ext.napoleon', 45 | 'nbsphinx', 46 | "sphinx.ext.autosummary", 47 | "sphinx.ext.intersphinx", 48 | # "sphinx.ext.extlinks", 49 | "sphinx.ext.mathjax", 50 | "numpydoc", 51 | "IPython.sphinxext.ipython_directive", 52 | "IPython.sphinxext.ipython_console_highlighting", 53 | ] 54 | 55 | nbsphinx_timeout = 600 56 | #nbsphinx_execute = "always" 57 | #nbsphinx_allow_errors = True 58 | #nbsphinx_prolog = """ 59 | #{% set docname = env.doc2path(env.docname, base=None) %} 60 | #You can run this notebook in a `live session `_ |Binder| or view it `on Github `_. 61 | #.. |Binder| image:: https://mybinder.org/badge.svg 62 | # :target: https://mybinder.org/v2/gh/pydata/xarray/master?urlpath=lab/tree/doc/{{ docname }} 63 | #""" 64 | 65 | autosummary_generate = True 66 | 67 | napoleon_google_docstring = True 68 | napoleon_use_param = False 69 | napoleon_use_ivar = True 70 | 71 | # Add any paths that contain templates here, relative to this directory. 72 | templates_path = ['_templates'] 73 | 74 | # The suffix(es) of source filenames. 75 | # You can specify multiple suffix as a list of string: 76 | # 77 | # source_suffix = ['.rst', '.md'] 78 | source_suffix = '.rst' 79 | 80 | # The master toctree document. 81 | master_doc = 'index' 82 | 83 | # The language for content autogenerated by Sphinx. Refer to documentation 84 | # for a list of supported languages. 85 | # 86 | # This is also used if you do content translation via gettext catalogs. 87 | # Usually you set "language" from the command line for these cases. 88 | language = 'en' 89 | 90 | # List of patterns, relative to source directory, that match files and 91 | # directories to ignore when looking for source files. 
92 | # This pattern also affects html_static_path and html_extra_path . 93 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 94 | 95 | # The name of the Pygments (syntax highlighting) style to use. 96 | pygments_style = 'sphinx' 97 | 98 | 99 | # -- Options for HTML output ------------------------------------------------- 100 | 101 | # The theme to use for HTML and HTML Help pages. See the documentation for 102 | # a list of builtin themes. 103 | # 104 | html_theme = 'sphinx_rtd_theme' 105 | 106 | # Theme options are theme-specific and customize the look and feel of a theme 107 | # further. For a list of options available for each theme, see the 108 | # documentation. 109 | # 110 | # html_theme_options = {} 111 | 112 | # Add any paths that contain custom static files (such as style sheets) here, 113 | # relative to this directory. They are copied after the builtin static files, 114 | # so a file named "default.css" will overwrite the builtin "default.css". 115 | html_static_path = ['_static'] 116 | 117 | # Custom sidebar templates, must be a dictionary that maps document names 118 | # to template names. 119 | # 120 | # The default sidebars (for documents that don't match any pattern) are 121 | # defined by theme itself. Builtin themes are using these templates by 122 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 123 | # 'searchbox.html']``. 124 | # 125 | # html_sidebars = {} 126 | 127 | 128 | # -- Options for HTMLHelp output --------------------------------------------- 129 | 130 | # Output file base name for HTML help builder. 131 | htmlhelp_basename = 'wrfhydropydoc' 132 | 133 | 134 | # -- Options for LaTeX output ------------------------------------------------ 135 | 136 | latex_elements = { 137 | # The paper size ('letterpaper' or 'a4paper'). 138 | # 139 | # 'papersize': 'letterpaper', 140 | 141 | # The font size ('10pt', '11pt' or '12pt'). 142 | # 143 | # 'pointsize': '10pt', 144 | 145 | # Additional stuff for the LaTeX preamble. 146 | # 147 | # 'preamble': '', 148 | 149 | # Latex figure (float) alignment 150 | # 151 | # 'figure_align': 'htbp', 152 | } 153 | 154 | # Grouping the document tree into LaTeX files. List of tuples 155 | # (source start file, target name, title, 156 | # author, documentclass [howto, manual, or own class]). 157 | latex_documents = [ 158 | (master_doc, 'wrfhydropy.tex', 'wrfhydropy Documentation', 159 | 'wrfhydropy Developers', 'manual'), 160 | ] 161 | 162 | 163 | # -- Options for manual page output ------------------------------------------ 164 | 165 | # One entry per manual page. List of tuples 166 | # (source start file, name, description, authors, manual section). 167 | man_pages = [ 168 | (master_doc, 'wrfhydropy', 'wrfhydropy Documentation', 1) 169 | ] 170 | 171 | 172 | # -- Options for Texinfo output ---------------------------------------------- 173 | 174 | # Grouping the document tree into Texinfo files. List of tuples 175 | # (source start file, target name, title, author, 176 | # dir menu entry, description, category) 177 | texinfo_documents = [ 178 | (master_doc, 'wrfhydropy', 'wrfhydropy Documentation', 179 | 'wrfhydropy', 'One line description of project.', 180 | 'Miscellaneous'), 181 | ] 182 | 183 | 184 | # -- Extension configuration ------------------------------------------------- 185 | -------------------------------------------------------------------------------- /doc/evaluation.rst: -------------------------------------------------------------------------------- 1 | .. 
currentmodule:: wrfhydropy 2 | 3 | ############# 4 | Evaluation 5 | ############# 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | Evaluation 11 | Evaluation.brier 12 | Evaluation.contingency 13 | Evaluation.crps 14 | Evaluation.event 15 | Evaluation.gof 16 | -------------------------------------------------------------------------------- /doc/examples.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | examples/ex_01_end_to_end 8 | examples/ex_02_docker_jupyter 9 | examples/ex_03_cycle_simulation 10 | examples/ex_04_ensembles 11 | -------------------------------------------------------------------------------- /doc/examples/ex_02_docker_jupyter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Running wrfhydropy through docker\n", 8 | "The first example shows running `wrfhydropy` from the Cheyenne machine. This is similar to being on a linux environment where model compilation is not a problem. \n", 9 | "\n", 10 | "For developers on laptops and non-linux use cases, `docker` can be used as shown here. The prerequisites are: \n", 11 | "* Docker installed\n", 12 | "* Local version of `wrf_hydro_py` repository\n", 13 | "* Local version of `wrf_hydro_nwm_public` repository\n", 14 | "\n", 15 | "On the host machine:\n", 16 | "```bash\n", 17 | "docker pull wrfhydro/dev:conda\n", 18 | "docker run -it -p 8899:8888 \\\n", 19 | " -v /Users/jamesmcc/WRF_Hydro/wrf_hydro_nwm_public:/wrf_hydro_nwm_public \\\n", 20 | " -v /Users/jamesmcc/WRF_Hydro/wrf_hydro_py:/wrf_hydro_py \\\n", 21 | " wrfhydro/dev:conda\n", 22 | "```\n", 23 | "You will need to customize the paths to `wrf_hydro_py` and `wrf_hydro_nwm_public` on the host side of the volume mounts. Leaving the docker side of the volume mounts will streamline the code below. Note that we are using different ports on host:docker.\n", 24 | "\n", 25 | "\n", 26 | "The above command will enter you into the docker image. In docker, first let's install the mounted `wrfhydropy` (this install will be lost when docker is exited but the source modifications will remain in the mounted drive - so it's just annoying to reinstall):\n", 27 | "\n", 28 | "```\n", 29 | "cd /wrf_hydro_py\n", 30 | "python setup.py develop\n", 31 | "```\n", 32 | "\n", 33 | "Then, to start jupyter lab, execute: \n", 34 | "```bash\n", 35 | "cd doc/examples\n", 36 | "jupyter-lab --ip 0.0.0.0 --no-browser --allow-root\n", 37 | "```\n", 38 | "\n", 39 | "This will start jupyter lab in docker; it will print a URL with a token embedded, like this (don't use this one): \n", 40 | "\n", 41 | "```\n", 42 | "http://(ac61502766bc or 127.0.0.1):8888/?token=a824b4cdb345e944d3754f1d5a97d2aedb4b003b2e76e625\n", 43 | "```\n", 44 | "To connect to jupyter lab on the host, transform the above URL to the following: \n", 45 | "\n", 46 | "```\n", 47 | "http://localhost:8899/?token=a824b4cdb345e944d3754f1d5a97d2aedb4b003b2e76e625\n", 48 | "```\n", 49 | "(keeping the token the same) and paste it in the browser on your local machine. Note that the port on the local host is not the port indicated by jupyter lab, it's the one we selected in docker.\n", 50 | "\n", 51 | "Finally, select the `wrfhydropy` example notebook you want to run!" 
52 | ] 53 | } 54 | ], 55 | "metadata": { 56 | "kernelspec": { 57 | "display_name": "Python 3", 58 | "language": "python", 59 | "name": "python3" 60 | }, 61 | "language_info": { 62 | "codemirror_mode": { 63 | "name": "ipython", 64 | "version": 3 65 | }, 66 | "file_extension": ".py", 67 | "mimetype": "text/x-python", 68 | "name": "python", 69 | "nbconvert_exporter": "python", 70 | "pygments_lexer": "ipython3", 71 | "version": "3.7.1" 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 2 76 | } 77 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. wrfhydropy documentation master file, created by 2 | sphinx-quickstart on Wed Jun 13 08:51:59 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | wrfhydropy: An API for the WRF-Hydro model and more. 7 | ==================================================== 8 | 9 | Documentation 10 | ------------- 11 | 12 | **Getting Started** 13 | 14 | * :doc:`what-and-why` 15 | * :doc:`installation` 16 | * :doc:`examples` 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | :hidden: 21 | :caption: Getting Started 22 | 23 | what-and-why 24 | installation 25 | examples 26 | 27 | 28 | **Reference** 29 | 30 | * :doc:`model_api` 31 | * :doc:`output` 32 | * :doc:`utilities` 33 | 34 | .. toctree:: 35 | :maxdepth: 2 36 | :hidden: 37 | :caption: Reference 38 | 39 | model_api 40 | output 41 | evaluation 42 | utilities 43 | 44 | 45 | **Help & Index** 46 | 47 | * :ref:`genindex` 48 | * :ref:`modindex` 49 | * :ref:`search` 50 | 51 | .. toctree:: 52 | :maxdepth: 1 53 | :hidden: 54 | :caption: Help & Index 55 | 56 | search 57 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Dependencies 5 | ------------ 6 | 7 | Instructions 8 | ------------ 9 | The easiest way:: 10 | 11 | $ pip install wrfhydropy 12 | 13 | Development installation:: 14 | 15 | $ git clone https://github.com/yourhandle/wrf_hydro_py.git 16 | $ cd wrf_hydro_py 17 | $ python setup.py develop 18 | 19 | -------------------------------------------------------------------------------- /doc/model_api.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: wrfhydropy 2 | 3 | ############# 4 | Model API 5 | ############# 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | Model 11 | Domain 12 | Job 13 | PBSCheyenne 14 | Simulation 15 | EnsembleSimulation 16 | CycleSimulation 17 | parallel_teams_run 18 | -------------------------------------------------------------------------------- /doc/output.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: wrfhydropy 2 | 3 | ############# 4 | Output 5 | ############# 6 | 7 | .. 
autosummary:: 8 | :toctree: generated/ 9 | 10 | open_whp_dataset 11 | 12 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | nbsphinx 3 | numpydoc 4 | sphinx-rtd-theme 5 | pandoc 6 | -------------------------------------------------------------------------------- /doc/source/modules.rst: -------------------------------------------------------------------------------- 1 | wrfhydropy 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | wrfhydropy 8 | -------------------------------------------------------------------------------- /doc/source/wrfhydropy.core.rst: -------------------------------------------------------------------------------- 1 | wrfhydropy.core package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | wrfhydropy.core.domain module 8 | ----------------------------- 9 | 10 | .. automodule:: wrfhydropy.core.domain 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | wrfhydropy.core.ensemble module 16 | ------------------------------- 17 | 18 | .. automodule:: wrfhydropy.core.ensemble 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | wrfhydropy.core.ensemble\_tools module 24 | -------------------------------------- 25 | 26 | .. automodule:: wrfhydropy.core.ensemble_tools 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | wrfhydropy.core.ioutils module 32 | ------------------------------ 33 | 34 | .. automodule:: wrfhydropy.core.ioutils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | wrfhydropy.core.job module 40 | -------------------------- 41 | 42 | .. automodule:: wrfhydropy.core.job 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | wrfhydropy.core.model module 48 | ---------------------------- 49 | 50 | .. automodule:: wrfhydropy.core.model 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | wrfhydropy.core.namelist module 56 | ------------------------------- 57 | 58 | .. automodule:: wrfhydropy.core.namelist 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | wrfhydropy.core.outputdiffs module 64 | ---------------------------------- 65 | 66 | .. automodule:: wrfhydropy.core.outputdiffs 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | wrfhydropy.core.schedulers module 72 | --------------------------------- 73 | 74 | .. automodule:: wrfhydropy.core.schedulers 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | wrfhydropy.core.simulation module 80 | --------------------------------- 81 | 82 | .. automodule:: wrfhydropy.core.simulation 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | 88 | Module contents 89 | --------------- 90 | 91 | .. automodule:: wrfhydropy.core 92 | :members: 93 | :undoc-members: 94 | :show-inheritance: 95 | -------------------------------------------------------------------------------- /doc/source/wrfhydropy.rst: -------------------------------------------------------------------------------- 1 | wrfhydropy package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | wrfhydropy.core 10 | wrfhydropy.tests 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. 
automodule:: wrfhydropy 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /doc/utilities.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: wrfhydropy 2 | 3 | ############# 4 | Utilities 5 | ############# 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | xrnan 11 | xrcmp 12 | diff_namelist 13 | -------------------------------------------------------------------------------- /doc/what-and-why.rst: -------------------------------------------------------------------------------- 1 | Overview: What and why wrfhydropy? 2 | ================================== 3 | 4 | What is wrfhydropy? 5 | ------------------- 6 | 7 | **wrfhydropy** provides an end-to-end python interface to support 8 | reproducible research and construction of workflows involving the 9 | WRF-Hydro model. 10 | 11 | **wrfhydropy**: 12 | * Is a Python API for the WRF-Hydro modelling system. 13 | * Provides tools for working with WRF-Hydro input (preparation) 14 | and output (analysis), largely based on xarray_. 15 | * Is tested_ and coverage_ is calculated. 16 | 17 | The package provides fine-grained control over the model and its 18 | inputs and outputs. Generally, high-level workflows are not found here 19 | **but should be and can easily be built from wrfhydropy**. 20 | 21 | **wrfhydropy** facilitates all aspects of working with WRF-Hydro including: 22 | * compiling 23 | * setting up experiments (manipulating input files and namelists) 24 | * running and scheduling jobs 25 | * collecting output 26 | * analysis (input and output) 27 | * sharing and reproducing results (jupyter notebooks) 28 | 29 | The wrfhydropy package is **user supported and community contributed**. That 30 | means you can help add to and improve it! 31 | 32 | 33 | Why wrfhydropy? 34 | --------------- 35 | The WRF-Hydro model was not originally built with many applications or workflows 36 | in mind. Without significant investment in rewriting the code, a scripting 37 | language is needed to adapt the FORTRAN model API to something suited to other 38 | purposes. Python is a good choice for this secondary API language for a vareity of 39 | reasons (widely adopted, multi-platform, great packages for scientific analysis, 40 | etc ...). Python therefore provides a mechanism for developing a better (for many 41 | purposes) model interface that is afforded by the underlying model. For this reason, 42 | a few conceptualizations in wrfhydropy are formalized differently than in FORTRAN. 43 | These are summarized in `Key concepts`_. The model API as developed in python may begin 44 | to make its way back to the underlying FORTRAN code with time. 45 | 46 | wrfhydropy was initally developed to handle the WRF-Hydro model testing 47 | (`wrf_hydro_nwm_public/tests `_) 48 | and, in particularly, the need to be able to 49 | easily swap domains while holding model options constant. Another early 50 | application was the construction and execuation of ensembles and ensemble 51 | forecasts. The examples_ included in this documentation will grow to show other 52 | applications of the package. 53 | 54 | 55 | Limitations of wrfhydropy 56 | ------------------------- 57 | 58 | The wrfhydropy package does many things but also has limitations 59 | which are worth acknowledging up-front. The development of wrfhydropy has 60 | mostly emerged to support testing and other applications of the NWM. 
While 61 | wrfhydropy supports other modes of running WRF-Hydro, the further away from 62 | the NWM you get, the less likely wrfhydropy will support your needs. This 63 | guidance is highly dependent on the differences from the NWM. If the differences 64 | are contained in the namelists only, you are likely not going to have issues. But 65 | attempting to use the Noah model instead of NoahMP, for example, will 66 | simply not work. wrfhydropy is open to changes/enhancements to support your needs, 67 | but may require you to implement *and test* them to get them into the master branch. 68 | 69 | wrfhydropy does not provide an in-memory connection between WRF-Hydro and Python. 70 | The API is implemented through system calls (Python's subprocess) and all information 71 | between Python and the model passes through disk. There is no magic in wrfhydropy, 72 | just convenience: you still need a system and environment in which WRF-Hydro can be 73 | compiled and run. (Such as our `development docker container`_.) 74 | 75 | 76 | Key concepts 77 | ------------ 78 | 79 | Here we summarize a few concepts in wrfhydropy which differ from how WRF-Hydro is generally 80 | used. Links are provided to examples. 81 | 82 | 83 | Object Oriented API 84 | ################### 85 | The wrfhydropy model API follows an object oriented approach. Composition 86 | of objects is a theme of the design. That is: core building blocks are put 87 | together to form more complicated objects. The separation of concerns of these 88 | objects is important (and sometimes challenging), but often rewarding. 89 | 90 | Upper case means a class (and will link to the class definition). 91 | Lower case means an instance of a class (not linked). 92 | The left arrow means object composition, also known as a "has a" relationship. 93 | 94 | Core objects: 95 | * Domain 96 | * Model 97 | * Job 98 | * Scheduler 99 | 100 | Higher-level objects: 101 | * Simulation <- domain, model, job [, scheduler] 102 | * Ensemble <- simulation, job [, scheduler] 103 | * Cycle <- simulation|ensemble, job [, scheduler] 104 | 105 | The first example in the documentation, 106 | `End-to-end overview of wrfhydropy: Simulation evaluation`_ 107 | details the core objects, their initialization and their composition into 108 | a Simulation object. 109 | 110 | 111 | Namelists: Model and domain sides 112 | ################################# 113 | Namelists are treated by wrfhydropy in a completely different way 114 | than WRF-Hydro model users experience them. The input namelists to the model, 115 | namelist.hrldas and hydro.namelist, are each split into two pieces, the model-side 116 | and domain-side options. The new namelist files collect many different potential 117 | namelists using named configurations. The motivation for this and the details are 118 | explained in depth in the `namelist section`_ of the first example of the documentation. 119 | 120 | 121 | Jobs: 122 | ##### 123 | The notion of a Job is formalized by wrfhydropy and can be a bit surprising to 124 | WRF-Hydro users. Jobs are essentially model time and frequency interventions into the 125 | model namelists. Each job has a different call to the executable and a subdirectory 126 | of the run directory dedicated to its provenance and its artifacts. Details are 127 | provided in the `Job section`_ of the first example of the documentation. 128 | 129 | 130 | .. _xarray: http://xarray.pydata.org/en/stable/ 131 | .. _tested: https://github.com/NCAR/wrf_hydro_py/tree/master/wrfhydropy/tests 132 | .. 
_coverage: https://coveralls.io/github/NCAR/wrf_hydro_py 133 | .. _examples: https://wrfhydropy.readthedocs.io/en/latest/examples.html 134 | .. _`development docker container`: https://hub.docker.com/r/wrfhydro/dev 135 | .. _`End-to-end overview of wrfhydropy: Simulation evaluation`: https://wrfhydropy.readthedocs.io/en/latest/examples/ex_01_end_to_end.html 136 | .. _`namelist section`: https://wrfhydropy.readthedocs.io/en/latest/examples/ex_01_end_to_end.html#2.-Namelists-and-configurations-in-wrfhydropy 137 | .. _`Job section`: https://wrfhydropy.readthedocs.io/en/latest/examples/ex_01_end_to_end.html#7.-Job-object -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Build documentation in the doc/ directory with Sphinx 8 | sphinx: 9 | configuration: doc/conf.py 10 | 11 | # Docker image used for build 12 | build: 13 | os: ubuntu-22.04 14 | tools: 15 | python: "3.10" 16 | 17 | # Optionally build your docs in additional formats such as PDF and ePub 18 | formats: [] 19 | 20 | # Optionally set the version of Python and requirements required to build your docs 21 | python: 22 | install: 23 | - requirements: doc/requirements.txt 24 | - requirements: requirements.txt 25 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boltons>=23.1.1 2 | bs4>=0.0.1 3 | dask[bag]>=2.14.0 4 | deepdiff>=6.2.3 5 | f90nml>=1.2 6 | importlib-metadata==4.13.0 7 | netCDF4>=1.5.3 8 | numpy>=1.23.5 9 | pandas>=1.3.5 10 | properscoring==0.1 11 | pytest<=7.4.4 12 | pytest-html>=3.0.0 13 | pytest-datadir-ng>=1.1.1 14 | pytest-lazy-fixture>=0.6.3 15 | requests>=2.23.0 16 | spotpy>=1.6.0 17 | urllib3>=2.0.2 18 | xarray>=0.19 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name='wrfhydropy', 8 | version='0.0.21', 9 | packages=find_packages(), 10 | package_data={'wrfhydropy': ['core/data/*']}, 11 | url='https://github.com/NCAR/wrf_hydro_py', 12 | license='MIT', 13 | install_requires=[ 14 | 'boltons>=23.1.1', 15 | 'bs4>=0.0.1', 16 | 'dask[bag]>=2.14.0', 17 | 'deepdiff>=6.2.3', 18 | 'f90nml>=1.2', 19 | 'importlib-metadata==4.13.0', 20 | 'netCDF4>=1.5.3', 21 | 'numpy>=1.23.5', 22 | 'pandas>=1.3.5', 23 | 'properscoring==0.1', 24 | 'pytest<=7.4.4', 25 | 'pytest-html>=3.0.0', 26 | 'pytest-datadir-ng>=1.1.1', 27 | 'pytest-lazy-fixture>=0.6.3', 28 | 'requests>=2.23.0', 29 | 'spotpy>=1.6.0', 30 | 'urllib3>=2.0.2', 31 | 'xarray>=0.19' 32 | ], 33 | author='WRF-Hydro Team', 34 | author_email='@ucar.edu', 35 | description='API for the WRF-Hydro model', 36 | long_description=long_description, 37 | long_description_content_type="text/markdown", 38 | python_requires=">=3.7", 39 | ) 40 | -------------------------------------------------------------------------------- /whp_test_env.yml: -------------------------------------------------------------------------------- 1 | name: whp 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | 
dependencies: 6 | - boltons>=23.1.1 7 | - bs4>=0.0.1 8 | - dask[bag]>=2.14.0 9 | - deepdiff==6.3.0 10 | - f90nml>=1.2 11 | - netCDF4>=1.5.3 12 | - numpy>=1.23.5 13 | - pandas>=1.0.3 14 | - pathlib>=1.0.1 15 | - properscoring==0.1 16 | - pytest<=7.4.4 17 | - pytest-html>=3.0.0 18 | - pytest-lazy-fixture>=0.6.3 19 | - requests>=2.23.0 20 | - spotpy>=1.6.0 21 | - xarray>=0.19 22 | - pip 23 | - pip: 24 | - click != 8.1.0 25 | - black < 23.1.0 26 | - isort 27 | - flake8 28 | - pylint 29 | - pytest-datadir-ng>=1.1.1 30 | - pytest-cov 31 | - pytest-env 32 | - pytest-order 33 | - pytest-xdist 34 | - pyyaml 35 | -------------------------------------------------------------------------------- /wrfhydropy/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import ioutils 2 | from .core import namelist 3 | from .core import outputdiffs 4 | from .core import schedulers 5 | from .core.collection import open_whp_dataset 6 | from .core.cycle import * 7 | # from .core.cycle import CycleSimulation 8 | from .core.domain import * 9 | from .core.ensemble import * 10 | # from .core.ensemble import EnsembleSimulation 11 | from .core.evaluation import Evaluation 12 | from .core.job import Job 13 | from .core.model import Model 14 | from .core.namelist import diff_namelist 15 | from .core.schedulers import PBSCheyenne 16 | from .core.simulation import Simulation 17 | from .core.teams import parallel_teams_run 18 | from .util.xrcmp import xrcmp 19 | from .util.xrnan import xrnan 20 | -------------------------------------------------------------------------------- /wrfhydropy/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/core/__init__.py -------------------------------------------------------------------------------- /wrfhydropy/core/collection.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import dask 3 | import dask.bag 4 | from datetime import datetime 5 | import itertools 6 | from multiprocessing.pool import Pool 7 | import numpy as np 8 | import pathlib 9 | from wrfhydropy.core.ioutils import timesince 10 | import xarray as xr 11 | 12 | 13 | def is_not_none(x): 14 | return x is not None 15 | 16 | 17 | def group_lead_time(ds: xr.Dataset) -> int: 18 | return ds.lead_time.item(0) 19 | 20 | 21 | def group_member_lead_time(ds: xr.Dataset) -> str: 22 | return str(ds.member.item(0)) + '-' + str(ds.lead_time.item(0)) 23 | 24 | 25 | def group_member(ds: xr.Dataset) -> int: 26 | return ds.member.item(0) 27 | 28 | 29 | def group_identity(ds: xr.Dataset) -> int: 30 | return 1 31 | 32 | 33 | def merge_reference_time(ds_list: list) -> xr.Dataset: 34 | return xr.concat(ds_list, dim='reference_time', coords='minimal') 35 | 36 | 37 | def merge_member(ds_list: list) -> xr.Dataset: 38 | return xr.concat(ds_list, dim='member', coords='minimal') 39 | 40 | 41 | def merge_lead_time(ds_list: list) -> xr.Dataset: 42 | return xr.concat(ds_list, dim='lead_time', coords='minimal') 43 | 44 | 45 | def merge_time(ds_list: list) -> xr.Dataset: 46 | return xr.concat(ds_list, dim='time', coords='minimal') 47 | 48 | 49 | def preprocess_whp_data( 50 | path, 51 | isel: dict = None, 52 | drop_variables: list = None 53 | ) -> xr.Dataset: 54 | try: 55 | ds = xr.open_dataset(path) 56 | except OSError: 57 | print("Skipping file, unable to open: ", path) 58 | return None 59 | 60 
| if drop_variables is not None: 61 | to_drop = set(ds.variables).intersection(set(drop_variables)) 62 | if to_drop != set(): 63 | ds = ds.drop_vars(to_drop) 64 | 65 | # Exception for RESTART.YYMMDDHHMM_DOMAIN1 files 66 | if 'RESTART.' in str(path): 67 | time = datetime.strptime(ds.Times.values[0].decode('utf-8'), '%Y-%m-%d_%H:%M:%S') 68 | ds = ds.squeeze('Time') 69 | ds = ds.drop_vars(['Times']) 70 | ds = ds.assign_coords(time=time) 71 | 72 | # Exception for HYDRO_RST.YY-MM-DD_HH:MM:SS_DOMAIN1 files 73 | if 'HYDRO_RST.' in str(path): 74 | time = datetime.strptime(ds.attrs['Restart_Time'], '%Y-%m-%d_%H:%M:%S') 75 | ds = ds.assign_coords(time=time) 76 | 77 | filename_parent = pathlib.Path(path).parent 78 | filename_grandparent = pathlib.Path(path).parent.parent 79 | 80 | # Member preprocess 81 | # Assumption is that parent dir is member_mmm 82 | # member = None 83 | if 'member' in filename_parent.name: 84 | # This is a double check that this convention is because of wrf_hydro_py 85 | assert filename_parent.parent.joinpath('WrfHydroEns.pkl').exists() 86 | member = int(filename_parent.name.split('_')[-1]) 87 | ds.coords['member'] = member 88 | 89 | # Lead time preprocess 90 | # Assumption is that parent dir is cast_yymmddHH 91 | if 'cast_' in filename_parent.name or 'cast_' in filename_grandparent.name: 92 | # Exception for cast HYDRO_RST.YY-MM-DD_HH:MM:SS_DOMAIN1 and 93 | # RESTART.YYMMDDHHMM_DOMAIN1 files 94 | if 'HYDRO_RST.' in str(path) or 'RESTART' in str(path): 95 | cast_fmt = 'cast_%Y%m%d%H' 96 | if 'cast_' in filename_parent.name: 97 | # This is a double check that this convention is because of wrf_hydro_py 98 | assert filename_parent.parent.joinpath('WrfHydroCycle.pkl').exists() 99 | ds.coords['reference_time'] = datetime.strptime(filename_parent.name, cast_fmt) 100 | elif 'cast_' in filename_grandparent.name: 101 | # This is a double check that this convention is because of wrf_hydro_py 102 | assert filename_grandparent.parent.joinpath('WrfHydroCycle.pkl').exists() 103 | ds.coords['reference_time'] = \ 104 | datetime.strptime(filename_grandparent.name, cast_fmt) 105 | ds.coords['lead_time'] = np.array( 106 | ds.time.values - ds.reference_time.values, 107 | dtype='timedelta64[ns]' 108 | ) 109 | ds = ds.drop_vars('time') 110 | 111 | # Could create a valid time variable here, but I'm guessing it's more efficient 112 | # after all the data are collected. 113 | # ds['valid_time'] = np.datetime64(int(ds.lead_time) + int(ds.reference_time), 'ns') 114 | 115 | else: 116 | if 'reference_time' in ds.variables: 117 | ds = ds.drop_vars('reference_time') 118 | 119 | # Spatial subsetting 120 | if isel is not None: 121 | ds = ds.isel(isel) 122 | 123 | return ds 124 | 125 | 126 | def open_whp_dataset_inner( 127 | paths: list, 128 | chunks: dict = None, 129 | attrs_keep: list = ['featureType', 'proj4', 130 | 'station_dimension', 'esri_pe_string', 131 | 'Conventions', 'model_version'], 132 | isel: dict = None, 133 | drop_variables: list = None, 134 | npartitions: int = None, 135 | profile: int = False 136 | ) -> xr.Dataset: 137 | 138 | if profile: 139 | then = timesince() 140 | 141 | # This is totally arbitrary be seems to work ok. 142 | # if npartitions is None: 143 | # npartitions = dask.config.get('pool')._processes * 4 144 | # This choice does not seem to work well or at all, error? 
145 | # npartitions = len(sorted(paths)) 146 | paths_bag = dask.bag.from_sequence(paths, npartitions=npartitions) 147 | 148 | if profile: 149 | then = timesince(then) 150 | print('after paths_bag') 151 | 152 | ds_list = paths_bag.map( 153 | preprocess_whp_data, 154 | isel=isel, 155 | drop_variables=drop_variables 156 | ).filter(is_not_none).compute() 157 | 158 | if len(ds_list) == 0: 159 | return None 160 | 161 | if profile: 162 | then = timesince(then) 163 | print("after ds_list preprocess/filter") 164 | 165 | # Group by and merge by choices 166 | have_members = 'member' in ds_list[0].coords 167 | have_lead_time = 'lead_time' in ds_list[0].coords 168 | if have_lead_time: 169 | if have_members: 170 | group_list = [group_member_lead_time, group_lead_time] 171 | merge_list = [merge_reference_time, merge_member] 172 | else: 173 | group_list = [group_lead_time] 174 | merge_list = [merge_reference_time] 175 | else: 176 | if have_members: 177 | group_list = [group_member] 178 | merge_list = [merge_time] 179 | else: 180 | group_list = [group_identity] 181 | merge_list = [merge_time] 182 | 183 | for group, merge in zip(group_list, merge_list): 184 | 185 | if profile: 186 | then = timesince(then) 187 | print('before sort') 188 | 189 | the_sort = sorted(ds_list, key=group) 190 | 191 | if profile: 192 | then = timesince(then) 193 | print('after sort, before group') 194 | 195 | ds_groups = [list(it) for k, it in itertools.groupby(the_sort, group)] 196 | 197 | if profile: 198 | then = timesince(then) 199 | print('after group, before merge') 200 | 201 | # npartitons = len(ds_groups) 202 | group_bag = dask.bag.from_sequence(ds_groups, npartitions=npartitions) 203 | ds_list = group_bag.map(merge).compute() 204 | 205 | if profile: 206 | then = timesince(then) 207 | print('after merge') 208 | 209 | del group_bag, ds_groups, the_sort 210 | 211 | if have_lead_time: 212 | nwm_dataset = merge_lead_time(ds_list) 213 | elif have_members: 214 | nwm_dataset = merge_member(ds_list) 215 | else: 216 | nwm_dataset = ds_list[0] 217 | 218 | del ds_list 219 | 220 | # Impose some order. 221 | if have_members: 222 | nwm_dataset = nwm_dataset.sortby(['member']) 223 | if have_lead_time: 224 | nwm_dataset = nwm_dataset.sortby(['reference_time', 'lead_time']) 225 | 226 | # Create a valid_time variable. I'm estimating that doing it here is more efficient 227 | # than adding more data to the collection processes. 228 | def calc_valid_time(ref, lead): 229 | return np.datetime64(int(ref) + int(lead), 'ns') 230 | if have_lead_time: 231 | nwm_dataset['valid_time'] = xr.apply_ufunc( 232 | calc_valid_time, 233 | nwm_dataset['reference_time'], 234 | nwm_dataset['lead_time'], 235 | vectorize=True 236 | ).transpose() # Not sure this is consistently anti-transposed. 237 | 238 | # Xarray sets nan as the fill value when there is none. Dont allow that... 239 | for key, val in nwm_dataset.variables.items(): 240 | if '_FillValue' not in nwm_dataset[key].encoding: 241 | nwm_dataset[key].encoding.update({'_FillValue': None}) 242 | 243 | # Clean up attributes 244 | new_attrs = collections.OrderedDict() 245 | if attrs_keep is not None: 246 | for key, value in nwm_dataset.attrs.items(): 247 | if key in attrs_keep: 248 | new_attrs[key] = nwm_dataset.attrs[key] 249 | 250 | nwm_dataset.attrs = new_attrs 251 | 252 | # Break into chunked dask array 253 | if chunks is not None: 254 | nwm_dataset = nwm_dataset.chunk(chunks=chunks) 255 | 256 | # I submitted a PR fix to xarray. 257 | # I will leave this here until the PR is merged. 
258 | # Workaround/prevent https://github.com/pydata/xarray/issues/1849 259 | # for v in nwm_dataset.variables.values(): 260 | # try: 261 | # del v.encoding["contiguous"] 262 | # except KeyError: # no problem 263 | # pass 264 | 265 | return nwm_dataset 266 | 267 | 268 | def open_whp_dataset_orig( 269 | paths: list, 270 | chunks: dict = None, 271 | attrs_keep: list = ['featureType', 'proj4', 272 | 'station_dimension', 'esri_pe_string', 273 | 'Conventions', 'model_version'], 274 | isel: dict = None, 275 | drop_variables: list = None, 276 | npartitions: int = None, 277 | profile: int = False, 278 | n_cores: int = 1 279 | ) -> xr.Dataset: 280 | 281 | import sys 282 | import os 283 | 284 | # print('n_cores', str(n_cores)) 285 | the_pool = Pool(n_cores) 286 | with dask.config.set(scheduler='processes', pool=the_pool): 287 | whp_ds = open_whp_dataset_inner( 288 | paths, 289 | chunks, 290 | attrs_keep, 291 | isel, 292 | drop_variables, 293 | npartitions, 294 | profile 295 | ) 296 | the_pool.close() 297 | return whp_ds 298 | 299 | 300 | def open_whp_dataset( 301 | paths: list, 302 | file_chunk_size: int = None, 303 | chunks: dict = None, 304 | attrs_keep: list = ['featureType', 'proj4', 305 | 'station_dimension', 'esri_pe_string', 306 | 'Conventions', 'model_version'], 307 | isel: dict = None, 308 | drop_variables: list = None, 309 | npartitions: int = None, 310 | profile: int = False, 311 | n_cores: int = 1, 312 | write_cumulative_file: pathlib.Path = None 313 | ) -> xr.Dataset: 314 | 315 | import sys 316 | import os 317 | import math 318 | import multiprocessing 319 | import pickle 320 | 321 | n_files = len(paths) 322 | print('n_files', str(n_files)) 323 | 324 | # Remove paths to files that do not exist; iterate over a copy to avoid skipping items. 325 | for p in list(paths): 326 | if not os.path.exists(p): 327 | print("removing file since it doesn't exist:", str(p)) 328 | paths.remove(p) 329 | 330 | if file_chunk_size is None: 331 | file_chunk_size = n_files 332 | 333 | if file_chunk_size >= n_files: 334 | the_pool = Pool(n_cores) 335 | with dask.config.set(scheduler='processes', pool=the_pool): 336 | whp_ds = open_whp_dataset_inner( 337 | paths=paths, 338 | chunks=chunks, 339 | attrs_keep=attrs_keep, 340 | isel=isel, 341 | drop_variables=drop_variables, 342 | npartitions=npartitions, 343 | profile=profile 344 | ) 345 | the_pool.close() 346 | 347 | else: 348 | 349 | n_file_chunks = math.ceil(n_files / file_chunk_size) 350 | start_list = [file_chunk_size * ii for ii in range(n_file_chunks)] 351 | end_list = [file_chunk_size * (ii + 1) - 1 for ii in range(n_file_chunks)] 352 | # Process each chunk of files in turn, merging results cumulatively. 353 | 354 | whp_ds = None 355 | for start_ind, end_ind in zip(start_list, end_list): 356 | the_pool = Pool(n_cores) 357 | with dask.config.set(scheduler='processes', pool=the_pool): 358 | ds_chunk = open_whp_dataset_inner( 359 | paths=paths[start_ind:(end_ind+1)], 360 | chunks=chunks, 361 | attrs_keep=attrs_keep, 362 | isel=isel, 363 | drop_variables=drop_variables, 364 | npartitions=npartitions, 365 | profile=profile 366 | ) 367 | the_pool.close() 368 | 369 | if ds_chunk is not None: 370 | if whp_ds is None: 371 | whp_ds = ds_chunk 372 | else: 373 | whp_ds = xr.merge([whp_ds, ds_chunk]) 374 | if write_cumulative_file is not None: 375 | if not write_cumulative_file.parent.exists(): 376 | write_cumulative_file.parent.mkdir() 377 | whp_ds.to_netcdf(write_cumulative_file) 378 | cumulative_files_file = write_cumulative_file.parent / ( 379 | write_cumulative_file.stem + '.files.pkl') 380 | pickle.dump( 381 | paths[0:end_ind], 382 | open(str(cumulative_files_file), 
'wb')) 383 | 384 | return whp_ds 385 | -------------------------------------------------------------------------------- /wrfhydropy/core/domain.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import shutil 3 | 4 | from .ioutils import \ 5 | WrfHydroStatic, \ 6 | WrfHydroTs 7 | from .namelist import JSONNamelist 8 | 9 | 10 | class Domain(object): 11 | """Class for a WRF-Hydro domain, which constitutes all domain-specific files needed for a 12 | setup. 13 | """ 14 | 15 | def __init__(self, 16 | domain_top_dir: str, 17 | domain_config: str, 18 | compatible_version: str = None, 19 | hydro_namelist_patch_file: str = 'hydro_namelist_patches.json', 20 | hrldas_namelist_patch_file: str = 'hrldas_namelist_patches.json' 21 | ): 22 | """Instantiate a Domain object 23 | Args: 24 | domain_top_dir: Parent directory containing all domain directories and files. 25 | domain_config: The domain configuration to use, options are 'NWM', 26 | 'Gridded', or 'Reach' 27 | compatible_version: String indicating the compatible model version, required if no 28 | .version file included in domain_top_dir. 29 | hydro_namelist_patch_file: Filename of json file containing namelist patches for 30 | hydro namelist 31 | hrldas_namelist_patch_file: Filename of json file containing namelist patches for 32 | hrldas namelist 33 | """ 34 | 35 | # Instantiate arguments to object 36 | # Make file paths 37 | self.domain_top_dir = pathlib.Path(domain_top_dir).absolute() 38 | """pathlib.Path: pathlib.Paths to *.TBL files generated at compile-time.""" 39 | 40 | self.domain_config = domain_config.lower() 41 | """str: Specified configuration for which the domain is to be used, e.g. 'NWM_ana'""" 42 | 43 | self.compatible_version = compatible_version 44 | """str: Source-code version for which the domain is to be used.""" 45 | 46 | # Check .version file if compatible_version not specified 47 | if self.compatible_version is None: 48 | try: 49 | with self.domain_top_dir.joinpath('.version').open() as f: 50 | self.compatible_version = f.read() 51 | except FileNotFoundError: 52 | raise FileNotFoundError('file .version not found in directory ' + 53 | str(self.domain_top_dir) + ' and compatible_version not ' 54 | 'specified') 55 | 56 | # Load namelist patches 57 | hydro_namelist_patch_file = self.domain_top_dir.joinpath(hydro_namelist_patch_file) 58 | hrldas_namelist_patch_file = self.domain_top_dir.joinpath(hrldas_namelist_patch_file) 59 | 60 | self.hydro_namelist_patches = JSONNamelist(str(hydro_namelist_patch_file)) 61 | """Namelist: Domain-specific hydro namelist settings.""" 62 | self.hydro_namelist_patches = self.hydro_namelist_patches.get_config(self.domain_config) 63 | 64 | self.hrldas_namelist_patches = JSONNamelist(str(hrldas_namelist_patch_file)) 65 | """Namelist: Domain-specific hrldas namelist settings.""" 66 | self.hrldas_namelist_patches = self.hrldas_namelist_patches.get_config(self.domain_config) 67 | 68 | self.hydro_files = list() 69 | """list: Files specified in hydro_nlist section of the domain namelist patches""" 70 | self.nudging_files = list() 71 | """list: Files specified in nudging_nlist section of the domain namelist patches""" 72 | self.lsm_files = list() 73 | """list: Files specified in noahlsm_offline section of the domain namelist patches""" 74 | 75 | self.nudging_dir = None 76 | """pathlib.Path: path to the nudging obs directory""" 77 | 78 | self.forcing_dir = None 79 | """pathlib.Path: path to the forcing directory""" 80 | 81 | ### 82 | 83 | # Create file 
paths from hydro namelist 84 | domain_hydro_nlist = self.hydro_namelist_patches['hydro_nlist'] 85 | 86 | for key, value in domain_hydro_nlist.items(): 87 | file_path = self.domain_top_dir.joinpath(str(value)) 88 | if file_path.is_file() is True: 89 | if file_path.suffix == '.nc': 90 | self.hydro_files.append(WrfHydroStatic(file_path)) 91 | else: 92 | self.hydro_files.append(file_path) 93 | 94 | # Create file paths from nudging namelist 95 | domain_nudging_nlist = self.hydro_namelist_patches['nudging_nlist'] 96 | 97 | for key, value in domain_nudging_nlist.items(): 98 | file_path = self.domain_top_dir.joinpath(str(value)) 99 | if file_path.is_file() is True: 100 | if file_path.suffix == '.nc': 101 | self.nudging_files.append(WrfHydroStatic(file_path)) 102 | else: 103 | self.nudging_files.append(file_path) 104 | if key == 'timeslicepath' and value != '': 105 | self.nudging_dir = file_path 106 | self.nudging_files.append(WrfHydroTs(file_path.glob('*'))) 107 | 108 | # Create file paths from lsm namelist 109 | domain_lsm_nlist = \ 110 | self.hrldas_namelist_patches["noahlsm_offline"] 111 | 112 | for key, value in domain_lsm_nlist.items(): 113 | file_path = self.domain_top_dir.joinpath(str(value)) 114 | 115 | if file_path.is_file() is True: 116 | if file_path.suffix == '.nc': 117 | self.lsm_files.append(WrfHydroStatic(file_path)) 118 | else: 119 | self.lsm_files.append(file_path) 120 | 121 | if key == 'indir': 122 | self.forcing_dir = file_path 123 | 124 | self.forcing_data = WrfHydroTs(self.forcing_dir.glob('*')) 125 | 126 | def copy_files(self, dest_dir: str, symlink: bool = True): 127 | """Copy domain files to a new directory 128 | Args: 129 | dir: The destination directory for domain files 130 | symlink: Symlink domain files instead of copy 131 | """ 132 | 133 | # Convert dir to pathlib.Path 134 | dest_dir = pathlib.Path(dest_dir) 135 | 136 | # Make directory if it does not exist. 137 | if not dest_dir.is_dir(): 138 | dest_dir.mkdir(parents=True) 139 | 140 | # Create symlinks/copies 141 | # Symlink/copy in forcing 142 | from_dir = self.forcing_dir 143 | to_dir = dest_dir.joinpath(from_dir.name) 144 | if symlink: 145 | to_dir.symlink_to(from_dir, target_is_directory=True) 146 | else: 147 | shutil.copytree(str(from_dir), str(to_dir)) 148 | 149 | # create DOMAIN directory and symlink in files 150 | # Symlink in hydro_files 151 | for from_path in self.hydro_files: 152 | # Get new file path for run directory, relative to the top-level domain directory 153 | # This is needed to ensure the path matches the domain namelist 154 | try: 155 | relative_path = from_path.relative_to(self.domain_top_dir) 156 | except ValueError: 157 | pass 158 | else: 159 | to_path = dest_dir.joinpath(relative_path) 160 | if to_path.parent.is_dir() is False: 161 | to_path.parent.mkdir(parents=True) 162 | if symlink: 163 | to_path.symlink_to(from_path) 164 | else: 165 | shutil.copy(str(from_path), str(to_path)) 166 | 167 | # Symlink in nudging files 168 | 169 | # handling nudging obs files 170 | # Users may signal "None" by the null string (''), treat them the same. 
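        # A minimal sketch of that convention (the helper name below is
        # hypothetical and not part of this class). An equality/truthiness
        # test is the robust spelling: `is ''` relies on string interning
        # and is never true for a pathlib.Path, so only None is caught by
        # an identity check.
        #
        #     def nudging_dir_is_set(nudging_dir) -> bool:
        #         """True when a nudging obs directory has been configured."""
        #         return nudging_dir is not None and str(nudging_dir) != ''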
171 | if not (self.nudging_dir is None or self.nudging_dir is ''): 172 | from_dir = self.nudging_dir 173 | try: 174 | to_dir = dest_dir.joinpath(from_dir.relative_to(self.domain_top_dir)) 175 | except ValueError: 176 | pass 177 | else: 178 | if symlink: 179 | to_dir.symlink_to(from_dir, target_is_directory=True) 180 | else: 181 | shutil.copy(str(from_dir), str(to_dir)) 182 | 183 | for from_path in self.nudging_files: 184 | # Get new file path for run directory, relative to the top-level domain directory 185 | # This is needed to ensure the path matches the domain namelist 186 | if type(from_path) is not WrfHydroTs: 187 | try: 188 | relative_path = from_path.relative_to(self.domain_top_dir) 189 | except ValueError: 190 | pass 191 | else: 192 | to_path = dest_dir.joinpath(relative_path) 193 | if to_path.parent.is_dir() is False: 194 | to_path.parent.mkdir(parents=True) 195 | if symlink: 196 | to_path.symlink_to(from_path) 197 | else: 198 | shutil.copy(str(from_path), str(to_path)) 199 | 200 | # Symlink in lsm files 201 | for from_path in self.lsm_files: 202 | # Get new file path for run directory, relative to the top-level domain directory 203 | # This is needed to ensure the path matches the domain namelist 204 | try: 205 | relative_path = from_path.relative_to(self.domain_top_dir) 206 | except ValueError: 207 | pass 208 | else: 209 | to_path = dest_dir.joinpath(relative_path) 210 | if to_path.parent.is_dir() is False: 211 | to_path.parent.mkdir(parents=True) 212 | if symlink: 213 | to_path.symlink_to(from_path) 214 | else: 215 | shutil.copy(str(from_path), str(to_path)) 216 | 217 | model_files = [*self.hydro_files, 218 | *self.nudging_files, 219 | *self.lsm_files] 220 | for ff in model_files: 221 | if type(ff) is not WrfHydroTs: 222 | if 'RESTART' in str(ff.name): 223 | to_path = dest_dir.joinpath(ff.name).absolute() 224 | if symlink: 225 | to_path.symlink_to(ff) 226 | else: 227 | shutil.copy(str(ff), str(to_path)) 228 | if 'HYDRO_RST' in str(ff.name): 229 | to_path = dest_dir.joinpath(ff.name).absolute() 230 | if symlink: 231 | to_path.symlink_to(ff) 232 | else: 233 | shutil.copy(str(ff), str(to_path)) 234 | if 'nudgingLastObs' in str(ff.name): 235 | to_path = dest_dir.joinpath(ff.name).absolute() 236 | if symlink: 237 | to_path.symlink_to(ff) 238 | else: 239 | shutil.copy(str(ff), str(to_path)) 240 | -------------------------------------------------------------------------------- /wrfhydropy/core/ensemble_tools.py: -------------------------------------------------------------------------------- 1 | from boltons.iterutils import remap 2 | import copy 3 | import datetime 4 | from deepdiff.diff import DeepDiff 5 | import os 6 | import pathlib 7 | import sys 8 | 9 | 10 | def is_sub_obj(obj): 11 | """Test if an object is has a __dict__ (may not be the best definition of an object, 12 | but it works for classes in wrfhydropy).""" 13 | 14 | # If a dict, dont use __dict__ 15 | if isinstance(obj, dict): 16 | return False 17 | 18 | try: 19 | _ = obj.__dict__ 20 | except AttributeError: 21 | return False 22 | return True 23 | 24 | 25 | def get_sub_objs(obj): 26 | """Identify which attributes of an object are objects with __dicts__.""" 27 | sub_obj_dict = {kk: is_sub_obj(obj[kk]) for (kk, vv) in obj.items()} 28 | return list(remap(sub_obj_dict, lambda p, k, v: v).keys()) 29 | 30 | 31 | def dictify(obj): 32 | """Recursively transform deepcopy sub __dicts__ of an object into dicts for dictionary 33 | traversal of a deepcopy of the object.""" 34 | the_dict = copy.deepcopy(obj.__dict__) 35 | sub_dicts = 
get_sub_objs(the_dict) 36 | for ss in sub_dicts: 37 | the_dict[ss] = dictify(the_dict[ss]) 38 | return the_dict 39 | 40 | 41 | class DeepDiffEq(DeepDiff): 42 | """Extend Deep Diff to handle __eq__ for specified types.""" 43 | def __init__(self, 44 | t1, 45 | t2, 46 | eq_types, 47 | ignore_order=False, 48 | report_repetition=False, 49 | significant_digits=None, 50 | exclude_paths=set(), 51 | # exclude_regex_paths=set(), 52 | exclude_types=set(), 53 | # include_string_type_changes=False, 54 | verbose_level=1, 55 | view='text', 56 | **kwargs): 57 | 58 | # Must set this first for some reason. 59 | self.eq_types = set(eq_types) 60 | 61 | super().__init__(t1, 62 | t2, 63 | ignore_order=False, 64 | report_repetition=False, 65 | significant_digits=None, 66 | exclude_paths=set(), 67 | # exclude_regex_paths=set(), 68 | exclude_types=set(), 69 | # include_string_type_changes=False, 70 | verbose_level=1, 71 | view='text', 72 | **kwargs) 73 | 74 | # Have to force override __diff_obj. 75 | def _DeepDiff__diff_obj(self, level, parents_ids=frozenset({}), 76 | is_namedtuple=False): 77 | """Difference of 2 objects using their __eq__ if requested""" 78 | 79 | if type(level.t1) in self.eq_types: 80 | if level.t1 == level.t2: 81 | return 82 | else: 83 | self._DeepDiff__report_result('values_changed', level) 84 | return 85 | 86 | super(DeepDiffEq, self)._DeepDiff__diff_obj( 87 | level, 88 | parents_ids=frozenset({}), 89 | is_namedtuple=False 90 | ) 91 | 92 | 93 | # def get_ens_file_last_restart_datetime(run_dir): 94 | # """Use the filesystem to probe the current ensemble time.""" 95 | # run_dir = pathlib.Path(run_dir) 96 | # mem_dirs = sorted(run_dir.glob("member_*")) 97 | # hydro_last = [sorted(mm.glob('HYDRO_RST.*'))[-1].name for mm in mem_dirs] 98 | # if not all([hydro_last[0] == hh for hh in hydro_last]): 99 | # raise ValueError("Not all ensemble members at the same time (HYDRO_RST files).") 100 | # if len(sorted(mem_dirs[0].glob('RESTART.*'))): 101 | # lsm_last = [sorted(mm.glob('RESTART.*'))[-1] for mm in mem_dirs] 102 | # if not all([lsm_last[0] == ll for ll in lsm_last]): 103 | # raise ValueError("Not all ensemble members at the same time (RESTART files).") 104 | 105 | # ens_time = datetime.datetime.strptime( 106 | # str(hydro_last[0]).split('_RST.')[-1], 107 | # '%Y-%m-%d_%H:%M_DOMAIN1' 108 | # ) 109 | # return ens_time 110 | 111 | 112 | def get_ens_dotfile_end_datetime(run_dir): 113 | """Use the the .model_end_time files to get the current ensemble time.""" 114 | run_dir = pathlib.Path(run_dir) 115 | mem_dirs = sorted(run_dir.glob("member_*")) 116 | 117 | def read_dot_file(file): 118 | with open(file) as f: 119 | content = f.readline() 120 | return datetime.datetime.strptime(content, '%Y-%m-%d %H:%M:%S') 121 | 122 | end_times = [read_dot_file(mm / '.model_end_time') for mm in mem_dirs] 123 | if not all([end_times[0] == ee for ee in end_times]): 124 | raise ValueError("Not all ensemble members at the same time (HYDRO_RST files).") 125 | 126 | return end_times[0] 127 | 128 | 129 | def mute(): 130 | """A initializer for multiprocessing.Pool to keep the processes quiet.""" 131 | sys.stdout = open(os.devnull, 'w') 132 | sys.stderr = open(os.devnull, 'w') 133 | -------------------------------------------------------------------------------- /wrfhydropy/core/model.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pathlib 4 | import pickle 5 | import shlex 6 | import shutil 7 | import subprocess 8 | import uuid 9 | import warnings 10 
| 11 | from .namelist import JSONNamelist 12 | 13 | 14 | def get_git_revision_hash(the_dir: str) -> str: 15 | """Get the last git revision hash from a directory if directory is a git repository 16 | Args: 17 | the_dir: String for the directory path 18 | Returns: 19 | String with the git hash if a git repo or message if not 20 | """ 21 | 22 | the_dir = pathlib.Path(the_dir) 23 | 24 | # First test if this is even a git repo. (Have to allow for this unless the wrfhydropy 25 | # testing brings in the wrf_hydro_code as a repo with a .git file.) 26 | dir_is_repo = subprocess.run(["git", "branch"], 27 | stderr=subprocess.STDOUT, 28 | stdout=open(os.devnull, 'w'), 29 | cwd=str(the_dir.absolute())) 30 | if dir_is_repo.returncode != 0: 31 | return 'could_not_get_hash' 32 | 33 | dirty = subprocess.run(['git', 'diff-index', 'HEAD'], # --quiet seems to give the wrong result. 34 | stdout=subprocess.PIPE, 35 | stderr=subprocess.PIPE, 36 | cwd=str(the_dir.absolute())).returncode 37 | the_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=str(the_dir.absolute())) 38 | the_hash = the_hash.decode('utf-8').split()[0] 39 | if dirty: 40 | the_hash += '--DIRTY--' 41 | return the_hash 42 | 43 | 44 | class Model(object): 45 | """Class for a WRF-Hydro model, which consitutes the model source code and compiled binary. 46 | """ 47 | 48 | def __init__( 49 | self, 50 | source_dir: str, 51 | model_config: str, 52 | hydro_namelist_config_file: str=None, 53 | hrldas_namelist_config_file: str=None, 54 | compile_options_config_file: str=None, 55 | compiler: str = 'gfort', 56 | pre_compile_cmd: str = None, 57 | compile_options: dict = None 58 | ): 59 | 60 | """Instantiate a Model object. 61 | Args: 62 | source_dir: Directory containing the source code, e.g. 63 | 'wrf_hydro_nwm/src'. 64 | model_config: The configuration of the model. Used to match a model to a domain 65 | configuration. Must be a key in both the *_namelists.json of in the source directory 66 | and the *_namelist_patches.json in the domain directory. 67 | machine_spec: Optional dictionary of machine specification or string containing the 68 | name of a known machine. Known machine names include 'cheyenne'. For an 69 | example of a machine specification see the 'cheyenne' machine specification using 70 | wrfhydropy.get_machine_spec('cheyenne'). 71 | hydro_namelist_config_file: Path to a hydro namelist config file external to the model 72 | repository. Default(None) implies using the model src/hydro_namelists.json. 73 | hrldas_namelist_config_file: As for hydro_namelist_config_file, but for hrldas namelist. 74 | compile_options_config_file: As for hydro_namelist_config_file, but for compile options. 75 | compiler: The compiler to use, must be one of 'pgi','gfort', 76 | 'ifort', or 'luna'. 77 | compile_options: Changes to default compile-time options. 78 | """ 79 | 80 | # Instantiate all attributes and methods 81 | # Attributes set by init args 82 | self.source_dir = pathlib.Path(source_dir) 83 | """pathlib.Path: pathlib.Path object for source code directory.""" 84 | 85 | self.model_config = model_config.lower() 86 | """str: Specified configuration for which the model is to be used, e.g. 'nwm_ana'""" 87 | 88 | self.compiler = compiler 89 | """str: The compiler chosen at compile time.""" 90 | 91 | self.pre_compile_cmd = pre_compile_cmd 92 | """str: Command string to be executed prior to model compilation, e.g. to load modules""" 93 | 94 | self.compile_options = dict() 95 | """dict: Compile-time options. 
Defaults are loaded from json file stored with source 96 | code.""" 97 | 98 | # Set nameilst config file defaults while allowing None to be passed. 99 | self.hydro_namelist_config_file = hydro_namelist_config_file 100 | """Namelist: Hydro namelist file specified for model config""" 101 | self.hrldas_namelist_config_file = hrldas_namelist_config_file 102 | """Namelist: HRLDAS namelist file specified for model config.""" 103 | self.compile_options_config_file = compile_options_config_file 104 | """Namelist: Compile options file specified for model config.""" 105 | 106 | default_hydro_namelist_config_file = 'hydro_namelists.json' 107 | default_hrldas_namelist_config_file = 'hrldas_namelists.json' 108 | default_compile_options_config_file = 'compile_options.json' 109 | 110 | if self.hydro_namelist_config_file is None: 111 | self.hydro_namelist_config_file = default_hydro_namelist_config_file 112 | if self.hrldas_namelist_config_file is None: 113 | self.hrldas_namelist_config_file = default_hrldas_namelist_config_file 114 | if self.compile_options_config_file is None: 115 | self.compile_options_config_file = default_compile_options_config_file 116 | 117 | # Load master namelists 118 | self.hydro_namelists = JSONNamelist( 119 | str(self.source_dir.joinpath(self.hydro_namelist_config_file)) 120 | ) 121 | """Namelist: Hydro namelist for specified model config""" 122 | self.hydro_namelists = self.hydro_namelists.get_config(self.model_config) 123 | 124 | self.hrldas_namelists = JSONNamelist( 125 | str(self.source_dir.joinpath(self.hrldas_namelist_config_file)) 126 | ) 127 | """Namelist: HRLDAS namelist for specified model config""" 128 | self.hrldas_namelists = self.hrldas_namelists.get_config(self.model_config) 129 | 130 | # Attributes set by other methods 131 | self.compile_dir = None 132 | """pathlib.Path: pathlib.Path object pointing to the compile directory.""" 133 | 134 | self.git_hash = self._get_githash() 135 | """str: The git revision hash if seld.source_dir is a git repository""" 136 | 137 | self.version = None 138 | """str: Source code version from .version file stored with the source code.""" 139 | 140 | self.compile_dir = None 141 | """pathlib.Path: pathlib.Path object pointing to the compile directory.""" 142 | 143 | self.configure_log = None 144 | """CompletedProcess: The subprocess object generated at configure.""" 145 | 146 | self.compile_log = None 147 | """CompletedProcess: The subprocess object generated at compile.""" 148 | 149 | self.object_id = None 150 | """str: A unique id to join object to compile directory.""" 151 | 152 | self.table_files = list() 153 | """list: pathlib.Paths to *.TBL files generated at compile-time.""" 154 | 155 | self.wrf_hydro_exe = None 156 | """pathlib.Path: pathlib.Path to wrf_hydro.exe file generated at compile-time.""" 157 | 158 | # Set attributes 159 | # Get code version 160 | with self.source_dir.joinpath('.version').open() as f: 161 | self.version = f.read() 162 | 163 | # Load compile options 164 | self.compile_options = JSONNamelist( 165 | str(self.source_dir.joinpath(self.compile_options_config_file)) 166 | ) 167 | """Namelist: Hydro namelist for specified model config""" 168 | self.compile_options = self.compile_options.get_config(self.model_config) 169 | 170 | # "compile_options" is the argument to __init__ 171 | if compile_options is not None: 172 | self.compile_options.update(compile_options) 173 | 174 | # Add compiler and compile options as attributes and update if needed 175 | self.compiler = compiler 176 | 177 | def compile(self, 178 | 
compile_dir: pathlib.Path) -> str: 179 | """Compiles WRF-Hydro using specified compiler and compile options. 180 | Args: 181 | compile_dir: A non-existant directory to use for compilation. 182 | Returns: 183 | Success of compilation and compile directory used. Sets additional 184 | attributes to WrfHydroModel 185 | """ 186 | 187 | self.compile_dir = pathlib.Path(compile_dir).absolute() 188 | 189 | self.modules = subprocess.run('module list', shell=True, stderr=subprocess.PIPE).stderr 190 | 191 | # check compile directory. 192 | if not self.compile_dir.is_dir(): 193 | warnings.warn(str(self.compile_dir.absolute()) + ' directory does not exist, creating') 194 | self.compile_dir.mkdir(parents=True) 195 | 196 | # Remove run directory if it exists in the source_dir 197 | source_compile_dir = self.source_dir.joinpath('Run') 198 | if source_compile_dir.is_dir(): 199 | shutil.rmtree(str(source_compile_dir.absolute())) 200 | 201 | # Get directory for setEnvar 202 | compile_options_file = self.source_dir.joinpath('compile_options.sh') 203 | 204 | # Write setEnvar file 205 | with compile_options_file.open(mode='w') as file: 206 | for option, value in self.compile_options.items(): 207 | file.write("export {}={}\n".format(option, value)) 208 | 209 | # Compile 210 | # Create compile command for machine spec 211 | compile_cmd = '/bin/bash -c "' 212 | if self.pre_compile_cmd is not None: 213 | compile_cmd += self.pre_compile_cmd + '; ' 214 | compile_cmd += './configure ' + self.compiler + '; ' 215 | compile_cmd += './compile_offline_NoahMP.sh ' 216 | compile_cmd += str(compile_options_file.absolute()) 217 | compile_cmd += '"' 218 | compile_cmd = shlex.split(compile_cmd) 219 | 220 | self.compile_log = subprocess.run( 221 | compile_cmd, 222 | stdout=subprocess.PIPE, 223 | stderr=subprocess.PIPE, 224 | cwd=str(self.source_dir.absolute()) 225 | ) 226 | 227 | # Add in unique ID file to match this object to prevent assosciating 228 | # this directory with another object 229 | self.object_id = str(uuid.uuid4()) 230 | 231 | with self.compile_dir.joinpath('.uid').open(mode='w') as f: 232 | f.write(self.object_id) 233 | 234 | if self.compile_log.returncode == 0: 235 | # Open permissions on compiled files 236 | subprocess.run(['chmod', '-R', '755', str(self.source_dir.joinpath('Run'))]) 237 | 238 | # Wrf hydro always puts files in source directory under a new directory called 'Run' 239 | # Copy files to the specified simulation directory if its not the same as the 240 | # source code directory 241 | if len(self.table_files) == 0: 242 | self.table_files = list(self.source_dir.joinpath('Run').glob('*.TBL')) 243 | 244 | shutil.copyfile(str(self.source_dir.joinpath('Run').joinpath('wrf_hydro.exe')), 245 | str(self.compile_dir.joinpath('wrf_hydro.exe'))) 246 | 247 | # Remove old files 248 | # shutil.rmtree(str(self.source_dir.joinpath('Run'))) 249 | 250 | # Open permissions on copied compiled files 251 | subprocess.run(['chmod', '-R', '755', str(self.compile_dir)]) 252 | 253 | # Get file lists as attributes 254 | # Get list of table file paths 255 | 256 | # Get wrf_hydro.exe file path 257 | self.wrf_hydro_exe = self.compile_dir.joinpath('wrf_hydro.exe') 258 | 259 | # Save the object out to the compile directory 260 | with self.compile_dir.joinpath('WrfHydroModel.pkl').open(mode='wb') as f: 261 | pickle.dump(self, f, 2) 262 | 263 | print('Model successfully compiled into ' + str(self.compile_dir.absolute())) 264 | else: 265 | # Save the object out to the compile directory 266 | with 
self.compile_dir.joinpath('WrfHydroModel.pkl').open(mode='wb') as f: 267 | pickle.dump(self, f, 2) 268 | raise ValueError('Model did not successfully compile.' + 269 | self.compile_log.stderr.decode('utf-8')) 270 | 271 | def copy_files(self, dest_dir: str, symlink: bool = True): 272 | """Copy domain files to new directory 273 | Args: 274 | dest_dir: The destination directory for files 275 | symlink: Symlink files instead of copy 276 | """ 277 | 278 | # Convert dir to pathlib.Path 279 | dest_dir = pathlib.Path(dest_dir) 280 | 281 | # Make directory if it does not exist. 282 | if not dest_dir.is_dir(): 283 | dest_dir.mkdir(parents=True) 284 | 285 | # Symlink/copy in exe 286 | from_file = self.wrf_hydro_exe 287 | to_file = dest_dir.joinpath(from_file.name) 288 | if symlink: 289 | to_file.symlink_to(from_file) 290 | else: 291 | shutil.copy(str(from_file), str(to_file)) 292 | 293 | def _get_githash(self) -> str: 294 | """Private method to get the git hash if source_dir is a git repository 295 | Returns: 296 | git hash string 297 | """ 298 | return get_git_revision_hash(self.source_dir) 299 | -------------------------------------------------------------------------------- /wrfhydropy/core/namelist.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import deepdiff 3 | import f90nml 4 | import json 5 | from typing import Union 6 | import warnings 7 | 8 | 9 | def load_namelist(nml_path: str) -> dict: 10 | """Load a F90 namelist into a wrfhydropy.Namelist object 11 | Args: 12 | nml_path: String containing path to F90 namelist 13 | Returns: 14 | dict interpretation of namelist 15 | """ 16 | nml_dict = Namelist(json.loads(json.dumps(f90nml.read(nml_path), sort_keys=True))) 17 | return nml_dict 18 | 19 | 20 | class JSONNamelist(object): 21 | """Class for a WRF-Hydro JSON namelist containing one more configurations""" 22 | def __init__( 23 | self, 24 | file_path: str): 25 | """Instantiate a Namelist object. 26 | Args: 27 | file_path: Path to the namelist file to open, can be a json or fortran90 namelist. 28 | """ 29 | self._json_namelist = json.load(open(file_path, mode='r')) 30 | self.configs = self._json_namelist.keys() 31 | 32 | def get_config(self, config: str): 33 | """Get a namelist for a given configuration. This works internally by grabbing the base 34 | namelist and updating with the config-specific changes. 35 | Args: 36 | config: The configuration to retrieve 37 | """ 38 | 39 | # This ifelse statement is to make the compile options files. 40 | # backwards-compatible. Should be left in through v2.1 (that makes sure v2.0 is covered). 41 | if 'base' in self._json_namelist.keys(): 42 | base_namelist = copy.deepcopy(self._json_namelist['base']) 43 | config_patches = copy.deepcopy(self._json_namelist[config]) 44 | # Update the base namelist with the config patches 45 | config_namelist = dict_merge(base_namelist, config_patches) 46 | 47 | else: 48 | # One can pass any "nwm_*" config to get the compile options. 49 | # if that specific config is not there, "nwm" config is used 50 | # for the compile options with a warning. 
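        # For example (hypothetical file contents and configuration name),
        # the fallback behaves roughly as sketched here:
        #
        #     compile_opts = JSONNamelist('compile_options.json')
        #     # 'nwm_long_range' is not a key in the file, but it contains
        #     # 'nwm', so the 'nwm' compile options are returned and a
        #     # warning is issued.
        #     opts = compile_opts.get_config('nwm_long_range')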
51 | if config not in self._json_namelist.keys(): 52 | if 'nwm' in config and 'nwm' in self._json_namelist.keys(): 53 | warnings.warn( 54 | "The compile configuration 'nwm' is inferred from the" 55 | " configuration passed: " + config) 56 | config = 'nwm' 57 | config_namelist = copy.deepcopy(self._json_namelist[config]) 58 | 59 | return Namelist(config_namelist) 60 | 61 | 62 | class Namelist(dict): 63 | """Class for a WRF-Hydro namelist""" 64 | 65 | def write(self, path: str, mode='x'): 66 | """Write a namelist to file as a fortran-compatible namelist 67 | Args: 68 | path: The file path 69 | """ 70 | with open(str(path), mode=mode) as nml_file: 71 | f90nml.write(self, nml_file) 72 | 73 | def patch(self, patch: dict): 74 | """Recursively patch a namelist with key values from another namelist 75 | Args: 76 | patch: A Namelist or dict object containing the patches 77 | """ 78 | patched_namelist = dict_merge(copy.deepcopy(self), 79 | copy.deepcopy(patch)) 80 | return patched_namelist 81 | 82 | 83 | def dict_merge(dct: dict, merge_dct: dict) -> dict: 84 | """ Recursive dict merge. Inspired by :meth:``dict.update()``, instead of 85 | updating only top-level keys, dict_merge recurses down into dicts nested 86 | to an arbitrary depth, updating keys. The ``merge_dct`` is merged into 87 | ``dct``. 88 | Args: 89 | dct: dict onto which the merge is executed 90 | merge_dct: dict merged into dct 91 | Returns: 92 | The merged dict 93 | """ 94 | 95 | for key, value in merge_dct.items(): 96 | if key in dct.keys() and type(value) is dict: 97 | dict_merge(dct[key], merge_dct[key]) 98 | else: 99 | dct[key] = merge_dct[key] 100 | 101 | return dct 102 | 103 | 104 | def diff_namelist( 105 | old_namelist: Union[Namelist, str], 106 | new_namelist: Union[Namelist, str], **kwargs) -> dict: 107 | """Diff two Namelist objects or fortran 90 namelist files and return a dictionary of 108 | differences. 109 | 110 | Args: 111 | old_namelist: String containing path to the first namelist file, referred to as 'old' in 112 | outputs. 113 | new_namelist: String containing path to the second namelist file, referred to as 'new' in 114 | outputs.
115 | **kwargs: Additional arguments passed onto deepdiff.DeepDiff method 116 | Returns: 117 | The differences between the two namelists 118 | """ 119 | 120 | # If supplied as strings try and read in from file path 121 | if type(old_namelist) == str: 122 | old_namelist = load_namelist(old_namelist) 123 | if type(new_namelist) == str: 124 | new_namelist = load_namelist(new_namelist) 125 | 126 | # Diff the namelists 127 | differences = deepdiff.DeepDiff(old_namelist, new_namelist, ignore_order=True, **kwargs) 128 | differences_dict = dict(differences) 129 | return (differences_dict) 130 | -------------------------------------------------------------------------------- /wrfhydropy/core/schedulers.py: -------------------------------------------------------------------------------- 1 | # Note: All other imports for individual schedulers should be done in the respective scheduler 2 | # class functions so that imports can be isolated to relevant schedulers 3 | 4 | from abc import ABC, abstractmethod 5 | 6 | 7 | class Scheduler(ABC): 8 | def __init__(self): 9 | super().__init__() 10 | 11 | @abstractmethod 12 | def schedule(self, jobs): 13 | pass 14 | 15 | 16 | class PBSCheyenne(Scheduler): 17 | 18 | """A Scheduler object compatible with PBS on the NCAR Cheyenne system.""" 19 | def __init__( 20 | self, 21 | account: str, 22 | nproc: int, 23 | nnodes: int, 24 | mem: int = None, 25 | ppn: int = None, 26 | queue: str = 'regular', 27 | walltime: str = "12:00:00", 28 | email_who: str = None, 29 | email_when: str = 'abe', 30 | custom: dict = {} 31 | ): 32 | """Initialize an PBSCheyenne object. 33 | Args: 34 | account: The account string 35 | nproc: Number of processors to request 36 | nnodes: Number of nodes to request 37 | ppn: Number of processors per node 38 | mem: Memory in GB usage/request on node (109 for fat nodes). 39 | email_who: Email address for PBS notifications 40 | email_when: PBS email frequency options. Options include 'a' for on abort, 41 | 'b' for before each job, and 'e' for after each job. 42 | queue: The queue to use, options are 'regular', 'premium', and 'shared' 43 | walltime: The wall clock time in HH:MM:SS format, max time is 12:00:00 44 | """ 45 | 46 | # Declare attributes. 47 | # property construction 48 | self._sim_dir = None 49 | self._nproc = nproc 50 | self._nnodes = nnodes 51 | self._ppn = ppn 52 | 53 | # Scheduler options dict 54 | # TODO: Make this more elegant than hard coding for maintenance sake 55 | self.scheduler_opts = { 56 | 'account': account, 57 | 'email_when': email_when, 58 | 'email_who': email_who, 59 | 'queue': queue, 60 | 'walltime': walltime, 61 | 'mem': mem, 62 | 'custom': custom 63 | } 64 | 65 | def schedule(self, jobs: list): 66 | """Schedule one or more jobs using the scheduler scheduler 67 | Args: 68 | jobs: list of jobs to schedule 69 | """ 70 | import subprocess 71 | import shlex 72 | import pathlib 73 | import os 74 | 75 | current_dir = pathlib.Path(os.curdir) 76 | 77 | # TODO: Find a way to protect the job order so that once someone executes schedule... 
78 | # they can't change the order, may not be an issue except for if scheduling fails 79 | # somewhere 80 | 81 | self._write_job_pbs(jobs=jobs) 82 | 83 | # Make lists to store pbs scripts and pbs job ids to get previous dependency 84 | pbs_jids = [] 85 | pbs_scripts = [] 86 | 87 | qsub_str = "/bin/bash -c '" 88 | for job_num, option in enumerate(jobs): 89 | 90 | # This gets the pbs script name and pbs jid for submission 91 | # the obs jid is stored in a list so that the previous jid can be retrieved for 92 | # dependency 93 | job_id = jobs[job_num].job_id 94 | pbs_scripts.append(str(jobs[job_num].job_dir) + "/job_" + job_id + ".pbs") 95 | pbs_jids.append("job_" + job_id) 96 | 97 | # If first job, schedule using hold 98 | if job_num == 0: 99 | qsub_str += pbs_jids[job_num] + "=`qsub -h " + pbs_scripts[job_num] + "`;" 100 | # Else schedule using job dependency on previous pbs jid 101 | else: 102 | qsub_str += pbs_jids[job_num] + "=`qsub -W depend=afterok:${" + pbs_jids[ 103 | job_num-1] + "} " + pbs_scripts[job_num] + "`;" 104 | 105 | qsub_str += "qrls ${" + pbs_jids[0] + "};" 106 | qsub_str += "'" 107 | 108 | # Just for debugging purposes 109 | print("qsub_str: ", qsub_str) 110 | # This stacks up dependent jobs in PBS in the same order as the job list 111 | subprocess.run(shlex.split(qsub_str), 112 | cwd=str(current_dir)) 113 | 114 | def _write_job_pbs(self, jobs): 115 | """Private method to write bash PBS scripts for submitting each job """ 116 | import copy 117 | import sys 118 | 119 | # Get the current pytohn executable to handle virtual environments in the scheduler 120 | python_path = sys.executable 121 | 122 | for job in jobs: 123 | # Copy the job because the exe cmd is edited below 124 | job = copy.deepcopy(job) 125 | custom = self.scheduler_opts['custom'] 126 | 127 | # Write PBS script 128 | jobstr = "" 129 | jobstr += "#!/bin/sh\n" 130 | jobstr += "#PBS -N {0}\n".format(job.job_id) 131 | jobstr += "#PBS -A {0}\n".format(self.scheduler_opts['account']) 132 | jobstr += "#PBS -q {0}\n".format(self.scheduler_opts['queue']) 133 | 134 | if self.scheduler_opts['email_who'] is not None: 135 | jobstr += "#PBS -M {0}\n".format(self.scheduler_opts['email_who']) 136 | jobstr += "#PBS -m {0}\n".format(self.scheduler_opts['email_when']) 137 | jobstr += "\n" 138 | 139 | if '-l' not in custom or ( 140 | '-l' in custom and 'walltime' not in custom['-l']): 141 | jobstr += "#PBS -l walltime={0}\n".format(self.scheduler_opts['walltime']) 142 | 143 | if '-l' not in custom or ( 144 | '-l' in custom and 'select' not in custom['-l']): 145 | prcstr = "select={0}:ncpus={1}:mpiprocs={1}" 146 | prcstr = prcstr.format(self.nnodes, self.ppn) 147 | if self.scheduler_opts['mem'] is not None: 148 | prcstr = prcstr + ":mem={0}GB" 149 | prcstr = prcstr.format(self.scheduler_opts['mem']) 150 | prcstr = prcstr 151 | jobstr += "#PBS -l " + prcstr + "\n" 152 | jobstr += "\n" 153 | 154 | if '-l' in custom: 155 | jobstr += "#PBS -l " + custom['-l'] + "\n" 156 | jobstr += "\n" 157 | 158 | jobstr += "# Not using PBS standard error and out files to capture model output\n" 159 | jobstr += "# but these files might catch output and errors from the scheduler.\n" 160 | jobstr += "#PBS -o {0}\n".format(job.job_dir) 161 | jobstr += "#PBS -e {0}\n".format(job.job_dir) 162 | jobstr += "\n" 163 | 164 | # End PBS Header 165 | 166 | # if job.modules: 167 | # jobstr += 'module purge\n' 168 | # jobstr += 'module load {0}\n'.format(job.modules) 169 | # jobstr += "\n" 170 | 171 | jobstr += "# CISL suggests users set TMPDIR when running 
batch jobs on Cheyenne.\n" 172 | jobstr += "export TMPDIR=/glade/scratch/$USER/temp\n" 173 | jobstr += "mkdir -p $TMPDIR\n" 174 | jobstr += "\n" 175 | 176 | if self.scheduler_opts['queue'] == 'share': 177 | jobstr += "export MPI_USE_ARRAY=false\n" 178 | 179 | jobstr += "{0} run_job.py --job_id {1}\n".format(python_path, job.job_id) 180 | jobstr += "exit $?\n" 181 | 182 | pbs_file = job.job_dir.joinpath("job_" + job.job_id + ".pbs") 183 | with pbs_file.open(mode='w') as f: 184 | f.write(jobstr) 185 | 186 | # Write the python run script for the job 187 | if '{nproc}' in job._exe_cmd: 188 | # If the job exe uses "nproc" then apply the schedulers value. 189 | job._exe_cmd = job._exe_cmd.format(**{'nproc': self.nproc}) 190 | else: 191 | # regression tests use "{0}" format, try that here too 192 | job._exe_cmd = job._exe_cmd.format(self.nproc) 193 | 194 | job._write_run_script() 195 | 196 | def _solve_nodes_cores(self): 197 | """Private method to solve the number of nodes and cores if not all three specified""" 198 | 199 | import math 200 | 201 | if not self._nproc and self._nnodes and self._ppn: 202 | self._nproc = self._nnodes * self._ppn 203 | if not self._nnodes and self._nproc and self._ppn: 204 | self._nnodes = math.ceil(self._nproc / self._ppn) 205 | if not self._ppn and self._nnodes and self._nproc: 206 | self._ppn = math.ceil(self._nproc / self._nnodes) 207 | 208 | if None in [self._nproc, self._nnodes, self._ppn]: 209 | raise ValueError("Not enough information to solve all of nproc, nnodes, ppn.") 210 | 211 | @property 212 | def nproc(self): 213 | self._solve_nodes_cores() 214 | return self._nproc 215 | 216 | @nproc.setter 217 | def nproc(self, value): 218 | self._nproc = value 219 | 220 | @property 221 | def nnodes(self): 222 | self._solve_nodes_cores() 223 | return self._nnodes 224 | 225 | @nnodes.setter 226 | def nnodes(self, value): 227 | self._nnodes = value 228 | 229 | @property 230 | def ppn(self): 231 | self._solve_nodes_cores() 232 | return self._ppn 233 | 234 | @ppn.setter 235 | def ppn(self, value): 236 | self._ppn = value 237 | -------------------------------------------------------------------------------- /wrfhydropy/core/teams.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools 3 | import math 4 | import operator 5 | import os 6 | import pathlib 7 | import pickle 8 | from pprint import pprint 9 | import wrfhydropy 10 | 11 | 12 | def set_cycle_ens_sim_jobs(ens_obj, job): 13 | members = ens_obj.members 14 | for mem in members: 15 | # Currently these are always strings, never in memory. 16 | if isinstance(mem, str): 17 | pkl_file = ens_obj._compose_dir / (mem + '/WrfHydroSim.pkl') 18 | sim = pickle.load(pkl_file.open('rb')) 19 | sim.jobs[0]._entry_cmd = job._entry_cmd 20 | sim.jobs[0]._exe_cmd = job._exe_cmd 21 | sim.jobs[0]._exit_cmd = job._exit_cmd 22 | sim.pickle(pkl_file) 23 | 24 | 25 | def get_cycle_ens_sim_job_exits(cycle_obj): 26 | members = cycle_obj.members 27 | statuses = {} 28 | for mem in members: 29 | pkl_file = cycle_obj._compose_dir / (mem + '/WrfHydroSim.pkl') 30 | sim = pickle.load(pkl_file.open('rb')) 31 | statuses.update({pkl_file: sim.jobs[0].exit_status}) 32 | success = all([value == 0 for key, value in statuses.items()]) 33 | if success: 34 | return 0 35 | else: 36 | return 1 37 | 38 | 39 | def parallel_teams_run(arg_dict): 40 | """ 41 | Parallelizable function to run simulations across nodes. 42 | On the master node, python runs multiprocessing. 
Each separate process 43 | is a "team" of simulations to run. Multiprocessing makes MPI calls with 44 | a specific syntax to run the MPI executable on specfific (potentially 45 | other) nodes. This provides 2 levels of parallelism. 46 | 47 | This function is called (in parallel) once for each team by 48 | multiprocessing. Each team runs its set of simulations sequentially but 49 | each simulation it runs is parallel via MPI. (In the case of 50 | ensemble-cycles each team runs an ensemble but the ensemble runs its 51 | members sequentially.) 52 | 53 | Input: 54 | arg_dict: 55 | arg_dict == { 56 | 'obj_name' : string, either "member" or "cast" (or some other 57 | object), matches the object name used in the 58 | team_dict below (first argument) 59 | 'compose_dir': , 62 | 'team_dict' : 64 | } 65 | where: 66 | team_dict == { 67 | object_name: key/name is either 'members' or 'casts', the value 68 | is a 70 | 'nodes' : , 72 | 'exe_cmd' : , 74 | 'env' : 76 | } 77 | 78 | The 'exe_cmd' is a form of invocation for the distribution of MPI to be 79 | used. For openmpi, for example for OpenMPI, this is 80 | exe_cmd: 'mpirun --host {nodelist} -np {nproc} {cmd}' 81 | The variables in brackets are expanded by internal variables. The 82 | 'exe_cmd' command substitutes the wrfhydropy of 'wrf_hydro.exe' 83 | convention for {cmd}. 84 | The {nproc} argument is the length of the list passed in the nodes 85 | argument, and the {nodellist} are the comma separated arguments in that 86 | list. 87 | 88 | The "entry_cmd" and "exit_cmd" ARE TAKEN FROM THE JOB object. 89 | 1) can be semicolon-separated commands 90 | 2) where these are run depends on MPI. OpenMPI, for example, handles 91 | these on the same processor set as the model runs. 92 | 93 | Notes: 94 | Currently this is working/tested with openmpi and intel mpi. 95 | MPT requires MPI_SHEPERD env variable and it's performance is not 96 | satisfactory so far. 97 | """ 98 | 99 | obj_name = arg_dict['obj_name'] 100 | compose_dir = arg_dict['compose_dir'] 101 | team_dict = arg_dict['team_dict'] 102 | 103 | exit_statuses = {} 104 | for obj in team_dict[obj_name]: 105 | if type(obj) is str: 106 | os.chdir(str(pathlib.Path(compose_dir) / obj)) 107 | else: 108 | os.chdir(str(pathlib.Path(compose_dir) / obj.run_dir)) 109 | 110 | # The cycle ensemble has an extra level of ensemble between the casts and the sims. 111 | # An ensemble and a non-ensemble-cycle have sim objects at this level 112 | have_cycle_ens = False 113 | object_pkl_file = pathlib.Path("WrfHydroSim.pkl") 114 | if not object_pkl_file.exists(): 115 | # But a cycle ensemble will have ensembles at this level.... 116 | have_cycle_ens = True 117 | object_pkl_file = pathlib.Path("WrfHydroEns.pkl") 118 | if not object_pkl_file.exists(): 119 | raise FileNotFoundError( 120 | "No appropriate pickle object for running " + obj_name + ".") 121 | 122 | object_pkl = pickle.load(open(object_pkl_file, "rb")) 123 | job = object_pkl.jobs[0] 124 | 125 | if job._entry_cmd is not None: 126 | entry_cmds = job._entry_cmd.split(';') 127 | new_entry_cmd = [] 128 | for cmd in entry_cmds: 129 | if 'mpirun' not in cmd: 130 | new_entry_cmd.append( 131 | # Switch out the ./wrf_hydro.exe cmd with each command. 
132 | team_dict['exe_cmd'].format( 133 | **{ 134 | 'cmd': cmd, 135 | 'nodelist': team_dict['nodes'][0], # only use one task 136 | 'nproc': 1 137 | } 138 | ) 139 | ) 140 | else: 141 | new_entry_cmd.append(cmd) 142 | job._entry_cmd = '; '.join(new_entry_cmd) 143 | 144 | if job._exit_cmd is not None: 145 | exit_cmds = job._exit_cmd.split(';') 146 | new_exit_cmd = [] 147 | for cmd in exit_cmds: 148 | if 'mpirun' not in cmd: 149 | new_exit_cmd.append( 150 | # Switch out the ./wrf_hydro.exe cmd with each command. 151 | team_dict['exe_cmd'].format( 152 | **{ 153 | 'cmd': cmd, 154 | 'nodelist': team_dict['nodes'][0], # only use one task 155 | 'nproc': 1 156 | } 157 | ) 158 | ) 159 | else: 160 | new_exit_cmd.append(cmd) 161 | job._exit_cmd = '; '.join(new_exit_cmd) 162 | 163 | job._exe_cmd = team_dict['exe_cmd'].format( 164 | **{ 165 | 'cmd': './wrf_hydro.exe', 166 | 'nodelist': ','.join(team_dict['nodes']), 167 | 'nproc': len(team_dict['nodes']) 168 | } 169 | ) 170 | 171 | # This will write the cmd to be executed into the member dir. 172 | # with open('team_run_cmd', 'w') as opened_file: 173 | # opened_file.write(job._exe_cmd) 174 | 175 | object_pkl.pickle(object_pkl_file) 176 | if have_cycle_ens: 177 | # An ensemble-cycle neeeds the job components set on the simulations. 178 | # This object is acutally an ensemble... 179 | set_cycle_ens_sim_jobs(object_pkl, job) 180 | 181 | object_pkl.run(env=team_dict['env']) 182 | 183 | if have_cycle_ens: 184 | # An ensemble-cycle neeeds the job components set on the simulations. 185 | exit_status = get_cycle_ens_sim_job_exits(object_pkl) 186 | else: 187 | exit_status = object_pkl.jobs[0].exit_status 188 | 189 | exit_statuses.update({obj: exit_status}) 190 | return exit_statuses 191 | 192 | 193 | def assign_teams( 194 | obj, 195 | teams_exe_cmd: str, 196 | teams_exe_cmd_nproc: int, 197 | teams_node_file: dict = None, 198 | scheduler: str = 'pbs', 199 | env: dict = None 200 | ) -> dict: 201 | """ 202 | Assign teams for parallel runs across nodes. 203 | Inputs: 204 | obj: The ensemble or cycle object, containin lists of members or casts 205 | to be run. 206 | teams_exe_cmd: str, The mpi-specific syntax needed. For example 207 | 'mpirun --host {nodelist} -np {nproc} {cmd}' 208 | teams_exe_cmd_nproc: int, The number of cores per model/wrf_hydro 209 | simulation to be run. 210 | teams_node_file: [str, pathlib.Path] = None, 211 | Optional file that acts like a node file. 212 | It is not currently implemented but the key specifies the scheduler 213 | format that the file follows. An example pbs node file is in 214 | tests/data and this argument is used here to test without a sched. 215 | env: dict = None, optional envionment to pass to the run. 216 | Outputs: 217 | dict: the teams_dict to be used by parallel_teams_run. See requirements 218 | above. 219 | """ 220 | if 'casts' in dir(obj): 221 | object_list = obj.casts 222 | object_name = 'casts' 223 | elif 'members' in dir(obj): 224 | object_list = obj.members 225 | object_name = 'members' 226 | 227 | n_runs = len(object_list) 228 | 229 | if scheduler is 'pbs': 230 | 231 | if teams_node_file is None: 232 | teams_node_file = os.environ.get('PBS_NODEFILE') 233 | 234 | pbs_nodes = [] 235 | # TODO: comment the target format here. 236 | with open(teams_node_file, 'r') as infile: 237 | for line in infile: 238 | pbs_nodes.append(line.rstrip()) 239 | 240 | n_total_processors = len(pbs_nodes) # less may be used. 
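        # For example (hypothetical node file): a PBS_NODEFILE listing 2 nodes
        # with 36 entries each gives len(pbs_nodes) == 72; with
        # teams_exe_cmd_nproc == 4 and 8 casts/members to run, the next line
        # yields n_teams == min(72 // 4, 8) == 8, i.e. all 8 runs proceed
        # concurrently, 4 cores each.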
241 | n_teams = min(math.floor(len(pbs_nodes) / teams_exe_cmd_nproc), n_runs) 242 | pbs_nodes_counts = dict(collections.Counter(pbs_nodes)) 243 | if n_teams == 0: 244 | raise ValueError("teams_exe_cmd_nproc > total number of cores available") 245 | if (n_teams > 1 and 246 | any([ teams_exe_cmd_nproc > val for val in pbs_nodes_counts.values()])): 247 | raise ValueError("teams_exe_cmd_nproc > number of cores/node: " 248 | 'teams does not currently function in this capacity.') 249 | 250 | # Map the objects on to the teams (this seems overly complicated, should prob 251 | # consider using pandas: 252 | teams_dict = {} 253 | 254 | # If the cast/ensemble is still in memory, this looks different. 255 | if isinstance(object_list[0], wrfhydropy.Simulation): 256 | object_dirs = [oo.run_dir for oo in object_list] 257 | else: 258 | object_dirs = object_list 259 | 260 | object_teams = [the_object % n_teams for the_object in range(n_runs)] 261 | object_team_seq = [[dir, team] for dir, team in zip(object_dirs, object_teams)] 262 | object_team_seq.sort(key=operator.itemgetter(1)) 263 | team_groups = itertools.groupby(object_team_seq, operator.itemgetter(1)) 264 | team_objects = [[item[0] for item in data] for (key, data) in team_groups] 265 | 266 | # Map the nodes on to the teams 267 | # Homogonization step here to avoid communication across nodes... 268 | # Sorting necessary for testing. 269 | unique_nodes = sorted([node for node in list(set(pbs_nodes))]) 270 | print("\n*** Team " + object_name + ' ***') 271 | print("Running on nodes: " + ', '.join(unique_nodes)) 272 | del pbs_nodes 273 | pbs_nodes = [] 274 | 275 | # This is a proposal for cross-node execution setup that seems to work 276 | # but it crashes. 277 | # if any([ teams_exe_cmd_nproc > val for val in pbs_nodes_counts.values()]): 278 | # pbs_nodes_avail = [ nn.split('.')[0] for nn in pbs_nodes_in] 279 | # # copy.deepcopy(pbs_nodes_in) 280 | # for i_team in range(n_teams): 281 | # the_team_nodes = [] 282 | # for ii in range(teams_exe_cmd_nproc): 283 | # the_team_nodes += [pbs_nodes_avail.pop(0)] 284 | # pbs_nodes += [the_team_nodes] 285 | # team_nodes = pbs_nodes 286 | # else: 287 | 288 | for i_team in range(n_teams): 289 | pbs_nodes = pbs_nodes + ( 290 | [unique_nodes[i_team % len(unique_nodes)]] * teams_exe_cmd_nproc) 291 | node_teams = [the_node // teams_exe_cmd_nproc for the_node in range(len(pbs_nodes))] 292 | node_team_seq = [[node, team] for node, team in zip(pbs_nodes, node_teams)] 293 | 294 | node_team_seq.sort(key=operator.itemgetter(1)) 295 | team_groups = itertools.groupby(node_team_seq, operator.itemgetter(1)) 296 | team_nodes = [[item[0] for item in data] for (key, data) in team_groups] 297 | # End else 298 | 299 | # Get the entry and exit commands from the job on the first cast/member 300 | # Foolery for in/out of memory 301 | if isinstance(object_list[0], str): 302 | # An ensemble and a non-ensemble-cycle have sim objects at this level 303 | pkl_file = obj._compose_dir / (object_list[0] + '/WrfHydroSim.pkl') 304 | if not pkl_file.exists(): 305 | # But a cycle ensemble will have ensembles at this level.... 
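        # The entry/exit commands recovered below are copied into every team's
        # entry of teams_dict, so one fully assembled entry looks roughly like
        # (directory and node names are illustrative only):
        #
        #     {0: {'casts': ['cast_2011082600', 'cast_2011082606'],
        #          'nodes': ['r1i1n1', 'r1i1n1', 'r1i1n1', 'r1i1n1'],
        #          'entry_cmd': None, 'exit_cmd': None,
        #          'exe_cmd': 'mpirun --host {nodelist} -np {nproc} {cmd}',
        #          'env': None}}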
306 | pkl_file = obj._compose_dir / (object_list[0] + '/WrfHydroEns.pkl') 307 | if not pkl_file.exists(): 308 | raise FileNotFoundError( 309 | "No appropriate pickle object for running " + object_name + ".") 310 | jobs = pickle.load(pkl_file.open('rb')).jobs 311 | else: 312 | jobs = object_list[0].jobs 313 | if len(jobs) > 1: 314 | raise ValueError('Teams runs only support single job simulations') 315 | entry_cmd = jobs[0]._entry_cmd 316 | exit_cmd = jobs[0]._exit_cmd 317 | 318 | # Assign teams! 319 | for team in range(n_teams): 320 | teams_dict.update({ 321 | team: { 322 | object_name: team_objects[team], 323 | 'nodes': team_nodes[team], 324 | 'entry_cmd': entry_cmd, 325 | 'exit_cmd': exit_cmd, 326 | 'exe_cmd': teams_exe_cmd, 327 | 'env': env 328 | } 329 | }) 330 | 331 | print('\nPBS_NODE_FILE present: ') 332 | print(' ' + str(len(unique_nodes)) + ' nodes with') 333 | print(' ' + str(n_total_processors) + ' TOTAL processors requested.') 334 | 335 | print('\nTeams parallelization:') 336 | print(' ' + str(n_runs) + ' total ' + object_name) 337 | print(' ' + str(n_teams) + ' concurrent teams using') 338 | print(' ' + str(teams_exe_cmd_nproc) + ' processors each.') 339 | 340 | print('\nTeams dict:') 341 | pprint(teams_dict) 342 | print('\n') 343 | 344 | return teams_dict 345 | -------------------------------------------------------------------------------- /wrfhydropy/data/flood_thresholds_to_nc_w_qc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pathlib 4 | import wrfhydropy 5 | import xarray as xr 6 | 7 | wrf_hydro_py_dir = pathlib.Path(wrfhydropy.__file__).parent 8 | thresh_file = wrf_hydro_py_dir / 'data/flood_thresholds.txt' 9 | 10 | # ------------------------------------------------------- 11 | # Load the text file 12 | thresh_df = pd.read_table( 13 | thresh_file, 14 | sep=' ', 15 | na_values='NA', 16 | dtype={'site_no': 'str'}) 17 | 18 | # Pretty up 19 | # move "site_no" to "gage" 20 | thresh_df = thresh_df.reset_index().rename(columns={'site_no': 'gage'}).drop(columns='index') 21 | 22 | 23 | # ------------------------------------------------------- 24 | # QC 25 | # 1. Duplicate feature_ids dropped. 26 | dup_feats = thresh_df[thresh_df.duplicated(subset='feature_id')].feature_id.to_list() 27 | # 3 duplicated features... 28 | thresh_df[thresh_df['feature_id'].isin(dup_feats)].sort_values(by='feature_id') 29 | # For now, just drop the duplicated. This serendipitously selects the 30 | # rows I would manually choose. 31 | thresh_df = thresh_df.drop_duplicates(subset="feature_id") 32 | 33 | # 2. No duplicated gages 34 | dup_gages = thresh_df[thresh_df.duplicated(subset='gage')].gage.to_list() 35 | 36 | # 3. There are 28 positive longitudes (31 before removing the 3 duplicated features) 37 | (thresh_df[thresh_df['lon'] > 0]) 38 | thresh_df.loc[thresh_df['lon'] > 0, 'lon'] = -1 * abs(thresh_df.loc[thresh_df['lon'] > 0, 'lon']) 39 | # Not removing... for now. 40 | 41 | # 4. There are no negative latitudes 42 | len(thresh_df[thresh_df['lat'] < 0]) 43 | 44 | # 5. Check the consistency of the various levels. 45 | # Have basically found that "record" is a wild card... or at least I don't understand it.
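# For example, ge_dict below encodes that moderate thresholds must be at
# least as large as the minor and action thresholds; a (hypothetical) row
# with action_stage = 5.1 ft and minor_stage = 4.8 ft would therefore be
# flagged by check_thresh_orders as 'action_stage > minor_stage'.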
46 | ge_dict = { 47 | 'minor': {'action'}, 48 | 'moderate': {'minor', 'action'}, 49 | 'major': {'moderate', 'minor', 'action'}, 50 | # 'record': {'major', 'moderate', 'minor', 'action'} 51 | } 52 | 53 | 54 | def check_thresh_orders(row): 55 | errors = [] 56 | for var in ['stage', 'flow']: 57 | for thresh, thresh_below in ge_dict.items(): 58 | var_thresh = thresh + '_' + var 59 | for below in thresh_below: 60 | var_thresh_below = below + '_' + var 61 | if np.isnan(row[var_thresh_below]) or np.isnan(row[var_thresh]): 62 | continue 63 | if row[var_thresh_below] > row[var_thresh]: 64 | errors += [var_thresh_below + ' > ' + var_thresh] 65 | if errors == []: 66 | return(None) 67 | else: 68 | return(errors) 69 | 70 | 71 | results = {} 72 | for gage, row in thresh_df.iterrows(): 73 | results[gage] = check_thresh_orders(row) 74 | 75 | # remove the good=none results 76 | results2 = {gage: result for gage, result in results.items() if result is not None} 77 | 78 | # Only two gages with this contradiction 79 | funky_gages = list(results2.keys()) 80 | funky_ones = thresh_df[thresh_df.index.isin(funky_gages)].sort_values(by='feature_id') 81 | 82 | with pd.option_context('display.max_rows', None, 'display.max_columns', None): 83 | print(funky_ones) 84 | 85 | # Just set the conflicting thresholds to none by hand! 86 | thresh_df.loc[thresh_df.gage == '07159750', 'action_stage'] = np.NaN 87 | thresh_df.loc[thresh_df.gage == '11156500', 'action_flow'] = np.NaN 88 | 89 | funky_ones = thresh_df[thresh_df.index.isin(funky_gages)].sort_values(by='feature_id') 90 | with pd.option_context('display.max_rows', None, 'display.max_columns', None): 91 | print(funky_ones) 92 | 93 | # QC Done 94 | 95 | # ------------------------------------------------------- 96 | # Write it out 97 | 98 | thresh_ds_write = thresh_df.set_index('gage').to_xarray() 99 | 100 | # Convert cfs to cms 101 | cfs_to_cms = 0.0280 102 | thresh_flows = ['action_flow', 'minor_flow', 'moderate_flow', 'major_flow', 'record_flow'] 103 | for col in thresh_flows: 104 | thresh_ds_write[col] = thresh_ds_write[col] * cfs_to_cms 105 | thresh_ds_write[col].attrs['units'] = 'm^3/s' 106 | thresh_ds_write[col].encoding = {'dtype': 'float32'} 107 | 108 | # Convert to m 109 | ft_to_m = 0.3048 110 | thresh_stages = ['action_stage', 'minor_stage', 'moderate_stage', 'major_stage', 'record_stage'] 111 | for col in thresh_stages: 112 | thresh_ds_write[col] = thresh_ds_write[col] * ft_to_m 113 | thresh_ds_write[col].attrs['units'] = 'meters' 114 | thresh_ds_write[col].encoding = {'dtype': 'float32'} 115 | 116 | # Save this to a netcdf file. 
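# After the write below, a consumer can open the file and select a gage,
# for instance (the gage id here is hypothetical):
#
#     ds = xr.open_dataset(thresh_nc_file)
#     ds.sel(gage='06730500')[['action_flow', 'major_flow']]  # m^3/s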
117 | thresh_nc_file = wrf_hydro_py_dir / 'data/flood_thresholds_metric_units.nc' 118 | thresh_ds_write.to_netcdf(thresh_nc_file) 119 | -------------------------------------------------------------------------------- /wrfhydropy/tests/.coveragerc: -------------------------------------------------------------------------------- 1 | ../../.coveragerc -------------------------------------------------------------------------------- /wrfhydropy/tests/.gitignore: -------------------------------------------------------------------------------- 1 | coverage_html/ -------------------------------------------------------------------------------- /wrfhydropy/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/__init__.py -------------------------------------------------------------------------------- /wrfhydropy/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pathlib 3 | import subprocess 4 | import numpy as np 5 | import pandas as pd 6 | import pytest 7 | import xarray as xr 8 | 9 | from wrfhydropy.core.domain import Domain 10 | from wrfhydropy.core.job import Job 11 | from wrfhydropy.core.model import Model 12 | from wrfhydropy.core.schedulers import PBSCheyenne 13 | 14 | 15 | @pytest.fixture(scope='function') 16 | def ds_1d(): 17 | # Create a dummy dataset 18 | vals_1d = np.random.randn(3) 19 | time = pd.to_datetime('1984-10-14') 20 | location = ['loc1', 'loc2', 'loc3'] 21 | 22 | ds_1d = xr.Dataset({'var1': (('location'), vals_1d)}, 23 | {'Time': time, 'location': location}) 24 | ds_1d.var1.encoding['_FillValue'] = False 25 | 26 | return ds_1d 27 | 28 | 29 | @pytest.fixture(scope='function') 30 | def ds_1d_has_nans(): 31 | # Create a dummy dataset 32 | vals_1d = np.random.randn(3) 33 | time = pd.to_datetime('1984-10-14') 34 | location = ['loc1', 'loc2', 'loc3'] 35 | 36 | ds_1d = xr.Dataset({'var1': (('location'), vals_1d)}, 37 | {'Time': time, 'location': location}) 38 | 39 | return ds_1d 40 | 41 | 42 | @pytest.fixture(scope='function') 43 | def ds_2d(): 44 | x = [10, 11, 12] 45 | y = [101, 102, 103] 46 | vals_2d = np.random.randn(3, 3) 47 | 48 | time = pd.to_datetime('1984-10-14') 49 | 50 | ds_2d = xr.Dataset( 51 | {'var1': (('x', 'y'), vals_2d)}, 52 | {'Time': time, 'x': x, 'y': y}) 53 | ds_2d.var1.encoding['_FillValue'] = False 54 | 55 | return ds_2d 56 | 57 | 58 | @pytest.fixture(scope='function') 59 | def ds_timeseries(): 60 | # Create a dummy dataset 61 | vals_ts = np.random.randn(3, 3) 62 | time = pd.to_datetime(['1984-10-14 00:00:00', '1984-10-14 01:00:00', '1984-10-14 02:00:00']) 63 | location = ['loc1', 'loc2', 'loc3'] 64 | 65 | ds_ts = xr.Dataset({'var1': (('location', 'Time'), vals_ts)}, 66 | {'Time': time, 67 | 'location': location}) 68 | 69 | return ds_ts 70 | 71 | 72 | @pytest.fixture(scope='function') 73 | def domain_dir(tmpdir, ds_1d): 74 | domain_top_dir_path = pathlib.Path(tmpdir).joinpath('example_case') 75 | domain_dir_path = domain_top_dir_path.joinpath('NWM/DOMAIN') 76 | restart_dir_path = domain_top_dir_path.joinpath('NWM/RESTART') 77 | forcing_dir_path = domain_top_dir_path.joinpath('FORCING') 78 | 79 | domain_top_dir_path.mkdir(parents=True) 80 | domain_dir_path.mkdir(parents=True) 81 | restart_dir_path.mkdir(parents=True) 82 | forcing_dir_path.mkdir(parents=True) 83 | 84 | # Make a list of DOMAIN filenames to create 85 | domain_file_names 
= ['Fulldom_hires.nc', 86 | 'Route_Link.nc', 87 | 'soil_properties.nc', 88 | 'GEOGRID_LDASOUT_Spatial_Metadata.nc', 89 | 'geo_em.d01.nc', 90 | 'spatialweights.nc', 91 | 'GWBUCKPARM.nc', 92 | 'hydro2dtbl.nc', 93 | 'wrfinput_d01.nc', 94 | 'LAKEPARM.nc', 95 | 'nudgingParams.nc'] 96 | for file in domain_file_names: 97 | file_path = domain_dir_path.joinpath(file) 98 | ds_1d.to_netcdf(str(file_path)) 99 | 100 | # Make restart files 101 | restart_file_names = ['HYDRO_RST.2011-08-26_00:00_DOMAIN1', 102 | 'nudgingLastObs.2011-08-26_00:00:00.nc', 103 | 'RESTART.2011082600_DOMAIN1'] 104 | 105 | for file in restart_file_names: 106 | file_path = restart_dir_path.joinpath(file) 107 | ds_1d.to_netcdf(str(file_path)) 108 | 109 | # Make forcing files 110 | forcing_file_names = ['2011082600.LDASIN_DOMAIN1', 111 | '2011082601.LDASIN_DOMAIN1', 112 | '2011082602.LDASIN_DOMAIN1'] 113 | 114 | for file in forcing_file_names: 115 | file_path = forcing_dir_path.joinpath(file) 116 | ds_1d.to_netcdf(str(file_path)) 117 | 118 | # Make namelist patch files 119 | hrldas_namelist = { 120 | "base": { 121 | "noahlsm_offline": { 122 | "hrldas_setup_file": "./NWM/DOMAIN/wrfinput_d01.nc", 123 | "restart_filename_requested": "./NWM/RESTART/RESTART.2011082600_DOMAIN1", 124 | "indir": "./FORCING", 125 | }, 126 | "wrf_hydro_offline": { 127 | "forc_typ": 1 128 | } 129 | }, 130 | "nwm_ana": { 131 | "noahlsm_offline": {}, 132 | "wrf_hydro_offline": { 133 | "forc_typ": 1 134 | } 135 | } 136 | } 137 | 138 | hydro_namelist = { 139 | "base": { 140 | "hydro_nlist": { 141 | "geo_static_flnm": "./NWM/DOMAIN/geo_em.d01.nc", 142 | "restart_file": "./NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1", 143 | "aggfactrt": 4, 144 | "udmp_opt": 1, 145 | }, 146 | "nudging_nlist": { 147 | "nudginglastobsfile": "./NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc" 148 | } 149 | }, 150 | 151 | "nwm_ana": { 152 | "hydro_nlist": {}, 153 | "nudging_nlist": {} 154 | } 155 | } 156 | 157 | json.dump( 158 | hrldas_namelist, 159 | domain_top_dir_path.joinpath('hrldas_namelist_patches.json').open('w') 160 | ) 161 | 162 | json.dump( 163 | hydro_namelist, 164 | domain_top_dir_path.joinpath('hydro_namelist_patches.json').open('w') 165 | ) 166 | 167 | return domain_top_dir_path 168 | 169 | 170 | @pytest.fixture(scope='function') 171 | def model_dir(tmpdir): 172 | 173 | model_dir_path = pathlib.Path(tmpdir).joinpath('wrf_hydro_nwm_public/src') 174 | model_dir_path.mkdir(parents=True) 175 | 176 | # Make namelist files 177 | hrldas_namelist = { 178 | "base": { 179 | "noahlsm_offline": { 180 | "btr_option": 1, 181 | "canopy_stomatal_resistance_option": 1, 182 | 'restart_frequency_hours': 24, 183 | 'output_timestep': 86400 184 | }, 185 | "wrf_hydro_offline": { 186 | "forc_typ": "NULL_specified_in_domain.json" 187 | } 188 | }, 189 | "nwm_ana": { 190 | "noahlsm_offline": {}, 191 | "wrf_hydro_offline": {} 192 | } 193 | } 194 | 195 | hydro_namelist = { 196 | "base": { 197 | "hydro_nlist": { 198 | "channel_option": 2, 199 | "chanobs_domain": 0, 200 | "chanrtswcrt": 1, 201 | "chrtout_domain": 1, 202 | 'rst_dt': 1440, 203 | 'out_dt': 1440 204 | }, 205 | "nudging_nlist": { 206 | "maxagepairsbiaspersist": 3, 207 | "minnumpairsbiaspersist": 1, 208 | } 209 | }, 210 | 211 | "nwm_ana": { 212 | "hydro_nlist": {}, 213 | "nudging_nlist": {} 214 | } 215 | } 216 | 217 | json.dump( 218 | hrldas_namelist, 219 | model_dir_path.joinpath('hrldas_namelists.json').open('w') 220 | ) 221 | 222 | json.dump( 223 | hydro_namelist, 224 | model_dir_path.joinpath('hydro_namelists.json').open('w') 225 | 
) 226 | 227 | compile_options = { 228 | "nwm": { 229 | "WRF_HYDRO": 1, 230 | "HYDRO_D": 0, 231 | "SPATIAL_SOIL": 1, 232 | "WRF_HYDRO_RAPID": 0, 233 | "WRFIO_NCD_LARGE_FILE_SUPPORT": 1, 234 | "NCEP_WCOSS": 0, 235 | "WRF_HYDRO_NUDGING": 1 236 | } 237 | } 238 | 239 | json.dump( 240 | compile_options, 241 | model_dir_path.joinpath('compile_options.json').open('w') 242 | ) 243 | 244 | with model_dir_path.joinpath('.version').open('w') as f: 245 | f.write('v5.1.0') 246 | 247 | with model_dir_path.joinpath('configure').open('w') as f: 248 | f.write('# dummy configure \n') 249 | 250 | # Arugments passed to wrf_hydro.exe are echoed to diag_hydro.00000. 251 | dummy_compile = ( 252 | "#!/bin/bash \n" 253 | "# dummy compile \n" 254 | "mkdir Run \n" 255 | "echo '#!/bin/bash \n" 256 | "echo $@ > diag_hydro.00000\n" 257 | "echo \'The model finished successfully.......\' >> diag_hydro.00000\n" 258 | "exit 0' > Run/wrf_hydro.exe\n" 259 | "touch Run/DUMMY.TBL \n" 260 | ) 261 | with model_dir_path.joinpath('./compile_offline_NoahMP.sh').open('w') as f: 262 | f.write(dummy_compile) 263 | 264 | subprocess.run(['chmod', '-R', '755', str(model_dir_path)]) 265 | 266 | return model_dir_path 267 | 268 | 269 | @pytest.fixture(scope='function') 270 | def compile_dir(tmpdir): 271 | compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir') 272 | compile_dir.mkdir(parents=True) 273 | 274 | # Set table files and exe file attributes 275 | table_files = [compile_dir.joinpath('file1.tbl'), compile_dir.joinpath('file2.tbl')] 276 | wrf_hydro_exe = compile_dir.joinpath('wrf_hydro.exe') 277 | 278 | # Make fake run directory with files that would have been produced at compile 279 | with wrf_hydro_exe.open('w') as f: 280 | f.write('#dummy exe file') 281 | 282 | for file in table_files: 283 | with file.open('w') as f: 284 | f.write('#dummy table file') 285 | 286 | return compile_dir 287 | 288 | 289 | @pytest.fixture(scope='function') 290 | def model(model_dir): 291 | model = Model( 292 | source_dir=model_dir, 293 | model_config='nwm_ana' 294 | ) 295 | return model 296 | 297 | 298 | @pytest.fixture(scope='function') 299 | def domain(domain_dir): 300 | domain = Domain( 301 | domain_top_dir=domain_dir, 302 | domain_config='nwm_ana', 303 | compatible_version='v5.1.0' 304 | ) 305 | return domain 306 | 307 | 308 | @pytest.fixture(scope='function') 309 | # TODO: this should be changed to job_cold_start 310 | def job(): 311 | job = Job( 312 | job_id='test_job_1', 313 | model_start_time='1984-10-14', 314 | model_end_time='2017-01-04', 315 | restart=False, 316 | exe_cmd='./wrf_hydro.exe', 317 | entry_cmd='bogus entry cmd', 318 | exit_cmd='bogus exit cmd' 319 | ) 320 | return job 321 | 322 | 323 | @pytest.fixture(scope='function') 324 | def job_restart(): 325 | job = Job( 326 | job_id='test_job_1', 327 | model_start_time='1984-10-14', 328 | model_end_time='2017-01-04', 329 | restart=True, 330 | restart_file_time='2013-10-13', 331 | exe_cmd='./wrf_hydro.exe', 332 | entry_cmd='bogus entry cmd', 333 | exit_cmd='bogus exit cmd' 334 | ) 335 | return job 336 | 337 | 338 | @pytest.fixture(scope='function') 339 | def scheduler(): 340 | scheduler = PBSCheyenne(account='fake_acct', 341 | email_who='elmo', 342 | email_when='abe', 343 | nproc=216, 344 | nnodes=6, 345 | ppn=None, 346 | queue='regular', 347 | walltime="12:00:00") 348 | return scheduler 349 | 350 | 351 | @pytest.fixture(scope='function') 352 | def sim_output(tmpdir, ds_1d, ds_1d_has_nans, ds_2d): 353 | 354 | tmpdir = pathlib.Path(tmpdir) 355 | sim_out_dir = tmpdir.joinpath('sim_out') 356 
| 357 | sim_out_dir.mkdir(parents=True) 358 | 359 | # Make a list of DOMAIN filenames to create 360 | file_names = [ 361 | 'CHRTOUT_DOMAIN1_TEST', 362 | 'CHRTOUT_GRID1_TEST' 363 | 'CHANOBS_TEST', 364 | 'LAKEOUT_TEST', 365 | 'HYDRO_RST_TEST', 366 | 'RESTART_TEST', 367 | 'nudgingLastObs_TEST', 368 | '.RTOUT_', 369 | 'LDASOUT' 370 | ] 371 | 372 | for counter in range(3): 373 | for file in file_names: 374 | filename = file + '_' + str(counter) 375 | file_path = sim_out_dir.joinpath(filename) 376 | ds_2d.to_netcdf(str(file_path)) 377 | 378 | for counter in range(3): 379 | filename = 'GWOUT_' + str(counter) 380 | file_path = sim_out_dir.joinpath(filename) 381 | ds_1d_has_nans.to_netcdf(str(file_path)) 382 | 383 | return sim_out_dir 384 | -------------------------------------------------------------------------------- /wrfhydropy/tests/data/.gitignore: -------------------------------------------------------------------------------- 1 | collection_data/ 2 | collection_data.tar.gz -------------------------------------------------------------------------------- /wrfhydropy/tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/__init__.py -------------------------------------------------------------------------------- /wrfhydropy/tests/data/collection_data_download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import shutil 4 | from .gdrive_download import download_file_from_google_drive, untar 5 | from wrfhydropy.core.ioutils import md5 6 | 7 | 8 | def download(version='latest'): 9 | orig_dir = pathlib.Path('.').resolve() 10 | data_dir = os.path.dirname(os.path.realpath(__file__)) 11 | os.chdir(data_dir) 12 | id_md5_dict = { 13 | 'latest': { 14 | 'id': '1VrWVve8fhYobDg2xDrgHfiAi7VBDmV9T', 15 | 'md5': '51847a29eaeea0377bfece7ea662500e' 16 | } 17 | } 18 | id = id_md5_dict[version]['id'] 19 | the_md5 = id_md5_dict[version]['md5'] 20 | target_name = pathlib.Path('collection_data.tar.gz') 21 | if target_name.exists() and md5(target_name) == the_md5: 22 | if not pathlib.Path('collection_data').exists(): 23 | untar(str(target_name)) 24 | os.chdir(orig_dir) 25 | return None 26 | if target_name.exists(): 27 | target_name.unlink() 28 | if pathlib.Path('collection_data').exists(): 29 | shutil.rmtree('collection_data') 30 | download_file_from_google_drive(id, str(target_name)) 31 | untar(str(target_name)) 32 | os.chdir(orig_dir) 33 | 34 | 35 | if __name__ == "__main__": 36 | download() 37 | -------------------------------------------------------------------------------- /wrfhydropy/tests/data/collection_data_recipe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import datetime 5 | import os 6 | import pathlib 7 | import pickle 8 | import pywrfhydro 9 | import sys 10 | import wrfhydropy 11 | import xarray as xa 12 | 13 | # Configuration 14 | scratch_dir = pathlib.Path('/glade/scratch/jamesmcc/') 15 | work_dir = pathlib.Path('/glade/work/jamesmcc/') 16 | home_dir = pathlib.Path('/glade/u/home/jamesmcc/') 17 | 18 | experiment_dir = scratch_dir / 'ens_cycle_example' 19 | 20 | domain_dir = experiment_dir / 'croton_NY' 21 | model_dir = home_dir / 'WRF_Hydro/wrf_hydro_nwm_public' 22 | compile_dir = experiment_dir / 'compile' 23 | 24 | configuration = 'nwm_ana' 25 | 26 | ens_routelink_dir = 
experiment_dir / 'routelink_ens' 27 | sim_dir = experiment_dir / 'sim' 28 | ens_dir = experiment_dir / "ens_sim" 29 | ens_ana_dir = experiment_dir / "ens_ana" 30 | 31 | 32 | # ## Data 33 | 34 | # Set up the experiment directory and pull the croton domain: 35 | if not experiment_dir.exists(): 36 | os.mkdir(experiment_dir) 37 | 38 | # This will hang/fail on a cheyenne compute node... 39 | if not domain_dir.exists(): 40 | file_id = "1xFYB--zm9f8bFHESzgP5X5i7sZryQzJe" 41 | download_script = model_dir / 'tests/local/utils/gdrive_download.py' 42 | function_name = "download_file_from_google_drive" 43 | sys.path.insert(0, str(download_script.parent)) 44 | download_file_from_google_drive = getattr( 45 | __import__(str(download_script.stem), fromlist=[function_name]), 46 | function_name 47 | ) 48 | download_file_from_google_drive(file_id, str(experiment_dir / 'croton_NY.tar.gz')) 49 | 50 | get_ipython().run_cell_magic( 51 | 'bash', 52 | '', 53 | 'cd /glade/scratch/jamesmcc/ens_cycle_example/ ;\n' + 54 | 'tar xzf croton_NY.tar.gz ;\n' + 55 | 'mv example_case croton_NY' 56 | ) 57 | 58 | 59 | # ## Building Blocks 60 | # ### Domain 61 | 62 | domain = wrfhydropy.Domain( 63 | domain_top_dir=domain_dir, 64 | domain_config=configuration 65 | ) 66 | 67 | 68 | # ### Model 69 | 70 | model = wrfhydropy.Model( 71 | source_dir=model_dir / 'src', 72 | model_config=configuration, 73 | compiler='ifort' 74 | ) 75 | 76 | 77 | model_pkl = compile_dir / 'WrfHydroModel.pkl' 78 | if not model_pkl.exists(): 79 | model.compile(compile_dir) 80 | else: 81 | model = pickle.load(model_pkl.open('rb')) 82 | 83 | 84 | # ### Job 85 | 86 | model_start_time = datetime.datetime(2018, 8, 1, 0) 87 | model_end_time = model_start_time + datetime.timedelta(hours=2) 88 | job = wrfhydropy.Job( 89 | job_id='flo_sim', 90 | model_start_time=model_start_time, 91 | model_end_time=model_end_time, 92 | output_freq_hr=1, 93 | restart_freq_hr=1, 94 | exe_cmd='mpirun -np 1 ./wrf_hydro.exe' 95 | ) 96 | 97 | 98 | # ### Simulation 99 | 100 | sim = wrfhydropy.Simulation() 101 | sim.add(domain) 102 | sim.add(model) 103 | sim.add(job) 104 | 105 | # ### Ensemble 106 | 107 | ens = wrfhydropy.EnsembleSimulation() 108 | ens.add(sim) 109 | ens.add(job) 110 | ens.replicate_member(3) 111 | 112 | 113 | # #### Routelink ensemble 114 | 115 | rl_file = domain_dir / 'NWM/DOMAIN/Route_Link.nc' 116 | routelink = xa.open_dataset(rl_file) 117 | mannings_n = routelink['n'] 118 | 119 | if not ens_routelink_dir.exists(): 120 | ens_routelink_dir.mkdir(parents=True) 121 | deltas = [.3, 1.0, 1.7] 122 | for delta in deltas: 123 | out_file = ens_routelink_dir / ('Route_Link_edit_' + str(delta) + '.nc') 124 | values_dict = {'n': mannings_n + delta} 125 | result = pywrfhydro.routelink_edit(values_df=values_dict, in_file=rl_file, out_file=out_file) 126 | print(result) 127 | routelink_files = [str(ff) for ff in sorted(ens_routelink_dir.glob("Route_Link*.nc"))] 128 | print(routelink_files) 129 | ens.set_member_diffs( 130 | att_tuple=('base_hydro_namelist', 'hydro_nlist', 'route_link_f'), 131 | values=routelink_files 132 | ) 133 | 134 | ens.member_diffs 135 | 136 | 137 | # ## Ensemble Cycle 138 | 139 | init_times = [ 140 | datetime.datetime(2011, 8, 26, 0), 141 | datetime.datetime(2011, 8, 26, 1), 142 | datetime.datetime(2011, 8, 26, 2), 143 | datetime.datetime(2011, 8, 26, 3) 144 | ] 145 | n_members = len(ens) 146 | # Look back units are in hours, not casts. 
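# A hedged reading of the convention used just below (inferred from how the
# lists are consumed by CycleSimulation, not taken from its documentation):
# each entry of restart_dirs corresponds to one init_time and holds one value
# per member; '.' means "use the restart files already staged for the member",
# so the first cast starts from the domain restarts, while -1 (as int or str)
# means "look back one hour to the previous cast for restarts".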
147 | restart_dirs = [['.'] * n_members, [-1] * n_members, ['-1'] * n_members, ['-1'] * n_members] 148 | 149 | ens_ana = wrfhydropy.CycleSimulation( 150 | init_times=init_times, 151 | restart_dirs=restart_dirs, 152 | ncores=1 153 | ) 154 | 155 | ens_ana.add(ens) 156 | ens_ana.add(job) 157 | 158 | if not ens_ana_dir.exists(): 159 | os.mkdir(ens_ana_dir) 160 | os.chdir(ens_ana_dir) 161 | ens_ana.compose() 162 | return_code = ens_ana.run(n_concurrent=1) 163 | 164 | print(return_code) 165 | 166 | 167 | # ## Wrap up 168 | # Clean up unnecessary items in the experiment directory. Then package it up. 169 | 170 | # Resolve all symlinks to be relative symlinks. 171 | top = experiment_dir.resolve() 172 | files = top.glob('**/*') 173 | links = [ff for ff in files if ff.is_symlink()] 174 | for ll in links: 175 | target = os.path.relpath(str(ll.resolve()), start=str(ll.parent)) 176 | ll.unlink() 177 | ll.symlink_to(target) 178 | 179 | get_ipython().run_cell_magic( 180 | 'bash', 181 | '', 182 | 'cd /glade/scratch/jamesmcc/ens_cycle_example/\nrm croton_NY.tar.gz\n' 183 | 'rm compile/wrf_hydro.exe\n' 184 | 'rm ens_ana/cast_201108260*/member_*/HYDRO_RST.2011-08-26_00:00_DOMAIN1\n' 185 | 'rm ens_ana/cast_201108260*/member_*/RESTART.2011082600_DOMAIN1\n' 186 | 'rm ens_ana/cast_201108260*/member_*/nudgingLastObs.2011-08-26_00:00:00.nc\n' 187 | 'cd croton_NY\n' 188 | 'rm -rf Gridded Gridded_no_lakes/ Reach/ supplemental/\n' 189 | 'rm USGS_obs.csv Readme.txt study_map.PNG\n' 190 | 'rm hydro_namelist_patches.json hrldas_namelist_patches.json\n' 191 | 'rm example_case hydro_namelist_patches.json~ hrldas_namelist_patches.json~ \n' 192 | 'cd NWM\n' 193 | 'rm -rf DOMAIN_LR/ RESTART_LR/ referenceSim/\n' 194 | 'rm hydro.namelist namelist.hrldas \n' 195 | 'cd nudgingTimeSliceObs\n' 196 | 'rm 2011-09*.usgsTimeSlice.ncdf 2011-08-3*.usgsTimeSlice.ncdf \n' 197 | 'rm 2011-08-2[7-9]*.usgsTimeSlice.ncdf \n' 198 | 'rm 2011-08-26_[1-2]*.usgsTimeSlice.ncdf 2011-08-26_0[6-9]*.usgsTimeSlice.ncdf \n' 199 | 'rm 2011-08-25*.usgsTimeSlice.ncdf\n' 200 | 'cd ../../FORCING/\n' 201 | 'rm 201109*LDASIN_DOMAIN1 2011083*.LDASIN_DOMAIN1 2011082[7-9]*.LDASIN_DOMAIN1\n' 202 | 'rm 20110826[1-2]*.LDASIN_DOMAIN1 201108260[6-9]*.LDASIN_DOMAIN1\n' 203 | 'rm 2011082600.LDASIN_DOMAIN1' 204 | ) 205 | 206 | get_ipython().run_cell_magic( 207 | 'bash', 208 | '', 209 | 'cd /glade/scratch/jamesmcc/\n' 210 | 'mv ens_cycle_example collection_data\n' 211 | 'tar czf collection_data.tar.gz collection_data' 212 | ) 213 | -------------------------------------------------------------------------------- /wrfhydropy/tests/data/gdrive_download.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import requests 3 | import tarfile 4 | 5 | 6 | def download_file_from_google_drive(id, destination): 7 | print('downloading google drive file id ' + id + ' to ' + destination) 8 | URL = "https://docs.google.com/uc?export=download" 9 | 10 | session = requests.Session() 11 | 12 | response = session.get(URL, params={'id': id}, stream=True) 13 | token = get_confirm_token(response) 14 | 15 | if token: 16 | params = {'id': id, 'confirm': token} 17 | response = session.get(URL, params=params, stream=True) 18 | 19 | save_response_content(response, destination) 20 | 21 | 22 | def get_confirm_token(response): 23 | for key, value in response.cookies.items(): 24 | if key.startswith('download_warning'): 25 | return value 26 | 27 | return None 28 | 29 | 30 | def save_response_content(response, destination): 31 | 
CHUNK_SIZE = 32768 32 | 33 | with open(destination, "wb") as f: 34 | for chunk in response.iter_content(CHUNK_SIZE): 35 | if chunk: # filter out keep-alive new chunks 36 | f.write(chunk) 37 | 38 | 39 | def untar(fname): 40 | if (fname.endswith("tar.gz")): 41 | tar = tarfile.open(fname, "r:gz") 42 | tar.extractall() 43 | tar.close() 44 | elif (fname.endswith("tar")): 45 | tar = tarfile.open(fname, "r:") 46 | tar.extractall() 47 | tar.close() 48 | 49 | 50 | def main(): 51 | 52 | parser = ArgumentParser() 53 | parser.add_argument("--file_id", 54 | dest="file_id", 55 | help="Google drive file ID. Get from shareable link") 56 | parser.add_argument("--dest_file", 57 | dest="dest_file", 58 | help="Full path including filename for downloaded file.") 59 | 60 | args = parser.parse_args() 61 | file_id = args.file_id 62 | dest_file = args.dest_file 63 | 64 | download_file_from_google_drive(file_id, dest_file) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /wrfhydropy/tests/data/nan_na_data/fill_value.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/fill_value.nc -------------------------------------------------------------------------------- /wrfhydropy/tests/data/nan_na_data/nan_fill.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/nan_fill.nc -------------------------------------------------------------------------------- /wrfhydropy/tests/data/nan_na_data/nan_value.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/nan_value.nc -------------------------------------------------------------------------------- /wrfhydropy/tests/data/nan_na_data/value_value.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/tests/data/nan_na_data/value_value.nc -------------------------------------------------------------------------------- /wrfhydropy/tests/data/nan_na_files_recipe.py: -------------------------------------------------------------------------------- 1 | # Create the data necessary for testing if netcdf files contain nans 2 | # as distinct from the fill value. 3 | # This creates the following files 4 | # nan_na_data/fill_value.nc 5 | # nan_na_data/nan_fill.nc 6 | # nan_na_data/nan_value.nc 7 | # nan_na_data/value_value.nc 8 | # and runs the basic test to show that these are the right files for the job. 9 | # These files could be created on the fly by the tests, but it's kinda "6-of-one". 
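# Hedged sketch, not part of the original recipe: one way a consumer of these
# files could tell a true NaN apart from the -9999.0 fill value. Reading with
# mask_and_scale=False keeps the sentinel literal instead of masking it to NaN.
# The file layout and the 'some_var' variable name match what is written below;
# the helper itself is illustrative only and is never called here.
def _sketch_check_nan_vs_fill(path, fill=-9999.0):
    import numpy as np
    import xarray as xr
    da = xr.open_dataset(path, mask_and_scale=False)['some_var']
    # "has_nan" is true only when NaN appears as an actual data value;
    # "has_fill" is true only when the sentinel appears literally in the data.
    return {'has_nan': bool(np.isnan(da).any()),
            'has_fill': bool((da == fill).any())}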
10 | 11 | import xarray as xr 12 | import numpy as np 13 | 14 | the_nan = float('nan') 15 | the_fill = -9999.0 16 | the_value = 0.0 17 | 18 | all_combos = { 19 | 'value_value': [the_value, the_value], 20 | 'nan_value': [the_nan, the_value], 21 | 'fill_value': [the_fill, the_value], 22 | 'nan_fill': [the_nan, the_fill] 23 | } 24 | 25 | for name, value in all_combos.items(): 26 | ds = xr.Dataset() 27 | da = xr.DataArray( 28 | np.array(value), 29 | coords=[np.array([0, 1])], 30 | dims='dim' 31 | ) 32 | ds['some_var'] = da # np.array(value) 33 | ds.encoding = {'_FillValue': the_fill} 34 | ds.reset_coords('some_var') 35 | the_file = 'nan_na_data/' + name + '.nc' 36 | ds.to_netcdf(the_file) 37 | # This is just an xarray based check. 38 | ds_in = xr.open_dataset(the_file, mask_and_scale=False) 39 | print('') 40 | print(name) 41 | print(ds_in) 42 | -------------------------------------------------------------------------------- /wrfhydropy/tests/data/nodefile_pbs_example_copy.txt: -------------------------------------------------------------------------------- 1 | r10i1n1.ib0.cheyenne.ucar.edu 2 | r10i1n1.ib0.cheyenne.ucar.edu 3 | r10i1n2.ib0.cheyenne.ucar.edu 4 | r10i1n2.ib0.cheyenne.ucar.edu 5 | r10i1n3.ib0.cheyenne.ucar.edu 6 | r10i1n3.ib0.cheyenne.ucar.edu 7 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import pytest 4 | import shutil 5 | import xarray as xr 6 | from wrfhydropy import open_whp_dataset 7 | from .data import collection_data_download 8 | 9 | test_dir = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) 10 | # The collection_data gets wiped... 11 | answer_dir = test_dir / 'data/collection_data/test_answers' 12 | 13 | os.chdir(str(test_dir)) 14 | # The data are found here. 15 | collection_data_download.download() 16 | 17 | # Issues raised by these tests 18 | # https://github.com/NCAR/wrf_hydro_nwm_public/issues/301 19 | # Make an issue: The restart files should have reference time and time just like the other files. 20 | 21 | # TODO: Test multiple versions (current and previous) 22 | version_file = test_dir.joinpath('data/collection_data/croton_NY/.version') 23 | version = version_file.open('r').read().split('-')[0] 24 | 25 | # Simulation 26 | # Make a sim dir to a single simulation. 
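# The blocks that follow reshape the one downloaded ens_ana dataset into the
# layout each collect flavor expects, using symlinks so a single download
# serves every test. Roughly (paths under data/collection_data/):
#   simulation    -> symlink to ens_ana/cast_2011082600/member_000
#   cycle/cast_*  -> symlinks to ens_ana/cast_*/member_000
#   ensemble      -> symlink to ens_ana/cast_2011082600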
27 | sim_dir = test_dir / 'data/collection_data/simulation' 28 | if sim_dir.is_symlink(): 29 | sim_dir.unlink() 30 | sim_dir.symlink_to(test_dir / 'data/collection_data/ens_ana/cast_2011082600/member_000') 31 | 32 | 33 | @pytest.mark.parametrize( 34 | ['file_glob', 'ans_file', 'n_cores'], 35 | [ 36 | ('*CHRTOUT_DOMAIN1', version + '/simulation/CHRTOUT.nc', 1), 37 | ('*LAKEOUT_DOMAIN1', version + '/simulation/LAKEOUT.nc', 1), 38 | ('*CHANOBS_DOMAIN1', version + '/simulation/CHANOBS.nc', 1), 39 | ('*GWOUT_DOMAIN1', version + '/simulation/GWOUT.nc', 1), 40 | ('*[0-9].RTOUT_DOMAIN1', version + '/simulation/RTOUT.nc', 2), 41 | ('*LDASOUT_DOMAIN1', version + '/simulation/LDASOUT.nc', 3), 42 | ('*LSMOUT_DOMAIN', version + '/simulation/LSMOUT.nc', 2), 43 | ('RESTART.*_DOMAIN1', version + '/simulation/RESTART.nc', 2), 44 | ('HYDRO_RST.*_DOMAIN1', version + '/simulation/HYDRO_RST.nc', 3), 45 | ], 46 | ids=[ 47 | 'simulation-CHRTOUT_DOMAIN1', 48 | 'simulation-LAKEOUT_DOMAIN1', 49 | 'simulation-CHANOBS_DOMAIN1', 50 | 'simulation-GWOUT_DOMAIN1', 51 | 'simulation-RTOUT_DOMAIN1', 52 | 'simulation-LDASOUT_DOMAIN1', 53 | 'simulation-LSMOUT_DOMAIN', 54 | 'simulation-RESTART.*_DOMAIN1', 55 | 'simulation-HYDRO_RST.*_DOMAIN1' 56 | ] 57 | ) 58 | def test_collect_simulation( 59 | file_glob, 60 | ans_file, 61 | n_cores 62 | ): 63 | sim_path = test_dir.joinpath(sim_dir) 64 | files = sorted(sim_path.glob(file_glob)) 65 | sim_ds = open_whp_dataset(files, n_cores=n_cores) 66 | ans = xr.open_dataset(answer_dir / ans_file) 67 | xr.testing.assert_equal(sim_ds, ans) 68 | 69 | 70 | # Cycle 71 | # Make a cycle dir and set it up from the ensemble cycle. 72 | cycle_dir = test_dir / 'data/collection_data/cycle' 73 | # delete the directory here. 74 | if cycle_dir.exists(): 75 | shutil.rmtree(str(cycle_dir)) 76 | cycle_dir.mkdir() 77 | os.chdir(str(cycle_dir)) 78 | cycle_dir.joinpath('WrfHydroCycle.pkl').symlink_to( 79 | test_dir.joinpath('data/collection_data/ens_ana/WrfHydroCycle.pkl') 80 | ) 81 | for cast in test_dir.joinpath('data/collection_data/ens_ana').glob('cast_*'): 82 | cast_name = pathlib.Path(cast.name) 83 | cast_name.symlink_to(cast.joinpath('member_000')) 84 | 85 | 86 | @pytest.mark.parametrize( 87 | ['file_glob', 'ans_file', 'n_cores'], 88 | [ 89 | ('*/*CHRTOUT_DOMAIN1', version + '/cycle/CHRTOUT.nc', 1), 90 | ('*/*LAKEOUT_DOMAIN1', version + '/cycle/LAKEOUT.nc', 1), 91 | ('*/*CHANOBS_DOMAIN1', version + '/cycle/CHANOBS.nc', 1), 92 | ('*/*GWOUT_DOMAIN1', version + '/cycle/GWOUT.nc', 1), 93 | ('*/*[0-9].RTOUT_DOMAIN1', version + '/cycle/RTOUT.nc', 2), 94 | ('*/*LDASOUT_DOMAIN1', version + '/cycle/LDASOUT.nc', 3), 95 | ('*/*LSMOUT_DOMAIN', version + '/cycle/LSMOUT.nc', 2), 96 | ('*/RESTART.*DOMAIN1', version + '/cycle/RESTART.nc', 3), 97 | ('*/HYDRO_RST.*DOMAIN1', version + '/cycle/HYDRO_RST.nc', 3), 98 | ], 99 | ids=[ 100 | 'cycle-CHRTOUT_DOMAIN1', 101 | 'cycle-LAKEOUT_DOMAIN1', 102 | 'cycle-CHANOBS_DOMAIN1', 103 | 'cycle-GWOUT_DOMAIN1', 104 | 'cycle-RTOUT_DOMAIN1', 105 | 'cycle-LDASOUT_DOMAIN1', 106 | 'cycle-LSMOUT_DOMAIN', 107 | 'cycle-RESTART.*_DOMAIN1', 108 | 'cycle-HYDRO_RST.*_DOMAIN1' 109 | ] 110 | ) 111 | def test_collect_cycle( 112 | file_glob, 113 | ans_file, 114 | n_cores 115 | ): 116 | cycle_path = test_dir.joinpath(cycle_dir) 117 | files = sorted(cycle_path.glob(file_glob)) 118 | cycle_ds = open_whp_dataset(files, n_cores=n_cores) 119 | ans = xr.open_dataset(answer_dir / ans_file) 120 | xr.testing.assert_equal(cycle_ds, ans) 121 | 122 | 123 | # Ensemble 124 | # Make an ensemble dir and set it up from 
the ensemble cycle. 125 | ens_dir = test_dir / 'data/collection_data/ensemble' 126 | # delete the directory here. 127 | if ens_dir.is_symlink(): 128 | ens_dir.unlink() 129 | ens_dir.symlink_to(test_dir / 'data/collection_data/ens_ana/cast_2011082600') 130 | 131 | 132 | @pytest.mark.parametrize( 133 | ['file_glob', 'ans_file', 'n_cores'], 134 | [ 135 | ('*/*CHRTOUT_DOMAIN1', version + '/ensemble/CHRTOUT.nc', 1), 136 | ('*/*LAKEOUT_DOMAIN1', version + '/ensemble/LAKEOUT.nc', 1), 137 | ('*/*CHANOBS_DOMAIN1', version + '/ensemble/CHANOBS.nc', 1), 138 | ('*/*GWOUT_DOMAIN1', version + '/ensemble/GWOUT.nc', 1), 139 | ('*/*[0-9].RTOUT_DOMAIN1', version + '/ensemble/RTOUT.nc', 2), 140 | ('*/*LDASOUT_DOMAIN1', version + '/ensemble/LDASOUT.nc', 3), 141 | ('*/*LSMOUT_DOMAIN', version + '/ensemble/LSMOUT_DOMAIN', 2), 142 | ('*/RESTART.*_DOMAIN1', version + '/ensemble/RESTART.nc', 3), 143 | ('*/HYDRO_RST.*_DOMAIN1', version + '/ensemble/HYDRO_RST.nc', 3), 144 | ], 145 | ids=[ 146 | 'ensemble-CHRTOUT_DOMAIN1', 147 | 'ensemble-LAKEOUT_DOMAIN1', 148 | 'ensemble-CHANOBS_DOMAIN1', 149 | 'ensemble-GWOUT_DOMAIN1', 150 | 'ensemble-RTOUT_DOMAIN1', 151 | 'ensemble-LDASOUT_DOMAIN1', 152 | 'ensemble-LSMOUT_DOMAIN', 153 | 'ensemble-RESTART.*_DOMAIN1', 154 | 'ensemble-HYDRO_RST.*_DOMAIN1' 155 | ] 156 | ) 157 | def test_collect_ensemble( 158 | file_glob, 159 | ans_file, 160 | n_cores 161 | ): 162 | ens_path = test_dir.joinpath(ens_dir) 163 | files = sorted(ens_path.glob(file_glob)) 164 | ens_ds = open_whp_dataset(files, n_cores=n_cores) 165 | ans = xr.open_dataset(answer_dir / ans_file) 166 | xr.testing.assert_equal(ens_ds, ans) 167 | 168 | 169 | # Ensemble Cycle 170 | @pytest.mark.parametrize( 171 | ['file_glob', 'ans_file', 'n_cores'], 172 | [ 173 | ( 174 | '*/*/*CHRTOUT_DOMAIN1', 175 | version + '/ensemble_cycle/CHRTOUT.nc', 176 | 1 177 | ), 178 | ( 179 | '*/*/*LAKEOUT_DOMAIN1', 180 | version + '/ensemble_cycle/LAKEOUT.nc', 181 | 2 182 | ), 183 | ( 184 | '*/*/*CHANOBS_DOMAIN1', 185 | version + '/ensemble_cycle/CHANOBS.nc', 186 | 1 187 | ), 188 | ( 189 | '*/*/*GWOUT_DOMAIN1', 190 | version + '/ensemble_cycle/GWOUT.nc', 191 | 1 192 | ), 193 | ( 194 | '*/*/*[0-9].RTOUT_DOMAIN1', 195 | version + '/ensemble_cycle/RTOUT.nc', 196 | 1), 197 | ( 198 | '*/*/*LDASOUT_DOMAIN1', 199 | version + '/ensemble_cycle/LDASOUT.nc', 200 | 3 201 | ), 202 | ( 203 | '*/*/*LSMOUT_DOMAIN', 204 | version + '/ensemble_cycle/LSMOUT.nc', 205 | 2 206 | ), 207 | ( 208 | '*/*/RESTART.*_DOMAIN1', 209 | version + '/ensemble_cycle/RESTART.nc', 210 | 3 211 | ), 212 | ( 213 | '*/*/HYDRO_RST.*_DOMAIN1', 214 | version + '/ensemble_cycle/HYDRO_RST.nc', 215 | 3 216 | ), 217 | ], 218 | ids=[ 219 | 'ensemble_cycle-CHRTOUT_DOMAIN1', 220 | 'ensemble_cycle-LAKEOUT_DOMAIN1', 221 | 'ensemble_cycle-CHANOBS_DOMAIN1', 222 | 'ensemble_cycle-GWOUT_DOMAIN1', 223 | 'ensemble_cycle-RTOUT_DOMAIN1', 224 | 'ensemble_cycle-LDASOUT_DOMAIN1', 225 | 'ensemble_cycle-LSMOUT_DOMAIN', 226 | 'ensemble_cycle-RESTART.*_DOMAIN1', 227 | 'ensemble_cycle-HYDRO_RST.*_DOMAIN1' 228 | ] 229 | ) 230 | def test_collect_ensemble_cycle( 231 | file_glob, 232 | ans_file, 233 | n_cores 234 | ): 235 | ens_cycle_path = test_dir.joinpath('data/collection_data/ens_ana') 236 | files = sorted(ens_cycle_path.glob(file_glob)) 237 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores) 238 | ans = xr.open_dataset(answer_dir / ans_file) 239 | xr.testing.assert_equal(ens_cycle_ds, ans) 240 | 241 | # Test that hierarchical collects are identical 242 | # Speed up this super slow one... 
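    # A hedged note on file_chunk_size: it appears to make open_whp_dataset
    # collect the files in batches of that size and concatenate the partial
    # datasets, rather than collecting everything in one pass. The assert below
    # only requires that the chunked route reproduce the single-pass result;
    # the value 50 for LDASOUT is a speed knob, not part of the answers.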
243 | file_chunk_size = 1 244 | if file_glob == '*/*/*LDASOUT_DOMAIN1': 245 | file_chunk_size = 50 246 | ens_cycle_ds_chunk = open_whp_dataset( 247 | files, n_cores=n_cores, file_chunk_size=file_chunk_size) 248 | xr.testing.assert_equal(ens_cycle_ds_chunk, ens_cycle_ds) 249 | 250 | 251 | # Missing/bogus files. 252 | # Do this for ensemble cycle as that's the most complicated relationship to the missing file. 253 | miss_ens_cycle_dir = test_dir / 'data/collection_data/miss_ens_cycle' 254 | if miss_ens_cycle_dir.exists(): 255 | shutil.rmtree(str(miss_ens_cycle_dir)) 256 | miss_ens_cycle_dir.mkdir() 257 | os.chdir(str(miss_ens_cycle_dir)) 258 | orig_dir = test_dir / 'data/collection_data/ens_ana/' 259 | casts = sorted(orig_dir.glob('cast_*')) 260 | pkl_file = sorted(orig_dir.glob("*.pkl"))[0] 261 | pathlib.Path(pkl_file.name).symlink_to(pkl_file) 262 | for cc in casts: 263 | pathlib.Path(cc.name).symlink_to(cc) 264 | # Break the last one. 265 | pathlib.Path(cc.name).unlink() 266 | pathlib.Path(cc.name).mkdir() 267 | os.chdir(cc.name) 268 | member_dirs = \ 269 | sorted((test_dir / ('data/collection_data/ens_ana/' + cc.name)).glob('member_*')) 270 | for mm in member_dirs: 271 | pathlib.Path(mm.name).symlink_to(mm) 272 | # Break the last one. 273 | pathlib.Path(mm.name).unlink() 274 | pathlib.Path(mm.name).mkdir() 275 | orig_ens_dir = test_dir / ('data/collection_data/ens_ana/' + cc.name) 276 | orig_sim_dir = orig_ens_dir / mm.name 277 | pkl_file = sorted(orig_ens_dir.glob("*.pkl"))[0] 278 | pathlib.Path(pkl_file.name).symlink_to(pkl_file) 279 | os.chdir(mm.name) 280 | chrtout_files = sorted(orig_sim_dir.glob('*CHRTOUT*')) 281 | for cc in chrtout_files: 282 | pathlib.Path(cc.name).symlink_to(cc) 283 | pathlib.Path(cc.name).unlink() 284 | pathlib.Path(cc.name).symlink_to('/foo/bar') 285 | 286 | 287 | @pytest.mark.parametrize( 288 | ['file_glob', 'ans_file', 'n_cores'], 289 | [ 290 | ( 291 | '*/*/*CHRTOUT_DOMAIN1', 292 | version + '/missing_ens_cycle/CHRTOUT.nc', 293 | 1 294 | ), 295 | ( 296 | '*/*/RESTART.*_DOMAIN1', 297 | version + '/missing_ens_cycle/RESTART.nc', 298 | 2 299 | ), 300 | ( 301 | '*/*/HYDRO_RST.*_DOMAIN1', 302 | version + '/missing_ens_cycle/HYDRO_RST.nc', 303 | 3 304 | ) 305 | ], 306 | ids=[ 307 | 'missing_ens_cycle-CHRTOUT_DOMAIN1', 308 | 'missing_ens_cycle-RESTART.*_DOMAIN1', 309 | 'missing_ens_cycle-HYDRO_RST.*_DOMAIN1' 310 | ] 311 | ) 312 | def test_collect_missing_ens_cycle( 313 | file_glob, 314 | ans_file, 315 | n_cores 316 | ): 317 | miss_ens_cycle_path = test_dir.joinpath(miss_ens_cycle_dir) 318 | files = sorted(miss_ens_cycle_path.glob(file_glob)) 319 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores) 320 | # There is a bit of tricky encoding to deal with NaN in strings in netcdf 321 | # and type conversions 322 | if 'crs' in ens_cycle_ds.variables: 323 | ens_cycle_ds['crs'] = ens_cycle_ds['crs'].astype('S8') 324 | ens_cycle_ds['crs'].encoding['_FillValue'] = 'nan' 325 | # This is mostly because int32 is changed to float64 bc of nans 326 | for vv in ens_cycle_ds.variables: 327 | if 'time' not in vv: 328 | ens_cycle_ds[vv].encoding['dtype'] = ens_cycle_ds[vv].dtype 329 | 330 | ans = xr.open_dataset(answer_dir / ans_file) 331 | xr.testing.assert_equal(ens_cycle_ds, ans) 332 | 333 | ens_cycle_ds_chunk = open_whp_dataset(files, n_cores=n_cores, file_chunk_size=1) 334 | if 'crs' in ens_cycle_ds.variables: 335 | ens_cycle_ds_chunk['crs'] = ens_cycle_ds_chunk['crs'].astype('S8') 336 | xr.testing.assert_equal(ens_cycle_ds_chunk, ens_cycle_ds) 337 | 338 | 339 | # Exercise 
profile and chunking. 340 | @pytest.mark.parametrize( 341 | ['file_glob', 'ans_file', 'n_cores'], 342 | [ 343 | ('*CHRTOUT_DOMAIN1', version + '/profile_chunking/CHRTOUT.nc', 1) 344 | ], 345 | ids=[ 346 | 'profile_chunking-CHRTOUT_DOMAIN1' 347 | ] 348 | ) 349 | def test_collect_profile_chunking( 350 | file_glob, 351 | ans_file, 352 | n_cores 353 | ): 354 | sim_path = test_dir.joinpath(sim_dir) 355 | files = sorted(sim_path.glob(file_glob)) 356 | sim_ds = open_whp_dataset(files, n_cores=n_cores, profile=True, chunks=15) 357 | ans = xr.open_dataset(answer_dir / ans_file) 358 | xr.testing.assert_equal(sim_ds, ans) 359 | 360 | # if file_chunk_size > and chunk is not None there is an error. 361 | sim_ds_chunk = open_whp_dataset( 362 | files, n_cores=n_cores, profile=True, chunks=15, file_chunk_size=1) 363 | xr.testing.assert_equal(sim_ds_chunk, ans) 364 | 365 | 366 | # Test spatial index selection 367 | # Ensemble Cycle 368 | @pytest.mark.parametrize( 369 | ['file_glob', 'ans_file', 'n_cores', 'isel'], 370 | [ 371 | ( 372 | '*/*/*CHRTOUT_DOMAIN1', 373 | version + '/ensemble_cycle_isel/CHRTOUT.nc', 374 | 1, 375 | {'feature_id': [1, 2]} 376 | ), 377 | ( 378 | '*/*/RESTART.*_DOMAIN1', 379 | version + '/ensemble_cycle_isel/RESTART.nc', 380 | 3, 381 | {'snow_layers': [1, 2], 'west_east': [0, 1, 2]} 382 | ), 383 | ( 384 | '*/*/HYDRO_RST.*_DOMAIN1', 385 | version + '/ensemble_cycle_isel/HYDRO_RST.nc', 386 | 3, 387 | {'links': [0], 'lakes':[0], 'iy':[0, 1]} 388 | ), 389 | ], 390 | ids=[ 391 | 'ensemble_cycle_isel-CHRTOUT_DOMAIN1', 392 | 'ensemble_cycle_isel-RESTART.*_DOMAIN1', 393 | 'ensemble_cycle_isel-HYDRO_RST.*_DOMAIN1' 394 | ] 395 | ) 396 | def test_collect_ensemble_cycle_isel( 397 | file_glob, 398 | ans_file, 399 | n_cores, 400 | isel 401 | ): 402 | ens_cycle_path = test_dir.joinpath('data/collection_data/ens_ana') 403 | files = sorted(ens_cycle_path.glob(file_glob)) 404 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores, isel=isel) 405 | ans = xr.open_dataset(answer_dir / ans_file) 406 | xr.testing.assert_equal(ens_cycle_ds, ans) 407 | 408 | ens_cycle_ds_chunk = open_whp_dataset(files, n_cores=n_cores, isel=isel, file_chunk_size=2) 409 | xr.testing.assert_equal(ens_cycle_ds_chunk, ans) 410 | 411 | 412 | # Test dropping/keeping variables 413 | # Ensemble Cycle 414 | @pytest.mark.parametrize( 415 | ['file_glob', 'ans_file', 'n_cores', 'drop_vars'], 416 | [ 417 | ( 418 | '*/*/*CHRTOUT_DOMAIN1', 419 | version + '/ensemble_cycle_drop_vars/CHRTOUT.nc', 420 | 1, 421 | ['Head', 'crs'] 422 | ), 423 | ( 424 | '*/*/RESTART.*_DOMAIN1', 425 | version + '/ensemble_cycle_drop_vars/RESTART.nc', 426 | 3, 427 | ['SOIL_T', 'SNOW_T', 'SMC', 'SH2O', 'ZSNSO'] 428 | ), 429 | ( 430 | '*/*/HYDRO_RST.*_DOMAIN1', 431 | version + '/ensemble_cycle_drop_vars/HYDRO_RST.nc', 432 | 3, 433 | ['z_gwsubbas', 'resht', 'sfcheadsubrt'] 434 | ), 435 | ], 436 | ids=[ 437 | 'ensemble_cycle_drop_vars-CHRTOUT_DOMAIN1', 438 | 'ensemble_cycle_drop_vars-RESTART.*_DOMAIN1', 439 | 'ensemble_cycle_drop_vars-HYDRO_RST.*_DOMAIN1' 440 | ] 441 | ) 442 | def test_collect_ensemble_cycle_drop_vars( 443 | file_glob, 444 | ans_file, 445 | n_cores, 446 | drop_vars 447 | ): 448 | ens_cycle_path = test_dir.joinpath('data/collection_data/ens_ana') 449 | files = sorted(ens_cycle_path.glob(file_glob)) 450 | ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores, drop_variables=drop_vars) 451 | ans = xr.open_dataset(answer_dir / ans_file) 452 | xr.testing.assert_equal(ens_cycle_ds, ans) 453 | 454 | ens_cycle_ds_chunk = open_whp_dataset( 455 | files, 
n_cores=n_cores, drop_variables=drop_vars, file_chunk_size=1) 456 | xr.testing.assert_equal(ens_cycle_ds_chunk, ans) 457 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_domain.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | from wrfhydropy import Domain, WrfHydroStatic, WrfHydroTs 4 | 5 | 6 | def test_domain_init(domain_dir): 7 | domain = Domain(domain_top_dir=domain_dir, 8 | domain_config='nwm_ana', 9 | compatible_version='v5.0.1') 10 | assert type(domain) == Domain 11 | 12 | def test_domain_namelists(domain_dir): 13 | domain = Domain(domain_top_dir=domain_dir, 14 | domain_config='nwm_ana', 15 | compatible_version='v5.0.1') 16 | 17 | # Check namelist configuration 18 | assert domain.hydro_namelist_patches == { 19 | 'hydro_nlist': 20 | {'geo_static_flnm': './NWM/DOMAIN/geo_em.d01.nc', 21 | 'restart_file': './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1', 22 | 'aggfactrt': 4, 23 | 'udmp_opt': 1}, 24 | 'nudging_nlist': { 25 | 'nudginglastobsfile': './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc'} 26 | }, 'hydro_namelist JSONNamelist did not return expected dictionary ' \ 27 | 'for config nwm_ana' 28 | 29 | assert domain.hrldas_namelist_patches == { 30 | 'noahlsm_offline': 31 | {'hrldas_setup_file': './NWM/DOMAIN/wrfinput_d01.nc', 32 | 'restart_filename_requested': './NWM/RESTART/RESTART.2011082600_DOMAIN1', 33 | 'indir': './FORCING'}, 34 | 'wrf_hydro_offline': {'forc_typ': 1}}, 'hrldas_namelist JSONNamelist did not return ' \ 35 | 'expected dictionary for config nwm_ana' 36 | 37 | def test_domain_filepaths(domain_dir): 38 | domain = Domain(domain_top_dir=domain_dir, 39 | domain_config='nwm_ana', 40 | compatible_version='v5.0.1') 41 | assert type(domain.hydro_files) == list and type(domain.hydro_files[0]) == WrfHydroStatic, \ 42 | 'hydro files not imported correctly' 43 | assert type(domain.lsm_files) == list and type(domain.lsm_files[0]) == WrfHydroStatic, \ 44 | 'lsm files not imported correctly' 45 | assert type(domain.forcing_data) == WrfHydroTs and len(domain.forcing_data) == 3, \ 46 | 'forcing files not imported correctly' 47 | 48 | def test_domain_copyfiles(tmpdir,domain_dir): 49 | domain = Domain(domain_top_dir=str(domain_dir), 50 | domain_config='nwm_ana', 51 | compatible_version='v5.0.1') 52 | tmpdir = pathlib.Path(tmpdir) 53 | copy_dir = tmpdir.joinpath('domain_copy_test') 54 | domain.copy_files(str(copy_dir)) 55 | 56 | namelist_files = [] 57 | for item in domain.hydro_files: 58 | # Make relative for ease of comparison 59 | relative_path = item.absolute().relative_to(domain_dir.absolute()) 60 | namelist_files.append(str(relative_path)) 61 | for item in domain.lsm_files: 62 | relative_path = item.absolute().relative_to(domain_dir.absolute()) 63 | namelist_files.append(str(relative_path)) 64 | for item in domain.nudging_files: 65 | relative_path = item.absolute().relative_to(domain_dir.absolute()) 66 | namelist_files.append(str(relative_path)) 67 | 68 | copied_files = [] 69 | for file in list(copy_dir.rglob('*')): 70 | # Get path as relative so that can be compared to namelist paths 71 | relative_path = file.absolute().relative_to(copy_dir.absolute()) 72 | copied_files.append(str(relative_path)) 73 | 74 | # Manually check that FORCING got copied, rglob is ignoring contents of symlinked dir 75 | assert 'FORCING' in copied_files, 'Forcing data not copied' 76 | 77 | # Check the rest of the files 78 | for file in namelist_files: 79 | if file not in ['FORCING']: 
80 | assert file in copied_files, 'file ' + file + ' was not copied successfully' 81 | 82 | # Check the special case of RESTARTS which should be symlinked into main dir 83 | restart_file_patterns = ['*RESTART*','*HYDRO_RST*','*nudgingLastObs*'] 84 | for file_pattern in restart_file_patterns: 85 | assert len(list(copy_dir.glob(file_pattern))) == 1, \ 86 | 'restart file ' + file_pattern + ' not copied' 87 | 88 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_evaluation.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import datetime 3 | import math 4 | import numpy as np 5 | import os 6 | import pathlib 7 | import pandas as pd 8 | import pytest 9 | import warnings 10 | import xarray as xr 11 | 12 | from io import StringIO 13 | from pandas.testing import assert_frame_equal 14 | from wrfhydropy import Evaluation, open_whp_dataset 15 | from .data import collection_data_download 16 | from .data.evaluation_answer_reprs import * 17 | 18 | # Testing helper functons for data frames. Serialization is a PITA. 19 | float_form = '.2f' 20 | 21 | 22 | def assert_frame_close(df1, df2): 23 | assert_frame_equal(df1, df2, check_exact=False) 24 | 25 | 26 | def str_to_frame(string: str): 27 | return(pd.read_csv(StringIO(string))) 28 | 29 | 30 | def frame_to_str(frame: pd.DataFrame): 31 | return(frame.to_csv(float_format='%' + float_form)) 32 | 33 | 34 | def round_trip_df_serial(frame: pd.DataFrame): 35 | return(str_to_frame(frame_to_str(frame))) 36 | 37 | 38 | pd.options.display.float_format = ('{:' + float_form + '}').format 39 | 40 | test_dir = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) 41 | 42 | # Get the full reprs 43 | pd.set_option('display.max_rows', None) 44 | pd.set_option('display.max_columns', None) 45 | 46 | # The data are found here. Uses the same data as collection. 47 | os.chdir(str(test_dir)) 48 | collection_data_download.download() 49 | 50 | engine = ['pd', 'xr'] 51 | 52 | 53 | @pytest.mark.parametrize( 54 | ['mod_dir', 'mod_glob'], 55 | [ 56 | (test_dir / 'data/collection_data/simulation', '*CHRTOUT_DOMAIN1'), 57 | ], 58 | ids=[ 59 | 'init-simulation', 60 | ] 61 | ) 62 | def test_init(mod_dir, mod_glob): 63 | 64 | files = sorted(mod_dir.glob(mod_glob)) 65 | mod = open_whp_dataset(files) 66 | mod_df = mod.streamflow.to_dataframe() 67 | obs_df = mod_df 68 | streamflow_eval = Evaluation(mod_df, obs_df) 69 | assert type(streamflow_eval) == Evaluation 70 | 71 | 72 | # Should there be a "stage" prt of collection_data_download? I 73 | # would have to look more closely at test_collection. 
The 74 | # following is certainly repeated code 75 | sim_dir = test_dir / 'data/collection_data/simulation' 76 | if sim_dir.exists(): 77 | sim_dir.unlink() 78 | sim_dir.symlink_to(test_dir / 'data/collection_data/ens_ana/cast_2011082600/member_000') 79 | 80 | 81 | @pytest.mark.parametrize('engine', engine) 82 | @pytest.mark.parametrize('group_by_in', [None, 'space']) 83 | @pytest.mark.parametrize( 84 | ['transform', 'transform_key'], 85 | [(lambda x: x, 'identity'), 86 | (lambda x: [ii for ii in range(len(x))], 'index')], 87 | ids=['lambda_identity', 'lambda_index']) 88 | @pytest.mark.parametrize( 89 | ['mod_dir', 'mod_glob', 'indices_dict', 'join_on', 'variable', 'expected_key'], 90 | [ 91 | (test_dir / 'data/collection_data/simulation', 92 | '*CHRTOUT_DOMAIN1', 93 | {'feature_id': [1, 39, 56, 34]}, 94 | ['time', 'feature_id'], 95 | 'streamflow', 96 | '*CHRTOUT_DOMAIN1'), 97 | (test_dir / 'data/collection_data/simulation', 98 | '*LDASOUT_DOMAIN1', 99 | {'x': [1, 3, 5], 'y': [2, 4, 6], 'soil_layers_stag': [2]}, 100 | ['time', 'x', 'y', 'soil_layers_stag'], 101 | 'SOIL_M', 102 | '*LDASOUT_DOMAIN1'), 103 | ], 104 | ids=[ 105 | 'gof-simulation-CHRTOUT', 106 | 'gof-simulation-LSMOUT', 107 | ] 108 | ) 109 | def test_gof_perfect( 110 | engine, 111 | mod_dir, 112 | mod_glob, 113 | indices_dict, 114 | join_on, 115 | variable, 116 | group_by_in, 117 | transform, 118 | transform_key, 119 | expected_key 120 | ): 121 | # Keep this variable agnostic 122 | files = sorted(mod_dir.glob(mod_glob)) 123 | mod = open_whp_dataset(files).isel(indices_dict) 124 | 125 | if group_by_in is None: 126 | group_by_key = '' 127 | group_by = None 128 | elif group_by_in == 'space': 129 | group_by_key = '-' + group_by_in 130 | group_by = copy.deepcopy(join_on) 131 | group_by.remove('time') 132 | else: 133 | raise ValueError("not a valid grouping for this test: ", group_by) 134 | 135 | expected_answer_key = expected_key + group_by_key + '_' + transform_key 136 | # expected = gof_answer_reprs[expected_answer_key] 137 | expected = str_to_frame(gof_answer_reprs[expected_answer_key]) 138 | 139 | if engine == 'pd': 140 | mod_df = mod[variable].to_dataframe().rename( 141 | columns={variable: 'modeled'}) 142 | obs_df = mod[variable].to_dataframe().rename( 143 | columns={variable: 'observed'}) 144 | mod_df.modeled = transform(mod_df.modeled) 145 | the_eval = Evaluation(mod_df, obs_df, join_on=join_on) 146 | gof = the_eval.gof(group_by=group_by) 147 | assert_frame_close(round_trip_df_serial(gof), expected) 148 | 149 | elif engine == 'xr': 150 | if group_by_in is not None: 151 | pytest.skip("Currently not grouping using xarray.") 152 | mod_ds = mod.rename({variable: 'modeled'})['modeled'] 153 | obs_ds = mod.rename({variable: 'observed'})['observed'] 154 | new_data = np.array(transform(mod_ds.to_dataframe().modeled)).reshape(mod_ds.shape) 155 | mod_ds.values = new_data 156 | # mod_ds = xr.DataArray(new_data, dims=mod_ds.dims, coords=mod_ds.coords) 157 | the_eval = Evaluation(mod_ds, obs_ds, join_on=join_on) 158 | gof = the_eval.gof(group_by=group_by).to_dataframe() 159 | # assert repr(gof) == expected 160 | assert_frame_close(round_trip_df_serial(gof), expected) 161 | 162 | 163 | @pytest.mark.parametrize('engine', engine) 164 | @pytest.mark.parametrize('the_stat', ['crps', 'brier']) 165 | def test_crps_brier_basic( 166 | the_stat, 167 | engine 168 | ): 169 | 170 | # The input data for the test 171 | ens0 = np.linspace(-5, 5, num=1000) 172 | ens1 = np.linspace(-500, 500, num=1000) 173 | obs = 0.0000 174 | 175 | # WOw i must be a dunce, 
this is way too much work. 176 | t0 = datetime.datetime(2000, 1, 1) 177 | t1 = datetime.datetime(2000, 1, 2) 178 | modeled = pd.DataFrame( 179 | np.array([ens0, ens1]).transpose(), 180 | columns=[t0, t1] 181 | ) 182 | modeled.index.name = 'member' 183 | modeled = modeled.reset_index() 184 | modeled = modeled.melt( 185 | id_vars=['member'], 186 | var_name='time', 187 | value_name='modeled' 188 | ).set_index(['time', 'member']) 189 | observed = modeled.rename(columns={'modeled': 'observed'}) * obs 190 | 191 | if engine == 'xr': 192 | pytest.skip("Currently using xarray for brier and crps.") 193 | modeled = modeled.to_xarray()['modeled'] 194 | observed = observed.to_xarray()['observed'] 195 | 196 | the_eval = Evaluation(modeled, observed) 197 | 198 | if the_stat == 'crps': 199 | # Generate the answer 200 | # import properscoring as ps 201 | # answer = np.array([ps.crps_ensemble(obs, mod) for mod in [ens0, ens1]]) 202 | answer = pd.DataFrame( 203 | {'time': [t0, t1], 204 | 'crps': np.array([0.83416917, 83.41691692])} 205 | ).set_index('time') 206 | crps = the_eval.crps() 207 | assert_frame_close(crps, answer) 208 | 209 | elif the_stat == 'brier': 210 | threshold = 1 211 | # Generate the answer 212 | # import properscoring as ps 213 | # answer = np.array([ps.threshold_brier_score(obs, mod, threshold=threshold) 214 | # for mod in [ens0, ens1]]) 215 | # answer = pd.DataFrame( 216 | # {'time': [t0, t1], 217 | # 'crps': np.array([ 0.83416917, 83.41691692])} 218 | # ).set_index('time') 219 | answer = np.array([0.16, 0.249001]) 220 | brier = the_eval.brier(threshold) 221 | assert np.isclose(brier, answer).all() 222 | 223 | 224 | # Inputs for contingency and event stat calculations. 225 | # Answers are in data/evaluation_answer_reprs.py 226 | base_dum_time = datetime.datetime(2000, 1, 1) 227 | dumtime = [base_dum_time + datetime.timedelta(hours=dd) for dd in range(4)] 228 | 229 | # Easy to read and interpret inputs and grouped output. 230 | contingency_known_data_input = pd.DataFrame({ 231 | # hits #mix # misses # false pos # corr_neg 232 | 'mod': [1, 1, 1, 1, 1, -1, 1, -1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1], 233 | 'obs': [1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1], 234 | 'tsh': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 235 | 'loc': (['hits']*4)+ (['mix']*4)+ (['miss']*4)+ (['false_pos']*4)+ (['corr_neg']*4), 236 | 'time': dumtime + dumtime + dumtime + dumtime + dumtime, 237 | }).set_index(['loc', 'time']) 238 | 239 | # A threshold that varies across the group on which the calcuation is made. 
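# Hedged sketch of the event logic the known-data inputs above and below are
# constructed around (exceedance is strict, value > threshold). The real
# calculation is done by wrfhydropy's Evaluation/eval accessor; this helper is
# illustration only and is never called by the tests.
def _sketch_contingency_class(mod, obs, tsh):
    # Classify a single (modeled, observed) pair against a threshold.
    if mod > tsh and obs > tsh:
        return 'hit'
    if obs > tsh:
        return 'miss'        # observed event that the model did not produce
    if mod > tsh:
        return 'false_pos'   # modeled event that was not observed
    return 'corr_neg'        # neither value exceeded the threshold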
240 | contingency_known_data_input_2 = pd.DataFrame({ 241 | # hits #mix # misses # false pos # corr_neg 242 | 'mod': [1, 11, 111, 1, 1, 1, 111, 1, 0, 10, 110, -1, 1, 11, 111, 2, 0, 2, 10, 17], 243 | 'obs': [1, 11, 111, 1, 1, 11, 1, 1, 2, 11, 111, 1, 0, 10, 110, 1, 0, 2, 10, 13], 244 | 'tsh': [0, 10, 110, 0, 0, 10, 110, 10, 1, 10, 110, 0, 0, 10, 110, 1, 1, 3, 11, 20], 245 | 'loc': (['hits']*4)+ (['mix']*4)+ (['miss']*4)+ (['false_pos']*4)+ (['corr_neg']*4), 246 | 'time': dumtime + dumtime + dumtime + dumtime + dumtime, 247 | }).set_index(['loc', 'time']) 248 | 249 | # TODO: test NaNs in the data 250 | 251 | 252 | # @pytest.mark.parametrize('engine', engine) 253 | @pytest.mark.parametrize( 254 | 'input_data', 255 | [contingency_known_data_input, contingency_known_data_input_2]) 256 | def test_contingency_known_data(input_data): 257 | known_data = input_data.to_xarray().set_coords("tsh") 258 | mod = known_data.mod.drop('tsh') 259 | obs = known_data.obs 260 | result = mod.eval.obs(obs).contingency(threshold='tsh', group_by='loc') 261 | result = round_trip_df_serial(result) 262 | expected = str_to_frame(contingency_known_data_answer) 263 | assert_frame_close(result, expected) 264 | 265 | 266 | # @pytest.mark.parametrize('engine', engine) 267 | @pytest.mark.parametrize( 268 | 'input_data', 269 | [contingency_known_data_input, contingency_known_data_input_2]) 270 | def test_contingency_missing_columns(input_data): 271 | known_data = input_data.to_xarray().set_coords("tsh") 272 | mod = known_data.mod.drop('tsh') 273 | obs = known_data.obs 274 | result = mod.eval.obs(obs).contingency(threshold='tsh', group_by='loc') 275 | result = round_trip_df_serial(result) 276 | expected = str_to_frame(contingency_known_data_answer) 277 | assert_frame_close(result, expected) 278 | 279 | 280 | @pytest.mark.parametrize( 281 | 'input_data', 282 | [contingency_known_data_input, contingency_known_data_input_2]) 283 | def test_event_known_data(input_data): 284 | known_data = input_data.to_xarray().set_coords("tsh") 285 | mod = known_data.mod.drop('tsh') 286 | obs = known_data.obs 287 | result = mod.eval.obs(obs).event(threshold='tsh', group_by='loc') 288 | result = round_trip_df_serial(result) 289 | expected = str_to_frame(event_known_data_answer) 290 | assert_frame_close(result, expected) 291 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_ioutils.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import datetime 3 | import numpy as np 4 | import pandas as pd 5 | import pathlib 6 | import pytest 7 | import re 8 | import requests 9 | import warnings 10 | import xarray as xr 11 | 12 | from wrfhydropy.core.ioutils import \ 13 | open_wh_dataset, WrfHydroTs, WrfHydroStatic, check_input_files, nwm_forcing_to_ldasin 14 | 15 | from wrfhydropy.core.namelist import JSONNamelist 16 | 17 | 18 | @pytest.fixture(scope='function') 19 | def ds_timeseries(tmpdir): 20 | ts_dir = pathlib.Path(tmpdir).joinpath('timeseries_data') 21 | ts_dir.mkdir(parents=True) 22 | 23 | # Create a dummy dataset 24 | with warnings.catch_warnings(): 25 | warnings.simplefilter("ignore") 26 | vals_ts = np.array([np.log(-1.0), 2.0, 3.0], dtype='float') 27 | 28 | reference_times = pd.to_datetime([ 29 | '1984-10-14 00:00:00', 30 | '1984-10-14 01:00:00', 31 | '1984-10-14 02:00:00' 32 | ]) 33 | times = pd.to_datetime([ 34 | '1984-10-14 01:00:00', 35 | '1984-10-14 02:00:00', 36 | '1984-10-14 03:00:00' 37 | ]) 38 | location = ['loc1', 
'loc2', 'loc3'] 39 | 40 | for idx in enumerate(times): 41 | idx = idx[0] 42 | time_array = [times[idx]] 43 | ref_time_array = [reference_times[idx]] 44 | ds_ts = xr.Dataset({'var1': ('location', vals_ts)}, 45 | {'time': time_array, 46 | 'reference_time': ref_time_array, 47 | 'location': location}) 48 | filename = 'timeseries_' + str(idx) + '.nc' 49 | ds_ts.to_netcdf(ts_dir.joinpath(filename)) 50 | return ts_dir 51 | 52 | 53 | def test_open_wh_dataset_no_forecast(ds_timeseries): 54 | ds_paths = sorted(ds_timeseries.rglob('*.nc')) 55 | the_ds = open_wh_dataset( 56 | paths=ds_paths, 57 | chunks=None, 58 | forecast=False 59 | ) 60 | 61 | the_ref_times = np.array( 62 | ['1970-01-01T00:00:00.000000000'], dtype='datetime64[ns]') 63 | assert (the_ds['reference_time'].values == the_ref_times).all() 64 | 65 | the_ds['time'].values.sort() 66 | assert np.all(the_ds['time'].values == np.array(['1984-10-14T01:00:00.000000000', 67 | '1984-10-14T02:00:00.000000000', 68 | '1984-10-14T03:00:00.000000000'], 69 | dtype='datetime64[ns]')) 70 | 71 | 72 | def test_open_wh_dataset_forecast(ds_timeseries): 73 | ds_paths = list(ds_timeseries.rglob('*.nc')) 74 | the_ds = open_wh_dataset( 75 | paths=ds_paths, 76 | chunks=None, 77 | forecast=True 78 | ) 79 | 80 | the_ds['reference_time'].values.sort() 81 | assert np.all(the_ds['reference_time'].values == np.array(['1984-10-14T00:00:00.000000000', 82 | '1984-10-14T01:00:00.000000000', 83 | '1984-10-14T02:00:00.000000000'], 84 | dtype='datetime64[ns]')) 85 | 86 | the_ds['time'].values.sort() 87 | assert np.all(the_ds['time'].values == np.array(['1984-10-14T01:00:00.000000000', 88 | '1984-10-14T02:00:00.000000000', 89 | '1984-10-14T03:00:00.000000000'], 90 | dtype='datetime64[ns]')) 91 | # print(the_ds) 92 | # print(the_ds['var1'].values) 93 | # assert np.all(the_ds['var1'].values == np.array([[[1.0,2.0,3.0]]], dtype='int')) 94 | 95 | 96 | def test_wrfhydrots(ds_timeseries): 97 | ts_obj = WrfHydroTs(list(ds_timeseries.rglob('*.nc'))) 98 | 99 | ts_obj_open = ts_obj.open() 100 | 101 | assert type(ts_obj_open) == xr.core.dataset.Dataset 102 | assert type(ts_obj.check_nans()) == dict 103 | 104 | 105 | def test_wrfhydrostatic(ds_timeseries): 106 | 107 | static_obj = WrfHydroStatic(list(ds_timeseries.rglob('*.nc'))[0]) 108 | 109 | static_obj_open = static_obj.open() 110 | 111 | assert type(static_obj_open) == xr.core.dataset.Dataset 112 | assert type(static_obj.check_nans()) == dict 113 | 114 | 115 | def test_check_input_files(domain_dir): 116 | hrldas_namelist = JSONNamelist(domain_dir.joinpath('hrldas_namelist_patches.json')) 117 | hrldas_namelist = hrldas_namelist.get_config('nwm_ana') 118 | hydro_namelist = JSONNamelist(domain_dir.joinpath('hydro_namelist_patches.json')) 119 | hydro_namelist = hydro_namelist.get_config('nwm_ana') 120 | 121 | input_file_check = check_input_files(hrldas_namelist=hrldas_namelist, 122 | hydro_namelist=hydro_namelist, 123 | sim_dir=domain_dir) 124 | assert input_file_check is None 125 | 126 | # Alter one file to cause a false in check_input_files 127 | hydro_namelist['hydro_nlist']['geo_static_flnm'] = 'no_such_file' 128 | 129 | with pytest.raises(ValueError) as excinfo: 130 | check_input_files(hrldas_namelist=hrldas_namelist, 131 | hydro_namelist=hydro_namelist, 132 | sim_dir=domain_dir) 133 | 134 | assert str(excinfo.value) == 'The namelist file geo_static_flnm = no_such_file does not exist' 135 | 136 | 137 | def test_nwm_forcing_to_ldasin(tmpdir): 138 | tmpdir = pathlib.Path(tmpdir) 139 | 140 | def url_index_anchor_regex(url, regex=''): 141 
| page = requests.get(url).text 142 | soup = BeautifulSoup(page, 'html.parser') 143 | anchors = [url + '/' + node.get('href') for 144 | node in soup.find_all('a') if re.search(regex, node.get('href'))] 145 | return anchors 146 | 147 | nwm_yesterday = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1)) 148 | nwm_yesterday = nwm_yesterday.strftime("nwm.%Y%m%d") 149 | prod_url = 'http://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/' + nwm_yesterday 150 | para_url = 'http://para.nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/para/' + nwm_yesterday 151 | version_dict = { 152 | # 'para': para_url, 153 | 'prod': prod_url} 154 | 155 | for version_name, model_version in version_dict.items(): 156 | 157 | forcing_dirs = url_index_anchor_regex(model_version, r'^forcing_analysis_assim/$') 158 | for forcing_range in forcing_dirs: 159 | 160 | forcing_files = url_index_anchor_regex(forcing_range, r'\.nc$') 161 | for file in forcing_files: 162 | the_split = file.split('/') 163 | the_base = '/'.join(file.split('/')[(the_split.index(version_name)+1):]) 164 | the_file = tmpdir.joinpath(version_name).joinpath(the_base) 165 | the_file.parent.mkdir(exist_ok=True, parents=True) 166 | the_file.touch() 167 | 168 | # The argument to nwm_forcing_dir is a list of "nwm.YYYYMMDD" dirs. 169 | ldasin_dir_list = tmpdir.joinpath( 170 | 'ldasin_' + version_name + '_from_list/' + pathlib.Path(forcing_range).name 171 | ) 172 | ldasin_dir_list.mkdir(parents=True) 173 | nwm_forcing_to_ldasin( 174 | nwm_forcing_dir=[tmpdir.joinpath(version_name).joinpath(nwm_yesterday)], 175 | ldasin_dir=ldasin_dir_list, 176 | range=pathlib.Path(forcing_range).name 177 | ) 178 | ldasin_list_files = sorted(ldasin_dir_list.glob('*/*')) 179 | assert len(ldasin_list_files) == len(forcing_files) 180 | 181 | # The argument to nwm_forcing_dir is a path which contains "nwm.YYYYMMDD" dirs. 
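            # Hedged sketch of the on-disk layout assumed at this point, built
            # by the touch() loop above (prod shown; para would be analogous):
            #
            #   <tmpdir>/prod/nwm.YYYYMMDD/forcing_analysis_assim/<file>.nc
            #
            # nwm_forcing_to_ldasin is expected to mirror each forcing file into
            # one subdirectory level under ldasin_dir, which is why the asserts
            # only compare file counts via glob('*/*').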
182 | ldasin_dir = tmpdir.joinpath( 183 | 'ldasin_' + version_name + '/' + pathlib.Path(forcing_range).name 184 | ) 185 | ldasin_dir.mkdir(parents=True) 186 | nwm_forcing_to_ldasin( 187 | nwm_forcing_dir=tmpdir.joinpath(version_name), 188 | ldasin_dir=ldasin_dir, 189 | range=pathlib.Path(forcing_range).name 190 | ) 191 | ldasin_files = sorted(ldasin_dir.glob('*/*')) 192 | assert len(ldasin_files) == len(forcing_files) 193 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_model.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import warnings 3 | 4 | from wrfhydropy import Model 5 | 6 | 7 | def test_model_init(model_dir): 8 | model = Model(source_dir=model_dir, 9 | model_config='nwm_ana') 10 | assert type(model) == Model 11 | 12 | def test_model_setenvar(model_dir,tmpdir): 13 | model = Model(source_dir=model_dir, 14 | model_config='nwm_ana') 15 | 16 | assert model.compile_options == { 17 | "WRF_HYDRO": 1, 18 | "HYDRO_D": 0, 19 | "SPATIAL_SOIL": 1, 20 | "WRF_HYDRO_RAPID": 0, 21 | "WRFIO_NCD_LARGE_FILE_SUPPORT": 1, 22 | "NCEP_WCOSS": 0, 23 | "WRF_HYDRO_NUDGING": 1 24 | } 25 | 26 | compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir_setenvar') 27 | 28 | # Compile will fail so trap axception and check compile artifacts instead 29 | try: 30 | with warnings.catch_warnings(): 31 | warnings.simplefilter("ignore") 32 | model.compile(compile_dir=compile_dir) 33 | except: 34 | pass 35 | 36 | with model_dir.joinpath('compile_options.sh').open('r') as f: 37 | assert f.read() == 'export WRF_HYDRO=1\n' \ 38 | 'export HYDRO_D=0\n' \ 39 | 'export SPATIAL_SOIL=1\n' \ 40 | 'export WRF_HYDRO_RAPID=0\n' \ 41 | 'export WRFIO_NCD_LARGE_FILE_SUPPORT=1\n' \ 42 | 'export NCEP_WCOSS=0\n' \ 43 | 'export WRF_HYDRO_NUDGING=1\n' 44 | 45 | #model_dir=pathlib.Path('test') 46 | def test_model_compile(model_dir,tmpdir): 47 | model = Model(source_dir=model_dir, 48 | model_config='nwm_ana') 49 | 50 | compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir_compile') 51 | 52 | # Compile will fail so trap exception and check compile artifacts instead 53 | try: 54 | with warnings.catch_warnings(): 55 | warnings.simplefilter("ignore") 56 | model.compile(compile_dir=compile_dir) 57 | except: 58 | pass 59 | 60 | assert model.compile_log.returncode == 0 61 | 62 | def test_model_copyfiles(model_dir, tmpdir, compile_dir): 63 | 64 | model = Model(source_dir=model_dir, 65 | model_config='nwm_ana') 66 | 67 | # compile_dir = pathlib.Path(tmpdir).joinpath('compile_dir_compile') 68 | # compile_dir.mkdir(parents=True) 69 | copy_dir = pathlib.Path(tmpdir).joinpath('compile_dir_copy') 70 | copy_dir.mkdir(parents=True) 71 | 72 | # Set table files and exe file attributes 73 | model.table_files = [compile_dir.joinpath('file1.tbl'),compile_dir.joinpath('file2.tbl')] 74 | model.wrf_hydro_exe = compile_dir.joinpath('wrf_hydro.exe') 75 | 76 | # Make fake run directory with files that would have been produced at compile 77 | with model.wrf_hydro_exe.open('w') as f: 78 | f.write('#dummy exe file') 79 | 80 | for file in model.table_files: 81 | with file.open('w') as f: 82 | f.write('#dummy table file') 83 | 84 | model.copy_files(str(copy_dir)) 85 | 86 | actual_files_list = list(copy_dir.glob('*')) 87 | expected_files_list = list() 88 | for file in model.table_files: 89 | expected_files_list.append(file.name) 90 | expected_files_list.append(model.wrf_hydro_exe.name) 91 | 92 | for file in actual_files_list: 93 | assert file.name in 
expected_files_list 94 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_namelist.py: -------------------------------------------------------------------------------- 1 | from wrfhydropy import namelist 2 | import copy 3 | import json 4 | 5 | # Make some test dicts 6 | main_dict = {'key_1': 'value_1', 7 | 'key_2': 1, 8 | 'sub_dict1': { 9 | 'subdict1_key1': 'sub_value1', 10 | 'subdict1_key2': 2, 11 | }, 12 | 'sub_dict2': { 13 | 'subdict2_key1': 1} 14 | } 15 | 16 | patch_dict = { 17 | 'sub_dict1': { 18 | 'subdict1_key1': 'patched_value' 19 | }, 20 | 'key_2': 'patched_value' 21 | } 22 | 23 | # Make some test namelists 24 | main_nl = namelist.Namelist(main_dict) 25 | patch_nl = namelist.Namelist(patch_dict) 26 | 27 | def test_namelist_patch(): 28 | patched_nl = main_nl.patch(patch_nl) 29 | 30 | assert patched_nl == {'key_1': 'value_1', 31 | 'key_2': 'patched_value', 32 | 'sub_dict1': {'subdict1_key1': 'patched_value', 'subdict1_key2': 2}, 33 | 'sub_dict2': {'subdict2_key1': 1}} 34 | 35 | def test_namelist_write_read(tmpdir): 36 | file_path = tmpdir + '/test_nml_write_f90' 37 | # Note that for F90nml write method the first key of hte dict must have a value of a dict 38 | write_nml = namelist.Namelist({'nml1':main_nl}) 39 | write_nml.write(str(file_path)) 40 | 41 | read_nl = namelist.load_namelist(str(file_path)) 42 | 43 | assert write_nml == read_nl, 'written namelist does not match read namelist' 44 | 45 | 46 | def test_namelist_diff(): 47 | main_nl_altered = copy.deepcopy(main_nl) 48 | del main_nl_altered['key_1'] 49 | main_nl_altered['sub_dict2']['subdict2_key1'] = 'altered_key1' 50 | 51 | nl_diffs = namelist.diff_namelist(main_nl,main_nl_altered) 52 | 53 | assert nl_diffs == {'type_changes': 54 | {"root['sub_dict2']['subdict2_key1']": {'old_type': int, 55 | 'new_type': str, 56 | 'old_value': 1, 57 | 'new_value': 'altered_key1'} 58 | }, 59 | 'dictionary_item_removed': {"root['key_1']"} 60 | } 61 | 62 | 63 | def test_namelist_dictmerge(): 64 | patched_dict = namelist.dict_merge(main_dict,patch_dict) 65 | assert patched_dict == {'key_1': 'value_1', 66 | 'key_2': 'patched_value', 67 | 'sub_dict1': 68 | {'subdict1_key1': 'patched_value', 'subdict1_key2': 2}, 69 | 'sub_dict2': {'subdict2_key1': 1} 70 | } 71 | 72 | def test_namelist_jsonnamelist(tmpdir): 73 | file_path = tmpdir + '/test_json.json' 74 | 75 | 76 | json_string = json.loads('{"base":{"key1":1,"key2":"value2"},"a_config":{' 77 | '"key2":"config_value2"}}') 78 | json.dump(json_string,open(file_path,'w')) 79 | 80 | json_nl = namelist.JSONNamelist(file_path) 81 | json_nl_config = json_nl.get_config('a_config') 82 | 83 | assert json_nl_config == {'key1': 1, 'key2': 'config_value2'} 84 | assert type(json_nl_config) == namelist.Namelist 85 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_outputdiffs.py: -------------------------------------------------------------------------------- 1 | from wrfhydropy.core.outputdiffs import compare_ncfiles, OutputDataDiffs, OutputMetaDataDiffs 2 | from wrfhydropy.core.simulation import SimulationOutput 3 | import os 4 | 5 | def test_outputdiffs_compare_ncfiles(sim_output): 6 | 7 | chrtout = list(sim_output.glob('*CHRTOUT_DOMAIN1*')) 8 | gwout = list(sim_output.glob('*GWOUT*')) 9 | 10 | assert compare_ncfiles(chrtout,chrtout) == [None,None,None] 11 | assert compare_ncfiles(chrtout,gwout) != [None,None,None] 12 | 13 | 14 | def test_outputdiffs_outputdatadiffs(sim_output): 15 | 16 | 
output=SimulationOutput() 17 | output.collect_output(sim_dir=sim_output) 18 | 19 | output_diffs = OutputDataDiffs(output,output) 20 | print(output_diffs.diff_counts) 21 | assert output_diffs.diff_counts == { 22 | 'channel_rt': 0, 'channel_rt_grid': 0, 'chanobs': 0, 23 | 'lakeout': 0, 'gwout': 0, 'restart_hydro': 0, 24 | 'restart_lsm': 0, 'restart_nudging': 0, 25 | 'ldasout': 0, 'rtout': 0 26 | } 27 | 28 | 29 | def test_outputdiffs_outputmetadatadiffs(sim_output): 30 | 31 | output=SimulationOutput() 32 | output.collect_output(sim_dir=sim_output) 33 | 34 | output_diffs = OutputMetaDataDiffs(output,output) 35 | 36 | assert output_diffs.diff_counts == { 37 | 'channel_rt': 0, 'chanobs': 0, 'lakeout': 0, 'gwout': 3, 38 | 'rtout': 0, 'ldasout': 0, 'restart_hydro': 0, 39 | 'restart_lsm': 0, 'restart_nudging': 0 40 | } 41 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_schedulers_pbs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import pytest 4 | 5 | from wrfhydropy.core.schedulers import PBSCheyenne 6 | from wrfhydropy.core.job import Job 7 | 8 | 9 | @pytest.fixture(scope='function') 10 | def scheduler_regular(): 11 | scheduler = PBSCheyenne( 12 | account='fake_acct', 13 | email_who='elmo', 14 | email_when='abe', 15 | nproc=216, 16 | nnodes=6, 17 | ppn=None, 18 | queue='regular', 19 | walltime="12:00:00") 20 | return scheduler 21 | 22 | 23 | @pytest.fixture(scope='function') 24 | def scheduler_shared(): 25 | scheduler = PBSCheyenne( 26 | account='fake_acct', 27 | email_who='elmo', 28 | email_when='abe', 29 | nproc=216, 30 | nnodes=6, 31 | ppn=None, 32 | queue='shared', 33 | walltime="12:00:00") 34 | return scheduler 35 | 36 | 37 | def test_schedulers_pbs_solve_nodes(scheduler_regular): 38 | 39 | assert scheduler_regular.ppn == 36 40 | 41 | scheduler_regular.nproc = None 42 | scheduler_regular.nnodes = 5 43 | assert scheduler_regular.ppn == 36 44 | assert scheduler_regular.nnodes == 5 45 | assert scheduler_regular.nproc == 180 46 | 47 | 48 | expected_script_list = [ 49 | '#!/bin/sh\n' , 50 | '#PBS -N test_job_1\n' , 51 | '#PBS -A fake_acct\n' , 52 | '#PBS -q regular\n' , 53 | '#PBS -M elmo\n' , 54 | '#PBS -m abe\n' , 55 | '\n' , 56 | '#PBS -l walltime=12:00:00\n' , 57 | '#PBS -l select=6:ncpus=36:mpiprocs=36\n' , 58 | '\n' , 59 | '# Not using PBS standard error and out files to capture model output\n' , 60 | '# but these files might catch output and errors from the scheduler.\n' , 61 | '#PBS -o job_test_job_1\n' , 62 | '#PBS -e job_test_job_1\n' , 63 | '\n' , 64 | '# CISL suggests users set TMPDIR when running batch jobs on Cheyenne.\n' , 65 | 'export TMPDIR=/glade/scratch/$USER/temp\n' , 66 | 'mkdir -p $TMPDIR\n' ] 67 | # Beyond here in the script there is a system/user dependent path and exit line, 68 | # drop these and only compare on the length of this string. 
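# The '#PBS -l select=6:ncpus=36:mpiprocs=36' line above follows from the fixture settings
# (nnodes=6, ppn resolved to 36). A minimal sketch of that accounting, assuming Cheyenne's
# 36 cores per node (the real logic lives in PBSCheyenne, this is only an illustration):
def _solve_ppn_sketch(nproc=None, nnodes=None, ppn=None, cores_per_node=36):
    """Fill in whichever of nproc/nnodes/ppn was left as None."""
    if ppn is None:
        ppn = cores_per_node
    if nproc is None:
        nproc = nnodes * ppn
    if nnodes is None:
        nnodes = -(-nproc // ppn)  # ceiling division
    return nproc, nnodes, ppn

# Mirrors the assertions in test_schedulers_pbs_solve_nodes above:
assert _solve_ppn_sketch(nproc=216, nnodes=6) == (216, 6, 36)
assert _solve_ppn_sketch(nnodes=5) == (180, 5, 36)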
69 | 70 | custom_none = {} 71 | expected_script = ''.join(expected_script_list) 72 | 73 | custom_l = {'-l': 'select=1:ncpus=36:mpiprocs=36:mem=109GB+1:ncpus=36:mpiprocs=36'} 74 | expected_script_custom_l = expected_script_list 75 | expected_script_custom_l[8] = '#PBS -l ' + custom_l['-l'] + '\n' 76 | expected_script_custom_l = ''.join(expected_script_custom_l) 77 | 78 | 79 | @pytest.mark.parametrize( 80 | ['sched', 'custom', 'expected'], 81 | [ 82 | (pytest.lazy_fixture("scheduler_regular"), custom_none, expected_script), 83 | (pytest.lazy_fixture("scheduler_regular"), custom_l, expected_script_custom_l), 84 | ], 85 | ids=['no_custom', 'custom_l'] 86 | ) 87 | def test_schedulers_pbs_writescript(tmpdir, sched, custom, expected): 88 | job = Job( 89 | job_id='test_job_1', 90 | model_start_time='1984-10-14', 91 | model_end_time='2017-01-04', 92 | restart=False, 93 | exe_cmd='bogus exe cmd', 94 | entry_cmd='bogus entry cmd', 95 | exit_cmd='bogus exit cmd') 96 | 97 | sched.scheduler_opts['custom'] = custom 98 | 99 | os.chdir(tmpdir) 100 | job.job_dir.mkdir() # WHY IS THIS NOW NECESSARY? 101 | 102 | sched._write_job_pbs([job, job]) 103 | script_path = job.job_dir.joinpath('job_' + job.job_id + '.pbs') 104 | with script_path.open(mode='r') as f: 105 | job_script = f.read() 106 | 107 | # Only comparing the first 400 lines because the last lines vary according to system 108 | len_expected = len(expected) 109 | assert job_script[0:len_expected] == expected 110 | 111 | 112 | def test_schedulers_pbs_schedule(scheduler_regular,capfd): 113 | job = Job(job_id='test_job_1', 114 | model_start_time='1984-10-14', 115 | model_end_time='2017-01-04', 116 | restart=False, 117 | exe_cmd='bogus exe cmd', 118 | entry_cmd='bogus entry cmd', 119 | exit_cmd='bogus exit cmd') 120 | 121 | try: 122 | scheduler_regular.schedule([job, job]) 123 | out, err = capfd.readouterr() 124 | print(out) 125 | except: 126 | out, err = capfd.readouterr() 127 | pass 128 | assert out == "qsub_str: /bin/bash -c 'job_test_job_1=`qsub -h job_test_job_1/job_test_job_1.pbs`;" \ 129 | "job_test_job_1=`qsub -W depend=afterok:${job_test_job_1} " \ 130 | "job_test_job_1/job_test_job_1.pbs`;qrls ${job_test_job_1};'" \ 131 | '\n' 132 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_simulation.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import deepdiff 3 | import os 4 | import pathlib 5 | import pickle 6 | import pytest 7 | 8 | from wrfhydropy.core.simulation import Simulation, SimulationOutput 9 | from wrfhydropy.core.ioutils import WrfHydroTs 10 | from wrfhydropy.core.ensemble_tools import DeepDiffEq 11 | from wrfhydropy.core.outputdiffs import check_unprocessed_diffs 12 | 13 | 14 | def test_simulation_add_model_domain(model, domain): 15 | sim = Simulation() 16 | sim.add(model) 17 | sim.add(domain) 18 | 19 | assert sim.base_hydro_namelist == \ 20 | {'hydro_nlist': 21 | { 22 | 'channel_option': 2, 23 | 'chanobs_domain': 0, 24 | 'chanrtswcrt': 1, 25 | 'chrtout_domain': 1, 26 | 'geo_static_flnm': './NWM/DOMAIN/geo_em.d01.nc', 27 | 'restart_file': './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1', 28 | 'aggfactrt': 4, 29 | 'udmp_opt': 1, 30 | 'out_dt': 1440, 31 | 'rst_dt': 1440 32 | }, 33 | 'nudging_nlist': { 34 | 'maxagepairsbiaspersist': 3, 35 | 'minnumpairsbiaspersist': 1, 36 | 'nudginglastobsfile': 37 | './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc' 38 | } 39 | } 40 | 41 | assert sim.base_hrldas_namelist == \ 42 | 
{'noahlsm_offline': 43 | { 44 | 'btr_option': 1, 45 | 'canopy_stomatal_resistance_option': 1, 46 | 'hrldas_setup_file': './NWM/DOMAIN/wrfinput_d01.nc', 47 | 'restart_filename_requested': 48 | './NWM/RESTART/RESTART.2011082600_DOMAIN1', 49 | 'indir': './FORCING', 50 | 'output_timestep': 86400, 51 | 'restart_frequency_hours': 24 52 | }, 53 | 'wrf_hydro_offline': {'forc_typ': 1} 54 | } 55 | 56 | 57 | def test_simulation_add_job(model, domain, job): 58 | sim = Simulation() 59 | with pytest.raises(Exception) as e_info: 60 | sim.add(job) 61 | 62 | sim.add(model) 63 | sim.add(domain) 64 | sim.add(job) 65 | 66 | 67 | def test_simulation_compose(model, domain, job, capfd, tmpdir): 68 | 69 | sim = Simulation() 70 | sim.add(model) 71 | sim.add(domain) 72 | sim.add(job) 73 | 74 | # copy before compose 75 | sim_opts = copy.deepcopy(sim) 76 | sim_tbls = copy.deepcopy(sim) 77 | 78 | compose_dir = pathlib.Path(tmpdir).joinpath('sim_compose') 79 | os.mkdir(str(compose_dir)) 80 | os.chdir(str(compose_dir)) 81 | 82 | sim.compose() 83 | 84 | # Doing this thrice kinda asks for function... 85 | # This compose exercises the options to compose. Gives the same result. 86 | compose_dir_opts = pathlib.Path(tmpdir).joinpath('sim_compose_opts') 87 | os.mkdir(str(compose_dir_opts)) 88 | os.chdir(str(compose_dir_opts)) 89 | 90 | sim_opts.compose( 91 | symlink_domain=False, 92 | force=True, 93 | check_nlst_warn=True 94 | ) 95 | 96 | actual_files = list(compose_dir.rglob('./*')) 97 | domain_files = domain.domain_top_dir.rglob('*') 98 | expected_files = [ 99 | 'namelist.hrldas', 100 | 'hydro.namelist', 101 | 'job_test_job_1', 102 | '.uid', 103 | 'NWM', 104 | 'WrfHydroModel.pkl', 105 | 'FORCING', 106 | 'DUMMY.TBL', 107 | 'wrf_hydro.exe' 108 | ] 109 | 110 | for file in domain_files: 111 | expected_files.append(file.name) 112 | 113 | for file in actual_files: 114 | assert file.name in expected_files 115 | 116 | assert sim.model.table_files == sim_opts.model.table_files 117 | assert [str(ff.name) for ff in sim.model.table_files] == ['DUMMY.TBL'] 118 | 119 | # These composes result in alternative, user selected table files. 
120 | # Do it before and after model.compile() 121 | sim_tbls_postcompile = copy.deepcopy(sim_tbls) 122 | 123 | dummy_user_tbl = pathlib.Path(tmpdir).joinpath('DUMMY_USER.TBL') 124 | with dummy_user_tbl.open('w') as f: 125 | f.write('# dummy TBL \n') 126 | 127 | compose_dir_tbls = pathlib.Path(tmpdir).joinpath('sim_compose_tbls') 128 | os.mkdir(str(compose_dir_tbls)) 129 | os.chdir(str(compose_dir_tbls)) 130 | # before compile 131 | sim_tbls.model.table_files = [dummy_user_tbl] 132 | sim_tbls.compose() 133 | 134 | compose_dir_tbls_postcompile = pathlib.Path(tmpdir).joinpath('sim_compose_tbls_postcompile') 135 | compile_dir_tbls_postcompile = pathlib.Path(tmpdir).joinpath('sim_compile_tbls_postcompile') 136 | os.mkdir(str(compose_dir_tbls_postcompile)) 137 | os.chdir(str(compose_dir_tbls_postcompile)) 138 | sim_tbls_postcompile.model.compile(compile_dir_tbls_postcompile) 139 | sim_tbls_postcompile.model.table_files = [dummy_user_tbl] 140 | sim_tbls_postcompile.compose() 141 | 142 | assert sim_tbls.model.table_files == sim_tbls_postcompile.model.table_files 143 | assert sim_tbls.model.table_files == [dummy_user_tbl] 144 | 145 | actual_files = list(compose_dir_tbls.rglob('./*')) 146 | domain_files = domain.domain_top_dir.rglob('*') 147 | expected_files = [ 148 | 'namelist.hrldas', 149 | 'hydro.namelist', 150 | 'job_test_job_1', 151 | '.uid', 152 | 'NWM', 153 | 'WrfHydroModel.pkl', 154 | 'FORCING', 155 | 'DUMMY_USER.TBL', 156 | 'wrf_hydro.exe' 157 | ] 158 | 159 | for file in domain_files: 160 | expected_files.append(file.name) 161 | 162 | for file in actual_files: 163 | assert file.name in expected_files 164 | 165 | 166 | def test_simulation_run_no_scheduler(model, domain, job, tmpdir, capfd): 167 | sim = Simulation() 168 | sim.add(model) 169 | sim.add(domain) 170 | sim.add(job) 171 | 172 | compose_dir = pathlib.Path(tmpdir).joinpath('sim_run_no_sched') 173 | os.mkdir(str(compose_dir)) 174 | os.chdir(str(compose_dir)) 175 | 176 | sim.compose() 177 | sim.run() 178 | assert sim.jobs[0].exit_status == 0, \ 179 | "The job did not exit successfully." 
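# A minimal end-to-end sketch of the workflow the compose/run tests above exercise.
# Paths are placeholders and the Domain/Job keyword names and top-level imports are
# inferred from the fixtures, so treat this as orientation rather than canonical usage:
import os
import pathlib

from wrfhydropy import Domain, Job, Model, Simulation  # top-level import names assumed

model = Model(source_dir=pathlib.Path('/path/to/wrf_hydro_source'), model_config='nwm_ana')
domain = Domain(domain_top_dir=pathlib.Path('/path/to/domain'), domain_config='nwm_ana')
job = Job(job_id='run_1', model_start_time='2011-08-26', model_end_time='2011-08-27',
          restart=False, exe_cmd='mpirun -np 2 ./wrf_hydro.exe')  # exe_cmd is a placeholder

sim = Simulation()
sim.add(model)
sim.add(domain)
sim.add(job)

run_dir = pathlib.Path('/path/to/run_dir')
run_dir.mkdir(parents=True)
os.chdir(str(run_dir))  # the tests rely on compose() writing into the current working directory
sim.compose()
sim.run()
assert sim.jobs[0].exit_status == 0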
180 | 181 | 182 | def test_simulation_collect(sim_output): 183 | sim = Simulation() 184 | sim.collect(sim_dir=sim_output) 185 | assert sim.output is not None 186 | assert type(sim.output) is SimulationOutput 187 | 188 | 189 | def test_simulation_output_checknans(sim_output): 190 | output = SimulationOutput() 191 | output.collect_output(sim_dir=sim_output) 192 | public_atts = [att for att in dir(output) if not att.startswith('__')] 193 | for att in public_atts: 194 | assert getattr(output, att) is not None 195 | assert output.check_output_nans() is None 196 | 197 | 198 | def test_simulation_pickle(model, domain, job, tmpdir): 199 | sim = Simulation() 200 | sim.add(model) 201 | sim.add(domain) 202 | sim.add(job) 203 | pickle_path = pathlib.Path(tmpdir).joinpath('Sim.pkl') 204 | sim.pickle(pickle_path) 205 | sim0 = copy.deepcopy(sim) 206 | del sim 207 | sim = pickle.load(pickle_path.open(mode='rb')) 208 | 209 | sim_diff = deepdiff.DeepDiff(sim, sim0) 210 | unprocessed_diffs = sim_diff.pop('unprocessed', []) 211 | if unprocessed_diffs: 212 | check_unprocessed_diffs(unprocessed_diffs) 213 | assert sim_diff == {} 214 | 215 | 216 | def test_simulation_sub_obj_pickle(model, domain, job, tmpdir): 217 | sim = Simulation() 218 | sim.add(model) 219 | sim.add(domain) 220 | sim.add(job) 221 | 222 | os.chdir(tmpdir) 223 | domain_path = pathlib.Path(tmpdir).joinpath('WrfHydroDomain.pkl') 224 | model_path = pathlib.Path(tmpdir).joinpath('WrfHydroModel.pkl') 225 | sim.pickle_sub_objs() 226 | assert sim.domain.resolve() == domain_path 227 | assert sim.model.resolve() == model_path 228 | 229 | sim.restore_sub_objs() 230 | domain_diff = deepdiff.DeepDiff(sim.domain, domain) 231 | unprocessed_diffs = domain_diff.pop('unprocessed', []) 232 | if unprocessed_diffs: 233 | check_unprocessed_diffs(unprocessed_diffs) 234 | assert domain_diff == {} 235 | 236 | model_diff = deepdiff.DeepDiff(sim.model, model) 237 | unprocessed_diffs = model_diff.pop('unprocessed', []) 238 | if unprocessed_diffs: 239 | check_unprocessed_diffs(unprocessed_diffs) 240 | assert model_diff == {} 241 | -------------------------------------------------------------------------------- /wrfhydropy/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import pytest 4 | 5 | from wrfhydropy.util.xrcmp import xrcmp 6 | from wrfhydropy.util.xrnan import xrnan 7 | 8 | test_dir = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) 9 | collection_data_dir = test_dir / 'data/collection_data/simulation' 10 | nan_na_data_dir = test_dir / 'data/nan_na_data' 11 | 12 | 13 | @pytest.mark.parametrize( 14 | ['filename'], 15 | [ 16 | ('201108260100.CHANOBS_DOMAIN1',), 17 | ('201108260100.CHRTOUT_DOMAIN1',), 18 | ('201108260100.GWOUT_DOMAIN1',), 19 | ('201108260100.LAKEOUT_DOMAIN1',), 20 | ('201108260100.LDASOUT_DOMAIN1',), 21 | ('201108260100.LSMOUT_DOMAIN',), 22 | ('201108260100.RTOUT_DOMAIN1',), 23 | ('HYDRO_RST.2011-08-26_01:00_DOMAIN1',), 24 | ('nudgingLastObs.2011-08-26_01:00:00.nc',), 25 | ('RESTART.2011082601_DOMAIN1',), 26 | ], 27 | ids=[ 28 | 'xrcmp-equals-CHANOBS', 29 | 'xrcmp-equals-CHRTOUT', 30 | 'xrcmp-equals-GWOUT', 31 | 'xrcmp-equals-LAKEOUT', 32 | 'xrcmp-equals-LDASOUT', 33 | 'xrcmp-equals-LSMOUT', 34 | 'xrcmp-equals-RTOUT', 35 | 'xrcmp-equals-HYDRO_RST', 36 | 'xrcmp-equals-nudginglastobs', 37 | 'xrcmp-equals-RESTART', 38 | ] 39 | ) 40 | def test_xrcmp_eq(filename, tmpdir): 41 | file_path = test_dir.joinpath(collection_data_dir) 42 | the_file = 
file_path.joinpath(filename) 43 | log_file = pathlib.Path(tmpdir).joinpath('log.txt') 44 | result = xrcmp(the_file, the_file, log_file) 45 | assert result == 0 46 | 47 | 48 | @pytest.mark.parametrize( 49 | ['filename1', 'filename2'], 50 | [ 51 | ('201108260100.CHANOBS_DOMAIN1', '201108260200.CHANOBS_DOMAIN1'), 52 | ('201108260100.CHRTOUT_DOMAIN1', '201108260200.CHRTOUT_DOMAIN1'), 53 | ('201108260100.GWOUT_DOMAIN1', '201108260200.GWOUT_DOMAIN1'), 54 | ('201108260100.LAKEOUT_DOMAIN1', '201108260200.LAKEOUT_DOMAIN1'), 55 | # ('201108260100.LDASOUT_DOMAIN1', '201108260200.LDASOUT_DOMAIN1'), 56 | # ('201108260100.LSMOUT_DOMAIN', '201108260200.LSMOUT_DOMAIN'), 57 | # ('201108260100.RTOUT_DOMAIN1', '201108260200.RTOUT_DOMAIN1'), 58 | ('HYDRO_RST.2011-08-26_01:00_DOMAIN1', 'HYDRO_RST.2011-08-26_02:00_DOMAIN1'), 59 | # ('nudgingLastObs.2011-08-26_01:00:00.nc', 'nudgingLastObs.2011-08-26_02:00:00.nc'), 60 | # ('RESTART.2011082601_DOMAIN1', 'RESTART.2011082602_DOMAIN1'), 61 | ], 62 | ids=[ 63 | 'xrcmp-unequal-CHANOBS', 64 | 'xrcmp-unequal-CHRTOUT', 65 | 'xrcmp-unequal-GWOUT', 66 | 'xrcmp-unequal-LAKEOUT', 67 | # 'xrcmp-unequal-LDASOUT', 68 | # 'xrcmp-unequal-LSMOUT', 69 | # 'xrcmp-unequal-RTOUT', 70 | 'xrcmp-unequal-HYDRO_RST', 71 | # 'xrcmp-unequal-nudginglastobs', # identical data is the problem 72 | # 'xrcmp-unequal-RESTART', 73 | ] 74 | ) 75 | def test_xrcmp_uneq(filename1, filename2, tmpdir): 76 | file_path = test_dir.joinpath(collection_data_dir) 77 | the_file1 = file_path.joinpath(filename1) 78 | the_file2 = file_path.joinpath(filename2) 79 | log_file = pathlib.Path(tmpdir).joinpath('log.txt') 80 | result = xrcmp(the_file1, the_file2, log_file) 81 | assert result == 1 82 | 83 | 84 | @pytest.mark.parametrize( 85 | ['filename', 'expected'], 86 | [ 87 | ('201108260200.CHANOBS_DOMAIN1', None), 88 | ('201108260200.CHRTOUT_DOMAIN1', None), 89 | ('201108260200.GWOUT_DOMAIN1', None), 90 | ('201108260200.LAKEOUT_DOMAIN1', None), 91 | ('201108260200.LDASOUT_DOMAIN1', None), 92 | ('201108260200.LSMOUT_DOMAIN', None), 93 | ('201108260200.RTOUT_DOMAIN1', None), 94 | ('HYDRO_RST.2011-08-26_02:00_DOMAIN1', None), 95 | ('nudgingLastObs.2011-08-26_02:00:00.nc', None), 96 | ('RESTART.2011082602_DOMAIN1', None), 97 | ], 98 | ids=[ 99 | 'xrnan-CHANOBS', 100 | 'xrnan-CHRTOUT', 101 | 'xrnan-GWOUT', 102 | 'xrnan-LAKEOUT', 103 | 'xrnan-LDASOUT', 104 | 'xrnan-LSMOUT', 105 | 'xrnan-RTOUT', 106 | 'xrnan-HYDRO_RST', 107 | 'xrnan-nudginglastobs', 108 | 'xrnan-RESTART', 109 | ] 110 | ) 111 | def test_xrnan_none(filename, expected, tmpdir): 112 | # Perhaps this test is extraneous? 113 | # Right now only have real data on hand without NaNs. 
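# A sketch of how a NaN-bearing file like those under data/nan_na_data/ could be
# fabricated (the repository keeps its own recipe in nan_na_files_recipe.py; the
# variable name 'some_var' matches the expected reprs in test_xrnan_matrix below):
import numpy as np
import xarray as xr

ds_nan = xr.Dataset({'some_var': ('x', np.array([1.0, np.nan, 3.0]))})
ds_nan.to_netcdf('nan_value_sketch.nc')
# xrnan('nan_value_sketch.nc') should then return {'vars': ['some_var']} rather than None.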
114 | file_path = test_dir.joinpath(collection_data_dir) 115 | the_file = file_path.joinpath(filename) 116 | result = xrnan(the_file) 117 | assert result is expected 118 | 119 | 120 | @pytest.mark.parametrize( 121 | ['filename', 'expected'], 122 | [ 123 | ('fill_value.nc', 'None'), 124 | ('nan_fill.nc', "{'vars': ['some_var']}"), 125 | ('nan_value.nc', "{'vars': ['some_var']}"), 126 | ('value_value.nc', 'None'), 127 | ], 128 | ids=[ 129 | 'xrnan-fill_value', 130 | 'xrnan-nan_fill', 131 | 'xrnan-nan_value', 132 | 'xrnan-value_value', 133 | ] 134 | ) 135 | def test_xrnan_matrix(filename, expected, tmpdir): 136 | file_path = test_dir.joinpath(nan_na_data_dir) 137 | the_file = file_path.joinpath(filename) 138 | result = xrnan(the_file) 139 | assert repr(result) == expected 140 | -------------------------------------------------------------------------------- /wrfhydropy/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NCAR/wrf_hydro_py/003f73f7efb2d38114794ae14daac702e59edc24/wrfhydropy/util/__init__.py -------------------------------------------------------------------------------- /wrfhydropy/util/xrcmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Example Usage 4 | # ipython --pdb xrcmp.py -- \ 5 | # --candidate conus_test/201806012300.RTOUT_DOMAIN1 \ 6 | # --reference conus_test/201806020000.RTOUT_DOMAIN1 \ 7 | # --n_cores 8 \ 8 | # --log_file log.txt 9 | 10 | import math 11 | from multiprocessing import Pool 12 | import pathlib 13 | import sys 14 | # import time 15 | import xarray as xr 16 | 17 | 18 | # A dictionary of chunks for various variables for CONUS testing 19 | # These are for the larger fields which need some control 20 | conus_chunks_dict = { 21 | # RTOUT variables to control 22 | 'SOIL_M': {}, # with {} maxes out at < 18% memory when files do NOT match 23 | # HYDRO_RST variables: None currently 24 | } 25 | 26 | 27 | # # A decorator/closure to check timings. 28 | # def stopwatch(the_func): 29 | # def the_closure(*args, **kw): 30 | # ts = time.time() 31 | # result = the_func(*args, **kw) 32 | # te = time.time() 33 | # print('Timing: ' + the_func.__name__ + ' took ', round(te - ts, 2),' seconds.') 34 | # return result 35 | # return the_closure 36 | 37 | 38 | def calc_stats(arg_tuple): 39 | key = arg_tuple[0] 40 | can_file = arg_tuple[1] 41 | ref_file = arg_tuple[2] 42 | chunks = arg_tuple[3] 43 | exclude_vars = arg_tuple[4] 44 | 45 | # ignore excluded vars 46 | if key in exclude_vars: 47 | return None 48 | 49 | if chunks is None: 50 | chunks = {} # default is no chunks 51 | if key in conus_chunks_dict: 52 | chunks = conus_chunks_dict[key] 53 | 54 | can_ds = xr.open_dataset(can_file, chunks=chunks, mask_and_scale=False) 55 | ref_ds = xr.open_dataset(ref_file, chunks=chunks, mask_and_scale=False) 56 | 57 | # Check for variables in reference and not in candidate? 58 | # Check for variables in candidate and not in reference? 
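# A sketch of the check the two questions above contemplate, reusing the datasets
# already opened in this function: variables present in only one of the two files
# could be reported before any value comparison is attempted.
only_in_can = set(can_ds.variables) - set(ref_ds.variables)
only_in_ref = set(ref_ds.variables) - set(can_ds.variables)
if only_in_can or only_in_ref:
    print('candidate-only variables:', sorted(only_in_can),
          'reference-only variables:', sorted(only_in_ref))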
59 | 60 | if can_ds[key].equals(ref_ds[key]): 61 | return None 62 | 63 | else: 64 | cc = can_ds[key] 65 | rr = ref_ds[key] 66 | 67 | if '|S' in str(cc.dtype): 68 | 69 | # Deal with strings 70 | nz_xr = cc.where(cc != rr, drop=True) 71 | if len(nz_xr) == 0: 72 | return None 73 | else: 74 | the_count = nz_xr.count().load().item(0) 75 | inf = float('inf') 76 | result = { 77 | 'Variable': key, 78 | 'Count': the_count, 79 | 'Sum': inf, 80 | 'Min': inf, 81 | 'Max': inf, 82 | 'Range': inf, 83 | 'Mean': inf, 84 | 'StdDev': inf 85 | } 86 | return result 87 | 88 | else: 89 | # All non-string types 90 | cc = cc.astype(float) 91 | rr = rr.astype(float) 92 | 93 | # THIS NEEDS REMOVED AFTER TESTING IS COMPLETE 94 | # FOR convenience of comparing two files at different times. 95 | # if 'time' in rr.coords: 96 | # rr['time'] = cc.time 97 | # if key == 'time': 98 | # rr.values = cc.values 99 | 100 | diff_da = cc - rr 101 | diff_xr = xr.DataArray(diff_da.compute()) 102 | # TODO: This threshold should be type dependent 103 | nz_xr = diff_xr.where(abs(diff_xr) > 0.000000, drop=True) 104 | if len(nz_xr) == 0: 105 | return None 106 | 107 | the_count = nz_xr.count().load().item(0) 108 | the_sum = nz_xr.sum().load().item(0) 109 | the_min = nz_xr.min().load().item(0) 110 | the_max = nz_xr.max().load().item(0) 111 | the_range = the_max - the_min 112 | the_mean = the_sum / the_count 113 | the_z = (nz_xr - the_mean) 114 | the_std = math.sqrt((the_z * the_z).sum() / the_count) 115 | del the_z 116 | 117 | result = { 118 | 'Variable': key, 119 | 'Count': the_count, 120 | 'Sum': the_sum, 121 | 'Min': the_min, 122 | 'Max': the_max, 123 | 'Range': the_range, 124 | 'Mean': the_mean, 125 | 'StdDev': the_std 126 | } 127 | return result 128 | 129 | 130 | # @stopwatch 131 | def xrcmp( 132 | can_file: str, 133 | ref_file: str, 134 | log_file: str, 135 | n_cores: int = 1, 136 | chunks={}, 137 | exclude_vars: list = [], 138 | ) -> int: 139 | 140 | if exclude_vars is None: 141 | exclude_vars = [] 142 | 143 | # Delete log file first 144 | # Should write a log file that says nothing yet determined? 145 | log_file = pathlib.Path(log_file) 146 | if log_file.exists(): 147 | log_file.unlink() 148 | 149 | # Dont chunk, this is just a meta-data read. 150 | can_ds = xr.open_dataset(can_file) 151 | ref_ds = xr.open_dataset(ref_file) 152 | 153 | # May need to check that they have the same vars. 
154 | can_vars = set([kk for kk in can_ds.variables.keys()])
155 | ref_vars = set([kk for kk in ref_ds.variables.keys()])
156 | have_same_variables = can_vars.difference(ref_vars) == set([])
157 | can_ds.close() # These are likely critical to the success
158 | ref_ds.close() # of multiprocessing
159 |
160 | # TODO: Check that the meta data matches
161 |
162 | # This is quick if not true
163 | # ds_equal = can_ds.equals(re_ds)
164 | # if not ds_equal:
165 |
166 | if n_cores == 1:
167 | all_stats_list = []
168 | for key, val in can_ds.items():
169 | result = calc_stats(
170 | (key, can_file, ref_file, chunks, exclude_vars))
171 | all_stats_list.append(result)
172 | else:
173 | the_args = [
174 | (key, can_file, ref_file, chunks, exclude_vars) for key in can_ds.keys()]
175 | with Pool(n_cores) as pool:
176 | all_stats_list = pool.map(calc_stats, the_args)
177 |
178 | all_stats = {item['Variable']: item for item in all_stats_list if item is not None}
179 |
180 | diff_var_names = sorted(all_stats.keys())
181 | if not diff_var_names:
182 | with open(log_file, 'w') as opened_file:
183 | opened_file.write("Files are identical\n")
184 | return 0
185 |
186 | # Formatting:
187 |
188 | # The goal is to print something like this which is what nccmp outputs.
189 | # channel_rt
190 | # Variable Group Count Sum ... Max Range Mean StdDev
191 | # 0 streamflow / 162 0.003022 ... 0.003832 0.004315 0.000019 0.000361
192 | # 1 nudge / 4 -0.001094 ... 0.000093 0.001272 -0.000274 0.000605
193 | # 2 q_lateral / 170 0.000345 ... 0.000700 0.001145 0.000002 0.000086
194 | # 3 velocity / 165 0.010788 ... 0.005488 0.006231 0.000065 0.000503
195 | # 4 Head / 177 0.002717 ... 0.002662 0.003292 0.000015 0.000258
196 |
197 | stat_names = sorted(all_stats[diff_var_names[0]].keys())
198 | stat_lens = {} # the length/width of each column/stat
199 | n_dec = 3 # number of decimals for floats
200 | n_dec_p = n_dec + 1 # plus the decimal point
201 |
202 | # The format for each type, where full_len specifies the width of the field.
203 | type_fmt = {
204 | str: '{{:{full_len}}}',
205 | int: '{{:{full_len}}}',
206 | float: '{{:{full_len}.' + str(n_dec) + 'f}}'
207 | }
208 |
209 | # Now solve the full_len field widths for all stats. Do this by
210 | # just formatting each as its type and finding the max (best way
211 | # to handle negatives). For floats, take the integer part to find
212 | # its length to the left of the decimal.
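# A worked example of the width rule described above: with n_dec = 3, the float
# -12.3456 gives len(str(int(-12.3456))) == len('-12') == 3, plus n_dec_p == 4 for
# the decimal point and three decimals, so full_len == 7 and
# '{:7.3f}'.format(-12.3456) renders as '-12.346', exactly 7 characters wide.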
213 | for stat_name in stat_names: 214 | all_lens = [] 215 | for key, val in all_stats.items(): 216 | the_val = val[stat_name] 217 | the_type = type(the_val) 218 | the_fmt0 = type_fmt[the_type] 219 | if the_type is str: 220 | full_len = len(the_val) 221 | elif not math.isfinite(the_val): 222 | full_len = len(str(the_val)) 223 | else: 224 | full_len = len(str(int(the_val))) 225 | if the_type is float: 226 | full_len = full_len + n_dec_p 227 | the_fmt = the_fmt0.format(**{'full_len': full_len}) 228 | the_string = the_fmt.format(*[the_val]) 229 | all_lens.append(len(the_string)) 230 | 231 | stat_lens[stat_name] = max(all_lens) 232 | 233 | header_string = ( 234 | '{Variable:>' + str(stat_lens['Variable']) + '} ' 235 | '{Count:>' + str(stat_lens['Count']) + '} ' 236 | '{Sum:>' + str(stat_lens['Sum']) + '} ' 237 | '{Min:>' + str(stat_lens['Min']) + '} ' 238 | '{Max:>' + str(stat_lens['Max']) + '} ' 239 | '{Range:>' + str(stat_lens['Range']) + '} ' 240 | '{Mean:>' + str(stat_lens['Mean']) + '} ' 241 | '{StdDev:>' + str(stat_lens['StdDev']) + '} \n' 242 | ) 243 | 244 | var_string = ( 245 | '{Variable:>' + str(stat_lens['Variable']) + '} ' 246 | '{Count:>' + str(stat_lens['Count']) + '} ' 247 | '{Sum:>' + str(stat_lens['Sum']) + '.' + str(n_dec) + 'f} ' 248 | '{Min:>' + str(stat_lens['Min']) + '.' + str(n_dec) + 'f} ' 249 | '{Max:>' + str(stat_lens['Max']) + '.' + str(n_dec) + 'f} ' 250 | '{Range:>' + str(stat_lens['Range']) + '.' + str(n_dec) + 'f} ' 251 | '{Mean:>' + str(stat_lens['Mean']) + '.' + str(n_dec) + 'f} ' 252 | '{StdDev:>' + str(stat_lens['StdDev']) + '.' + str(n_dec) + 'f} \n' 253 | ) 254 | 255 | header_dict = {name: name for name in stat_names} 256 | the_header = header_string.format(**header_dict) 257 | 258 | with open(log_file, 'w') as opened_file: 259 | opened_file.write(the_header) 260 | for key in all_stats.keys(): 261 | opened_file.write(var_string.format(**all_stats[key])) 262 | 263 | return 1 264 | 265 | 266 | def parse_arguments(): 267 | 268 | import argparse 269 | parser = argparse.ArgumentParser() 270 | parser.add_argument( 271 | "--candidate", metavar="FILE", type=str, required=True, 272 | help="Candidate file to compare." 273 | ) 274 | parser.add_argument( 275 | "--reference", metavar="FILE", type=str, required=True, 276 | help="Reference file to compare." 277 | ) 278 | parser.add_argument( 279 | "--log_file", metavar="FILE", type=str, required=True, 280 | help="File to log potential differences to. " 281 | "Existing file is clobbered." 282 | ) 283 | parser.add_argument( 284 | "--n_cores", metavar="n_cores", type=int, required=False, 285 | default=1, 286 | help="The number of processors to use." 287 | ) 288 | parser.add_argument( 289 | "--chunks", metavar="chunks", type=int, required=False, 290 | default=1, 291 | help="Chunks as integer." 
292 | ) 293 | args = parser.parse_args() 294 | can_file = args.candidate 295 | ref_file = args.reference 296 | log_file = args.log_file 297 | chunks = args.chunks 298 | n_cores = args.n_cores 299 | 300 | if chunks == 1: 301 | chunks = {} # No chunking 302 | elif chunks == 0: 303 | chunks = None # This will use the conus_chunks_dict 304 | 305 | return can_file, ref_file, log_file, chunks, n_cores 306 | 307 | 308 | if __name__ == "__main__": 309 | 310 | can_file, ref_file, log_file, chunks, n_cores = parse_arguments() 311 | ret = xrcmp( 312 | can_file=can_file, 313 | ref_file=ref_file, 314 | log_file=log_file, 315 | n_cores=n_cores, 316 | chunks=chunks 317 | ) 318 | sys.exit(ret) 319 | -------------------------------------------------------------------------------- /wrfhydropy/util/xrnan.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | import pathlib 3 | import sys 4 | from typing import Union 5 | import xarray as xr 6 | 7 | 8 | def check_nans(arg_dict): 9 | var_name = arg_dict['var_name'] 10 | if 'path' in arg_dict.keys(): 11 | path = arg_dict['path'] 12 | ds = xr.open_dataset(path, mask_and_scale=False) 13 | else: 14 | ds = arg_dict['ds'] 15 | if ds[var_name].isnull().any().values: 16 | return var_name 17 | else: 18 | return None 19 | 20 | 21 | def xrnan( 22 | dataset_or_path: Union[str, pathlib.Path, xr.Dataset], 23 | log_file: str = None, 24 | exclude_vars: list = [], 25 | chunks=None, 26 | n_cores: int = 1 27 | ) -> int: 28 | # Set filepath to strings 29 | if not isinstance(dataset_or_path, xr.Dataset): 30 | ds = xr.open_dataset(str(dataset_or_path), mask_and_scale=False, chunks=chunks) 31 | else: 32 | ds = dataset_or_path 33 | 34 | # Looping on variables is much faster for small applications and parallelizable 35 | # for larger ones. 36 | if n_cores < 2 or isinstance(dataset_or_path, xr.Dataset): 37 | nan_vars = [] 38 | for var_name in ds.variables: 39 | nan_vars.append(check_nans({'var_name': var_name, 'ds': ds})) 40 | ds.close() 41 | else: 42 | # The following ds.close() is CRITICAL to the correct results being returned by 43 | # multiprocessing 44 | ds.close() 45 | the_args = [{'var_name': var_name, 'path': dataset_or_path} 46 | for var_name in ds.variables] 47 | with Pool(n_cores) as pool: 48 | nan_vars = pool.map(check_nans, the_args) 49 | 50 | nan_vars_2 = [var for var in nan_vars if var is not None] 51 | 52 | if len(nan_vars_2) == 0: 53 | return None 54 | else: 55 | for var in nan_vars_2: 56 | print(str(dataset_or_path), ': variable "' + var + '' 57 | '' + '" contains NaNs') 58 | return {'vars': nan_vars_2} 59 | 60 | 61 | def parse_arguments(): 62 | 63 | import argparse 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument( 66 | "--path", metavar="FILE", type=str, required=True, 67 | help="File to check for NaNs." 68 | ) 69 | parser.add_argument( 70 | "--log_file", metavar="FILE", type=str, required=True, 71 | help="File to log potential differences to. " 72 | "Existing file is clobbered." 73 | ) 74 | args = parser.parse_args() 75 | path = args.path 76 | log_file = args.log_file 77 | return path, log_file 78 | 79 | 80 | if __name__ == "__main__": 81 | 82 | path, log_file = parse_arguments() 83 | ret = xrnan(path, log_file=log_file) 84 | if ret is None: 85 | exit_code = 0 86 | else: 87 | exit_code = 1 88 | sys.exit(exit_code) 89 | --------------------------------------------------------------------------------
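xrnan.py follows the same standalone pattern as xrcmp.py above; a minimal command-line invocation (the file name is illustrative) looks like:

    python xrnan.py --path 201806012300.RTOUT_DOMAIN1 --log_file log.txt

The process exits 0 when no NaNs are found and 1 otherwise. Note that, as currently written, findings are printed to standard output; the required --log_file argument is parsed and passed through but not otherwise used by xrnan().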