├── src └── pygeohydro │ ├── py.typed │ ├── us_abbrs.py │ ├── __init__.py │ ├── levee.py │ ├── print_versions.py │ ├── watershed.py │ ├── exceptions.py │ ├── plot.py │ ├── nfhl.py │ ├── helpers.py │ ├── waterdata.py │ ├── nlcd.py │ └── nid.py ├── .github ├── FUNDING.yml ├── PULL_REQUEST_TEMPLATE.md ├── workflows │ ├── pre-commit.yml │ ├── test.yml │ └── release.yml └── ISSUE_TEMPLATE │ ├── config.yml │ ├── newfeature.yml │ └── bugreport.yml ├── .sonarcloud.properties ├── MANIFEST.in ├── .gitattributes ├── .git_archival.txt ├── .pep8speaks.yml ├── AUTHORS.rst ├── .deepsource.toml ├── .codecov.yml ├── conftest.py ├── LICENSE ├── CITATION.cff ├── .gitignore ├── ci └── requirements │ ├── environment.yml │ └── environment-dev.yml ├── .pre-commit-config.yaml ├── tests ├── test_exceptions.py ├── test_pygeohydro.py └── test_stn.py ├── CODE_OF_CONDUCT.rst ├── CONTRIBUTING.rst ├── noxfile.py ├── pyproject.toml └── README.rst /src/pygeohydro/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [cheginit] 2 | -------------------------------------------------------------------------------- /.sonarcloud.properties: -------------------------------------------------------------------------------- 1 | sonar.python.version=3 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.rst 3 | 4 | global-exclude .DS_Store 5 | global-exclude *.py[cod] 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # reduce the number of merge conflicts 2 | HISTORY.rst merge=union 3 | # allow installing from git archives 4 | .git_archival.txt export-subst 5 | -------------------------------------------------------------------------------- /.git_archival.txt: -------------------------------------------------------------------------------- 1 | node: 9e08f3f00734f40f508b8ff2450c711cbab4ae50 2 | node-date: 2025-08-12T08:21:11-04:00 3 | describe-name: v0.19.4-6-g9e08f3f0 4 | ref-names: HEAD -> main 5 | -------------------------------------------------------------------------------- /.pep8speaks.yml: -------------------------------------------------------------------------------- 1 | # https://github.com/OrkoHunter/pep8speaks for more info 2 | # pep8speaks will use the flake8 configs in `setup.cfg` 3 | 4 | scanner: 5 | diff_only: false 6 | linter: flake8 7 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | `Taher Chegini <https://github.com/cheginit>`__ 9 | 10 | Contributors 11 | ------------ 12 | 13 | `Fernando Aristizabal `__ 14 | -------------------------------------------------------------------------------- /.deepsource.toml: -------------------------------------------------------------------------------- 1 | exclude_patterns = [ 2 | "tests/**", 3 | "*/print_versions.py", 4 | "*/__init__.py" 5 | ] 6 | version = 1 7 | 8 | [[analyzers]] 9 | enabled = true 10 | name = "python" 11 | 12 | [analyzers.meta] 13 |
runtime_version = "3.x.x" 14 | max_line_length = 100 15 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [ ] Closes #xxxx 4 | - [ ] Tests added and `nox` passes. 5 | - [ ] Passes `pre-commit run --all-files` 6 | - [ ] Changes and the contributor name are documented in `HISTORY.rst`. 7 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | branch: main 3 | 4 | coverage: 5 | status: 6 | project: 7 | default: 8 | informational: true 9 | patch: 10 | default: 11 | informational: true 12 | 13 | comment: false 14 | ignore: 15 | - '**/__init__.py' 16 | - '**/print_versions.py' 17 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | """Configuration for pytest.""" 2 | 3 | from __future__ import annotations 4 | 5 | import pytest 6 | 7 | 8 | @pytest.fixture(autouse=True) 9 | def _add_standard_imports(doctest_namespace): 10 | """Add pygeohydro namespace for doctest.""" 11 | import pygeohydro as gh 12 | 13 | doctest_namespace["gh"] = gh 14 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: 4 | pull_request: 5 | workflow_dispatch: # allows you to trigger manually 6 | 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.ref }} 9 | cancel-in-progress: true 10 | 11 | jobs: 12 | pre-commit: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v5 16 | - uses: excitedleigh/setup-nox@v2.1.0 17 | - run: nox -s pre-commit 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: 💡 Ideas & Discussion 4 | url: https://github.com/hyriver/hyriver.github.io/discussions/categories/ideas 5 | about: Do you have an idea how to improve HyRiver? Feel free to post it to the discussion forum that allows voting for other users. 6 | - name: ⁉️ Help & Support 7 | url: https://github.com/hyriver/hyriver.github.io/discussions/categories/q-a 8 | about: Need help with installation or usage of HyRiver? Please use the discussion forum. 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ======= 2 | License 3 | ======= 4 | 5 | MIT License 6 | 7 | Copyright (c) 2020, Taher Chegini 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 
18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Chegini" 5 | given-names: "Taher" 6 | orcid: "https://orcid.org/0000-0002-5430-6000" 7 | - family-names: "Li" 8 | given-names: "Hong-Yi" 9 | orcid: "https://orcid.org/0000-0002-9807-3851" 10 | - family-names: "Leung" 11 | given-names: "L. Ruby" 12 | orcid: "https://orcid.org/0000-0002-3221-9467" 13 | title: "HyRiver: Hydroclimate Data Retriever" 14 | version: 0.11 15 | doi: 10.21105/joss.03175 16 | date-released: 2021-10-27 17 | url: "https://github.com/cheginit/HyRiver" 18 | preferred-citation: 19 | type: article 20 | authors: 21 | - family-names: "Chegini" 22 | given-names: "Taher" 23 | orcid: "https://orcid.org/0000-0002-5430-6000" 24 | - family-names: "Li" 25 | given-names: "Hong-Yi" 26 | orcid: "https://orcid.org/0000-0002-9807-3851" 27 | - family-names: "Leung" 28 | given-names: "L. Ruby" 29 | orcid: "https://orcid.org/0000-0002-3221-9467" 30 | doi: "10.21105/joss.03175" 31 | journal: "Journal of Open Source Software" 32 | month: 10 33 | start: 1 34 | end: 3 35 | title: "HyRiver: Hydroclimate Data Retriever" 36 | issue: 66 37 | volume: 6 38 | year: 2021 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/newfeature.yml: -------------------------------------------------------------------------------- 1 | name: 💡 Feature Request 2 | description: Suggest an idea for HyRiver 3 | labels: [enhancement] 4 | body: 5 | - type: textarea 6 | id: description 7 | attributes: 8 | label: Is your feature request related to a problem? 9 | description: | 10 | Please do a quick search of existing issues to make sure that this has not been asked before. 11 | Please provide a clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | validations: 13 | required: true 14 | - type: textarea 15 | id: solution 16 | attributes: 17 | label: Describe the solution you'd like 18 | description: | 19 | A clear and concise description of what you want to happen. 20 | - type: textarea 21 | id: alternatives 22 | attributes: 23 | label: Describe alternatives you've considered 24 | description: | 25 | A clear and concise description of any alternative solutions or features you've considered. 26 | validations: 27 | required: false 28 | - type: textarea 29 | id: additional-context 30 | attributes: 31 | label: Additional context 32 | description: | 33 | Add any other context about the feature request here. 
34 | validations: 35 | required: false 36 | -------------------------------------------------------------------------------- /src/pygeohydro/us_abbrs.py: -------------------------------------------------------------------------------- 1 | """US states and territories Abbreviations from ``us`` package.""" 2 | 3 | from __future__ import annotations 4 | 5 | CONTIGUOUS = [ 6 | "AL", 7 | "AZ", 8 | "AR", 9 | "CA", 10 | "CO", 11 | "CT", 12 | "DE", 13 | "FL", 14 | "GA", 15 | "ID", 16 | "IL", 17 | "IN", 18 | "IA", 19 | "KS", 20 | "KY", 21 | "LA", 22 | "ME", 23 | "MD", 24 | "MA", 25 | "MI", 26 | "MN", 27 | "MS", 28 | "MO", 29 | "MT", 30 | "NE", 31 | "NV", 32 | "NH", 33 | "NJ", 34 | "NM", 35 | "NY", 36 | "NC", 37 | "ND", 38 | "OH", 39 | "OK", 40 | "OR", 41 | "PA", 42 | "RI", 43 | "SC", 44 | "SD", 45 | "TN", 46 | "TX", 47 | "UT", 48 | "VT", 49 | "VA", 50 | "WA", 51 | "WV", 52 | "WI", 53 | "WY", 54 | ] 55 | 56 | CONTINENTAL = [ 57 | "AL", 58 | "AK", 59 | "AZ", 60 | "AR", 61 | "CA", 62 | "CO", 63 | "CT", 64 | "DE", 65 | "FL", 66 | "GA", 67 | "ID", 68 | "IL", 69 | "IN", 70 | "IA", 71 | "KS", 72 | "KY", 73 | "LA", 74 | "ME", 75 | "MD", 76 | "MA", 77 | "MI", 78 | "MN", 79 | "MS", 80 | "MO", 81 | "MT", 82 | "NE", 83 | "NV", 84 | "NH", 85 | "NJ", 86 | "NM", 87 | "NY", 88 | "NC", 89 | "ND", 90 | "OH", 91 | "OK", 92 | "OR", 93 | "PA", 94 | "RI", 95 | "SC", 96 | "SD", 97 | "TN", 98 | "TX", 99 | "UT", 100 | "VT", 101 | "VA", 102 | "WA", 103 | "WV", 104 | "WI", 105 | "WY", 106 | ] 107 | TERRITORIES = ["AS", "GU", "MP", "PR", "VI"] 108 | COMMONWEALTHS = ["KY", "MA", "PA", "VA"] 109 | STATES = list(set(CONTINENTAL + TERRITORIES + COMMONWEALTHS)) 110 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | 107 | # logs 108 | .nvimlog 109 | data/ 110 | tags* 111 | cache 112 | junit.xml 113 | .DS_Store 114 | .pixi 115 | pixi.lock 116 | .nox 117 | ehydro_* 118 | -------------------------------------------------------------------------------- /ci/requirements/environment.yml: -------------------------------------------------------------------------------- 1 | name: pygeohydro-tests 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | # async-retriever deps 7 | - aiodns 8 | - aiosqlite 9 | - aiohttp >=3.8.3 10 | - brotli 11 | - cytoolz 12 | - nest-asyncio 13 | - aiohttp-client-cache >=0.8.1 14 | - ujson 15 | - cytoolz 16 | 17 | # pygeoogc deps 18 | # - async-retriever >=0.3.5 19 | - cytoolz 20 | - defusedxml 21 | - owslib >=0.27.2 22 | - pyproj >=2.2 23 | - requests 24 | - requests-cache >=0.9.6 25 | - shapely >=2.0 26 | - urllib3 27 | 28 | # pygeoutils deps 29 | - cytoolz 30 | - geopandas-base >=0.7 31 | - netcdf4 32 | - numpy >=1.17 33 | - pyproj >=2.2 34 | - rasterio >=1.2 35 | - rioxarray >=0.11 36 | - scipy 37 | - shapely >=2.0 38 | - ujson 39 | - xarray >=2023.01.0 40 | 41 | # hydrosignatures deps 42 | - numpy 43 | - pandas 44 | - scipy 45 | # optional deps 46 | - numba>=0.57 47 | 48 | # pynhd deps 49 | # - async-retriever >=0.3.6 50 | - cytoolz 51 | - geopandas-base >=0.9 52 | - networkx 53 | - numpy >=1.17 54 | - pandas >=1.0 55 | - pyarrow >=1.0.1 56 | # - pygeoogc >=0.13.7 57 | # - pygeoutils >=0.13.7 58 | - shapely >=2.0 59 | 60 | # pygeohydro deps 61 | - cytoolz 62 | - defusedxml 63 | - folium 64 | - geopandas-base >=0.7 65 | - h5netcdf 66 | # - hydrosignatures >=0.1.1 67 | - lxml 68 | - matplotlib-base >=3.3 69 | - numpy >=1.17 70 | - pandas >=1.0 71 | # - pygeoogc >=0.13.7 72 | # - pygeoutils >=0.13.9 73 | # - pynhd >=0.13.7 74 | - rasterio >=1.2 75 | - rioxarray >=0.11.0 76 | - scipy 77 | - shapely >=2.0 78 | - xarray >=2023.01.0 79 | # optional deps 80 | - planetary-computer 81 | - pystac-client 82 | 83 | # optional deps for speeding up some operations 84 | - bottleneck 85 | 86 | # test deps 87 | - psutil 88 | - pytest-cov 89 | - pytest-xdist 90 | 91 | - pip 92 | - pip: 93 | - git+https://github.com/hyriver/async-retriever.git 94 | - git+https://github.com/hyriver/hydrosignatures.git 95 | - git+https://github.com/hyriver/pygeoogc.git 96 | - git+https://github.com/hyriver/pygeoutils.git 97 | - git+https://github.com/hyriver/pynhd.git 98 | 
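# A hedged sketch of how this file is typically consumed (mirroring the
# CONTRIBUTING.rst workflow; the environment name comes from the ``name:``
# key at the top of this file):
#   mamba env create -f ci/requirements/environment.yml
#   mamba activate pygeohydro-tests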
-------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | tags-ignore: 8 | - '**' 9 | pull_request: 10 | branches: 11 | - '**' 12 | workflow_dispatch: 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | test: 20 | name: python ${{ matrix.python-version }}, ${{ matrix.os }} 21 | runs-on: ${{ matrix.os }} 22 | env: 23 | REPO_NAME: ${{ github.event.repository.name }} 24 | defaults: 25 | run: 26 | shell: bash -l {0} 27 | strategy: 28 | matrix: 29 | python-version: [3.9, '3.12'] 30 | os: [ubuntu-latest, macos-latest, windows-latest] 31 | 32 | steps: 33 | - uses: actions/checkout@v5 34 | - name: Set environment variables 35 | run: |- 36 | echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV 37 | - name: Setup micromamba 38 | uses: mamba-org/setup-micromamba@v2 39 | env: 40 | DATE: ${{ steps.date.outputs.time }} 41 | with: 42 | environment-file: ci/requirements/environment.yml 43 | environment-name: ${{ env.REPO_NAME }}-tests 44 | create-args: >- 45 | python=${{ matrix.python-version }} 46 | post-cleanup: all 47 | cache-environment: true 48 | cache-environment-key: ${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{env.DATE}} 49 | - name: Install error reporter 50 | if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12' 51 | run: | 52 | python -m pip install pytest-github-actions-annotate-failures 53 | - name: Install the package 54 | run: |- 55 | python -m pip install --no-deps . 56 | - name: Run pytest 57 | run: |- 58 | pytest --cov --cov-append --cov-branch --cov-report=xml --junitxml=junit.xml 59 | - name: Upload coverage reports to Codecov 60 | uses: codecov/codecov-action@v5 61 | with: 62 | token: ${{ secrets.CODECOV_TOKEN }} 63 | slug: ${{ github.event.repository.owner.login }}/${{ github.event.repository.name }} 64 | -------------------------------------------------------------------------------- /src/pygeohydro/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for PyGeoHydro.""" 2 | 3 | from __future__ import annotations 4 | 5 | from importlib.metadata import PackageNotFoundError, version 6 | 7 | # for now export the plotting functions from hydrosignatures 8 | # since they have been moved from pygeohydro.plot to hydrosignatures 9 | from hydrosignatures import plot 10 | from pygeohydro import exceptions, helpers 11 | from pygeohydro.helpers import get_us_states 12 | from pygeohydro.levee import NLD 13 | from pygeohydro.nfhl import NFHL 14 | from pygeohydro.nid import NID 15 | from pygeohydro.nlcd import ( 16 | cover_statistics, 17 | nlcd_area_percent, 18 | nlcd_bycoords, 19 | nlcd_bygeom, 20 | overland_roughness, 21 | ) 22 | from pygeohydro.nwis import NWIS, streamflow_fillna 23 | from pygeohydro.plot import cover_legends, descriptor_legends, interactive_map 24 | from pygeohydro.print_versions import show_versions 25 | from pygeohydro.pygeohydro import ( 26 | EHydro, 27 | get_camels, 28 | soil_gnatsgo, 29 | soil_polaris, 30 | soil_properties, 31 | soil_soilgrids, 32 | ssebopeta_bycoords, 33 | ssebopeta_bygeom, 34 | ) 35 | from pygeohydro.stnfloodevents import STNFloodEventData, stn_flood_event 36 | from pygeohydro.waterdata import SensorThings, WaterQuality 37 | from pygeohydro.watershed import WBD, huc_wb_full, 
irrigation_withdrawals 38 | 39 | try: 40 | __version__ = version("pygeohydro") 41 | except PackageNotFoundError: 42 | __version__ = "999" 43 | 44 | __all__ = [ 45 | "NFHL", 46 | "NID", 47 | "NLD", 48 | "NWIS", 49 | "WBD", 50 | "EHydro", 51 | "STNFloodEventData", 52 | "SensorThings", 53 | "WaterQuality", 54 | "__version__", 55 | "cover_legends", 56 | "cover_statistics", 57 | "descriptor_legends", 58 | "exceptions", 59 | "get_camels", 60 | "get_us_states", 61 | "helpers", 62 | "huc_wb_full", 63 | "interactive_map", 64 | "irrigation_withdrawals", 65 | "nlcd_area_percent", 66 | "nlcd_bycoords", 67 | "nlcd_bygeom", 68 | "overland_roughness", 69 | "plot", 70 | "show_versions", 71 | "soil_gnatsgo", 72 | "soil_polaris", 73 | "soil_properties", 74 | "soil_soilgrids", 75 | "ssebopeta_bycoords", 76 | "ssebopeta_bygeom", 77 | "stn_flood_event", 78 | "streamflow_fillna", 79 | ] 80 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: check-added-large-files 6 | args: [--maxkb=50000] 7 | - id: mixed-line-ending 8 | args: [--fix=lf] 9 | - id: check-ast 10 | - id: check-builtin-literals 11 | - id: check-case-conflict 12 | - id: check-docstring-first 13 | - id: check-shebang-scripts-are-executable 14 | - id: check-merge-conflict 15 | - id: check-json 16 | - id: check-toml 17 | - id: check-xml 18 | - id: check-yaml 19 | - id: debug-statements 20 | - id: destroyed-symlinks 21 | - id: detect-private-key 22 | - id: end-of-file-fixer 23 | exclude: ^LICENSE|\.(html|csv|txt|svg|py)$ 24 | - id: pretty-format-json 25 | args: [--autofix, --no-ensure-ascii, --no-sort-keys] 26 | - id: trailing-whitespace 27 | args: [--markdown-linebreak-ext=md] 28 | exclude: \.(html|svg)$ 29 | 30 | - repo: https://github.com/bwhmather/ssort 31 | rev: 0.15.0 32 | hooks: 33 | - id: ssort 34 | name: Sort top level statements with ssort 35 | 36 | - repo: https://github.com/astral-sh/ruff-pre-commit 37 | rev: v0.12.8 38 | hooks: 39 | - id: ruff 40 | name: Linting with Ruff 41 | types_or: [python, jupyter] 42 | args: [--fix] 43 | - id: ruff-format 44 | name: Formatting with Ruff 45 | types_or: [python, jupyter] 46 | 47 | - repo: https://github.com/PyCQA/doc8 48 | rev: v2.0.0 49 | hooks: 50 | - id: doc8 51 | name: Check documentation formats with doc8 52 | args: [--max-line-length, '100'] 53 | 54 | - repo: https://github.com/codespell-project/codespell 55 | rev: v2.4.1 56 | hooks: 57 | - id: codespell 58 | name: Check common misspellings in text files with codespell. 
59 | additional_dependencies: 60 | - tomli 61 | 62 | - repo: https://github.com/tox-dev/pyproject-fmt 63 | rev: v2.6.0 64 | hooks: 65 | - id: pyproject-fmt 66 | name: Apply a consistent format to pyproject.toml 67 | 68 | - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks 69 | rev: v2.15.0 70 | hooks: 71 | - id: pretty-format-yaml 72 | args: [--autofix, --indent, '2'] 73 | 74 | - repo: https://github.com/rhysd/actionlint 75 | rev: v1.7.7 76 | hooks: 77 | - id: actionlint 78 | files: .github/workflows/ 79 | args: [-ignore, SC1090, -ignore, SC2046, -ignore, SC2086, -ignore, SC2129, -ignore, SC2155, -ignore, property "date"] 80 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | workflow_dispatch: # allows you to trigger manually 8 | 9 | permissions: 10 | contents: write 11 | attestations: write 12 | id-token: write 13 | 14 | jobs: 15 | release-notes: 16 | name: Create Release Notes 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v5 20 | - name: Generate Changelog 21 | run: |- 22 | echo "Release Notes" > ${{ github.workflow }}-CHANGELOG.rst 23 | echo "-------------" >> ${{ github.workflow }}-CHANGELOG.rst 24 | PAT="^---" 25 | L1=$(grep -n $PAT HISTORY.rst | sed -n 1p | cut -d ":" -f 1) 26 | L2=$(grep -n $PAT HISTORY.rst | sed -n 2p | cut -d ":" -f 1) 27 | awk "NR > $L1 && NR < $L2 - 1" HISTORY.rst >> ${{ github.workflow }}-CHANGELOG.rst 28 | - name: Convert rst to md 29 | uses: docker://pandoc/core 30 | with: 31 | args: >- 32 | ${{ github.workflow }}-CHANGELOG.rst 33 | --wrap=none 34 | -t markdown 35 | -o ${{ github.workflow }}-CHANGELOG.md 36 | - name: Remove extra spaces 37 | run: |- 38 | sed -i 's/- /- /g' ${{ github.workflow }}-CHANGELOG.md 39 | - name: Github Release 40 | uses: softprops/action-gh-release@v2 41 | if: startsWith(github.ref, 'refs/tags/') 42 | with: 43 | body_path: ${{ github.workflow }}-CHANGELOG.md 44 | env: 45 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | build-package: 48 | name: Build and Inspect Python Package 49 | runs-on: ubuntu-latest 50 | needs: release-notes 51 | steps: 52 | - uses: actions/checkout@v5 53 | with: 54 | fetch-depth: 0 55 | 56 | - uses: hynek/build-and-inspect-python-package@v2 57 | with: 58 | attest-build-provenance-github: true 59 | 60 | release-pypi: 61 | name: Publish to PyPI 62 | runs-on: ubuntu-latest 63 | needs: build-package 64 | 65 | steps: 66 | - name: Download packages built by build-and-inspect-python-package 67 | uses: actions/download-artifact@v5 68 | with: 69 | name: Packages 70 | path: dist 71 | 72 | - name: Generate artifact attestation for sdist and wheel 73 | uses: actions/attest-build-provenance@v2 74 | with: 75 | subject-path: dist 76 | 77 | - name: Upload package to PyPI 78 | uses: pypa/gh-action-pypi-publish@release/v1 79 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bugreport.yml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug Report 2 | description: File a bug report to help us improve 3 | labels: [bug, needs triage] 4 | body: 5 | - type: textarea 6 | id: what-happened 7 | attributes: 8 | label: What happened? 9 | description: | 10 | Thanks for reporting a bug! Please describe what you were trying to get done. 11 | Tell us what happened, what went wrong. 
12 | validations: 13 | required: true 14 | 15 | - type: textarea 16 | id: what-did-you-expect-to-happen 17 | attributes: 18 | label: What did you expect to happen? 19 | description: | 20 | Describe what you expected to happen. 21 | validations: 22 | required: false 23 | 24 | - type: textarea 25 | id: sample-code 26 | attributes: 27 | label: Minimal Complete Verifiable Example 28 | description: | 29 | Minimal, self-contained copy-pastable example that demonstrates the issue. This will be automatically formatted into code, so no need for markdown backticks. 30 | render: Python 31 | 32 | - type: checkboxes 33 | id: mvce-checkboxes 34 | attributes: 35 | label: MVCE confirmation 36 | description: | 37 | Please confirm that the bug report is in an excellent state, so we can understand & fix it quickly & efficiently. For more details, check out: 38 | 39 | - [Minimal Complete Verifiable Examples](https://stackoverflow.com/help/mcve) 40 | - [Craft Minimal Bug Reports](http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) 41 | 42 | options: 43 | - label: Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue. 44 | - label: Complete example — the example is self-contained, including all data and the text of any traceback. 45 | - label: New issue — a search of GitHub Issues suggests this is not a duplicate. 46 | 47 | - type: textarea 48 | id: log-output 49 | attributes: 50 | label: Relevant log output 51 | description: Please copy and paste any relevant output/traceback. This will be automatically formatted into code, so no need for markdown backticks. 52 | render: Python 53 | 54 | - type: textarea 55 | id: extra 56 | attributes: 57 | label: Anything else we need to know? 58 | description: | 59 | Please describe any other information you want to share. 60 | 61 | - type: textarea 62 | id: show-versions 63 | attributes: 64 | label: Environment 65 | description: | 66 | Paste the output of `pygeohydro.show_versions()` between the `
<details>` tags, leaving an empty line following the opening tag. 67 | value: | 68 | <details>
69 | 70 | 71 | 72 | </details>
73 | validations: 74 | required: true 75 | -------------------------------------------------------------------------------- /ci/requirements/environment-dev.yml: -------------------------------------------------------------------------------- 1 | name: hyriver-dev 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | # system 7 | - python >=3.9 8 | - libnetcdf 9 | - hdf5 10 | - libgdal-core 11 | 12 | # async-retriever 13 | - aiodns 14 | - aiofiles 15 | - aiohttp >=3.8.3 16 | - aiohttp-client-cache >=0.12.3 17 | - aiosqlite 18 | - brotli 19 | - cytoolz 20 | - ujson 21 | 22 | # hydrosignatures 23 | - numpy >=2 24 | - pandas >=1 25 | - scipy 26 | - xarray >=2024.7 27 | # optional 28 | - numba >=0.60 29 | - numbagg 30 | 31 | # py3dep 32 | # - async-retriever 33 | - click >=0.7 34 | - cytoolz 35 | - geopandas >=1 36 | - numpy >=1.17 37 | # - pygeoogc 38 | # - pygeoutils 39 | - rasterio >=1.2 40 | - rioxarray >=0.11 41 | - shapely >=2 42 | - xarray >=2024.7 43 | # optional 44 | - numba >=0.60 45 | - numbagg 46 | 47 | # pydaymet/pygridmet/pynldas2 48 | - aiofiles 49 | - aiohttp >=3.8.3 50 | - click >=0.7 51 | - netcdf4 52 | - numpy >=2 53 | - pandas >=1 54 | - pyproj >=3.0.1 55 | - rasterio 56 | - rioxarray >=0.15 57 | - shapely >=2 58 | - xarray >=2024.7 59 | 60 | # pygeohydro 61 | # - async-retriever 62 | - cytoolz 63 | - defusedxml 64 | - folium 65 | - geopandas >=1 66 | - h5netcdf 67 | # - hydrosignatures 68 | - matplotlib >=3.5 69 | - numpy >=2 70 | - pandas >=1 71 | # - pygeoogc 72 | # - pygeoutils 73 | # - pynhd 74 | - pyproj >=3.0.1 75 | - rioxarray >=0.15 76 | - scipy 77 | - shapely >=2 78 | - ujson 79 | - xarray >=2024.7 80 | # optional 81 | - planetary-computer 82 | - pystac-client 83 | 84 | # pygeoogc 85 | # - async-retriever 86 | - cytoolz 87 | - defusedxml 88 | - joblib 89 | - multidict 90 | - owslib >=0.27.2 91 | - pyproj >=3.0.1 92 | - requests 93 | - requests-cache >=0.9.6 94 | - shapely >=2 95 | - typing-extensions 96 | - ujson 97 | - url-normalize >=1.4 98 | - urllib3 99 | - yarl 100 | 101 | # pygeoutils 102 | - cytoolz 103 | - geopandas >=1 104 | - netcdf4 105 | - numpy >=2 106 | - pyproj >=3.0.1 107 | - rasterio >=1.2 108 | - rioxarray >=0.15 109 | - scipy 110 | - shapely >=2 111 | - ujson 112 | - xarray >=2024.7 113 | 114 | # pynhd 115 | # - async-retriever 116 | - cytoolz 117 | - geopandas >=1 118 | - networkx 119 | - numpy >=2 120 | - pandas >=1 121 | - pyarrow >=1.0.1 122 | # - pygeoogc 123 | # - pygeoutils 124 | - shapely >=2 125 | # optional 126 | - py7zr 127 | 128 | # seamless-3dep 129 | - aiofiles 130 | - aiohttp >=3.8.3 131 | - rasterio 132 | 133 | # performance 134 | - numbagg 135 | - flox 136 | - opt-einsum 137 | 138 | # example notebooks 139 | - mapclassify 140 | - contextily 141 | - hvplot 142 | - osmnx 143 | - tqdm 144 | - ffmpeg 145 | - xarray-spatial 146 | - dask-expr 147 | - datashader 148 | 149 | # IDE and dev 150 | - pytest 151 | - nox 152 | - nbmake 153 | - ipywidgets 154 | - ipykernel 155 | - nbconvert >=7.16.4 156 | - notebook >=7.2.2 157 | -------------------------------------------------------------------------------- /tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import geopandas as gpd 4 | import pandas as pd 5 | import pytest 6 | from shapely import Polygon 7 | 8 | import pygeohydro as gh 9 | from pygeohydro.exceptions import ( 10 | DataNotAvailableError, 11 | InputRangeError, 12 | InputTypeError, 13 | InputValueError, 14 | ) 
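# A hypothetical illustration (not part of the suite) of how these exceptions
# surface: ssebopeta_bycoords expects a pandas.DataFrame of (id, x, y)
# coordinates, so a call with a bare tuple, as in test_invalid_dates below,
# raises InputTypeError with a message mentioning pandas.DataFrame:
#
# >>> gh.ssebopeta_bycoords((-69.5, 45.2), dates=("2000-01-01", "2000-01-05"))  # doctest: +SKIP
# Traceback (most recent call last):
#     ...
# InputTypeError: ...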
15 | 16 | SID_NATURAL = "01031500" 17 | GEOM = Polygon( 18 | [ 19 | [-69.77, 45.07], 20 | [-69.31, 45.07], 21 | [-69.31, 45.45], 22 | [-69.77, 45.45], 23 | [-69.77, 45.07], 24 | ] 25 | ) 26 | 27 | 28 | class TestETAExceptions: 29 | dates = ("2000-01-01", "2000-01-05") 30 | 31 | def test_invalid_dates(self): 32 | with pytest.raises(InputTypeError) as ex: 33 | _ = gh.ssebopeta_bycoords((GEOM.centroid.x, GEOM.centroid.y), dates="2000-01-01") 34 | assert "pandas.DataFrame" in str(ex.value) 35 | 36 | def test_unsupported_years(self): 37 | coords = pd.DataFrame( 38 | [ 39 | ["s1", -72.77, 40.07], 40 | ["s2", -70.31, 46.07], 41 | ["s3", -69.31, 45.45], 42 | ["s4", -69.77, 45.45], 43 | ], 44 | columns=["id", "x", "y"], 45 | ) 46 | with pytest.raises(InputRangeError) as ex: 47 | _ = gh.ssebopeta_bycoords(coords, dates=[2010, 2014, 2030]) 48 | assert "2000" in str(ex.value) 49 | 50 | 51 | class TestNLCDExceptions: 52 | """Test NLCD Exceptions.""" 53 | 54 | years = {"cover": [2016, 2019]} 55 | res = 1e3 56 | geom = gpd.GeoSeries([GEOM], crs=4326) 57 | 58 | def test_invalid_years_type(self): 59 | with pytest.raises(InputTypeError) as ex: 60 | _ = gh.nlcd_bygeom(self.geom, years=2010, resolution=self.res, ssl=False) 61 | assert "dict" in str(ex.value) 62 | 63 | def test_invalid_region(self): 64 | with pytest.raises(InputValueError) as ex: 65 | _ = gh.nlcd_bygeom( 66 | self.geom, years=self.years, resolution=self.res, region="us", ssl=False 67 | ) 68 | assert "L48" in str(ex.value) 69 | 70 | def test_invalid_years(self): 71 | with pytest.raises(InputValueError) as ex: 72 | _ = gh.nlcd_bygeom(self.geom, years={"cover": 2030}, resolution=self.res, ssl=False) 73 | assert "2019" in str(ex.value) 74 | 75 | def test_invalid_cover_type(self): 76 | lulc = gh.nlcd_bygeom( 77 | self.geom, 78 | years={"cover": [2016, 2019]}, 79 | resolution=1e3, 80 | crs=3542, 81 | ssl=False, 82 | ) 83 | cover = lulc[0] 84 | with pytest.raises(InputTypeError, match="DataArray"): 85 | _ = gh.cover_statistics(cover) 86 | 87 | def test_invalid_cover_values(self): 88 | lulc = gh.nlcd_bygeom( 89 | self.geom, 90 | years={"cover": [2016, 2019]}, 91 | resolution=1e3, 92 | crs=3542, 93 | ssl=False, 94 | ) 95 | cover = lulc[0].cover_2016 96 | with pytest.raises(InputValueError, match="11"): 97 | _ = gh.cover_statistics(cover * 2) 98 | 99 | 100 | class TestNWISExceptions: 101 | nwis = gh.NWIS() 102 | 103 | def test_invalid_station(self): 104 | with pytest.raises(DataNotAvailableError, match="Discharge"): 105 | _ = self.nwis.get_streamflow(SID_NATURAL, ("1900-01-01", "1900-01-31")) -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | Our Pledge 5 | ---------- 6 | 7 | In the interest of fostering an open and welcoming environment, we as 8 | contributors and maintainers pledge to making participation in our 9 | project and our community a harassment-free experience for everyone, 10 | regardless of age, body size, disability, ethnicity, sex 11 | characteristics, gender identity and expression, level of experience, 12 | education, socioeconomic status, nationality, personal appearance, 13 | race, religion, or sexual identity and orientation.
14 | 15 | Our Standards 16 | ------------- 17 | 18 | Examples of behavior that contributes to creating a positive environment 19 | include: 20 | 21 | - Using welcoming and inclusive language 22 | - Being respectful of differing viewpoints and experiences 23 | - Gracefully accepting constructive criticism 24 | - Focusing on what is best for the community 25 | - Showing empathy towards other community members 26 | 27 | Examples of unacceptable behavior by participants include: 28 | 29 | - The use of sexualized language or imagery and unwelcome sexual 30 | attention or advances 31 | - Trolling, insulting/derogatory comments, and personal or political 32 | attacks 33 | - Public or private harassment 34 | - Publishing others' private information, such as a physical or 35 | electronic address, without explicit permission 36 | - Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | Our Responsibilities 40 | -------------------- 41 | 42 | Project maintainers are responsible for clarifying the standards of 43 | acceptable behavior and are expected to take appropriate and fair 44 | corrective action in response to any instances of unacceptable behavior. 45 | 46 | Project maintainers have the right and responsibility to remove, edit, 47 | or reject comments, commits, code, wiki edits, issues, and other 48 | contributions that are not aligned to this Code of Conduct, or to ban 49 | temporarily or permanently any contributor for other behaviors that they 50 | deem inappropriate, threatening, offensive, or harmful. 51 | 52 | Scope 53 | ----- 54 | 55 | This Code of Conduct applies both within project spaces and in public 56 | spaces when an individual is representing the project or its community. 57 | Examples of representing a project or community include using an 58 | official project e-mail address, posting via an official social media 59 | account, or acting as an appointed representative at an online or 60 | offline event. Representation of a project may be further defined and 61 | clarified by project maintainers. 62 | 63 | Enforcement 64 | ----------- 65 | 66 | Instances of abusive, harassing, or otherwise unacceptable behavior may 67 | be reported by contacting the project team at tchegini@uh.edu. All 68 | complaints will be reviewed and investigated and will result in a 69 | response that is deemed necessary and appropriate to the circumstances. 70 | The project team is obligated to maintain confidentiality with regard to 71 | the reporter of an incident. Further details of specific enforcement 72 | policies may be posted separately. 73 | 74 | Project maintainers who do not follow or enforce the Code of Conduct in 75 | good faith may face temporary or permanent repercussions as determined 76 | by other members of the project's leadership. 
77 | 78 | Attribution 79 | ----------- 80 | 81 | This Code of Conduct is adapted from the `Contributor 82 | Covenant <https://www.contributor-covenant.org>`__, version 1.4, 83 | available at 84 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 85 | 86 | For answers to common questions about this code of conduct, see 87 | https://www.contributor-covenant.org/faq 88 | -------------------------------------------------------------------------------- /src/pygeohydro/levee.py: -------------------------------------------------------------------------------- 1 | """Accessing the National Levee Database (NLD) through web services.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Literal 6 | 7 | from pygeohydro.exceptions import InputValueError 8 | from pynhd import AGRBase 9 | 10 | if TYPE_CHECKING: 11 | from pyproj import CRS 12 | 13 | CRSType = int | str | CRS 14 | 15 | __all__ = ["NLD"] 16 | 17 | 18 | class NLD(AGRBase): 19 | """Access National Levee Database (NLD) services. 20 | 21 | Notes 22 | ----- 23 | For more info visit: https://geospatial.sec.usace.army.mil/dls/rest/services/NLD/Public/FeatureServer 24 | 25 | Parameters 26 | ---------- 27 | layer : str, optional 28 | A valid service layer. Valid layers are: 29 | 30 | - ``boreholes`` 31 | - ``crossings`` 32 | - ``levee_stations`` 33 | - ``piezometers`` 34 | - ``pump_stations`` 35 | - ``relief_wells`` 36 | - ``alignment_lines`` 37 | - ``closure_structures`` 38 | - ``cross_sections`` 39 | - ``embankments`` 40 | - ``floodwalls`` 41 | - ``frm_lines`` 42 | - ``pipe_gates`` 43 | - ``toe_drains`` 44 | - ``leveed_areas`` 45 | - ``system_routes`` 46 | - ``pipes`` 47 | - ``channels`` 48 | 49 | outfields : str or list, optional 50 | Target field name(s), default to "*" i.e., all the fields. 51 | crs : str, int, or pyproj.CRS, optional 52 | Target spatial reference, default to ``EPSG:4326``. 53 | 54 | Methods 55 | ------- 56 | bygeom(geom, geo_crs=4326, sql_clause="", distance=None, return_m=False, return_geom=True) 57 | Get features within a geometry that can be combined with a SQL where clause. 58 | byids(field, fids, return_m=False, return_geom=True) 59 | Get features by object IDs. 60 | bysql(sql_clause, return_m=False, return_geom=True) 61 | Get features using a valid SQL 92 WHERE clause.
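For instance, ``bysql`` accepts any SQL-92 filter string, e.g.,
``NLD("system_routes").bysql("systemId = 5205000591")``; the ``systemId``
field name and value here are assumptions that should be verified against
the layer schema at the service URL above.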
62 | 63 | Examples 64 | -------- 65 | >>> from pygeohydro import NLD 66 | >>> nld = NLD("levee_stations") 67 | >>> levees = nld.bygeom((-105.914551, 37.437388, -105.807434, 37.522392)) 68 | >>> levees.shape 69 | (1838, 12) 70 | """ 71 | 72 | def __init__( 73 | self, 74 | layer: Literal[ 75 | "boreholes", 76 | "crossings", 77 | "levee_stations", 78 | "piezometers", 79 | "pump_stations", 80 | "relief_wells", 81 | "alignment_lines", 82 | "closure_structures", 83 | "cross_sections", 84 | "embankments", 85 | "floodwalls", 86 | "frm_lines", 87 | "pipe_gates", 88 | "toe_drains", 89 | "leveed_areas", 90 | "system_routes", 91 | "pipes", 92 | "channels", 93 | ], 94 | outfields: str | list[str] = "*", 95 | crs: CRSType = 4326, 96 | ): 97 | self.valid_layers = { 98 | "boreholes": "0", 99 | "crossings": "1", 100 | "levee_stations": "2", 101 | "piezometers": "3", 102 | "pump_stations": "4", 103 | "relief_wells": "5", 104 | "alignment_lines": "6", 105 | "closure_structures": "7", 106 | "cross_sections": "8", 107 | "embankments": "9", 108 | "floodwalls": "10", 109 | "frm_lines": "11", 110 | "pipe_gates": "12", 111 | "toe_drains": "13", 112 | "leveed_areas": "14", 113 | "system_routes": "15", 114 | "pipes": "16", 115 | "channels": "17", 116 | } 117 | _layer = self.valid_layers.get(layer) 118 | if _layer is None: 119 | raise InputValueError("layer", list(self.valid_layers)) 120 | base_url = ( 121 | "https://geospatial.sec.usace.army.mil/dls/rest/services/NLD/Public/FeatureServer" 122 | ) 123 | super().__init__( 124 | f"{base_url}/{_layer}", 125 | None, 126 | outfields, 127 | crs, 128 | ) 129 | -------------------------------------------------------------------------------- /src/pygeohydro/print_versions.py: -------------------------------------------------------------------------------- 1 | """Utility functions for printing version information. 
2 | 3 | The original script is from 4 | `xarray `__ 5 | """ 6 | 7 | from __future__ import annotations 8 | 9 | import contextlib 10 | import locale 11 | import os 12 | import platform 13 | import struct 14 | import subprocess 15 | import sys 16 | from importlib.metadata import PackageNotFoundError, distribution 17 | from importlib.metadata import version as get_version 18 | from pathlib import Path 19 | from typing import TextIO 20 | 21 | __all__ = ["show_versions"] 22 | 23 | 24 | def _get_sys_info(): 25 | """Return system information as a dict.""" 26 | blob = [] 27 | 28 | # get full commit hash 29 | commit = None 30 | if Path(".git").is_dir(): 31 | with contextlib.suppress(Exception): 32 | pipe = subprocess.Popen( 33 | ["git", "log", '--format="%H"', "-n", "1"], 34 | stdout=subprocess.PIPE, 35 | stderr=subprocess.PIPE, 36 | ) 37 | so, _ = pipe.communicate() 38 | 39 | if pipe.returncode == 0: 40 | commit = so 41 | with contextlib.suppress(ValueError): 42 | commit = so.decode("utf-8") 43 | commit = commit.strip().strip('"') 44 | 45 | blob.append(("commit", commit)) 46 | 47 | with contextlib.suppress(Exception): 48 | (sysname, _, release, _, machine, processor) = platform.uname() 49 | blob.extend( 50 | [ 51 | ("python", sys.version), 52 | ("python-bits", struct.calcsize("P") * 8), 53 | ("OS", str(sysname)), 54 | ("OS-release", str(release)), 55 | ("machine", str(machine)), 56 | ("processor", str(processor)), 57 | ("byteorder", str(sys.byteorder)), 58 | ("LC_ALL", os.environ.get("LC_ALL", "None")), 59 | ("LANG", os.environ.get("LANG", "None")), 60 | ("LOCALE", str(locale.getlocale())), 61 | ] 62 | ) 63 | return blob 64 | 65 | 66 | def _get_package_version(modname: str) -> str: 67 | try: 68 | _ = distribution(modname) 69 | try: 70 | return get_version(modname) 71 | except (NotImplementedError, AttributeError): 72 | return "installed" 73 | except PackageNotFoundError: 74 | return "N/A" 75 | 76 | 77 | def show_versions(file: TextIO = sys.stdout) -> None: 78 | """Print versions of all the dependencies. 79 | 80 | Parameters 81 | ---------- 82 | file : file-like, optional 83 | print to the given file-like object. Defaults to sys.stdout. 
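Examples
--------
A minimal usage sketch; the printed report varies by environment.

>>> import pygeohydro as gh
>>> gh.show_versions()  # doctest: +SKIP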
84 | """ 85 | deps = [ 86 | # HyRiver packages 87 | "async-retriever", 88 | "pygeoogc", 89 | "pygeoutils", 90 | "py3dep", 91 | "pynhd", 92 | "pygridmet", 93 | "pydaymet", 94 | "hydrosignatures", 95 | "pynldas2", 96 | "pygeohydro", 97 | "tiny-retriever", 98 | # async-retriever 99 | "aiodns", 100 | "aiofiles", 101 | "aiohttp", 102 | "aiohttp-client-cache", 103 | "aiosqlite", 104 | "brotli", 105 | "cytoolz", 106 | "orjson", 107 | # hydrosignatures 108 | "numpy", 109 | "pandas", 110 | "scipy", 111 | "xarray", 112 | "numba", 113 | "numbagg", 114 | # py3dep 115 | "click", 116 | "geopandas", 117 | "rasterio", 118 | "rioxarray", 119 | "shapely", 120 | # pydaymet/pygridmet/pynldas2 121 | "netcdf4", 122 | "pyproj", 123 | # pygeohydro 124 | "defusedxml", 125 | "folium", 126 | "h5netcdf", 127 | "matplotlib", 128 | "planetary-computer", 129 | "pystac-client", 130 | # pygeoogc 131 | "joblib", 132 | "multidict", 133 | "owslib", 134 | "requests", 135 | "requests-cache", 136 | "typing-extensions", 137 | "url-normalize", 138 | "urllib3", 139 | "yarl", 140 | # pygeoutils 141 | # (no unique dependencies not already listed) 142 | # pynhd 143 | "networkx", 144 | "pyarrow", 145 | "py7zr", 146 | # performance 147 | "flox", 148 | "opt-einsum", 149 | ] 150 | deps_blob = {modname: _get_package_version(modname) for modname in deps} 151 | 152 | print("\nSYS INFO", file=file) 153 | print("--------", file=file) 154 | for k, stat in _get_sys_info(): 155 | print(f"{k}: {stat}", file=file) 156 | 157 | pad = len(max(deps, key=len)) + 1 158 | header = f"\n{'PACKAGE':<{pad}} VERSION" 159 | print(header, file=file) 160 | print("-" * len(header), file=file) 161 | for k, stat in deps_blob.items(): 162 | print(f"{k:<{pad}} {stat}", file=file) 163 | print("-" * len(header), file=file) 164 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every little bit 8 | helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/hyriver/pygeohydro/issues. 19 | 20 | Fix Bugs 21 | ~~~~~~~~ 22 | 23 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 24 | wanted" is open to whoever wants to implement it. 25 | 26 | Implement Features 27 | ~~~~~~~~~~~~~~~~~~ 28 | 29 | Other than new features that you might have in mind, you can look through 30 | the GitHub issues for features. Anything tagged with "enhancement" 31 | and "help wanted" is open to whoever wants to implement it. 32 | 33 | Write Documentation 34 | ~~~~~~~~~~~~~~~~~~~ 35 | 36 | PyGeoHydro could always use more documentation, whether as part of the 37 | official PyGeoHydro docs, in docstrings, or even on the web in blog posts, 38 | articles, and such. 39 | 40 | Submit Feedback 41 | ~~~~~~~~~~~~~~~ 42 | 43 | The best way to send feedback is to file an issue at https://github.com/hyriver/pygeohydro/issues. 44 | 45 | If you are proposing a feature: 46 | 47 | * Explain in detail how it would work. 48 | * Keep the scope as narrow as possible, to make it easier to implement. 49 | * Remember that this is a volunteer-driven project, and that contributions 50 | are welcome :) 51 | 52 | Get Started! 
53 | ------------ 54 | 55 | Ready to contribute? Here's how to set up pygeohydro for local development. 56 | 57 | 1. Fork the PyGeoHydro repo through the GitHub website. 58 | 2. Clone your fork locally and add the main ``pygeohydro`` as the upstream remote: 59 | 60 | .. code-block:: console 61 | 62 | $ git clone git@github.com:your_name_here/pygeohydro.git 63 | $ git remote add upstream git@github.com:hyriver/pygeohydro.git 64 | 65 | 3. Install your local copy into a virtualenv. Assuming you have ``mamba`` installed, 66 | this is how you can set up your fork for local development: 67 | 68 | .. code-block:: console 69 | 70 | $ cd pygeohydro/ 71 | $ mamba env create -f ci/requirements/environment-dev.yml 72 | $ mamba activate pygeohydro-dev 73 | $ python -m pip install . --no-deps 74 | 75 | 4. Create a branch for local development: 76 | 77 | .. code-block:: console 78 | 79 | $ git checkout -b bugfix-or-feature/name-of-your-bugfix-or-feature 80 | $ git push 81 | 82 | 5. Now you can make your changes locally, make sure to add a description of 83 | the changes to ``HISTORY.rst`` file and add extra tests, if applicable, 84 | to ``tests`` folder. Also, make sure to give yourself credit by adding 85 | your name at the end of the item(s) that you add in the history like this 86 | ``By `Taher Chegini `_``. Then, 87 | fetch the latest updates from the remote and resolve any merge conflicts: 88 | 89 | .. code-block:: console 90 | 91 | $ git fetch upstream 92 | $ git merge upstream/name-of-your-branch 93 | 94 | 6. Then create a new environment for linting and another for testing: 95 | 96 | .. code-block:: console 97 | 98 | $ mamba create -n py11 python=3.11 nox tomli pre-commit codespell gdal 99 | $ mamba activate py11 100 | $ nox -s pre-commit 101 | $ nox -s type-check 102 | 103 | $ mamba create -n py38 python=3.8 nox tomli pre-commit codespell gdal 104 | $ mamba activate py38 105 | $ nox -s tests 106 | 107 | Note that if Python 3.11 is already installed on your system, you can 108 | skip creating the ``py11`` environment and just use your system's Python 3.11 109 | to run the linting and type-checking tests, like this: 110 | 111 | .. code-block:: console 112 | 113 | $ mamba create -n py38 python=3.8 nox tomli pre-commit codespell gdal 114 | $ mamba activate py38 115 | $ nox 116 | 117 | 7. If you are making breaking changes make sure to reflect them in 118 | the documentation, ``README.rst``, and tests if necessary. 119 | 120 | 8. Commit your changes and push your branch to GitHub. Start the commit message with 121 | ``ENH:``, ``BUG:``, ``DOC:`` to indicate whether the commit is a new feature, 122 | documentation related, or a bug fix. For example: 123 | 124 | .. code-block:: console 125 | 126 | $ git add . 127 | $ git commit -m "ENH: A detailed description of your changes." 128 | $ git push origin name-of-your-branch 129 | 130 | 9. Submit a pull request through the GitHub website. 131 | 132 | Tips 133 | ---- 134 | 135 | To run a subset of tests: 136 | 137 | .. code-block:: console 138 | 139 | $ nox -s tests -- -n=1 -k "test_name1 or test_name2" 140 | 141 | Deploying 142 | --------- 143 | 144 | A reminder for the maintainers on how to deploy. 145 | Make sure all your changes are committed (including an entry in HISTORY.rst). 146 | Then run: 147 | 148 | .. code-block:: console 149 | 150 | $ git tag -a vX.X.X -m "vX.X.X" 151 | $ git push --follow-tags 152 | 153 | where ``X.X.X`` is the version number following the 154 | `semantic versioning spec `__ i.e., MAJOR.MINOR.PATCH. 
155 | Then release the tag from Github and Github Actions will deploy it to PyPi. 156 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """Nox sessions.""" 2 | 3 | from __future__ import annotations 4 | 5 | import shutil 6 | from pathlib import Path 7 | from typing import Literal 8 | 9 | import nox 10 | 11 | try: 12 | import tomllib as tomli 13 | except ImportError: 14 | import tomli 15 | 16 | 17 | def get_package_name() -> str: 18 | """Get the name of the package.""" 19 | with Path("pyproject.toml").open("rb") as f: 20 | return tomli.load(f)["project"]["name"] 21 | 22 | 23 | def get_extras() -> list[str]: 24 | """Get the name of the package.""" 25 | with Path("pyproject.toml").open("rb") as f: 26 | extras = tomli.load(f)["project"]["optional-dependencies"] 27 | return [e for e in extras if e not in ("test", "typeguard")] 28 | 29 | 30 | def get_deps() -> list[str]: 31 | """Get the name of the package.""" 32 | with Path("pyproject.toml").open("rb") as f: 33 | return tomli.load(f)["project"]["dependencies"] 34 | 35 | 36 | py39 = ["3.9"] 37 | py312 = ["3.12"] 38 | py313 = ["3.13"] 39 | package = get_package_name() 40 | gh_deps = { 41 | "async-retriever": [], 42 | "hydrosignatures": [], 43 | "pygeoogc": ["async-retriever"], 44 | "pygeoutils": ["async-retriever", "pygeoogc"], 45 | "pynhd": ["async-retriever", "pygeoogc", "pygeoutils"], 46 | "py3dep": ["async-retriever", "pygeoogc", "pygeoutils"], 47 | "pygeohydro": ["async-retriever", "pygeoogc", "pygeoutils", "pynhd", "hydrosignatures"], 48 | "pydaymet": [], 49 | "pygridmet": [], 50 | "pynldas2": [], 51 | } 52 | nox.options.sessions = ( 53 | "pre-commit", 54 | "type-check", 55 | "test39", 56 | "test312", 57 | ) 58 | 59 | 60 | def install_deps( 61 | session: nox.Session, extra: str | None = None, version_limit: list[str] | None = None 62 | ) -> None: 63 | """Install package dependencies.""" 64 | deps = [f".[{extra}]"] if extra else ["."] 65 | deps += [f"git+https://github.com/hyriver/{p}.git" for p in gh_deps[package]] 66 | if version_limit: 67 | deps += list(version_limit) 68 | session.install(*deps) 69 | dirs = [".pytest_cache", "build", "dist", ".eggs"] 70 | for d in dirs: 71 | shutil.rmtree(d, ignore_errors=True) 72 | 73 | patterns = ["*.egg-info", "*.egg", "*.pyc", "*~", "**/__pycache__"] 74 | for p in patterns: 75 | for f in Path.cwd().rglob(p): 76 | shutil.rmtree(f, ignore_errors=True) 77 | 78 | 79 | @nox.session(name="pre-commit", python=py313, venv_backend="micromamba") 80 | def pre_commit(session: nox.Session) -> None: 81 | """Lint using pre-commit.""" 82 | session.install("pre-commit") 83 | session.run( 84 | "pre-commit", 85 | "run", 86 | "--all-files", 87 | "--hook-stage=manual", 88 | *session.posargs, 89 | ) 90 | 91 | 92 | @nox.session(name="pc-update", python=py313, venv_backend="micromamba") 93 | def pc_update(session: nox.Session) -> None: 94 | """Lint using pre-commit.""" 95 | session.install("pre-commit") 96 | session.run( 97 | "pre-commit", 98 | "autoupdate", 99 | *session.posargs, 100 | ) 101 | 102 | 103 | @nox.session(name="type-check", python=py312, venv_backend="micromamba") 104 | def type_check(session: nox.Session) -> None: 105 | """Run Pyright.""" 106 | extras = get_extras() 107 | install_deps(session, ",".join(extras)) 108 | session.install("pyright") 109 | session.run("pyright") 110 | 111 | 112 | def setup_session(session: nox.Session, with_jit: bool) -> bool: 113 | """Set up session 
environment with conda and dependencies.""" 114 | session.conda_install("gdal", channel="conda-forge") 115 | extras = get_extras() 116 | jit_dep = "jit" in extras 117 | if jit_dep and not with_jit: 118 | extras.remove("jit") 119 | install_deps(session, ",".join(["test", *extras])) 120 | return jit_dep 121 | 122 | 123 | def run_tests( 124 | session: nox.Session, 125 | jit: bool, 126 | py_version: Literal[39, 312], 127 | extra_args: list[str] | None = None, 128 | ) -> None: 129 | """Run the test suite with optional jit and extra arguments.""" 130 | session.run( 131 | "pytest", 132 | "--cov", 133 | "--cov-append", 134 | "--cov-branch", 135 | "--cov-report=xml", 136 | "--junitxml=junit.xml", 137 | *(extra_args or []), 138 | *session.posargs, 139 | ) 140 | session.notify("cover") 141 | if jit: 142 | session.notify(f"jit{py_version}") 143 | 144 | 145 | @nox.session(python="3.9", venv_backend="micromamba") 146 | def test39(session: nox.Session) -> None: 147 | """Run the test suite for Python 3.9.""" 148 | jit_dep = setup_session(session, False) 149 | run_tests(session, jit_dep, 39) 150 | 151 | 152 | @nox.session(python="3.12", venv_backend="micromamba") 153 | def test312(session: nox.Session) -> None: 154 | """Run the test suite for Python 3.12.""" 155 | jit_dep = setup_session(session, False) 156 | run_tests(session, jit_dep, 312) 157 | 158 | 159 | @nox.session(python="3.9", venv_backend="micromamba") 160 | def jit39(session: nox.Session) -> None: 161 | """Run tests that require jit dependencies for Python 3.9.""" 162 | setup_session(session, True) 163 | session.run("pytest", "-m", "jit", *session.posargs) 164 | 165 | 166 | @nox.session(python="3.12", venv_backend="micromamba") 167 | def jit312(session: nox.Session) -> None: 168 | """Run tests that require jit dependencies for Python 3.12.""" 169 | setup_session(session, True) 170 | session.run("pytest", "-m", "jit", *session.posargs) 171 | 172 | 173 | @nox.session(python=py313, venv_backend="micromamba") 174 | def cover(session: nox.Session) -> None: 175 | """Coverage analysis.""" 176 | session.install("coverage[toml]") 177 | session.run("coverage", "report") 178 | session.run("coverage", "html") 179 | -------------------------------------------------------------------------------- /src/pygeohydro/watershed.py: -------------------------------------------------------------------------------- 1 | """Accessing watershed boundary-level data through web services.""" 2 | 3 | from __future__ import annotations 4 | 5 | import importlib.util 6 | import io 7 | from pathlib import Path 8 | from typing import TYPE_CHECKING, cast 9 | 10 | import geopandas as gpd 11 | import pandas as pd 12 | import xarray as xr 13 | 14 | import async_retriever as ar 15 | import pygeoogc as ogc 16 | from pygeohydro.exceptions import InputValueError 17 | from pygeoogc import ServiceURL 18 | from pynhd import AGRBase 19 | from pynhd.core import ScienceBase 20 | 21 | if TYPE_CHECKING: 22 | from pyproj import CRS 23 | 24 | CRSType = int | str | CRS 25 | 26 | __all__ = [ 27 | "WBD", 28 | "huc_wb_full", 29 | "irrigation_withdrawals", 30 | ] 31 | 32 | 33 | class WBD(AGRBase): 34 | """Access Watershed Boundary Dataset (WBD). 35 | 36 | Notes 37 | ----- 38 | This web service offers Hydrologic Unit (HU) polygon boundaries for 39 | the United States, Puerto Rico, and the U.S. Virgin Islands. 40 | For more info visit: https://hydro.nationalmap.gov/arcgis/rest/services/wbd/MapServer 41 | 42 | Parameters 43 | ---------- 44 | layer : str, optional 45 | A valid service layer. 
Valid layers are:
46 |
47 | - ``wbdline``
48 | - ``huc2``
49 | - ``huc4``
50 | - ``huc6``
51 | - ``huc8``
52 | - ``huc10``
53 | - ``huc12``
54 | - ``huc14``
55 | - ``huc16``
56 |
57 | outfields : str or list, optional
58 | Target field name(s), defaults to ``"*"``, i.e., all the fields.
59 | crs : str, int, or pyproj.CRS, optional
60 | Target spatial reference, defaults to ``EPSG:4326``.
61 | """
62 |
63 | def __init__(self, layer: str, outfields: str | list[str] = "*", crs: CRSType = 4326):
64 | self.valid_layers = {
65 | "wbdline": "wbdline",
66 | "huc2": "2-digit hu (region)",
67 | "huc4": "4-digit hu (subregion)",
68 | "huc6": "6-digit hu (basin)",
69 | "huc8": "8-digit hu (subbasin)",
70 | "huc10": "10-digit hu (watershed)",
71 | "huc12": "12-digit hu (subwatershed)",
72 | "huc14": "14-digit hu",
73 | "huc16": "16-digit hu",
74 | }
75 | _layer = self.valid_layers.get(layer)
76 | if _layer is None:
77 | raise InputValueError("layer", list(self.valid_layers))
78 | super().__init__(ServiceURL().restful.wbd, _layer, outfields, crs)
79 |
80 |
81 | def huc_wb_full(huc_lvl: int) -> gpd.GeoDataFrame:
82 | """Get the full watershed boundary for a given HUC level.
83 |
84 | Notes
85 | -----
86 | This function is designed for cases where the full watershed boundary is needed
87 | for a given HUC level. If only a subset of the HUCs is needed, then use
88 | the ``pygeohydro.WBD`` class. The full dataset is downloaded from The National Map's
89 | `WBD staged products <https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/HU2/Shape>`__.
90 |
91 | Parameters
92 | ----------
93 | huc_lvl : int
94 | HUC level; must be an even number between 2 and 16.
95 |
96 | Returns
97 | -------
98 | geopandas.GeoDataFrame
99 | The full watershed boundary for the given HUC level.
100 | """
101 | valid_hucs = [2, 4, 6, 8, 10, 12, 14, 16]
102 | if huc_lvl not in valid_hucs:
103 | raise InputValueError("huc_lvl", list(map(str, valid_hucs)))
104 |
105 | base_url = "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/HU2/Shape"
106 |
107 | urls = [f"{base_url}/WBD_{h2:02}_HU2_Shape.zip" for h2 in range(1, 23)]
108 | fnames = [Path("cache", Path(url).name) for url in urls]
109 | fnames = ogc.streaming_download(urls, fnames=fnames)
110 | fnames = [f for f in fnames if f is not None]
111 | keys = (p.stem.split("_")[1] for p in fnames)
112 | engine = "pyogrio" if importlib.util.find_spec("pyogrio") else "fiona"
113 | huc = (
114 | gpd.GeoDataFrame(
115 | pd.concat(
116 | (gpd.read_file(f"{p}!Shape/WBDHU{huc_lvl}.shp", engine=engine) for p in fnames),
117 | keys=keys,
118 | )
119 | )
120 | .reset_index()
121 | .rename(columns={"level_0": "huc2"})
122 | .drop(columns="level_1")
123 | )
124 | huc = cast("gpd.GeoDataFrame", huc)
125 | return huc
126 |
127 |
128 | def irrigation_withdrawals() -> xr.Dataset:
129 | """Get monthly water use for irrigation at HUC12-level for CONUS.
130 |
131 | Notes
132 | -----
133 | Dataset is retrieved from https://doi.org/10.5066/P9FDLY8P. 
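
Examples
--------
A minimal usage sketch; the call downloads several CSV files from ScienceBase,
so it is skipped in doctests (the mean below comes from the test suite):

>>> irr = gh.irrigation_withdrawals()  # doctest: +SKIP
>>> float(irr.TW.mean())  # doctest: +SKIP
419996.4992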
134 | """
135 | item = ScienceBase.get_file_urls("5ff7acf4d34ea5387df03d73")
136 | urls = item.loc[item.index.str.contains(".csv"), "url"]
137 | resp = ar.retrieve_text(urls.tolist())
138 | irr = {}
139 | for name, r in zip(urls.index, resp):
140 | df = pd.read_csv(
141 | io.StringIO(r),
142 | usecols=lambda s: "m3" in s or "huc12t" in s,  # type: ignore
143 | )
144 | df["huc12t"] = df["huc12t"].str.strip("'")
145 | df = df.rename(columns={"huc12t": "huc12"}).set_index("huc12")
146 | df = df.rename(columns={c: str(c)[:3].capitalize() for c in df})
147 | irr[name[-6:-4]] = df.copy()
148 | ds = xr.Dataset(irr).rename({"dim_1": "month"})
149 | long_names = {
150 | "GW": "groundwater_withdrawal",
151 | "SW": "surface_water_withdrawal",
152 | "TW": "total_withdrawal",
153 | "CU": "consumptive_use",
154 | }
155 | for v, n in long_names.items():
156 | ds[v].attrs["long_name"] = n
157 | ds[v].attrs["units"] = "m3"
158 | ds.attrs["description"] = " ".join(
159 | (
160 | "Estimated Monthly Water Use for Irrigation by",
161 | "12-Digit Hydrologic Unit in the Conterminous United States for 2015",
162 | )
163 | )
164 | ds.attrs["source"] = "https://doi.org/10.5066/P9FDLY8P"
165 | return ds
166 |
--------------------------------------------------------------------------------
/src/pygeohydro/exceptions.py:
--------------------------------------------------------------------------------
1 | """Customized PyGeoHydro exceptions."""
2 |
3 | from __future__ import annotations
4 |
5 | from typing import TYPE_CHECKING
6 |
7 | import async_retriever.exceptions as ar
8 | import pygeoogc.exceptions as ogc
9 |
10 | if TYPE_CHECKING:
11 | from collections.abc import Generator, Sequence
12 |
13 |
14 | class ServiceError(ar.ServiceError):
15 | """Exception raised when the requested data is not available on the server.
16 |
17 | Parameters
18 | ----------
19 | err : str
20 | Service error message.
21 | """
22 |
23 |
24 | class MissingColumnError(Exception):
25 | """Exception raised when a required column is missing from a dataframe.
26 |
27 | Parameters
28 | ----------
29 | missing : list
30 | List of missing columns.
31 | """
32 |
33 | def __init__(self, missing: list[str]) -> None:
34 | self.message = f"The following columns are missing:\n{', '.join(missing)}"
35 | super().__init__(self.message)
36 |
37 | def __str__(self) -> str:
38 | """Return the error message."""
39 | return self.message
40 |
41 |
42 | class MissingCRSError(Exception):
43 | """Exception raised when input GeoDataFrame is missing CRS."""
44 |
45 | def __init__(self) -> None:
46 | self.message = "The input GeoDataFrame is missing CRS."
47 | super().__init__(self.message)
48 |
49 | def __str__(self) -> str:
50 | """Return the error message."""
51 | return self.message
52 |
53 |
54 | class ServiceUnavailableError(ogc.ServiceUnavailableError):
55 | """Exception raised when the service is not available.
56 |
57 | Parameters
58 | ----------
59 | url : str
60 | The server URL.
61 | """
62 |
63 |
64 | class DataNotAvailableError(Exception):
65 | """Exception raised when the requested data is not available.
66 |
67 | Parameters
68 | ----------
69 | data_name : str
70 | Name of the requested data.
71 | """
72 |
73 | def __init__(self, data_name: str) -> None:
74 | self.message = f"{data_name.capitalize()} is not available for the requested query."
75 | super().__init__(self.message)
76 |
77 | def __str__(self) -> str:
78 | """Return the error message."""
79 | return self.message
80 |
81 |
82 | class InputValueError(Exception):
83 | """Exception raised for invalid input. 
84 |
85 | Parameters
86 | ----------
87 | inp : str
88 | Name of the input parameter.
89 | valid_inputs : tuple
90 | List of valid inputs.
91 | given : str, optional
92 | The given input, defaults to None.
93 | """
94 |
95 | def __init__(
96 | self,
97 | inp: str,
98 | valid_inputs: Sequence[str | int] | Generator[str | int, None, None],
99 | given: str | int | None = None,
100 | ) -> None:
101 | if given is None:
102 | self.message = f"Given {inp} is invalid. Valid options are:\n"
103 | else:
104 | self.message = f"Given {inp} ({given}) is invalid. Valid options are:\n"
105 | self.message += "\n".join(str(i) for i in valid_inputs)
106 | super().__init__(self.message)
107 |
108 | def __str__(self) -> str:
109 | """Return the error message."""
110 | return self.message
111 |
112 |
113 | class InputRangeError(Exception):
114 | """Exception raised when a function argument is not in the valid range.
115 |
116 | Parameters
117 | ----------
118 | database : str
119 | Database name.
120 | rng : tuple
121 | Tuple of valid range.
122 | """
123 |
124 | def __init__(self, database: str, rng: tuple[str, str]) -> None:
125 | self.message = f"{database.capitalize()} is available from {rng[0]} to {rng[1]}."
126 | super().__init__(self.message)
127 |
128 | def __str__(self) -> str:
129 | """Return the error message."""
130 | return self.message
131 |
132 |
133 | class InputTypeError(Exception):
134 | """Exception raised when a function argument type is invalid.
135 |
136 | Parameters
137 | ----------
138 | arg : str
139 | Name of the function argument.
140 | valid_type : str
141 | The valid type of the argument.
142 | example : str, optional
143 | An example of a valid form of the argument, defaults to None.
144 | """
145 |
146 | def __init__(self, arg: str, valid_type: str, example: str | None = None) -> None:
147 | self.message = f"The {arg} argument should be of type {valid_type}"
148 | if example is not None:
149 | self.message += f":\n{example}"
150 | super().__init__(self.message)
151 |
152 | def __str__(self) -> str:
153 | """Return the error message."""
154 | return self.message
155 |
156 |
157 | class ZeroMatchedError(ValueError):
158 | """Exception raised when a service query returns no matching features.
159 |
160 | Parameters
161 | ----------
162 | msg : str
163 | The exception error message.
164 | """
165 |
166 | def __init__(self, msg: str | None = None) -> None:
167 | if msg is None:
168 | self.message = "Service returned no features."
169 | else:
170 | self.message = f"Service returned no features with the following error message:\n{msg}"
171 | super().__init__(self.message)
172 |
173 | def __str__(self) -> str:
174 | """Return the error message."""
175 | return self.message
176 |
177 |
178 | class DependencyError(Exception):
179 | """Exception raised when dependencies are not met. 
180 |
181 | Parameters
182 | ----------
183 | func : str
184 | Name of the function with missing dependencies.
185 | libraries : tuple
186 | List of missing libraries.
187 | """
188 |
189 | def __init__(self, func: str, libraries: str | list[str] | Generator[str, None, None]) -> None:
190 | libraries = [libraries] if isinstance(libraries, str) else libraries
191 | self.message = f"The following dependencies are missing for running {func}:\n"
192 | self.message += ", ".join(libraries)
193 | super().__init__(self.message)
194 |
195 | def __str__(self) -> str:
196 | """Return the error message."""
197 | return self.message
198 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | build-backend = "hatchling.build"
3 |
4 | requires = [
5 | "hatch-vcs",
6 | "hatchling",
7 | ]
8 |
9 | [project]
10 | name = "pygeohydro"
11 | description = "Access geospatial web services that offer hydrological data"
12 | readme = "README.rst"
13 | license = { text = "MIT" }
14 | authors = [
15 | { name = "Taher Chegini", email = "cheginit@gmail.com" },
16 | ]
17 | requires-python = ">=3.9"
18 | classifiers = [
19 | "Development Status :: 4 - Beta",
20 | "Intended Audience :: Science/Research",
21 | "License :: OSI Approved :: MIT License",
22 | "Operating System :: OS Independent",
23 | "Programming Language :: Python",
24 | "Programming Language :: Python :: 3 :: Only",
25 | "Programming Language :: Python :: 3.9",
26 | "Programming Language :: Python :: 3.10",
27 | "Programming Language :: Python :: 3.11",
28 | "Programming Language :: Python :: 3.12",
29 | "Programming Language :: Python :: 3.13",
30 | "Topic :: Scientific/Engineering",
31 | "Topic :: Scientific/Engineering :: GIS",
32 | "Topic :: Scientific/Engineering :: Hydrology",
33 | "Typing :: Typed",
34 | ]
35 | dynamic = [
36 | "version",
37 | ]
38 | dependencies = [
39 | "async-retriever>=0.19.3,<0.20",
40 | "cytoolz",
41 | "defusedxml",
42 | "geopandas>=1",
43 | "h5netcdf",
44 | "hydrosignatures>=0.19.3,<0.20",
45 | "numpy>=2",
46 | "pandas>=1",
47 | "pygeoogc>=0.19.4,<0.20",
48 | "pygeoutils>=0.19.5,<0.20",
49 | "pynhd>=0.19.3,<0.20",
50 | "pyproj>=3.0.1",
51 | "rioxarray>=0.15",
52 | "scipy",
53 | "shapely>=2",
54 | "xarray>=2024.7",
55 | ]
56 | optional-dependencies.plot = [
57 | "folium",
58 | "matplotlib>=3.5",
59 | ]
60 | optional-dependencies.stac = [
61 | "planetary-computer",
62 | "pystac-client",
63 | ]
64 | optional-dependencies.test = [
65 | "pytest-cov",
66 | "pytest-sugar",
67 | ]
68 | urls.Changelog = "https://docs.hyriver.io/changelogs/pygeohydro.html"
69 | urls.CI = "https://github.com/hyriver/pygeohydro/actions"
70 | urls.Homepage = "https://docs.hyriver.io/readme/pygeohydro.html"
71 | urls.Issues = "https://github.com/hyriver/pygeohydro/issues"
72 |
73 | [tool.hatch.version]
74 | source = "vcs"
75 |
76 | [tool.hatch.build.targets.wheel]
77 | packages = [
78 | "src/pygeohydro",
79 | ]
80 |
81 | [tool.ruff]
82 | target-version = "py39"
83 | line-length = 100
84 |
85 | exclude = [
86 | ".nox",
87 | "__pycache__",
88 | ]
89 |
90 | lint.select = [
91 | # flake8-bugbear
92 | "B",
93 | # flake8-comprehensions
94 | "C4",
95 | # pydocstyle
96 | "D",
97 | # Error
98 | "E",
99 | # pyflakes
100 | "F",
101 | # refurb
102 | "FURB",
103 | # isort
104 | "I",
105 | # flake8-gettext
106 | "INT",
107 | # flake8-implicit-str-concat
108 | "ISC",
109 | # pep8-naming
110 | "N",
111 | # compatibility with numpy 2.0
112 | "NPY201",
113 | # Perflint
114 | "PERF",
115 | # pygrep-hooks 
116 | "PGH", 117 | # misc lints 118 | "PIE", 119 | # pylint 120 | "PLC", 121 | "PLE", 122 | "PLR", 123 | "PLW", 124 | # flake8-pytest-style 125 | "PT", 126 | # flake8-use-pathlib 127 | "PTH", 128 | # flake8-pyi 129 | "PYI", 130 | # flake8-quotes 131 | "Q", 132 | # Ruff-specific rules 133 | "RUF", 134 | # bandit 135 | "S", 136 | # flake8-simplify 137 | "SIM", 138 | # flake8-debugger 139 | "T10", 140 | # flake8-print 141 | "T20", 142 | # type-checking imports 143 | "TC", 144 | # tidy imports 145 | "TID", 146 | # tryceratops 147 | "TRY", 148 | # pyupgrade 149 | "UP", 150 | # Warning 151 | "W", 152 | # flake8-2020 153 | "YTT", 154 | ] 155 | 156 | lint.ignore = [ 157 | "D103", 158 | "D105", 159 | "E501", 160 | # conflict with ruff-formatter 161 | "ISC001", 162 | # specific type error ignored 163 | "PGH003", 164 | # topo level import 165 | "PLC0415", 166 | "PLR0913", 167 | "PLR2004", 168 | # url schema 169 | "S310", 170 | "S603", 171 | # shell command 172 | "S605", 173 | "S607", 174 | # custom exception 175 | "TRY003", 176 | ] 177 | 178 | lint.per-file-ignores."tests/*.py" = [ 179 | "D100", 180 | "D101", 181 | "D102", 182 | "D103", 183 | "D104", 184 | "D105", 185 | "D106", 186 | "D107", 187 | # specific type error ignored 188 | "PGH003", 189 | # use typing.ClassVar 190 | "RUF012", 191 | # use of "assert" 192 | "S101", 193 | ] 194 | lint.extend-safe-fixes = [ 195 | # module level imports 196 | "E402", 197 | # break down assert 198 | "PT018", 199 | # Move imports 200 | "TC", 201 | # absolute imports 202 | "TID252", 203 | ] 204 | lint.isort.known-first-party = [ 205 | "async_retriever", 206 | "pygeoogc", 207 | "pygeoutils", 208 | "pynhd", 209 | "py3dep", 210 | "hydrosignatures", 211 | "pygeohydro", 212 | "pydaymet", 213 | "pygridmet", 214 | "pynldas2", 215 | "tiny_retriever", 216 | ] 217 | lint.isort.required-imports = [ "from __future__ import annotations" ] 218 | 219 | lint.pydocstyle.convention = "numpy" 220 | 221 | [tool.codespell] 222 | skip = "__pycache__,_build,.mypy_cache,.git,./htmlcov,.nox,**/us_abbrs.py,cache" 223 | ignore-words-list = "gage,gages,paramss,trough" 224 | 225 | [tool.pytest.ini_options] 226 | addopts = [ 227 | "--import-mode=importlib", 228 | "--doctest-modules", 229 | "-v", 230 | "--durations=5", 231 | ] 232 | doctest_optionflags = 'NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL NUMBER' 233 | filterwarnings = [ 234 | "ignore:.*distutils.*", 235 | "ignore:.*--rsyncdir command line argument.*", 236 | "ignore:.*numpy.ndarray size changed.*", 237 | "ignore:.*'cgi' is deprecated.*", 238 | "ignore:.*Ensure you extract a single element.*", 239 | "ignore:.*Deprecated in Pydantic V2.0.*", 240 | ] 241 | testpaths = [ 242 | "tests", 243 | ] 244 | 245 | [tool.coverage.report] 246 | exclude_lines = [ 247 | 'raise ServiceUnavailableError', 248 | "if TYPE_CHECKING:", 249 | ] 250 | ignore_errors = true 251 | omit = [ 252 | "**/__init__.py", 253 | ] 254 | 255 | [tool.coverage.paths] 256 | source = [ "src", "*/site-packages" ] 257 | 258 | [tool.coverage.run] 259 | branch = true 260 | parallel = true 261 | source_pkgs = [ 262 | "pygeohydro", 263 | ] 264 | 265 | [tool.pyright] 266 | exclude = [ 267 | "**/__pycache__", 268 | "**/__init__.py", 269 | ] 270 | include = [ 271 | "src/pygeohydro", 272 | ] 273 | reportMissingTypeStubs = false 274 | reportUnknownArgumentType = false 275 | reportUnknownLambdaType = false 276 | reportUnknownMemberType = false 277 | reportUnknownParameterType = false 278 | reportUnknownVariableType = false 279 | reportUnnecessaryIsInstance = false 280 | 
reportUntypedFunctionDecorator = false
281 | reportAttributeAccessIssue = false
282 | reportInvalidTypeForm = false
283 | typeCheckingMode = "strict"
284 |
--------------------------------------------------------------------------------
/src/pygeohydro/plot.py:
--------------------------------------------------------------------------------
1 | """Plot hydrological signatures.
2 |
3 | Plots include daily, monthly, and annual hydrographs, as well as the regime
4 | curve (monthly mean) and the flow duration curve.
5 | """
6 |
7 | from __future__ import annotations
8 |
9 | import contextlib
10 | from typing import TYPE_CHECKING, Any, TypeVar
11 |
12 | import pygeoutils as geoutils
13 | from pygeohydro import helpers
14 | from pygeohydro.nwis import NWIS
15 |
16 | if TYPE_CHECKING:
17 | import pandas as pd
18 | from folium import Map
19 | from matplotlib.colors import BoundaryNorm, ListedColormap
20 | from pyproj import CRS
21 |
22 | CRSType = int | str | CRS
23 | DF = TypeVar("DF", pd.DataFrame, pd.Series)
24 |
25 | __all__ = ["cover_legends", "descriptor_legends", "interactive_map"]
26 |
27 |
28 | def descriptor_legends() -> tuple[ListedColormap, BoundaryNorm, list[int]]:
29 | """Colormap (cmap) and their respective values (norm) for land cover descriptor legends."""
30 | try:
31 | from matplotlib.colors import BoundaryNorm, ListedColormap
32 | except ImportError as e:
33 | raise ImportError(
34 | "matplotlib is required for descriptor legends. Please install it."
35 | ) from e
36 | nlcd_meta = helpers.nlcd_helper()
37 | bounds = [int(v) for v in nlcd_meta["descriptors"]]
38 | with contextlib.suppress(ValueError):
39 | bounds.remove(127)
40 |
41 | cmap = ListedColormap(list(nlcd_meta["colors"].values())[: len(bounds)])
42 | norm = BoundaryNorm(bounds, cmap.N)
43 | levels = [*bounds, 30]
44 | return cmap, norm, levels
45 |
46 |
47 | def cover_legends() -> tuple[ListedColormap, BoundaryNorm, list[int]]:
48 | """Colormap (cmap) and their respective values (norm) for land cover data legends."""
49 | try:
50 | from matplotlib.colors import BoundaryNorm, ListedColormap
51 | except ImportError as e:
52 | raise ImportError(
53 | "matplotlib is required for cover legends. Please install it."
54 | ) from e
55 | nlcd_meta = helpers.nlcd_helper()
56 | bounds = list(nlcd_meta["colors"])
57 | with contextlib.suppress(ValueError):
58 | bounds.remove(127)
59 |
60 | cmap = ListedColormap(list(nlcd_meta["colors"].values()))
61 | norm = BoundaryNorm(bounds, cmap.N)
62 | levels = [*bounds, 100]
63 | return cmap, norm, levels
64 |
65 |
66 | def interactive_map(
67 | bbox: tuple[float, float, float, float],
68 | crs: CRSType = 4326,
69 | nwis_kwds: dict[str, Any] | None = None,
70 | ) -> Map:
71 | """Generate an interactive map including all USGS stations within a bounding box.
72 |
73 | Parameters
74 | ----------
75 | bbox : tuple
76 | Bounding box corners in this order: (west, south, east, north).
77 | crs : str, int, or pyproj.CRS, optional
78 | CRS of the input bounding box, defaults to EPSG:4326.
79 | nwis_kwds : dict, optional
80 | Additional keywords to include in the NWIS request as a dictionary like so:
81 | ``{"hasDataTypeCd": "dv,iv", "outputDataTypeCd": "dv,iv", "parameterCd": "06000"}``.
82 | Defaults to ``None``.
83 |
84 | Returns
85 | -------
86 | folium.Map
87 | Interactive map within a bounding box. 
88 | 89 | Examples 90 | -------- 91 | >>> import pygeohydro as gh 92 | >>> nwis_kwds = {"hasDataTypeCd": "dv,iv", "outputDataTypeCd": "dv,iv"} 93 | >>> m = gh.interactive_map((-69.77, 45.07, -69.31, 45.45), nwis_kwds=nwis_kwds) 94 | >>> n_stations = len(m.to_dict()["children"]) - 1 95 | >>> n_stations 96 | 10 97 | """ 98 | try: 99 | import folium 100 | except ImportError as e: 101 | raise ImportError("folium is required for interactive map. Please install it.") from e 102 | nwis = NWIS() 103 | bbox = geoutils.geometry_reproject(bbox, crs, 4326) 104 | query = {"bBox": ",".join(f"{b:.06f}" for b in bbox)} 105 | if isinstance(nwis_kwds, dict): 106 | query.update(nwis_kwds) 107 | 108 | sites = nwis.get_info(query, expanded=True, nhd_info=True) 109 | 110 | sites["coords"] = list(sites[["dec_long_va", "dec_lat_va"]].itertuples(name=None, index=False)) 111 | sites["altitude"] = ( 112 | sites["alt_va"].astype("str") + " ft above " + sites["alt_datum_cd"].astype("str") 113 | ) 114 | 115 | sites["drain_area_va"] = sites["drain_area_va"].astype("str") + " sqmi" 116 | sites["contrib_drain_area_va"] = sites["contrib_drain_area_va"].astype("str") + " sqmi" 117 | sites["nhd_areasqkm"] = sites["nhd_areasqkm"].astype("str") + " sqkm" 118 | for c in ("drain_area_va", "contrib_drain_area_va", "nhd_areasqkm"): 119 | sites.loc[sites[c].str.contains("nan"), c] = "N/A" 120 | 121 | cols_old = [ 122 | "site_no", 123 | "station_nm", 124 | "coords", 125 | "altitude", 126 | "huc_cd", 127 | "drain_area_va", 128 | "contrib_drain_area_va", 129 | "nhd_areasqkm", 130 | "hcdn_2009", 131 | ] 132 | 133 | cols_new = [ 134 | "Site No.", 135 | "Station Name", 136 | "Coordinate", 137 | "Altitude", 138 | "HUC8", 139 | "Drainage Area (NWIS)", 140 | "Contributing Drainage Area (NWIS)", 141 | "Drainage Area (GagesII)", 142 | "HCDN 2009", 143 | ] 144 | 145 | sites = ( 146 | sites.groupby("site_no")[cols_old[1:]] 147 | .agg(set) 148 | .reset_index() 149 | .rename(columns=dict(zip(cols_old, cols_new))) 150 | ) 151 | 152 | msgs = [] 153 | base_url = "https://waterdata.usgs.gov/nwis/inventory?agency_code=USGS&site_no=" 154 | for row in sites.itertuples(index=False): 155 | site_no = row[sites.columns.get_loc(cols_new[0])] 156 | msg = f"{cols_new[0]}: {site_no}
" 157 | for col in cols_new[1:]: 158 | value = ", ".join(str(s) for s in row[sites.columns.get_loc(col)]) 159 | msg += f"{col}: {value}
" 160 | msg += f'More on USGS Website' 161 | msgs.append(msg[:-4]) 162 | 163 | sites["msg"] = msgs 164 | 165 | west, south, east, north = bbox 166 | lon = (west + east) * 0.5 167 | lat = (south + north) * 0.5 168 | 169 | imap = folium.Map(location=(lat, lon), zoom_start=10) 170 | 171 | for coords, msg in sites[["Coordinate", "msg"]].itertuples(name=None, index=False): 172 | folium.Marker( 173 | location=next(iter(coords))[::-1], 174 | popup=folium.Popup(msg, max_width=250), # pyright: ignore[reportGeneralTypeIssues] 175 | icon=folium.Icon(), 176 | ).add_to(imap) 177 | 178 | return imap 179 | -------------------------------------------------------------------------------- /src/pygeohydro/nfhl.py: -------------------------------------------------------------------------------- 1 | """Accessing National Flood Hazard Layers (NFHL) through web services.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | from pygeohydro.exceptions import InputValueError 8 | from pygeoogc import ServiceURL 9 | from pynhd import AGRBase 10 | 11 | if TYPE_CHECKING: 12 | from pyproj import CRS 13 | 14 | CRSType = int | str | CRS 15 | 16 | __all__ = ["NFHL"] 17 | 18 | 19 | class NFHL(AGRBase): 20 | """Access National Flood Hazard Layers (NFHL). 21 | 22 | Parameters 23 | ---------- 24 | service : str 25 | The service type. Valid services are: 26 | 27 | - ``NFHL``: Effective National Flood Hazard Layers 28 | - ``Prelim_CSLF``: Preliminary Changes Since Last Firm (CSLF) 29 | - ``Draft_CSLF``: Draft Changes Since Last Firm (CSLF) 30 | - ``Prelim_NFHL``: Preliminary National Flood Hazard Layers 31 | - ``Pending_NFHL``: Pending National Flood Hazard Layers 32 | - ``Draft_NFHL``: Draft National Flood Hazard Layers 33 | 34 | layer : str 35 | A valid service layer. 
Valid layers are service specific: 36 | 37 | - ``NFHL``: ``nfhl availability``, ``firm panels``, ``lomrs``, ``lomas``, 38 | ``political jurisdictions``, ``profile baselines``, ``water lines``, 39 | ``cross-sections``, ``base flood elevations``, ``levees``, 40 | ``seclusion boundaries``, ``coastal transects``, ``transect baselines``, 41 | ``general structures``, ``river mile markers``, ``water areas``, ``plss``, 42 | ``limit of moderate wave action``, ``flood hazard boundaries``, 43 | ``flood hazard zones``, ``primary frontal dunes``, ``base index``, 44 | ``topographic low confidence areas``, ``datum conversion points``, 45 | ``coastal gages``, ``gages``, ``nodes``, ``high water marks``, 46 | ``station start points``, ``hydrologic reaches``, ``alluvial fans``, 47 | and ``subbasins`` 48 | - ``Prelim_CSLF``: ``preliminary``, ``coastal high hazard area change``, 49 | ``floodway change``, ``special flood hazard area change``, 50 | and ``non-special flood hazard area change`` 51 | - ``Draft_CSLF``: ``draft``, ``coastal high hazard area change``, 52 | ``floodway change``, ``special flood hazard area change``, and 53 | ``non-special flood hazard area change`` 54 | - ``Prelim_NFHL``: ``preliminary data availability``, 55 | ``preliminary firm panel index``, ``preliminary plss``, 56 | ``preliminary topographic low confidence areas``, 57 | ``preliminary river mile markers``, ``preliminary datum conversion points``, 58 | ``preliminary coastal gages``, ``preliminary gages``, ``preliminary nodes``, 59 | ``preliminary high water marks``, ``preliminary station start points``, 60 | ``preliminary cross-sections``, ``preliminary coastal transects``, 61 | ``preliminary base flood elevations``, ``preliminary profile baselines``, 62 | ``preliminary transect baselines``, ``preliminary limit of moderate wave action``, 63 | ``preliminary water lines``, ``preliminary political jurisdictions``, 64 | ``preliminary levees``, ``preliminary general structures``, 65 | ``preliminary primary frontal dunes``, ``preliminary hydrologic reaches``, 66 | ``preliminary flood hazard boundaries``, ``preliminary flood hazard zones``, 67 | ``preliminary submittal information``, ``preliminary alluvial fans``, 68 | ``preliminary subbasins``, and ``preliminary water areas`` 69 | - ``Pending_NFHL``: ``pending submittal information``, ``pending water areas``, 70 | ``pending firm panel index``, ``pending data availability``, 71 | ``pending firm panels``, ``pending political jurisdictions``, 72 | ``pending profile baselines``, ``pending water lines``, 73 | ``pending cross-sections``, ``pending base flood elevations``, 74 | ``pending levees``, ``pending seclusion boundaries``, 75 | ``pending coastal transects``, ``pending transect baselines``, 76 | ``pending general structures``, ``pending river mile markers``, 77 | ``pending plss``, ``pending limit of moderate wave action``, 78 | ``pending flood hazard boundaries``, ``pending flood hazard zones``, 79 | ``pending primary frontal dunes``, ``pending topographic low confidence areas``, 80 | ``pending datum conversion points``, ``pending coastal gages``, 81 | ``pending gages``, ``pending nodes``, ``pending high water marks``, 82 | ``pending station start points``, ``pending hydrologic reaches``, 83 | ``pending alluvial fans``, and ``pending subbasins`` 84 | - ``Draft_NFHL``: ``draft data availability``, ``draft firm panels``, 85 | ``draft political jurisdictions``, ``draft profile baselines``, 86 | ``draft water lines``, ``draft cross-sections``, ``draft base flood elevations``, 87 | ``draft 
levees``, ``draft submittal info``, ``draft coastal transects``,
88 | ``draft transect baselines``, ``draft general structures``,
89 | ``draft limit of moderate wave action``, ``draft flood hazard boundaries``,
90 | and ``draft flood hazard zones``
91 |
92 | outfields : str or list, optional
93 | Target field name(s), defaults to ``"*"``, i.e., all the fields.
94 | crs : str, int, or pyproj.CRS, optional
95 | Target spatial reference of output, defaults to ``EPSG:4326``.
96 |
97 | Examples
98 | --------
99 | >>> from pygeohydro import NFHL
100 | >>> nfhl = NFHL("NFHL", "cross-sections")
101 | >>> gdf_xs = nfhl.bygeom((-73.42, 43.28, -72.9, 43.52), geo_crs=4269)
102 |
103 | References
104 | ----------
105 | * `National Flood Hazard Layer <https://www.fema.gov/flood-maps/national-flood-hazard-layer>`__
106 |
107 | Methods
108 | -------
109 | bygeom(geom, geo_crs=4326, sql_clause="", distance=None, return_m=False, return_geom=True)
110 | Get features within a geometry that can be combined with a SQL where clause.
111 | byids(field, fids, return_m=False, return_geom=True)
112 | Get features by object IDs.
113 | bysql(sql_clause, return_m=False, return_geom=True)
114 | Get features using a valid SQL 92 WHERE clause.
115 | """
116 |
117 | def __init__(
118 | self, service: str, layer: str, outfields: str | list[str] = "*", crs: CRSType = 4326
119 | ):
120 | # service URLs
121 | self.__valid_services = {
122 | "NFHL": ServiceURL().restful.fema_nfhl,
123 | "Prelim_CSLF": ServiceURL().restful.fema_prelim_cslf,
124 | "Draft_CSLF": ServiceURL().restful.fema_draft_cslf,
125 | "Prelim_NFHL": ServiceURL().restful.fema_prelim_nfhl,
126 | "Pending_NFHL": ServiceURL().restful.fema_pending_nfhl,
127 | "Draft_NFHL": ServiceURL().restful.fema_draft_nfhl,
128 | }
129 | url = self.valid_services.get(service)
130 | if url is None:
131 | raise InputValueError("service", list(self.valid_services))
132 |
133 | super().__init__(url, layer, outfields, crs)
134 |
135 | @property
136 | def valid_services(self) -> dict[str, str]:
137 | """A dictionary of valid services and their URLs."""
138 | return self.__valid_services
139 |
--------------------------------------------------------------------------------
/src/pygeohydro/helpers.py:
--------------------------------------------------------------------------------
1 | """Some helper functions for PyGeoHydro."""
2 |
3 | from __future__ import annotations
4 |
5 | import io
6 | import json
7 | from dataclasses import dataclass
8 | from datetime import datetime
9 | from typing import TYPE_CHECKING, Any, Union, cast
10 |
11 | import cytoolz.curried as tlz
12 | import geopandas as gpd
13 | import numpy as np
14 | import pandas as pd
15 | from defusedxml import ElementTree
16 |
17 | import async_retriever as ar
18 | from pygeohydro import us_abbrs
19 | from pygeohydro.exceptions import InputRangeError, InputTypeError, InputValueError
20 | from pygeoogc import ServiceURL
21 |
22 | if TYPE_CHECKING:
23 | from pyproj import CRS
24 | from shapely import MultiPolygon, Polygon
25 |
26 | GTYPE = Union[Polygon, MultiPolygon, tuple[float, float, float, float]]
27 | CRSType = int | str | CRS
28 | __all__ = ["get_us_states", "nlcd_helper", "nwis_errors", "states_lookup_table"]
29 |
30 |
31 | def nlcd_helper() -> dict[str, Any]:
32 | """Get legends and properties of the NLCD cover dataset. 
33 |
34 | Notes
35 | -----
36 | The following references have been used:
37 | - https://github.com/jzmiller1/nlcd
38 | - https://www.mrlc.gov/data-services-page
39 | - https://www.mrlc.gov/data/legends/national-land-cover-database-2016-nlcd2016-legend
40 | - https://doi.org/10.1111/jfr3.12347
41 |
42 | Returns
43 | -------
44 | dict
45 | Years when data is available, cover classes and categories, and roughness estimations.
46 | """
47 | base_url = "https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/metadata"
48 | base_path = "eainfo/detailed/attr/attrdomv/edom"
49 |
50 | def _get_xml(
51 | layer: str,
52 | ) -> tuple[Any, Any, Any]:
53 | et = ElementTree.fromstring(ar.retrieve_text([f"{base_url}/{layer}.xml"], ssl=False)[0])
54 | return (
55 | et,
56 | et.findall(f"{base_path}/edomv"),
57 | et.findall(f"{base_path}/edomvd"),
58 | )
59 |
60 | root, edomv, edomvd = _get_xml("NLCD_2019_Land_Cover_Science_Product_L48_20210604")
61 | cover_classes = {}
62 | for t, v in zip(edomv, edomvd):
63 | cover_classes[t.text] = v.text
64 |
65 | clist = [i.split() for i in root.find("eainfo/overview/eadetcit").text.split("\n")[2:]]
66 | colors = {
67 | int(c): (float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0) for c, r, g, b in clist
68 | }
69 | colors[0] = (colors[0][0], colors[0][1], colors[0][2], 0.0)
70 |
71 | _, edomv, edomvd = _get_xml("nlcd_2019_impervious_descriptor_l48_20210604")
72 | descriptors = {}
73 | for t, v in zip(edomv, edomvd):
74 | tag = t.text.split(" - ")
75 | descriptors[tag[0]] = v.text if tag[-1].isnumeric() else f"{tag[-1]}: {v.text}"
76 |
77 | cyear = [2021, 2019, 2016, 2013, 2011, 2008, 2006, 2004, 2001]
78 | nlcd_meta = {
79 | "cover_years": cyear,
80 | "impervious_years": cyear,
81 | "descriptor_years": cyear,
82 | "canopy_years": list(range(2011, 2022)),
83 | "classes": cover_classes,
84 | "categories": {
85 | "Background": ("127",),
86 | "Water": ("11", "12"),
87 | "Developed": ("21", "22", "23", "24"),
88 | "Barren": ("31",),
89 | "Forest": ("41", "42", "43", "45", "46"),
90 | "Shrubland": ("51", "52"),
91 | "Herbaceous": ("71", "72", "73", "74"),
92 | "Planted/Cultivated": ("81", "82"),
93 | "Wetlands": ("90", "95"),
94 | },
95 | "descriptors": descriptors,
96 | "roughness": {
97 | "11": 0.001,
98 | "12": 0.022,
99 | "21": 0.0404,
100 | "22": 0.0678,
101 | "23": 0.0678,
102 | "24": 0.0404,
103 | "31": 0.0113,
104 | "41": 0.36,
105 | "42": 0.32,
106 | "43": 0.4,
107 | "45": 0.4,
108 | "46": 0.24,
109 | "51": 0.24,
110 | "52": 0.4,
111 | "71": 0.368,
112 | "81": 0.325,
113 | "82": 0.037,
114 | "90": 0.086,
115 | "95": 0.1825,
116 | },
117 | "colors": colors,
118 | }
119 |
120 | return nlcd_meta
121 |
122 |
123 | def nwis_errors() -> pd.DataFrame:
124 | """Get error code lookup table for USGS sites that have daily values."""
125 | return pd.read_html(
126 | "https://waterservices.usgs.gov/docs/dv-service/daily-values-service-details/#error-codes"
127 | )[0]
128 |
129 |
130 | def get_ssebopeta_urls(dates: tuple[str, str] | int | list[int]) -> list[tuple[pd.Timestamp, str]]:
131 | """Get list of URLs for SSEBop dataset within a period or years."""
132 | if not isinstance(dates, (tuple, list, int)):
133 | raise InputTypeError("dates", "tuple, list, or int", "(start, end), year, or [years, ...]")
134 |
135 | year = datetime.now().year - 1
136 | if isinstance(dates, tuple):
137 | if len(dates) != 2:
138 | raise InputTypeError("dates", "(start, end)")
139 | start = pd.to_datetime(dates[0])
140 | end = pd.to_datetime(dates[1])
141 | if start < 
pd.to_datetime("2000-01-01") or end > pd.to_datetime(f"{year}-12-31"):
142 | raise InputRangeError("SSEBop", ("2000", str(year)))
143 | date_range = pd.date_range(start, end)
144 | else:
145 | years = dates if isinstance(dates, list) else [dates]
146 | ssebop_yrs = np.arange(2000, year + 1)
147 |
148 | if any(y not in ssebop_yrs for y in years):
149 | raise InputRangeError("SSEBop", ("2000", str(year)))
150 |
151 | d_list = [pd.date_range(f"{y}0101", f"{y}1231") for y in years]
152 | date_range = d_list.pop(0)
153 | while d_list:
154 | date = d_list.pop(0)
155 | date_range = date_range.union(date)  # pyright: ignore[reportOptionalMemberAccess]
156 |
157 | date_range = cast("pd.DatetimeIndex", date_range)
158 | base_url = ServiceURL().http.ssebopeta
159 |
160 | f_list = [
161 | (d, f"{base_url}/det{d.strftime('%Y%j')}.modisSSEBopETactual.zip") for d in date_range
162 | ]
163 |
164 | return f_list
165 |
166 |
167 | @dataclass(frozen=True)
168 | class Stats:
169 | """Statistics for NLCD."""
170 |
171 | classes: dict[str, float]
172 | categories: dict[str, float]
173 |
174 |
175 | def _get_state_codes(subset_key: str | list[str]) -> list[str]:
176 | """Get state codes for a subset of the US."""
177 | keys = [subset_key] if isinstance(subset_key, str) else subset_key
178 | state_cd = []
179 |
180 | state_keys = [k.upper() for k in keys if len(k) == 2]
181 | states = us_abbrs.STATES
182 | if any(k not in states for k in state_keys):
183 | raise InputValueError("subset_key", states)
184 | if state_keys:
185 | state_cd += state_keys
186 |
187 | other_keys = [k for k in keys if len(k) > 2]
188 | if "conus" in other_keys:
189 | other_keys.remove("conus")
190 | other_keys.append("contiguous")
191 | valid_keys = ["contiguous", "continental", "territories", "commonwealths"]
192 | if any(k not in valid_keys for k in other_keys):
193 | raise InputValueError("subset_key", [*valid_keys, "conus"])
194 | if other_keys:
195 | state_cd += tlz.concat(getattr(us_abbrs, k.upper()) for k in other_keys)
196 | return state_cd
197 |
198 |
199 | def get_us_states(subset_key: str | list[str] | None = None) -> gpd.GeoDataFrame:
200 | """Get US states as a GeoDataFrame from Census' TIGER/Line 2024 database.
201 |
202 | Parameters
203 | ----------
204 | subset_key : str or list of str, optional
205 | Key to subset the geometries instead of returning all states, by default
206 | all states are returned. Valid keys are:
207 |
208 | - ``contiguous`` or ``conus``
209 | - ``continental``
210 | - ``commonwealths``
211 | - ``territories``
212 | - Two letter state codes, e.g., ``["TX", "CA", "FL", ...]``
213 |
214 | Returns
215 | -------
216 | geopandas.GeoDataFrame
217 | GeoDataFrame of requested US states. 
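
Examples
--------
A small sketch; the call downloads the TIGER/Line shapefile, so it is
skipped in doctests (the expected count comes from the test suite):

>>> conus = get_us_states("conus")  # doctest: +SKIP
>>> conus.shape[0]  # doctest: +SKIP
48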
218 | """
219 | url = "https://www2.census.gov/geo/tiger/TIGER2024/STATE/tl_2024_us_state.zip"
220 | headers = {
221 | "headers": {
222 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
223 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
224 | "Accept-Language": "en-US,en;q=0.5",
225 | "Accept-Encoding": "gzip, deflate, br",
226 | "DNT": "1",
227 | "Connection": "keep-alive",
228 | "Upgrade-Insecure-Requests": "1",
229 | "Sec-Fetch-Dest": "document",
230 | "Sec-Fetch-Mode": "navigate",
231 | "Sec-Fetch-Site": "none",
232 | "Sec-Fetch-User": "?1",
233 | "Cache-Control": "max-age=0",
234 | }
235 | }
236 | us_states = gpd.read_file(
237 | io.BytesIO(ar.retrieve_binary([url], request_kwds=[headers], ssl=False)[0])
238 | )
239 | if subset_key is not None:
240 | state_cd = _get_state_codes(subset_key)
241 | return us_states[us_states.STUSPS.isin(state_cd)].copy()  # pyright: ignore[reportReturnType]
242 | return us_states  # pyright: ignore[reportReturnType]
243 |
244 |
245 | @dataclass(frozen=True)
246 | class StateCounties:
247 | """State and county codes and names."""
248 |
249 | name: str
250 | code: str | None
251 | counties: pd.Series
252 |
253 |
254 | def states_lookup_table() -> dict[str, StateCounties]:
255 | """Get codes and names of US states and their counties.
256 |
257 | Notes
258 | -----
259 | This function is based on a file prepared by developers of
260 | an R package called `dataRetrieval <https://github.com/USGS-R/dataRetrieval>`__.
261 |
262 | Returns
263 | -------
264 | dict of StateCounties
265 | Mapping of two-digit state codes to state names, abbreviations, and their counties.
266 | """
267 | urls = [
268 | "https://www2.census.gov/geo/docs/reference/state.txt",
269 | "/".join(
270 | [
271 | "https://raw.githubusercontent.com/USGS-R/dataRetrieval",
272 | "main/inst/extdata/state_county.json",
273 | ]
274 | ),
275 | ]
276 | resp = ar.retrieve_text(urls, ssl=False)
277 |
278 | codes = pd.read_csv(io.StringIO(resp[0]), sep="|")
279 | codes["STATE"] = codes["STATE"].astype(str).str.zfill(2)
280 | codes = codes.set_index("STATE")
281 |
282 | def _county2series(cd: dict[str, dict[str, str]]) -> pd.Series:
283 | return pd.DataFrame.from_dict(cd, orient="index")["name"]  # pyright: ignore[reportReturnType]
284 |
285 | def _state_cd(state: str) -> str | None:
286 | try:
287 | return codes.loc[state, "STUSAB"]
288 | except KeyError:
289 | return None
290 |
291 | states = {
292 | c: StateCounties(s["name"], _state_cd(c), _county2series(s["county_cd"]))
293 | for c, s in json.loads(resp[1])["US"]["state_cd"].items()
294 | }
295 | return states
296 |
--------------------------------------------------------------------------------
/src/pygeohydro/waterdata.py:
--------------------------------------------------------------------------------
1 | """Accessing WaterData related APIs."""
2 |
3 | from __future__ import annotations
4 |
5 | import io
6 | from typing import TYPE_CHECKING, Any, Literal, cast, overload
7 |
8 | import cytoolz.curried as tlz
9 | import pandas as pd
10 |
11 | import async_retriever as ar
12 | import pygeoutils as geoutils
13 | from pygeohydro.exceptions import InputTypeError, InputValueError, ServiceError
14 |
15 | if TYPE_CHECKING:
16 | import geopandas as gpd
17 |
18 | __all__ = ["SensorThings", "WaterQuality"]
19 |
20 |
21 | class WaterQuality:
22 | """Water Quality Web Service https://www.waterqualitydata.us. 
23 |
24 | Notes
25 | -----
26 | This class has a number of convenience methods to retrieve data from the
27 | Water Quality Data. Since there are many parameter combinations that can be
28 | used to retrieve data, a general method is also provided to retrieve data from
29 | any of the valid endpoints. You can use ``get_json`` to retrieve station info
30 | as a ``geopandas.GeoDataFrame`` or ``get_csv`` to retrieve station data as a
31 | ``pandas.DataFrame``. You can construct a dictionary of the parameters and pass
32 | it to one of these functions. For more information on the parameters, please
33 | consult the
34 | `Water Quality Data documentation <https://www.waterqualitydata.us/webservices_documentation/>`__.
35 | """
36 |
37 | def __init__(self) -> None:
38 | self.wq_url = "https://www.waterqualitydata.us"
39 | self.keywords = self.get_param_table()
40 |
41 | def get_param_table(self) -> pd.Series:
42 | """Get the parameter table from the USGS Water Quality Web Service."""
43 | params = pd.read_html(f"{self.wq_url}/webservices_documentation/")
44 | params = params[0].iloc[:29].drop(columns="Discussion")
45 | return params.groupby("REST parameter")["Argument"].apply(",".join)
46 |
47 | def lookup_domain_values(self, endpoint: str) -> list[str]:
48 | """Get the domain values for the target endpoint."""
49 | valid_endpoints = [
50 | "statecode",
51 | "countycode",
52 | "sitetype",
53 | "organization",
54 | "samplemedia",
55 | "characteristictype",
56 | "characteristicname",
57 | "providers",
58 | ]
59 | if endpoint.lower() not in valid_endpoints:
60 | raise InputValueError("endpoint", valid_endpoints)
61 | resp = ar.retrieve_json([f"{self.wq_url}/Codes/{endpoint}?mimeType=json"])
62 | resp = cast("list[dict[str, Any]]", resp)
63 | return [r["value"] for r in resp[0]["codes"]]
64 |
65 | def _base_url(self, endpoint: str) -> str:
66 | """Get the base URL for the target endpoint."""
67 | valid_endpoints = [
68 | "Station",
69 | "Result",
70 | "Activity",
71 | "ActivityMetric",
72 | "ProjectMonitoringLocationWeighting",
73 | "ResultDetectionQuantitationLimit",
74 | "BiologicalMetric",
75 | ]
76 | if endpoint.lower() not in map(str.lower, valid_endpoints):
77 | raise InputValueError("endpoint", valid_endpoints)
78 | return f"{self.wq_url}/data/{endpoint}/search"
79 |
80 | def get_json(
81 | self,
82 | endpoint: str,
83 | kwds: dict[str, str],
84 | request_method: Literal["get", "GET", "post", "POST"] = "GET",
85 | ) -> gpd.GeoDataFrame:
86 | """Get the JSON response from the Water Quality Web Service.
87 |
88 | Parameters
89 | ----------
90 | endpoint : str
91 | Endpoint of the Water Quality Web Service.
92 | kwds : dict
93 | Water Quality Web Service keyword arguments.
94 | request_method : str, optional
95 | HTTP request method. Default to GET.
96 |
97 | Returns
98 | -------
99 | geopandas.GeoDataFrame
100 | The web service response as a GeoDataFrame. 
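
Examples
--------
A minimal sketch mirroring ``station_bybbox``; the keyword values are
illustrative and the query needs network access, so it is skipped in doctests:

>>> wq = WaterQuality()  # doctest: +SKIP
>>> kwds = {"mimeType": "geojson", "bBox": "-92.8,44.2,-88.9,46.0", "zip": "no", "sorted": "no"}  # doctest: +SKIP
>>> stations = wq.get_json("station", kwds)  # doctest: +SKIP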
101 | """
102 | req_kwds = [{"params": kwds}] if request_method == "GET" else [{"data": kwds}]
103 | resp = ar.retrieve_json([self._base_url(endpoint)], req_kwds, request_method=request_method)
104 | resp = cast("list[dict[str, Any]]", resp)
105 | return geoutils.json2geodf(resp)
106 |
107 | def _check_kwds(self, wq_kwds: dict[str, str]) -> None:
108 | """Check the validity of the Water Quality Web Service keyword arguments."""
109 | invalids = [k for k in wq_kwds if k not in self.keywords.index]
110 | if invalids:
111 | raise InputValueError("wq_kwds", invalids)
112 |
113 | def station_bybbox(
114 | self, bbox: tuple[float, float, float, float], wq_kwds: dict[str, str] | None
115 | ) -> gpd.GeoDataFrame:
116 | """Retrieve station info within a bounding box.
117 |
118 | Parameters
119 | ----------
120 | bbox : tuple of float
121 | Bounding box coordinates (west, south, east, north) in epsg:4326.
122 | wq_kwds : dict, optional
123 | Water Quality Web Service keyword arguments. Default to None.
124 |
125 | Returns
126 | -------
127 | geopandas.GeoDataFrame
128 | GeoDataFrame of station info within the bounding box.
129 | """
130 | kwds = {
131 | "mimeType": "geojson",
132 | "bBox": ",".join(f"{b:.06f}" for b in bbox),
133 | "zip": "no",
134 | "sorted": "no",
135 | }
136 | if wq_kwds is not None:
137 | self._check_kwds(wq_kwds)
138 | kwds.update(wq_kwds)
139 |
140 | return self.get_json("station", kwds)
141 |
142 | def station_bydistance(
143 | self, lon: float, lat: float, radius: float, wq_kwds: dict[str, str] | None
144 | ) -> gpd.GeoDataFrame:
145 | """Retrieve station info within a radius (decimal miles) of a point.
146 |
147 | Parameters
148 | ----------
149 | lon : float
150 | Longitude of point.
151 | lat : float
152 | Latitude of point.
153 | radius : float
154 | Radius (decimal miles) of search.
155 | wq_kwds : dict, optional
156 | Water Quality Web Service keyword arguments. Default to None.
157 |
158 | Returns
159 | -------
160 | geopandas.GeoDataFrame
161 | GeoDataFrame of station info within the radius of the point.
162 | """
163 | kwds = {
164 | "mimeType": "geojson",
165 | "long": f"{lon:.06f}",
166 | "lat": f"{lat:.06f}",
167 | "within": f"{radius:.06f}",
168 | "zip": "no",
169 | "sorted": "no",
170 | }
171 | if wq_kwds is not None:
172 | self._check_kwds(wq_kwds)
173 | kwds.update(wq_kwds)
174 |
175 | return self.get_json("station", kwds)
176 |
177 | def get_csv(
178 | self,
179 | endpoint: str,
180 | kwds: dict[str, str],
181 | request_method: Literal["get", "GET", "post", "POST"] = "GET",
182 | ) -> pd.DataFrame:
183 | """Get the CSV response from the Water Quality Web Service.
184 |
185 | Parameters
186 | ----------
187 | endpoint : str
188 | Endpoint of the Water Quality Web Service.
189 | kwds : dict
190 | Water Quality Web Service keyword arguments.
191 | request_method : str, optional
192 | HTTP request method. Default to GET.
193 |
194 | Returns
195 | -------
196 | pandas.DataFrame
197 | The web service response as a DataFrame.
198 | """
199 | req_kwds = [{"params": kwds}] if request_method == "GET" else [{"data": kwds}]
200 | r = ar.retrieve_binary([self._base_url(endpoint)], req_kwds, request_method=request_method)
201 | return pd.read_csv(io.BytesIO(r[0]), compression="zip")
202 |
203 | def data_bystation(
204 | self, station_ids: str | list[str], wq_kwds: dict[str, str] | None
205 | ) -> pd.DataFrame:
206 | """Retrieve data for one or more stations.
207 |
208 | Parameters
209 | ----------
210 | station_ids : str or list of str
211 | Station ID(s). 
The IDs should have the format "Agency code-Station ID".
212 | wq_kwds : dict, optional
213 | Water Quality Web Service keyword arguments. Default to None.
214 |
215 | Returns
216 | -------
217 | pandas.DataFrame
218 | DataFrame of data for the stations.
219 | """
220 | siteid = set(station_ids) if isinstance(station_ids, list) else {station_ids}
221 | if any("-" not in s for s in siteid):
222 | valid_type = "list of hyphenated IDs like so 'agency code-station ID'"
223 | raise InputTypeError("station_ids", valid_type)
224 | kwds = {
225 | "mimeType": "csv",
226 | "siteid": ";".join(siteid),
227 | "zip": "yes",
228 | "sorted": "no",
229 | }
230 | if wq_kwds is not None:
231 | self._check_kwds(wq_kwds)
232 | kwds.update(wq_kwds)
233 |
234 | if len(siteid) > 10:
235 | return self.get_csv("result", kwds, request_method="POST")
236 | return self.get_csv("result", kwds)
237 |
238 |
239 | class SensorThings:
240 | """Class for interacting with SensorThings API."""
241 |
242 | def __init__(self) -> None:
243 | self.base_url = "https://api.water.usgs.gov/sta/v1.1/Things"
244 |
245 | @overload
246 | @staticmethod
247 | def _get_urls(url: str, kwd: dict[str, Any] | None = ...) -> dict[str, Any]: ...
248 |
249 | @overload
250 | @staticmethod
251 | def _get_urls(
252 | url: list[str], kwd: list[dict[str, Any]] | None = ...
253 | ) -> list[dict[str, Any]]: ...
254 |
255 | @staticmethod
256 | def _get_urls(
257 | url: str | list[str], kwd: dict[str, Any] | list[dict[str, Any]] | None = None
258 | ) -> dict[str, Any] | list[dict[str, Any]]:
259 | urls = url if isinstance(url, list) else [url]
260 | if kwd:
261 | kwds = kwd if isinstance(kwd, list) else [kwd]
262 | if len(urls) == 1 and len(urls) != len(kwds):
263 | urls = urls * len(kwds)
264 | else:
265 | kwds = None
266 | resp = ar.retrieve_json(urls, kwds)
267 | resp = cast("list[dict[str, Any]]", resp)
268 | if isinstance(url, str):
269 | return resp[0]
270 | return resp
271 |
272 | @staticmethod
273 | def odata_helper(
274 | columns: list[str] | None = None,
275 | conditionals: str | None = None,
276 | expand: dict[str, dict[str, str]] | None = None,
277 | max_count: int | None = None,
278 | extra_params: dict[str, Any] | None = None,
279 | ) -> dict[str, str]:
280 | """Generate Odata filters for SensorThings API.
281 |
282 | Parameters
283 | ----------
284 | columns : list of str, optional
285 | Columns to be selected from the database, defaults to ``None``.
286 | conditionals : str, optional
287 | Conditionals to be applied to the database, defaults to ``None``.
288 | Note that the conditionals should have the form of
289 | ``cond1 operator 'value' and/or cond2 operator 'value'``.
290 | For example:
291 | ``properties/monitoringLocationType eq 'Stream' and ...``
292 | expand : dict of dict, optional
293 | Expand the properties of the selected columns, defaults to ``None``.
294 | Note that the ``expand`` should have the form of
295 | ``{Property: {func: value, ...}}``. For example: ``{"Locations":
296 | {"select": "location", "filter": "ObservedProperty/@iot.id eq '00060'"}}``
297 | max_count : int, optional
298 | Maximum number of items to be returned, defaults to ``None``.
299 | extra_params : dict, optional
300 | Extra parameters to be added to the Odata filter, defaults to ``None``.
301 |
302 | Returns
303 | -------
304 | odata : dict
305 | Odata filter for the SensorThings API. 
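
Examples
--------
A small, self-contained sketch; the column names and the condition are
illustrative only:

>>> odata = SensorThings.odata_helper(
...     columns=["name", "properties/active"],
...     conditionals="properties/monitoringLocationType eq 'Stream'",
... )
>>> odata["select"]
'name,properties/active'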
306 | """
307 | odata = {}
308 | if columns is not None:
309 | odata["select"] = ",".join(columns)
310 |
311 | if conditionals is not None:
312 | odata["filter"] = conditionals
313 |
314 | def _odata(kwds: dict[str, str]) -> str:
315 | return ";".join(f"${k}={v}" for k, v in kwds.items())
316 |
317 | if expand is not None:
318 | odata["expand"] = ",".join(f"{func}({_odata(od)})" for func, od in expand.items())
319 |
320 | if max_count is not None:
321 | odata["top"] = max_count
322 |
323 | if extra_params is not None:
324 | odata.update(extra_params)
325 | return odata
326 |
327 | def query_byodata(
328 | self, odata: dict[str, Any], outformat: str = "json"
329 | ) -> gpd.GeoDataFrame | pd.DataFrame:
330 | """Query the SensorThings API by Odata filter.
331 |
332 | Parameters
333 | ----------
334 | odata : dict
335 | Odata filter for the SensorThings API.
336 | outformat : str, optional
337 | Format of the response, defaults to ``json``.
338 | Valid values are ``json`` and ``geojson``.
339 |
340 | Returns
341 | -------
342 | pandas.DataFrame or geopandas.GeoDataFrame
343 | Requested data.
344 | """
345 | valid_formats = ["json", "geojson"]
346 | if outformat not in valid_formats:
347 | raise InputValueError("format", valid_formats)
348 |
349 | kwds = odata.copy()
350 | if outformat == "geojson":
351 | kwds.update({"resultFormat": "GeoJSON"})
352 |
353 | kwds = {"params": {f"${k}": v for k, v in kwds.items()}}
354 | resp = self._get_urls(self.base_url, kwds)
355 |
356 | if "message" in resp:
357 | raise ServiceError(resp["message"])
358 |
359 | if outformat == "json":
360 | data = resp["value"]
361 | data = cast("list[dict[str, Any]]", data)
362 | while "@iot.nextLink" in resp:
363 | resp = self._get_urls(resp["@iot.nextLink"])
364 | data.extend(resp["value"])
365 | return pd.json_normalize(data)
366 | return geoutils.json2geodf(resp)
367 |
368 | def sensor_info(self, sensor_ids: str | list[str]) -> pd.DataFrame:
369 | """Query the SensorThings API by a sensor ID.
370 |
371 | Parameters
372 | ----------
373 | sensor_ids : str or list of str
374 | A single or list of sensor IDs, e.g., ``USGS-09380000``.
375 |
376 | Returns
377 | -------
378 | pandas.DataFrame
379 | Requested sensor data.
380 | """
381 | sensor_ids = [sensor_ids] if isinstance(sensor_ids, str) else sensor_ids
382 | urls = [f"{self.base_url}('{i}')" for i in sensor_ids]
383 | data = pd.json_normalize(self._get_urls(urls))
384 | columns = data.columns[data.columns.str.endswith("Link")]
385 | return data.drop(columns=columns)  # pyright: ignore[reportCallIssue,reportArgumentType]
386 |
387 | def sensor_property(self, sensor_property: str, sensor_ids: str | list[str]) -> pd.DataFrame:
388 | """Query a sensor property.
389 |
390 | Parameters
391 | ----------
392 | sensor_property : str
393 | A sensor property. Valid properties are ``Datastreams``,
394 | ``MultiDatastreams``, ``Locations``, ``HistoricalLocations``,
395 | ``TaskingCapabilities``.
396 | sensor_ids : str or list of str
397 | A single or list of sensor IDs, e.g., ``USGS-09380000``.
398 |
399 | Returns
400 | -------
401 | pandas.DataFrame
402 | A dataframe containing the requested property. 
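
Examples
--------
A minimal sketch using the gage ID from the parameter description; it
queries the live API, so it is skipped in doctests:

>>> sensor = SensorThings()  # doctest: +SKIP
>>> datastreams = sensor.sensor_property("Datastreams", "USGS-09380000")  # doctest: +SKIP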
403 | """ 404 | sensor_ids = [sensor_ids] if isinstance(sensor_ids, str) else sensor_ids 405 | urls = [f"{self.base_url}('{i}')" for i in sensor_ids] 406 | resp = self._get_urls(urls) 407 | links = tlz.merge_with( 408 | list, 409 | ( 410 | {p.split("@")[0]: r.pop(p, None) for p in list(r) if p.endswith("navigationLink")} 411 | for r in resp 412 | ), 413 | ) 414 | links = cast("dict[str, list[str]]", links) 415 | 416 | if sensor_property not in links: 417 | raise InputValueError("properties", list(links)) 418 | resp = self._get_urls(links[sensor_property]) 419 | return pd.concat(pd.json_normalize(r["value"]) for r in resp).reset_index(drop=True) 420 | -------------------------------------------------------------------------------- /tests/test_pygeohydro.py: -------------------------------------------------------------------------------- 1 | """Tests for PyGeoHydro package.""" 2 | 3 | from __future__ import annotations 4 | 5 | import io 6 | 7 | import geopandas as gpd 8 | import numpy as np 9 | import pandas as pd 10 | import pytest 11 | import xarray as xr 12 | from shapely import Polygon 13 | 14 | import pygeohydro as gh 15 | import pygeoutils as geoutils 16 | import pynhd as nhd 17 | from pygeohydro import NFHL, NID, NLD, NWIS, WBD, EHydro 18 | 19 | DEF_CRS = 4326 20 | ALT_CRS = 3542 21 | SID_NATURAL = "01031500" 22 | SID_URBAN = "11092450" 23 | DATES = ("2005-01-01", "2005-01-31") 24 | DATES_LONG = ("2000-01-01", "2009-12-31") 25 | GEOM = Polygon( 26 | [ 27 | [-69.77, 45.07], 28 | [-69.31, 45.07], 29 | [-69.31, 45.45], 30 | [-69.77, 45.45], 31 | [-69.77, 45.07], 32 | ] 33 | ) 34 | 35 | 36 | def assert_close(a: float, b: float) -> None: 37 | np.testing.assert_allclose(a, b, rtol=1e-3) 38 | 39 | 40 | class TestNWIS: 41 | nwis: NWIS = NWIS() 42 | 43 | def test_qobs_dv(self): 44 | df = self.nwis.get_streamflow(SID_NATURAL, DATES) 45 | ds = self.nwis.get_streamflow(SID_NATURAL, DATES, to_xarray=True) 46 | col = f"USGS-{SID_NATURAL}" 47 | assert_close(df[col].sum().item(), ds.sel(station_id=col).discharge.sum().item()) 48 | assert df.attrs[col]["huc_cd"] == ds.sel(station_id=col).huc_cd.item() 49 | 50 | def test_qobs_mmd(self): 51 | df = self.nwis.get_streamflow(SID_NATURAL, DATES, mmd=True) 52 | assert_close(df[f"USGS-{SID_NATURAL}"].sum().item(), 27.6375) 53 | 54 | def test_cst_tz(self): 55 | q = self.nwis.get_streamflow(["08075000", "11092450"], DATES) 56 | assert q.index.tz.tzname(None) == "UTC" 57 | 58 | def test_qobs_iv(self): 59 | iv = self.nwis.get_streamflow(SID_NATURAL, ("2020-01-01", "2020-01-31"), freq="iv") 60 | dv = self.nwis.get_streamflow(SID_NATURAL, ("2020-01-01", "2020-01-31"), freq="dv") 61 | assert_close(abs(iv.mean().item() - dv.mean().item()), 0.0539) 62 | 63 | def test_info(self): 64 | query = {"sites": ",".join([SID_NATURAL])} 65 | info = self.nwis.get_info(query, expanded=True, nhd_info=True) 66 | assert_close(info["nhd_areasqkm"].item(), 773.964) 67 | assert info.hcdn_2009.item() 68 | 69 | def test_info_box(self): 70 | query = {"bBox": ",".join(f"{b:.06f}" for b in GEOM.bounds)} 71 | info_box = self.nwis.get_info(query, nhd_info=True) 72 | assert info_box.shape[0] == 35 73 | assert info_box["nhd_areasqkm"].isna().sum() == 31 74 | 75 | def test_param_cd(self): 76 | codes = self.nwis.get_parameter_codes("%discharge%") 77 | assert ( 78 | codes.loc[codes.parameter_cd == "00060", "parm_nm"].iloc[0] 79 | == "Discharge, cubic feet per second" 80 | ) 81 | 82 | def test_fillna(self): 83 | index = pd.date_range("2000-01-01", "2020-12-31", freq="D") 84 | q = 
pd.Series(np.ones(index.size), index=index) 85 | qf = gh.streamflow_fillna(q) 86 | assert not qf.name 87 | q.loc[slice("2000-01-01", "2000-01-05")] = np.nan 88 | qf = gh.streamflow_fillna(q) 89 | assert np.all(qf == 1) 90 | qf = gh.streamflow_fillna(q.to_frame("12345678")) 91 | assert np.all(qf == 1) 92 | qf = gh.streamflow_fillna(xr.DataArray(q)) 93 | assert np.all(qf == 1) 94 | 95 | 96 | class TestETA: 97 | dates = ("2005-10-01", "2005-10-05") 98 | years = [2010, 2014, 2015] 99 | 100 | def test_coords(self): 101 | coords = pd.DataFrame( 102 | [ 103 | ["s1", -72.77, 40.07], 104 | ["s2", -70.31, 46.07], 105 | ["s3", -69.31, 45.45], 106 | ["s4", -69.77, 45.45], 107 | ], 108 | columns=["id", "x", "y"], 109 | ) 110 | ds = gh.ssebopeta_bycoords(coords, dates=self.dates) 111 | assert_close(ds.eta.sum().item(), 1.858) 112 | assert ds.eta.isnull().sum().item() == 5 113 | 114 | def test_geom(self): 115 | eta_g = gh.ssebopeta_bygeom(GEOM, dates=self.dates) 116 | assert_close(eta_g.mean().values.item(), 0.6822) 117 | 118 | def test_get_ssebopeta_urls(self): 119 | _ = gh.helpers.get_ssebopeta_urls(self.years[0]) 120 | urls_dates = gh.helpers.get_ssebopeta_urls(DATES_LONG) 121 | urls_years = gh.helpers.get_ssebopeta_urls(self.years) 122 | assert len(urls_dates) == 3653 123 | assert len(urls_years) == 1095 124 | 125 | 126 | class TestNLCD: 127 | years = {"cover": [2016]} 128 | res = 1000 129 | 130 | @staticmethod 131 | def assertion(cover, expected): 132 | st = gh.cover_statistics(cover) 133 | assert_close(st.categories["Forest"], expected) 134 | 135 | def test_geodf(self): 136 | geom = gpd.GeoSeries([GEOM, GEOM], crs=DEF_CRS) 137 | lulc = gh.nlcd_bygeom(geom, years=self.years, resolution=self.res, crs=ALT_CRS, ssl=False) 138 | self.assertion(lulc[0].cover_2016, 73.1459) 139 | self.assertion(lulc[1].cover_2016, 73.1459) 140 | assert lulc[0].cover_2016.rio.nodata == 127 141 | 142 | def test_coords(self): 143 | coords = list(GEOM.exterior.coords) 144 | lulc = gh.nlcd_bycoords(coords, ssl=False) 145 | assert lulc.cover_2021.sum() == 211 146 | 147 | def test_consistency(self): 148 | coords = [(-87.11890, 34.70421), (-88.83390, 40.17190), (-95.68978, 38.23926)] 149 | lulc_m = gh.nlcd_bycoords(coords, ssl=False) 150 | lulc_s = gh.nlcd_bycoords(coords[:1], ssl=False) 151 | assert lulc_m.iloc[0]["cover_2021"] == lulc_s.iloc[0]["cover_2021"] == 24 152 | 153 | def test_roughness(self): 154 | geom = gpd.GeoSeries([GEOM], crs=DEF_CRS) 155 | lulc = gh.nlcd_bygeom(geom, years=self.years, resolution=self.res, crs=ALT_CRS, ssl=False) 156 | roughness = gh.overland_roughness(lulc[0].cover_2016) 157 | assert_close(roughness.mean().item(), 0.3256) 158 | 159 | def test_area(self): 160 | geom = gpd.GeoSeries([GEOM], crs=DEF_CRS) 161 | area = gh.nlcd_area_percent(geom) 162 | assert_close(area[["urban", "natural"]].sum(axis=1), 100) 163 | assert_close(area[["natural", "developed", "impervious"]].sum(axis=1), 100) 164 | 165 | 166 | class TestNID: 167 | nid = NID() 168 | ids = ["KY01232", "GA02400", "NE04081", "IL55070", "TN05345"] 169 | 170 | def test_suggestion(self): 171 | dams, contexts = self.nid.get_suggestions("houston", "city") 172 | assert dams.empty 173 | assert contexts["suggestion"].to_list() == ["Houston", "Houston Lake"] 174 | 175 | def test_filter(self): 176 | query_list = [ 177 | {"drainageArea": ["[200 500]"]}, 178 | {"nidId": ["CA01222"]}, 179 | ] 180 | dam_dfs = self.nid.get_byfilter(query_list) 181 | assert dam_dfs[0].loc[dam_dfs[0].name == "Prairie Portage"].id.item() == "496613" 182 | 183 | def 
test_id(self): 184 | dams = self.nid.inventory_byid(self.ids) 185 | assert_close(dams.damHeight.max(), 39) 186 | 187 | def test_geom(self): 188 | dams_geo = self.nid.get_bygeom(GEOM, DEF_CRS) 189 | bbox = geoutils.geometry_reproject(GEOM.bounds, DEF_CRS, ALT_CRS) 190 | dams_box = self.nid.get_bygeom(bbox, ALT_CRS) 191 | name = "Pingree Pond" 192 | assert (dams_geo.name == name).any() 193 | assert (dams_box.name == "Pingree Pond").any() 194 | 195 | def test_nation(self): 196 | assert self.nid.df.shape == (92392, 83) 197 | assert self.nid.gdf.shape == (92245, 97) 198 | 199 | 200 | class TestWaterQuality: 201 | wq: gh.WaterQuality = gh.WaterQuality() 202 | 203 | def test_bbox(self): 204 | stations = self.wq.station_bybbox( 205 | (-92.8, 44.2, -88.9, 46.0), {"characteristicName": "Caffeine"} 206 | ) 207 | assert stations.shape[0] == 83 208 | 209 | def test_distance(self): 210 | stations = self.wq.station_bydistance(-92.8, 44.2, 30, {"characteristicName": "Caffeine"}) 211 | assert stations.shape[0] == 44 212 | 213 | def test_data(self): 214 | stations = [ 215 | "USGS-435221093001901", 216 | "MN040-443119093050101", 217 | "MN040-443602092510501", 218 | "MN040-443656092474901", 219 | "MN048-442839093085901", 220 | "MN048-442849093085401", 221 | "MN048-443122093050101", 222 | "MN048-443128092593201", 223 | "MN048-443129092592701", 224 | "MN048-443140093042801", 225 | "MN048-443141093042601", 226 | ] 227 | caff = self.wq.data_bystation(stations, {"characteristicName": "Caffeine"}) 228 | assert caff.shape[0] == 12 229 | 230 | 231 | def test_wbd(): 232 | wbd = WBD("huc4") 233 | hudson = wbd.byids("huc4", ["0202", "0203"]) 234 | assert len(",".join(hudson.states).split(",")) == 8 235 | 236 | 237 | def test_states_lookup(): 238 | codes = gh.helpers.states_lookup_table() 239 | ca = codes["06"].counties 240 | la_cd = ca[ca.str.contains("Los")].index[0] 241 | assert la_cd == "037" 242 | 243 | 244 | @pytest.mark.xfail(reason="Hydroshare is unstable.") 245 | def test_camels(): 246 | attrs, qobs = gh.get_camels() 247 | assert attrs.shape[0] == qobs.station_id.shape[0] == 671 248 | 249 | 250 | def test_interactive_map(): 251 | nwis_kwds = { 252 | "hasDataTypeCd": "dv", 253 | "outputDataTypeCd": "dv", 254 | "parameterCd": "00060", 255 | } 256 | m = gh.interactive_map((-69.77, 45.07, -69.31, 45.45), nwis_kwds=nwis_kwds) 257 | assert len(m.to_dict()["children"]) == 4 258 | 259 | 260 | def test_plot(): 261 | _, _, levels = gh.plot.cover_legends() 262 | assert levels[-1] == 100 263 | 264 | 265 | def test_nwis_errors(): 266 | err = gh.helpers.nwis_errors() 267 | assert err.shape[0] == 7 268 | 269 | 270 | @pytest.mark.parametrize( 271 | ("key", "expected"), 272 | [ 273 | (None, 56), 274 | (["TX", "ca"], 2), 275 | ("contiguous", 48), 276 | ("continental", 49), 277 | ("commonwealths", 4), 278 | ("territories", 5), 279 | ], 280 | ) 281 | def test_us_states(key, expected): 282 | states = gh.helpers.get_us_states(key) 283 | assert states.shape[0] == expected 284 | 285 | 286 | def test_full_huc(): 287 | hu16 = gh.huc_wb_full(16) 288 | assert hu16.shape[0] == 7266 289 | 290 | 291 | def test_irrigation(): 292 | irr = gh.irrigation_withdrawals() 293 | assert_close(irr.TW.mean(), 419996.4992) 294 | 295 | 296 | def test_soil(): 297 | soil = gh.soil_properties("por") 298 | assert soil.sizes["x"] == 266301 299 | 300 | 301 | @pytest.mark.xfail(reason="NLD is unstable.") 302 | def test_nld(): 303 | nld = NLD("levee_stations") 304 | levees = nld.bygeom((-105.914551, 37.437388, -105.807434, 37.522392)) 305 | assert levees.shape == 
(1838, 12)
306 | 
307 | 
308 | def test_gnatsgo():
309 |     layers = ["Tk0_100a", "Soc20_50"]
310 |     geometry = (-95.624515, 30.121598, -95.448253, 30.264074)
311 |     soil = gh.soil_gnatsgo(layers, geometry, 4326)
312 |     assert_close(soil.tk0_100a.mean().item(), 89.848)
313 | 
314 | 
315 | def test_soilgrid():
316 |     layers = "bdod_5"
317 |     geometry = (-95.624515, 30.121598, -95.448253, 30.264074)
318 |     soil = gh.soil_soilgrids(layers, geometry, 4326)
319 |     assert_close(soil.bdod_0_5cm_mean.mean().item(), 1.4459)
320 | 
321 | 
322 | def test_soilpolaris():
323 |     layers = "bd_5"
324 |     geometry = (-95.624515, 30.121598, -95.614515, 30.131598)
325 |     soil = gh.soil_polaris(layers, geometry, 4326)
326 |     assert_close(soil.bd_0_5cm_mean.mean().item(), 1.4620)
327 | 
328 | 
329 | # def test_sensorthings():
330 | #     sensor = gh.SensorThings()
331 | #     cond = " and ".join(
332 | #         ("properties/monitoringLocationType eq 'Stream'", "properties/stateFIPS eq 'US:04'")
333 | #     )
334 | #     odata = sensor.odata_helper(conditionals=cond)
335 | #     df = sensor.query_byodata(odata)
336 | #     assert df.shape[0] == 72
337 | 
338 | #     df = sensor.sensor_info("USGS-09380000")
339 | #     assert df["description"].iloc[0] == "Stream"
340 | 
341 | #     df = sensor.sensor_property("Datastreams", "USGS-09380000")
342 | #     assert df["observationType"].unique()[0] == "Instantaneous"
343 | 
344 | 
345 | def test_show_versions():
346 |     f = io.StringIO()
347 |     gh.show_versions(file=f)
348 |     assert "SYS INFO" in f.getvalue()
349 | 
350 | 
351 | def test_ehydro():
352 |     bound = (-122.53, 45.57, -122.52, 45.59)
353 |     ehydro = EHydro("bathymetry")
354 |     bathy = ehydro.bygeom(bound)
355 |     assert_close(bathy["depthMean"].mean(), 25.39277)
356 |     assert ehydro.survey_grid.shape[0] == 2672
357 | 
358 | 
359 | class TestNFHL:
360 |     """Test the National Flood Hazard Layer (NFHL) class."""
361 | 
362 |     @pytest.mark.parametrize(
363 |         ("service", "layer", "expected_url", "expected_layer"),
364 |         [
365 |             (
366 |                 "NFHL",
367 |                 "cross-sections",
368 |                 "https://hazards.fema.gov/arcgis/rest/services/public/NFHL/MapServer",
369 |                 "Cross-Sections (14)",
370 |             ),
371 |             (
372 |                 "Prelim_CSLF",
373 |                 "floodway change",
374 |                 "https://hazards.fema.gov/arcgis/rest/services/CSLF/Prelim_CSLF/MapServer",
375 |                 "Floodway Change (2)",
376 |             ),
377 |             (
378 |                 "Draft_CSLF",
379 |                 "special flood hazard area change",
380 |                 "https://hazards.fema.gov/arcgis/rest/services/CSLF/Draft_CSLF/MapServer",
381 |                 "Special Flood Hazard Area Change (3)",
382 |             ),
383 |             (
384 |                 "Prelim_NFHL",
385 |                 "preliminary water lines",
386 |                 "https://hazards.fema.gov/arcgis/rest/services/PrelimPending/Prelim_NFHL/MapServer",
387 |                 "Preliminary Water Lines (20)",
388 |             ),
389 |             (
390 |                 "Pending_NFHL",
391 |                 "pending high water marks",
392 |                 "https://hazards.fema.gov/arcgis/rest/services/PrelimPending/Pending_NFHL/MapServer",
393 |                 "Pending High Water Marks (12)",
394 |             ),
395 |             (
396 |                 "Draft_NFHL",
397 |                 "draft transect baselines",
398 |                 "https://hazards.fema.gov/arcgis/rest/services/AFHI/Draft_FIRM_DB/MapServer",
399 |                 "Draft Transect Baselines (13)",
400 |             ),
401 |         ],
402 |     )
403 |     def test_nfhl(self, service, layer, expected_url, expected_layer):
404 |         """Test the NFHL class."""
405 |         nfhl = NFHL(service, layer)
406 |         assert nfhl.service_info.url == expected_url
407 |         assert nfhl.service_info.layer == expected_layer
408 | 
409 |     def test_nfhl_fail_layer(self):
410 |         """Test the layer argument failures in NFHL init."""
411 |         with pytest.raises(nhd.exceptions.InputValueError):
412 |             NFHL("NFHL", "cross_sections")
413 | 
414 
| def test_nfhl_fail_service(self): 415 | """Test the service argument failures in NFHL init.""" 416 | with pytest.raises(gh.exceptions.InputValueError): 417 | NFHL("NTHL", "cross-sections") 418 | 419 | @pytest.mark.parametrize( 420 | ("service", "layer", "geom", "expected_gdf_len", "expected_schema"), 421 | [ 422 | ( 423 | "NFHL", 424 | "cross-sections", 425 | (-73.42, 43.48, -72.5, 43.52), 426 | 44, 427 | [ 428 | "geometry", 429 | "OBJECTID", 430 | "DFIRM_ID", 431 | "VERSION_ID", 432 | "XS_LN_ID", 433 | "WTR_NM", 434 | "STREAM_STN", 435 | "START_ID", 436 | "XS_LTR", 437 | "XS_LN_TYP", 438 | "WSEL_REG", 439 | "STRMBED_EL", 440 | "LEN_UNIT", 441 | "V_DATUM", 442 | "PROFXS_TXT", 443 | "MODEL_ID", 444 | "SEQ", 445 | "SOURCE_CIT", 446 | "SHAPE.STLength()", 447 | "GFID", 448 | "GlobalID", 449 | ], 450 | ), 451 | ], 452 | ) 453 | def test_nfhl_getgeom(self, service, layer, geom, expected_gdf_len, expected_schema): 454 | """Test the NFHL bygeom method.""" 455 | nfhl = NFHL(service, layer) 456 | gdf_xs = nfhl.bygeom(geom, geo_crs=4269) 457 | assert isinstance(gdf_xs, gpd.GeoDataFrame) 458 | assert len(gdf_xs) >= expected_gdf_len 459 | assert set(gdf_xs.columns) == set(expected_schema) 460 | -------------------------------------------------------------------------------- /src/pygeohydro/nlcd.py: -------------------------------------------------------------------------------- 1 | """Accessing data from the supported databases through their APIs.""" 2 | 3 | from __future__ import annotations 4 | 5 | import warnings 6 | from typing import TYPE_CHECKING, Union, cast 7 | 8 | import cytoolz.curried as tlz 9 | import geopandas as gpd 10 | import numpy as np 11 | import pandas as pd 12 | import pyproj 13 | import rasterio as rio 14 | import xarray as xr 15 | 16 | import pygeoutils as geoutils 17 | from pygeohydro import helpers 18 | from pygeohydro.exceptions import ( 19 | InputTypeError, 20 | InputValueError, 21 | MissingCRSError, 22 | ServiceUnavailableError, 23 | ) 24 | from pygeohydro.helpers import Stats 25 | from pygeoogc import WMS, ServiceURL 26 | from pygeoogc import utils as ogc_utils 27 | 28 | if TYPE_CHECKING: 29 | from collections.abc import Mapping 30 | from numbers import Number 31 | 32 | from pyproj import CRS 33 | from shapely import MultiPolygon, Polygon 34 | 35 | GTYPE = Union[Polygon, MultiPolygon, tuple[float, float, float, float]] 36 | 37 | CRSType = int | str | CRS 38 | 39 | __all__ = [ 40 | "cover_statistics", 41 | "nlcd_area_percent", 42 | "nlcd_bycoords", 43 | "nlcd_bygeom", 44 | "overland_roughness", 45 | ] 46 | 47 | 48 | class NLCD: 49 | """Get data from NLCD database (2021). 50 | 51 | Parameters 52 | ---------- 53 | years : dict, optional 54 | The years for NLCD layers as a dictionary, defaults to 55 | ``{'impervious': [2021], 'cover': [2021], 'canopy': [2021], "descriptor": [2021]}``. 56 | Layers that are not in years are ignored, e.g., ``{'cover': [2016, 2021]}`` returns 57 | land cover data for 2016 and 2021. 58 | region : str, optional 59 | Region in the US that the input geometries are located, defaults to ``L48``. 60 | Valid values are ``L48`` (for CONUS), ``HI`` (for Hawaii), ``AK`` (for Alaska), 61 | and ``PR`` (for Puerto Rico). Both lower and upper cases are acceptable. 62 | crs : str, int, or pyproj.CRS, optional 63 | The spatial reference system to be used for requesting the data, defaults to 64 | ``epsg:4326``. 65 | ssl : bool, optional 66 | Whether to use SSL for the connection, defaults to ``True``. 
67 | """ 68 | 69 | def __init__( 70 | self, 71 | years: Mapping[str, int | list[int]] | None = None, 72 | region: str = "L48", 73 | crs: CRSType = 4326, 74 | ssl: bool = True, 75 | ) -> None: 76 | default_years = { 77 | "impervious": [2021], 78 | "cover": [2021], 79 | "canopy": [2021], 80 | "descriptor": [2021], 81 | } 82 | years = default_years if years is None else years 83 | if not isinstance(years, dict): 84 | raise InputTypeError("years", "dict", str(default_years)) 85 | self.years = tlz.valmap(lambda x: x if isinstance(x, list) else [x], years) 86 | self.years = cast("dict[str, list[int]]", self.years) 87 | self.region = region.upper() 88 | base_url = ServiceURL().wms.mrlc 89 | self.valid_crs = ogc_utils.valid_wms_crs(base_url) 90 | self.crs = pyproj.CRS(crs).to_string().lower() 91 | if self.crs not in self.valid_crs: 92 | raise InputValueError("crs", self.valid_crs) 93 | self.layers = self.get_layers() 94 | self.units = {"impervious": "%", "cover": "classes", "canopy": "%", "descriptor": "classes"} 95 | self.types = {"impervious": "f4", "cover": "u1", "canopy": "f4", "descriptor": "u1"} 96 | self.nodata = {"impervious": np.nan, "cover": 127, "canopy": np.nan, "descriptor": 127} 97 | 98 | self.wms = WMS( 99 | base_url, 100 | layers=list(self.layers.values()), 101 | outformat="image/geotiff", 102 | crs=self.crs, 103 | validation=False, 104 | ssl=ssl, 105 | ) 106 | 107 | def get_layers(self) -> dict[str, str]: 108 | """Get NLCD layers for the provided years dictionary.""" 109 | valid_regions = ("L48", "HI", "PR", "AK") 110 | if self.region not in valid_regions: 111 | raise InputValueError("region", valid_regions) 112 | 113 | nlcd_meta = helpers.nlcd_helper() 114 | 115 | names = ["impervious", "cover", "canopy", "descriptor"] 116 | avail_years = {n: nlcd_meta[f"{n}_years"] for n in names} 117 | 118 | if any( 119 | yr not in avail_years[lyr] or lyr not in names 120 | for lyr, yrs in self.years.items() 121 | for yr in yrs 122 | ): 123 | vals = [f"\n{lyr}: {', '.join(str(y) for y in yr)}" for lyr, yr in avail_years.items()] 124 | raise InputValueError("years", vals) 125 | 126 | def layer_name(lyr: str, yr: int) -> str: 127 | if lyr == "canopy": 128 | if self.region == "L48": 129 | return f"nlcd_tcc_conus_{yr}_v2021-4" 130 | return f"NLCD_{yr}_Tree_Canopy_{self.region}" 131 | if lyr == "cover": 132 | return f"NLCD_{yr}_Land_Cover_{self.region}" 133 | if lyr == "impervious": 134 | return f"NLCD_{yr}_Impervious_{self.region}" 135 | if self.region in ("HI", "PR"): 136 | raise InputValueError("region (descriptor)", ("L48", "AK")) 137 | service_lyr = ( 138 | "Impervious_Descriptor" if self.region == "AK" else "Impervious_descriptor" 139 | ) 140 | return f"NLCD_{yr}_{service_lyr}_{self.region}" 141 | 142 | return {f"{lyr}_{yr}": layer_name(lyr, yr) for lyr, yrs in self.years.items() for yr in yrs} 143 | 144 | def get_map( 145 | self, 146 | geometry: Polygon | MultiPolygon, 147 | resolution: int, 148 | ) -> xr.Dataset: 149 | """Get NLCD response and convert it to ``xarray.DataArray``.""" 150 | r_dict = self.wms.getmap_bybox(geometry.bounds, resolution, self.crs) 151 | gtiff2xarray = tlz.partial( 152 | geoutils.gtiff2xarray, geometry=geometry, geo_crs=self.crs, nodata=255 153 | ) 154 | try: 155 | _ds = gtiff2xarray(r_dict=r_dict) 156 | except rio.RasterioIOError as ex: 157 | raise ServiceUnavailableError(self.wms.url) from ex 158 | 159 | ds = _ds.to_dataset() if isinstance(_ds, xr.DataArray) else _ds 160 | ds.attrs = _ds.attrs 161 | for lyr_name, lyr in self.layers.items(): 162 | name = 
144 |     def get_map(
145 |         self,
146 |         geometry: Polygon | MultiPolygon,
147 |         resolution: int,
148 |     ) -> xr.Dataset:
149 |         """Get NLCD response and convert it to ``xarray.DataArray``."""
150 |         r_dict = self.wms.getmap_bybox(geometry.bounds, resolution, self.crs)
151 |         gtiff2xarray = tlz.partial(
152 |             geoutils.gtiff2xarray, geometry=geometry, geo_crs=self.crs, nodata=255
153 |         )
154 |         try:
155 |             _ds = gtiff2xarray(r_dict=r_dict)
156 |         except rio.RasterioIOError as ex:
157 |             raise ServiceUnavailableError(self.wms.url) from ex
158 | 
159 |         ds = _ds.to_dataset() if isinstance(_ds, xr.DataArray) else _ds
160 |         ds.attrs = _ds.attrs
161 |         for lyr_name, lyr in self.layers.items():
162 |             name = lyr_name.split("_")[0]
163 |             ds = ds.rename({lyr: lyr_name})
164 |             ds[lyr_name] = ds[lyr_name].where(ds[lyr_name] < 255, self.nodata[name])
165 |             ds[lyr_name].attrs["units"] = self.units[name]
166 |             ds[lyr_name] = ds[lyr_name].astype(self.types[name])
167 |             ds[lyr_name].attrs["nodatavals"] = (self.nodata[name],)
168 |             ds[lyr_name] = ds[lyr_name].rio.write_nodata(self.nodata[name])
169 |         return ds
170 | 
171 |     def __repr__(self) -> str:
172 |         """Print the services properties."""
173 |         return self.wms.__repr__()
174 | 
175 | 
176 | def nlcd_bygeom(
177 |     geometry: gpd.GeoSeries | gpd.GeoDataFrame,
178 |     resolution: int = 30,
179 |     years: Mapping[str, int | list[int]] | None = None,
180 |     region: str = "L48",
181 |     crs: CRSType = 4326,
182 |     ssl: bool = True,
183 | ) -> dict[int | str, xr.Dataset]:
184 |     """Get data from NLCD database (2021).
185 | 
186 |     Parameters
187 |     ----------
188 |     geometry : geopandas.GeoDataFrame or geopandas.GeoSeries
189 |         A GeoDataFrame or GeoSeries with the geometry to query. The indices are used
190 |         as keys in the output dictionary.
191 |     resolution : int, optional
192 |         The data resolution in meters. The width and height of the output are computed in pixels
193 |         based on the geometry bounds and the given resolution. The default is 30 m which is the
194 |         native resolution of NLCD data.
195 |     years : dict, optional
196 |         The years for NLCD layers as a dictionary, defaults to
197 |         ``{'impervious': [2021], 'cover': [2021], 'canopy': [2021], "descriptor": [2021]}``.
198 |         Layers that are not in years are ignored, e.g., ``{'cover': [2016, 2021]}`` returns
199 |         land cover data for 2016 and 2021.
200 |     region : str, optional
201 |         Region in the US that the input geometries are located, defaults to ``L48``.
202 |         Valid values are ``L48`` (for CONUS), ``HI`` (for Hawaii), ``AK`` (for Alaska),
203 |         and ``PR`` (for Puerto Rico). Both lower and upper cases are acceptable.
204 |     crs : str, int, or pyproj.CRS, optional
205 |         The spatial reference system to be used for requesting the data, defaults to
206 |         ``epsg:4326``.
207 |     ssl : bool, optional
208 |         Whether to use SSL for the connection, defaults to ``True``.
209 | 
210 |     Returns
211 |     -------
212 |     dict of xarray.Dataset
213 |         A ``dict`` of NLCD datasets where the keys are indices
214 |         of the input ``GeoDataFrame``.
215 |     """
216 |     if resolution < 30:
217 |         warnings.warn(
218 |             "NLCD's resolution is 30 m, so finer resolutions are not recommended.",
219 |             UserWarning,
220 |             stacklevel=2,
221 |         )
222 | 
223 |     if not isinstance(geometry, (gpd.GeoDataFrame, gpd.GeoSeries)):
224 |         raise InputTypeError("geometry", "GeoDataFrame or GeoSeries")
225 | 
226 |     if geometry.crs is None:
227 |         raise MissingCRSError
228 |     _geometry = cast("gpd.GeoDataFrame", geometry.to_crs(crs))
229 |     geo_dict = _geometry.geometry.to_dict()
230 | 
231 |     nlcd_wms = NLCD(years=years, region=region, crs=crs, ssl=ssl)
232 | 
233 |     return {i: nlcd_wms.get_map(g, resolution) for i, g in geo_dict.items()}
234 | 
235 | 
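# Illustrative usage sketch (parameters borrowed from this repo's tests;
# ``geom`` is a hypothetical shapely polygon): fetch the 2016 cover layer at
# 1 km resolution for a GeoSeries.
#
# >>> import geopandas as gpd
# >>> import pygeohydro as gh
# >>> geoms = gpd.GeoSeries([geom], crs=4326)
# >>> lulc = gh.nlcd_bygeom(geoms, years={"cover": [2016]}, resolution=1000, crs=3542)
# >>> lulc[0].cover_2016.rio.nodata  # 127 for the categorical cover layer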
236 | def nlcd_bycoords(
237 |     coords: list[tuple[float, float]],
238 |     years: Mapping[str, int | list[int]] | None = None,
239 |     region: str = "L48",
240 |     ssl: bool = True,
241 | ) -> gpd.GeoDataFrame:
242 |     """Get data from NLCD database (2021).
243 | 
244 |     Parameters
245 |     ----------
246 |     coords : list of tuple
247 |         List of coordinates in the form of (longitude, latitude).
248 |     years : dict, optional
249 |         The years for NLCD layers as a dictionary, defaults to
250 |         ``{'impervious': [2021], 'cover': [2021], 'canopy': [2021], "descriptor": [2021]}``.
251 |         Layers that are not in years are ignored, e.g., ``{'cover': [2016, 2021]}`` returns
252 |         land cover data for 2016 and 2021.
253 |     region : str, optional
254 |         Region in the US that the input geometries are located, defaults to ``L48``.
255 |         Valid values are ``L48`` (for CONUS), ``HI`` (for Hawaii), ``AK`` (for Alaska),
256 |         and ``PR`` (for Puerto Rico). Both lower and upper cases are acceptable.
257 |     ssl : bool, optional
258 |         Whether to use SSL for the connection, defaults to ``True``.
259 | 
260 |     Returns
261 |     -------
262 |     geopandas.GeoDataFrame
263 |         A GeoDataFrame with the NLCD data and the coordinates.
264 |     """
265 |     nlcd_wms = NLCD(years=years, region=region, crs=3857, ssl=ssl)
266 |     points = gpd.GeoSeries(gpd.points_from_xy(*zip(*coords), crs=4326))
267 |     points_proj = points.to_crs(nlcd_wms.crs)
268 |     geoms = points_proj.buffer(50, cap_style="square")
269 |     ds_list = [nlcd_wms.get_map(g, 30) for g in geoms]
270 | 
271 |     def get_value(da: xr.DataArray, x: float, y: float) -> Number:
272 |         nodata = da.attrs["nodatavals"][0]
273 |         value = da.fillna(nodata).interp(x=[x], y=[y], method="nearest")
274 |         return da.dtype.type(value)[0, 0]
275 | 
276 |     values = {
277 |         v: [get_value(ds[v], p.x, p.y) for ds, p in zip(ds_list, points_proj)] for v in ds_list[0]
278 |     }
279 |     points = cast("gpd.GeoDataFrame", points.to_frame("geometry"))
280 |     return gpd.GeoDataFrame(
281 |         pd.merge(points, pd.DataFrame(values), left_index=True, right_index=True)
282 |     )
283 | 
284 | 
285 | def overland_roughness(cover_da: xr.DataArray) -> xr.DataArray:
286 |     """Estimate overland roughness from land cover data.
287 | 
288 |     Parameters
289 |     ----------
290 |     cover_da : xarray.DataArray
291 |         Land cover DataArray from a LULC Dataset from the ``nlcd_bygeom`` function.
292 | 
293 |     Returns
294 |     -------
295 |     xarray.DataArray
296 |         Overland roughness
297 |     """
298 |     if not isinstance(cover_da, xr.DataArray):
299 |         raise InputTypeError("cover_da", "xarray.DataArray")
300 | 
301 |     roughness = cover_da.astype(np.float64)
302 |     roughness = roughness.rio.write_nodata(np.nan)
303 |     roughness.name = "roughness"
304 |     roughness.attrs["long_name"] = "overland roughness"
305 |     roughness.attrs["units"] = "-"
306 | 
307 |     meta = helpers.nlcd_helper()
308 |     get_roughness = np.vectorize(meta["roughness"].get, excluded=["default"])
309 |     return roughness.copy(data=get_roughness(cover_da.astype("uint8").astype(str), np.nan))
310 | 
311 | 
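# Illustrative sketch (mirrors ``test_roughness`` in tests/test_pygeohydro.py):
# derive an overland roughness grid from a cover layer returned by
# ``nlcd_bygeom``.
#
# >>> lulc = gh.nlcd_bygeom(geoms, years={"cover": [2016]}, resolution=1000, crs=3542)
# >>> roughness = gh.overland_roughness(lulc[0].cover_2016)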
324 | """ 325 | if not isinstance(cover_da, xr.DataArray): 326 | raise InputTypeError("cover_da", "xarray.DataArray") 327 | 328 | nlcd_meta = helpers.nlcd_helper() 329 | val, freq = np.unique(cover_da, return_counts=True) 330 | zero_idx = np.argwhere(val == 127) 331 | val = np.delete(val, zero_idx).astype(str) 332 | freq = np.delete(freq, zero_idx) 333 | freq_dict = dict(zip(val.tolist(), freq.tolist())) 334 | total_count = freq.sum() 335 | 336 | if any(c not in nlcd_meta["classes"] for c in freq_dict): 337 | raise InputValueError("ds", list(nlcd_meta["classes"])) 338 | 339 | class_percentage = { 340 | nlcd_meta["classes"][k].split(" -")[0].strip(): v / total_count * 100.0 341 | for k, v in freq_dict.items() 342 | } 343 | category_percentage = { 344 | k: sum(freq_dict[c] for c in v if c in freq_dict) / total_count * 100.0 345 | for k, v in nlcd_meta["categories"].items() 346 | if k != "Background" 347 | } 348 | 349 | return Stats(class_percentage, category_percentage) 350 | 351 | 352 | def _area_percent(nlcd: xr.Dataset, year: int) -> dict[str, float]: 353 | """Calculate the percentage of the area for each land cover class.""" 354 | cover_nodata = nlcd[f"cover_{year}"].rio.nodata 355 | if np.isnan(cover_nodata): 356 | msk = ~nlcd[f"cover_{year}"].isnull() 357 | elif cover_nodata > 0: 358 | msk = nlcd[f"cover_{year}"] < cover_nodata 359 | else: 360 | msk = nlcd[f"cover_{year}"] > cover_nodata 361 | cell_total = msk.sum() 362 | 363 | msk = nlcd[f"cover_{year}"].isin(range(21, 25)) 364 | urban = msk.sum() / cell_total 365 | natural = 1 - urban 366 | 367 | impervious = nlcd.where(msk)[f"impervious_{year}"].mean() * urban / 100 368 | developed = urban - impervious 369 | natural = natural.compute().item() * 100 370 | developed = developed.compute().item() * 100 371 | impervious = impervious.compute().item() * 100 372 | return { 373 | "natural": natural, 374 | "developed": developed, 375 | "impervious": impervious, 376 | "urban": developed + impervious, 377 | } 378 | 379 | 380 | def nlcd_area_percent( 381 | geo_df: gpd.GeoSeries | gpd.GeoDataFrame, 382 | year: int = 2019, 383 | region: str = "L48", 384 | ) -> pd.DataFrame: 385 | """Compute the area percentages of the natural, developed, and impervious areas. 386 | 387 | Notes 388 | ----- 389 | This function uses imperviousness and land use/land cover data from NLCD 390 | to compute the area percentages of the natural, developed, and impervious areas. 391 | It considers land cover classes of 21 to 24 as urban and the rest as natural. 392 | Then, uses imperviousness percentage to partition the urban area into developed 393 | and impervious areas. So, ``urban = developed + impervious`` and always 394 | ``natural + urban = natural + developed + impervious = 100``. 395 | 396 | Parameters 397 | ---------- 398 | geo_df : geopandas.GeoDataFrame or geopandas.GeoSeries 399 | A GeoDataFrame or GeoSeries with the geometry to query. The indices are used 400 | as keys in the output dictionary. 401 | year : int, optional 402 | Year of the NLCD data, defaults to 2019. Available years are 2021, 2019, 2016, 403 | 2013, 2011, 2008, 2006, 2004, and 2001. 404 | region : str, optional 405 | Region in the US that the input geometries are located, defaults to ``L48``. 406 | Valid values are ``L48`` (for CONUS), ``HI`` (for Hawaii), ``AK`` (for Alaska), 407 | and ``PR`` (for Puerto Rico). Both lower and upper cases are acceptable. 
408 | 409 | Returns 410 | ------- 411 | pandas.DataFrame 412 | A dataframe with the same index as input ``geo_df`` and columns are the area 413 | percentages of the natural, developed, impervious, and urban 414 | (sum of developed and impervious) areas. Sum of urban and natural percentages 415 | is always 100, as well as the sum of natural, developed, and impervious 416 | percentages. 417 | """ 418 | valid_year = (2021, 2019, 2016, 2013, 2011, 2008, 2006, 2004, 2001) 419 | if year not in valid_year: 420 | raise InputValueError("year", valid_year) 421 | 422 | if not isinstance(geo_df, (gpd.GeoDataFrame, gpd.GeoSeries)): 423 | raise InputTypeError("geometry", "GeoDataFrame or GeoSeries") 424 | 425 | if geo_df.crs is None: 426 | raise MissingCRSError 427 | 428 | geoms = geo_df.to_crs(4326).geometry # pyright: ignore[reportOptionalMemberAccess] 429 | 430 | wms = NLCD(years={"impervious": year, "cover": year}, region=region, ssl=False) 431 | 432 | return pd.DataFrame.from_dict( 433 | {i: _area_percent(wms.get_map(g, 30), year) for i, g in geoms.items()}, 434 | orient="index", 435 | ) 436 | -------------------------------------------------------------------------------- /tests/test_stn.py: -------------------------------------------------------------------------------- 1 | """Tests for PyGeoHydro package.""" 2 | 3 | from __future__ import annotations 4 | 5 | import geopandas as gpd 6 | import numpy as np 7 | import pandas as pd 8 | import pytest 9 | from pyproj import CRS 10 | from pyproj.exceptions import CRSError 11 | 12 | import pygeohydro as gh 13 | 14 | 15 | def assert_close(a: float, b: float) -> None: 16 | np.testing.assert_allclose(a, b, rtol=1e-3) 17 | 18 | 19 | class TestSTNFloodEventData: 20 | stn = gh.STNFloodEventData 21 | 22 | expected_all_data_schemas = { 23 | "instruments": [ 24 | "instrument_id", 25 | "sensor_type_id", 26 | "deployment_type_id", 27 | "location_description", 28 | "serial_number", 29 | "interval", 30 | "site_id", 31 | "event_id", 32 | "inst_collection_id", 33 | "housing_type_id", 34 | "sensor_brand_id", 35 | "vented", 36 | "instrument_status", 37 | "data_files", 38 | "files", 39 | "last_updated", 40 | "last_updated_by", 41 | "housing_serial_number", 42 | ], 43 | "peaks": [ 44 | "peak_summary_id", 45 | "member_id", 46 | "peak_date", 47 | "is_peak_estimated", 48 | "is_peak_time_estimated", 49 | "peak_stage", 50 | "is_peak_stage_estimated", 51 | "is_peak_discharge_estimated", 52 | "vdatum_id", 53 | "time_zone", 54 | "calc_notes", 55 | "data_file", 56 | "hwms", 57 | "height_above_gnd", 58 | "peak_discharge", 59 | "aep", 60 | "aep_lowci", 61 | "aep_upperci", 62 | "aep_range", 63 | "is_hag_estimated", 64 | "last_updated", 65 | "last_updated_by", 66 | ], 67 | "hwms": [ 68 | "hwm_id", 69 | "waterbody", 70 | "site_id", 71 | "event_id", 72 | "hwm_type_id", 73 | "hwm_quality_id", 74 | "hwm_locationdescription", 75 | "latitude_dd", 76 | "longitude_dd", 77 | "survey_date", 78 | "elev_ft", 79 | "vdatum_id", 80 | "vcollect_method_id", 81 | "bank", 82 | "approval_id", 83 | "marker_id", 84 | "height_above_gnd", 85 | "hcollect_method_id", 86 | "hwm_notes", 87 | "hwm_environment", 88 | "flag_date", 89 | "hdatum_id", 90 | "flag_member_id", 91 | "survey_member_id", 92 | "hwm_label", 93 | "files", 94 | "stillwater", 95 | "peak_summary_id", 96 | "last_updated", 97 | "last_updated_by", 98 | "uncertainty", 99 | "hwm_uncertainty", 100 | "geometry", 101 | ], 102 | "sites": [ 103 | "site_id", 104 | "site_no", 105 | "site_name", 106 | "site_description", 107 | "state", 108 | "county", 109 | 
"waterbody", 110 | "latitude_dd", 111 | "longitude_dd", 112 | "hdatum_id", 113 | "hcollect_method_id", 114 | "member_id", 115 | "network_name_site", 116 | "network_type_site", 117 | "objective_points", 118 | "instruments", 119 | "files", 120 | "site_housing", 121 | "hwms", 122 | "site_notes", 123 | "access_granted", 124 | "address", 125 | "city", 126 | "is_permanent_housing_installed", 127 | "safety_notes", 128 | "zip", 129 | "last_updated", 130 | "last_updated_by", 131 | "other_sid", 132 | "sensor_not_appropriate", 133 | "drainage_area_sqmi", 134 | "landownercontact_id", 135 | "priority_id", 136 | "zone", 137 | "usgs_sid", 138 | "noaa_sid", 139 | "geometry", 140 | ], 141 | } 142 | 143 | expected_filtered_data_schemas = { 144 | "instruments": [ 145 | "sensorType", 146 | "deploymentType", 147 | "eventName", 148 | "collectionCondition", 149 | "housingType", 150 | "sensorBrand", 151 | "statusId", 152 | "timeStamp", 153 | "site_no", 154 | "latitude", 155 | "longitude", 156 | "siteDescription", 157 | "networkNames", 158 | "stateName", 159 | "countyName", 160 | "siteWaterbody", 161 | "siteHDatum", 162 | "sitePriorityName", 163 | "siteZone", 164 | "siteHCollectMethod", 165 | "sitePermHousing", 166 | "instrument_id", 167 | "sensor_type_id", 168 | "deployment_type_id", 169 | "location_description", 170 | "serial_number", 171 | "interval", 172 | "site_id", 173 | "vented", 174 | "instrument_status", 175 | "data_files", 176 | "files", 177 | "housing_serial_number", 178 | "geometry", 179 | ], 180 | "peaks": [ 181 | "vdatum", 182 | "member_name", 183 | "site_id", 184 | "site_no", 185 | "latitude_dd", 186 | "longitude_dd", 187 | "description", 188 | "networks", 189 | "state", 190 | "county", 191 | "waterbody", 192 | "horizontal_datum", 193 | "priority", 194 | "horizontal_collection_method", 195 | "perm_housing_installed", 196 | "peak_summary_id", 197 | "peak_date", 198 | "is_peak_estimated", 199 | "is_peak_time_estimated", 200 | "peak_stage", 201 | "is_peak_stage_estimated", 202 | "is_peak_discharge_estimated", 203 | "time_zone", 204 | "calc_notes", 205 | "data_file", 206 | "hwms", 207 | "peak_discharge", 208 | "zone", 209 | "height_above_gnd", 210 | "is_hag_estimated", 211 | "aep_upperci", 212 | "geometry", 213 | ], 214 | "hwms": [ 215 | "latitude", 216 | "longitude", 217 | "eventName", 218 | "hwmTypeName", 219 | "hwmQualityName", 220 | "verticalDatumName", 221 | "verticalMethodName", 222 | "approvalMember", 223 | "markerName", 224 | "horizontalMethodName", 225 | "horizontalDatumName", 226 | "flagMemberName", 227 | "surveyMemberName", 228 | "site_no", 229 | "siteDescription", 230 | "sitePriorityName", 231 | "networkNames", 232 | "stateName", 233 | "countyName", 234 | "siteZone", 235 | "sitePermHousing", 236 | "site_latitude", 237 | "site_longitude", 238 | "hwm_id", 239 | "waterbody", 240 | "site_id", 241 | "event_id", 242 | "hwm_type_id", 243 | "hwm_quality_id", 244 | "hwm_locationdescription", 245 | "latitude_dd", 246 | "longitude_dd", 247 | "survey_date", 248 | "elev_ft", 249 | "vdatum_id", 250 | "vcollect_method_id", 251 | "bank", 252 | "approval_id", 253 | "marker_id", 254 | "hcollect_method_id", 255 | "hwm_notes", 256 | "hwm_environment", 257 | "flag_date", 258 | "stillwater", 259 | "hdatum_id", 260 | "flag_member_id", 261 | "survey_member_id", 262 | "uncertainty", 263 | "hwm_label", 264 | "files", 265 | "height_above_gnd", 266 | "hwm_uncertainty", 267 | "peak_summary_id", 268 | "geometry", 269 | ], 270 | "sites": [ 271 | "networkNames", 272 | "Events", 273 | "site_id", 274 | "site_no", 275 | 
"site_name", 276 | "site_description", 277 | "address", 278 | "city", 279 | "state", 280 | "zip", 281 | "other_sid", 282 | "county", 283 | "waterbody", 284 | "latitude_dd", 285 | "longitude_dd", 286 | "hdatum_id", 287 | "zone", 288 | "is_permanent_housing_installed", 289 | "usgs_sid", 290 | "noaa_sid", 291 | "hcollect_method_id", 292 | "safety_notes", 293 | "access_granted", 294 | "network_name_site", 295 | "network_type_site", 296 | "objective_points", 297 | "instruments", 298 | "files", 299 | "site_housing", 300 | "hwms", 301 | "RecentOP", 302 | "priority_id", 303 | "member_id", 304 | "landownercontact_id", 305 | "drainage_area_sqmi", 306 | "geometry", 307 | ], 308 | } 309 | 310 | @pytest.mark.parametrize( 311 | ("data_type", "as_list", "crs", "async_retriever_kwargs", "expected_shape"), 312 | [ 313 | ("instruments", False, 4329, {"raise_status": False}, (4612, 18)), 314 | ("peaks", False, None, None, (13159, 22)), 315 | ("hwms", False, None, {"url": "https://www.google.com"}, (34694, 33)), 316 | ( 317 | "sites", 318 | False, 319 | "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs", 320 | {}, 321 | (26343, 37), 322 | ), 323 | ("sites", False, 4236, {}, (26343, 37)), 324 | ( 325 | "instruments", 326 | True, 327 | 4326, 328 | {"url": "https://www.google.com", "disable": True}, 329 | 4612, 330 | ), 331 | ("peaks", True, None, {"timeout": 10}, 13159), 332 | ("hwms", True, 26915, None, 34694), 333 | ("sites", True, None, None, 26343), 334 | ], 335 | ) 336 | def test_get_all_data_success( 337 | self, data_type, as_list, crs, async_retriever_kwargs, expected_shape 338 | ): 339 | """Test the get_all_data method of the STNFloodEventData class for success cases.""" 340 | result = self.stn.get_all_data( 341 | data_type, as_list=as_list, crs=crs, async_retriever_kwargs=async_retriever_kwargs 342 | ) 343 | 344 | if as_list: 345 | assert isinstance(result, list) 346 | assert len(result) >= expected_shape 347 | assert isinstance(result[0], dict) 348 | assert all( 349 | rk in self.expected_all_data_schemas[data_type] for rk in list(result[0].keys()) 350 | ) 351 | else: 352 | assert isinstance(result, (gpd.GeoDataFrame, pd.DataFrame)) 353 | 354 | if isinstance(result, gpd.GeoDataFrame): 355 | if crs is None: 356 | crs = self.stn.service_crs 357 | assert result.crs == CRS(crs) 358 | 359 | assert result.shape[0] >= expected_shape[0] # minimum number of rows 360 | assert result.shape[1] == expected_shape[1] # exact number of columns 361 | 362 | assert list(result.columns) == self.expected_all_data_schemas[data_type] 363 | 364 | @pytest.mark.parametrize( 365 | ("data_type", "as_list", "crs", "async_retriever_kwargs", "expected_exception"), 366 | [ 367 | ("instruments", False, 4329, {"raise_status": False, "anything": 1}, TypeError), 368 | ("peekks", False, None, None, gh.exceptions.InputValueError), 369 | ("hwms", False, None, {"url": "https://www.google.com", "any": "yes"}, TypeError), 370 | ("sites", False, "EBSJ:3829", {}, CRSError), 371 | ], 372 | ) 373 | def test_get_all_data_fail( 374 | self, data_type, as_list, crs, async_retriever_kwargs, expected_exception 375 | ): 376 | """Test the get_all_data method of the STNFloodEventData class for failure cases.""" 377 | with pytest.raises(expected_exception): 378 | self.stn.get_all_data( 379 | data_type, as_list=as_list, crs=crs, async_retriever_kwargs=async_retriever_kwargs 380 | ) 381 | 382 | @pytest.mark.parametrize( 383 | ("data_type", "query_params", "as_list", "crs", 
"async_retriever_kwargs", "expected_shape"), 384 | [ 385 | ( 386 | "instruments", 387 | {"States": "OR,WA,AK,HI"}, 388 | False, 389 | 4329, 390 | {"raise_status": False}, 391 | (1, 32), 392 | ), 393 | ("peaks", {"States": "CA, FL, SC"}, False, None, None, (885, 31)), 394 | ( 395 | "hwms", 396 | {"States": "LA"}, 397 | False, 398 | None, 399 | {"url": "https://www.google.com", "request_kwds": {"k": "v"}}, 400 | (1208, 54), 401 | ), 402 | ("sites", {"State": "OK, KS, NE, SD, MS, MD, MN, WI"}, False, 3829, {}, (1, 36)), 403 | ( 404 | "instruments", 405 | {"States": "NE,IL,IA,TX"}, 406 | True, 407 | 4326, 408 | {"url": "https://www.google.com", "disable": True}, 409 | 143, 410 | ), 411 | ( 412 | "peaks", 413 | {"States": "NV, AZ, AR, MO, IN"}, 414 | True, 415 | None, 416 | {"timeout": 10}, 417 | 205, 418 | ), 419 | ("hwms", {"States": "KY,WV,NC,GA,TN,PA"}, True, 26915, None, 6220), 420 | ("sites", {"State": "NY"}, True, None, None, 712), 421 | ("instruments", None, True, None, None, 4612), 422 | ], 423 | ) 424 | def test_get_filtered_data_success( 425 | self, data_type, query_params, as_list, crs, async_retriever_kwargs, expected_shape 426 | ): 427 | """Test the get_filtered_data method of the STNFloodEventData class for success cases.""" 428 | result = self.stn.get_filtered_data( 429 | data_type, 430 | query_params, 431 | as_list=as_list, 432 | crs=crs, 433 | async_retriever_kwargs=async_retriever_kwargs, 434 | ) 435 | 436 | if as_list: 437 | assert isinstance(result, list) 438 | assert len(result) >= expected_shape 439 | assert isinstance(result[0], dict) 440 | assert all( 441 | rk in self.expected_filtered_data_schemas[data_type] 442 | for rk in list(result[0].keys()) 443 | ) 444 | else: 445 | assert isinstance(result, gpd.GeoDataFrame) 446 | if crs is None: 447 | crs = self.stn.service_crs 448 | assert result.crs == CRS(crs) 449 | 450 | assert result.shape[0] >= expected_shape[0] 451 | assert result.shape[1] == expected_shape[1] 452 | 453 | assert all( 454 | rc in self.expected_filtered_data_schemas[data_type] for rc in list(result.columns) 455 | ) 456 | 457 | @pytest.mark.parametrize( 458 | ( 459 | "data_type", 460 | "query_params", 461 | "as_list", 462 | "crs", 463 | "async_retriever_kwargs", 464 | "expected_exception", 465 | ), 466 | [ 467 | ( 468 | "instruments", 469 | {"States": "OR,WA,AK,HI"}, 470 | False, 471 | 4329, 472 | {"raise_status": False, "anything": 1}, 473 | TypeError, 474 | ), 475 | ( 476 | "peaks", 477 | {"Storms": "Sandy, Ivan, Harvey"}, 478 | False, 479 | None, 480 | None, 481 | gh.exceptions.InputValueError, 482 | ), 483 | ( 484 | "hwms", 485 | {"States": "LA"}, 486 | False, 487 | None, 488 | {"url": "https://www.google.com", "any": "yes"}, 489 | TypeError, 490 | ), 491 | ( 492 | "sitessss", 493 | {"State": "OK, KS, NE, SD, MS, MD, MN, WI"}, 494 | False, 495 | 3829, 496 | {}, 497 | gh.exceptions.InputValueError, 498 | ), 499 | ("instruments", {}, False, "EPSJ:4326", None, CRSError), 500 | ], 501 | ) 502 | def test_get_filtered_data_fail( 503 | self, data_type, query_params, as_list, crs, async_retriever_kwargs, expected_exception 504 | ): 505 | """Test the get_filtered_data method of the STNFloodEventData class for failure cases.""" 506 | with pytest.raises(expected_exception): 507 | self.stn.get_filtered_data( 508 | data_type, 509 | query_params, 510 | as_list=as_list, 511 | crs=crs, 512 | async_retriever_kwargs=async_retriever_kwargs, 513 | ) 514 | 515 | @pytest.mark.parametrize( 516 | ("data_type", "query_params", "expected_shape"), 517 | [ 518 | ( 519 | 
"instruments", 520 | {"States": "OR,WA,AK,HI"}, 521 | (1, 32), 522 | ), 523 | ("peaks", {"States": "CA, FL, SC"}, (885, 31)), 524 | ( 525 | "hwms", 526 | {"States": "LA"}, 527 | (1208, 54), 528 | ), 529 | ("sites", {"State": "OK, KS, NE, SD, MS, MD, MN, WI"}, (1, 36)), 530 | ( 531 | "instruments", 532 | {"States": "NE,IL,IA,TX"}, 533 | 143, 534 | ), 535 | ( 536 | "peaks", 537 | {"States": "NV, AZ, AR, MO, IN"}, 538 | 205, 539 | ), 540 | ("hwms", {"States": "KY,WV,NC,GA,TN,PA"}, 6220), 541 | ("sites", {"State": "NY"}, 712), 542 | ("instruments", None, 4612), 543 | ], 544 | ) 545 | def test_stn_func(self, data_type, query_params, expected_shape): 546 | """Test the function wrapper of the STNFloodEventData class.""" 547 | result = gh.stn_flood_event(data_type, query_params) 548 | if isinstance(expected_shape, tuple): 549 | assert result.shape[0] >= expected_shape[0] 550 | assert result.shape[1] == expected_shape[1] 551 | else: 552 | assert len(result) >= expected_shape 553 | if query_params is None: 554 | assert all(rc in self.expected_all_data_schemas[data_type] for rc in result) 555 | else: 556 | assert all(rc in self.expected_filtered_data_schemas[data_type] for rc in result) 557 | -------------------------------------------------------------------------------- /src/pygeohydro/nid.py: -------------------------------------------------------------------------------- 1 | """Accessing data from the supported databases through their APIs.""" 2 | 3 | from __future__ import annotations 4 | 5 | import importlib.util 6 | import warnings 7 | from collections.abc import Iterable, Sequence 8 | from datetime import datetime, timezone 9 | from pathlib import Path 10 | from typing import TYPE_CHECKING, Any, Union, cast 11 | 12 | import geopandas as gpd 13 | import pandas as pd 14 | import requests 15 | 16 | import async_retriever as ar 17 | import pygeoogc as ogc 18 | import pygeoutils as geoutils 19 | from pygeohydro.exceptions import ( 20 | InputTypeError, 21 | InputValueError, 22 | ServiceError, 23 | ZeroMatchedError, 24 | ) 25 | from pygeoogc import ServiceURL 26 | from pygeoutils.exceptions import EmptyResponseError 27 | 28 | if TYPE_CHECKING: 29 | from pyproj import CRS 30 | from shapely import MultiPolygon, Polygon 31 | 32 | GTYPE = Union[Polygon, MultiPolygon, tuple[float, float, float, float]] 33 | 34 | CRSType = int | str | CRS 35 | 36 | __all__ = ["NID"] 37 | 38 | 39 | def _remote_file_modified(file_path: Path) -> bool: 40 | """Check if the file is older than the last modification date of the NID web service.""" 41 | if not file_path.exists(): 42 | return True 43 | url = "https://nid.sec.usace.army.mil/api/nation/gpkg" 44 | # we need to get the redirect URL so that we can get the last modified date, so 45 | # we need to send a request with the Range header set to 0-0 to avoid downloading 46 | # the entire file. 
47 | response = requests.get(url, headers={"Range": "bytes=0-0"}, allow_redirects=True, timeout=50) 48 | if response.status_code not in (200, 206): 49 | raise ServiceError(response.reason, url) 50 | response = requests.head(response.url, timeout=50) 51 | if response.status_code != 200: 52 | raise ServiceError(response.reason, url) 53 | 54 | remote_last_modified = datetime.strptime( 55 | response.headers["Last-Modified"], "%a, %d %b %Y %H:%M:%S GMT" 56 | ).replace(tzinfo=timezone.utc) 57 | local_last_modified = datetime.fromtimestamp(file_path.stat().st_mtime, tz=timezone.utc) 58 | return local_last_modified < remote_last_modified 59 | 60 | 61 | class NID: 62 | """Retrieve data from the National Inventory of Dams web service.""" 63 | 64 | def __init__(self) -> None: 65 | self.base_url = ServiceURL().restful.nid 66 | self.suggest_url = f"{self.base_url}/suggestions" 67 | resp = ar.retrieve_json([f"{self.base_url}/advanced-fields"]) 68 | resp = cast("list[dict[str, Any]]", resp) 69 | self.fields_meta = pd.DataFrame(resp[0]) 70 | self.valid_fields = self.fields_meta["name"] 71 | self.dam_type = { 72 | pd.NA: "N/A", 73 | None: "N/A", 74 | 1: "Arch", 75 | 2: "Multi-Arch", 76 | 3: "Stone", 77 | 4: "Roller-Compacted Concrete", 78 | 5: "Rockfill", 79 | 6: "Buttress", 80 | 7: "Masonry", 81 | 8: "Earth", 82 | 9: "Gravity", 83 | 10: "Timber Crib", 84 | 11: "Concrete", 85 | 12: "Other", 86 | } 87 | dtype_str = {str(k): v for k, v in self.dam_type.items() if str(k).isdigit()} 88 | self.dam_type = self.dam_type | dtype_str 89 | self.dam_purpose = { 90 | pd.NA: "N/A", 91 | None: "N/A", 92 | 1: "Tailings", 93 | 2: "Irrigation", 94 | 3: "Navigation", 95 | 4: "Fish and Wildlife Pond", 96 | 5: "Recreation", 97 | 6: "Hydroelectric", 98 | 7: "Debris Control", 99 | 8: "Water Supply", 100 | 9: "Flood Risk Reduction", 101 | 10: "Fire Protection, Stock, Or Small Farm Pond", 102 | 11: "Grade Stabilization", 103 | 12: "Other", 104 | } 105 | purp_str = {str(k): v for k, v in self.dam_purpose.items() if str(k).isdigit()} 106 | self.dam_purpose = self.dam_purpose | purp_str 107 | self.data_units = { 108 | "distance": "mile", 109 | "damHeight": "ft", 110 | "hydraulicHeight": "ft", 111 | "structuralHeight": "ft", 112 | "nidHeight": "ft", 113 | "damLength": "ft", 114 | "volume": "cubic yards", 115 | "nidStorage": "acre-ft", 116 | "maxStorage": "acre-ft", 117 | "normalStorage": "acre-ft", 118 | "surfaceArea": "acre", 119 | "drainageArea": "square miles", 120 | "maxDischarge": "cfs", 121 | "spillwayWidth": "ft", 122 | } 123 | self._nid_inventory_path = Path("cache", "full_nid_inventory.parquet") 124 | 125 | @property 126 | def nid_inventory_path(self) -> Path: 127 | """Path to the NID inventory parquet file.""" 128 | return self._nid_inventory_path 129 | 130 | @nid_inventory_path.setter 131 | def nid_inventory_path(self, value: Path | str) -> None: 132 | self._nid_inventory_path = Path(value) 133 | self._nid_inventory_path.parent.mkdir(parents=True, exist_ok=True) 134 | 135 | def stage_nid_inventory(self, fname: str | Path | None = None) -> None: 136 | """Download the entire NID inventory data and save to a parquet file. 137 | 138 | Parameters 139 | ---------- 140 | fname : str, pathlib.Path, optional 141 | The path to the file to save the data to, defaults to 142 | ``./cache/full_nid_inventory.parquet``. 
143 | """ 144 | fname = self.nid_inventory_path if fname is None else Path(fname) 145 | if fname.suffix != ".parquet": 146 | fname = fname.with_suffix(".parquet") 147 | 148 | self.nid_inventory_path = fname 149 | gpkg_file = fname.with_suffix(".gpkg") 150 | if _remote_file_modified(gpkg_file) or not self.nid_inventory_path.exists(): 151 | gpkg_file.unlink(missing_ok=True) 152 | url = "https://nid.sec.usace.army.mil/api/nation/gpkg" 153 | fname_ = ogc.streaming_download(url, fnames=gpkg_file) 154 | if fname_ is None: 155 | raise EmptyResponseError 156 | dams = ( 157 | gpd.read_file(fname_, engine="pyogrio", use_arrow=True) 158 | if importlib.util.find_spec("pyogrio") 159 | else gpd.read_file(fname_) 160 | ) 161 | dams = cast("gpd.GeoDataFrame", dams) 162 | 163 | dams = dams.astype( 164 | { 165 | "name": str, 166 | "otherNames": str, 167 | "formerNames": str, 168 | "nidId": str, 169 | "otherStructureId": str, 170 | "federalId": str, 171 | "ownerNames": str, 172 | "ownerTypeIds": str, 173 | "primaryOwnerTypeId": str, 174 | "separateStructuresCount": str, 175 | "isAssociatedStructureId": str, 176 | "designerNames": str, 177 | "nonFederalDamOnFederalId": str, 178 | "primaryPurposeId": str, 179 | "purposeIds": str, 180 | "sourceAgency": str, 181 | "stateFedId": str, 182 | "latitude": "f8", 183 | "longitude": "f8", 184 | "state": str, 185 | "county": str, 186 | "countyState": str, 187 | "city": str, 188 | "distance": "f8", 189 | "riverName": str, 190 | "congDist": str, 191 | "stateRegulatedId": str, 192 | "jurisdictionAuthorityId": str, 193 | "stateRegulatoryAgency": str, 194 | "permittingAuthorityId": str, 195 | "inspectionAuthorityId": str, 196 | "enforcementAuthorityId": str, 197 | "fedRegulatedId": str, 198 | "fedOwnerIds": str, 199 | "fedFundingIds": str, 200 | "fedDesignIds": str, 201 | "fedConstructionIds": str, 202 | "fedRegulatoryIds": str, 203 | "fedInspectionIds": str, 204 | "fedOperationIds": str, 205 | "fedOtherIds": str, 206 | "secretaryAgricultureBuiltId": str, 207 | "nrcsWatershedAuthorizationId": str, 208 | "primaryDamTypeId": str, 209 | "damTypeIds": str, 210 | "coreTypeIds": str, 211 | "foundationTypeIds": str, 212 | "damHeight": "f8", 213 | "hydraulicHeight": "f8", 214 | "structuralHeight": "f8", 215 | "nidHeight": "f8", 216 | "nidHeightId": str, 217 | "damLength": "f8", 218 | "volume": "f8", 219 | "yearCompleted": "Int32", 220 | "yearCompletedId": str, 221 | "yearsModified": str, 222 | "nidStorage": "f8", 223 | "maxStorage": "f8", 224 | "normalStorage": "f8", 225 | "surfaceArea": "f8", 226 | "drainageArea": "f8", 227 | "maxDischarge": "f8", 228 | "spillwayTypeId": str, 229 | "spillwayWidth": "f8", 230 | "numberOfLocks": "Int32", 231 | "lengthOfLocks": "f8", 232 | "widthOfLocks": "f8", 233 | "secondaryLengthOfLocks": "Int32", 234 | "secondaryWidthOfLocks": "Int32", 235 | "outletGateTypes": str, 236 | "dataUpdated": "datetime64[ns]", 237 | "inspectionDate": str, 238 | "inspectionFrequency": "f4", 239 | "hazardId": str, 240 | "conditionAssessId": str, 241 | "conditionAssessDate": "datetime64[ns]", 242 | "eapId": str, 243 | "eapLastRevDate": "datetime64[ns]", 244 | "websiteUrl": str, 245 | "usaceDivision": str, 246 | "usaceDistrict": str, 247 | "operationalStatusId": str, 248 | "operationalStatusDate": "datetime64[ms]", 249 | "inundationNidAddedId": str, 250 | "huc2": str, 251 | "huc4": str, 252 | "huc6": str, 253 | "huc8": str, 254 | "zipcode": str, 255 | "nation": str, 256 | "stateKey": str, 257 | "femaRegion": str, 258 | "femaCommunity": str, 259 | "aiannh": str, 260 | } 261 | 
)
262 |         for c in dams:
263 |             if (dams[c] == "Yes").any():
264 |                 dams[c] = dams[c] == "Yes"
265 |         dams.loc[dams["yearCompleted"] < 1000, "yearCompleted"] = pd.NA
266 |         dams.to_parquet(fname)
267 | 
268 |     @property
269 |     def df(self):
270 |         """Entire NID inventory (``csv`` version) as a ``pandas.DataFrame``."""
271 |         fname = self.nid_inventory_path
272 |         par_name = fname.with_name(f"{fname.stem}_csv.parquet")  # csv-derived cache
273 |         if par_name.exists():
274 |             return pd.read_parquet(par_name)
275 |         url = "https://nid.sec.usace.army.mil/api/nation/csv"
276 |         fname = ogc.streaming_download(url, fnames=fname.with_suffix(".csv"))
277 |         if fname is None:
278 |             raise EmptyResponseError
279 |         dams = pd.read_csv(fname, header=1, engine="pyarrow")
280 |         dams.to_parquet(par_name)
281 |         return dams
282 | 
283 |     @property
284 |     def gdf(self):
285 |         """Entire NID inventory (``gpkg`` version) as a ``geopandas.GeoDataFrame``."""
286 |         self.stage_nid_inventory()
287 |         return gpd.read_parquet(self.nid_inventory_path)
288 | 
289 |     @staticmethod
290 |     def _get_json(
291 |         urls: Sequence[str], params: list[dict[str, str]] | None = None
292 |     ) -> list[dict[str, Any]]:
293 |         """Get JSON response from NID web service.
294 | 
295 |         Parameters
296 |         ----------
297 |         urls : list of str
298 |             A list of query URLs.
299 |         params : dict, optional
300 |             A list of parameters to pass to the web service, defaults to ``None``.
301 | 
302 |         Returns
303 |         -------
304 |         list of dict
305 |             List of JSON responses from the web service.
306 |         """
307 |         if not isinstance(urls, list):
308 |             raise InputTypeError("urls", "list")
309 | 
310 |         kwds = None if params is None else [{"params": p | {"out": "json"}} for p in params]
311 |         resp = ar.retrieve_json(urls, kwds)
312 |         resp = cast("list[dict[str, Any]]", resp)
313 |         if not resp:
314 |             raise ZeroMatchedError
315 | 
316 |         failed = [(i, f"Req_{i}: {r['message']}") for i, r in enumerate(resp) if "error" in r]
317 |         if failed:
318 |             idx, err_msgs = zip(*failed)
319 |             idx = cast("tuple[int]", idx)
320 |             err_msgs = cast("tuple[str]", err_msgs)
321 |             errs = " service requests failed with the following messages:\n"
322 |             errs += "\n".join(err_msgs)
323 |             if len(failed) == len(urls):
324 |                 raise ZeroMatchedError(f"All{errs}")
325 |             resp = [r for i, r in enumerate(resp) if i not in idx]
326 |             fail_count = f"{len(failed)} of {len(urls)}"
327 |             warnings.warn(f"{fail_count}{errs}", UserWarning, stacklevel=2)
328 |         return resp
329 | 
330 |     @staticmethod
331 |     def _to_geodf(nid_df: pd.DataFrame) -> gpd.GeoDataFrame:
332 |         """Convert a NID dataframe to a GeoDataFrame.
333 | 
334 |         Parameters
335 |         ----------
336 |         nid_df : pandas.DataFrame
337 |             NID dataframe
338 | 
339 |         Returns
340 |         -------
341 |         geopandas.GeoDataFrame
342 |             GeoDataFrame of NID data
343 |         """
344 |         return gpd.GeoDataFrame(  # pyright: ignore[reportCallIssue]
345 |             nid_df,
346 |             geometry=gpd.points_from_xy(nid_df["longitude"], nid_df["latitude"], crs=4326),
347 |         )
348 | 
349 |     def get_byfilter(self, query_list: list[dict[str, list[str]]]) -> list[gpd.GeoDataFrame]:
350 |         """Query dams by filters from the National Inventory of Dams web service.
351 | 
352 |         Parameters
353 |         ----------
354 |         query_list : list of dict
355 |             List of dictionaries of query parameters. For an exhaustive list of the parameters,
356 |             use the advanced fields dataframe that can be accessed via ``NID().fields_meta``.
357 |             Some filters require min/max values such as ``damHeight`` and ``drainageArea``.
358 |             For such filters, the min/max values should be passed like so:
359 |             ``{filter_key: ["[min1 max1]", "[min2 max2]"]}``.
360 | 
361 |         Returns
362 |         -------
363 |         list of geopandas.GeoDataFrame
364 |             Query results in the same order as the input query list.
365 | 
366 |         Examples
367 |         --------
368 |         >>> from pygeohydro import NID
369 |         >>> nid = NID()
370 |         >>> query_list = [
371 |         ...     {"drainageArea": ["[200 500]"]},
372 |         ...     {"nidId": ["CA01222"]},
373 |         ... ]
374 |         >>> dam_dfs = nid.get_byfilter(query_list)
375 |         """
376 |         fields = self.valid_fields.to_list()
377 |         invalid = [k for key in query_list for k in key if k not in fields]
378 |         if invalid:
379 |             raise InputValueError("query_list", fields)
380 |         params = [
381 |             {"sy": " ".join(f"@{s}:{fid}" for s, fids in key.items() for fid in fids)}
382 |             for key in query_list
383 |         ]
384 |         return [
385 |             self._to_geodf(pd.DataFrame(r))
386 |             for r in self._get_json([f"{self.base_url}/query"] * len(params), params)
387 |         ]
388 | 
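    # Illustrative sketch (assumed usage): stage the full inventory once, then
    # work with the cached snapshots.
    #
    # >>> nid = NID()
    # >>> nid.stage_nid_inventory()  # downloads/refreshes ./cache/full_nid_inventory.parquet
    # >>> dams_geo = nid.gdf         # GeoDataFrame built from the gpkg snapshot
    # >>> dams_tab = nid.df          # DataFrame built from the csv snapshot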
389 |     def get_bygeom(self, geometry: GTYPE, geo_crs: CRSType) -> gpd.GeoDataFrame:
390 |         """Retrieve NID data within a geometry.
391 | 
392 |         Parameters
393 |         ----------
394 |         geometry : Polygon, MultiPolygon, or tuple of length 4
395 |             Geometry or bounding box (west, south, east, north) for extracting the data.
396 |         geo_crs : int, str, or pyproj.CRS
397 |             The CRS of the input geometry.
398 | 
399 |         Returns
400 |         -------
401 |         geopandas.GeoDataFrame
402 |             GeoDataFrame of NID data
403 | 
404 |         Examples
405 |         --------
406 |         >>> from pygeohydro import NID
407 |         >>> nid = NID()
408 |         >>> dams = nid.get_bygeom((-69.77, 45.07, -69.31, 45.45), 4326)
409 |         """
410 |         _geometry = geoutils.geo2polygon(geometry, geo_crs, self.gdf.crs)
411 |         idx = self.gdf.sindex.query(_geometry, "contains")
412 |         return self.gdf.iloc[idx].copy()
413 | 
414 |     def inventory_byid(self, federal_ids: list[str]) -> gpd.GeoDataFrame:
415 |         """Get extra attributes for dams based on their Federal ID.
416 | 
417 |         Notes
418 |         -----
419 |         This function is meant to be used for getting extra attributes for dams.
420 |         For example, first you need to use either ``get_bygeom`` or ``get_byfilter``
421 |         to get basic attributes of the target dams. Then you can use this function
422 |         to get extra attributes using the ``federalId`` column of the ``GeoDataFrame``
423 |         that ``get_bygeom`` or ``get_byfilter`` returns.
424 | 
425 |         Parameters
426 |         ----------
427 |         federal_ids : list of str
428 |             List of the target dam Federal IDs.
429 | 
430 |         Returns
431 |         -------
432 |         geopandas.GeoDataFrame
433 |             Dams with extra attributes in addition to the standard NID fields
434 |             that other ``NID`` methods return.
435 | 
436 |         Examples
437 |         --------
438 |         >>> from pygeohydro import NID
439 |         >>> nid = NID()
440 |         >>> dams = nid.inventory_byid(['KY01232', 'GA02400', 'NE04081', 'IL55070', 'TN05345'])
441 |         """
442 |         if not isinstance(federal_ids, Iterable) or isinstance(federal_ids, (str, int)):
443 |             raise InputTypeError("federal_ids", "list of str (Federal IDs)")
444 | 
445 |         if not all(isinstance(i, str) for i in federal_ids):
446 |             raise InputTypeError("federal_ids", "list of str (Federal IDs)")
447 | 
448 |         urls = [f"{self.base_url}/dams/{i.upper()}/inventory" for i in set(federal_ids)]
449 |         return self._to_geodf(pd.DataFrame(self._get_json(urls)).set_index("id"))
450 | 
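    # Workflow sketch (illustrative; follows the Notes in ``inventory_byid`` and
    # assumes the ``federalId`` column holds the Federal IDs):
    #
    # >>> dams = nid.get_bygeom((-69.77, 45.07, -69.31, 45.45), 4326)
    # >>> extra = nid.inventory_byid(dams["federalId"].dropna().unique().tolist())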
455 | 
456 |         Notes
457 |         -----
458 |         This function is useful for exploring and/or narrowing down the filter fields
459 |         that are needed to query the dams using ``get_byfilter``.
460 | 
461 |         Parameters
462 |         ----------
463 |         text : str
464 |             Text to query for suggestions.
465 |         context_key : str, optional
466 |             Suggestion context, defaults to ``None``, i.e., all context keys.
467 |             For a list of valid context keys, see ``NID().fields_meta``.
468 | 
469 |         Returns
470 |         -------
471 |         tuple of pandas.DataFrame
472 |             The suggestions for the requested text as two DataFrames:
473 |             the first contains suggestions found in the dams' properties and
474 |             the second contains those found in the query fields such as states, huc6, etc.
475 | 
476 |         Examples
477 |         --------
478 |         >>> from pygeohydro import NID
479 |         >>> nid = NID()
480 |         >>> dams, contexts = nid.get_suggestions("houston", "city")
481 |         """
482 |         fields = self.valid_fields.to_list()
483 |         params = {"text": text}
484 |         if context_key:
485 |             if context_key not in fields:
486 |                 raise InputValueError("context", fields)
487 |             params["contextKey"] = context_key
488 |         resp = self._get_json([f"{self.base_url}/suggestions"], [params])
489 |         dams = pd.DataFrame(resp[0]["dams"])
490 |         contexts = pd.DataFrame(resp[0]["contexts"])
491 |         return (
492 |             dams if dams.empty else dams.set_index("id"),
493 |             contexts if contexts.empty else contexts.set_index("name"),
494 |         )
495 | 
496 |     def __repr__(self) -> str:
497 |         """Return the service properties."""
498 |         resp = self._get_json([f"{self.base_url}/metadata"])[0]
499 |         return "\n".join(
500 |             [
501 |                 "NID RESTful information:",
502 |                 f"URL: {self.base_url}",
503 |                 f"Date Refreshed: {resp['dateRefreshed']}",
504 |                 f"Version: {resp['version']}",
505 |             ]
506 |         )
507 | 
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | .. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/pygeohydro_logo.png
2 |     :target: https://github.com/hyriver/HyRiver
3 | 
4 | |
5 | 
6 | .. image:: https://joss.theoj.org/papers/b0df2f6192f0a18b9e622a3edff52e77/status.svg
7 |     :target: https://joss.theoj.org/papers/b0df2f6192f0a18b9e622a3edff52e77
8 |     :alt: JOSS
9 | 
10 | |
11 | 
12 | .. |pygeohydro| image:: https://github.com/hyriver/pygeohydro/actions/workflows/test.yml/badge.svg
13 |     :target: https://github.com/hyriver/pygeohydro/actions/workflows/test.yml
14 |     :alt: Github Actions
15 | 
16 | .. |pygeoogc| image:: https://github.com/hyriver/pygeoogc/actions/workflows/test.yml/badge.svg
17 |     :target: https://github.com/hyriver/pygeoogc/actions/workflows/test.yml
18 |     :alt: Github Actions
19 | 
20 | .. |pygeoutils| image:: https://github.com/hyriver/pygeoutils/actions/workflows/test.yml/badge.svg
21 |     :target: https://github.com/hyriver/pygeoutils/actions/workflows/test.yml
22 |     :alt: Github Actions
23 | 
24 | .. |pynhd| image:: https://github.com/hyriver/pynhd/actions/workflows/test.yml/badge.svg
25 |     :target: https://github.com/hyriver/pynhd/actions/workflows/test.yml
26 |     :alt: Github Actions
27 | 
28 | .. |py3dep| image:: https://github.com/hyriver/py3dep/actions/workflows/test.yml/badge.svg
29 |     :target: https://github.com/hyriver/py3dep/actions/workflows/test.yml
30 |     :alt: Github Actions
31 | 
32 | .. 
|pydaymet| image:: https://github.com/hyriver/pydaymet/actions/workflows/test.yml/badge.svg 33 | :target: https://github.com/hyriver/pydaymet/actions/workflows/test.yml 34 | :alt: Github Actions 35 | 36 | .. |pygridmet| image:: https://github.com/hyriver/pygridmet/actions/workflows/test.yml/badge.svg 37 | :target: https://github.com/hyriver/pygridmet/actions/workflows/test.yml 38 | :alt: Github Actions 39 | 40 | .. |pynldas2| image:: https://github.com/hyriver/pynldas2/actions/workflows/test.yml/badge.svg 41 | :target: https://github.com/hyriver/pynldas2/actions/workflows/test.yml 42 | :alt: Github Actions 43 | 44 | .. |async| image:: https://github.com/hyriver/async-retriever/actions/workflows/test.yml/badge.svg 45 | :target: https://github.com/hyriver/async-retriever/actions/workflows/test.yml 46 | :alt: Github Actions 47 | 48 | .. |signatures| image:: https://github.com/hyriver/hydrosignatures/actions/workflows/test.yml/badge.svg 49 | :target: https://github.com/hyriver/hydrosignatures/actions/workflows/test.yml 50 | :alt: Github Actions 51 | 52 | ================ ==================================================================== 53 | Package Description 54 | ================ ==================================================================== 55 | PyNHD_ Navigate and subset NHDPlus (MR and HR) using web services 56 | Py3DEP_ Access topographic data through National Map's 3DEP web service 57 | PyGeoHydro_ Access NWIS, NID, WQP, eHydro, NLCD, CAMELS, and SSEBop databases 58 | PyDaymet_ Access daily, monthly, and annual climate data via Daymet 59 | PyGridMET_ Access daily climate data via GridMET 60 | PyNLDAS2_ Access hourly NLDAS-2 data via web services 61 | HydroSignatures_ A collection of tools for computing hydrological signatures 62 | AsyncRetriever_ High-level API for asynchronous requests with persistent caching 63 | PyGeoOGC_ Send queries to any ArcGIS RESTful-, WMS-, and WFS-based services 64 | PyGeoUtils_ Utilities for manipulating geospatial, (Geo)JSON, and (Geo)TIFF data 65 | ================ ==================================================================== 66 | 67 | .. _PyGeoHydro: https://github.com/hyriver/pygeohydro 68 | .. _AsyncRetriever: https://github.com/hyriver/async-retriever 69 | .. _PyGeoOGC: https://github.com/hyriver/pygeoogc 70 | .. _PyGeoUtils: https://github.com/hyriver/pygeoutils 71 | .. _PyNHD: https://github.com/hyriver/pynhd 72 | .. _Py3DEP: https://github.com/hyriver/py3dep 73 | .. _PyDaymet: https://github.com/hyriver/pydaymet 74 | .. _PyGridMET: https://github.com/hyriver/pygridmet 75 | .. _PyNLDAS2: https://github.com/hyriver/pynldas2 76 | .. _HydroSignatures: https://github.com/hyriver/hydrosignatures 77 | 78 | PyGeoHydro: Retrieve Geospatial Hydrology Data 79 | ---------------------------------------------- 80 | 81 | .. image:: https://img.shields.io/pypi/v/pygeohydro.svg 82 | :target: https://pypi.python.org/pypi/pygeohydro 83 | :alt: PyPi 84 | 85 | .. image:: https://img.shields.io/conda/vn/conda-forge/pygeohydro.svg 86 | :target: https://anaconda.org/conda-forge/pygeohydro 87 | :alt: Conda Version 88 | 89 | .. image:: https://codecov.io/gh/hyriver/pygeohydro/graph/badge.svg 90 | :target: https://codecov.io/gh/hyriver/pygeohydro 91 | :alt: CodeCov 92 | 93 | .. image:: https://img.shields.io/pypi/pyversions/pygeohydro.svg 94 | :target: https://pypi.python.org/pypi/pygeohydro 95 | :alt: Python Versions 96 | 97 | .. 
image:: https://static.pepy.tech/badge/pygeohydro
98 |     :target: https://pepy.tech/project/pygeohydro
99 |     :alt: Downloads
100 | 
101 | |
102 | 
103 | .. image:: https://www.codefactor.io/repository/github/hyriver/pygeohydro/badge/main
104 |     :target: https://www.codefactor.io/repository/github/hyriver/pygeohydro/overview/main
105 |     :alt: CodeFactor
106 | 
107 | .. image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
108 |     :target: https://github.com/astral-sh/ruff
109 |     :alt: Ruff
110 | 
111 | .. image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white
112 |     :target: https://github.com/pre-commit/pre-commit
113 |     :alt: pre-commit
114 | 
115 | .. image:: https://mybinder.org/badge_logo.svg
116 |     :target: https://mybinder.org/v2/gh/hyriver/HyRiver-examples/main?urlpath=lab/tree/notebooks
117 |     :alt: Binder
118 | 
119 | |
120 | 
121 | Features
122 | --------
123 | 
124 | PyGeoHydro (formerly named `hydrodata `__) is part of
125 | the `HyRiver `__ software stack, which is designed to aid in
126 | hydroclimate analysis through web services. This package provides
127 | access to several public web services that offer geospatial hydrology data. It has three
128 | main modules: ``pygeohydro``, ``plot``, and ``helpers``.
129 | 
130 | PyGeoHydro supports the following datasets:
131 | 
132 | * `gNATSGO `__ for
133 |   US soil properties.
134 | * `SoilGrids `__
135 |   for seamless global soil properties.
136 | * `Derived Soil Properties `__
137 |   for soil porosity, available water capacity, and field capacity across the US.
138 | * `NWIS `__ for daily mean streamflow observations
139 |   (returned as a ``pandas.DataFrame`` or ``xarray.Dataset`` with station attributes).
140 | * `SensorThings API `__
141 |   for accessing real-time data of USGS sensors.
142 | * `CAMELS `__ for accessing streamflow
143 |   observations (1980-2014) and basin-level attributes of 671 stations within CONUS.
144 | * `Water Quality Portal `__ for accessing current and
145 |   historical water quality data from more than 1.5 million sites across the US.
146 | * `NID `__ for accessing the National Inventory of Dams
147 |   web service.
148 | * `HCDN 2009 `__ for identifying sites
149 |   where human activity affects the natural flow of the watercourse.
150 | * `NLCD 2021 `__ for land cover/land use, imperviousness
151 |   descriptor, and canopy data. You can get data using both geometries and coordinates.
152 | * `WBD `__ for accessing
153 |   Hydrologic Unit (HU) polygon boundaries within the US (all HUC levels).
154 | * `SSEBop `__ for daily actual
155 |   evapotranspiration, for both single pixel and gridded data.
156 | * `Irrigation Withdrawals `__ for estimated
157 |   monthly water use for irrigation by 12-digit hydrologic unit in the CONUS for 2015.
158 | * `STN `__ for accessing the USGS Short-Term Network (STN).
159 | * `eHydro `__ for accessing USACE
160 |   Hydrographic Surveys that include topobathymetry data.
161 | * `NFHL `__ for accessing
162 |   FEMA's National Flood Hazard Layer (NFHL) data.
163 | 
164 | Also, it includes several other functions:
165 | 
166 | * ``interactive_map``: Interactive map for exploring NWIS stations within a bounding box.
167 | * ``cover_statistics``: Categorical statistics of land use/land cover data.
168 | * ``overland_roughness``: Estimate overland roughness from land use/land cover data.
169 | * ``streamflow_fillna``: Fill missing daily streamflow values with day-of-year averages.
170 |   Streamflow observations must be at least 10 years long. See the sketch below.
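For instance, here is a minimal sketch of gap-filling a daily record (the station number
and period are arbitrary examples; only the positional streamflow argument is used):

.. code-block:: python

    import pygeohydro as gh
    from pygeohydro import NWIS

    nwis = NWIS()
    # Day-of-year averaging requires at least a 10-year daily record.
    qobs = nwis.get_streamflow("01646500", ("2000-01-01", "2015-12-31"))
    qobs_filled = gh.streamflow_fillna(qobs)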
171 | 
172 | The ``plot`` module includes three main functions:
173 | 
174 | * ``signatures``: Hydrologic signature graphs.
175 | * ``cover_legends``: Official NLCD land cover legends for plotting a land cover dataset.
176 | * ``descriptor_legends``: Color map and legends for plotting an imperviousness descriptor dataset.
177 | 
178 | The ``helpers`` module includes:
179 | 
180 | * ``nlcd_helper``: A lookup table of roughness coefficients for each land cover and imperviousness
181 |   descriptor type, which is useful for overland flow routing, among other applications.
182 | * ``nwis_error``: A dataframe for finding information about NWIS requests' errors.
183 | 
184 | You can find some example notebooks `here `__.
185 | 
186 | Moreover, under the hood, PyGeoHydro uses the
187 | `PyGeoOGC `__ and
188 | `AsyncRetriever `__ packages
189 | for making requests in parallel and storing responses in chunks. This significantly improves
190 | the reliability and speed of data retrieval.
191 | 
192 | You can control the request/response caching behavior and the verbosity of the package
193 | by setting the following environment variables:
194 | 
195 | * ``HYRIVER_CACHE_NAME``: Path to the caching SQLite database for asynchronous HTTP
196 |   requests. It defaults to ``./cache/aiohttp_cache.sqlite``.
197 | * ``HYRIVER_CACHE_NAME_HTTP``: Path to the caching SQLite database for HTTP requests.
198 |   It defaults to ``./cache/http_cache.sqlite``.
199 | * ``HYRIVER_CACHE_EXPIRE``: Expiration time for cached requests in seconds. It defaults to
200 |   one week.
201 | * ``HYRIVER_CACHE_DISABLE``: Disable reading/writing from/to the cache. The default is false.
202 | * ``HYRIVER_SSL_CERT``: Path to an SSL certificate file.
203 | 
204 | For example, in your code, before making any requests, you can do:
205 | 
206 | .. code-block:: python
207 | 
208 |     import os
209 | 
210 |     os.environ["HYRIVER_CACHE_NAME"] = "path/to/aiohttp_cache.sqlite"
211 |     os.environ["HYRIVER_CACHE_NAME_HTTP"] = "path/to/http_cache.sqlite"
212 |     os.environ["HYRIVER_CACHE_EXPIRE"] = "3600"
213 |     os.environ["HYRIVER_CACHE_DISABLE"] = "true"
214 |     os.environ["HYRIVER_SSL_CERT"] = "path/to/cert.pem"
215 | 
216 | You can also try PyGeoHydro without installing
217 | it on your system by clicking on the Binder badge above. A Jupyter Lab
218 | instance with the HyRiver stack pre-installed will launch in your web browser, and you
219 | can start coding!
220 | 
221 | Moreover, requests for additional functionalities can be submitted via the
222 | `issue tracker `__.
223 | 
224 | Citation
225 | --------
226 | If you use any of the HyRiver packages in your research, we appreciate citations:
227 | 
228 | .. code-block:: bibtex
229 | 
230 |     @article{Chegini_2021,
231 |         author = {Chegini, Taher and Li, Hong-Yi and Leung, L. Ruby},
232 |         doi = {10.21105/joss.03175},
233 |         journal = {Journal of Open Source Software},
234 |         month = {10},
235 |         number = {66},
236 |         pages = {1--3},
237 |         title = {{HyRiver: Hydroclimate Data Retriever}},
238 |         volume = {6},
239 |         year = {2021}
240 |     }
241 | 
242 | Installation
243 | ------------
244 | 
245 | You can install PyGeoHydro with ``pip`` after installing ``libgdal`` on your system
246 | (for example, on Ubuntu run ``sudo apt install libgdal-dev``). PyGeoHydro also has an optional
247 | dependency for persistent caching, ``requests-cache``. We highly recommend installing
248 | this package, as it can significantly speed up repeated queries.
249 | You don't have to change anything in your code: PyGeoHydro looks for ``requests-cache``
250 | under the hood and, if it is available, automatically uses persistent caching:
251 | 
252 | .. code-block:: console
253 | 
254 |     $ pip install pygeohydro
255 | 
256 | Alternatively, PyGeoHydro can be installed from the ``conda-forge`` repository
257 | using `Conda `__:
258 | 
259 | .. code-block:: console
260 | 
261 |     $ conda install -c conda-forge pygeohydro
262 | 
263 | Quick start
264 | -----------
265 | We can obtain river topobathymetry data using the ``EHydro`` class. We can subset
266 | the dataset using a geometry, a bounding box, feature IDs, or an SQL query:
267 | 
268 | .. code-block:: python
269 | 
270 |     from pygeohydro import EHydro
271 | 
272 |     ehydro = EHydro("points")
273 |     topobathy = ehydro.bygeom((-122.53, 45.57, -122.52, 45.59))
274 | 
275 | We can explore the available NWIS stations within a bounding box using the ``interactive_map``
276 | function. It returns an interactive map, and clicking on a station shows some of its most
277 | important properties.
278 | 
279 | .. code-block:: python
280 | 
281 |     import pygeohydro as gh
282 | 
283 |     bbox = (-69.5, 45, -69, 45.5)
284 |     gh.interactive_map(bbox)
285 | 
286 | .. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/interactive_map.png
287 |     :target: https://github.com/hyriver/HyRiver-examples/blob/main/notebooks/nwis.ipynb
288 |     :alt: Interactive Map
289 | 
290 | We can select all the stations within this bounding box that have daily mean streamflow data from
291 | ``2000-01-01`` to ``2010-12-31``:
292 | 
293 | .. code-block:: python
294 | 
295 |     from pygeohydro import NWIS
296 | 
297 |     nwis = NWIS()
298 |     query = {
299 |         "bBox": ",".join(f"{b:.06f}" for b in bbox),
300 |         "hasDataTypeCd": "dv",
301 |         "outputDataTypeCd": "dv",
302 |     }
303 |     info_box = nwis.get_info(query)
304 |     dates = ("2000-01-01", "2010-12-31")
305 |     stations = info_box[
306 |         (info_box.begin_date <= dates[0]) & (info_box.end_date >= dates[1])
307 |     ].site_no.tolist()
308 | 
309 | Then, we can get the daily streamflow data in mm/day (by default, the values are in cms)
310 | and plot them:
311 | 
312 | .. code-block:: python
313 | 
314 |     from pygeohydro import plot
315 | 
316 |     qobs = nwis.get_streamflow(stations, dates, mmd=True)
317 |     plot.signatures(qobs)
318 | 
319 | By default, ``get_streamflow`` returns a ``pandas.DataFrame`` with an ``attrs`` attribute
320 | containing metadata for all the stations. You can access it via ``qobs.attrs``.
321 | Moreover, we can get the same data as an ``xarray.Dataset`` as follows:
322 | 
323 | .. code-block:: python
324 | 
325 |     qobs_ds = nwis.get_streamflow(stations, dates, to_xarray=True)
326 | 
327 | This ``xarray.Dataset`` has two dimensions, ``time`` and ``station_id``, and
328 | 10 variables. The ``discharge`` variable has both dimensions, while the other variables,
329 | which are station attributes, are one-dimensional.
330 | 
331 | We can also get instantaneous streamflow data using ``get_streamflow``. This method assumes
332 | that the input dates are in the UTC time zone and returns the data in UTC as well:
333 | 
334 | .. code-block:: python
335 | 
336 |     date = ("2005-01-01 12:00", "2005-01-12 15:00")
337 |     qobs = nwis.get_streamflow("01646500", date, freq="iv")
338 | 
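If local clock time is needed instead, the UTC index can be converted with plain pandas
(a small sketch, assuming the returned index is timezone-aware as the UTC note above
suggests; the ``US/Eastern`` zone is only an example):

.. code-block:: python

    # Convert the tz-aware UTC index to a local time zone for reporting.
    qobs.index = qobs.index.tz_convert("US/Eastern")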
339 | We can query USGS stations of type "stream" in Arizona using the SensorThings API
340 | as follows:
341 | 
342 | .. code-block:: python
343 | 
344 |     sensor = gh.SensorThings()
345 |     odata = {
346 |         "filter": "properties/monitoringLocationType eq 'Stream' and properties/stateFIPS eq 'US:04'",
347 |     }
348 |     df = sensor.query_byodata(odata)
349 | 
350 | Irrigation withdrawals data can be obtained as follows:
351 | 
352 | .. code-block:: python
353 | 
354 |     irr = gh.irrigation_withdrawals()
355 | 
356 | We can get the CAMELS dataset as a ``geopandas.GeoDataFrame`` that includes the geometries
357 | and basin-level attributes of 671 natural watersheds within CONUS, and their streamflow
358 | observations between 1980 and 2014 as an ``xarray.Dataset``, like so:
359 | 
360 | .. code-block:: python
361 | 
362 |     attrs, qobs = gh.get_camels()
363 | 
364 | The ``WaterQuality`` class has a number of convenience methods to retrieve data from the
365 | web service. Since there are many parameter combinations that can be
366 | used to retrieve data, a general method is also provided to retrieve data from
367 | any of the valid endpoints. You can use ``get_json`` to retrieve station info
368 | as a ``geopandas.GeoDataFrame`` or ``get_csv`` to retrieve station data as a
369 | ``pandas.DataFrame``. You can construct a dictionary of the parameters and pass
370 | it to one of these functions. For more information on the parameters, please
371 | consult the `Water Quality Data documentation `__.
372 | For example, let's find all the stations within a bounding box that have Caffeine data:
373 | 
374 | .. code-block:: python
375 | 
376 |     from pygeohydro import WaterQuality
377 | 
378 |     bbox = (-92.8, 44.2, -88.9, 46.0)
379 |     kwds = {"characteristicName": "Caffeine"}
380 |     wq = WaterQuality()
381 |     stations = wq.station_bybbox(bbox, kwds)
382 | 
383 | Or the same criterion but within a 30-mile radius of a point:
384 | 
385 | .. code-block:: python
386 | 
387 |     stations = wq.station_bydistance(-92.8, 44.2, 30, kwds)
388 | 
389 | Then, we can get the data for all these stations like this:
390 | 
391 | .. code-block:: python
392 | 
393 |     sids = stations.MonitoringLocationIdentifier.tolist()
394 |     caff = wq.data_bystation(sids, kwds)
395 | 
396 | .. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/water_quality.png
397 |     :target: https://github.com/hyriver/HyRiver-examples/blob/main/notebooks/water_quality.ipynb
398 |     :alt: Water Quality
399 | 
400 | Moreover, we can get land use/land cover data using the ``nlcd_bygeom`` and ``nlcd_bycoords``
401 | functions, percentages of land cover types using ``cover_statistics``, and overland roughness
402 | using ``overland_roughness``. The ``nlcd_bycoords`` function returns a ``geopandas.GeoDataFrame``
403 | with the NLCD layers as columns and the input coordinates as the ``geometry`` column. Moreover,
404 | the ``nlcd_bygeom`` function accepts either a single geometry or a ``geopandas.GeoDataFrame``
405 | as the input.
406 | 
407 | .. code-block:: python
408 | 
409 |     from pynhd import NLDI
410 | 
411 |     basins = NLDI().get_basins(["01031450", "01318500", "01031510"])
412 |     lulc = gh.nlcd_bygeom(basins, 100, years={"cover": [2016, 2019]})
413 |     stats = gh.cover_statistics(lulc["01318500"].cover_2016)
414 |     roughness = gh.overland_roughness(lulc["01318500"].cover_2019)
415 | 
416 | .. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/lulc.png
417 |     :target: https://github.com/hyriver/HyRiver-examples/blob/main/notebooks/nlcd.ipynb
418 |     :alt: Land Use/Land Cover
419 | 
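As a companion to the ``nlcd_bygeom`` example above, here is a minimal ``nlcd_bycoords``
sketch (the coordinates and year are arbitrary examples, assuming the function accepts a
list of ``(lon, lat)`` tuples and the same ``years`` argument):

.. code-block:: python

    coords = [(-89.7, 43.3), (-89.9, 43.1)]
    # Returns one row per input coordinate, with NLCD layers as columns.
    lulc_pts = gh.nlcd_bycoords(coords, years={"cover": [2019]})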
420 | Next, let's use ``ssebopeta_bygeom`` to get actual ET data for a basin. Note that there's also a
421 | ``ssebopeta_bycoords`` function that returns an actual ET time series for a single coordinate.
422 | 
423 | .. code-block:: python
424 | 
425 |     geometry = NLDI().get_basins("01315500").geometry.iloc[0]
426 |     eta = gh.ssebopeta_bygeom(geometry, dates=("2005-10-01", "2005-10-05"))
427 | 
428 | .. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/eta.png
429 |     :target: https://github.com/hyriver/HyRiver-examples/blob/main/notebooks/ssebop.ipynb
430 |     :alt: Actual ET
431 | 
432 | Additionally, we can pull all the US dams data using ``NID``. Let's get the dams that are within
433 | this bounding box and have a maximum storage larger than 200 acre-feet:
434 | 
435 | .. code-block:: python
436 | 
437 |     nid = NID()
438 |     dams = nid.get_bygeom((-69.77, 43.07, -69.31, 45.45), 4326)
439 |     dams = nid.inventory_byid(dams.id.to_list())
440 |     dams = dams[dams.maxStorage > 200]
441 | 
442 | We can also get all dams within CONUS with a maximum storage larger than 2500 acre-feet:
443 | 
444 | .. code-block:: python
445 | 
446 |     conus_geom = gh.get_us_states("contiguous")
447 | 
448 |     dam_list = nid.get_byfilter([{"maxStorage": ["[2500 +inf]"]}])
449 |     dams = nid.inventory_byid(dam_list[0].id.to_list())
450 | 
451 |     conus_dams = dams[dams.stateKey.isin(conus_geom.STUSPS)].reset_index(drop=True)
452 | 
453 | .. image:: https://raw.githubusercontent.com/hyriver/HyRiver-examples/main/notebooks/_static/dams.png
454 |     :target: https://github.com/hyriver/HyRiver-examples/blob/main/notebooks/nid.ipynb
455 |     :alt: Dams
456 | 
457 | 
458 | The ``WBD`` class allows us to get Hydrologic Unit (HU) polygon boundaries. Let's
459 | get the two Hudson HUC4s:
460 | 
461 | .. code-block:: python
462 | 
463 |     from pygeohydro import WBD
464 | 
465 |     wbd = WBD("huc4")
466 |     hudson = wbd.byids("huc4", ["0202", "0203"])
467 | 
468 | 
469 | The ``NFHL`` class allows us to retrieve FEMA's National Flood Hazard Layer (NFHL) data.
470 | Let's get the cross-section data for a small region in Vermont:
471 | 
472 | .. code-block:: python
473 | 
474 |     from pygeohydro import NFHL
475 | 
476 |     nfhl = NFHL("NFHL", "cross-sections")
477 |     gdf_xs = nfhl.bygeom((-73.42, 43.28, -72.9, 43.52), geo_crs=4269)
478 | 
479 | 
480 | Contributing
481 | ------------
482 | 
483 | Contributions are very welcome. Please read the
484 | `CONTRIBUTING.rst `__
485 | file for instructions.
486 | 
487 | Credits
488 | -------
489 | 
490 | This package was created based on the `audreyr/cookiecutter-pypackage`__ project template.
491 | 
492 | __ https://github.com/audreyr/cookiecutter-pypackage
493 | 
--------------------------------------------------------------------------------