├── .flake8
├── .github
    └── workflows
    │   ├── python-package.yml
    │   ├── python-publish.yml
    │   └── sphinx-docs.yml
├── .gitignore
├── .gitlab
    ├── issue_templates
    │   └── reviewer_checklist.md
    └── merge_request_templates
    │   └── reviewer_checklist.md
├── .pre-commit-config.yaml
├── .prettierrc.toml
├── CONTRIBUTING.md
├── DISCLAIMER.md
├── LICENSE.md
├── README.md
├── code.json
├── dataretrieval
    ├── __init__.py
    ├── codes
    │   ├── __init__.py
    │   ├── states.py
    │   └── timezones.py
    ├── nadp.py
    ├── nldi.py
    ├── nwis.py
    ├── samples.py
    ├── streamstats.py
    ├── utils.py
    ├── waterwatch.py
    └── wqp.py
├── demos
    ├── NWIS_demo_1.ipynb
    ├── R Python Vignette equivalents.ipynb
    ├── datasets
    │   └── peak_discharge_trends.csv
    ├── hydroshare
    │   ├── USGS_dataretrieval_DailyValues_Examples.ipynb
    │   ├── USGS_dataretrieval_GroundwaterLevels_Examples.ipynb
    │   ├── USGS_dataretrieval_Measurements_Examples.ipynb
    │   ├── USGS_dataretrieval_NLDI_Examples.ipynb
    │   ├── USGS_dataretrieval_ParameterCodes_Examples.ipynb
    │   ├── USGS_dataretrieval_Peaks_Examples.ipynb
    │   ├── USGS_dataretrieval_Ratings_Examples.ipynb
    │   ├── USGS_dataretrieval_SiteInfo_Examples.ipynb
    │   ├── USGS_dataretrieval_SiteInventory_Examples.ipynb
    │   ├── USGS_dataretrieval_Statistics_Examples.ipynb
    │   ├── USGS_dataretrieval_UnitValues_Examples.ipynb
    │   ├── USGS_dataretrieval_WaterSamples_Examples.ipynb
    │   └── USGS_dataretrieval_WaterUse_Examples.ipynb
    └── nwqn_data_pull
    │   ├── Dockerfile_dataretrieval
    │   ├── README.md
    │   ├── lithops.yaml
    │   ├── requirements.txt
    │   ├── retrieve_nwqn_samples.py
    │   └── retrieve_nwqn_streamflow.py
├── docs
    ├── Makefile
    └── source
    │   ├── .nojekyll
    │   ├── conf.py
    │   ├── examples
    │       ├── USGS_dataretrieval_DailyValues_Examples.nblink
    │       ├── USGS_dataretrieval_GroundwaterLevels_Examples.nblink
    │       ├── USGS_dataretrieval_Measurements_Examples.nblink
    │       ├── USGS_dataretrieval_ParameterCodes_Examples.nblink
    │       ├── USGS_dataretrieval_Peaks_Examples.nblink
    │       ├── USGS_dataretrieval_Ratings_Examples.nblink
    │       ├── USGS_dataretrieval_SiteInfo_Examples.nblink
    │       ├── USGS_dataretrieval_SiteInventory_Examples.nblink
    │       ├── USGS_dataretrieval_Statistics_Examples.nblink
    │       ├── USGS_dataretrieval_UnitValues_Examples.nblink
    │       ├── USGS_dataretrieval_WaterSamples_Examples.nblink
    │       ├── USGS_dataretrieval_WaterUse_Examples.nblink
    │       ├── datasets
    │       │   └── peak_discharge_trends.csv
    │       ├── index.rst
    │       ├── nwisdemo01.nblink
    │       ├── readme_examples.rst
    │       ├── rvignettes.nblink
    │       └── siteinfo_examples.rst
    │   ├── index.rst
    │   ├── meta
    │       ├── contributing.rst
    │       ├── installing.rst
    │       └── license.rst
    │   ├── reference
    │       ├── index.rst
    │       ├── nadp.rst
    │       ├── nwis.rst
    │       ├── samples.rst
    │       ├── streamstats.rst
    │       ├── utils.rst
    │       └── wqp.rst
    │   └── userguide
    │       ├── dataportals.rst
    │       ├── index.rst
    │       └── timeconventions.rst
├── pyproject.toml
├── requirements-dev.txt
├── setup.py
└── tests
    ├── __init__.py
    ├── data
        ├── nldi_get_basin.json
        ├── nldi_get_features_by_comid.json
        ├── nldi_get_features_by_feature_source_with_nav_mode.json
        ├── nldi_get_features_by_feature_source_without_nav_mode.json
        ├── nldi_get_features_by_lat_long.json
        ├── nldi_get_flowlines.json
        ├── nldi_get_flowlines_by_comid.json
        ├── nwis_sites.txt
        ├── samples_results.txt
        ├── water_use_allegheny.txt
        ├── water_use_national.txt
        ├── waterdata_gwlevels.txt
        ├── waterdata_measurements.txt
        ├── waterdata_pmcodes.txt
        ├── waterdata_qwdata.txt
        ├── waterservices_dv.txt
        ├── waterservices_iv.txt
        ├── waterservices_peaks.txt
        ├── waterservices_ratings.txt
        ├── waterservices_site.txt
        ├── waterservices_stats.txt
        ├── wqp3_results.txt
        ├── wqp_activities.txt
        ├── wqp_activity_metrics.txt
        ├── wqp_detection_limits.txt
        ├── wqp_habitat_metrics.txt
        ├── wqp_organizations.txt
        ├── wqp_project_weights.txt
        ├── wqp_projects.txt
        ├── wqp_results.txt
        └── wqp_sites.txt
    ├── nadp_test.py
    ├── nldi_test.py
    ├── nwis_test.py
    ├── samples_test.py
    ├── utils_test.py
    ├── waterservices_test.py
    └── wqp_test.py


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | extend-ignore = E203, E704
4 | 


--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Python package
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: ['main']
 9 |   pull_request:
10 |     branches: ['main']
11 | 
12 | jobs:
13 |   build:
14 |     runs-on: ${{ matrix.os }}
15 |     strategy:
16 |       fail-fast: false
17 |       matrix:
18 |         os: [ubuntu-latest, windows-latest]
19 |         python-version: [3.8, 3.9, '3.10', 3.11, 3.12]
20 | 
21 |     steps:
22 |       - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
23 |       - name: Set up Python ${{ matrix.python-version }}
24 |         uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3
25 |         with:
26 |           python-version: ${{ matrix.python-version }}
27 |       - name: Install dependencies
28 |         run: |
29 |           python -m pip install --upgrade pip
30 |           pip install .[test,nldi]
31 |       - name: Lint with flake8
32 |         run: |
33 |           # stop the build if there are Python syntax errors or undefined names
34 |           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
35 |           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
36 |           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
37 |       - name: Test with pytest and report coverage
38 |         run: |
39 |           cd tests
40 |           coverage run -m pytest
41 |           coverage report -m
42 |           cd ..
43 | 


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed  by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
25 |     - name: Set up Python
26 |       uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |         pip install setuptools setuptools-scm wheel twine check-manifest
34 |     - name: Build package
35 |       run: python -m build
36 |     - name: Publish package
37 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
38 |       with:
39 |         user: __token__
40 |         password: ${{ secrets.PYPI_API_TOKEN }}
41 | 


--------------------------------------------------------------------------------
/.github/workflows/sphinx-docs.yml:
--------------------------------------------------------------------------------
 1 | # This workflow builds the sphinx docs
 2 | 
 3 | name: Sphinx Docs Build
 4 | 
 5 | on:
 6 |   push:
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   docs:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Checkout
14 |         uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
15 |         with:
16 |           persist-credentials: false
17 |       - name: Install dataretrieval, dependencies, and Sphinx then build docs
18 |         shell: bash -l {0}
19 |         run: |
20 |           python -m pip install --upgrade pip
21 |           pip install .[doc]
22 |           ipython kernel install --name "python3" --user
23 |           sudo apt update -y && sudo apt install -y latexmk texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended dvipng pandoc
24 |           (cd docs && make docs)
25 |           (cd docs && make html)
26 |       - name: Debug
27 |         run: |
28 |           echo $REF
29 |           echo $EVENT_NAME
30 |           echo ${{ github.event_name == 'push' }}
31 |           echo ${{ github.ref == 'refs/heads/main' }}
32 |           echo ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
33 |       - name: Deploy to GitHub Pages
34 |         uses: JamesIves/github-pages-deploy-action@881db5376404c5c8d621010bcbec0310b58d5e29
35 |         if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
36 |         with:
37 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
38 |           EVENT_NAME: ${{ github.event_name }}
39 |           REF: ${{ github.ref }}
40 |           BRANCH: gh-pages
41 |           FOLDER: docs/build/html
42 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.swp
  2 | __pycachr__
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | **/__pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | .pytest_cache/
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | *_version.py
 15 | .Python
 16 | env/
 17 | build/
 18 | develop-eggs/
 19 | dist/
 20 | downloads/
 21 | eggs/
 22 | .eggs/
 23 | lib/
 24 | lib64/
 25 | parts/
 26 | sdist/
 27 | var/
 28 | wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | .idea/
 32 | *.egg
 33 | .miniconda
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Pipenv
 45 | Pipfile
 46 | Pipfile.lock
 47 | 
 48 | # Unit test / coverage reports
 49 | htmlcov/
 50 | .tox/
 51 | .coverage
 52 | .coverage.*
 53 | .cache
 54 | nosetests.xml
 55 | coverage.xml
 56 | *.cover
 57 | .hypothesis/
 58 | 
 59 | # Translations
 60 | *.mo
 61 | *.pot
 62 | 
 63 | # Django stuff:
 64 | *.log
 65 | local_settings.py
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # pyenv
 84 | .python-version
 85 | 
 86 | # celery beat schedule file
 87 | celerybeat-schedule
 88 | 
 89 | # SageMath parsed files
 90 | *.sage.py
 91 | 
 92 | # dotenv
 93 | .env
 94 | 
 95 | # virtualenv
 96 | .venv
 97 | venv/
 98 | ENV/
 99 | 
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 | 
104 | # Rope project settings
105 | .ropeproject
106 | 
107 | # mkdocs documentation
108 | /site
109 | 
110 | # mypy
111 | .mypy_cache/
112 | 
113 | # macOS
114 | *.DS_Store


--------------------------------------------------------------------------------
/.gitlab/issue_templates/reviewer_checklist.md:
--------------------------------------------------------------------------------
 1 | ## Review checklist for @dataretrieval
 2 | 
 3 | Background information for reviewers [here](https://www.usgs.gov/products/software/software-management/types-software-review)
 4 | 
 5 | *Please check off boxes as applicable, and elaborate in comments below.*
 6 | 
 7 | - Code location https://code.usgs.gov/cmwsc/shwa/dataretrieval
 8 | - author @[gitlab handle]
 9 | 
10 | ### Conflict of interest
11 | 
12 | - [ ] I confirm that I have no COIs with reviewing this work, meaning that there is no relationship with the product or the product's authors or affiliated institutions that could influence or be perceived to influence the outcome of the review (if you are unsure whether you have a conflict, please speak to your supervisor _before_ starting your review).
13 | 
14 | ### Adherence to Fundamental Science Practices
15 | 
16 | - [ ] I confirm that I read and will adhere to the [Federal Source Code Policy for Scientific Software](https://www.usgs.gov/survey-manual/im-osqi-2019-01-review-and-approval-scientific-software-release) and relevant federal guidelines for approved software release as outlined in [SM502.1](https://code.usgs.gov/cmwsc/shwa/dataretrieval) and [SM502.4](https://www.usgs.gov/survey-manual/5024-fundamental-science-practices-review-approval-and-release-information-products).
17 | 
18 | ### Security Review
19 | 
20 | - [ ] No proprietary code is included
21 | - [ ] No Personally Identifiable Information (PII) is included
22 | - [ ] No other sensitive information such as data base passwords are included
23 | 
24 | ### General checks
25 | 
26 | - [ ] **Repository:** Is the source code for this software available?
27 | - [ ] **License:** Does the repository contain a plain-text LICENSE file?
28 | - [ ] **Disclaimer:** Does the repository have the USGS-required provisional Disclaimer?
29 | - [ ] **Contribution and authorship:** Has the submitting author made major contributions to the software? Does the full list of software authors seem appropriate and complete?
30 | - [ ] Does the repository have a code.json file?
31 | 
32 | ### Documentation
33 | 
34 | - [ ] **A statement of need**: Do the authors clearly state what problems the software is designed to solve and who the target audience is?
35 | - [ ] **Installation instructions:** Is there a clearly-stated list of dependencies? Ideally these should be handled with an automated package management solution.
36 | - [ ] **Example usage:** Do the authors include examples of how to use the software (ideally to solve real-world analysis problems)?
37 | - [ ] **Functionality documentation:** Is the core functionality of the software documented to a satisfactory level (e.g., API method documentation)?
38 | - [ ] **Automated tests:** Are there automated tests or manual steps described so that the functionality of the software can be verified?
39 | - [ ] **Community guidelines:** Are there clear guidelines for third parties wishing to 1) Contribute to the software 2) Report issues or problems with the software 3) Seek support? This information could be found in the README, CONTRIBUTING, or DESCRIPTION sections of the documentation.
40 | - [ ] **References:** When present, do references in the text use the proper [citation syntax](https://pandoc.org/MANUAL.html#extension-citations)?
41 | 
42 | ### Functionality
43 | 
44 | - [ ] **Installation:** Does installation succeed as outlined in the documentation?
45 | - [ ] **Functionality:** Have the functional claims of the software been confirmed?
46 | - [ ] **Performance:** If there are any performance claims of the software, have they been confirmed? (If there are no claims, please check off this item.)
47 | - [ ] **Automated tests:** Do unit tests cover essential functions of the software and a reasonable range of inputs and conditions? Do all tests pass when run locally?
48 | - [ ] **Packaging guidelines:** Does the software conform to the applicable packaging guidelines? R packaging guidelines [here](https://devguide.ropensci.org/building.html#building); Python packaging guidelines [here](https://packaging.python.org/en/latest/)
49 | 
50 | ### Review Comments
51 | 
52 | - Add free text comments here.
53 | 
54 | ### Reviewer checklist source statement
55 | 
56 | This checklist combines elements of the [rOpenSci](https://devguide.ropensci.org/) review guidelines and the Journal of Open Source Science (JOSS) review [checklist](https://joss.readthedocs.io/en/latest/review_checklist.html): it has been modified for use with USGS software releases.
57 | 


--------------------------------------------------------------------------------
/.gitlab/merge_request_templates/reviewer_checklist.md:
--------------------------------------------------------------------------------
 1 | ## Review checklist for @dataretrieval
 2 | 
 3 | Background information for reviewers [here](https://www.usgs.gov/products/software/software-management/types-software-review)
 4 | 
 5 | *Please check off boxes as applicable, and elaborate in comments below.*
 6 | 
 7 | - Code location https://code.usgs.gov/cmwsc/shwa/dataretrieval
 8 | - author @[gitlab handle]
 9 | 
10 | ### Conflict of interest
11 | 
12 | - [ ] I confirm that I have no COIs with reviewing this work, meaning that there is no relationship with the product or the product's authors or affiliated institutions that could influence or be perceived to influence the outcome of the review (if you are unsure whether you have a conflict, please speak to your supervisor _before_ starting your review).
13 | 
14 | ### Adherence to Fundamental Science Practices
15 | 
16 | - [ ] I confirm that I read and will adhere to the [Federal Source Code Policy for Scientific Software](https://www.usgs.gov/survey-manual/im-osqi-2019-01-review-and-approval-scientific-software-release) and relevant federal guidelines for approved software release as outlined in [SM502.1](https://code.usgs.gov/cmwsc/shwa/dataretrieval) and [SM502.4](https://www.usgs.gov/survey-manual/5024-fundamental-science-practices-review-approval-and-release-information-products).
17 | 
18 | ### Security Review
19 | 
20 | - [ ] No proprietary code is included
21 | - [ ] No Personally Identifiable Information (PII) is included
22 | - [ ] No other sensitive information such as data base passwords are included
23 | 
24 | ### General checks
25 | 
26 | - [ ] **Repository:** Is the source code for this software available?
27 | - [ ] **License:** Does the repository contain a plain-text LICENSE file?
28 | - [ ] **Disclaimer:** Does the repository have the USGS-required provisional Disclaimer?
29 | - [ ] **Contribution and authorship:** Has the submitting author made major contributions to the software? Does the full list of software authors seem appropriate and complete?
30 | - [ ] Does the repository have a code.json file?
31 | 
32 | ### Documentation
33 | 
34 | - [ ] **A statement of need**: Do the authors clearly state what problems the software is designed to solve and who the target audience is?
35 | - [ ] **Installation instructions:** Is there a clearly-stated list of dependencies? Ideally these should be handled with an automated package management solution.
36 | - [ ] **Example usage:** Do the authors include examples of how to use the software (ideally to solve real-world analysis problems)?
37 | - [ ] **Functionality documentation:** Is the core functionality of the software documented to a satisfactory level (e.g., API method documentation)?
38 | - [ ] **Automated tests:** Are there automated tests or manual steps described so that the functionality of the software can be verified?
39 | - [ ] **Community guidelines:** Are there clear guidelines for third parties wishing to 1) Contribute to the software 2) Report issues or problems with the software 3) Seek support? This information could be found in the README, CONTRIBUTING, or DESCRIPTION sections of the documentation.
40 | - [ ] **References:** When present, do references in the text use the proper [citation syntax](https://pandoc.org/MANUAL.html#extension-citations)?
41 | 
42 | ### Functionality
43 | 
44 | - [ ] **Installation:** Does installation succeed as outlined in the documentation?
45 | - [ ] **Functionality:** Have the functional claims of the software been confirmed?
46 | - [ ] **Performance:** If there are any performance claims of the software, have they been confirmed? (If there are no claims, please check off this item.)
47 | - [ ] **Automated tests:** Do unit tests cover essential functions of the software and a reasonable range of inputs and conditions? Do all tests pass when run locally?
48 | - [ ] **Packaging guidelines:** Does the software conform to the applicable packaging guidelines? R packaging guidelines [here](https://devguide.ropensci.org/building.html#building); Python packaging guidelines [here](https://packaging.python.org/en/latest/)
49 | 
50 | ### Review Comments
51 | 
52 | - Add free text comments here.
53 | 
54 | ### Reviewer checklist source statement
55 | 
56 | This checklist combines elements of the [rOpenSci](https://devguide.ropensci.org/) review guidelines and the Journal of Open Source Science (JOSS) review [checklist](https://joss.readthedocs.io/en/latest/review_checklist.html): it has been modified for use with USGS software releases.
57 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ci:
 2 |   autoupdate_schedule: monthly
 3 |   autofix_prs: false
 4 | 
 5 | repos:
 6 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 7 |     rev: v4.4.0
 8 |     hooks:
 9 |       - id: trailing-whitespace
10 |       - id: end-of-file-fixer
11 |       - id: check-docstring-first
12 |       - id: check-json
13 |       - id: check-yaml
14 |       - id: double-quote-string-fixer
15 |       - id: debug-statements
16 |       - id: mixed-line-ending
17 | 
18 |   - repo: https://github.com/asottile/pyupgrade
19 |     rev: v3.3.1
20 |     hooks:
21 |       - id: pyupgrade
22 |         args:
23 |           - '--py38-plus'
24 | 
25 |   - repo: https://github.com/psf/black
26 |     rev: 23.3.0
27 |     hooks:
28 |       - id: black
29 |       - id: black-jupyter
30 | 
31 |   - repo: https://github.com/keewis/blackdoc
32 |     rev: v0.3.8
33 |     hooks:
34 |       - id: blackdoc
35 | 
36 |   - repo: https://github.com/PyCQA/flake8
37 |     rev: 6.0.0
38 |     hooks:
39 |       - id: flake8
40 | 
41 |   - repo: https://github.com/PyCQA/isort
42 |     rev: 5.12.0
43 |     hooks:
44 |       - id: isort
45 | 
46 |   - repo: https://github.com/pre-commit/mirrors-prettier
47 |     rev: v3.0.0-alpha.6
48 |     hooks:
49 |       - id: prettier
50 | 


--------------------------------------------------------------------------------
/.prettierrc.toml:
--------------------------------------------------------------------------------
1 | semi = false
2 | singleQuote = true
3 | 


--------------------------------------------------------------------------------
/DISCLAIMER.md:
--------------------------------------------------------------------------------
1 | Disclaimer
2 | ==========
3 | 
4 | This software is preliminary or provisional and is subject to revision. It is being provided to meet the need for timely best science. The software has not received final approval by the U.S. Geological Survey (USGS). No warranty, expressed or implied, is made by the USGS or the U.S. Government as to the functionality of the software and related material nor shall the fact of release constitute any such warranty. The software is provided on the condition that neither the USGS nor the U.S. Government shall be held liable for any damages resulting from the authorized or unauthorized use of the software.
5 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | License
 2 | =======
 3 | 
 4 | Unless otherwise noted, this project is in the public domain in the United
 5 | States because it contains materials that originally came from the United
 6 | States Geological Survey, an agency of the United States Department of
 7 | Interior. For more information, see the official USGS copyright policy at
 8 | https://www.usgs.gov/information-policies-and-instructions/copyrights-and-credits
 9 | 
10 | Additionally, we waive copyright and related rights in the work
11 | worldwide through the CC0 1.0 Universal public domain dedication.
12 | 
13 | 
14 | CC0 1.0 Universal Summary
15 | -------------------------
16 | 
17 | This is a human-readable summary of the
18 | [Legal Code (read the full text)][1].
19 | 
20 | 
21 | ### No Copyright
22 | 
23 | The person who associated a work with this deed has dedicated the work to
24 | the public domain by waiving all of his or her rights to the work worldwide
25 | under copyright law, including all related and neighboring rights, to the
26 | extent allowed by law.
27 | 
28 | You can copy, modify, distribute and perform the work, even for commercial
29 | purposes, all without asking permission.
30 | 
31 | 
32 | ### Other Information
33 | 
34 | In no way are the patent or trademark rights of any person affected by CC0,
35 | nor are the rights that other persons may have in the work or in how the
36 | work is used, such as publicity or privacy rights.
37 | 
38 | Unless expressly stated otherwise, the person who associated a work with
39 | this deed makes no warranties about the work, and disclaims liability for
40 | all uses of the work, to the fullest extent permitted by applicable law.
41 | When using or citing the work, you should not imply endorsement by the
42 | author or the affirmer.
43 | 
44 | 
45 | 
46 | [1]: https://creativecommons.org/publicdomain/zero/1.0/legalcode
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # dataretrieval: Download hydrologic data
  2 | 
  3 | ![PyPI - Version](https://img.shields.io/pypi/v/dataretrieval)
  4 | ![Conda Version](https://img.shields.io/conda/v/conda-forge/dataretrieval)
  5 | ![Downloads](https://static.pepy.tech/badge/dataretrieval)
  6 | 
  7 | :warning: USGS data availability and format are changing on Water Quality Portal (WQP). Since March 2024, data obtained from WQP legacy profiles will not include new USGS data or recent updates to existing data. 
  8 | To view the status of changes in data availability and code functionality, visit: https://doi-usgs.github.io/dataRetrieval/articles/Status.html
  9 | 
 10 | :mega: **09/03/2024:** The groundwater levels service has switched endpoints, and `dataretrieval` was updated accordingly in [`v1.0.10`](https://github.com/DOI-USGS/dataretrieval-python/releases/tag/v1.0.10). Older versions using the discontinued endpoint will return 503 errors for `nwis.get_gwlevels` or the `service='gwlevels'` argument. Visit [Water Data For the Nation](https://waterdata.usgs.gov/blog/wdfn-waterservices-2024/) for more information.
 11 | 
 12 | ## What is dataretrieval?
 13 | `dataretrieval` was created to simplify the process of loading hydrologic data into the Python environment.
 14 | Like the original R version [`dataRetrieval`](https://github.com/DOI-USGS/dataRetrieval),
 15 | it is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrology
 16 | data that are available on the Web, as well as data from the Water
 17 | Quality Portal (WQP), which currently houses water quality data from the
 18 | Environmental Protection Agency (EPA), U.S. Department of Agriculture
 19 | (USDA), and USGS. Direct USGS data is obtained from a service called the
 20 | National Water Information System (NWIS).
 21 | 
 22 | Note that the python version is not a direct port of the original: it attempts to reproduce the functionality of the R package,
 23 | though its organization and interface often differ.
 24 | 
 25 | If there's a hydrologic or environmental data portal that you'd like dataretrieval to 
 26 | work with, raise it as an [issue](https://github.com/USGS-python/dataretrieval/issues).
 27 | 
 28 | Here's an example using `dataretrieval` to retrieve data from the National Water Information System (NWIS).
 29 | 
 30 | ```python
 31 | # first import the functions for downloading data from NWIS
 32 | import dataretrieval.nwis as nwis
 33 | 
 34 | # specify the USGS site code for which we want data.
 35 | site = '03339000'
 36 | 
 37 | 
 38 | # get instantaneous values (iv)
 39 | df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01')
 40 | 
 41 | # get water quality samples (qwdata)
 42 | df2 = nwis.get_record(sites=site, service='qwdata', start='2017-12-31', end='2018-01-01')
 43 | 
 44 | # get basic info about the site
 45 | df3 = nwis.get_record(sites=site, service='site')
 46 | ```
 47 | Services available from NWIS include:
 48 | - instantaneous values (iv)
 49 | - daily values (dv)
 50 | - statistics (stat)
 51 | - site info (site)
 52 | - discharge peaks (peaks)
 53 | - discharge measurements (measurements)
 54 | * water quality samples (qwdata)
 55 | 
 56 | To access the full functionality available from NWIS web services, nwis.get record appends any additional kwargs into the REST request. For example
 57 | ```python
 58 | nwis.get_record(sites='03339000', service='dv', start='2017-12-31', parameterCd='00060')
 59 | ```
 60 | will download daily data with the parameter code 00060 (discharge).
 61 | 
 62 | ## Accessing the "Internal" NWIS
 63 | If you're connected to the USGS network, dataretrieval call pull from the internal (non-public) NWIS interface.
 64 | Most dataretrieval functions pass kwargs directly to NWIS's REST API, which provides simple access to internal data; simply specify "access='3'".
 65 | For example
 66 | ```python
 67 | nwis.get_record(sites='05404147',service='iv', start='2021-01-01', end='2021-3-01', access='3')
 68 | ```
 69 | 
 70 | More services and documentation to come!
 71 | 
 72 | ## Quick start
 73 | 
 74 | dataretrieval can be installed using pip:
 75 | 	
 76 |     $ python3 -m pip install -U dataretrieval
 77 | 
 78 | or conda:
 79 | 
 80 |     $ conda install -c conda-forge dataretrieval
 81 | 
 82 | More examples of use are include in [`demos`](https://github.com/USGS-python/dataretrieval/tree/main/demos).
 83 | 
 84 | ## Issue tracker
 85 | 
 86 | Please report any bugs and enhancement ideas using the dataretrieval issue
 87 | tracker:
 88 | 
 89 |   https://github.com/USGS-python/dataretrieval/issues
 90 | 
 91 | Feel free to also ask questions on the tracker.
 92 | 
 93 | 
 94 | ## Contributing
 95 | 
 96 | Any help in testing, development, documentation and other tasks is welcome.
 97 | For more details, see the file [CONTRIBUTING.md](CONTRIBUTING.md).
 98 | 
 99 | 
100 | ## Package Support
101 | The Water Mission Area of the USGS supports the development and maintenance of `dataretrieval`
102 | and most likely further into the future.
103 | Resources are available primarily for maintenance and responding to user questions.
104 | Priorities on the development of new features are determined by the `dataretrieval` development team.
105 | 
106 | 
107 | ## Acknowledgments
108 | This material is partially based upon work supported by the National Science Foundation (NSF) under award 1931297.
109 | Any opinions, findings, conclusions, or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the NSF.
110 | 
111 | ## Disclaimer
112 | 
113 | This software is preliminary or provisional and is subject to revision. 
114 | It is being provided to meet the need for timely best science.
115 | The software has not received final approval by the U.S. Geological Survey (USGS).
116 | No warranty, expressed or implied, is made by the USGS or the U.S. Government as to the functionality of the software and related material nor shall the fact of release constitute any such warranty. 
117 | The software is provided on the condition that neither the USGS nor the U.S. Government shall be held liable for any damages resulting from the authorized or unauthorized use of the software.
118 | 
119 | ## Citation
120 | 
121 | Hodson, T.O., Hariharan, J.A., Black, S., and Horsburgh, J.S., 2023, dataretrieval (Python): a Python package for discovering
122 | and retrieving water data available from U.S. federal hydrologic web services:
123 | U.S. Geological Survey software release,
124 | https://doi.org/10.5066/P94I5TX3.
125 | 


--------------------------------------------------------------------------------
/code.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "name": "dataretrieval",
 4 |     "organization": "U.S. Geological Survey",
 5 |     "description": "A Python package for discovering and retrieving water data from U.S. federal hydrologic web services.",
 6 |     "version": "main",
 7 |     "status": "Development",
 8 | 
 9 |     "permissions": {
10 |       "usageType": "openSource",
11 |       "licenses": [
12 |         {
13 |           "name": "Public Domain, CC0-1.0",
14 |           "URL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/main/LICENSE.md"
15 |         }
16 |       ]
17 |     },
18 | 
19 |     "homepageURL": "https://code.usgs.gov/water/dataretrieval-python",
20 |     "downloadURL": "https://code.usgs.gov/water/dataretrieval-python/-/archive/main/dataretrieval-python-main.zip",
21 |     "disclaimerURL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/main/DISCLAIMER.md",
22 |     "repositoryURL": "https://code.usgs.gov/water/dataretrieval-python.git",
23 |     "vcs": "git",
24 | 
25 |     "laborHours": 0,
26 | 
27 |     "tags": [
28 |       "Python",
29 |       "USGS"
30 |     ],
31 | 
32 |     "languages": [
33 |       "Python"
34 |     ],
35 | 
36 |     "contact": {
37 |       "name": "Timothy 0. Hodson",
38 |       "email": "thodson@usgs.gov"
39 |     },
40 | 
41 |     "date": {
42 |       "metadataLastUpdated": "2024-09-17"
43 |     }
44 |   },
45 |   {
46 |     "name": "dataretrieval",
47 |     "organization": "U.S. Geological Survey",
48 |     "description": "A Python package for discovering and retrieving water data from U.S. federal hydrologic web services.",
49 |     "version": "v1.0.2",
50 |     "status": "Production",
51 | 
52 |     "permissions": {
53 |       "usageType": "openSource",
54 |       "licenses": [
55 |         {
56 |           "name": "Public Domain, CC0-1.0",
57 |           "URL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/v1.0.2/LICENSE.md"
58 |         }
59 |       ]
60 |     },
61 | 
62 |     "homepageURL": "https://code.usgs.gov/water/dataretrieval-python",
63 |     "downloadURL": "https://code.usgs.gov/water/dataretrieval-python/-/archive/v1.0.2/dataretrieval-python-v1.0.2.zip",
64 |     "disclaimerURL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/v1.0.2/DISCLAIMER.md",
65 |     "repositoryURL": "https://code.usgs.gov/water/dataretrieval-python.git",
66 |     "vcs": "git",
67 | 
68 |     "laborHours": 0,
69 | 
70 |     "tags": [
71 |       "Python",
72 |       "USGS"
73 |     ],
74 | 
75 |     "languages": [
76 |       "Python"
77 |     ],
78 | 
79 |     "contact": {
80 |       "name": "Timothy 0. Hodson",
81 |       "email": "thodson@usgs.gov"
82 |     },
83 | 
84 |     "date": {
85 |       "metadataLastUpdated": "2024-08-30"
86 |     }
87 |   }
88 | ]
89 | 


--------------------------------------------------------------------------------
/dataretrieval/__init__.py:
--------------------------------------------------------------------------------
 1 | from importlib.metadata import PackageNotFoundError, version
 2 | 
 3 | from dataretrieval.nadp import *
 4 | from dataretrieval.nwis import *
 5 | from dataretrieval.samples import *
 6 | from dataretrieval.streamstats import *
 7 | from dataretrieval.utils import *
 8 | from dataretrieval.waterwatch import *
 9 | from dataretrieval.wqp import *
10 | 
11 | try:
12 |     __version__ = version("dataretrieval")
13 | except PackageNotFoundError:
14 |     __version__ = "version-unknown"
15 | 


--------------------------------------------------------------------------------
/dataretrieval/codes/__init__.py:
--------------------------------------------------------------------------------
1 | from .states import *
2 | from .timezones import *
3 | 


--------------------------------------------------------------------------------
/dataretrieval/codes/states.py:
--------------------------------------------------------------------------------
  1 | """List of 2-digit state codes with commented full names."""
  2 | 
  3 | state_codes = {
  4 |     "Alabama": "al",
  5 |     "Alaska": "ak",
  6 |     "Arizona": "az",
  7 |     "Arkansas": "ar",
  8 |     "California": "ca",
  9 |     "Colorado": "co",
 10 |     "Connecticut": "ct",
 11 |     "Delaware": "de",
 12 |     "District of Columbia": "dc",
 13 |     "Florida": "fl",
 14 |     "Georgia": "ga",
 15 |     "Hawaii": "hi",
 16 |     "Idaho": "id",
 17 |     "Illinois": "il",
 18 |     "Indiana": "in",
 19 |     "Iowa": "ia",
 20 |     "Kansas": "ks",
 21 |     "Kentucky": "ky",
 22 |     "Louisiana": "la",
 23 |     "Maine": "me",
 24 |     "Maryland": "md",
 25 |     "Massachusetts": "ma",
 26 |     "Michigan": "mi",
 27 |     "Minnesota": "mn",
 28 |     "Mississippi": "ms",
 29 |     "Missouri": "mo",
 30 |     "Montana": "mt",
 31 |     "Nebraska": "ne",
 32 |     "Nevada": "nv",
 33 |     "New Hampshire": "nh",
 34 |     "New Jersey": "nj",
 35 |     "New Mexico": "nm",
 36 |     "New York": "ny",
 37 |     "North Carolina": "nc",
 38 |     "North Dakota": "nd",
 39 |     "Ohio": "oh",
 40 |     "Oklahoma": "ok",
 41 |     "Oregon": "or",
 42 |     "Pennsylvania": "pa",
 43 |     "Rhode Island": "ri",
 44 |     "South Carolina": "sc",
 45 |     "South Dakota": "sd",
 46 |     "Tennessee": "tn",
 47 |     "Texas": "tx",
 48 |     "Utah": "ut",
 49 |     "Vermont": "vt",
 50 |     "Virginia": "va",
 51 |     "Washington": "wa",
 52 |     "West Virginia": "wv",
 53 |     "Wisconsin": "wi",
 54 |     "Wyoming": "wy",
 55 | }
 56 | 
 57 | fips_codes = {
 58 |     "Alabama": "01",
 59 |     "Alaska": "02",
 60 |     "Arizona": "04",
 61 |     "Arkansas": "05",
 62 |     "California": "06",
 63 |     "Colorado": "08",
 64 |     "Connecticut": "09",
 65 |     "Delaware": "10",
 66 |     "District of Columbia": "11",
 67 |     "Florida": "12",
 68 |     "Georgia": "13",
 69 |     "Hawaii": "15",
 70 |     "Idaho": "16",
 71 |     "Illinois": "17",
 72 |     "Indiana": "18",
 73 |     "Iowa": "19",
 74 |     "Kansas": "20",
 75 |     "Kentucky": "21",
 76 |     "Louisiana": "22",
 77 |     "Maine": "23",
 78 |     "Maryland": "24",
 79 |     "Massachusetts": "25",
 80 |     "Michigan": "26",
 81 |     "Minnesota": "27",
 82 |     "Mississippi": "28",
 83 |     "Missouri": "29",
 84 |     "Montana": "30",
 85 |     "Nebraska": "31",
 86 |     "Nevada": "32",
 87 |     "New Hampshire": "33",
 88 |     "New Jersey": "34",
 89 |     "New Mexico": "35",
 90 |     "New York": "36",
 91 |     "North Carolina": "37",
 92 |     "North Dakota": "38",
 93 |     "Ohio": "39",
 94 |     "Oklahoma": "40",
 95 |     "Oregon": "41",
 96 |     "Pennsylvania": "42",
 97 |     "Rhode Island": "44",
 98 |     "South Carolina": "45",
 99 |     "South Dakota": "46",
100 |     "Tennessee": "47",
101 |     "Texas": "48",
102 |     "Utah": "49",
103 |     "Vermont": "50",
104 |     "Virginia": "51",
105 |     "Washington": "53",
106 |     "West Virginia": "54",
107 |     "Wisconsin": "55",
108 |     "Wyoming": "56",
109 | }
110 | 


--------------------------------------------------------------------------------
/dataretrieval/codes/timezones.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Time zone information
 3 | """
 4 | 
 5 | tz_str = """-1200 Y
 6 | -1100 X NUT SST
 7 | -1000 W CKT HAST HST TAHT TKT
 8 | -0900 V AKST GAMT GIT HADT HNY
 9 | -0800 U AKDT CIST HAY HNP PST PT
10 | -0700 T HAP HNR MST PDT
11 | -0600 S CST EAST GALT HAR HNC MDT
12 | -0500 R CDT COT EASST ECT EST ET HAC HNE PET
13 | -0400 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT
14 | -0300 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT
15 | -0200 O BRST FNT PMDT UYST WGST
16 | -0100 N AZOT CVT EGT
17 | +0000 Z EGST GMT UTC WET WT
18 | +0100 A CET DFT WAT WEDT WEST
19 | +0200 B CAT CEDT CEST EET SAST WAST
20 | +0300 C EAT EEDT EEST IDT MSK
21 | +0400 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT
22 | +0500 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT
23 | +0600 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST
24 | +0700 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB
25 | +0800 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST
26 | +0900 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT
27 | +1000 K AEST ChST PGT VLAT YAKST YAPT
28 | +1100 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT
29 | +1200 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT
30 | +1330 FJST NZDT
31 | +1130 NFT
32 | +1030 ACDT LHST
33 | +0930 ACST
34 | +0630 CCT MMT
35 | +0545 NPT
36 | +0530 SLT
37 | +0430 AFT IRDT
38 | +0330 IRST
39 | -0230 HAT NDT
40 | -0330 HNT NST NT
41 | -0430 HLV VET
42 | -0930 MART MIT"""
43 | 
44 | tz = {}
45 | for tz_descr in map(str.split, tz_str.split("\n")):
46 |     tz_offset = tz_descr[0]
47 |     for tz_code in tz_descr[1:]:
48 |         tz[tz_code] = tz_offset
49 | 


--------------------------------------------------------------------------------
/dataretrieval/nadp.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tools for retrieving data from the National Atmospheric Deposition Program
  3 | (NADP) including the National Trends Network (NTN), the Mercury Deposition
  4 | Network (MDN).
  5 | 
  6 | National Trends Network
  7 | -----------------------
  8 | The  NTN provides long-term records of precipitation chemistry across the
  9 | United States. See https://nadp.slh.wisc.edu/ntn for more info.
 10 | 
 11 | Mercury Deposition Network
 12 | --------------------------
 13 | The MDN provides long-term records of total mercury (Hg) concentration and
 14 | deposition in precipitation in the United States and Canada. For more
 15 | information visit https://nadp.slh.wisc.edu/networks/mercury-deposition-network/
 16 | 
 17 | Notes
 18 | -----
 19 | Gridded data on NADP is served as zipped tif files. Functions in this module
 20 | will either download and extract the data, when a path is specified, or open
 21 | the data as a GDAL memory-mapped file when no path is specified.
 22 | 
 23 | .. todo::
 24 | 
 25 |     - include AIRMoN, AMNet, and AMoN
 26 |     - flexible handling of strings for parameters and measurement types
 27 |     - add errorchecking
 28 |     - add tests
 29 | 
 30 | """
 31 | 
 32 | import io
 33 | import os
 34 | import re
 35 | import zipfile
 36 | from os.path import basename
 37 | 
 38 | import requests
 39 | 
 40 | NADP_URL = "https://nadp.slh.wisc.edu"
 41 | NADP_MAP_EXT = "filelib/maps"
 42 | 
 43 | NTN_CONC_PARAMS = ["pH", "So4", "NO3", "NH4", "Ca", "Mg", "K", "Na", "Cl", "Br"]
 44 | NTN_DEP_PARAMS = [
 45 |     "H",
 46 |     "So4",
 47 |     "NO3",
 48 |     "NH4",
 49 |     "Ca",
 50 |     "Mg",
 51 |     "K",
 52 |     "Na",
 53 |     "Cl",
 54 |     "Br",
 55 |     "N",
 56 |     "SPlusN",
 57 | ]
 58 | 
 59 | NTN_MEAS_TYPE = ["conc", "dep", "precip"]  # concentration or deposition
 60 | 
 61 | 
 62 | class NADP_ZipFile(zipfile.ZipFile):
 63 |     """Extend zipfile.ZipFile for working on data from NADP"""
 64 | 
 65 |     def tif_name(self):
 66 |         """Get the name of the tif file in the zip file."""
 67 |         filenames = self.namelist()
 68 |         r = re.compile(".*tif$")
 69 |         tif_list = list(filter(r.match, filenames))
 70 |         return tif_list[0]
 71 | 
 72 |     def tif(self):
 73 |         """Read the tif file in the zip file."""
 74 |         return self.read(self.tif_name())
 75 | 
 76 | 
 77 | def get_annual_MDN_map(measurement_type, year, path):
 78 |     """Download a MDN map from NDAP.
 79 | 
 80 |     This function looks for a zip file containing gridded information at:
 81 |     https://nadp.slh.wisc.edu/maps-data/mdn-gradient-maps/.
 82 |     The function will download the zip file and extract it, exposing the tif
 83 |     file if a path is provided.
 84 | 
 85 |     Parameters
 86 |     ----------
 87 |     measurement_type: string
 88 |         The type of measurement (concentration or deposition) as a string,
 89 |         either 'conc' or 'dep' respectively.
 90 | 
 91 |     year: string
 92 |         Year as a string 'YYYY'
 93 | 
 94 |     path: string
 95 |         Download directory.
 96 | 
 97 |     Returns
 98 |     -------
 99 |     path: string
100 |         Path that zip file was extracted into if path was specified.
101 | 
102 |     Examples
103 |     --------
104 |     .. code::
105 | 
106 |         >>> # get map of mercury concentration in 2010 and extract it to a path
107 |         >>> data_path = dataretrieval.nadp.get_annual_MDN_map(
108 |         ...     measurement_type="conc", year="2010", path="somepath"
109 |         ... )
110 | 
111 |     """
112 |     url = f"{NADP_URL}/{NADP_MAP_EXT}/MDN/grids/"
113 | 
114 |     filename = f"Hg_{measurement_type}_{year}.zip"
115 | 
116 |     z = get_zip(url, filename)
117 | 
118 |     if path:
119 |         z.extractall(path)
120 | 
121 |     return f"{path}{os.sep}{basename(filename)}"
122 | 
123 | 
124 | def get_annual_NTN_map(measurement_type, measurement=None, year=None, path="."):
125 |     """Download a NTN map from NDAP.
126 | 
127 |     This function looks for a zip file containing gridded information at:
128 |     https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/.
129 |     The function will download the zip file and extract it, exposing the tif
130 |     file at the provided path.
131 | 
132 |     .. note::
133 | 
134 |         Measurement type abbreviations for concentration and deposition are
135 |         all lower-case, but for precipitation data, the first letter must be
136 |         capitalized!
137 | 
138 |     Parameters
139 |     ----------
140 |     measurement : string
141 |         The measured constituent to return.
142 |     measurement_type : string
143 |         The type of measurement, 'conc', 'dep', or 'Precip', which represent
144 |         concentration, deposition, or precipitation respectively.
145 |     year : string
146 |         Year as a string 'YYYY'
147 |     path : string
148 |         Download directory, defaults to current directory if not specified.
149 | 
150 |     Returns
151 |     -------
152 |     path: string
153 |         Path that zip file was extracted into if path was specified.
154 | 
155 |     Examples
156 |     --------
157 |     .. code::
158 | 
159 |         >>> # get a map of precipitation in 2015 and extract it to a path
160 |         >>> data_path = dataretrieval.nadp.get_annual_NTN_map(
161 |         ...     measurement_type="Precip", year="2015", path="somepath"
162 |         ... )
163 | 
164 |     """
165 |     url = f"{NADP_URL}/{NADP_MAP_EXT}/NTN/grids/{year}/"
166 | 
167 |     filename = f"{measurement_type}_{year}.zip"
168 | 
169 |     if measurement:
170 |         filename = f"{measurement}_{filename}"
171 | 
172 |     z = get_zip(url, filename)
173 | 
174 |     if path:
175 |         z.extractall(path)
176 | 
177 |     return f"{path}{os.sep}{basename(filename)}"
178 | 
179 | 
180 | def get_zip(url, filename):
181 |     """Gets a ZipFile at url and returns it
182 | 
183 |     Parameters
184 |     ----------
185 |     url : string
186 |         URL to zip file
187 | 
188 |     filename : string
189 |         Name of zip file
190 | 
191 |     Returns
192 |     -------
193 |     ZipFile
194 | 
195 |     .. todo::
196 | 
197 |         finish docstring
198 | 
199 |     """
200 |     req = requests.get(url + filename)
201 |     req.raise_for_status()
202 | 
203 |     # z = zipfile.ZipFile(io.BytesIO(req.content))
204 |     z = NADP_ZipFile(io.BytesIO(req.content))
205 |     return z
206 | 


--------------------------------------------------------------------------------
/dataretrieval/streamstats.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This module is a wrapper for the streamstats API (`streamstats documentation`_).
  3 | 
  4 | .. _streamstats documentation: https://streamstats.usgs.gov/streamstatsservices/#/
  5 | 
  6 | """
  7 | 
  8 | import json
  9 | 
 10 | import requests
 11 | 
 12 | 
 13 | def download_workspace(workspaceID, format=""):
 14 |     """Function to download streamstats workspace.
 15 | 
 16 |     Parameters
 17 |     ----------
 18 |     workspaceID: string
 19 |         Service workspace received from watershed result
 20 | 
 21 |     format: string
 22 |         Download return format. Default will return ESRI geodatabase zipfile.
 23 |         'SHAPE' will return a zip file containing shape format.
 24 | 
 25 |     Returns
 26 |     -------
 27 |     r: geodatabase or shapefiles
 28 |         A zip file containing the workspace contents, in either a
 29 |         geodatabase or shape files.
 30 | 
 31 |     """
 32 |     payload = {"workspaceID": workspaceID, "format": format}
 33 |     url = "https://streamstats.usgs.gov/streamstatsservices/download"
 34 | 
 35 |     r = requests.get(url, params=payload)
 36 | 
 37 |     r.raise_for_status()
 38 |     return r
 39 |     # data = r.raw.read()
 40 | 
 41 |     # with open(filepath, 'wb') as f:
 42 |     #    f.write(data)
 43 | 
 44 |     # return
 45 | 
 46 | 
 47 | def get_sample_watershed():
 48 |     """Sample function to get a watershed object for a location in NY.
 49 | 
 50 |     Makes the function call :obj:`dataretrieval.streamstats.get_watershed`
 51 |     with the parameters 'NY', -74.524, 43.939, and returns the watershed
 52 |     object.
 53 | 
 54 |     Returns
 55 |     -------
 56 |     Watershed: :obj:`dataretrieval.streamstats.Watershed`
 57 |         Custom object that contains the watershed information as extracted
 58 |         from the streamstats JSON object.
 59 | 
 60 |     """
 61 |     return get_watershed("NY", -74.524, 43.939)
 62 | 
 63 | 
 64 | def get_watershed(
 65 |     rcode,
 66 |     xlocation,
 67 |     ylocation,
 68 |     crs=4326,
 69 |     includeparameters=True,
 70 |     includeflowtypes=False,
 71 |     includefeatures=True,
 72 |     simplify=True,
 73 |     format="geojson",
 74 | ):
 75 |     """Get watershed object based on location
 76 | 
 77 |     **Streamstats documentation:**
 78 |     Returns a watershed object. The request configuration will determine the
 79 |     overall request response. However all returns will return a watershed
 80 |     object with at least the workspaceid. The workspace id is the id to the
 81 |     service workspace where files are stored and can be used for further
 82 |     processing such as for downloads and flow statistic computations.
 83 | 
 84 |     See: https://streamstats.usgs.gov/streamstatsservices/#/ for more
 85 |     information.
 86 | 
 87 |     Parameters
 88 |     ----------
 89 |     rcode: string
 90 |         StreamStats 2-3 character code that identifies the Study Area --
 91 |         either a State or a Regional Study.
 92 |     xlocation: float
 93 |         X location of the most downstream point of desired study area.
 94 |     ylocation: float
 95 |         Y location of the most downstream point of desired study area.
 96 |     crs: integer, string, optional
 97 |         ESPSG spatial reference code, default is 4326
 98 |     includeparameters: bool, optional
 99 |         Boolean flag to include parameters in response.
100 |     includeflowtypes: bool, string, optional
101 |         Not yet implemented. Would be a comma separated list of region flow
102 |         types to compute with the default being True
103 |     includefeatures: list, optional
104 |         Comma separated list of features to include in response.
105 |     simplify: bool, optional
106 |         Boolean flag controlling whether or not to simplify the returned
107 |         result.
108 | 
109 |     Returns
110 |     -------
111 |     Watershed: :obj:`dataretrieval.streamstats.Watershed`
112 |         Custom object that contains the watershed information as extracted
113 |         from the streamstats JSON object.
114 | 
115 |     """
116 |     payload = {
117 |         "rcode": rcode,
118 |         "xlocation": xlocation,
119 |         "ylocation": ylocation,
120 |         "crs": crs,
121 |         "includeparameters": includeparameters,
122 |         "includeflowtypes": includeflowtypes,
123 |         "includefeatures": includefeatures,
124 |         "simplify": simplify,
125 |     }
126 |     url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson"
127 | 
128 |     r = requests.get(url, params=payload)
129 | 
130 |     r.raise_for_status()
131 | 
132 |     if format == "geojson":
133 |         return r
134 | 
135 |     if format == "shape":
136 |         # use Fiona to return a shape object
137 |         pass
138 | 
139 |     if format == "object":
140 |         # return a python object
141 |         pass
142 | 
143 |     data = json.loads(r.text)
144 |     return Watershed.from_streamstats_json(data)
145 | 
146 | 
147 | class Watershed:
148 |     """Class to extract information from the streamstats JSON object."""
149 | 
150 |     @classmethod
151 |     def from_streamstats_json(cls, streamstats_json):
152 |         """Method that creates a Watershed object from a streamstats JSON."""
153 |         cls.watershed_point = streamstats_json["featurecollection"][0]["feature"]
154 |         cls.watershed_polygon = streamstats_json["featurecollection"][1]["feature"]
155 |         cls.parameters = streamstats_json["parameters"]
156 |         cls._workspaceID = streamstats_json["workspaceID"]
157 |         return cls
158 | 
159 |     def __init__(self, rcode, xlocation, ylocation):
160 |         """Init method that calls the :obj:`from_streamstats_json` method."""
161 |         self = get_watershed(rcode, xlocation, ylocation)
162 | 


--------------------------------------------------------------------------------
/dataretrieval/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Useful utilities for data munging.
  3 | """
  4 | 
  5 | import warnings
  6 | 
  7 | import pandas as pd
  8 | import requests
  9 | 
 10 | import dataretrieval
 11 | from dataretrieval.codes import tz
 12 | 
 13 | 
 14 | def to_str(listlike, delimiter=","):
 15 |     """Translates list-like objects into strings.
 16 | 
 17 |     Parameters
 18 |     ----------
 19 |     listlike: list-like object
 20 |         An object that is a list, or list-like
 21 |         (e.g., ``pandas.core.series.Series``)
 22 |     delimiter: string, optional
 23 |         The delimiter that is placed between entries in listlike when it is
 24 |         turned into a string. Default value is a comma.
 25 | 
 26 |     Returns
 27 |     -------
 28 |     listlike: string
 29 |         The listlike object as string separated by the delimiter
 30 | 
 31 |     Examples
 32 |     --------
 33 |     .. doctest::
 34 | 
 35 |         >>> dataretrieval.utils.to_str([1, "a", 2])
 36 |         '1,a,2'
 37 | 
 38 |         >>> dataretrieval.utils.to_str([0, 10, 42], delimiter="+")
 39 |         '0+10+42'
 40 | 
 41 |     """
 42 |     if type(listlike) == list:
 43 |         return delimiter.join([str(x) for x in listlike])
 44 | 
 45 |     elif type(listlike) == pd.core.series.Series:
 46 |         return delimiter.join(listlike.tolist())
 47 | 
 48 |     elif type(listlike) == pd.core.indexes.base.Index:
 49 |         return delimiter.join(listlike.tolist())
 50 | 
 51 |     elif type(listlike) == str:
 52 |         return listlike
 53 | 
 54 | 
 55 | def format_datetime(df, date_field, time_field, tz_field):
 56 |     """Creates a datetime field from separate date, time, and
 57 |     time zone fields.
 58 | 
 59 |     Assumes ISO 8601.
 60 | 
 61 |     Parameters
 62 |     ----------
 63 |     df: ``pandas.DataFrame``
 64 |         A data frame containing date, time, and timezone fields.
 65 |     date_field: string
 66 |         Name of date column in df.
 67 |     time_field: string
 68 |         Name of time column in df.
 69 |     tz_field: string
 70 |         Name of time zone column in df.
 71 | 
 72 |     Returns
 73 |     -------
 74 |     df: ``pandas.DataFrame``
 75 |         The data frame with a formatted 'datetime' column
 76 | 
 77 |     """
 78 |     # create a datetime index from the columns in qwdata response
 79 |     df[tz_field] = df[tz_field].map(tz)
 80 | 
 81 |     df["datetime"] = pd.to_datetime(
 82 |         df[date_field] + " " + df[time_field] + " " + df[tz_field],
 83 |         format="ISO8601",
 84 |         utc=True,
 85 |     )
 86 | 
 87 |     # if there are any incomplete dates, warn the user
 88 |     if df["datetime"].isna().any():
 89 |         count = df["datetime"].isna().sum()
 90 |         warnings.warn(
 91 |             f"Warning: {count} incomplete dates found, "
 92 |             + "consider setting datetime_index to False.",
 93 |             UserWarning,
 94 |         )
 95 | 
 96 |     return df
 97 | 
 98 | 
 99 | class BaseMetadata:
100 |     """Base class for metadata.
101 | 
102 |     Attributes
103 |     ----------
104 |     url : str
105 |         Response url
106 |     query_time: datetme.timedelta
107 |         Response elapsed time
108 |     header: requests.structures.CaseInsensitiveDict
109 |         Response headers
110 | 
111 |     """
112 | 
113 |     def __init__(self, response) -> None:
114 |         """Generates a standard set of metadata informed by the response.
115 | 
116 |         Parameters
117 |         ----------
118 |         response: Response
119 |             Response object from requests module
120 | 
121 |         Returns
122 |         -------
123 |         md: :obj:`dataretrieval.utils.BaseMetadata`
124 |             A ``dataretrieval`` custom :obj:`dataretrieval.utils.BaseMetadata` object.
125 | 
126 |         """
127 | 
128 |         # These are built from the API response
129 |         self.url = response.url
130 |         self.query_time = response.elapsed
131 |         self.header = response.headers
132 |         self.comment = None
133 | 
134 |         # # not sure what statistic_info is
135 |         # self.statistic_info = None
136 | 
137 |         # # disclaimer seems to be only part of importWaterML1
138 |         # self.disclaimer = None
139 | 
140 |     # These properties are to be set by `nwis` or `wqp`-specific metadata classes.
141 |     @property
142 |     def site_info(self):
143 |         raise NotImplementedError(
144 |             "site_info must be implemented by utils.BaseMetadata children"
145 |         )
146 | 
147 |     @property
148 |     def variable_info(self):
149 |         raise NotImplementedError(
150 |             "variable_info must be implemented by utils.BaseMetadata children"
151 |         )
152 | 
153 |     def __repr__(self) -> str:
154 |         return f"{type(self).__name__}(url={self.url})"
155 | 
156 | 
157 | def query(url, payload, delimiter=",", ssl_check=True):
158 |     """Send a query.
159 | 
160 |     Wrapper for requests.get that handles errors, converts listed
161 |     query parameters to comma separated strings, and returns response.
162 | 
163 |     Parameters
164 |     ----------
165 |     url: string
166 |         URL to query
167 |     payload: dict
168 |         query parameters passed to ``requests.get``
169 |     delimiter: string
170 |         delimiter to use with lists
171 |     ssl_check: bool
172 |         If True, check SSL certificates, if False, do not check SSL,
173 |         default is True
174 | 
175 |     Returns
176 |     -------
177 |     string: query response
178 |         The response from the API query ``requests.get`` function call.
179 |     """
180 | 
181 |     for key, value in payload.items():
182 |         payload[key] = to_str(value, delimiter)
183 |     # for index in range(len(payload)):
184 |     #    key, value = payload[index]
185 |     #    payload[index] = (key, to_str(value))
186 | 
187 |     # define the user agent for the query
188 |     user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"}
189 | 
190 |     response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check)
191 | 
192 |     if response.status_code == 400:
193 |         raise ValueError(
194 |             f"Bad Request, check that your parameters are correct. URL: {response.url}"
195 |         )
196 |     elif response.status_code == 404:
197 |         raise ValueError(
198 |             "Page Not Found Error. May be the result of an empty query. "
199 |             + f"URL: {response.url}"
200 |         )
201 |     elif response.status_code == 414:
202 |         _reason = response.reason
203 |         _example = """
204 |                     # n is the number of chunks to divide the query into \n
205 |                     split_list = np.array_split(site_list, n)
206 |                     data_list = []  # list to store chunk results in \n
207 |                     # loop through chunks and make requests \n
208 |                     for site_list in split_list: \n
209 |                         data = nwis.get_record(sites=site_list, service='dv', \n
210 |                                                start=start, end=end) \n
211 |                         data_list.append(data)  # append results to list"""
212 |         raise ValueError(
213 |             "Request URL too long. Modify your query to use fewer sites. "
214 |             + f"API response reason: {_reason}. Pseudo-code example of how to "
215 |             + f"split your query: \n {_example}"
216 |         )
217 | 
218 |     if response.text.startswith("No sites/data"):
219 |         raise NoSitesError(response.url)
220 | 
221 |     return response
222 | 
223 | 
224 | class NoSitesError(Exception):
225 |     """Custom error class used when selection criteria returns no sites/data."""
226 | 
227 |     def __init__(self, url):
228 |         self.url = url
229 | 
230 |     def __str__(self):
231 |         return (
232 |             "No sites/data found using the selection criteria specified in url: "
233 |             "{url}"
234 |         ).format(url=self.url)
235 | 


--------------------------------------------------------------------------------
/dataretrieval/waterwatch.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, List, Union
 2 | 
 3 | import pandas as pd
 4 | import requests
 5 | 
 6 | ResponseFormat = "json"  # json, xml
 7 | 
 8 | # WaterWatch won't receive any new features but it will continue to operate.
 9 | waterwatch_url = "https://waterwatch.usgs.gov/webservices/"
10 | 
11 | 
12 | def _read_json(data: Dict) -> pd.DataFrame:
13 |     return pd.DataFrame(data).T
14 | 
15 | 
16 | def get_flood_stage(
17 |     sites: List[str] = None, fmt: str = "DF"
18 | ) -> Union[pd.DataFrame, Dict]:
19 |     """
20 |     Retrieves flood stages for a list of station numbers.
21 | 
22 |     Parameters
23 |     ----------
24 |     sites: List of strings
25 |         Site numbers
26 |     fmt: ``pandas.DataFrame`` or dict
27 |         Returned format: Default is "DF" for ``pandas.DataFrame``, else
28 |         a dictionary is returned.
29 | 
30 |     Returns
31 |     -------
32 |     station_stages: ``pandas.Dataframe`` or dict
33 |         contains station numbers and their flood stages.
34 |         If no flood stage for a station, ``None`` is returned.
35 | 
36 |     Examples
37 |     --------
38 |     .. doctest::
39 | 
40 |         >> stations = ["07144100", "07144101"]
41 |         >> res = get_flood_stage(stations, fmt="dict")  # dictionary output
42 |         >> print(res)
43 |         {'07144100': {'action_stage': '20',
44 |                       'flood_stage': '22',
45 |                       'moderate_flood_stage': '25',
46 |                       'major_flood_stage': '26'},
47 |          '07144101': None}
48 |         >> print(get_flood_stage(stations))
49 |         >> print(res)
50 |                 action_stage flood_stage moderate_flood_stage major_flood_stage
51 |         07144100           20          22                   25                26
52 |         07144101         None        None                 None              None
53 |         50057000           16          20                   24                30
54 | 
55 |     """
56 |     res = requests.get(waterwatch_url + "floodstage", params={"format": ResponseFormat})
57 | 
58 |     if res.ok:
59 |         json_res = res.json()
60 |         stages = {
61 |             site["site_no"]: {k: v for k, v in site.items() if k != "site_no"}
62 |             for site in json_res["sites"]
63 |         }
64 |     else:
65 |         raise requests.RequestException(f"[{res.status_code}] - {res.reason}")
66 | 
67 |     if not sites:
68 |         stations_stages = stages
69 |     else:
70 |         stations_stages = {}
71 |         for site in sites:
72 |             try:
73 |                 stations_stages[site] = stages[site]
74 |             except KeyError:
75 |                 stations_stages[site] = None
76 | 
77 |     if fmt == "dict":
78 |         return stations_stages
79 |     else:
80 |         return _read_json(stations_stages)
81 | 


--------------------------------------------------------------------------------
/demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": true,
  7 |     "pycharm": {
  8 |      "name": "#%% md\n"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# USGS dataretrieval Python Package `get_discharge_measurements()` Examples\n",
 13 |     "\n",
 14 |     "This notebook provides examples of using the Python dataretrieval package to retrieve surface water discharge measurement data for a United States Geological Survey (USGS) monitoring site. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "source": [
 20 |     "### Install the Package\n",
 21 |     "\n",
 22 |     "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment."
 23 |    ],
 24 |    "metadata": {
 25 |     "collapsed": false
 26 |    }
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "!pip install dataretrieval"
 34 |    ],
 35 |    "metadata": {
 36 |     "collapsed": false,
 37 |     "pycharm": {
 38 |      "name": "#%%\n"
 39 |     }
 40 |    }
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "source": [
 45 |     "Load the package so you can use it along with other packages used in this notebook."
 46 |    ],
 47 |    "metadata": {
 48 |     "collapsed": false
 49 |    }
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "from dataretrieval import nwis\n",
 57 |     "from IPython.display import display"
 58 |    ],
 59 |    "metadata": {
 60 |     "collapsed": false,
 61 |     "pycharm": {
 62 |      "name": "#%%\n"
 63 |     }
 64 |    }
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "source": [
 69 |     "### Basic Usage\n",
 70 |     "\n",
 71 |     "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_discharge_measurements()` function to retrieve surface water discharge measurements for a USGS monitoring site from NWIS. The function has the following arguments:\n",
 72 |     "\n",
 73 |     "Arguments (Additional arguments, if supplied, will be used as query parameters)\n",
 74 |     "\n",
 75 |     "* **sites** (list of strings): A list of USGS site codes to retrieve data for. If the qwdata parameter site_no is supplied, it will overwrite the sites parameter.\n",
 76 |     "* **start** (string): The beginning date of a period for which to retrieve measurements. If the qwdata parameter begin_date is supplied, it will overwrite the start parameter.\n",
 77 |     "* **end** (string): The ending date of a period for which to retrieve measurements. If the qwdata parameter end_date is supplied, it will overwrite the end parameter."
 78 |    ],
 79 |    "metadata": {
 80 |     "collapsed": false
 81 |    }
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "source": [
 86 |     "Example 1: Get all of the surface water measurements for a single site"
 87 |    ],
 88 |    "metadata": {
 89 |     "collapsed": false
 90 |    }
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "measurements1 = nwis.get_discharge_measurements(sites=\"10109000\")\n",
 98 |     "print(\"Retrieved \" + str(len(measurements1[0])) + \" data values.\")"
 99 |    ],
100 |    "metadata": {
101 |     "collapsed": false,
102 |     "pycharm": {
103 |      "name": "#%%\n"
104 |     }
105 |    }
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "source": [
110 |     "### Interpreting the Result\n",
111 |     "\n",
112 |     "The result of calling the `get_discharge_measurements()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the discharge measurements for the time period requested.\n",
113 |     "\n",
114 |     "Once you've got the data frame, there's several useful things you can do to explore the data."
115 |    ],
116 |    "metadata": {
117 |     "collapsed": false
118 |    }
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "source": [
123 |     "Display the data frame as a table"
124 |    ],
125 |    "metadata": {
126 |     "collapsed": false,
127 |     "pycharm": {
128 |      "name": "#%% md\n"
129 |     }
130 |    }
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "outputs": [],
136 |    "source": [
137 |     "display(measurements1[0])"
138 |    ],
139 |    "metadata": {
140 |     "collapsed": false,
141 |     "pycharm": {
142 |      "name": "#%%\n"
143 |     }
144 |    }
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "source": [
149 |     "Show the data types of the columns in the resulting data frame."
150 |    ],
151 |    "metadata": {
152 |     "collapsed": false
153 |    }
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "outputs": [],
159 |    "source": [
160 |     "print(measurements1[0].dtypes)"
161 |    ],
162 |    "metadata": {
163 |     "collapsed": false,
164 |     "pycharm": {
165 |      "name": "#%%\n"
166 |     }
167 |    }
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "source": [
172 |     "The other part of the result returned from the `get_discharge_measurements()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response."
173 |    ],
174 |    "metadata": {
175 |     "collapsed": false
176 |    }
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "outputs": [],
182 |    "source": [
183 |     "print(\"The query URL used to retrieve the data from NWIS was: \" + measurements1[1].url)"
184 |    ],
185 |    "metadata": {
186 |     "collapsed": false,
187 |     "pycharm": {
188 |      "name": "#%%\n"
189 |     }
190 |    }
191 |   },
192 |   {
193 |    "cell_type": "markdown",
194 |    "source": [
195 |     "### Additional Examples\n",
196 |     "\n",
197 |     "Example 2: Get all of the surface water measurements between a start and end date"
198 |    ],
199 |    "metadata": {
200 |     "collapsed": false
201 |    }
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": null,
206 |    "outputs": [],
207 |    "source": [
208 |     "measurements2 = nwis.get_discharge_measurements(sites=\"10109000\", start=\"2019-01-01\", end=\"2019-12-31\")\n",
209 |     "print(\"Retrieved \" + str(len(measurements2[0])) + \" data values.\")\n",
210 |     "display(measurements2[0])"
211 |    ],
212 |    "metadata": {
213 |     "collapsed": false,
214 |     "pycharm": {
215 |      "name": "#%%\n"
216 |     }
217 |    }
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "source": [
222 |     "Example 3: Get all of the surface water measurements for multiple sites"
223 |    ],
224 |    "metadata": {
225 |     "collapsed": false,
226 |     "pycharm": {
227 |      "name": "#%% md\n"
228 |     }
229 |    }
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": null,
234 |    "outputs": [],
235 |    "source": [
236 |     "measurements3 = nwis.get_discharge_measurements(sites=[\"01594440\", \"040851325\"])\n",
237 |     "print(\"Retrieved \" + str(len(measurements3[0])) + \" data values.\")\n",
238 |     "display(measurements3[0])"
239 |    ],
240 |    "metadata": {
241 |     "collapsed": false,
242 |     "pycharm": {
243 |      "name": "#%%\n"
244 |     }
245 |    }
246 |   }
247 |  ],
248 |  "metadata": {
249 |   "kernelspec": {
250 |    "display_name": "Python 3",
251 |    "language": "python",
252 |    "name": "python3"
253 |   },
254 |   "language_info": {
255 |    "codemirror_mode": {
256 |     "name": "ipython",
257 |     "version": 2
258 |    },
259 |    "file_extension": ".py",
260 |    "mimetype": "text/x-python",
261 |    "name": "python",
262 |    "nbconvert_exporter": "python",
263 |    "pygments_lexer": "ipython2",
264 |    "version": "2.7.6"
265 |   }
266 |  },
267 |  "nbformat": 4,
268 |  "nbformat_minor": 0
269 | }


--------------------------------------------------------------------------------
/demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": true,
  7 |     "pycharm": {
  8 |      "name": "#%% md\n"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# USGS dataretrieval Python Package `get_pmcodes()` Examples\n",
 13 |     "\n",
 14 |     "This notebook provides examples of using the Python dataretrieval package to retrieve information about USGS parameter codes from NWIS. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA).\n",
 15 |     "\n",
 16 |     "For more information about USGS NWIS parameter codes, see:\n",
 17 |     "https://help.waterdata.usgs.gov/codes-and-parameters/parameters"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "source": [
 23 |     "### Install the Package\n",
 24 |     "\n",
 25 |     "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment."
 26 |    ],
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    }
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "!pip install dataretrieval"
 37 |    ],
 38 |    "metadata": {
 39 |     "collapsed": false,
 40 |     "pycharm": {
 41 |      "name": "#%%\n"
 42 |     }
 43 |    }
 44 |   },
 45 |   {
 46 |    "cell_type": "markdown",
 47 |    "source": [
 48 |     "Load the package so you can use it along with other packages used in this notebook."
 49 |    ],
 50 |    "metadata": {
 51 |     "collapsed": false
 52 |    }
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "from dataretrieval import nwis\n",
 60 |     "from IPython.display import display"
 61 |    ],
 62 |    "metadata": {
 63 |     "collapsed": false,
 64 |     "pycharm": {
 65 |      "name": "#%%\n"
 66 |     }
 67 |    }
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "source": [
 72 |     "### Basic Usage\n",
 73 |     "\n",
 74 |     "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_pmcodes()` function to retrieve information about parameter codes (i.e., observed variables) from NWIS. The following arguments are supported:\n",
 75 |     "\n",
 76 |     "Arguments (Additional arguments, if supplied, will be used as query parameters)\n",
 77 |     "\n",
 78 |     "* **parameterCd** (string): A string containing the parameter code for which information is to be retrieved."
 79 |    ],
 80 |    "metadata": {
 81 |     "collapsed": false
 82 |    }
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "source": [
 87 |     "Example 1: Retrieve information for a set of USGS NWIS parameter codes."
 88 |    ],
 89 |    "metadata": {
 90 |     "collapsed": false
 91 |    }
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "parameter_codes = nwis.get_pmcodes(['00400'])\n",
 99 |     "print(\"Retrieved information about \" + str(len(parameter_codes[0])) + \" parameter code.\")"
100 |    ],
101 |    "metadata": {
102 |     "collapsed": false,
103 |     "pycharm": {
104 |      "name": "#%%\n"
105 |     }
106 |    }
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "source": [
111 |     "### Interpreting the Result\n",
112 |     "\n",
113 |     "The result of calling the `get_pmcodes()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the parameter code information requested.\n",
114 |     "\n",
115 |     "Once you've got the data frame, you can explore the data."
116 |    ],
117 |    "metadata": {
118 |     "collapsed": false
119 |    }
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "outputs": [],
125 |    "source": [
126 |     "# Display the data frame as a table\n",
127 |     "display(parameter_codes[0])\n"
128 |    ],
129 |    "metadata": {
130 |     "collapsed": false,
131 |     "pycharm": {
132 |      "name": "#%%\n"
133 |     }
134 |    }
135 |   }
136 |  ],
137 |  "metadata": {
138 |   "kernelspec": {
139 |    "display_name": "Python 3",
140 |    "language": "python",
141 |    "name": "python3"
142 |   },
143 |   "language_info": {
144 |    "codemirror_mode": {
145 |     "name": "ipython",
146 |     "version": 2
147 |    },
148 |    "file_extension": ".py",
149 |    "mimetype": "text/x-python",
150 |    "name": "python",
151 |    "nbconvert_exporter": "python",
152 |    "pygments_lexer": "ipython2",
153 |    "version": "2.7.6"
154 |   }
155 |  },
156 |  "nbformat": 4,
157 |  "nbformat_minor": 0
158 | }


--------------------------------------------------------------------------------
/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "pycharm": {
  7 |      "name": "#%% md\n"
  8 |     }
  9 |    },
 10 |    "source": [
 11 |     "# USGS dataretrieval Python Package `get_discharge_peaks()` Examples\n",
 12 |     "\n",
 13 |     "This notebook provides examples of using the Python dataretrieval package to retrieve streamflow peak data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "### Install the Package\n",
 21 |     "\n",
 22 |     "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment."
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": null,
 28 |    "metadata": {
 29 |     "collapsed": false,
 30 |     "jupyter": {
 31 |      "outputs_hidden": false
 32 |     },
 33 |     "pycharm": {
 34 |      "name": "#%%\n"
 35 |     }
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "!pip install dataretrieval"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "Load the package so you can use it along with other packages used in this notebook."
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "metadata": {
 53 |     "collapsed": false,
 54 |     "jupyter": {
 55 |      "outputs_hidden": false
 56 |     },
 57 |     "pycharm": {
 58 |      "name": "#%%\n"
 59 |     }
 60 |    },
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "from dataretrieval import nwis\n",
 64 |     "from IPython.display import display"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "### Basic Usage\n",
 72 |     "\n",
 73 |     "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_discharge_peaks()` function to retrieve peak streamflow data for a USGS monitoring site from NWIS. The function has the following arguments:\n",
 74 |     "\n",
 75 |     "Arguments (Additional parameters, if supplied, will be used as query parameters)\n",
 76 |     "\n",
 77 |     "* **sites** (list of strings): A list of USGS site identifiers for which data will be retrieved. If the waterdata parameter site_no is supplied, it will overwrite the sites parameter.\n",
 78 |     "* **start** (string): A beginning date for the period for which data will be retrieved. If the waterdata parameter begin_date is supplied, it will overwrite the start parameter.\n",
 79 |     "* **end** (string): An ending date for the period for which data will be retrieved. If the waterdata parameter end_date is supplied, it will overwrite the end parameter."
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "Example 1: Retrieve streamflow peak data for two USGS monitoring sites"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "metadata": {
 93 |     "collapsed": false,
 94 |     "jupyter": {
 95 |      "outputs_hidden": false
 96 |     },
 97 |     "pycharm": {
 98 |      "name": "#%%\n"
 99 |     }
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "site_ids = ['01594440', '040851325']\n",
104 |     "peak_data = nwis.get_discharge_peaks(site_ids)\n",
105 |     "print(\"Retrieved \" + str(len(peak_data[0])) + \" data values.\")"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {},
111 |    "source": [
112 |     "### Interpreting the Result\n",
113 |     "\n",
114 |     "The result of calling the `get_discharge_peaks()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the discharge peak values for the requested site(s).\n",
115 |     "\n",
116 |     "Once you've got the data frame, there's several useful things you can do to explore the data.\n",
117 |     "\n",
118 |     "Display the data frame as a table."
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {
125 |     "collapsed": false,
126 |     "jupyter": {
127 |      "outputs_hidden": false
128 |     },
129 |     "pycharm": {
130 |      "name": "#%%\n"
131 |     }
132 |    },
133 |    "outputs": [],
134 |    "source": [
135 |     "display(peak_data[0])"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "metadata": {},
141 |    "source": [
142 |     "Show the data types of the columns in the resulting data frame."
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": null,
148 |    "metadata": {
149 |     "collapsed": false,
150 |     "jupyter": {
151 |      "outputs_hidden": false
152 |     },
153 |     "pycharm": {
154 |      "name": "#%%\n"
155 |     }
156 |    },
157 |    "outputs": [],
158 |    "source": [
159 |     "print(peak_data[0].dtypes)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "The other part of the result returned from the `get_dv()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response."
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": null,
172 |    "metadata": {
173 |     "collapsed": false,
174 |     "jupyter": {
175 |      "outputs_hidden": false
176 |     },
177 |     "pycharm": {
178 |      "name": "#%%\n"
179 |     }
180 |    },
181 |    "outputs": [],
182 |    "source": [
183 |     "print(\"The query URL used to retrieve the data from NWIS was: \" + peak_data[1].url)"
184 |    ]
185 |   },
186 |   {
187 |    "metadata": {},
188 |    "cell_type": "markdown",
189 |    "source": "The following example is the same as the previous example but with multi index turned off (multi_index=False)"
190 |   },
191 |   {
192 |    "metadata": {},
193 |    "cell_type": "code",
194 |    "outputs": [],
195 |    "execution_count": null,
196 |    "source": [
197 |     "site_ids = ['01594440', '040851325']\n",
198 |     "peak_data = nwis.get_discharge_peaks(site_ids, multi_index=False)\n",
199 |     "print(\"Retrieved \" + str(len(peak_data[0])) + \" data values.\")"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "markdown",
204 |    "metadata": {},
205 |    "source": [
206 |     "### Additional Examples\n",
207 |     "\n",
208 |     "Example 2: Retrieve discharge peaks for a single site."
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": null,
214 |    "metadata": {
215 |     "collapsed": false,
216 |     "jupyter": {
217 |      "outputs_hidden": false
218 |     },
219 |     "pycharm": {
220 |      "name": "#%%\n"
221 |     }
222 |    },
223 |    "outputs": [],
224 |    "source": [
225 |     "stations = \"06011000\"\n",
226 |     "data3 = nwis.get_discharge_peaks(stations)\n",
227 |     "display(data3[0])"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "markdown",
232 |    "metadata": {
233 |     "pycharm": {
234 |      "name": "#%% md\n"
235 |     }
236 |    },
237 |    "source": [
238 |     "Example 3: Retrieve peak discharge data for a monitoring site between two dates"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {
245 |     "collapsed": false,
246 |     "jupyter": {
247 |      "outputs_hidden": false
248 |     },
249 |     "pycharm": {
250 |      "name": "#%%\n"
251 |     }
252 |    },
253 |    "outputs": [],
254 |    "source": [
255 |     "data4 = nwis.get_discharge_peaks(stations, start='1953-01-01', end='1960-01-01')\n",
256 |     "display(data4[0])"
257 |    ]
258 |   }
259 |  ],
260 |  "metadata": {
261 |   "kernelspec": {
262 |    "display_name": "Python 3 (ipykernel)",
263 |    "language": "python",
264 |    "name": "python3"
265 |   },
266 |   "language_info": {
267 |    "codemirror_mode": {
268 |     "name": "ipython",
269 |     "version": 3
270 |    },
271 |    "file_extension": ".py",
272 |    "mimetype": "text/x-python",
273 |    "name": "python",
274 |    "nbconvert_exporter": "python",
275 |    "pygments_lexer": "ipython3",
276 |    "version": "3.9.7"
277 |   }
278 |  },
279 |  "nbformat": 4,
280 |  "nbformat_minor": 4
281 | }
282 | 


--------------------------------------------------------------------------------
/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "source": [
  6 |     "# USGS dataretrieval Python Package `get_ratings()` Examples\n",
  7 |     "\n",
  8 |     "This notebook provides examples of using the Python dataretrieval package to retrieve rating curve data for a United States Geological Survey (USGS) streamflow gage. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)."
  9 |    ],
 10 |    "metadata": {
 11 |     "collapsed": false
 12 |    }
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "source": [
 17 |     "### Install the Package\n",
 18 |     "\n",
 19 |     "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment."
 20 |    ],
 21 |    "metadata": {
 22 |     "collapsed": false
 23 |    }
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": null,
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "!pip install dataretrieval"
 31 |    ],
 32 |    "metadata": {
 33 |     "collapsed": false,
 34 |     "pycharm": {
 35 |      "name": "#%%\n"
 36 |     }
 37 |    }
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "source": [
 42 |     "Load the package so you can use it along with other packages used in this notebook."
 43 |    ],
 44 |    "metadata": {
 45 |     "collapsed": false
 46 |    }
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": null,
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "from dataretrieval import nwis\n",
 54 |     "from IPython.display import display"
 55 |    ],
 56 |    "metadata": {
 57 |     "collapsed": false,
 58 |     "pycharm": {
 59 |      "name": "#%%\n"
 60 |     }
 61 |    }
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "source": [
 66 |     "### Basic Usage\n",
 67 |     "\n",
 68 |     "The dataretrieval package has several functions that allow you to retrieve data from different web services. This example uses the `get_ratings()` function to retrieve rating curve data for a monitoring site from USGS NWIS. The following arguments are available:\n",
 69 |     "\n",
 70 |     "Arguments (Additional arguments, if supplied, will be used as query parameters)\n",
 71 |     "\n",
 72 |     "* **site** (string): A USGS site number.  This is usually an 8 digit number as a string. If the nwis parameter site_no is supplied, it will overwrite the site parameter.\n",
 73 |     "* **base** (string): Can be \"base\", \"corr\", or \"exsa\"\n",
 74 |     "* **county** (string): County IDs from county lookup or \"ALL\"\n",
 75 |     "* **categories** (Listlike): List or comma delimited string of Two-letter category abbreviations\n",
 76 |     "\n",
 77 |     "NOTE: Not all active USGS streamflow gages have traditional rating curves that relate stage to flow."
 78 |    ],
 79 |    "metadata": {
 80 |     "collapsed": false
 81 |    }
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "source": [
 86 |     "Example 1: Get rating data for an NWIS Site"
 87 |    ],
 88 |    "metadata": {
 89 |     "collapsed": false,
 90 |     "pycharm": {
 91 |      "name": "#%% md\n"
 92 |     }
 93 |    }
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": null,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "# Specify the USGS site number/code\n",
104 |     "site_id = \"10109000\"\n",
105 |     "\n",
106 |     "# Get the rating curve data\n",
107 |     "ratingData = nwis.get_ratings(site=site_id, file_type=\"exsa\")\n",
108 |     "print(\"Retrieved \" + str(len(ratingData[0])) + \" data values.\")"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "source": [
114 |     "### Interpreting the Result\n",
115 |     "\n",
116 |     "The result of calling the `get_ratings()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the rating curve data for the requested site.\n",
117 |     "\n",
118 |     "Once you've got the data frame, there's several useful things you can do to explore the data. You can execute the following code to display the data frame as a table.\n",
119 |     "\n",
120 |     "If the \"type\" parameter in the request has a value of \"base,\" then the columns in the data frame are as follows:\n",
121 |     "* INDEP - typically the gage height in feet\n",
122 |     "* DEP - typically the streamflow in cubic feet per second\n",
123 |     "* STOR - where an \"*\" indicates that the pair are a fixed point of the rating curve\n",
124 |     "\n",
125 |     "If the \"type\" parameter is specified as \"exsa,\" then an additional column called SHIFT is included that indicates the current shift in the rating for that value of INDEP.\n",
126 |     "\n",
127 |     "If the \"type\" parameter is specified as \"corr,\" then the columns are as follows:\n",
128 |     "* INDEP - typically gage height in feet\n",
129 |     "* CORR - the correction for that value\n",
130 |     "* CORRINDEP - the corrected value for CORR"
131 |    ],
132 |    "metadata": {
133 |     "collapsed": false,
134 |     "pycharm": {
135 |      "name": "#%% md\n"
136 |     }
137 |    }
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "outputs": [],
143 |    "source": [
144 |     "display(ratingData[0])"
145 |    ],
146 |    "metadata": {
147 |     "collapsed": false,
148 |     "pycharm": {
149 |      "name": "#%%\n"
150 |     }
151 |    }
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "source": [
156 |     "Show the data types of the columns in the resulting data frame"
157 |    ],
158 |    "metadata": {
159 |     "collapsed": false,
160 |     "pycharm": {
161 |      "name": "#%% md\n"
162 |     }
163 |    }
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "outputs": [],
169 |    "source": [
170 |     "print(ratingData[0].dtypes)"
171 |    ],
172 |    "metadata": {
173 |     "collapsed": false,
174 |     "pycharm": {
175 |      "name": "#%%\n"
176 |     }
177 |    }
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "source": [
182 |     "The other part of the result returned from the `get_ratings()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response."
183 |    ],
184 |    "metadata": {
185 |     "collapsed": false,
186 |     "pycharm": {
187 |      "name": "#%% md\n"
188 |     }
189 |    }
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": null,
194 |    "outputs": [],
195 |    "source": [
196 |     "print(\"The query URL used to retrieve the data from NWIS was: \" + ratingData[1].url)"
197 |    ],
198 |    "metadata": {
199 |     "collapsed": false,
200 |     "pycharm": {
201 |      "name": "#%%\n"
202 |     }
203 |    }
204 |   },
205 |   {
206 |    "cell_type": "markdown",
207 |    "source": [
208 |     "Example 2: Get rating data for a different NWIS site by changing the site_id"
209 |    ],
210 |    "metadata": {
211 |     "collapsed": false,
212 |     "pycharm": {
213 |      "name": "#%% md\n"
214 |     }
215 |    }
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "outputs": [],
221 |    "source": [
222 |     "site_id = '01594440'\n",
223 |     "data = nwis.get_ratings(site=site_id, file_type=\"base\")\n",
224 |     "print(\"Retrieved \" + str(len(data[0])) + \" data values.\")"
225 |    ],
226 |    "metadata": {
227 |     "collapsed": false,
228 |     "pycharm": {
229 |      "name": "#%%\n"
230 |     }
231 |    }
232 |   }
233 |  ],
234 |  "metadata": {
235 |   "kernelspec": {
236 |    "display_name": "Python 3",
237 |    "language": "python",
238 |    "name": "python3"
239 |   },
240 |   "language_info": {
241 |    "codemirror_mode": {
242 |     "name": "ipython",
243 |     "version": 2
244 |    },
245 |    "file_extension": ".py",
246 |    "mimetype": "text/x-python",
247 |    "name": "python",
248 |    "nbconvert_exporter": "python",
249 |    "pygments_lexer": "ipython2",
250 |    "version": "2.7.6"
251 |   }
252 |  },
253 |  "nbformat": 4,
254 |  "nbformat_minor": 0
255 | }


--------------------------------------------------------------------------------
/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "pycharm": {
  7 |      "name": "#%% md\n"
  8 |     }
  9 |    },
 10 |    "source": [
 11 |     "# USGS dataretrieval Python Package `get_water_use()` Examples\n",
 12 |     "\n",
 13 |     "This notebook provides examples of using the Python dataretrieval package to retrieve water use data. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "### Install the Package\n",
 21 |     "\n",
 22 |     "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment."
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": null,
 28 |    "metadata": {
 29 |     "collapsed": false,
 30 |     "jupyter": {
 31 |      "outputs_hidden": false
 32 |     },
 33 |     "pycharm": {
 34 |      "name": "#%%\n"
 35 |     }
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "!pip install dataretrieval"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "Load the package so you can use it along with other packages used in this notebook."
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "metadata": {
 53 |     "collapsed": false,
 54 |     "jupyter": {
 55 |      "outputs_hidden": false
 56 |     },
 57 |     "pycharm": {
 58 |      "name": "#%%\n"
 59 |     }
 60 |    },
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "from dataretrieval import nwis\n",
 64 |     "from IPython.display import display"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "### Basic Usage\n",
 72 |     "\n",
 73 |     "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_water_use()` function to retrieve water use data. The following arguments are supported:\n",
 74 |     "\n",
 75 |     "Arguments (Additional arguments, if supplied, will be used as query parameters)\n",
 76 |     "\n",
 77 |     "* **years** (Listlike): List or comma delimited string of years.  Must be years ending in 0 or 5 because water use data is only reported during these years, or \"ALL\", which retrieves all available years\n",
 78 |     "* **state** (string): Full name, abbreviation or id for a state for which to retrieve data\n",
 79 |     "* **county** (string or list of strings): County IDs from county lookup or \"ALL\"\n",
 80 |     "* **categories** (Listlike): List or comma delimited string of two-letter category abbreviations"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {
 86 |     "pycharm": {
 87 |      "name": "#%% md\n"
 88 |     }
 89 |    },
 90 |    "source": [
 91 |     "#### Example 1: Retrieve all water use data for a state"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": false,
 99 |     "jupyter": {
100 |      "outputs_hidden": false
101 |     },
102 |     "pycharm": {
103 |      "name": "#%%\n"
104 |     }
105 |    },
106 |    "outputs": [],
107 |    "source": [
108 |     "pennsylvania = nwis.get_water_use(state='PA')\n",
109 |     "print('Retrieved ' + str(len(pennsylvania[0])) + ' water use records.')"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "### Interpreting the Result\n",
117 |     "\n",
118 |     "The result of calling the `get_water_use()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the water use data.\n",
119 |     "\n",
120 |     "Once you've got the data frame, there's several useful things you can do to explore the data."
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {
126 |     "pycharm": {
127 |      "name": "#%% md\n"
128 |     }
129 |    },
130 |    "source": [
131 |     "Display the data frame as a table. The example request was for a whole state. The data returned are organized by county and year, with summary data reported every 5 years."
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": null,
137 |    "metadata": {
138 |     "collapsed": false,
139 |     "jupyter": {
140 |      "outputs_hidden": false
141 |     },
142 |     "pycharm": {
143 |      "name": "#%%\n"
144 |     }
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "display(pennsylvania[0])"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {},
154 |    "source": [
155 |     "Show the data types of the columns in the resulting data frame."
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": null,
161 |    "metadata": {
162 |     "collapsed": false,
163 |     "jupyter": {
164 |      "outputs_hidden": false
165 |     },
166 |     "pycharm": {
167 |      "name": "#%%\n"
168 |     }
169 |    },
170 |    "outputs": [],
171 |    "source": [
172 |     "print(pennsylvania[0].dtypes)"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {
178 |     "pycharm": {
179 |      "name": "#%% md\n"
180 |     }
181 |    },
182 |    "source": [
183 |     "#### Example 2: Retrieve data for an entire state for certain years\n",
184 |     "\n",
185 |     "Returns data parsed by county - one row for each county for each year of interest rather than the entire state. Data are included for 5 year periods."
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": null,
191 |    "metadata": {
192 |     "collapsed": false,
193 |     "jupyter": {
194 |      "outputs_hidden": false
195 |     },
196 |     "pycharm": {
197 |      "name": "#%%\n"
198 |     }
199 |    },
200 |    "outputs": [],
201 |    "source": [
202 |     "ohio = nwis.get_water_use(years=[2000, 2005, 2010], state='OH')\n",
203 |     "print('Retrieved ' + str(len(ohio[0])) + ' water use records.')\n",
204 |     "display(ohio[0])"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "metadata": {
210 |     "pycharm": {
211 |      "name": "#%% md\n"
212 |     }
213 |    },
214 |    "source": [
215 |     "#### Example 3: Retrieve two specific water use categories for an entire state"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": null,
221 |    "metadata": {
222 |     "collapsed": false,
223 |     "jupyter": {
224 |      "outputs_hidden": false
225 |     },
226 |     "pycharm": {
227 |      "name": "#%%\n"
228 |     }
229 |    },
230 |    "outputs": [],
231 |    "source": [
232 |     "# Get water use data for livestock (LI) and irrigation (IT)\n",
233 |     "kansas = nwis.get_water_use(state='KS', categories=['IT', 'LI'])\n",
234 |     "print('Retrieved ' + str(len(kansas[0])) + ' water use records.')\n",
235 |     "display(kansas[0])\n"
236 |    ]
237 |   }
238 |  ],
239 |  "metadata": {
240 |   "kernelspec": {
241 |    "display_name": "Python 3 (ipykernel)",
242 |    "language": "python",
243 |    "name": "python3"
244 |   },
245 |   "language_info": {
246 |    "codemirror_mode": {
247 |     "name": "ipython",
248 |     "version": 3
249 |    },
250 |    "file_extension": ".py",
251 |    "mimetype": "text/x-python",
252 |    "name": "python",
253 |    "nbconvert_exporter": "python",
254 |    "pygments_lexer": "ipython3",
255 |    "version": "3.9.7"
256 |   }
257 |  },
258 |  "nbformat": 4,
259 |  "nbformat_minor": 4
260 | }
261 | 


--------------------------------------------------------------------------------
/demos/nwqn_data_pull/Dockerfile_dataretrieval:
--------------------------------------------------------------------------------
 1 | # Python 3.11
 2 | FROM python:3.11-slim-buster
 3 | 
 4 | 
 5 | RUN apt-get update \
 6 |     # Install aws-lambda-cpp build dependencies
 7 |     && apt-get install -y \
 8 |       g++ \
 9 |       make \
10 |       cmake \
11 |       unzip \
12 |     # cleanup package lists, they are not used anymore in this image
13 |     && rm -rf /var/lib/apt/lists/* \
14 |     && apt-cache search linux-headers-generic
15 | 
16 | ARG FUNCTION_DIR="/function"
17 | 
18 | # Copy function code
19 | RUN mkdir -p ${FUNCTION_DIR}
20 | 
21 | # Update pip
22 | # NB botocore/boto3 are pinned due to https://github.com/boto/boto3/issues/3648
23 | #    using versions from https://github.com/aio-libs/aiobotocore/blob/72b8dd5d7d4ef2f1a49a0ae0c37b47e5280e2070/setup.py
24 | #    due to s3fs dependency
25 | RUN pip install --upgrade --ignore-installed pip wheel six setuptools \
26 |     && pip install --upgrade --no-cache-dir --ignore-installed \
27 |         awslambdaric \
28 |         botocore==1.29.76 \
29 |         boto3==1.26.76 \
30 |         redis \
31 |         httplib2 \
32 |         requests \
33 |         numpy \
34 |         scipy \
35 |         pandas \
36 |         pika \
37 |         kafka-python \
38 |         cloudpickle \
39 |         ps-mem \
40 |         tblib
41 | 
42 | # Set working directory to function root directory
43 | WORKDIR ${FUNCTION_DIR}
44 | 
45 | # Add Lithops
46 | COPY lithops_lambda.zip ${FUNCTION_DIR}
47 | RUN unzip lithops_lambda.zip \
48 |     && rm lithops_lambda.zip \
49 |     && mkdir handler \
50 |     && touch handler/__init__.py \
51 |     && mv entry_point.py handler/
52 | 
53 | # Put your dependencies here, using RUN pip install... or RUN apt install...
54 | 
55 | COPY requirements.txt requirements.txt
56 | RUN pip install --no-cache-dir -r requirements.txt
57 | 
58 | ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]
59 | CMD [ "handler.entry_point.lambda_handler" ]


--------------------------------------------------------------------------------
/demos/nwqn_data_pull/README.md:
--------------------------------------------------------------------------------
 1 | # Retrieve data from the National Water Quality Network (NWQN)
 2 | 
 3 | > This usage example is for demonstration and not for research or
 4 | > operational use.
 5 | 
 6 | This example uses Lithops to retrieve data from every NWQN
 7 | monitoring site, then writes the results to Parquet files on S3. Each
 8 | retrieval also searches the NLDI for neighboring sites with NWQN data and
 9 | merges those data. In the streamflow retrieval, the neighborhood search
10 | progressively fill in gaps in the record by taking data from the
11 | nearest streamgage and rescaling it by the drainage area ratio.
12 | 
13 | 1. Set up a Python environment
14 | ```bash
15 | conda create --name dataretrieval-lithops -y python=3.11
16 | conda activate dataretrieval-lithops
17 | pip install -r requirements.txt
18 | ```
19 | 
20 | 2. Configure compute and storage backends for [lithops](https://lithops-cloud.github.io/docs/source/configuration.html).
21 | The configuration in `lithops.yaml` uses AWS Lambda for [compute](https://lithops-cloud.github.io/docs/source/compute_config/aws_lambda.html) and AWS S3 for [storage](https://lithops-cloud.github.io/docs/source/storage_config/aws_s3.html).
22 | To use those backends, simply edit `lithops.yaml` with your `bucket` and `execution_role`.
23 | 
24 | 3. Build a runtime image for Cubed
25 | ```bash
26 | export LITHOPS_CONFIG_FILE=$(pwd)/lithops.yaml
27 | lithops runtime build -b aws_lambda -f Dockerfile_dataretrieval dataretrieval-runtime
28 | ```
29 | 
30 | 4. Download the site list from ScienceBase using `wget` or navigate to the URL and copy the CVS into `nwqn_data_pull/`.
31 | ```bash
32 | wget https://www.sciencebase.gov/catalog/file/get/655d2063d34ee4b6e05cc9e6?f=__disk__b3%2F3e%2F5b%2Fb33e5b0038f004c2a48818d0fcc88a0921f3f689 -O NWQN_sites.csv
33 | ```
34 | 
35 | 5. Create a s3 bucket for the output, then set it as an environmental variable
36 | ```bash
37 | export DESTINATION_BUCKET=<path/to/bucket>
38 | ```
39 | 
40 | 6. Run the scripts
41 | ```bash
42 | python retrieve_nwqn_samples.py
43 | 
44 | python retrieve_nwqn_streamflow.py
45 | ```
46 | 
47 | ## Cleaning up
48 | To rebuild the Lithops image, delete the existing one by running
49 | ```bash
50 | lithops runtime delete -b aws_lambda -d dataretrieval-runtime
51 | ```
52 | 


--------------------------------------------------------------------------------
/demos/nwqn_data_pull/lithops.yaml:
--------------------------------------------------------------------------------
 1 | lithops:
 2 |     backend: aws_lambda
 3 |     storage: aws_s3
 4 | 
 5 | aws:
 6 |     region: us-west-2
 7 | 
 8 | aws_lambda:
 9 |     execution_role: arn:aws:iam::account-id:role/lambdaLithopsExecutionRole
10 |     runtime: dataretrieval-runtime
11 |     runtime_memory: 1024
12 |     runtime_timeout: 900
13 | 
14 | aws_s3:
15 |     bucket: arn:aws:s3:::the-name-of-your-bucket
16 | 


--------------------------------------------------------------------------------
/demos/nwqn_data_pull/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3
2 | dataretrieval[nldi]
3 | lithops
4 | pika
5 | ps_mem
6 | pyarrow
7 | s3fs
8 | tblib
9 | 


--------------------------------------------------------------------------------
/demos/nwqn_data_pull/retrieve_nwqn_samples.py:
--------------------------------------------------------------------------------
  1 | # Retrieve data from the National Water Quality Assessment Program (NAWQA)
  2 | 
  3 | import lithops
  4 | import math
  5 | import os
  6 | import pandas as pd
  7 | 
  8 | from random import randint
  9 | from time import sleep
 10 | from dataretrieval import nldi, nwis, wqp
 11 | 
 12 | DESTINATION_BUCKET = os.environ.get('DESTINATION_BUCKET')
 13 | PROJECT = "National Water Quality Assessment Program (NAWQA)"
 14 | # some sites are not found in NLDI, avoid them for now
 15 | NOT_FOUND_SITES = [
 16 |     "15565447",  # "USGS-"
 17 |     "15292700",
 18 | ]
 19 | BAD_GEOMETRY_SITES = [
 20 |     "06805500",
 21 |     "09306200",
 22 | ]
 23 | 
 24 | BAD_NLDI_SITES = NOT_FOUND_SITES + BAD_GEOMETRY_SITES
 25 | 
 26 | 
 27 | def map_retrieval(site):
 28 |     """Map function to pull data from NWIS and WQP"""
 29 |     print(f"Retrieving samples from site {site}")
 30 |     # skip bad sites
 31 |     if site in BAD_NLDI_SITES:
 32 |         site_list = [site]
 33 |     # else query slowly
 34 |     else:
 35 |         sleep(randint(0, 5))
 36 |         site_list = find_neighboring_sites(site)
 37 | 
 38 |     # reformat for wqp
 39 |     site_list = [f"USGS-{site}" for site in site_list]
 40 | 
 41 |     df, _ = wqp_get_results(siteid=site_list,
 42 |                             project=PROJECT,
 43 |                             )
 44 | 
 45 |     try:
 46 |         # merge sites
 47 |         df['MonitoringLocationIdentifier'] = f"USGS-{site}"
 48 |         df.astype(str).to_parquet(f's3://{DESTINATION_BUCKET}/nwqn-samples.parquet',
 49 |                                   engine='pyarrow',
 50 |                                   partition_cols=['MonitoringLocationIdentifier'],
 51 |                                   compression='zstd')
 52 |         # optionally, `return df` for further processing
 53 | 
 54 |     except Exception as e:
 55 |         print(f"No samples returned from site {site}: {e}")
 56 | 
 57 | 
 58 | def exponential_backoff(max_retries=5, base_delay=1):
 59 |     """Exponential backoff decorator with configurable retries and base delay"""
 60 |     def decorator(func):
 61 |         def wrapper(*args, **kwargs):
 62 |             attempts = 0
 63 |             while True:
 64 |                 try:
 65 |                     return func(*args, **kwargs)
 66 |                 except Exception as e:
 67 |                     attempts += 1
 68 |                     if attempts > max_retries:
 69 |                         raise e
 70 |                     wait_time = base_delay * (2 ** attempts)
 71 |                     print(f"Retrying in {wait_time} seconds...")
 72 |                     sleep(wait_time)
 73 |         return wrapper
 74 |     return decorator
 75 | 
 76 | 
 77 | @exponential_backoff(max_retries=5, base_delay=1)
 78 | def nwis_get_info(*args, **kwargs):
 79 |     return nwis.get_info(*args, **kwargs)
 80 | 
 81 | 
 82 | @exponential_backoff(max_retries=5, base_delay=1)
 83 | def wqp_get_results(*args, **kwargs):
 84 |     return wqp.get_results(*args, **kwargs)
 85 | 
 86 | 
 87 | @exponential_backoff(max_retries=3, base_delay=1)
 88 | def find_neighboring_sites(site, search_factor=0.1, fudge_factor=3.0):
 89 |     """Find sites upstream and downstream of the given site within a certain distance.
 90 | 
 91 |     TODO Use geoconnex to determine mainstem length
 92 | 
 93 |     Parameters
 94 |     ----------
 95 |     site : str
 96 |         8-digit site number.
 97 |     search_factor : float, optional
 98 |         The factor by which to multiply the watershed length to determine the
 99 |         search distance.
100 |     fudge_factor : float, optional
101 |         An additional fudge factor to apply to the search distance, because
102 |         watersheds are not circular.
103 |     """
104 |     site_df, _ = nwis_get_info(sites=site)
105 |     drain_area_sq_mi = site_df["drain_area_va"].values[0]
106 |     length = _estimate_watershed_length_km(drain_area_sq_mi)
107 |     search_distance = length * search_factor * fudge_factor
108 |     # clip between 1 and 9999km
109 |     search_distance = max(1.0, min(9999.0, search_distance))
110 | 
111 |     # get upstream and downstream sites
112 |     gdfs = [
113 |         nldi.get_features(
114 |             feature_source="WQP",
115 |             feature_id=f"USGS-{site}",
116 |             navigation_mode=mode,
117 |             distance=search_distance,
118 |             data_source="nwissite",
119 |             )
120 |         for mode in ["UM", "DM"]  # upstream and downstream
121 |     ]
122 | 
123 |     features = pd.concat(gdfs, ignore_index=True)
124 | 
125 |     df, _ = nwis_get_info(sites=list(features.identifier.str.strip('USGS-')))
126 |     # drop sites with disimilar different drainage areas
127 |     df = df.where(
128 |         (df["drain_area_va"] / drain_area_sq_mi) > search_factor,
129 |         ).dropna(how="all")
130 | 
131 |     site_list = df["site_no"].to_list()
132 | 
133 |     # include the original search site among the neighbors
134 |     if site not in site_list:
135 |         site_list.append(site)
136 | 
137 |     return site_list
138 | 
139 | 
140 | def _estimate_watershed_length_km(drain_area_sq_mi):
141 |     """Estimate the diameter assuming a circular watershed.
142 | 
143 |     Parameters
144 |     ----------
145 |     drain_area_sq_mi : float
146 |         The drainage area in square miles.
147 | 
148 |     Returns
149 |     -------
150 |     float
151 |         The diameter of the watershed in kilometers.
152 |     """
153 |     # assume a circular watershed
154 |     length_miles = 2 * (drain_area_sq_mi / math.pi) ** 0.5
155 |     # convert from miles to km
156 |     return length_miles * 1.60934
157 | 
158 | 
159 | if __name__ == "__main__":
160 |     project = "National Water Quality Assessment Program (NAWQA)"
161 | 
162 |     site_df = pd.read_csv(
163 |         'NWQN_sites.csv',
164 |         comment='#',
165 |         dtype={'SITE_QW_ID': str, 'SITE_FLOW_ID': str},
166 |         )
167 | 
168 |     site_list = site_df['SITE_QW_ID'].to_list()
169 |     #site_list = site_list[:2]  # prune for testing
170 | 
171 |     fexec = lithops.FunctionExecutor(config_file="lithops.yaml")
172 |     futures = fexec.map(map_retrieval, site_list)
173 | 
174 |     futures.get_result()
175 | 


--------------------------------------------------------------------------------
/demos/nwqn_data_pull/retrieve_nwqn_streamflow.py:
--------------------------------------------------------------------------------
  1 | # Retrieve data from the National Water Quality Assessment Program (NAWQA)
  2 | 
  3 | import lithops
  4 | import os
  5 | import numpy as np
  6 | import pandas as pd
  7 | 
  8 | 
  9 | from dataretrieval import nwis
 10 | from random import randint
 11 | from time import sleep
 12 | 
 13 | from retrieve_nwqn_samples import find_neighboring_sites, BAD_NLDI_SITES
 14 | 
 15 | DESTINATION_BUCKET = os.environ.get('DESTINATION_BUCKET')
 16 | START_DATE = "1991-01-01"
 17 | END_DATE = "2023-12-31"
 18 | 
 19 | def map_retrieval(site):
 20 |     """Map function to pull data from NWIS and WQP"""
 21 |     print(f"Retrieving daily streamflow from site {site}")
 22 | 
 23 |     if site in BAD_NLDI_SITES:
 24 |         site_list = [site]
 25 |     # else query slowly
 26 |     else:
 27 |         sleep(randint(0, 5))
 28 |         site_list = find_neighboring_sites(site)
 29 | 
 30 |     df, _ = nwis.get_dv(
 31 |         sites=site_list,
 32 |         start=START_DATE,
 33 |         end=END_DATE,
 34 |         parameterCd="00060",
 35 |     )
 36 | 
 37 |     # by default, site_no is not in the index if a single site is queried
 38 |     if "site_no" in df.columns:
 39 |         index_name = df.index.names[0]
 40 |         df.set_index(["site_no", df.index], inplace=True)
 41 |         df.index.names = ["site_no", index_name]
 42 | 
 43 |     print(len(df), "records retrieved")
 44 |     # process the results
 45 |     if not df.empty:
 46 |         # drop rows with missing values; neglect other 00060_* columns
 47 |         df = df.dropna(subset=["00060_Mean"])
 48 |         # fill missing codes to enable string operations
 49 |         df["00060_Mean_cd"] = df["00060_Mean_cd"].fillna("M")
 50 |         df = df[df["00060_Mean_cd"].str.contains("A")]
 51 |         df['00060_Mean'] = df['00060_Mean'].replace(-999999, np.nan)
 52 | 
 53 |         site_info, _ = nwis.get_info(sites=site_list)
 54 |         # USACE sites may have same site_no, which creates index conflicts later
 55 |         site_info = site_info[site_info["agency_cd"] == "USGS"]  # keep only USGS sites
 56 |         site_info = site_info.set_index("site_no")
 57 | 
 58 |         main_site = site_info.loc[site]
 59 |         main_site_drainage_area = main_site["drain_area_va"]
 60 | 
 61 |         # compute fraction of drainage area
 62 |         site_info = site_info[["drain_area_va"]].copy()
 63 |         site_info["drain_fraction"] = site_info["drain_area_va"] / main_site_drainage_area
 64 |         site_info["fraction_diff"] = np.abs(1 - site_info["drain_fraction"])
 65 | 
 66 |         # apply drainage area fraction
 67 |         df = pd.merge(df, site_info, left_index=True, right_index=True)
 68 |         df["00060_Mean"] *= site_info.loc[df.index.get_level_values("site_no"), "drain_fraction"].values
 69 | 
 70 |         # order sites by the difference in drainage area fraction
 71 |         fill_order = site_info.sort_values("fraction_diff", ascending=True)
 72 |         fill_order = fill_order.index.values
 73 | 
 74 |         flow_sites = df.index.get_level_values("site_no").values
 75 |         fill_order = set(fill_order).intersection(flow_sites)
 76 | 
 77 |         output = pd.DataFrame()
 78 | 
 79 |         # loop through sites and fill in missing flow values
 80 |         # going from most to least-similar drainage areas.
 81 |         for fill_site in fill_order:
 82 |             fill_data = df.loc[fill_site]
 83 |             output = update_dataframe(output, fill_data)
 84 | 
 85 |         output = output.drop(columns=["drain_area_va", "drain_fraction", "fraction_diff"])
 86 |         output["site_no"] = site
 87 | 
 88 |     else:
 89 |         print(f"No data retrieved for site {site}")
 90 |         return
 91 | 
 92 |     try:
 93 |         # merge sites
 94 |         output.astype(str).to_parquet(f's3://{DESTINATION_BUCKET}/nwqn-streamflow.parquet',
 95 |                                   engine='pyarrow',
 96 |                                   partition_cols=['site_no'],
 97 |                                   compression='zstd')
 98 |         # optionally, `return df` for further processing
 99 | 
100 |     except Exception as e:
101 |         print(f"Failed to write parquet: {e}")
102 | 
103 | 
104 | def update_dataframe(
105 |         original_df: pd.DataFrame,
106 |         new_df: pd.DataFrame,
107 |         overwrite: bool = False,
108 | ) -> pd.DataFrame:
109 |     """Update a DataFrame with values from another DataFrame.
110 | 
111 |     NOTE: this fuction does not handle MultiIndex DataFrames.
112 |     """
113 |     # Identify new rows in new_df that are not in original_df
114 |     new_rows = new_df[~new_df.index.isin(original_df.index)]
115 | 
116 |     # Concatenate new rows to original_df
117 |     original_df = pd.concat([original_df, new_rows]).sort_index()
118 | 
119 |     return original_df
120 | 
121 | 
122 | if __name__ == "__main__":
123 |     project = "National Water Quality Assessment Program (NAWQA)"
124 | 
125 |     site_df = pd.read_csv(
126 |         'NWQN_sites.csv',
127 |         comment='#',
128 |         dtype={'SITE_QW_ID': str, 'SITE_FLOW_ID': str},
129 |         )
130 | 
131 |     site_list = site_df['SITE_QW_ID'].to_list()
132 |     # site_list = site_list[:4]  # prune for testing
133 | 
134 |     fexec = lithops.FunctionExecutor(config_file="lithops.yaml")
135 |     futures = fexec.map(map_retrieval, site_list)
136 | 
137 |     futures.get_result()
138 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | 
 3 | # You can set these variables from the command line.
 4 | SPHINXOPTS    ?=
 5 | SPHINXBUILD   ?= sphinx-build
 6 | SPHINXPROJ    = dataretrieval
 7 | SOURCEDIR     = source
 8 | BUILDDIR      = build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help docs Makefile
15 | 
16 | 
17 | docs: test clean html
18 | 
19 | 
20 | clean :
21 | 	rm -rf ./build/
22 | 
23 | 
24 | test : clean
25 | 	@$(SPHINXBUILD) -b doctest "$(SOURCEDIR)" "$(BUILDDIR)"
26 | 	@$(SPHINXBUILD) -b linkcheck "$(SOURCEDIR)" "$(BUILDDIR)"
27 | 
28 | 
29 | # Catch-all target: route all unknown targets to Sphinx using the new
30 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
31 | %: clean Makefile
32 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
33 | 


--------------------------------------------------------------------------------
/docs/source/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DOI-USGS/dataretrieval-python/4b3a3e8fa408e8d01a3147f1cba8d5be4e1a0a09/docs/source/.nojekyll


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | # If extensions (or modules to document with autodoc) are in another directory,
  2 | # add these directories to sys.path here. If the directory is relative to the
  3 | # documentation root, use os.path.abspath to make it absolute, like shown here.
  4 | # Since we aren't installing package here, we mock imports of the dependencies.
  5 | 
  6 | # Relative paths so documentation can reference and include demos folder
  7 | import os
  8 | import sys
  9 | from importlib.metadata import version
 10 | 
 11 | # path to repository head
 12 | sys.path.insert(0, os.path.abspath('../..'))
 13 | 
 14 | # Project Information
 15 | project = 'dataretrieval'
 16 | release = version(project)
 17 | version = '.'.join(release.split('.')[:2])
 18 | author = 'Hodson et al'
 19 | 
 20 | # -- General configuration ------------------------------------------------
 21 | 
 22 | # Add any Sphinx extension module names here, as strings.
 23 | extensions = [
 24 |     'sphinx.ext.autodoc',
 25 |     'sphinx.ext.doctest',
 26 |     'sphinx.ext.autosummary',
 27 |     'sphinx.ext.napoleon',
 28 |     'sphinx.ext.todo',
 29 |     'sphinx.ext.coverage',
 30 |     'sphinx.ext.viewcode',
 31 |     'sphinx.ext.githubpages',
 32 |     'nbsphinx',
 33 |     'nbsphinx_link',
 34 | ]
 35 | 
 36 | # Add any paths that contain templates here, relative to this directory.
 37 | templates_path = ['_templates']
 38 | 
 39 | # suffix of source documents
 40 | source_suffix = '.rst'
 41 | 
 42 | # The main toctree document.
 43 | main_doc = 'index'
 44 | 
 45 | # The version info for the project you're documenting, acts as replacement for
 46 | # |version| and |release|, also used in various other places throughout the
 47 | # built documents.
 48 | 
 49 | # The language for content autogenerated by Sphinx. Refer to documentation
 50 | # for a list of supported languages.
 51 | #
 52 | # This is also used if you do content translation via gettext catalogs.
 53 | # Usually you set "language" from the command line for these cases.
 54 | language = 'en'
 55 | 
 56 | # List of patterns, relative to source directory, that match files and
 57 | # directories to ignore when looking for source files.
 58 | # This patterns also effect to html_static_path and html_extra_path
 59 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 60 | 
 61 | # The name of the Pygments (syntax highlighting) style to use.
 62 | pygments_style = 'default'
 63 | 
 64 | # If true, `todo` and `todoList` produce output, else they produce nothing.
 65 | todo_include_todos = True
 66 | 
 67 | # Napoleon settings
 68 | napoleon_google_docstring = False
 69 | napoleon_numpy_docstring = True
 70 | napoleon_include_init_with_doc = False
 71 | napoleon_include_private_with_doc = False
 72 | napoleon_include_special_with_doc = True
 73 | napoleon_use_admonition_for_examples = False
 74 | napoleon_use_admonition_for_notes = False
 75 | napoleon_use_admonition_for_references = False
 76 | napoleon_use_ivar = False
 77 | napoleon_use_param = True
 78 | napoleon_use_rtype = True
 79 | 
 80 | # Autosummary / Automodapi settings
 81 | autosummary_generate = True
 82 | automodapi_inheritance_diagram = False
 83 | autodoc_default_options = {
 84 |     'members': True,
 85 |     'inherited-members': False,
 86 |     'private-members': True,
 87 | }
 88 | 
 89 | # doctest
 90 | doctest_global_setup = '''
 91 | import dataretrieval
 92 | import numpy as np
 93 | import pandas as pd
 94 | import matplotlib
 95 | '''
 96 | 
 97 | # -- Options for HTML output ----------------------------------------------
 98 | 
 99 | # The theme to use for HTML and HTML Help pages.  See the documentation for
100 | # a list of builtin themes.
101 | 
102 | html_theme = 'sphinx_rtd_theme'
103 | 
104 | # Theme options are theme-specific and customize the look and feel of a theme
105 | # further.  For a list of options available for each theme, see the
106 | # documentation.
107 | 
108 | html_theme_options = {
109 |     'logo_only': False,
110 |     'display_version': True,
111 | }
112 | 
113 | # Add any paths that contain custom static files (such as style sheets) here,
114 | # relative to this directory. They are copied after the builtin static files,
115 | # so a file named "default.css" will overwrite the builtin "default.css".
116 | html_static_path = ['_static']
117 | 
118 | # -- Options for linkcheck -------------------------------------------
119 | 
120 | # Links to not "check" because they are problematic for the link checker
121 | linkcheck_ignore = [
122 |     r'https://streamstats.usgs.gov/streamstatsservices/#/',
123 |     r'https://www.waterqualitydata.us/public_srsnames/',
124 |     r'https://waterqualitydata.us',
125 |     r'https://github.com/USGS-python/dataretrieval/tree/main/demos/hydroshare',
126 | ]
127 | 
128 | # Some notebooks have warnings, which nbsphinx should ignore
129 | nbsphinx_allow_errors = True
130 | 


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_DailyValues_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_DailyValues_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_GroundwaterLevels_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_Measurements_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_ParameterCodes_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_Peaks_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_Ratings_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_SiteInfo_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_SiteInfo_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_SiteInventory_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_SiteInventory_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_Statistics_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_UnitValues_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_UnitValues_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_WaterSamples_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/USGS_dataretrieval_WaterUse_Examples.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/index.rst:
--------------------------------------------------------------------------------
 1 | .. examples:
 2 | 
 3 | ========
 4 | Examples
 5 | ========
 6 | 
 7 | Simple uses of the ``dataretrieval`` package
 8 | --------------------------------------------
 9 | 
10 | .. toctree::
11 |     :maxdepth: 2
12 | 
13 |     readme_examples
14 |     siteinfo_examples
15 | 
16 | 
17 | Example Notebooks from Hydroshare
18 | ---------------------------------
19 | A set of Jupyter Notebooks with Python code examples on how to use the
20 | ``dataretrieval`` package are available on the `Hydroshare`_ platform.
21 | We provide executed versions of these notebooks below; to download the
22 | ``.ipynb`` files for your own use, either visit the `Hydroshare`_ repository,
23 | or navigate to the `demos/hydroshare`_ subdirectory of the ``dataretrieval``
24 | project repository.
25 | 
26 | .. _Hydroshare: https://www.hydroshare.org/resource/c97c32ecf59b4dff90ef013030c54264/
27 | 
28 | .. _demos/hydroshare: https://github.com/DOI-USGS/dataretrieval-python/tree/main/demos/hydroshare
29 | 
30 | .. toctree::
31 |     :maxdepth: 1
32 | 
33 |     USGS_dataretrieval_DailyValues_Examples
34 |     USGS_dataretrieval_GroundwaterLevels_Examples
35 |     USGS_dataretrieval_Measurements_Examples
36 |     USGS_dataretrieval_ParameterCodes_Examples
37 |     USGS_dataretrieval_Peaks_Examples
38 |     USGS_dataretrieval_Ratings_Examples
39 |     USGS_dataretrieval_SiteInfo_Examples
40 |     USGS_dataretrieval_SiteInventory_Examples
41 |     USGS_dataretrieval_Statistics_Examples
42 |     USGS_dataretrieval_UnitValues_Examples
43 |     USGS_dataretrieval_WaterSamples_Examples
44 |     USGS_dataretrieval_WaterUse_Examples
45 | 
46 | 
47 | Using ``dataretrieval`` to obtain nation trends in peak annual streamflow
48 | -------------------------------------------------------------------------
49 | 
50 | .. toctree::
51 |     :maxdepth: 2
52 | 
53 |     nwisdemo01
54 | 
55 | 
56 | Duplicating the R ``dataRetrieval`` vignettes functionality
57 | -----------------------------------------------------------
58 | 
59 | .. note::
60 | 
61 |     Some of the larger (e.g., state-wide) examples have been commented out
62 |     in the interest of run-time for the notebook.
63 | 
64 | .. toctree::
65 |     :maxdepth: 2
66 | 
67 |     rvignettes


--------------------------------------------------------------------------------
/docs/source/examples/nwisdemo01.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/NWIS_demo_1.ipynb",
3 |     "extra-media": [
4 |         "../../../demos/datasets"
5 |     ]
6 | }


--------------------------------------------------------------------------------
/docs/source/examples/readme_examples.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Examples from the Readme file on retrieving NWIS data
 3 | -----------------------------------------------------
 4 | 
 5 | .. note::
 6 | 
 7 |     NWIS stands for the National Water Information System
 8 | 
 9 | 
10 | .. doctest::
11 | 
12 |     >>> # first import the functions for downloading data from NWIS
13 |     >>> import dataretrieval.nwis as nwis
14 | 
15 |     >>> # specify the USGS site code for which we want data.
16 |     >>> site = '03339000'
17 | 
18 |     >>> # get instantaneous values (iv)
19 |     >>> df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01')
20 | 
21 |     >>> df.head()
22 |                                00010 00010_cd   site_no  00060 00060_cd  ...  63680_ysi), [discontinued 10/5/21_cd 63680_hach  63680_hach_cd 99133  99133_cd
23 |     datetime                                                             ...
24 |     2017-12-31 06:00:00+00:00    1.0        A  03339000  140.0        A  ...                                     A        3.6              A  4.61         A
25 |     2017-12-31 06:15:00+00:00    1.0        A  03339000  138.0        A  ...                                     A        3.6              A  4.61         A
26 |     2017-12-31 06:30:00+00:00    1.0        A  03339000  139.0        A  ...                                     A        3.4              A  4.61         A
27 |     2017-12-31 06:45:00+00:00    1.0        A  03339000  139.0        A  ...                                     A        3.4              A  4.61         A
28 |     2017-12-31 07:00:00+00:00    1.0        A  03339000  139.0        A  ...                                     A        3.5              A  4.61         A
29 |     <BLANKLINE>
30 |     [5 rows x 21 columns]
31 | 
32 |     >>> # get water quality samples (qwdata)
33 |     >>> df2 = nwis.get_record(sites=site, service='qwdata', start='2018-12-01', end='2019-01-01')
34 | 
35 |     >>> print(df2)
36 |                               agency_cd   site_no   sample_dt sample_tm  sample_end_dt  sample_end_tm  ... p80154 p82398 p84164  p91157  p91158  p91159
37 |     datetime                                                                                           ...
38 |     2018-12-10 17:30:00+00:00      USGS  03339000  2018-12-10     11:30            NaN            NaN  ...     16     50   3060  0.0165  0.0141  0.0024
39 |     <BLANKLINE>
40 |     [1 rows x 33 columns]
41 | 
42 |     >>> # get basic info about the site
43 |     >>> df3 = nwis.get_record(sites=site, service='site')
44 | 
45 |     >>> print(df3)
46 |       agency_cd   site_no                         station_nm site_tp_cd  lat_va  long_va  ...  aqfr_cd  aqfr_type_cd well_depth_va hole_depth_va depth_src_cd project_no
47 |     0      USGS  03339000  VERMILION RIVER NEAR DANVILLE, IL         ST  400603   873550  ...      NaN           NaN           NaN           NaN          NaN        100
48 |     <BLANKLINE>
49 |     [1 rows x 42 columns]


--------------------------------------------------------------------------------
/docs/source/examples/rvignettes.nblink:
--------------------------------------------------------------------------------
1 | {
2 |     "path": "../../../demos/R Python Vignette equivalents.ipynb"
3 | }


--------------------------------------------------------------------------------
/docs/source/examples/siteinfo_examples.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Retrieving site information
 3 | ---------------------------
 4 | 
 5 | By default ``dataretrieval`` fetches the so-called "expanded" site date from
 6 | the NWIS web service. However there is an optional keyword parameter called
 7 | ``seriesCatalogOutput`` that can be set to "True" if you wish to retrieve the
 8 | detailed period of record information for a site instead. Refer to the
 9 | `NWIS water services documentation`_ for additional information. The below
10 | example illustrates the use of the ``seriesCatalogOutput`` switch and displays
11 | the resulting column names for the output dataframes (example prompted by
12 | `GitHub Issue #34`_).
13 | 
14 | .. _NWIS water services documentation: https://waterservices.usgs.gov/docs/site-service/site-service-details/
15 | 
16 | .. _GitHub Issue #34: https://github.com/DOI-USGS/dataretrieval-python/issues/34
17 | 
18 | .. doctest::
19 | 
20 |     # first import the functions for downloading data from NWIS
21 |     >>> import dataretrieval.nwis as nwis
22 | 
23 |     # fetch data from a major HUC basin with seriesCatalogOutput set to True
24 |     >>> df = nwis.get_record(huc='20', parameterCd='00060',
25 |     ...                      service='site', seriesCatalogOutput='True')
26 | 
27 |     >>> print(df.columns)
28 |     Index(['agency_cd', 'site_no', 'station_nm', 'site_tp_cd', 'dec_lat_va',
29 |            'dec_long_va', 'coord_acy_cd', 'dec_coord_datum_cd', 'alt_va',
30 |            'alt_acy_va', 'alt_datum_cd', 'huc_cd', 'data_type_cd', 'parm_cd',
31 |            'stat_cd', 'ts_id', 'loc_web_ds', 'medium_grp_cd', 'parm_grp_cd',
32 |            'srs_id', 'access_cd', 'begin_date', 'end_date', 'count_nu'],
33 |           dtype='object')
34 | 
35 |     # repeat the same query with seriesCatalogOutput set as False
36 |     >>> df = nwis.get_record(huc='20', parameterCd='00060',
37 |     ...                      service='site', seriesCatalogOutput='False')
38 | 
39 |     >>> print(df.columns)
40 |     Index(['agency_cd', 'site_no', 'station_nm', 'site_tp_cd', 'lat_va', 'long_va',
41 |            'dec_lat_va', 'dec_long_va', 'coord_meth_cd', 'coord_acy_cd',
42 |            'coord_datum_cd', 'dec_coord_datum_cd', 'district_cd', 'state_cd',
43 |            'county_cd', 'country_cd', 'land_net_ds', 'map_nm', 'map_scale_fc',
44 |            'alt_va', 'alt_meth_cd', 'alt_acy_va', 'alt_datum_cd', 'huc_cd',
45 |            'basin_cd', 'topo_cd', 'instruments_cd', 'construction_dt',
46 |            'inventory_dt', 'drain_area_va', 'contrib_drain_area_va', 'tz_cd',
47 |            'local_time_fg', 'reliability_cd', 'gw_file_cd', 'nat_aqfr_cd',
48 |            'aqfr_cd', 'aqfr_type_cd', 'well_depth_va', 'hole_depth_va',
49 |            'depth_src_cd', 'project_no'],
50 |           dtype='object')
51 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | Welcome
 2 | =======
 3 | 
 4 | Welcome to the documentation for the Python ``dataretrieval`` package.
 5 | ``dataretrieval`` is a Python alternative to the `USGS R dataRetrieval package`_
 6 | and is used to obtain USGS and EPA water quality data, streamflow data, and
 7 | metadata directly from webservices (see the
 8 | :doc:`data portals documentation </userguide/dataportals>` for additional
 9 | details about specific data sources).
10 | 
11 | .. _USGS R dataRetrieval package: https://github.com/DOI-USGS/dataRetrieval
12 | 
13 | 
14 | Table of Contents
15 | -----------------
16 | 
17 | .. toctree::
18 |    :maxdepth: 1
19 | 
20 |    meta/installing
21 |    userguide/index
22 |    examples/index
23 |    meta/contributing
24 |    meta/license
25 |    reference/index
26 | 


--------------------------------------------------------------------------------
/docs/source/meta/contributing.rst:
--------------------------------------------------------------------------------
  1 | Contributing
  2 | ============
  3 | 
  4 | Contributions to ``dataretrieval`` are welcome and greatly appreciated, but
  5 | please read this document before doing so.
  6 | 
  7 | 
  8 | Ways to contribute
  9 | ------------------
 10 | 
 11 | Reporting Bugs:
 12 | ^^^^^^^^^^^^^^^
 13 | 
 14 | Report bugs at https://github.com/DOI-USGS/dataretrieval-python/issues
 15 | 
 16 | When reporting a bug, please include:
 17 | 
 18 |     - Detailed steps to reproduce the bug
 19 |     - Your operating system name and version.
 20 |     - Any details about your local setup that might be helpful in troubleshooting.
 21 | 
 22 | Fixing Bugs:
 23 | ^^^^^^^^^^^^
 24 | 
 25 | Look through the GitHub issues for bugs. Anything tagged as a "bug" is open to
 26 | whomever wants to fix it.
 27 | 
 28 | 
 29 | Implementing Features:
 30 | ^^^^^^^^^^^^^^^^^^^^^^
 31 | 
 32 | Look through the GitHub issues for features. Anything tagged with "enhancement"
 33 | and "please-help" is open to whomever wants to implement it.
 34 | 
 35 | Please do not combine multiple feature enhancements into a single pull request.
 36 | 
 37 | Writing Documentation:
 38 | ^^^^^^^^^^^^^^^^^^^^^^
 39 | 
 40 | ``dataretrieval`` could always use more documentation, whether as part of the
 41 | official docs, in docstrings, or even in blog posts or articles.
 42 | 
 43 | Submitting Feedback:
 44 | ^^^^^^^^^^^^^^^^^^^^
 45 | 
 46 | The best way to send feedback is to file an issue at
 47 | https://github.com/DOI-USGS/dataretrieval-python/issues
 48 | 
 49 | If you are proposing a feature:
 50 | 
 51 |     - Explain in detail how it would work.
 52 |     - Keep the scope as narrow as possible, to make it easier to implement.
 53 | 
 54 | Contributor Guidelines
 55 | ----------------------
 56 | 
 57 | Pull Request Guidelines:
 58 | ^^^^^^^^^^^^^^^^^^^^^^^^
 59 | 
 60 | Before you submit a pull request, check that it meets these guidelines:
 61 | 
 62 | 1. Any pull request should include tests. However, a contribution with
 63 |    no tests is preferable to no contribution at all.
 64 | 2. If the pull request adds functionality, the docs should be updated. Put
 65 |    your new functionality into a function with a docstring, and add the
 66 |    feature to the list in README.md.
 67 | 3. The pull request should work for Python 3.6, 3.7, 3.8, and pass the GitHub
 68 |    Actions continuous integration pipelines.
 69 | 
 70 | 
 71 | Updating Package Version:
 72 | ^^^^^^^^^^^^^^^^^^^^^^^^^
 73 | 
 74 | Follow semantic versioning as best as possible. This means that changing the
 75 | first digit of the version indicates a breaking change. Any smaller changes
 76 | should attempt to maintain backwards-compatibility with previous code and
 77 | issue deprecation warnings for features or functionality that will be removed
 78 | or no longer be backwards-compatible in future releases.
 79 | 
 80 | When updating the package version, there are currently two places where this
 81 | must be done:
 82 | 
 83 | 1. In the `setup.py` file the version field should be updated
 84 | 2. In the `conf.py` file both the version and release fields can be updated
 85 | 
 86 | 
 87 | Coding Standards
 88 | ----------------
 89 | 
 90 |     - PEP8 (https://peps.python.org/pep-0008/)
 91 |     - Doc-strings should follow the NumPy standard (`example`_):
 92 | 
 93 | .. _example: https://www.sphinx-doc.org/en/master/usage/extensions/example_numpy.html
 94 | 
 95 |     - Example:
 96 | 
 97 |         .. code:: python
 98 | 
 99 |             def foo(param1, param2):
100 |             """Example function with types documented in the docstring.
101 | 
102 |             A more detailed description of the function and its implementation.
103 | 
104 |             Parameters
105 |             ----------
106 |             param1 : int
107 |                 The first parameter.
108 |             param2 : str
109 |                 The second parameter.
110 | 
111 |             Returns
112 |             -------
113 |             bool
114 |                 True if successful, False otherwise.
115 | 
116 |             Examples
117 |             --------
118 |             Examples should be written in doctest format and should demonstrate basic usage.
119 | 
120 |             .. doctest::
121 | 
122 |                 >>> type(1) is int
123 |                 True
124 | 
125 |             """
126 | 
127 |     - The public interface should emphasize functions over classes; however, classes can and should be used internally and in tests.
128 |     - Functions for downloading data from a specific web portal must be grouped within their own submodule.
129 |     - For example, all NWIS functions are located at :obj:`dataretrieval.nwis`.
130 | 
131 |     - Quotes via http://stackoverflow.com/a/56190/5549:
132 | 
133 |     - Use double quotes around strings that are used for interpolation or that are natural language messages
134 |     - Use single quotes for small symbol-like strings (but break the rules if the strings contain quotes)
135 |     - Use triple double quotes for doc-strings and raw string literals for regular expressions even if they aren't needed.
136 | 
137 |     - Example:
138 | 
139 |     .. code:: python
140 | 
141 |         LIGHT_MESSAGES = {
142 |             'English': "There are %(number_of_lights)s lights.",
143 |             'Pirate':  "Arr! Thar be %(number_of_lights)s lights."
144 |         }
145 | 
146 |         def lights_message(language, number_of_lights):
147 |             """Return a language-appropriate string reporting the light count."""
148 |             return LIGHT_MESSAGES[language] % locals()
149 | 
150 |         def is_pirate(message):
151 |             """Return True if the given message sounds piratical."""
152 |             return re.search(r"(?i)(arr|avast|yohoho)!", message) is not None
153 | 
154 | 
155 | Acknowledgements
156 | ----------------
157 | This document was adapted from the ``cookiecutter`` project's CONTRIBUTING file, which resides at
158 | https://github.com/cookiecutter/cookiecutter/blob/main/CONTRIBUTING.md
159 | Thank you to the ``cookiecutter`` team for helping streamline open-source development for the masses.


--------------------------------------------------------------------------------
/docs/source/meta/installing.rst:
--------------------------------------------------------------------------------
 1 | Installation Guide
 2 | ==================
 3 | 
 4 | Whether you are a user or developer we recommend installing ``dataretrieval``
 5 | in a virtual environment. This can be done using something like ``virtualenv``
 6 | or ``conda``. Package dependencies are listed in the `requirements.txt`_ file,
 7 | a full list of dependencies necessary for development are listed in the
 8 | `requirements-dev.txt`_ file.
 9 | 
10 | .. _requirements.txt: https://github.com/DOI-USGS/dataretrieval-python/blob/main/requirements.txt
11 | 
12 | .. _requirements-dev.txt: https://github.com/DOI-USGS/dataretrieval-python/blob/main/requirements-dev.txt
13 | 
14 | 
15 | User Installation
16 | -----------------
17 | 
18 | Via ``pip``:
19 | ^^^^^^^^^^^^
20 | To install the latest stable release of ``dataretrieval`` from `PyPI`_, run the
21 | following commands:
22 | 
23 | .. code-block:: bash
24 | 
25 |     $ pip install dataretrieval
26 | 
27 | .. _PyPI: https://pypi.org/project/dataretrieval
28 | 
29 | 
30 | Via ``conda``:
31 | ^^^^^^^^^^^^^^
32 | To install the latest stable release of ``dataretrieval`` from the
33 | `conda-forge channel`_, run the following commands:
34 | 
35 | .. code-block:: bash
36 | 
37 |     $ conda -c conda-forge install dataretrieval
38 | 
39 | .. _conda-forge channel: https://anaconda.org/conda-forge/dataretrieval
40 | 
41 | 
42 | Developer Installation
43 | ----------------------
44 | 
45 | To install ``dataretrieval`` for development, we recommend first forking
46 | the repository on GitHub. This will allow you to develop on your own
47 | feature branch, and propose changes as pull requests to the main branch of
48 | the repository.
49 | 
50 | The first step is to clone your fork of the repository:
51 | 
52 | .. code-block:: bash
53 | 
54 |     $ git clone https://github.com/DOI-USGS/dataretrieval-python.git
55 | 
56 | Then, set the cloned repository as your current working directory in your
57 | terminal and run the following commands to get an "editable" installation of
58 | the package for development:
59 | 
60 | .. code-block:: bash
61 | 
62 |     $ pip install -r requirements-dev.txt
63 |     $ pip install -e .
64 | 
65 | To check your installation you can run the tests with the following commands:
66 | 
67 | .. code-block:: bash
68 | 
69 |     $ cd tests
70 |     $ pytest
71 | 
72 | In order to fetch the latest version of ``dataretrieval``, we recommend
73 | defining the main repository as a remote `upstream` repository:
74 | 
75 | .. code-block:: bash
76 | 
77 |     $ git remote add upstream https://github.com/DOI-USGS/dataretrieval-python.git
78 | 
79 | You can also build the documentation locally by running the following commands:
80 | 
81 | .. code-block:: bash
82 | 
83 |     $ cd docs
84 |     $ make docs
85 | 
86 | This both tests the documentation (runs code blocks and checks links), and also
87 | locally *builds* the documentation, placing the HTML files within the
88 | ``docs/build/html`` directory. You can then open the ``index.html`` file in
89 | your browser to view the documentation.


--------------------------------------------------------------------------------
/docs/source/meta/license.rst:
--------------------------------------------------------------------------------
 1 | License and Disclaimer
 2 | ======================
 3 | 
 4 | Unless otherwise noted, this project is in the public domain in the United
 5 | States because it contains materials that originally came from the United
 6 | States Geological Survey, an agency of the United States Department of
 7 | Interior. For more information, see the `LICENSE.md`_ file. See the
 8 | `Disclaimer.md`_ file for more information about the disclaimer.
 9 | 
10 | .. _LICENSE.md: https://github.com/DOI-USGS/dataretrieval-python/blob/main/LICENSE.md
11 | 
12 | .. _Disclaimer.md: https://github.com/DOI-USGS/dataretrieval-python/blob/main/DISCLAIMER.md


--------------------------------------------------------------------------------
/docs/source/reference/index.rst:
--------------------------------------------------------------------------------
 1 | .. api:
 2 | 
 3 | =============
 4 | API reference
 5 | =============
 6 | 
 7 | .. toctree::
 8 |     :maxdepth: 1
 9 | 
10 |     nadp
11 |     nwis
12 |     samples
13 |     streamstats
14 |     utils
15 |     wqp
16 | 


--------------------------------------------------------------------------------
/docs/source/reference/nadp.rst:
--------------------------------------------------------------------------------
1 | .. _nadp
2 | 
3 | dataretrieval.nadp
4 | ------------------
5 | 
6 | .. automodule:: dataretrieval.nadp
7 |     :members:
8 |     :special-members:


--------------------------------------------------------------------------------
/docs/source/reference/nwis.rst:
--------------------------------------------------------------------------------
1 | .. _nwis
2 | 
3 | dataretrieval.nwis
4 | ------------------
5 | 
6 | .. automodule:: dataretrieval.nwis
7 |     :members:
8 |     :special-members:


--------------------------------------------------------------------------------
/docs/source/reference/samples.rst:
--------------------------------------------------------------------------------
1 | .. _samples
2 | 
3 | dataretrieval.samples
4 | -------------------------
5 | 
6 | .. automodule:: dataretrieval.samples
7 |     :members:
8 |     :special-members:


--------------------------------------------------------------------------------
/docs/source/reference/streamstats.rst:
--------------------------------------------------------------------------------
1 | .. _streamstats
2 | 
3 | dataretrieval.streamstats
4 | -------------------------
5 | 
6 | .. automodule:: dataretrieval.streamstats
7 |     :members:
8 |     :special-members:


--------------------------------------------------------------------------------
/docs/source/reference/utils.rst:
--------------------------------------------------------------------------------
1 | .. _utils
2 | 
3 | dataretrieval.utils
4 | -------------------
5 | 
6 | .. automodule:: dataretrieval.utils
7 |     :members:
8 |     :special-members:


--------------------------------------------------------------------------------
/docs/source/reference/wqp.rst:
--------------------------------------------------------------------------------
1 | .. _wqp
2 | 
3 | dataretrieval.wqp
4 | -----------------
5 | 
6 | .. automodule:: dataretrieval.wqp
7 |     :members:
8 |     :special-members:


--------------------------------------------------------------------------------
/docs/source/userguide/dataportals.rst:
--------------------------------------------------------------------------------
 1 | .. dataportals:
 2 | 
 3 | ============
 4 | Data Portals
 5 | ============
 6 | 
 7 | ``dataretrieval`` provides a number of functions to retrieve data from several
 8 | data portals, a table listing the portals and corresponding web addresses is
 9 | provided below.
10 | 
11 | +-----------------------------------+---------------------------------------------------------------+
12 | | Data Portal                       | Uniform Resource Locator (URL)                                |
13 | +===================================+===============================================================+
14 | | National Water Information System | https://waterdata.usgs.gov/nwis                               |
15 | +-----------------------------------+---------------------------------------------------------------+
16 | | National Trends Network           | https://nadp.slh.wisc.edu/networks/national-trends-network    |
17 | +-----------------------------------+---------------------------------------------------------------+
18 | | Mercury Deposition Network        | https://nadp.slh.wisc.edu/networks/mercury-deposition-network |
19 | +-----------------------------------+---------------------------------------------------------------+
20 | | USGS Samples                      | https://waterdata.usgs.gov/download-samples/                  |
21 | +-----------------------------------+---------------------------------------------------------------+
22 | | Streamstats                       | https://streamstats.usgs.gov                                  |
23 | +-----------------------------------+---------------------------------------------------------------+
24 | | Water Quality Portal              | https://waterqualitydata.us                                   |
25 | +-----------------------------------+---------------------------------------------------------------+
26 | | Water Services                    | https://waterservices.usgs.gov                                |
27 | +-----------------------------------+---------------------------------------------------------------+
28 | 


--------------------------------------------------------------------------------
/docs/source/userguide/index.rst:
--------------------------------------------------------------------------------
 1 | .. userguide:
 2 | 
 3 | ==========
 4 | User Guide
 5 | ==========
 6 | 
 7 | Topic guides to provide additional information about various aspects of
 8 | ``dataretrieval``.
 9 | 
10 | Contents
11 | --------
12 | 
13 | .. toctree::
14 |     :maxdepth: 1
15 | 
16 |     timeconventions
17 |     dataportals
18 | 


--------------------------------------------------------------------------------
/docs/source/userguide/timeconventions.rst:
--------------------------------------------------------------------------------
 1 | .. timeconventions:
 2 | 
 3 | Datetime Information
 4 | --------------------
 5 | 
 6 | ``dataretrieval`` attempts to normalize time data to UTC time when converting
 7 | web service data into dataframes. To do this, in-built pandas functions are
 8 | used; either :obj:`pandas.to_datetime()` during the initial datetime object
 9 | conversion, or :obj:`pandas.DataFrame.tz_localize()` if the datetime objects
10 | exist but are not UTC-localized. In most cases (single-site and multi-site),
11 | ``dataretrieval`` assigns the datetime information as the dataframe *index*,
12 | the exception to this is when incomplete datetime information is available, in
13 | these cases integers are used as the dataframe index (see `PR#58`_ for more
14 | details).
15 | 
16 | .. _PR#58: https://github.com/DOI-USGS/dataretrieval-python/pull/58
17 | 
18 | 
19 | Inspecting Timestamps
20 | *********************
21 | 
22 | For single sites, the index of the returned dataframe contains pandas
23 | timestamps.
24 | 
25 | .. code:: python
26 | 
27 |     >>> import dataretrieval.nwis as nwis
28 |     >>> site = '03339000'
29 |     >>> df = nwis.get_record(sites=site, service='peaks',
30 |     ...                      start='2015-01-01', end='2017-12-31')
31 |     >>> print(df)
32 |                               agency_cd   site_no peak_tm  peak_va peak_cd  gage_ht  gage_ht_cd  year_last_pk  ag_dt  ag_tm  ag_gage_ht  ag_gage_ht_cd
33 |     datetime
34 |     2015-06-08 00:00:00+00:00      USGS  03339000   17:30    25100       C    22.83         NaN           NaN    NaN    NaN         NaN            NaN
35 |     2015-12-29 00:00:00+00:00      USGS  03339000   18:45    37600       C    26.66         NaN           NaN    NaN    NaN         NaN            NaN
36 |     2017-05-05 00:00:00+00:00      USGS  03339000   04:45    17000       C    18.47         NaN           NaN    NaN    NaN         NaN            NaN
37 | 
38 | Here the index of the dataframe ``df`` is a set of datetime objects. Each has
39 | the format, ``YYYY-MM-DD HH:MM:SS+HH:MM``. Because these timestamps are
40 | localized to be in UTC, the expected offset (``+HH:MM``) is ``+00:00``.
41 | These values can be converted to a local timezone of your choosing using
42 | :obj:`pandas` functionality.
43 | 
44 | .. code:: python
45 | 
46 |     >>> df.index = df.index.tz_convert(tz='America/New_York')
47 |     >>> print(df)
48 |                               agency_cd   site_no peak_tm  peak_va peak_cd  gage_ht  gage_ht_cd  year_last_pk  ag_dt  ag_tm  ag_gage_ht  ag_gage_ht_cd
49 |     datetime
50 |     2015-06-07 20:00:00-04:00      USGS  03339000   17:30    25100       C    22.83         NaN           NaN    NaN    NaN         NaN            NaN
51 |     2015-12-28 19:00:00-05:00      USGS  03339000   18:45    37600       C    26.66         NaN           NaN    NaN    NaN         NaN            NaN
52 |     2017-05-04 20:00:00-04:00      USGS  03339000   04:45    17000       C    18.47         NaN           NaN    NaN    NaN         NaN            NaN
53 | 
54 | Above, the index was converted to localize the timestamps to New York.
55 | In the updated dataframe index, the resulting timestamps now have offsets of
56 | ``-04:00`` and ``-05:00`` as New York is either 4 or 5 hours behind UTC
57 | depending on the time of year (due to daylight savings).
58 | 
59 | When information for multiple sites is requested, ``dataretrieval`` creates a
60 | dataframe with a multi-index, with the first entry containing the site number,
61 | and the second containing the datetime information.
62 | 
63 | .. doctest::
64 | 
65 |     >>> import dataretrieval.nwis as nwis
66 |     >>> sites = ['180049066381200', '290000095192602']
67 |     >>> df = nwis.get_record(sites=sites, service='gwlevels',
68 |     ...                      start='2021-10-01', end='2022-01-01')
69 |     >>> df
70 |                                               agency_cd site_tp_cd      lev_dt lev_tm lev_tz_cd  ...  lev_dt_acy_cd  lev_acy_cd  lev_src_cd  lev_meth_cd lev_age_cd
71 |     site_no         datetime                                                                     ...
72 |     180049066381200 2021-10-04 19:54:00+00:00      USGS         GW  2021-10-04  19:54     +0000  ...              m         NaN           S            S          A
73 |                     2021-11-16 14:28:00+00:00      USGS         GW  2021-11-16  14:28     +0000  ...              m         NaN           S            S          A
74 |                     2021-12-09 10:43:00+00:00      USGS         GW  2021-12-09  10:43     +0000  ...              m         NaN           S            S          A
75 |     290000095192602 2021-12-08 19:07:00+00:00      USGS         GW  2021-12-08  19:07     +0000  ...              m         NaN           S            S          P
76 |     <BLANKLINE>
77 |     [4 rows x 15 columns]
78 | 
79 | Here note that the default datetime index information returned is also UTC
80 | localized, and therefore the offset values are ``+00:00``.


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "dataretrieval"
 7 | description = "Discover and retrieve water data from U.S. federal hydrologic web services."
 8 | readme = "README.md"
 9 | requires-python = ">=3.8"
10 | keywords = ["USGS", "water data"]
11 | license = {file = "LICENSE.md"}
12 | authors = [
13 |   {name = "Timothy Hodson", email = "thodson@usgs.gov"},
14 | ]
15 | maintainers = [
16 |   {name = "Elise Hinman", email = "ehinman@usgs.gov"},
17 | ]
18 | classifiers = [
19 |     "Programming Language :: Python :: 3",
20 | ]
21 | dependencies = [
22 |     "requests",
23 |     "pandas==2.*",
24 | ]
25 | dynamic = ["version"]
26 | 
27 | [tool.setuptools]
28 | packages = ["dataretrieval", "dataretrieval.codes"]
29 | 
30 | [project.optional-dependencies]
31 | test = [
32 |   "pytest > 5.0.0",
33 |   "pytest-cov[all]",
34 |   "coverage",
35 |   "requests-mock",
36 |   "flake8",
37 | ]
38 | doc = [
39 |   "sphinx",
40 |   "sphinx-rtd-theme",
41 |   "nbsphinx",
42 |   "nbsphinx_link",
43 |   "ipython",
44 |   "ipykernel",
45 |   "matplotlib",
46 | ]
47 | nldi = [
48 |   'geopandas>=0.10'
49 | ]
50 | 
51 | [project.urls]
52 | homepage = "https://github.com/DOI-USGS/dataretrieval-python"
53 | documentation = "https://doi-usgs.github.io/dataretrieval-python/"
54 | repository = "https://github.com/DOI-USGS/dataretrieval-python.git"
55 | 
56 | [tool.setuptools_scm]
57 | write_to = "dataretrieval/_version.py"
58 | 
59 | [tool.isort]
60 | profile = "black"
61 | 
62 | [tool.black]
63 | skip-string-normalization = true
64 | 
65 | [tool.ruff.format]
66 | quote-style = "double"
67 | docstring-code-format = true
68 | docstring-code-line-length = 72
69 | 
70 | [tool.ruff.lint]
71 | preview = true
72 | # Default ["E4", "E7", "E9", and "F"] --> Pyflakes ("F") and pycodestyle ("E")
73 | extend-select = [
74 |   "B", "I", "Q",
75 |   "W291", "W292", "W293", "W605",
76 |   "E231", "E252", "E261", "E262", "E303", "E501",
77 | ]
78 | 
79 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | numpy<2
 2 | pandas==2.*
 3 | geopandas==0.14.*
 4 | scipy
 5 | python-dateutil
 6 | requests
 7 | requests-mock
 8 | coverage
 9 | pytest
10 | flake8
11 | sphinx
12 | sphinx-rtd-theme
13 | ipython
14 | ipykernel
15 | nbsphinx
16 | nbsphinx_link
17 | matplotlib
18 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup()
4 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DOI-USGS/dataretrieval-python/4b3a3e8fa408e8d01a3147f1cba8d5be4e1a0a09/tests/__init__.py


--------------------------------------------------------------------------------
/tests/data/nldi_get_basin.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "type": "FeatureCollection",
  3 |   "features": [
  4 |     {
  5 |       "type": "Feature",
  6 |       "geometry": {
  7 |         "type": "Polygon",
  8 |         "coordinates": [
  9 |           [
 10 |             [-89.467166934, 43.120532162],
 11 |             [-89.461615766, 43.12632345],
 12 |             [-89.457260835, 43.127037725],
 13 |             [-89.452061009, 43.124187365],
 14 |             [-89.4476819, 43.119429024],
 15 |             [-89.439928469, 43.119445644],
 16 |             [-89.428664489, 43.113435904],
 17 |             [-89.410715151, 43.106836358],
 18 |             [-89.413577818, 43.100214712],
 19 |             [-89.414991109, 43.089543698],
 20 |             [-89.422806579, 43.085870913],
 21 |             [-89.430063704, 43.088309393],
 22 |             [-89.441315037, 43.083364081],
 23 |             [-89.444278818, 43.084620004],
 24 |             [-89.450179911, 43.081715309],
 25 |             [-89.451427898, 43.079513559],
 26 |             [-89.464561163, 43.078447595],
 27 |             [-89.466686537, 43.076682871],
 28 |             [-89.4622099, 43.07306458],
 29 |             [-89.465497169, 43.073234059],
 30 |             [-89.468704754, 43.07134039],
 31 |             [-89.469622381, 43.068424824],
 32 |             [-89.467935617, 43.067497217],
 33 |             [-89.470726914, 43.06540292],
 34 |             [-89.470430177, 43.062692826],
 35 |             [-89.466400073, 43.056302895],
 36 |             [-89.469802035, 43.053666055],
 37 |             [-89.47601985, 43.057901927],
 38 |             [-89.476585982, 43.060280486],
 39 |             [-89.478603327, 43.060411566],
 40 |             [-89.483458574, 43.058438858],
 41 |             [-89.484967442, 43.056130059],
 42 |             [-89.491406587, 43.054388451],
 43 |             [-89.494069541, 43.055509411],
 44 |             [-89.493868228, 43.06153445],
 45 |             [-89.500475724, 43.063815698],
 46 |             [-89.506329775, 43.06379093],
 47 |             [-89.507540669, 43.061129535],
 48 |             [-89.516487667, 43.05889596],
 49 |             [-89.524196291, 43.0484005],
 50 |             [-89.527027161, 43.049865572],
 51 |             [-89.531212693, 43.048578393],
 52 |             [-89.53168683, 43.05078274],
 53 |             [-89.537781776, 43.052965206],
 54 |             [-89.537977928, 43.05550807],
 55 |             [-89.544353411, 43.058384424],
 56 |             [-89.545783506, 43.061283656],
 57 |             [-89.551286859, 43.061283754],
 58 |             [-89.554899419, 43.062989677],
 59 |             [-89.555177648, 43.065189554],
 60 |             [-89.55939716, 43.069584622],
 61 |             [-89.552552004, 43.070032995],
 62 |             [-89.551027329, 43.072160878],
 63 |             [-89.55664221, 43.078164337],
 64 |             [-89.561552454, 43.080518638],
 65 |             [-89.557979773, 43.081411202],
 66 |             [-89.553540642, 43.086194967],
 67 |             [-89.548701193, 43.086177316],
 68 |             [-89.546825331, 43.088023965],
 69 |             [-89.543205962, 43.087800221],
 70 |             [-89.540831467, 43.089363501],
 71 |             [-89.536587878, 43.095690791],
 72 |             [-89.536402562, 43.103900066],
 73 |             [-89.539222509, 43.106589488],
 74 |             [-89.543754931, 43.106648012],
 75 |             [-89.545473151, 43.108651969],
 76 |             [-89.551215165, 43.105435169],
 77 |             [-89.562937764, 43.104929008],
 78 |             [-89.571631233, 43.102745105],
 79 |             [-89.577430373, 43.106944886],
 80 |             [-89.575279549, 43.112421282],
 81 |             [-89.585534254, 43.110302501],
 82 |             [-89.590268184, 43.11100234],
 83 |             [-89.591233389, 43.112975078],
 84 |             [-89.590180668, 43.11496155],
 85 |             [-89.593396468, 43.118692324],
 86 |             [-89.590911252, 43.118300423],
 87 |             [-89.585126608, 43.12232665],
 88 |             [-89.588527844, 43.125843725],
 89 |             [-89.594046461, 43.126263171],
 90 |             [-89.584750406, 43.134728013],
 91 |             [-89.58703379, 43.136667616],
 92 |             [-89.58646025, 43.139891225],
 93 |             [-89.58311069, 43.140790363],
 94 |             [-89.58050643, 43.147945499],
 95 |             [-89.577277976, 43.149190149],
 96 |             [-89.577524762, 43.155001208],
 97 |             [-89.575687513, 43.156361384],
 98 |             [-89.574702434, 43.160718603],
 99 |             [-89.575498624, 43.163477344],
100 |             [-89.572475709, 43.166520978],
101 |             [-89.573423699, 43.16805986],
102 |             [-89.571498421, 43.168773113],
103 |             [-89.561668082, 43.160869482],
104 |             [-89.556299248, 43.163934156],
105 |             [-89.5530407, 43.163969446],
106 |             [-89.553533801, 43.154687139],
107 |             [-89.542983929, 43.149942779],
108 |             [-89.543949065, 43.14881666],
109 |             [-89.541736611, 43.147335113],
110 |             [-89.544166951, 43.145507198],
111 |             [-89.540220508, 43.141656916],
112 |             [-89.536084993, 43.147942882],
113 |             [-89.529435603, 43.145701478],
114 |             [-89.526609809, 43.142382532],
115 |             [-89.530611508, 43.138913019],
116 |             [-89.526215839, 43.137656712],
117 |             [-89.525317304, 43.134190086],
118 |             [-89.521417176, 43.136858837],
119 |             [-89.515079469, 43.136375804],
120 |             [-89.512331543, 43.131812042],
121 |             [-89.509033557, 43.130035294],
122 |             [-89.499032624, 43.13174139],
123 |             [-89.495385036, 43.1363024],
124 |             [-89.476329648, 43.136222116],
125 |             [-89.475062917, 43.133358252],
126 |             [-89.479607077, 43.13215007],
127 |             [-89.482220611, 43.126289637],
128 |             [-89.475491468, 43.126823191],
129 |             [-89.471635553, 43.125462527],
130 |             [-89.471517511, 43.122731655],
131 |             [-89.467166934, 43.120532162]
132 |           ]
133 |         ]
134 |       },
135 |       "properties": {}
136 |     }
137 |   ]
138 | }
139 | 


--------------------------------------------------------------------------------
/tests/data/nldi_get_features_by_feature_source_with_nav_mode.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "features": [
  3 |     {
  4 |       "geometry": {
  5 |         "coordinates": [-89.5361111, 43.1111111],
  6 |         "type": "Point"
  7 |       },
  8 |       "type": "Feature",
  9 |       "properties": {
 10 |         "identifier": "USGS-05427943",
 11 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427943/navigation",
 12 |         "measure": 0,
 13 |         "reachcode": "07090002007651",
 14 |         "name": "PHEASANT BRANCH AT AIRPORT ROAD NEAR MIDDLETON, WI",
 15 |         "source": "nwissite",
 16 |         "sourceName": "NWIS Surface Water Sites",
 17 |         "comid": "13293676",
 18 |         "type": "hydrolocation",
 19 |         "uri": "https://waterdata.usgs.gov/monitoring-location/05427943",
 20 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
 21 |       }
 22 |     },
 23 |     {
 24 |       "geometry": {
 25 |         "coordinates": [-89.4555556, 43.0998611],
 26 |         "type": "Point"
 27 |       },
 28 |       "type": "Feature",
 29 |       "properties": {
 30 |         "identifier": "USGS-430600089272001",
 31 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-430600089272001/navigation",
 32 |         "measure": 59.3341,
 33 |         "reachcode": "07090002008384",
 34 |         "name": "LAKE MENDOTA, WEST BAY, AT MADISON, WI",
 35 |         "source": "nwissite",
 36 |         "sourceName": "NWIS Surface Water Sites",
 37 |         "comid": "13294314",
 38 |         "type": "hydrolocation",
 39 |         "uri": "https://waterdata.usgs.gov/monitoring-location/430600089272001",
 40 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
 41 |       }
 42 |     },
 43 |     {
 44 |       "geometry": {
 45 |         "coordinates": [-89.52151, 43.09860617],
 46 |         "type": "Point"
 47 |       },
 48 |       "type": "Feature",
 49 |       "properties": {
 50 |         "identifier": "USGS-054279465",
 51 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-054279465/navigation",
 52 |         "measure": 53.7958,
 53 |         "reachcode": "07090002007650",
 54 |         "name": "S FORK PHEASANT BRANCH @ DEMING WAY @ MIDDLETON,WI",
 55 |         "source": "nwissite",
 56 |         "sourceName": "NWIS Surface Water Sites",
 57 |         "comid": "13294264",
 58 |         "type": "hydrolocation",
 59 |         "uri": "https://waterdata.usgs.gov/monitoring-location/054279465",
 60 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
 61 |       }
 62 |     },
 63 |     {
 64 |       "geometry": {
 65 |         "coordinates": [-89.493454, 43.10443947],
 66 |         "type": "Point"
 67 |       },
 68 |       "type": "Feature",
 69 |       "properties": {
 70 |         "identifier": "USGS-05427950",
 71 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427950/navigation",
 72 |         "measure": 2.32298,
 73 |         "reachcode": "07090002007650",
 74 |         "name": "PHEASANT BRANCH AT CENTURY AVE AT MIDDLETON, WI",
 75 |         "source": "nwissite",
 76 |         "sourceName": "NWIS Surface Water Sites",
 77 |         "comid": "13294264",
 78 |         "type": "hydrolocation",
 79 |         "uri": "https://waterdata.usgs.gov/monitoring-location/05427950",
 80 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
 81 |       }
 82 |     },
 83 |     {
 84 |       "geometry": {
 85 |         "coordinates": [-89.5287322, 43.1044393],
 86 |         "type": "Point"
 87 |       },
 88 |       "type": "Feature",
 89 |       "properties": {
 90 |         "identifier": "USGS-054279435",
 91 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-054279435/navigation",
 92 |         "measure": 72.0559,
 93 |         "reachcode": "07090002007650",
 94 |         "name": "PHEASANT BR W. OF CONFLUENCE POND @ MIDDLETON, WI",
 95 |         "source": "nwissite",
 96 |         "sourceName": "NWIS Surface Water Sites",
 97 |         "comid": "13294264",
 98 |         "type": "hydrolocation",
 99 |         "uri": "https://waterdata.usgs.gov/monitoring-location/054279435",
100 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
101 |       }
102 |     },
103 |     {
104 |       "geometry": {
105 |         "coordinates": [-89.5116667, 43.1033333],
106 |         "type": "Point"
107 |       },
108 |       "type": "Feature",
109 |       "properties": {
110 |         "identifier": "USGS-05427948",
111 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427948/navigation",
112 |         "measure": 40.6051151144,
113 |         "reachcode": "07090002007650",
114 |         "name": "PHEASANT BRANCH AT MIDDLETON, WI",
115 |         "source": "nwissite",
116 |         "sourceName": "NWIS Surface Water Sites",
117 |         "comid": "13294264",
118 |         "type": "hydrolocation",
119 |         "uri": "https://waterdata.usgs.gov/monitoring-location/05427948",
120 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
121 |       }
122 |     },
123 |     {
124 |       "geometry": {
125 |         "coordinates": [-89.5167877, 43.1030505],
126 |         "type": "Point"
127 |       },
128 |       "type": "Feature",
129 |       "properties": {
130 |         "identifier": "USGS-054279475",
131 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-054279475/navigation",
132 |         "measure": 48.974,
133 |         "reachcode": "07090002007650",
134 |         "name": "PHEASANT BRANCH UPSTREAM OF HWY 12 @ MIDDLETON, WI",
135 |         "source": "nwissite",
136 |         "sourceName": "NWIS Surface Water Sites",
137 |         "comid": "13294264",
138 |         "type": "hydrolocation",
139 |         "uri": "https://waterdata.usgs.gov/monitoring-location/054279475",
140 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
141 |       }
142 |     },
143 |     {
144 |       "geometry": {
145 |         "coordinates": [-89.5138889, 43.10777778],
146 |         "type": "Point"
147 |       },
148 |       "type": "Feature",
149 |       "properties": {
150 |         "identifier": "USGS-430628089305001",
151 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-430628089305001/navigation",
152 |         "measure": 43.532,
153 |         "reachcode": "07090002007650",
154 |         "name": "GW QUALITY ASSURANCE-NAWQA WAREHOUSE-MIDDLETON, WI",
155 |         "source": "nwissite",
156 |         "sourceName": "NWIS Surface Water Sites",
157 |         "comid": "13294264",
158 |         "type": "hydrolocation",
159 |         "uri": "https://waterdata.usgs.gov/monitoring-location/430628089305001",
160 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
161 |       }
162 |     },
163 |     {
164 |       "geometry": {
165 |         "coordinates": [-89.4837316, 43.10607834],
166 |         "type": "Point"
167 |       },
168 |       "type": "Feature",
169 |       "properties": {
170 |         "identifier": "USGS-05427952",
171 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427952/navigation",
172 |         "measure": 46.391,
173 |         "reachcode": "07090002007647",
174 |         "name": "PHEASANT BRANCH AT MOUTH AT MIDDLETON, WI",
175 |         "source": "nwissite",
176 |         "sourceName": "NWIS Surface Water Sites",
177 |         "comid": "13293696",
178 |         "type": "hydrolocation",
179 |         "uri": "https://waterdata.usgs.gov/monitoring-location/05427952",
180 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
181 |       }
182 |     }
183 |   ],
184 |   "type": "FeatureCollection"
185 | }
186 | 


--------------------------------------------------------------------------------
/tests/data/nldi_get_features_by_feature_source_without_nav_mode.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "type": "FeatureCollection",
 3 |   "features": [
 4 |     {
 5 |       "type": "Feature",
 6 |       "geometry": {
 7 |         "type": "Point",
 8 |         "coordinates": [-89.5098433, 43.0872176]
 9 |       },
10 |       "properties": {
11 |         "identifier": "USGS-054279485",
12 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/WQP/USGS-054279485/navigation",
13 |         "name": "STRICKER'S POND AT MIDDLETON, WI",
14 |         "source": "WQP",
15 |         "sourceName": "Water Quality Portal",
16 |         "comid": "13294314",
17 |         "type": "varies",
18 |         "uri": "https://www.waterqualitydata.us/provider/NWIS/USGS-WI/USGS-054279485/",
19 |         "mainstem": "https://geoconnex.us/ref/mainstems/575519"
20 |       }
21 |     }
22 |   ]
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/data/nldi_get_features_by_lat_long.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "type": "FeatureCollection",
 3 |   "features": [
 4 |     {
 5 |       "type": "Feature",
 6 |       "geometry": {
 7 |         "type": "LineString",
 8 |         "coordinates": [
 9 |           [-89.482287705, 43.1049596],
10 |           [-89.482173502, 43.104764298],
11 |           [-89.481962003, 43.104193598],
12 |           [-89.478003301, 43.1009003],
13 |           [-89.474274702, 43.0990991],
14 |           [-89.471394107, 43.098147698],
15 |           [-89.470654801, 43.098110899],
16 |           [-89.469584204, 43.097653598],
17 |           [-89.455723904, 43.096961297],
18 |           [-89.4536274, 43.097529493],
19 |           [-89.451938607, 43.097714297],
20 |           [-89.450211607, 43.098300897],
21 |           [-89.449472405, 43.098263897],
22 |           [-89.448879704, 43.098638095],
23 |           [-89.448325306, 43.098610297],
24 |           [-89.4471654, 43.099090695],
25 |           [-89.446375206, 43.099589594],
26 |           [-89.445820704, 43.0995619],
27 |           [-89.441537805, 43.101635799],
28 |           [-89.435227506, 43.105492599],
29 |           [-89.433882602, 43.105963595],
30 |           [-89.433328107, 43.105935797],
31 |           [-89.432537705, 43.106434599],
32 |           [-89.430083804, 43.106849998],
33 |           [-89.429293305, 43.1073488],
34 |           [-89.426310502, 43.107468195],
35 |           [-89.410033204, 43.106784396]
36 |         ]
37 |       },
38 |       "properties": {
39 |         "identifier": "13294314",
40 |         "navigation": "https://api.water.usgs.gov/nldi/linked-data/comid/13294314/navigation",
41 |         "source": "comid",
42 |         "sourceName": "NHDPlus comid",
43 |         "comid": "13294314"
44 |       }
45 |     }
46 |   ]
47 | }
48 | 


--------------------------------------------------------------------------------
/tests/data/nldi_get_flowlines.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "type": "FeatureCollection",
 3 |   "features": [
 4 |     {
 5 |       "type": "Feature",
 6 |       "geometry": {
 7 |         "type": "LineString",
 8 |         "coordinates": [
 9 |           [-89.482287705, 43.1049596],
10 |           [-89.482173502, 43.104764298],
11 |           [-89.481962003, 43.104193598],
12 |           [-89.478003301, 43.1009003],
13 |           [-89.474274702, 43.0990991],
14 |           [-89.471394107, 43.098147698],
15 |           [-89.470654801, 43.098110899],
16 |           [-89.469584204, 43.097653598],
17 |           [-89.455723904, 43.096961297],
18 |           [-89.4536274, 43.097529493],
19 |           [-89.451938607, 43.097714297],
20 |           [-89.450211607, 43.098300897],
21 |           [-89.449472405, 43.098263897],
22 |           [-89.448879704, 43.098638095],
23 |           [-89.448325306, 43.098610297],
24 |           [-89.4471654, 43.099090695],
25 |           [-89.446375206, 43.099589594],
26 |           [-89.445820704, 43.0995619],
27 |           [-89.441537805, 43.101635799],
28 |           [-89.435227506, 43.105492599],
29 |           [-89.433882602, 43.105963595],
30 |           [-89.433328107, 43.105935797],
31 |           [-89.432537705, 43.106434599],
32 |           [-89.430083804, 43.106849998],
33 |           [-89.429293305, 43.1073488],
34 |           [-89.426310502, 43.107468195],
35 |           [-89.410033204, 43.106784396]
36 |         ]
37 |       },
38 |       "properties": {
39 |         "nhdplus_comid": "13294314"
40 |       }
41 |     }
42 |   ]
43 | }
44 | 


--------------------------------------------------------------------------------
/tests/data/water_use_national.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # File created on 2020-02-28 18:01:44 EST
 3 | # Refresh Date: 2018-06
 4 | #
 5 | # U.S. Geological Survey
 6 | #
 7 | # This file contains selected WaterUse data
 8 | #
 9 | # The data you have secured from the USGS NWISWeb database may include data that have
10 | # not received Director's approval and as such are provisional and subject to revision.
11 | # The data are released on the condition that neither the USGS nor the United States
12 | # Government may be held liable for any damages resulting from its authorized or
13 | # unauthorized use.
14 | #
15 | #  * References to sources of water-use data can be found here. - https://water.usgs.gov/watuse
16 | #
17 | #  Search Criteria:
18 | #  Year(s)          - ALL
19 | #  Area             -
20 | #  County Codes(s)  - ALL
21 | #  County Name(s)   -
22 | #  Category Code(s) - ALL
23 | #  Category Name(s) -
24 | #
25 | #  Columns:
26 | #  National Totals - Summary
27 | #
28 | #  The following years are included:
29 | #  1950
30 | #  1955
31 | #  1960
32 | #  1965
33 | #  1970
34 | #  1975
35 | #  1980
36 | #  1985
37 | #  1990
38 | #  1995
39 | #  2000
40 | #  2005
41 | #  2010
42 | #  2015
43 | #
44 | National Totals	1950	1955	1960	1965	1970	1975	1980	1985	1990	1995	2000	2005	2010	2015
45 | 100s	16s	16s	16s	16s	16s	16s	16s	16s	16s	16s	16s	16s	16s	16s
46 | Population, in millions	150.7	164.0	179.3	193.8	205.9	216.4	229.6	242.4	252.3	267.1	285.3	300.7	312.6	325.0
47 | Total withdrawals, in Bgal/d	180	240	270	310	370	420	430	397	404	398	413	410a	354a	322
48 | Public supply, in Bgal/d	14	17	21	24	27	29	33	36.6	38.7	40.2	43.3	44.4a	42.0	39.0
49 | Self-supplied domestic, in Bgal/d	2.1	2.1	2.0	2.3	2.6	2.8	3.4	3.32	3.39	3.39	3.58	3.73a	3.53a	3.26
50 | Livestock, in Bgal/d	1.5	1.5	1.6	1.7	1.9	2.1	2.2	2.23	2.25	2.28	2.37a	2.15	2.00	2.00
51 | Irrigation, in Bgal/d	89	110	110	120	130	140	150	135	134	130	139	127	116a	118
52 | Thermoelectric power, in Bgal/d	40	72	100	130	170	200	210	187	194	190	195	201	162a	133
53 | Self-supplied industrial, in Bgal/d	37	39	38	46	47	45	45	25.8	22.4a	21.6	19.5a	18.1	16.2a	14.8
54 | Mining, In Bgal/d	b	b	b	b	b	b	b	3.44	4.93	3.59	4.13a	3.83	3.97a	4.00
55 | Commercial, in Bgal/d	b	b	b	b	b	b	b	1.23	2.39	2.89	c	c	c	c
56 | Aquaculture, in Bgal/d	b	b	b	b	b	b	b	2.24	2.24	3.27a	5.79a	8.83a	8.96a	7.55
57 | Total Groundwater, fresh, in Bgal/d	34	47	50	60	68	82	83	73.4	79.4	76.4a	84.3a	78.9	75.9a	82.3
58 | Total Groundwater, saline, in Bgal/d	c	0.6	0.4	0.5	1.0	1.0	0.93	0.66	1.30a	1.11	2.47a	1.51	2.22a	2.34
59 | Total Surface water, fresh, in Bgal/d	140	180	190	210	250	260	280	263	255a	261	265	270	231a	198
60 | Total Surface water, saline, in Bgal/d	10	18	31	43	53	69	71	59.6	68.7a	59.7	61.0	59.8a	45.0	38.6
61 | 


--------------------------------------------------------------------------------
/tests/data/waterdata_gwlevels.txt:
--------------------------------------------------------------------------------
 1 | # ---------------------------------- WARNING ----------------------------------------
 2 | # Some of the data that you have obtained from this U.S. Geological Survey database may not 
 3 | # have received Director's approval.  Any such data values are qualified as provisional and 
 4 | # are subject to revision.  Provisional data are released on the condition that neither the 
 5 | # USGS nor the United States Government may be held liable for any damages resulting from its use.
 6 | # Additional info: http://help.waterdata.usgs.gov/policies/provisional-data-statement
 7 | #
 8 | # File-format description:  http://help.waterdata.usgs.gov/faq/about-tab-delimited-output
 9 | # Automated-retrieval info: http://help.waterdata.usgs.gov/faq/automated-retrievals
10 | #
11 | # Contact:   gs-w_support_nwisweb@usgs.gov
12 | # retrieved: 2020-02-14 17:37:13 -05:00	(natwebsdas01)
13 | #
14 | # US Geological Survey groundwater levels
15 | #
16 | # Data for the following 1 site(s) are contained in this file
17 | #    USGS 434400121275801 21S/11E-19CCC
18 | # -----------------------------------------------------------------------------------
19 | #
20 | # The fields in this file include:
21 | # ---------------------------------
22 | # agency_cd     Agency code
23 | # site_no       USGS site number
24 | # site_tp_cd    Site type code
25 | # lev_dt        Date level measured
26 | # lev_tm        Time level measured
27 | # lev_tz_cd     Time datum
28 | # lev_va        Water-level value in feet below land surface
29 | # sl_lev_va     Water-level value in feet above specific vertical datum
30 | # sl_datum_cd   Referenced vertical datum
31 | # lev_status_cd Status
32 | # lev_agency_cd Measuring agency
33 | # lev_dt_acy_cd Water-level date-time accuracy
34 | # lev_acy_cd    Water-level accuracy
35 | # lev_src_cd    Source of measurement
36 | # lev_meth_cd   Method of measurement
37 | # lev_age_cd    Water-level approval status
38 | #
39 | # Referenced agency codes (lev_agency_cd) included in this output
40 | #
41 | #     USGS       U.S. Geological Survey
42 | #
43 | # Referenced site type codes (site_tp_cd) included in this output
44 | #
45 | #     GW         Well
46 | #
47 | # Referenced water-level site status codes (lev_status_cd) included in this output
48 | #
49 | #     ""         The reported water-level measurement represents a static level
50 | #
51 | #
52 | # Referenced water-level date-time accuracy codes (lev_dt_acy_cd) included in this output
53 | #
54 | #     m          Date is accurate to the Minute
55 | #
56 | # Referenced water-level accuracy codes (lev_acy_cd) included in this output
57 | #
58 | #     2          Water level accuracy to nearest hundredth of a foot
59 | #
60 | # Referenced source of measurement codes (lev_src_cd) included in this output
61 | #
62 | #     S          Measured by personnel of reporting agency.
63 | #
64 | # Referenced method of measurement codes (lev_meth_cd) included in this output
65 | #
66 | #     S          Steel-tape measurement.
67 | #
68 | # Referenced water-level approval-status codes (lev_age_cd) included in this output
69 | #
70 | #     A          Approved for publication -- Processing and review completed.
71 | #
72 | agency_cd	site_no	site_tp_cd	lev_dt	lev_tm	lev_tz_cd	lev_va	sl_lev_va	sl_datum_cd	lev_status_cd	lev_agency_cd	lev_dt_acy_cd	lev_acy_cd	lev_src_cd	lev_meth_cd	lev_age_cd
73 | 5s	15s	6s	10d	5d	5s	12s	12s	10s	1s	5s	1s	1s	1s	1s	1s
74 | USGS	434400121275801	GW	2016-10-26	09:22	PDT	28.33				USGS	m	2	S	S	A
75 | 


--------------------------------------------------------------------------------
/tests/data/waterdata_pmcodes.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # National Water Information System
 3 | # 2022/06/08
 4 | #
 5 | #
 6 | # Date Retrieved: USGS Water Data for the Nation Help System
 7 | #
 8 | parameter_cd	group	parm_nm	epa_equivalence	result_statistical_basis	result_time_basis	result_weight_basis	result_particle_size_basis	result_sample_fraction	result_temperature_basis	CASRN	SRSName	parm_unit
 9 | 5s	8s	58s	5s	0s	0s	0s	0s	9s	0s	10s	7s	9s
10 | 00618	Nutrient	Nitrate, water, filtered, milligrams per liter as nitrogen	Agree					Dissolved		14797-55-8	Nitrate	mg/l as N
11 | 


--------------------------------------------------------------------------------
/tests/data/waterservices_peaks.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # U.S. Geological Survey
 3 | # National Water Information System
 4 | # Retrieved: 2020-02-20 16:35:50 EST
 5 | #
 6 | # ---------------------------------- WARNING ----------------------------------------
 7 | # Some of the data that you have obtained from this U.S. Geological Survey database
 8 | # may not have received Director's approval. Any such data values are qualified
 9 | # as provisional and are subject to revision. Provisional data are released on the
10 | # condition that neither the USGS nor the United States Government may be held liable
11 | # for any damages resulting from its use.
12 | #
13 | # More data may be available offline.
14 | # For more information on these data,  contact  USGS Water Data Inquiries.
15 | # This file contains the annual peak streamflow data.
16 | #
17 | # This information includes the following fields:
18 | #
19 | #  agency_cd     Agency Code
20 | #  site_no       USGS station number
21 | #  peak_dt       Date of peak streamflow (format YYYY-MM-DD)
22 | #  peak_tm       Time of peak streamflow (24 hour format, 00:00 - 23:59)
23 | #  peak_va       Annual peak streamflow value in cfs
24 | #  peak_cd       Peak Discharge-Qualification codes (see explanation below)
25 | #  gage_ht       Gage height for the associated peak streamflow in feet
26 | #  gage_ht_cd    Gage height qualification codes
27 | #  year_last_pk  Peak streamflow reported is the highest since this year
28 | #  ag_dt         Date of maximum gage-height for water year (if not concurrent with peak)
29 | #  ag_tm         Time of maximum gage-height for water year (if not concurrent with peak
30 | #  ag_gage_ht    maximum Gage height for water year in feet (if not concurrent with peak
31 | #  ag_gage_ht_cd maximum Gage height code
32 | #
33 | # Sites in this file include:
34 | #  USGS 01594440 PATUXENT RIVER NEAR BOWIE, MD
35 | #
36 | # Peak Streamflow-Qualification Codes(peak_cd):
37 | #   1 ... Discharge is a Maximum Daily Average
38 | #   2 ... Discharge is an Estimate
39 | #   3 ... Discharge affected by Dam Failure
40 | #   4 ... Discharge less than indicated value,
41 | #           which is Minimum Recordable Discharge at this site
42 | #   5 ... Discharge affected to unknown degree by
43 | #           Regulation or Diversion
44 | #   6 ... Discharge affected by Regulation or Diversion
45 | #   7 ... Discharge is an Historic Peak
46 | #   8 ... Discharge actually greater than indicated value
47 | #   9 ... Discharge due to Snowmelt, Hurricane,
48 | #           Ice-Jam or Debris Dam breakup
49 | #   A ... Year of occurrence is unknown or not exact
50 | #   Bd ... Day of occurrence is unknown or not exact
51 | #   Bm ... Month of occurrence is unknown or not exact
52 | #   C ... All or part of the record affected by Urbanization,
53 | #            Mining, Agricultural changes, Channelization, or other
54 | #   F ... Peak supplied by another agency
55 | #   O ... Opportunistic value not from systematic data collection
56 | #   R ... Revised
57 | #
58 | # Gage height qualification codes(gage_ht_cd,ag_gage_ht_cd):
59 | #   1 ... Gage height affected by backwater
60 | #   2 ... Gage height not the maximum for the year
61 | #   3 ... Gage height at different site and(or) datum
62 | #   4 ... Gage height below minimum recordable elevation
63 | #   5 ... Gage height is an estimate
64 | #   6 ... Gage datum changed during this year
65 | #   7 ... Debris, mud, or hyper-concentrated flow
66 | #   8 ... Gage height tidally affected
67 | #   Bd ... Day of occurrence is unknown or not exact
68 | #   Bm ... Month of occurrence is unknown or not exact
69 | #   F ... Peak supplied by another agency
70 | #   R ... Revised
71 | #
72 | #
73 | agency_cd	site_no	peak_dt	peak_tm	peak_va	peak_cd	gage_ht	gage_ht_cd	year_last_pk	ag_dt	ag_tm	ag_gage_ht	ag_gage_ht_cd
74 | 5s	15s	10d	6s	8s	33s	8s	27s	4s	10d	6s	8s	27s
75 | USGS	01594440	2000-03-22		3640	5	11.90						
76 | USGS	01594440	2001-06-08	06:30	3800	5	12.05						
77 | USGS	01594440	2002-04-29		1510	2,5,8							
78 | USGS	01594440	2003-02-23	19:30	6990	5	15.08						
79 | USGS	01594440	2003-12-12	10:45	5790	5	13.99						
80 | USGS	01594440	2005-04-03	19:15	5210	5	13.42						
81 | USGS	01594440	2006-06-26	23:00	12700	5	19.20						
82 | USGS	01594440	2007-04-16	11:15	5520	5	13.73						
83 | USGS	01594440	2008-05-13	02:00	7860	5	15.80						
84 | USGS	01594440	2009-06-19	05:45	4130	5	12.35						
85 | USGS	01594440	2010-03-14	13:00	5780	5	13.98						
86 | USGS	01594440	2011-09-08	13:15	16800	5	21.10						
87 | USGS	01594440	2011-12-08	20:30	4900	5	13.74						
88 | USGS	01594440	2012-10-30	23:00	10800	5	18.02						
89 | USGS	01594440	2014-05-01	17:30	15600	5	20.56						
90 | USGS	01594440	2015-06-28	18:15	6610	5	15.03						
91 | USGS	01594440	2016-08-01	00:30	6140	5	14.64						
92 | USGS	01594440	2017-07-30	01:15	4960	5	13.60						
93 | USGS	01594440	2018-06-04	18:30	8360	5	16.32						
94 | USGS	01594440	2018-12-16	23:30	7220	5	15.50						
95 | 


--------------------------------------------------------------------------------
/tests/data/waterservices_ratings.txt:
--------------------------------------------------------------------------------
 1 | # //UNITED STATES GEOLOGICAL SURVEY       http://water.usgs.gov/
 2 | # //NATIONAL WATER INFORMATION SYSTEM     http://water.usgs.gov/data.html
 3 | # //DATA ARE PROVISIONAL AND SUBJECT TO CHANGE UNTIL PUBLISHED BY USGS
 4 | # //RETRIEVED: 2018-02-28 01:11:02
 5 | # //WARNING
 6 | # //WARNING The stage-discharge rating provided in this file should be
 7 | # //WARNING considered provisional and subject to change. Stage-discharge
 8 | # //WARNING ratings change over time as the channel features that control
 9 | # //WARNING the relation between stage and discharge vary. Users are
10 | # //WARNING cautioned to consider carefully the applicability of this
11 | # //WARNING rating before using it for decisions that concern personal or
12 | # //WARNING public safety or operational consequences.
13 | # //WARNING
14 | # //WARNING This rating does not include any shifts that may have been
15 | # //WARNING used along with this base rating in converting stage to
16 | # //WARNING discharge at this site. Stage data processed with the rating
17 | # //WARNING thus may not match that displayed or published by the USGS.
18 | # //WARNING
19 | # //FILE TYPE="NWIS RATING" 
20 | # //DATABASE NUMBER=01 DESCRIPTION=" Standard data base for this site."
21 | # //STATION AGENCY="USGS " NUMBER="01594440       " TIME_ZONE="EST" DST_FLAG=N
22 | # //STATION NAME="PATUXENT RIVER NEAR BOWIE, MD"
23 | # //LABEL="Discharge ft^3/s"
24 | # //PARAMETER CODE="00060"
25 | # //RATING ID="20.0" TYPE="STGQ" NAME="stage-discharge" AGING=Working
26 | # //RATING REMARKS=""
27 | # //RATING EXPANSION="logarithmic"
28 | # //RATING OFFSET1=2.000000E+00
29 | # //RATING_INDEP ROUNDING="????" PARAMETER="Gage height (ft)"
30 | # //RATING_DEP ROUNDING="????" PARAMETER="Discharge (ft^3/s)"
31 | # //RATING_DATETIME BEGIN=20151001000000 BZONE=-05:00 END=20170206000000 EZONE=-05:00 AGING=None
32 | # //RATING_DATETIME COMMENT="Adjust high end to Meas 32C. Begin on WY change"
33 | # //RATING_DATETIME BEGIN=20170206000000 BZONE=-05:00 END=-------------- EZONE=--- AGING=None
34 | # //RATING_DATETIME COMMENT="Adjust high end to Meas 32C. Begin on WY change"
35 | INDEP	DEP	STOR
36 | 16N	16N	1S
37 | 2.9900000E+00	3.0000000E+01	*
38 | 4.0000000E+00	1.1000000E+02	*
39 | 5.0000000E+00	2.2500000E+02	*
40 | 5.5000000E+00	3.0000000E+02	*
41 | 6.0000000E+00	3.9000000E+02	*
42 | 6.5000000E+00	4.9000000E+02	*
43 | 7.0000000E+00	6.0000000E+02	*
44 | 9.0000000E+00	1.1750000E+03	*
45 | 1.3000000E+01	4.3500000E+03	*
46 | 2.0850000E+01	1.6497750E+04	*
47 | 2.7900000E+01	3.1100000E+04	*
48 | 


--------------------------------------------------------------------------------
/tests/data/waterservices_site.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | #
 3 | # US Geological Survey
 4 | # retrieved: 2020-02-14 13:17:02 -05:00	(sdas01)
 5 | #
 6 | # The Site File stores location and general information about groundwater,
 7 | # surface water, and meteorological sites
 8 | # for sites in USA.
 9 | #
10 | # File-format description:  http://help.waterdata.usgs.gov/faq/about-tab-delimited-output
11 | # Automated-retrieval info: https://waterservices.usgs.gov/docs/site-service/site-service-details/
12 | #
13 | # Contact:   gs-w_support_nwisweb@usgs.gov
14 | #
15 | # The following selected fields are included in this output:
16 | #
17 | #  agency_cd       -- Agency
18 | #  site_no         -- Site identification number
19 | #  station_nm      -- Site name
20 | #  site_tp_cd      -- Site type
21 | #  dec_lat_va      -- Decimal latitude
22 | #  dec_long_va     -- Decimal longitude
23 | #  coord_acy_cd    -- Latitude-longitude accuracy
24 | #  dec_coord_datum_cd -- Decimal Latitude-longitude datum
25 | #  alt_va          -- Altitude of Gage/land surface
26 | #  alt_acy_va      -- Altitude accuracy
27 | #  alt_datum_cd    -- Altitude datum
28 | #  huc_cd          -- Hydrologic unit code
29 | #
30 | agency_cd	site_no	station_nm	site_tp_cd	dec_lat_va	dec_long_va	coord_acy_cd	dec_coord_datum_cd	alt_va	alt_acy_va	alt_datum_cd	huc_cd
31 | 5s	15s	50s	7s	16s	16s	1s	10s	8s	3s	10s	16s
32 | USGS	01491000	CHOPTANK RIVER NEAR GREENSBORO, MD	ST	38.99719444	-75.7858056	S	NAD83	 2.73	 .1	NAVD88	02060005
33 | USGS	01645000	SENECA CREEK AT DAWSONVILLE, MD	ST	39.1280833	-77.33577778	S	NAD83	 213.31	 .1	NAVD88	02070008
34 | 


--------------------------------------------------------------------------------
/tests/data/wqp3_results.txt:
--------------------------------------------------------------------------------
1 | Org_Identifier,Org_FormalName,Project_Identifier,Project_Name,Project_QAPPApproved,Project_QAPPApprovalAgency,ProjectAttachment_FileName,ProjectAttachment_FileType,Location_Identifier,Location_Name,Location_Type,Location_Description,Location_State,Location_CountryName,Location_CountyName,Location_CountryCode,Location_StatePostalCode,Location_CountyCode,Location_HUCEightDigitCode,Location_HUCTwelveDigitCode,Location_TribalLandIndicator,Location_TribalLand,Location_Latitude,Location_Longitude,Location_HorzCoordReferenceSystemDatum,Location_LatitudeStandardized,Location_LongitudeStandardized,Location_HorzCoordStandardizedDatum,AlternateLocation_IdentifierCount,Activity_ActivityIdentifier,Activity_ActivityIdentifierUserSupplied,Activity_TypeCode,Activity_Media,Activity_MediaSubdivisionName,Activity_BottomDepthSamplingComponent,ActivityBiological_AssemblageSampled,ActivityBiological_ToxicityTestType,Activity_ConductingOrganization,Activity_Comment,ActivityLocation_Latitude,ActivityLocation_Longitude,ActivityLocation_HorzCoordReferenceSystemDatum,ActivityLocation_SourceMapScale,ActivityLocation_LatitudeStandardized,ActivityLocation_LongitudeStandardized,ActivityLocation_HorzCoordStandardizedDatum,ActivityLocation_HorzAccuracyMeasure,ActivityLocation_HorzAccuracyMeasureUnit,ActivityLocation_HorizontalAccuracyHorzCollectionMethod,ActivityLocation_Description,Activity_StartDate,Activity_StartTime,Activity_StartTimeZone,Activity_EndDate,Activity_EndTime,Activity_EndTimeZone,Activity_DepthHeightMeasure,Activity_DepthHeightMeasureUnit,Activity_BottomDepthAltitudeReferencePoint,Activity_ActivityRelativeDepth,Activity_TopDepthMeasure,Activity_TopDepthMeasureUnit,Activity_BottomDepthMeasure,Activity_BottomDepthMeasureUnit,SampleCollectionMethod_Identifier,SampleCollectionMethod_IdentifierContext,SampleCollectionMethod_Name,SampleCollectionMethod_QualifierTypeName,SampleCollectionMethod_Description,SampleCollectionMethod_EquipmentName,SampleCollectionMethod_EquipmentComment,SamplePrepMethod_Identifier,SamplePrepMethod_IdentifierContext,SamplePrepMethod_Name,SamplePrepMethod_QualifierType,SamplePrepMethod_Description,SamplePrepMethod_ContainerLabel,SamplePrepMethod_ContainerType,SamplePrepMethod_ContainerColor,SamplePrepMethod_ChemicalPreservativeUsed,SamplePrepMethod_ThermalPreservativeUsed,SamplePrepMethod_TransportStorageDescription,Activity_HydrologicCondition,Activity_HydrologicEvent,ActivityAttachment_FileName,ActivityAttachment_FileType,ActivityAttachment_FileDownload,Result_DataLoggerLine,Result_ResultDetectionCondition,Result_Characteristic,Result_CharacteristicUserSupplied,Result_CASNumber,Result_MethodSpeciation,Result_SampleFraction,ResultBiological_Intent,ResultBiological_IndividualIdentifier,ResultBiological_Taxon,ResultBiological_TaxonUserSupplied,ResultBiological_TaxonUserSuppliedReference,ResultBiological_UnidentifiedSpeciesIdentifier,ResultBiological_SampleTissueAnatomy,ResultBiological_GroupSummaryCount,GroupSummaryWeight_Measure,GroupSummaryWeightMeasure_Unit,ResultDepthHeight_Measure,ResultDepthHeight_MeasureUnit,ResultDepthHeight_AltitudeReferencePoint,ResultDepthHeight_SamplingPointName,ResultDepthHeight_SamplingPointType,ResultDepthHeight_SamplingPointPlaceInSeries,ResultDepthHeight_SamplingPointComment,ResultDepthHeight_RecordIdentifierUserSupplied,Result_MeasureIdentifier,Result_Measure,Result_MeasureUnit,Result_MeasureQualifierCode,Result_MeasureStatusIdentifier,Result_StatisticalBase,Result_StatisticalNValue,Result_MeasureType,Result_WeightBasis,Result_TimeBasis,Result_MeasureTemperatureBasis,Result_MeasureParticleSizeBasis,DataQuality_PrecisionValue,DataQuality_BiasValue,DataQuality_ConfidenceIntervalValue,DataQuality_UpperConfidenceLimitValue,DataQuality_LowerConfidenceLimitValue,DataQuality_ResultComment,DetectionLimit_TypeA,DetectionLimit_MeasureA,DetectionLimit_MeasureUnitA,DetectionLimit_CommentA,DetectionLimit_TypeB,DetectionLimit_MeasureB,DetectionLimit_MeasureUnitB,DetectionLimit_CommentB,LabInfo_LabSampleSplitRatio,LabInfo_LabAccreditationIndicator,LabInfo_LabAccreditationAuthority,LabInfo_TaxonAccreditationIndicator,LabInfo_TaxonAccreditationAuthority,ResultAnalyticalMethod_Identifier,ResultAnalyticalMethod_IdentifierContext,ResultAnalyticalMethod_Name,ResultAnalyticalMethod_QualifierType,ResultAnalyticalMethod_Description,Result_ComparableMethodIdentifier,Result_ComparableMethodIdentifierContext,Result_ComparableMethodModification,LabInfo_Name,LabInfo_AnalysisStartDate,LabInfo_AnalysisStartTime,LabInfo_AnalysisStartTimeZone,LabInfo_AnalysisEndDate,LabInfo_AnalysisEndTime,LabInfo_AnalysisEndTimeZone,LabInfo_LaboratoryComment,LabSamplePrepMethod_Identifier,LabSamplePrepMethod_IdentifierContext,LabSamplePrepMethod_Name,LabSamplePrepMethod_QualifierType,LabSamplePrepMethod_Description,LabSamplePrepMethod_StartDate,LabSamplePrepMethod_StartTime,LabSamplePrepMethod_StartTimeZone,LabSamplePrepMethod_EndDate,LabSamplePrepMethod_EndTime,LabSamplePrepMethod_EndTimeZone,LabSamplePrepMethod_DilutionFactor,ResultAttachment_FileName,ResultAttachment_FileType,ResultAttachment_FileDownload,ProviderName,Result_CharacteristicComparable,Result_CharacteristicGroup,Org_Type,LastChangeDate,USGSpcode
2 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-49176537,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-08-08,13:55:00,CDT,2011-08-08,14:05:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777847,471,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018,
3 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-47619240,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-07-06,08:35:00,CDT,2011-07-06,08:45:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777841,860,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018,
4 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-45822640,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-05-09,12:20:00,CDT,2011-05-09,12:30:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777835,1000,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018,
5 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-46495059,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-06-05,14:45:00,CDT,2011-06-05,14:55:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777838,800,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018,
6 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-50689894,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-09-11,16:10:00,CDT,2011-09-11,16:20:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777850,750,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018,
7 | 


--------------------------------------------------------------------------------
/tests/data/wqp_activity_metrics.txt:
--------------------------------------------------------------------------------
 1 | OrganizationIdentifier,OrganizationFormalName,MonitoringLocationIdentifier,ActivityIdentifier,ActivityMetricType/MetricTypeIdentifier,ActivityMetricType/MetricTypeIdentifierContext,ActivityMetricType/MetricTypeName,MetricTypeCitation/ResourceTitleName,MetricTypeCitation/ResourceCreatorName,MetricTypeCitation/ResourceSubjectText,MetricTypeCitation/ResourcePublisherName,MetricTypeCitation/ResourceDate,MetricTypeCitation/ResourceIdentifier,MetricTypeCitation/MetricTypeScaleText,MetricTypeCitation/FormulaDescriptionText,MetricValueMeasure/MeasureValue,MetricValueMeasure/MeasureUnitCode,MetricValueMeasure/MetricScoreNumeric,MetricValueMeasure/MetricCommentText,MetricValueMeasure/IndexIdentifier,ProviderName
 2 | GSWA,Great Swamp Watershed Association(Volunteer)*,GSWA-LB2,GSWA-V95068SC,WATER_ODOR,GSWA,Water Odor,,,,,,,,,0.0,None,0.0,Sewage,,STORET
 3 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0178,NARS_WQX-QWCH:OWW04440-0178:040811,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,330.05,ueq/L,330.05,,,STORET
 4 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0178,NARS_WQX-QWCH:OWW04440-0178:040811,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,6.08,%,6.08,,,STORET
 5 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0178,NARS_WQX-QWCH:OWW04440-0178:040811,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,45.52,uS/cm,45.52,,,STORET
 6 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0114,NARS_WQX-QWCH:OWW04440-0114:040813,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,2139.84,ueq/L,2139.84,,,STORET
 7 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0114,NARS_WQX-QWCH:OWW04440-0114:040813,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,0.3,%,0.3,,,STORET
 8 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0114,NARS_WQX-QWCH:OWW04440-0114:040813,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,250.99,uS/cm,250.99,,,STORET
 9 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0458,NARS_WQX-QWCH:OWW04440-0458:040816,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,1908.46,ueq/L,1908.46,,,STORET
10 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0458,NARS_WQX-QWCH:OWW04440-0458:040816,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,-1.56,%,-1.56,,,STORET
11 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0458,NARS_WQX-QWCH:OWW04440-0458:040816,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,208.89,uS/cm,208.89,,,STORET
12 | GSWA,Great Swamp Watershed Association(Volunteer)*,GSWA-HLT,GSWA-V504943SC,WATER_ODOR,GSWA,Water Odor,,,,,,,,,0.0,None,0.0,Normal,,STORET
13 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0138,NARS_WQX-QWCH:OWW04440-0138:040810,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,1606.47,ueq/L,1606.47,,,STORET
14 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0138,NARS_WQX-QWCH:OWW04440-0138:040810,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,0.18,%,0.18,,,STORET
15 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0138,NARS_WQX-QWCH:OWW04440-0138:040810,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,191.85,uS/cm,191.85,,,STORET
16 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0554,NARS_WQX-QWCH:OWW04440-0554:040815,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,3191.21,ueq/L,3191.21,,,STORET
17 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0554,NARS_WQX-QWCH:OWW04440-0554:040815,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,-0.36,%,-0.36,,,STORET
18 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0554,NARS_WQX-QWCH:OWW04440-0554:040815,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,330.12,uS/cm,330.12,,,STORET
19 | GSWA,Great Swamp Watershed Association(Volunteer)*,GSWA-CMA,GSWA-V504945SC,WATER_ODOR,GSWA,Water Odor,,,,,,,,,0.0,None,0.0,Normal,,STORET
20 | 


--------------------------------------------------------------------------------
/tests/data/wqp_organizations.txt:
--------------------------------------------------------------------------------
 1 | OrganizationIdentifier,OrganizationFormalName,OrganizationDescriptionText,OrganizationType,TribalCode,ElectronicAddress,Telephonic,OrganizationAddress/AddressTypeName_1,OrganizationAddress/AddressText_1,OrganizationAddress/SupplementalAddressText_1,OrganizationAddress/LocalityName_1,OrganizationAddress/StateCode_1,OrganizationAddress/PostalCode_1,OrganizationAddress/CountryCode_1,OrganizationAddress/CountyCode_1,OrganizationAddress/AddressTypeName_2,OrganizationAddress/AddressText_2,OrganizationAddress/SupplementalAddressText_2,OrganizationAddress/LocalityName_2,OrganizationAddress/StateCode_2,OrganizationAddress/PostalCode_2,OrganizationAddress/CountryCode_2,OrganizationAddress/CountyCode_2,OrganizationAddress/AddressTypeName_3,OrganizationAddress/AddressText_3,OrganizationAddress/SupplementalAddressText_3,OrganizationAddress/LocalityName_3,OrganizationAddress/StateCode_3,OrganizationAddress/PostalCode_3,OrganizationAddress/CountryCode_3,OrganizationAddress/CountyCode_3,ProviderName
 2 | USGS-GA,USGS Georgia Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS
 3 | USGS-NY,USGS New York Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS
 4 | USGS-NJ,USGS New Jersey Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS
 5 | USGS-PA,USGS Pennsylvania Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS
 6 | USGS-MD,USGS Maryland Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS
 7 | NWRSFWS_WQX,"National Wildlife Refuge System, Fish and Wildlife Service",,Federal/US Government,,maritza_mallek@fws.gov (Email),413-253-8783 (Office),Location,300 Westgate Center Dr,,Hadley,MA,01035,US,,,,,,,,,,,,,,,,,,STORET
 8 | DRBC,Delaware River Basin Commission,Interstate River Basin Commission,State/US Government,,Elaine Panuccio@drbc.nj.gov (Email),(609)883-9500 x307 (Office),Location,25 State Police Drive,,West Trenton,NJ,08628,US,21.0,,,,,,,,,,,,,,,,,STORET
 9 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),,Federal/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET
10 | GSWA,Great Swamp Watershed Association(Volunteer)*,Great Swamp Watershed Association,Private Non-Industrial,,Danielle.Donkersloot@dep.state.nj.us (Email),609-633-9241 (Office),,,,,,,,,,,,,,,,,,,,,,,,,STORET
11 | BTMUA,Brick Utilities,Drinking water provider,Private Industry,,rkarl@brickmua.com (Email),732-458-7000 (Office),Location,1551 Rt 88 W.,,Brick,NJ,08724,US,,Mailing,1551 Rt 88 W.,,Brick,NJ,8724.0,US,,,,,,,,,,STORET
12 | 31DELRBC_WQX,Delaware River Basin Commission,,State/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET
13 | MERI,Meadowlands Environmental Research Institute,"Our mission is to provide the scientific community, policy makers, and the public with the knowledge and predictive understanding necessary to conserve, protect, and manage the District ecosystems and the services they provide.",Local/US Government,,Cheryl.Yao@njmeadowlands.gov (Email);http://meri/njmeadowlands.gov/ (Internet),201-460-4604 (Office),Mailing,One Dekorte Park Plaza,,Lyndhurst,NJ,07071,US,,Location,One Dekorte Park Plaza,,Lyndhurst,NJ,7071.0,US,,,,,,,,,,STORET
14 | NJDEP_BFBM,NJDEP Bureau of Freshwater and Biological Monitoring,"The Bureau is responsible for monitoring the ambient conditions of the state's fresh and ground water resources. This monitoring includes regular statewide sampling through of 115 surface water monitoring stations, 820 benthic macroinvertebrate stream monitoring stations, 100 fish assemblage stream monitoring stations, and 150 ground water stations.",State/US Government,,http://www.nj.gov/dep/wms/bfbm/ (Internet);leigh.lager@dep.nj.gov (Email),609-943-3266 (Office),Mailing,PO Box 427,,Trenton,NJ,08625,US,,Location,35 Arctic Pkwy,,Ewing,NJ,8638.0,US,,,,,,,,,,STORET
15 | 11NPSWRD_WQX,National Park Service Water Resources Division,,Federal/US Government,,dean_tucker@nps.gov (Email),970-225-3516 (Office),Location,"1201 Oakridge Drive, Suite 250",,Fort Collins,CO,80525-5596,US,,,,,,,,,,,,,,,,,,STORET
16 | 31DRBCSP,Delaware River Basin Commission,Water Quality Monitoring Data,Interstate Comsn/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET
17 | 31DELRBC,Delaware River Basin Commission,Water Quality Monitoring Data,Interstate Comsn/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET
18 | NARS,EPA National Aquatic Resource Survey Data,Wadeable Streams Assessment Data,Federal/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET
19 | 21NJDEP1,NJ Department of Environmental Protection,Ambient Water Quality Monitoring Data,State/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET
20 | 


--------------------------------------------------------------------------------
/tests/data/wqp_results.txt:
--------------------------------------------------------------------------------
1 | OrganizationIdentifier,OrganizationFormalName,ActivityIdentifier,ActivityTypeCode,ActivityMediaName,ActivityMediaSubdivisionName,ActivityStartDate,ActivityStartTime/Time,ActivityStartTime/TimeZoneCode,ActivityEndDate,ActivityEndTime/Time,ActivityEndTime/TimeZoneCode,ActivityDepthHeightMeasure/MeasureValue,ActivityDepthHeightMeasure/MeasureUnitCode,ActivityDepthAltitudeReferencePointText,ActivityTopDepthHeightMeasure/MeasureValue,ActivityTopDepthHeightMeasure/MeasureUnitCode,ActivityBottomDepthHeightMeasure/MeasureValue,ActivityBottomDepthHeightMeasure/MeasureUnitCode,ProjectIdentifier,ActivityConductingOrganizationText,MonitoringLocationIdentifier,ActivityCommentText,SampleAquifer,HydrologicCondition,HydrologicEvent,SampleCollectionMethod/MethodIdentifier,SampleCollectionMethod/MethodIdentifierContext,SampleCollectionMethod/MethodName,SampleCollectionEquipmentName,ResultDetectionConditionText,CharacteristicName,ResultSampleFractionText,ResultMeasureValue,ResultMeasure/MeasureUnitCode,MeasureQualifierCode,ResultStatusIdentifier,StatisticalBaseCode,ResultValueTypeName,ResultWeightBasisText,ResultTimeBasisText,ResultTemperatureBasisText,ResultParticleSizeBasisText,PrecisionValue,ResultCommentText,USGSPCode,ResultDepthHeightMeasure/MeasureValue,ResultDepthHeightMeasure/MeasureUnitCode,ResultDepthAltitudeReferencePointText,SubjectTaxonomicName,SampleTissueAnatomyName,ResultAnalyticalMethod/MethodIdentifier,ResultAnalyticalMethod/MethodIdentifierContext,ResultAnalyticalMethod/MethodName,MethodDescriptionText,LaboratoryName,AnalysisStartDate,ResultLaboratoryCommentText,DetectionQuantitationLimitTypeName,DetectionQuantitationLimitMeasure/MeasureValue,DetectionQuantitationLimitMeasure/MeasureUnitCode,PreparationStartDate,ProviderName
2 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-45822640,Field Msr/Obs,Water,,2011-05-09,12:20:00,CDT,2011-05-09,12:30:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,1000,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET
3 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-49176537,Field Msr/Obs,Water,,2011-08-08,13:55:00,CDT,2011-08-08,14:05:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,471,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET
4 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-47619240,Field Msr/Obs,Water,,2011-07-06,08:35:00,CDT,2011-07-06,08:45:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,860,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET
5 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-50689894,Field Msr/Obs,Water,,2011-09-11,16:10:00,CDT,2011-09-11,16:20:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,750,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET
6 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-46495059,Field Msr/Obs,Water,,2011-06-05,14:45:00,CDT,2011-06-05,14:55:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,800,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET


--------------------------------------------------------------------------------
/tests/nadp_test.py:
--------------------------------------------------------------------------------
 1 | """Tests for NADP functions."""
 2 | 
 3 | import os
 4 | 
 5 | import dataretrieval.nadp as nadp
 6 | 
 7 | 
 8 | class TestMDNmap:
 9 |     """Testing the mercury deposition network map functions.
10 | 
11 |     This set of tests actually queries the services themselves to ensure there
12 |     have been no upstream changes to paths or file names. Tests created
13 |     because there was an upstream change to paths that broke ``dataretrieval``
14 |     functionality.
15 |     """
16 | 
17 |     def test_get_annual_MDN_map_zip(self, tmp_path):
18 |         """Test the get_annual_MDN_map function zip return."""
19 |         z_path = nadp.get_annual_MDN_map(
20 |             measurement_type="conc", year="2010", path=tmp_path
21 |         )
22 |         exp_path = os.path.join(tmp_path, "Hg_conc_2010.zip")
23 |         # assert path matches expectation
24 |         assert z_path == str(exp_path)
25 |         # assert unpacked zip exists as a directory
26 |         assert os.path.exists(exp_path[:-4])
27 |         # assert tif exists in directory
28 |         assert os.path.exists(os.path.join(z_path[:-4], "conc_Hg_2010.tif"))
29 | 
30 | 
31 | class TestNTNmap:
32 |     """Testing the national trends network map functions."""
33 | 
34 |     def test_get_annual_NTN_map_zip(self, tmp_path):
35 |         """Test the get_annual_NTN_map function zip return."""
36 |         z_path = nadp.get_annual_NTN_map(
37 |             measurement_type="Precip", year="2015", path=tmp_path
38 |         )
39 |         exp_path = os.path.join(tmp_path, "Precip_2015.zip")
40 |         # assert path matches expectation
41 |         assert z_path == str(exp_path)
42 |         # assert unpacked zip exists as a directory
43 |         assert os.path.exists(exp_path[:-4])
44 |         # assert tif exists in directory
45 |         assert os.path.exists(os.path.join(z_path[:-4], "Precip_2015.tif"))
46 | 


--------------------------------------------------------------------------------
/tests/samples_test.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | 
  3 | import pytest
  4 | from pandas import DataFrame
  5 | 
  6 | from dataretrieval.samples import (
  7 |     _check_profiles,
  8 |     get_usgs_samples
  9 | )
 10 | 
 11 | def mock_request(requests_mock, request_url, file_path):
 12 |     """Mock request code"""
 13 |     with open(file_path) as text:
 14 |         requests_mock.get(
 15 |             request_url, text=text.read(), headers={"mock_header": "value"}
 16 |         )
 17 | 
 18 | def test_mock_get_usgs_samples(requests_mock):
 19 |     """Tests USGS Samples query"""
 20 |     request_url = (
 21 |         "https://api.waterdata.usgs.gov/samples-data/results/fullphyschem?"
 22 |         "activityMediaName=Water&activityStartDateLower=2020-01-01"
 23 |         "&activityStartDateUpper=2024-12-31&monitoringLocationIdentifier=USGS-05406500&mimeType=text%2Fcsv"
 24 |     )
 25 |     response_file_path = "data/samples_results.txt"
 26 |     mock_request(requests_mock, request_url, response_file_path)
 27 |     df, md = get_usgs_samples(
 28 |         service="results",
 29 |         profile="fullphyschem",
 30 |         activityMediaName="Water",
 31 |         activityStartDateLower="2020-01-01",
 32 |         activityStartDateUpper="2024-12-31",
 33 |         monitoringLocationIdentifier="USGS-05406500",
 34 |         )
 35 |     assert type(df) is DataFrame
 36 |     assert df.size == 12127
 37 |     assert md.url == request_url
 38 |     assert isinstance(md.query_time, datetime.timedelta)
 39 |     assert md.header == {"mock_header": "value"}
 40 |     assert md.comment is None
 41 | 
 42 | def test_check_profiles():
 43 |     """Tests that correct errors are raised for invalid profiles."""
 44 |     with pytest.raises(ValueError):
 45 |         _check_profiles(service="foo", profile="bar")
 46 |     with pytest.raises(ValueError):
 47 |         _check_profiles(service="results", profile="foo")
 48 | 
 49 | def test_samples_results():
 50 |     """Test results call for proper columns"""
 51 |     df,_ = get_usgs_samples(
 52 |         service="results",
 53 |         profile="narrow",
 54 |         monitoringLocationIdentifier="USGS-05288705",
 55 |         activityStartDateLower="2024-10-01",
 56 |         activityStartDateUpper="2025-04-24"
 57 |         )
 58 |     assert all(col in df.columns for col in ["Location_Identifier", "Activity_ActivityIdentifier"])
 59 |     assert len(df) > 0
 60 | 
 61 | def test_samples_activity():
 62 |     """Test activity call for proper columns"""
 63 |     df,_ = get_usgs_samples(
 64 |         service="activities",
 65 |         profile="sampact",
 66 |         monitoringLocationIdentifier="USGS-06719505"
 67 |         )
 68 |     assert len(df) > 0
 69 |     assert len(df.columns) == 95
 70 |     assert "Location_HUCTwelveDigitCode" in df.columns
 71 | 
 72 | def test_samples_locations():
 73 |     """Test locations call for proper columns"""
 74 |     df,_ = get_usgs_samples(
 75 |         service="locations",
 76 |         profile="site",
 77 |         stateFips="US:55",
 78 |         activityStartDateLower="2024-10-01",
 79 |         activityStartDateUpper="2025-04-24",
 80 |         usgsPCode="00010"
 81 |         )
 82 |     assert all(col in df.columns for col in ["Location_Identifier", "Location_Latitude"])
 83 |     assert len(df) > 0
 84 | 
 85 | def test_samples_projects():
 86 |     """Test projects call for proper columns"""
 87 |     df,_ = get_usgs_samples(
 88 |         service="projects",
 89 |         profile="project",
 90 |         stateFips="US:15",
 91 |         activityStartDateLower="2024-10-01",
 92 |         activityStartDateUpper="2025-04-24"
 93 |         )
 94 |     assert all(col in df.columns for col in ["Org_Identifier", "Project_Identifier"])
 95 |     assert len(df) > 0
 96 | 
 97 | def test_samples_organizations():
 98 |     """Test organizations call for proper columns"""
 99 |     df,_ = get_usgs_samples(
100 |         service="organizations",
101 |         profile="count",
102 |         stateFips="US:01"
103 |         )
104 |     assert len(df) == 1
105 |     assert df.size == 3
106 | 


--------------------------------------------------------------------------------
/tests/utils_test.py:
--------------------------------------------------------------------------------
 1 | """Unit tests for functions in utils.py"""
 2 | 
 3 | import unittest.mock as mock
 4 | 
 5 | import pytest
 6 | 
 7 | import dataretrieval.nwis as nwis
 8 | from dataretrieval import utils
 9 | 
10 | 
11 | class Test_query:
12 |     """Tests of the query function."""
13 | 
14 |     def test_url_too_long(self):
15 |         """Test to confirm more useful error when query URL too long.
16 | 
17 |         Test based on GitHub Issue #64
18 |         """
19 |         # all sites in MD
20 |         sites, _ = nwis.what_sites(stateCd="MD")
21 |         # expected error message
22 |         _msg = "Request URL too long. Modify your query to use fewer sites. API response reason: Request-URI Too Long"
23 |         # raise error by trying to query them all, so URL is way too long
24 |         with pytest.raises(ValueError, match=_msg):
25 |             nwis.get_iv(sites=sites.site_no.values.tolist())
26 | 
27 |     def test_header(self):
28 |         """Test checking header info with user-agent is part of query."""
29 |         url = "https://waterservices.usgs.gov/nwis/dv"
30 |         payload = {
31 |             "format": "json",
32 |             "startDT": "2010-10-01",
33 |             "endDT": "2010-10-10",
34 |             "sites": "01646500",
35 |             "multi_index": True,
36 |         }
37 |         response = utils.query(url, payload)
38 |         assert response.status_code == 200  # GET was successful
39 |         assert "user-agent" in response.request.headers
40 | 
41 | 
42 | class Test_BaseMetadata:
43 |     """Tests of BaseMetadata"""
44 | 
45 |     def test_init_with_response(self):
46 |         response = mock.MagicMock()
47 |         md = utils.BaseMetadata(response)
48 | 
49 |         ## Test parameters initialized from the API response
50 |         assert md.url is not None
51 |         assert md.query_time is not None
52 |         assert md.header is not None
53 | 
54 |         ## Test NotImplementedError parameters
55 |         with pytest.raises(NotImplementedError):
56 |             md.site_info
57 |         with pytest.raises(NotImplementedError):
58 |             md.variable_info
59 | 


--------------------------------------------------------------------------------
/tests/wqp_test.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | 
  3 | import pytest
  4 | from pandas import DataFrame
  5 | 
  6 | from dataretrieval.wqp import (
  7 |     _check_kwargs,
  8 |     get_results,
  9 |     what_activities,
 10 |     what_activity_metrics,
 11 |     what_detection_limits,
 12 |     what_habitat_metrics,
 13 |     what_organizations,
 14 |     what_project_weights,
 15 |     what_projects,
 16 |     what_sites,
 17 | )
 18 | 
 19 | 
 20 | def test_get_results(requests_mock):
 21 |     """Tests water quality portal ratings query"""
 22 |     request_url = (
 23 |         "https://www.waterqualitydata.us/data/Result/Search?siteid=WIDNR_WQX-10032762"
 24 |         "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011"
 25 |         "&mimeType=csv"
 26 |     )
 27 |     response_file_path = "data/wqp_results.txt"
 28 |     mock_request(requests_mock, request_url, response_file_path)
 29 |     df, md = get_results(
 30 |         siteid="WIDNR_WQX-10032762",
 31 |         characteristicName="Specific conductance",
 32 |         startDateLo="05-01-2011",
 33 |         startDateHi="09-30-2011",
 34 |     )
 35 |     assert type(df) is DataFrame
 36 |     assert df.size == 315
 37 |     assert md.url == request_url
 38 |     assert isinstance(md.query_time, datetime.timedelta)
 39 |     assert md.header == {"mock_header": "value"}
 40 |     assert md.comment is None
 41 | 
 42 | 
 43 | def test_get_results_WQX3(requests_mock):
 44 |     """Tests water quality portal results query with new WQX3.0 profile"""
 45 |     request_url = (
 46 |         "https://www.waterqualitydata.us/wqx3/Result/search?siteid=WIDNR_WQX-10032762"
 47 |         "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011"
 48 |         "&mimeType=csv"
 49 |         "&dataProfile=fullPhysChem"
 50 |     )
 51 |     response_file_path = "data/wqp3_results.txt"
 52 |     mock_request(requests_mock, request_url, response_file_path)
 53 |     df, md = get_results(
 54 |         legacy=False,
 55 |         siteid="WIDNR_WQX-10032762",
 56 |         characteristicName="Specific conductance",
 57 |         startDateLo="05-01-2011",
 58 |         startDateHi="09-30-2011",
 59 |     )
 60 |     assert type(df) is DataFrame
 61 |     assert df.size == 900
 62 |     assert md.url == request_url
 63 |     assert isinstance(md.query_time, datetime.timedelta)
 64 |     assert md.header == {"mock_header": "value"}
 65 |     assert md.comment is None
 66 | 
 67 | 
 68 | def test_what_sites(requests_mock):
 69 |     """Tests Water quality portal sites query"""
 70 |     request_url = (
 71 |         "https://www.waterqualitydata.us/data/Station/Search?statecode=US%3A34&characteristicName=Chloride"
 72 |         "&mimeType=csv"
 73 |     )
 74 |     response_file_path = "data/wqp_sites.txt"
 75 |     mock_request(requests_mock, request_url, response_file_path)
 76 |     df, md = what_sites(statecode="US:34", characteristicName="Chloride")
 77 |     assert type(df) is DataFrame
 78 |     assert df.size == 239868
 79 |     assert md.url == request_url
 80 |     assert isinstance(md.query_time, datetime.timedelta)
 81 |     assert md.header == {"mock_header": "value"}
 82 |     assert md.comment is None
 83 | 
 84 | 
 85 | def test_what_organizations(requests_mock):
 86 |     """Tests Water quality portal organizations query"""
 87 |     request_url = (
 88 |         "https://www.waterqualitydata.us/data/Organization/Search?statecode=US%3A34&characteristicName=Chloride"
 89 |         "&mimeType=csv"
 90 |     )
 91 |     response_file_path = "data/wqp_organizations.txt"
 92 |     mock_request(requests_mock, request_url, response_file_path)
 93 |     df, md = what_organizations(statecode="US:34", characteristicName="Chloride")
 94 |     assert type(df) is DataFrame
 95 |     assert df.size == 576
 96 |     assert md.url == request_url
 97 |     assert isinstance(md.query_time, datetime.timedelta)
 98 |     assert md.header == {"mock_header": "value"}
 99 |     assert md.comment is None
100 | 
101 | 
102 | def test_what_projects(requests_mock):
103 |     """Tests Water quality portal projects query"""
104 |     request_url = (
105 |         "https://www.waterqualitydata.us/data/Project/Search?statecode=US%3A34&characteristicName=Chloride"
106 |         "&mimeType=csv"
107 |     )
108 |     response_file_path = "data/wqp_projects.txt"
109 |     mock_request(requests_mock, request_url, response_file_path)
110 |     df, md = what_projects(statecode="US:34", characteristicName="Chloride")
111 |     assert type(df) is DataFrame
112 |     assert df.size == 530
113 |     assert md.url == request_url
114 |     assert isinstance(md.query_time, datetime.timedelta)
115 |     assert md.header == {"mock_header": "value"}
116 |     assert md.comment is None
117 | 
118 | 
119 | def test_what_activities(requests_mock):
120 |     """Tests Water quality portal activities query"""
121 |     request_url = (
122 |         "https://www.waterqualitydata.us/data/Activity/Search?statecode=US%3A34&characteristicName=Chloride"
123 |         "&mimeType=csv"
124 |     )
125 |     response_file_path = "data/wqp_activities.txt"
126 |     mock_request(requests_mock, request_url, response_file_path)
127 |     df, md = what_activities(statecode="US:34", characteristicName="Chloride")
128 |     assert type(df) is DataFrame
129 |     assert df.size == 5087443
130 |     assert md.url == request_url
131 |     assert isinstance(md.query_time, datetime.timedelta)
132 |     assert md.header == {"mock_header": "value"}
133 |     assert md.comment is None
134 | 
135 | 
136 | def test_what_detection_limits(requests_mock):
137 |     """Tests Water quality portal detection limits query"""
138 |     request_url = (
139 |         "https://www.waterqualitydata.us/data/ResultDetectionQuantitationLimit/Search?statecode=US%3A34&characteristicName=Chloride"
140 |         "&mimeType=csv"
141 |     )
142 |     response_file_path = "data/wqp_detection_limits.txt"
143 |     mock_request(requests_mock, request_url, response_file_path)
144 |     df, md = what_detection_limits(statecode="US:34", characteristicName="Chloride")
145 |     assert type(df) is DataFrame
146 |     assert df.size == 98770
147 |     assert md.url == request_url
148 |     assert isinstance(md.query_time, datetime.timedelta)
149 |     assert md.header == {"mock_header": "value"}
150 |     assert md.comment is None
151 | 
152 | 
153 | def test_what_habitat_metrics(requests_mock):
154 |     """Tests Water quality portal habitat metrics query"""
155 |     request_url = (
156 |         "https://www.waterqualitydata.us/data/BiologicalMetric/Search?statecode=US%3A34&characteristicName=Chloride"
157 |         "&mimeType=csv"
158 |     )
159 |     response_file_path = "data/wqp_habitat_metrics.txt"
160 |     mock_request(requests_mock, request_url, response_file_path)
161 |     df, md = what_habitat_metrics(statecode="US:34", characteristicName="Chloride")
162 |     assert type(df) is DataFrame
163 |     assert df.size == 48114
164 |     assert md.url == request_url
165 |     assert isinstance(md.query_time, datetime.timedelta)
166 |     assert md.header == {"mock_header": "value"}
167 |     assert md.comment is None
168 | 
169 | 
170 | def test_what_project_weights(requests_mock):
171 |     """Tests Water quality portal project weights query"""
172 |     request_url = (
173 |         "https://www.waterqualitydata.us/data/ProjectMonitoringLocationWeighting/Search?statecode=US%3A34&characteristicName=Chloride"
174 |         "&mimeType=csv"
175 |     )
176 |     response_file_path = "data/wqp_project_weights.txt"
177 |     mock_request(requests_mock, request_url, response_file_path)
178 |     df, md = what_project_weights(statecode="US:34", characteristicName="Chloride")
179 |     assert type(df) is DataFrame
180 |     assert df.size == 33098
181 |     assert md.url == request_url
182 |     assert isinstance(md.query_time, datetime.timedelta)
183 |     assert md.header == {"mock_header": "value"}
184 |     assert md.comment is None
185 | 
186 | 
187 | def test_what_activity_metrics(requests_mock):
188 |     """Tests Water quality portal activity metrics query"""
189 |     request_url = (
190 |         "https://www.waterqualitydata.us/data/ActivityMetric/Search?statecode=US%3A34&characteristicName=Chloride"
191 |         "&mimeType=csv"
192 |     )
193 |     response_file_path = "data/wqp_activity_metrics.txt"
194 |     mock_request(requests_mock, request_url, response_file_path)
195 |     df, md = what_activity_metrics(statecode="US:34", characteristicName="Chloride")
196 |     assert type(df) is DataFrame
197 |     assert df.size == 378
198 |     assert md.url == request_url
199 |     assert isinstance(md.query_time, datetime.timedelta)
200 |     assert md.header == {"mock_header": "value"}
201 |     assert md.comment is None
202 | 
203 | 
204 | def mock_request(requests_mock, request_url, file_path):
205 |     with open(file_path) as text:
206 |         requests_mock.get(
207 |             request_url, text=text.read(), headers={"mock_header": "value"}
208 |         )
209 | 
210 | 
211 | def test_check_kwargs():
212 |     """Tests that correct errors are raised for invalid mimetypes."""
213 |     kwargs = {"mimeType": "geojson"}
214 |     with pytest.raises(NotImplementedError):
215 |         kwargs = _check_kwargs(kwargs)
216 |     kwargs = {"mimeType": "foo"}
217 |     with pytest.raises(ValueError):
218 |         kwargs = _check_kwargs(kwargs)
219 | 


--------------------------------------------------------------------------------