├── .flake8 ├── .github └── workflows │ ├── python-package.yml │ ├── python-publish.yml │ └── sphinx-docs.yml ├── .gitignore ├── .gitlab ├── issue_templates │ └── reviewer_checklist.md └── merge_request_templates │ └── reviewer_checklist.md ├── .pre-commit-config.yaml ├── .prettierrc.toml ├── CONTRIBUTING.md ├── DISCLAIMER.md ├── LICENSE.md ├── README.md ├── code.json ├── dataretrieval ├── __init__.py ├── codes │ ├── __init__.py │ ├── states.py │ └── timezones.py ├── nadp.py ├── nldi.py ├── nwis.py ├── samples.py ├── streamstats.py ├── utils.py ├── waterwatch.py └── wqp.py ├── demos ├── NWIS_demo_1.ipynb ├── R Python Vignette equivalents.ipynb ├── datasets │ └── peak_discharge_trends.csv ├── hydroshare │ ├── USGS_dataretrieval_DailyValues_Examples.ipynb │ ├── USGS_dataretrieval_GroundwaterLevels_Examples.ipynb │ ├── USGS_dataretrieval_Measurements_Examples.ipynb │ ├── USGS_dataretrieval_NLDI_Examples.ipynb │ ├── USGS_dataretrieval_ParameterCodes_Examples.ipynb │ ├── USGS_dataretrieval_Peaks_Examples.ipynb │ ├── USGS_dataretrieval_Ratings_Examples.ipynb │ ├── USGS_dataretrieval_SiteInfo_Examples.ipynb │ ├── USGS_dataretrieval_SiteInventory_Examples.ipynb │ ├── USGS_dataretrieval_Statistics_Examples.ipynb │ ├── USGS_dataretrieval_UnitValues_Examples.ipynb │ ├── USGS_dataretrieval_WaterSamples_Examples.ipynb │ └── USGS_dataretrieval_WaterUse_Examples.ipynb └── nwqn_data_pull │ ├── Dockerfile_dataretrieval │ ├── README.md │ ├── lithops.yaml │ ├── requirements.txt │ ├── retrieve_nwqn_samples.py │ └── retrieve_nwqn_streamflow.py ├── docs ├── Makefile └── source │ ├── .nojekyll │ ├── conf.py │ ├── examples │ ├── USGS_dataretrieval_DailyValues_Examples.nblink │ ├── USGS_dataretrieval_GroundwaterLevels_Examples.nblink │ ├── USGS_dataretrieval_Measurements_Examples.nblink │ ├── USGS_dataretrieval_ParameterCodes_Examples.nblink │ ├── USGS_dataretrieval_Peaks_Examples.nblink │ ├── USGS_dataretrieval_Ratings_Examples.nblink │ ├── USGS_dataretrieval_SiteInfo_Examples.nblink │ ├── USGS_dataretrieval_SiteInventory_Examples.nblink │ ├── USGS_dataretrieval_Statistics_Examples.nblink │ ├── USGS_dataretrieval_UnitValues_Examples.nblink │ ├── USGS_dataretrieval_WaterSamples_Examples.nblink │ ├── USGS_dataretrieval_WaterUse_Examples.nblink │ ├── datasets │ │ └── peak_discharge_trends.csv │ ├── index.rst │ ├── nwisdemo01.nblink │ ├── readme_examples.rst │ ├── rvignettes.nblink │ └── siteinfo_examples.rst │ ├── index.rst │ ├── meta │ ├── contributing.rst │ ├── installing.rst │ └── license.rst │ ├── reference │ ├── index.rst │ ├── nadp.rst │ ├── nwis.rst │ ├── samples.rst │ ├── streamstats.rst │ ├── utils.rst │ └── wqp.rst │ └── userguide │ ├── dataportals.rst │ ├── index.rst │ └── timeconventions.rst ├── pyproject.toml ├── requirements-dev.txt ├── setup.py └── tests ├── __init__.py ├── data ├── nldi_get_basin.json ├── nldi_get_features_by_comid.json ├── nldi_get_features_by_feature_source_with_nav_mode.json ├── nldi_get_features_by_feature_source_without_nav_mode.json ├── nldi_get_features_by_lat_long.json ├── nldi_get_flowlines.json ├── nldi_get_flowlines_by_comid.json ├── nwis_sites.txt ├── samples_results.txt ├── water_use_allegheny.txt ├── water_use_national.txt ├── waterdata_gwlevels.txt ├── waterdata_measurements.txt ├── waterdata_pmcodes.txt ├── waterdata_qwdata.txt ├── waterservices_dv.txt ├── waterservices_iv.txt ├── waterservices_peaks.txt ├── waterservices_ratings.txt ├── waterservices_site.txt ├── waterservices_stats.txt ├── wqp3_results.txt ├── wqp_activities.txt ├── wqp_activity_metrics.txt ├── wqp_detection_limits.txt ├── wqp_habitat_metrics.txt ├── wqp_organizations.txt ├── wqp_project_weights.txt ├── wqp_projects.txt ├── wqp_results.txt └── wqp_sites.txt ├── nadp_test.py ├── nldi_test.py ├── nwis_test.py ├── samples_test.py ├── utils_test.py ├── waterservices_test.py └── wqp_test.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | extend-ignore = E203, E704 4 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: ['main'] 9 | pull_request: 10 | branches: ['main'] 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: [ubuntu-latest, windows-latest] 19 | python-version: [3.8, 3.9, '3.10', 3.11, 3.12] 20 | 21 | steps: 22 | - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install .[test,nldi] 31 | - name: Lint with flake8 32 | run: | 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Test with pytest and report coverage 38 | run: | 39 | cd tests 40 | coverage run -m pytest 41 | coverage report -m 42 | cd .. 43 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 25 | - name: Set up Python 26 | uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | pip install setuptools setuptools-scm wheel twine check-manifest 34 | - name: Build package 35 | run: python -m build 36 | - name: Publish package 37 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 38 | with: 39 | user: __token__ 40 | password: ${{ secrets.PYPI_API_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/sphinx-docs.yml: -------------------------------------------------------------------------------- 1 | # This workflow builds the sphinx docs 2 | 3 | name: Sphinx Docs Build 4 | 5 | on: 6 | push: 7 | pull_request: 8 | 9 | jobs: 10 | docs: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 15 | with: 16 | persist-credentials: false 17 | - name: Install dataretrieval, dependencies, and Sphinx then build docs 18 | shell: bash -l {0} 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install .[doc] 22 | ipython kernel install --name "python3" --user 23 | sudo apt update -y && sudo apt install -y latexmk texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended dvipng pandoc 24 | (cd docs && make docs) 25 | (cd docs && make html) 26 | - name: Debug 27 | run: | 28 | echo $REF 29 | echo $EVENT_NAME 30 | echo ${{ github.event_name == 'push' }} 31 | echo ${{ github.ref == 'refs/heads/main' }} 32 | echo ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 33 | - name: Deploy to GitHub Pages 34 | uses: JamesIves/github-pages-deploy-action@881db5376404c5c8d621010bcbec0310b58d5e29 35 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 36 | with: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | EVENT_NAME: ${{ github.event_name }} 39 | REF: ${{ github.ref }} 40 | BRANCH: gh-pages 41 | FOLDER: docs/build/html 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | __pycachr__ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | **/__pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | .pytest_cache/ 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | *_version.py 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | .idea/ 32 | *.egg 33 | .miniconda 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Pipenv 45 | Pipfile 46 | Pipfile.lock 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # celery beat schedule file 87 | celerybeat-schedule 88 | 89 | # SageMath parsed files 90 | *.sage.py 91 | 92 | # dotenv 93 | .env 94 | 95 | # virtualenv 96 | .venv 97 | venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | 113 | # macOS 114 | *.DS_Store -------------------------------------------------------------------------------- /.gitlab/issue_templates/reviewer_checklist.md: -------------------------------------------------------------------------------- 1 | ## Review checklist for @dataretrieval 2 | 3 | Background information for reviewers [here](https://www.usgs.gov/products/software/software-management/types-software-review) 4 | 5 | *Please check off boxes as applicable, and elaborate in comments below.* 6 | 7 | - Code location https://code.usgs.gov/cmwsc/shwa/dataretrieval 8 | - author @[gitlab handle] 9 | 10 | ### Conflict of interest 11 | 12 | - [ ] I confirm that I have no COIs with reviewing this work, meaning that there is no relationship with the product or the product's authors or affiliated institutions that could influence or be perceived to influence the outcome of the review (if you are unsure whether you have a conflict, please speak to your supervisor _before_ starting your review). 13 | 14 | ### Adherence to Fundamental Science Practices 15 | 16 | - [ ] I confirm that I read and will adhere to the [Federal Source Code Policy for Scientific Software](https://www.usgs.gov/survey-manual/im-osqi-2019-01-review-and-approval-scientific-software-release) and relevant federal guidelines for approved software release as outlined in [SM502.1](https://code.usgs.gov/cmwsc/shwa/dataretrieval) and [SM502.4](https://www.usgs.gov/survey-manual/5024-fundamental-science-practices-review-approval-and-release-information-products). 17 | 18 | ### Security Review 19 | 20 | - [ ] No proprietary code is included 21 | - [ ] No Personally Identifiable Information (PII) is included 22 | - [ ] No other sensitive information such as data base passwords are included 23 | 24 | ### General checks 25 | 26 | - [ ] **Repository:** Is the source code for this software available? 27 | - [ ] **License:** Does the repository contain a plain-text LICENSE file? 28 | - [ ] **Disclaimer:** Does the repository have the USGS-required provisional Disclaimer? 29 | - [ ] **Contribution and authorship:** Has the submitting author made major contributions to the software? Does the full list of software authors seem appropriate and complete? 30 | - [ ] Does the repository have a code.json file? 31 | 32 | ### Documentation 33 | 34 | - [ ] **A statement of need**: Do the authors clearly state what problems the software is designed to solve and who the target audience is? 35 | - [ ] **Installation instructions:** Is there a clearly-stated list of dependencies? Ideally these should be handled with an automated package management solution. 36 | - [ ] **Example usage:** Do the authors include examples of how to use the software (ideally to solve real-world analysis problems)? 37 | - [ ] **Functionality documentation:** Is the core functionality of the software documented to a satisfactory level (e.g., API method documentation)? 38 | - [ ] **Automated tests:** Are there automated tests or manual steps described so that the functionality of the software can be verified? 39 | - [ ] **Community guidelines:** Are there clear guidelines for third parties wishing to 1) Contribute to the software 2) Report issues or problems with the software 3) Seek support? This information could be found in the README, CONTRIBUTING, or DESCRIPTION sections of the documentation. 40 | - [ ] **References:** When present, do references in the text use the proper [citation syntax](https://pandoc.org/MANUAL.html#extension-citations)? 41 | 42 | ### Functionality 43 | 44 | - [ ] **Installation:** Does installation succeed as outlined in the documentation? 45 | - [ ] **Functionality:** Have the functional claims of the software been confirmed? 46 | - [ ] **Performance:** If there are any performance claims of the software, have they been confirmed? (If there are no claims, please check off this item.) 47 | - [ ] **Automated tests:** Do unit tests cover essential functions of the software and a reasonable range of inputs and conditions? Do all tests pass when run locally? 48 | - [ ] **Packaging guidelines:** Does the software conform to the applicable packaging guidelines? R packaging guidelines [here](https://devguide.ropensci.org/building.html#building); Python packaging guidelines [here](https://packaging.python.org/en/latest/) 49 | 50 | ### Review Comments 51 | 52 | - Add free text comments here. 53 | 54 | ### Reviewer checklist source statement 55 | 56 | This checklist combines elements of the [rOpenSci](https://devguide.ropensci.org/) review guidelines and the Journal of Open Source Science (JOSS) review [checklist](https://joss.readthedocs.io/en/latest/review_checklist.html): it has been modified for use with USGS software releases. 57 | -------------------------------------------------------------------------------- /.gitlab/merge_request_templates/reviewer_checklist.md: -------------------------------------------------------------------------------- 1 | ## Review checklist for @dataretrieval 2 | 3 | Background information for reviewers [here](https://www.usgs.gov/products/software/software-management/types-software-review) 4 | 5 | *Please check off boxes as applicable, and elaborate in comments below.* 6 | 7 | - Code location https://code.usgs.gov/cmwsc/shwa/dataretrieval 8 | - author @[gitlab handle] 9 | 10 | ### Conflict of interest 11 | 12 | - [ ] I confirm that I have no COIs with reviewing this work, meaning that there is no relationship with the product or the product's authors or affiliated institutions that could influence or be perceived to influence the outcome of the review (if you are unsure whether you have a conflict, please speak to your supervisor _before_ starting your review). 13 | 14 | ### Adherence to Fundamental Science Practices 15 | 16 | - [ ] I confirm that I read and will adhere to the [Federal Source Code Policy for Scientific Software](https://www.usgs.gov/survey-manual/im-osqi-2019-01-review-and-approval-scientific-software-release) and relevant federal guidelines for approved software release as outlined in [SM502.1](https://code.usgs.gov/cmwsc/shwa/dataretrieval) and [SM502.4](https://www.usgs.gov/survey-manual/5024-fundamental-science-practices-review-approval-and-release-information-products). 17 | 18 | ### Security Review 19 | 20 | - [ ] No proprietary code is included 21 | - [ ] No Personally Identifiable Information (PII) is included 22 | - [ ] No other sensitive information such as data base passwords are included 23 | 24 | ### General checks 25 | 26 | - [ ] **Repository:** Is the source code for this software available? 27 | - [ ] **License:** Does the repository contain a plain-text LICENSE file? 28 | - [ ] **Disclaimer:** Does the repository have the USGS-required provisional Disclaimer? 29 | - [ ] **Contribution and authorship:** Has the submitting author made major contributions to the software? Does the full list of software authors seem appropriate and complete? 30 | - [ ] Does the repository have a code.json file? 31 | 32 | ### Documentation 33 | 34 | - [ ] **A statement of need**: Do the authors clearly state what problems the software is designed to solve and who the target audience is? 35 | - [ ] **Installation instructions:** Is there a clearly-stated list of dependencies? Ideally these should be handled with an automated package management solution. 36 | - [ ] **Example usage:** Do the authors include examples of how to use the software (ideally to solve real-world analysis problems)? 37 | - [ ] **Functionality documentation:** Is the core functionality of the software documented to a satisfactory level (e.g., API method documentation)? 38 | - [ ] **Automated tests:** Are there automated tests or manual steps described so that the functionality of the software can be verified? 39 | - [ ] **Community guidelines:** Are there clear guidelines for third parties wishing to 1) Contribute to the software 2) Report issues or problems with the software 3) Seek support? This information could be found in the README, CONTRIBUTING, or DESCRIPTION sections of the documentation. 40 | - [ ] **References:** When present, do references in the text use the proper [citation syntax](https://pandoc.org/MANUAL.html#extension-citations)? 41 | 42 | ### Functionality 43 | 44 | - [ ] **Installation:** Does installation succeed as outlined in the documentation? 45 | - [ ] **Functionality:** Have the functional claims of the software been confirmed? 46 | - [ ] **Performance:** If there are any performance claims of the software, have they been confirmed? (If there are no claims, please check off this item.) 47 | - [ ] **Automated tests:** Do unit tests cover essential functions of the software and a reasonable range of inputs and conditions? Do all tests pass when run locally? 48 | - [ ] **Packaging guidelines:** Does the software conform to the applicable packaging guidelines? R packaging guidelines [here](https://devguide.ropensci.org/building.html#building); Python packaging guidelines [here](https://packaging.python.org/en/latest/) 49 | 50 | ### Review Comments 51 | 52 | - Add free text comments here. 53 | 54 | ### Reviewer checklist source statement 55 | 56 | This checklist combines elements of the [rOpenSci](https://devguide.ropensci.org/) review guidelines and the Journal of Open Source Science (JOSS) review [checklist](https://joss.readthedocs.io/en/latest/review_checklist.html): it has been modified for use with USGS software releases. 57 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: monthly 3 | autofix_prs: false 4 | 5 | repos: 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v4.4.0 8 | hooks: 9 | - id: trailing-whitespace 10 | - id: end-of-file-fixer 11 | - id: check-docstring-first 12 | - id: check-json 13 | - id: check-yaml 14 | - id: double-quote-string-fixer 15 | - id: debug-statements 16 | - id: mixed-line-ending 17 | 18 | - repo: https://github.com/asottile/pyupgrade 19 | rev: v3.3.1 20 | hooks: 21 | - id: pyupgrade 22 | args: 23 | - '--py38-plus' 24 | 25 | - repo: https://github.com/psf/black 26 | rev: 23.3.0 27 | hooks: 28 | - id: black 29 | - id: black-jupyter 30 | 31 | - repo: https://github.com/keewis/blackdoc 32 | rev: v0.3.8 33 | hooks: 34 | - id: blackdoc 35 | 36 | - repo: https://github.com/PyCQA/flake8 37 | rev: 6.0.0 38 | hooks: 39 | - id: flake8 40 | 41 | - repo: https://github.com/PyCQA/isort 42 | rev: 5.12.0 43 | hooks: 44 | - id: isort 45 | 46 | - repo: https://github.com/pre-commit/mirrors-prettier 47 | rev: v3.0.0-alpha.6 48 | hooks: 49 | - id: prettier 50 | -------------------------------------------------------------------------------- /.prettierrc.toml: -------------------------------------------------------------------------------- 1 | semi = false 2 | singleQuote = true 3 | -------------------------------------------------------------------------------- /DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | Disclaimer 2 | ========== 3 | 4 | This software is preliminary or provisional and is subject to revision. It is being provided to meet the need for timely best science. The software has not received final approval by the U.S. Geological Survey (USGS). No warranty, expressed or implied, is made by the USGS or the U.S. Government as to the functionality of the software and related material nor shall the fact of release constitute any such warranty. The software is provided on the condition that neither the USGS nor the U.S. Government shall be held liable for any damages resulting from the authorized or unauthorized use of the software. 5 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | Unless otherwise noted, this project is in the public domain in the United 5 | States because it contains materials that originally came from the United 6 | States Geological Survey, an agency of the United States Department of 7 | Interior. For more information, see the official USGS copyright policy at 8 | https://www.usgs.gov/information-policies-and-instructions/copyrights-and-credits 9 | 10 | Additionally, we waive copyright and related rights in the work 11 | worldwide through the CC0 1.0 Universal public domain dedication. 12 | 13 | 14 | CC0 1.0 Universal Summary 15 | ------------------------- 16 | 17 | This is a human-readable summary of the 18 | [Legal Code (read the full text)][1]. 19 | 20 | 21 | ### No Copyright 22 | 23 | The person who associated a work with this deed has dedicated the work to 24 | the public domain by waiving all of his or her rights to the work worldwide 25 | under copyright law, including all related and neighboring rights, to the 26 | extent allowed by law. 27 | 28 | You can copy, modify, distribute and perform the work, even for commercial 29 | purposes, all without asking permission. 30 | 31 | 32 | ### Other Information 33 | 34 | In no way are the patent or trademark rights of any person affected by CC0, 35 | nor are the rights that other persons may have in the work or in how the 36 | work is used, such as publicity or privacy rights. 37 | 38 | Unless expressly stated otherwise, the person who associated a work with 39 | this deed makes no warranties about the work, and disclaims liability for 40 | all uses of the work, to the fullest extent permitted by applicable law. 41 | When using or citing the work, you should not imply endorsement by the 42 | author or the affirmer. 43 | 44 | 45 | 46 | [1]: https://creativecommons.org/publicdomain/zero/1.0/legalcode 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dataretrieval: Download hydrologic data 2 | 3 | ![PyPI - Version](https://img.shields.io/pypi/v/dataretrieval) 4 | ![Conda Version](https://img.shields.io/conda/v/conda-forge/dataretrieval) 5 | ![Downloads](https://static.pepy.tech/badge/dataretrieval) 6 | 7 | :warning: USGS data availability and format are changing on Water Quality Portal (WQP). Since March 2024, data obtained from WQP legacy profiles will not include new USGS data or recent updates to existing data. 8 | To view the status of changes in data availability and code functionality, visit: https://doi-usgs.github.io/dataRetrieval/articles/Status.html 9 | 10 | :mega: **09/03/2024:** The groundwater levels service has switched endpoints, and `dataretrieval` was updated accordingly in [`v1.0.10`](https://github.com/DOI-USGS/dataretrieval-python/releases/tag/v1.0.10). Older versions using the discontinued endpoint will return 503 errors for `nwis.get_gwlevels` or the `service='gwlevels'` argument. Visit [Water Data For the Nation](https://waterdata.usgs.gov/blog/wdfn-waterservices-2024/) for more information. 11 | 12 | ## What is dataretrieval? 13 | `dataretrieval` was created to simplify the process of loading hydrologic data into the Python environment. 14 | Like the original R version [`dataRetrieval`](https://github.com/DOI-USGS/dataRetrieval), 15 | it is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrology 16 | data that are available on the Web, as well as data from the Water 17 | Quality Portal (WQP), which currently houses water quality data from the 18 | Environmental Protection Agency (EPA), U.S. Department of Agriculture 19 | (USDA), and USGS. Direct USGS data is obtained from a service called the 20 | National Water Information System (NWIS). 21 | 22 | Note that the python version is not a direct port of the original: it attempts to reproduce the functionality of the R package, 23 | though its organization and interface often differ. 24 | 25 | If there's a hydrologic or environmental data portal that you'd like dataretrieval to 26 | work with, raise it as an [issue](https://github.com/USGS-python/dataretrieval/issues). 27 | 28 | Here's an example using `dataretrieval` to retrieve data from the National Water Information System (NWIS). 29 | 30 | ```python 31 | # first import the functions for downloading data from NWIS 32 | import dataretrieval.nwis as nwis 33 | 34 | # specify the USGS site code for which we want data. 35 | site = '03339000' 36 | 37 | 38 | # get instantaneous values (iv) 39 | df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01') 40 | 41 | # get water quality samples (qwdata) 42 | df2 = nwis.get_record(sites=site, service='qwdata', start='2017-12-31', end='2018-01-01') 43 | 44 | # get basic info about the site 45 | df3 = nwis.get_record(sites=site, service='site') 46 | ``` 47 | Services available from NWIS include: 48 | - instantaneous values (iv) 49 | - daily values (dv) 50 | - statistics (stat) 51 | - site info (site) 52 | - discharge peaks (peaks) 53 | - discharge measurements (measurements) 54 | * water quality samples (qwdata) 55 | 56 | To access the full functionality available from NWIS web services, nwis.get record appends any additional kwargs into the REST request. For example 57 | ```python 58 | nwis.get_record(sites='03339000', service='dv', start='2017-12-31', parameterCd='00060') 59 | ``` 60 | will download daily data with the parameter code 00060 (discharge). 61 | 62 | ## Accessing the "Internal" NWIS 63 | If you're connected to the USGS network, dataretrieval call pull from the internal (non-public) NWIS interface. 64 | Most dataretrieval functions pass kwargs directly to NWIS's REST API, which provides simple access to internal data; simply specify "access='3'". 65 | For example 66 | ```python 67 | nwis.get_record(sites='05404147',service='iv', start='2021-01-01', end='2021-3-01', access='3') 68 | ``` 69 | 70 | More services and documentation to come! 71 | 72 | ## Quick start 73 | 74 | dataretrieval can be installed using pip: 75 | 76 | $ python3 -m pip install -U dataretrieval 77 | 78 | or conda: 79 | 80 | $ conda install -c conda-forge dataretrieval 81 | 82 | More examples of use are include in [`demos`](https://github.com/USGS-python/dataretrieval/tree/main/demos). 83 | 84 | ## Issue tracker 85 | 86 | Please report any bugs and enhancement ideas using the dataretrieval issue 87 | tracker: 88 | 89 | https://github.com/USGS-python/dataretrieval/issues 90 | 91 | Feel free to also ask questions on the tracker. 92 | 93 | 94 | ## Contributing 95 | 96 | Any help in testing, development, documentation and other tasks is welcome. 97 | For more details, see the file [CONTRIBUTING.md](CONTRIBUTING.md). 98 | 99 | 100 | ## Package Support 101 | The Water Mission Area of the USGS supports the development and maintenance of `dataretrieval` 102 | and most likely further into the future. 103 | Resources are available primarily for maintenance and responding to user questions. 104 | Priorities on the development of new features are determined by the `dataretrieval` development team. 105 | 106 | 107 | ## Acknowledgments 108 | This material is partially based upon work supported by the National Science Foundation (NSF) under award 1931297. 109 | Any opinions, findings, conclusions, or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the NSF. 110 | 111 | ## Disclaimer 112 | 113 | This software is preliminary or provisional and is subject to revision. 114 | It is being provided to meet the need for timely best science. 115 | The software has not received final approval by the U.S. Geological Survey (USGS). 116 | No warranty, expressed or implied, is made by the USGS or the U.S. Government as to the functionality of the software and related material nor shall the fact of release constitute any such warranty. 117 | The software is provided on the condition that neither the USGS nor the U.S. Government shall be held liable for any damages resulting from the authorized or unauthorized use of the software. 118 | 119 | ## Citation 120 | 121 | Hodson, T.O., Hariharan, J.A., Black, S., and Horsburgh, J.S., 2023, dataretrieval (Python): a Python package for discovering 122 | and retrieving water data available from U.S. federal hydrologic web services: 123 | U.S. Geological Survey software release, 124 | https://doi.org/10.5066/P94I5TX3. 125 | -------------------------------------------------------------------------------- /code.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "dataretrieval", 4 | "organization": "U.S. Geological Survey", 5 | "description": "A Python package for discovering and retrieving water data from U.S. federal hydrologic web services.", 6 | "version": "main", 7 | "status": "Development", 8 | 9 | "permissions": { 10 | "usageType": "openSource", 11 | "licenses": [ 12 | { 13 | "name": "Public Domain, CC0-1.0", 14 | "URL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/main/LICENSE.md" 15 | } 16 | ] 17 | }, 18 | 19 | "homepageURL": "https://code.usgs.gov/water/dataretrieval-python", 20 | "downloadURL": "https://code.usgs.gov/water/dataretrieval-python/-/archive/main/dataretrieval-python-main.zip", 21 | "disclaimerURL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/main/DISCLAIMER.md", 22 | "repositoryURL": "https://code.usgs.gov/water/dataretrieval-python.git", 23 | "vcs": "git", 24 | 25 | "laborHours": 0, 26 | 27 | "tags": [ 28 | "Python", 29 | "USGS" 30 | ], 31 | 32 | "languages": [ 33 | "Python" 34 | ], 35 | 36 | "contact": { 37 | "name": "Timothy 0. Hodson", 38 | "email": "thodson@usgs.gov" 39 | }, 40 | 41 | "date": { 42 | "metadataLastUpdated": "2024-09-17" 43 | } 44 | }, 45 | { 46 | "name": "dataretrieval", 47 | "organization": "U.S. Geological Survey", 48 | "description": "A Python package for discovering and retrieving water data from U.S. federal hydrologic web services.", 49 | "version": "v1.0.2", 50 | "status": "Production", 51 | 52 | "permissions": { 53 | "usageType": "openSource", 54 | "licenses": [ 55 | { 56 | "name": "Public Domain, CC0-1.0", 57 | "URL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/v1.0.2/LICENSE.md" 58 | } 59 | ] 60 | }, 61 | 62 | "homepageURL": "https://code.usgs.gov/water/dataretrieval-python", 63 | "downloadURL": "https://code.usgs.gov/water/dataretrieval-python/-/archive/v1.0.2/dataretrieval-python-v1.0.2.zip", 64 | "disclaimerURL": "https://code.usgs.gov/water/dataretrieval-python/-/raw/v1.0.2/DISCLAIMER.md", 65 | "repositoryURL": "https://code.usgs.gov/water/dataretrieval-python.git", 66 | "vcs": "git", 67 | 68 | "laborHours": 0, 69 | 70 | "tags": [ 71 | "Python", 72 | "USGS" 73 | ], 74 | 75 | "languages": [ 76 | "Python" 77 | ], 78 | 79 | "contact": { 80 | "name": "Timothy 0. Hodson", 81 | "email": "thodson@usgs.gov" 82 | }, 83 | 84 | "date": { 85 | "metadataLastUpdated": "2024-08-30" 86 | } 87 | } 88 | ] 89 | -------------------------------------------------------------------------------- /dataretrieval/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import PackageNotFoundError, version 2 | 3 | from dataretrieval.nadp import * 4 | from dataretrieval.nwis import * 5 | from dataretrieval.samples import * 6 | from dataretrieval.streamstats import * 7 | from dataretrieval.utils import * 8 | from dataretrieval.waterwatch import * 9 | from dataretrieval.wqp import * 10 | 11 | try: 12 | __version__ = version("dataretrieval") 13 | except PackageNotFoundError: 14 | __version__ = "version-unknown" 15 | -------------------------------------------------------------------------------- /dataretrieval/codes/__init__.py: -------------------------------------------------------------------------------- 1 | from .states import * 2 | from .timezones import * 3 | -------------------------------------------------------------------------------- /dataretrieval/codes/states.py: -------------------------------------------------------------------------------- 1 | """List of 2-digit state codes with commented full names.""" 2 | 3 | state_codes = { 4 | "Alabama": "al", 5 | "Alaska": "ak", 6 | "Arizona": "az", 7 | "Arkansas": "ar", 8 | "California": "ca", 9 | "Colorado": "co", 10 | "Connecticut": "ct", 11 | "Delaware": "de", 12 | "District of Columbia": "dc", 13 | "Florida": "fl", 14 | "Georgia": "ga", 15 | "Hawaii": "hi", 16 | "Idaho": "id", 17 | "Illinois": "il", 18 | "Indiana": "in", 19 | "Iowa": "ia", 20 | "Kansas": "ks", 21 | "Kentucky": "ky", 22 | "Louisiana": "la", 23 | "Maine": "me", 24 | "Maryland": "md", 25 | "Massachusetts": "ma", 26 | "Michigan": "mi", 27 | "Minnesota": "mn", 28 | "Mississippi": "ms", 29 | "Missouri": "mo", 30 | "Montana": "mt", 31 | "Nebraska": "ne", 32 | "Nevada": "nv", 33 | "New Hampshire": "nh", 34 | "New Jersey": "nj", 35 | "New Mexico": "nm", 36 | "New York": "ny", 37 | "North Carolina": "nc", 38 | "North Dakota": "nd", 39 | "Ohio": "oh", 40 | "Oklahoma": "ok", 41 | "Oregon": "or", 42 | "Pennsylvania": "pa", 43 | "Rhode Island": "ri", 44 | "South Carolina": "sc", 45 | "South Dakota": "sd", 46 | "Tennessee": "tn", 47 | "Texas": "tx", 48 | "Utah": "ut", 49 | "Vermont": "vt", 50 | "Virginia": "va", 51 | "Washington": "wa", 52 | "West Virginia": "wv", 53 | "Wisconsin": "wi", 54 | "Wyoming": "wy", 55 | } 56 | 57 | fips_codes = { 58 | "Alabama": "01", 59 | "Alaska": "02", 60 | "Arizona": "04", 61 | "Arkansas": "05", 62 | "California": "06", 63 | "Colorado": "08", 64 | "Connecticut": "09", 65 | "Delaware": "10", 66 | "District of Columbia": "11", 67 | "Florida": "12", 68 | "Georgia": "13", 69 | "Hawaii": "15", 70 | "Idaho": "16", 71 | "Illinois": "17", 72 | "Indiana": "18", 73 | "Iowa": "19", 74 | "Kansas": "20", 75 | "Kentucky": "21", 76 | "Louisiana": "22", 77 | "Maine": "23", 78 | "Maryland": "24", 79 | "Massachusetts": "25", 80 | "Michigan": "26", 81 | "Minnesota": "27", 82 | "Mississippi": "28", 83 | "Missouri": "29", 84 | "Montana": "30", 85 | "Nebraska": "31", 86 | "Nevada": "32", 87 | "New Hampshire": "33", 88 | "New Jersey": "34", 89 | "New Mexico": "35", 90 | "New York": "36", 91 | "North Carolina": "37", 92 | "North Dakota": "38", 93 | "Ohio": "39", 94 | "Oklahoma": "40", 95 | "Oregon": "41", 96 | "Pennsylvania": "42", 97 | "Rhode Island": "44", 98 | "South Carolina": "45", 99 | "South Dakota": "46", 100 | "Tennessee": "47", 101 | "Texas": "48", 102 | "Utah": "49", 103 | "Vermont": "50", 104 | "Virginia": "51", 105 | "Washington": "53", 106 | "West Virginia": "54", 107 | "Wisconsin": "55", 108 | "Wyoming": "56", 109 | } 110 | -------------------------------------------------------------------------------- /dataretrieval/codes/timezones.py: -------------------------------------------------------------------------------- 1 | """ 2 | Time zone information 3 | """ 4 | 5 | tz_str = """-1200 Y 6 | -1100 X NUT SST 7 | -1000 W CKT HAST HST TAHT TKT 8 | -0900 V AKST GAMT GIT HADT HNY 9 | -0800 U AKDT CIST HAY HNP PST PT 10 | -0700 T HAP HNR MST PDT 11 | -0600 S CST EAST GALT HAR HNC MDT 12 | -0500 R CDT COT EASST ECT EST ET HAC HNE PET 13 | -0400 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT 14 | -0300 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT 15 | -0200 O BRST FNT PMDT UYST WGST 16 | -0100 N AZOT CVT EGT 17 | +0000 Z EGST GMT UTC WET WT 18 | +0100 A CET DFT WAT WEDT WEST 19 | +0200 B CAT CEDT CEST EET SAST WAST 20 | +0300 C EAT EEDT EEST IDT MSK 21 | +0400 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT 22 | +0500 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT 23 | +0600 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST 24 | +0700 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB 25 | +0800 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST 26 | +0900 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT 27 | +1000 K AEST ChST PGT VLAT YAKST YAPT 28 | +1100 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT 29 | +1200 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT 30 | +1330 FJST NZDT 31 | +1130 NFT 32 | +1030 ACDT LHST 33 | +0930 ACST 34 | +0630 CCT MMT 35 | +0545 NPT 36 | +0530 SLT 37 | +0430 AFT IRDT 38 | +0330 IRST 39 | -0230 HAT NDT 40 | -0330 HNT NST NT 41 | -0430 HLV VET 42 | -0930 MART MIT""" 43 | 44 | tz = {} 45 | for tz_descr in map(str.split, tz_str.split("\n")): 46 | tz_offset = tz_descr[0] 47 | for tz_code in tz_descr[1:]: 48 | tz[tz_code] = tz_offset 49 | -------------------------------------------------------------------------------- /dataretrieval/nadp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools for retrieving data from the National Atmospheric Deposition Program 3 | (NADP) including the National Trends Network (NTN), the Mercury Deposition 4 | Network (MDN). 5 | 6 | National Trends Network 7 | ----------------------- 8 | The NTN provides long-term records of precipitation chemistry across the 9 | United States. See https://nadp.slh.wisc.edu/ntn for more info. 10 | 11 | Mercury Deposition Network 12 | -------------------------- 13 | The MDN provides long-term records of total mercury (Hg) concentration and 14 | deposition in precipitation in the United States and Canada. For more 15 | information visit https://nadp.slh.wisc.edu/networks/mercury-deposition-network/ 16 | 17 | Notes 18 | ----- 19 | Gridded data on NADP is served as zipped tif files. Functions in this module 20 | will either download and extract the data, when a path is specified, or open 21 | the data as a GDAL memory-mapped file when no path is specified. 22 | 23 | .. todo:: 24 | 25 | - include AIRMoN, AMNet, and AMoN 26 | - flexible handling of strings for parameters and measurement types 27 | - add errorchecking 28 | - add tests 29 | 30 | """ 31 | 32 | import io 33 | import os 34 | import re 35 | import zipfile 36 | from os.path import basename 37 | 38 | import requests 39 | 40 | NADP_URL = "https://nadp.slh.wisc.edu" 41 | NADP_MAP_EXT = "filelib/maps" 42 | 43 | NTN_CONC_PARAMS = ["pH", "So4", "NO3", "NH4", "Ca", "Mg", "K", "Na", "Cl", "Br"] 44 | NTN_DEP_PARAMS = [ 45 | "H", 46 | "So4", 47 | "NO3", 48 | "NH4", 49 | "Ca", 50 | "Mg", 51 | "K", 52 | "Na", 53 | "Cl", 54 | "Br", 55 | "N", 56 | "SPlusN", 57 | ] 58 | 59 | NTN_MEAS_TYPE = ["conc", "dep", "precip"] # concentration or deposition 60 | 61 | 62 | class NADP_ZipFile(zipfile.ZipFile): 63 | """Extend zipfile.ZipFile for working on data from NADP""" 64 | 65 | def tif_name(self): 66 | """Get the name of the tif file in the zip file.""" 67 | filenames = self.namelist() 68 | r = re.compile(".*tif$") 69 | tif_list = list(filter(r.match, filenames)) 70 | return tif_list[0] 71 | 72 | def tif(self): 73 | """Read the tif file in the zip file.""" 74 | return self.read(self.tif_name()) 75 | 76 | 77 | def get_annual_MDN_map(measurement_type, year, path): 78 | """Download a MDN map from NDAP. 79 | 80 | This function looks for a zip file containing gridded information at: 81 | https://nadp.slh.wisc.edu/maps-data/mdn-gradient-maps/. 82 | The function will download the zip file and extract it, exposing the tif 83 | file if a path is provided. 84 | 85 | Parameters 86 | ---------- 87 | measurement_type: string 88 | The type of measurement (concentration or deposition) as a string, 89 | either 'conc' or 'dep' respectively. 90 | 91 | year: string 92 | Year as a string 'YYYY' 93 | 94 | path: string 95 | Download directory. 96 | 97 | Returns 98 | ------- 99 | path: string 100 | Path that zip file was extracted into if path was specified. 101 | 102 | Examples 103 | -------- 104 | .. code:: 105 | 106 | >>> # get map of mercury concentration in 2010 and extract it to a path 107 | >>> data_path = dataretrieval.nadp.get_annual_MDN_map( 108 | ... measurement_type="conc", year="2010", path="somepath" 109 | ... ) 110 | 111 | """ 112 | url = f"{NADP_URL}/{NADP_MAP_EXT}/MDN/grids/" 113 | 114 | filename = f"Hg_{measurement_type}_{year}.zip" 115 | 116 | z = get_zip(url, filename) 117 | 118 | if path: 119 | z.extractall(path) 120 | 121 | return f"{path}{os.sep}{basename(filename)}" 122 | 123 | 124 | def get_annual_NTN_map(measurement_type, measurement=None, year=None, path="."): 125 | """Download a NTN map from NDAP. 126 | 127 | This function looks for a zip file containing gridded information at: 128 | https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/. 129 | The function will download the zip file and extract it, exposing the tif 130 | file at the provided path. 131 | 132 | .. note:: 133 | 134 | Measurement type abbreviations for concentration and deposition are 135 | all lower-case, but for precipitation data, the first letter must be 136 | capitalized! 137 | 138 | Parameters 139 | ---------- 140 | measurement : string 141 | The measured constituent to return. 142 | measurement_type : string 143 | The type of measurement, 'conc', 'dep', or 'Precip', which represent 144 | concentration, deposition, or precipitation respectively. 145 | year : string 146 | Year as a string 'YYYY' 147 | path : string 148 | Download directory, defaults to current directory if not specified. 149 | 150 | Returns 151 | ------- 152 | path: string 153 | Path that zip file was extracted into if path was specified. 154 | 155 | Examples 156 | -------- 157 | .. code:: 158 | 159 | >>> # get a map of precipitation in 2015 and extract it to a path 160 | >>> data_path = dataretrieval.nadp.get_annual_NTN_map( 161 | ... measurement_type="Precip", year="2015", path="somepath" 162 | ... ) 163 | 164 | """ 165 | url = f"{NADP_URL}/{NADP_MAP_EXT}/NTN/grids/{year}/" 166 | 167 | filename = f"{measurement_type}_{year}.zip" 168 | 169 | if measurement: 170 | filename = f"{measurement}_{filename}" 171 | 172 | z = get_zip(url, filename) 173 | 174 | if path: 175 | z.extractall(path) 176 | 177 | return f"{path}{os.sep}{basename(filename)}" 178 | 179 | 180 | def get_zip(url, filename): 181 | """Gets a ZipFile at url and returns it 182 | 183 | Parameters 184 | ---------- 185 | url : string 186 | URL to zip file 187 | 188 | filename : string 189 | Name of zip file 190 | 191 | Returns 192 | ------- 193 | ZipFile 194 | 195 | .. todo:: 196 | 197 | finish docstring 198 | 199 | """ 200 | req = requests.get(url + filename) 201 | req.raise_for_status() 202 | 203 | # z = zipfile.ZipFile(io.BytesIO(req.content)) 204 | z = NADP_ZipFile(io.BytesIO(req.content)) 205 | return z 206 | -------------------------------------------------------------------------------- /dataretrieval/streamstats.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is a wrapper for the streamstats API (`streamstats documentation`_). 3 | 4 | .. _streamstats documentation: https://streamstats.usgs.gov/streamstatsservices/#/ 5 | 6 | """ 7 | 8 | import json 9 | 10 | import requests 11 | 12 | 13 | def download_workspace(workspaceID, format=""): 14 | """Function to download streamstats workspace. 15 | 16 | Parameters 17 | ---------- 18 | workspaceID: string 19 | Service workspace received from watershed result 20 | 21 | format: string 22 | Download return format. Default will return ESRI geodatabase zipfile. 23 | 'SHAPE' will return a zip file containing shape format. 24 | 25 | Returns 26 | ------- 27 | r: geodatabase or shapefiles 28 | A zip file containing the workspace contents, in either a 29 | geodatabase or shape files. 30 | 31 | """ 32 | payload = {"workspaceID": workspaceID, "format": format} 33 | url = "https://streamstats.usgs.gov/streamstatsservices/download" 34 | 35 | r = requests.get(url, params=payload) 36 | 37 | r.raise_for_status() 38 | return r 39 | # data = r.raw.read() 40 | 41 | # with open(filepath, 'wb') as f: 42 | # f.write(data) 43 | 44 | # return 45 | 46 | 47 | def get_sample_watershed(): 48 | """Sample function to get a watershed object for a location in NY. 49 | 50 | Makes the function call :obj:`dataretrieval.streamstats.get_watershed` 51 | with the parameters 'NY', -74.524, 43.939, and returns the watershed 52 | object. 53 | 54 | Returns 55 | ------- 56 | Watershed: :obj:`dataretrieval.streamstats.Watershed` 57 | Custom object that contains the watershed information as extracted 58 | from the streamstats JSON object. 59 | 60 | """ 61 | return get_watershed("NY", -74.524, 43.939) 62 | 63 | 64 | def get_watershed( 65 | rcode, 66 | xlocation, 67 | ylocation, 68 | crs=4326, 69 | includeparameters=True, 70 | includeflowtypes=False, 71 | includefeatures=True, 72 | simplify=True, 73 | format="geojson", 74 | ): 75 | """Get watershed object based on location 76 | 77 | **Streamstats documentation:** 78 | Returns a watershed object. The request configuration will determine the 79 | overall request response. However all returns will return a watershed 80 | object with at least the workspaceid. The workspace id is the id to the 81 | service workspace where files are stored and can be used for further 82 | processing such as for downloads and flow statistic computations. 83 | 84 | See: https://streamstats.usgs.gov/streamstatsservices/#/ for more 85 | information. 86 | 87 | Parameters 88 | ---------- 89 | rcode: string 90 | StreamStats 2-3 character code that identifies the Study Area -- 91 | either a State or a Regional Study. 92 | xlocation: float 93 | X location of the most downstream point of desired study area. 94 | ylocation: float 95 | Y location of the most downstream point of desired study area. 96 | crs: integer, string, optional 97 | ESPSG spatial reference code, default is 4326 98 | includeparameters: bool, optional 99 | Boolean flag to include parameters in response. 100 | includeflowtypes: bool, string, optional 101 | Not yet implemented. Would be a comma separated list of region flow 102 | types to compute with the default being True 103 | includefeatures: list, optional 104 | Comma separated list of features to include in response. 105 | simplify: bool, optional 106 | Boolean flag controlling whether or not to simplify the returned 107 | result. 108 | 109 | Returns 110 | ------- 111 | Watershed: :obj:`dataretrieval.streamstats.Watershed` 112 | Custom object that contains the watershed information as extracted 113 | from the streamstats JSON object. 114 | 115 | """ 116 | payload = { 117 | "rcode": rcode, 118 | "xlocation": xlocation, 119 | "ylocation": ylocation, 120 | "crs": crs, 121 | "includeparameters": includeparameters, 122 | "includeflowtypes": includeflowtypes, 123 | "includefeatures": includefeatures, 124 | "simplify": simplify, 125 | } 126 | url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson" 127 | 128 | r = requests.get(url, params=payload) 129 | 130 | r.raise_for_status() 131 | 132 | if format == "geojson": 133 | return r 134 | 135 | if format == "shape": 136 | # use Fiona to return a shape object 137 | pass 138 | 139 | if format == "object": 140 | # return a python object 141 | pass 142 | 143 | data = json.loads(r.text) 144 | return Watershed.from_streamstats_json(data) 145 | 146 | 147 | class Watershed: 148 | """Class to extract information from the streamstats JSON object.""" 149 | 150 | @classmethod 151 | def from_streamstats_json(cls, streamstats_json): 152 | """Method that creates a Watershed object from a streamstats JSON.""" 153 | cls.watershed_point = streamstats_json["featurecollection"][0]["feature"] 154 | cls.watershed_polygon = streamstats_json["featurecollection"][1]["feature"] 155 | cls.parameters = streamstats_json["parameters"] 156 | cls._workspaceID = streamstats_json["workspaceID"] 157 | return cls 158 | 159 | def __init__(self, rcode, xlocation, ylocation): 160 | """Init method that calls the :obj:`from_streamstats_json` method.""" 161 | self = get_watershed(rcode, xlocation, ylocation) 162 | -------------------------------------------------------------------------------- /dataretrieval/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Useful utilities for data munging. 3 | """ 4 | 5 | import warnings 6 | 7 | import pandas as pd 8 | import requests 9 | 10 | import dataretrieval 11 | from dataretrieval.codes import tz 12 | 13 | 14 | def to_str(listlike, delimiter=","): 15 | """Translates list-like objects into strings. 16 | 17 | Parameters 18 | ---------- 19 | listlike: list-like object 20 | An object that is a list, or list-like 21 | (e.g., ``pandas.core.series.Series``) 22 | delimiter: string, optional 23 | The delimiter that is placed between entries in listlike when it is 24 | turned into a string. Default value is a comma. 25 | 26 | Returns 27 | ------- 28 | listlike: string 29 | The listlike object as string separated by the delimiter 30 | 31 | Examples 32 | -------- 33 | .. doctest:: 34 | 35 | >>> dataretrieval.utils.to_str([1, "a", 2]) 36 | '1,a,2' 37 | 38 | >>> dataretrieval.utils.to_str([0, 10, 42], delimiter="+") 39 | '0+10+42' 40 | 41 | """ 42 | if type(listlike) == list: 43 | return delimiter.join([str(x) for x in listlike]) 44 | 45 | elif type(listlike) == pd.core.series.Series: 46 | return delimiter.join(listlike.tolist()) 47 | 48 | elif type(listlike) == pd.core.indexes.base.Index: 49 | return delimiter.join(listlike.tolist()) 50 | 51 | elif type(listlike) == str: 52 | return listlike 53 | 54 | 55 | def format_datetime(df, date_field, time_field, tz_field): 56 | """Creates a datetime field from separate date, time, and 57 | time zone fields. 58 | 59 | Assumes ISO 8601. 60 | 61 | Parameters 62 | ---------- 63 | df: ``pandas.DataFrame`` 64 | A data frame containing date, time, and timezone fields. 65 | date_field: string 66 | Name of date column in df. 67 | time_field: string 68 | Name of time column in df. 69 | tz_field: string 70 | Name of time zone column in df. 71 | 72 | Returns 73 | ------- 74 | df: ``pandas.DataFrame`` 75 | The data frame with a formatted 'datetime' column 76 | 77 | """ 78 | # create a datetime index from the columns in qwdata response 79 | df[tz_field] = df[tz_field].map(tz) 80 | 81 | df["datetime"] = pd.to_datetime( 82 | df[date_field] + " " + df[time_field] + " " + df[tz_field], 83 | format="ISO8601", 84 | utc=True, 85 | ) 86 | 87 | # if there are any incomplete dates, warn the user 88 | if df["datetime"].isna().any(): 89 | count = df["datetime"].isna().sum() 90 | warnings.warn( 91 | f"Warning: {count} incomplete dates found, " 92 | + "consider setting datetime_index to False.", 93 | UserWarning, 94 | ) 95 | 96 | return df 97 | 98 | 99 | class BaseMetadata: 100 | """Base class for metadata. 101 | 102 | Attributes 103 | ---------- 104 | url : str 105 | Response url 106 | query_time: datetme.timedelta 107 | Response elapsed time 108 | header: requests.structures.CaseInsensitiveDict 109 | Response headers 110 | 111 | """ 112 | 113 | def __init__(self, response) -> None: 114 | """Generates a standard set of metadata informed by the response. 115 | 116 | Parameters 117 | ---------- 118 | response: Response 119 | Response object from requests module 120 | 121 | Returns 122 | ------- 123 | md: :obj:`dataretrieval.utils.BaseMetadata` 124 | A ``dataretrieval`` custom :obj:`dataretrieval.utils.BaseMetadata` object. 125 | 126 | """ 127 | 128 | # These are built from the API response 129 | self.url = response.url 130 | self.query_time = response.elapsed 131 | self.header = response.headers 132 | self.comment = None 133 | 134 | # # not sure what statistic_info is 135 | # self.statistic_info = None 136 | 137 | # # disclaimer seems to be only part of importWaterML1 138 | # self.disclaimer = None 139 | 140 | # These properties are to be set by `nwis` or `wqp`-specific metadata classes. 141 | @property 142 | def site_info(self): 143 | raise NotImplementedError( 144 | "site_info must be implemented by utils.BaseMetadata children" 145 | ) 146 | 147 | @property 148 | def variable_info(self): 149 | raise NotImplementedError( 150 | "variable_info must be implemented by utils.BaseMetadata children" 151 | ) 152 | 153 | def __repr__(self) -> str: 154 | return f"{type(self).__name__}(url={self.url})" 155 | 156 | 157 | def query(url, payload, delimiter=",", ssl_check=True): 158 | """Send a query. 159 | 160 | Wrapper for requests.get that handles errors, converts listed 161 | query parameters to comma separated strings, and returns response. 162 | 163 | Parameters 164 | ---------- 165 | url: string 166 | URL to query 167 | payload: dict 168 | query parameters passed to ``requests.get`` 169 | delimiter: string 170 | delimiter to use with lists 171 | ssl_check: bool 172 | If True, check SSL certificates, if False, do not check SSL, 173 | default is True 174 | 175 | Returns 176 | ------- 177 | string: query response 178 | The response from the API query ``requests.get`` function call. 179 | """ 180 | 181 | for key, value in payload.items(): 182 | payload[key] = to_str(value, delimiter) 183 | # for index in range(len(payload)): 184 | # key, value = payload[index] 185 | # payload[index] = (key, to_str(value)) 186 | 187 | # define the user agent for the query 188 | user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"} 189 | 190 | response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check) 191 | 192 | if response.status_code == 400: 193 | raise ValueError( 194 | f"Bad Request, check that your parameters are correct. URL: {response.url}" 195 | ) 196 | elif response.status_code == 404: 197 | raise ValueError( 198 | "Page Not Found Error. May be the result of an empty query. " 199 | + f"URL: {response.url}" 200 | ) 201 | elif response.status_code == 414: 202 | _reason = response.reason 203 | _example = """ 204 | # n is the number of chunks to divide the query into \n 205 | split_list = np.array_split(site_list, n) 206 | data_list = [] # list to store chunk results in \n 207 | # loop through chunks and make requests \n 208 | for site_list in split_list: \n 209 | data = nwis.get_record(sites=site_list, service='dv', \n 210 | start=start, end=end) \n 211 | data_list.append(data) # append results to list""" 212 | raise ValueError( 213 | "Request URL too long. Modify your query to use fewer sites. " 214 | + f"API response reason: {_reason}. Pseudo-code example of how to " 215 | + f"split your query: \n {_example}" 216 | ) 217 | 218 | if response.text.startswith("No sites/data"): 219 | raise NoSitesError(response.url) 220 | 221 | return response 222 | 223 | 224 | class NoSitesError(Exception): 225 | """Custom error class used when selection criteria returns no sites/data.""" 226 | 227 | def __init__(self, url): 228 | self.url = url 229 | 230 | def __str__(self): 231 | return ( 232 | "No sites/data found using the selection criteria specified in url: " 233 | "{url}" 234 | ).format(url=self.url) 235 | -------------------------------------------------------------------------------- /dataretrieval/waterwatch.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | import pandas as pd 4 | import requests 5 | 6 | ResponseFormat = "json" # json, xml 7 | 8 | # WaterWatch won't receive any new features but it will continue to operate. 9 | waterwatch_url = "https://waterwatch.usgs.gov/webservices/" 10 | 11 | 12 | def _read_json(data: Dict) -> pd.DataFrame: 13 | return pd.DataFrame(data).T 14 | 15 | 16 | def get_flood_stage( 17 | sites: List[str] = None, fmt: str = "DF" 18 | ) -> Union[pd.DataFrame, Dict]: 19 | """ 20 | Retrieves flood stages for a list of station numbers. 21 | 22 | Parameters 23 | ---------- 24 | sites: List of strings 25 | Site numbers 26 | fmt: ``pandas.DataFrame`` or dict 27 | Returned format: Default is "DF" for ``pandas.DataFrame``, else 28 | a dictionary is returned. 29 | 30 | Returns 31 | ------- 32 | station_stages: ``pandas.Dataframe`` or dict 33 | contains station numbers and their flood stages. 34 | If no flood stage for a station, ``None`` is returned. 35 | 36 | Examples 37 | -------- 38 | .. doctest:: 39 | 40 | >> stations = ["07144100", "07144101"] 41 | >> res = get_flood_stage(stations, fmt="dict") # dictionary output 42 | >> print(res) 43 | {'07144100': {'action_stage': '20', 44 | 'flood_stage': '22', 45 | 'moderate_flood_stage': '25', 46 | 'major_flood_stage': '26'}, 47 | '07144101': None} 48 | >> print(get_flood_stage(stations)) 49 | >> print(res) 50 | action_stage flood_stage moderate_flood_stage major_flood_stage 51 | 07144100 20 22 25 26 52 | 07144101 None None None None 53 | 50057000 16 20 24 30 54 | 55 | """ 56 | res = requests.get(waterwatch_url + "floodstage", params={"format": ResponseFormat}) 57 | 58 | if res.ok: 59 | json_res = res.json() 60 | stages = { 61 | site["site_no"]: {k: v for k, v in site.items() if k != "site_no"} 62 | for site in json_res["sites"] 63 | } 64 | else: 65 | raise requests.RequestException(f"[{res.status_code}] - {res.reason}") 66 | 67 | if not sites: 68 | stations_stages = stages 69 | else: 70 | stations_stages = {} 71 | for site in sites: 72 | try: 73 | stations_stages[site] = stages[site] 74 | except KeyError: 75 | stations_stages[site] = None 76 | 77 | if fmt == "dict": 78 | return stations_stages 79 | else: 80 | return _read_json(stations_stages) 81 | -------------------------------------------------------------------------------- /demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "# USGS dataretrieval Python Package `get_discharge_measurements()` Examples\n", 13 | "\n", 14 | "This notebook provides examples of using the Python dataretrieval package to retrieve surface water discharge measurement data for a United States Geological Survey (USGS) monitoring site. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "### Install the Package\n", 21 | "\n", 22 | "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." 23 | ], 24 | "metadata": { 25 | "collapsed": false 26 | } 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "outputs": [], 32 | "source": [ 33 | "!pip install dataretrieval" 34 | ], 35 | "metadata": { 36 | "collapsed": false, 37 | "pycharm": { 38 | "name": "#%%\n" 39 | } 40 | } 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "source": [ 45 | "Load the package so you can use it along with other packages used in this notebook." 46 | ], 47 | "metadata": { 48 | "collapsed": false 49 | } 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "outputs": [], 55 | "source": [ 56 | "from dataretrieval import nwis\n", 57 | "from IPython.display import display" 58 | ], 59 | "metadata": { 60 | "collapsed": false, 61 | "pycharm": { 62 | "name": "#%%\n" 63 | } 64 | } 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "source": [ 69 | "### Basic Usage\n", 70 | "\n", 71 | "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_discharge_measurements()` function to retrieve surface water discharge measurements for a USGS monitoring site from NWIS. The function has the following arguments:\n", 72 | "\n", 73 | "Arguments (Additional arguments, if supplied, will be used as query parameters)\n", 74 | "\n", 75 | "* **sites** (list of strings): A list of USGS site codes to retrieve data for. If the qwdata parameter site_no is supplied, it will overwrite the sites parameter.\n", 76 | "* **start** (string): The beginning date of a period for which to retrieve measurements. If the qwdata parameter begin_date is supplied, it will overwrite the start parameter.\n", 77 | "* **end** (string): The ending date of a period for which to retrieve measurements. If the qwdata parameter end_date is supplied, it will overwrite the end parameter." 78 | ], 79 | "metadata": { 80 | "collapsed": false 81 | } 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "source": [ 86 | "Example 1: Get all of the surface water measurements for a single site" 87 | ], 88 | "metadata": { 89 | "collapsed": false 90 | } 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "outputs": [], 96 | "source": [ 97 | "measurements1 = nwis.get_discharge_measurements(sites=\"10109000\")\n", 98 | "print(\"Retrieved \" + str(len(measurements1[0])) + \" data values.\")" 99 | ], 100 | "metadata": { 101 | "collapsed": false, 102 | "pycharm": { 103 | "name": "#%%\n" 104 | } 105 | } 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "source": [ 110 | "### Interpreting the Result\n", 111 | "\n", 112 | "The result of calling the `get_discharge_measurements()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the discharge measurements for the time period requested.\n", 113 | "\n", 114 | "Once you've got the data frame, there's several useful things you can do to explore the data." 115 | ], 116 | "metadata": { 117 | "collapsed": false 118 | } 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "source": [ 123 | "Display the data frame as a table" 124 | ], 125 | "metadata": { 126 | "collapsed": false, 127 | "pycharm": { 128 | "name": "#%% md\n" 129 | } 130 | } 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "outputs": [], 136 | "source": [ 137 | "display(measurements1[0])" 138 | ], 139 | "metadata": { 140 | "collapsed": false, 141 | "pycharm": { 142 | "name": "#%%\n" 143 | } 144 | } 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "source": [ 149 | "Show the data types of the columns in the resulting data frame." 150 | ], 151 | "metadata": { 152 | "collapsed": false 153 | } 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "outputs": [], 159 | "source": [ 160 | "print(measurements1[0].dtypes)" 161 | ], 162 | "metadata": { 163 | "collapsed": false, 164 | "pycharm": { 165 | "name": "#%%\n" 166 | } 167 | } 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "source": [ 172 | "The other part of the result returned from the `get_discharge_measurements()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." 173 | ], 174 | "metadata": { 175 | "collapsed": false 176 | } 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "outputs": [], 182 | "source": [ 183 | "print(\"The query URL used to retrieve the data from NWIS was: \" + measurements1[1].url)" 184 | ], 185 | "metadata": { 186 | "collapsed": false, 187 | "pycharm": { 188 | "name": "#%%\n" 189 | } 190 | } 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "source": [ 195 | "### Additional Examples\n", 196 | "\n", 197 | "Example 2: Get all of the surface water measurements between a start and end date" 198 | ], 199 | "metadata": { 200 | "collapsed": false 201 | } 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "outputs": [], 207 | "source": [ 208 | "measurements2 = nwis.get_discharge_measurements(sites=\"10109000\", start=\"2019-01-01\", end=\"2019-12-31\")\n", 209 | "print(\"Retrieved \" + str(len(measurements2[0])) + \" data values.\")\n", 210 | "display(measurements2[0])" 211 | ], 212 | "metadata": { 213 | "collapsed": false, 214 | "pycharm": { 215 | "name": "#%%\n" 216 | } 217 | } 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "source": [ 222 | "Example 3: Get all of the surface water measurements for multiple sites" 223 | ], 224 | "metadata": { 225 | "collapsed": false, 226 | "pycharm": { 227 | "name": "#%% md\n" 228 | } 229 | } 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "outputs": [], 235 | "source": [ 236 | "measurements3 = nwis.get_discharge_measurements(sites=[\"01594440\", \"040851325\"])\n", 237 | "print(\"Retrieved \" + str(len(measurements3[0])) + \" data values.\")\n", 238 | "display(measurements3[0])" 239 | ], 240 | "metadata": { 241 | "collapsed": false, 242 | "pycharm": { 243 | "name": "#%%\n" 244 | } 245 | } 246 | } 247 | ], 248 | "metadata": { 249 | "kernelspec": { 250 | "display_name": "Python 3", 251 | "language": "python", 252 | "name": "python3" 253 | }, 254 | "language_info": { 255 | "codemirror_mode": { 256 | "name": "ipython", 257 | "version": 2 258 | }, 259 | "file_extension": ".py", 260 | "mimetype": "text/x-python", 261 | "name": "python", 262 | "nbconvert_exporter": "python", 263 | "pygments_lexer": "ipython2", 264 | "version": "2.7.6" 265 | } 266 | }, 267 | "nbformat": 4, 268 | "nbformat_minor": 0 269 | } -------------------------------------------------------------------------------- /demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "# USGS dataretrieval Python Package `get_pmcodes()` Examples\n", 13 | "\n", 14 | "This notebook provides examples of using the Python dataretrieval package to retrieve information about USGS parameter codes from NWIS. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA).\n", 15 | "\n", 16 | "For more information about USGS NWIS parameter codes, see:\n", 17 | "https://help.waterdata.usgs.gov/codes-and-parameters/parameters" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "source": [ 23 | "### Install the Package\n", 24 | "\n", 25 | "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." 26 | ], 27 | "metadata": { 28 | "collapsed": false 29 | } 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "outputs": [], 35 | "source": [ 36 | "!pip install dataretrieval" 37 | ], 38 | "metadata": { 39 | "collapsed": false, 40 | "pycharm": { 41 | "name": "#%%\n" 42 | } 43 | } 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "source": [ 48 | "Load the package so you can use it along with other packages used in this notebook." 49 | ], 50 | "metadata": { 51 | "collapsed": false 52 | } 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "outputs": [], 58 | "source": [ 59 | "from dataretrieval import nwis\n", 60 | "from IPython.display import display" 61 | ], 62 | "metadata": { 63 | "collapsed": false, 64 | "pycharm": { 65 | "name": "#%%\n" 66 | } 67 | } 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "source": [ 72 | "### Basic Usage\n", 73 | "\n", 74 | "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_pmcodes()` function to retrieve information about parameter codes (i.e., observed variables) from NWIS. The following arguments are supported:\n", 75 | "\n", 76 | "Arguments (Additional arguments, if supplied, will be used as query parameters)\n", 77 | "\n", 78 | "* **parameterCd** (string): A string containing the parameter code for which information is to be retrieved." 79 | ], 80 | "metadata": { 81 | "collapsed": false 82 | } 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "source": [ 87 | "Example 1: Retrieve information for a set of USGS NWIS parameter codes." 88 | ], 89 | "metadata": { 90 | "collapsed": false 91 | } 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "outputs": [], 97 | "source": [ 98 | "parameter_codes = nwis.get_pmcodes(['00400'])\n", 99 | "print(\"Retrieved information about \" + str(len(parameter_codes[0])) + \" parameter code.\")" 100 | ], 101 | "metadata": { 102 | "collapsed": false, 103 | "pycharm": { 104 | "name": "#%%\n" 105 | } 106 | } 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "source": [ 111 | "### Interpreting the Result\n", 112 | "\n", 113 | "The result of calling the `get_pmcodes()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the parameter code information requested.\n", 114 | "\n", 115 | "Once you've got the data frame, you can explore the data." 116 | ], 117 | "metadata": { 118 | "collapsed": false 119 | } 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "outputs": [], 125 | "source": [ 126 | "# Display the data frame as a table\n", 127 | "display(parameter_codes[0])\n" 128 | ], 129 | "metadata": { 130 | "collapsed": false, 131 | "pycharm": { 132 | "name": "#%%\n" 133 | } 134 | } 135 | } 136 | ], 137 | "metadata": { 138 | "kernelspec": { 139 | "display_name": "Python 3", 140 | "language": "python", 141 | "name": "python3" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 2 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | "nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython2", 153 | "version": "2.7.6" 154 | } 155 | }, 156 | "nbformat": 4, 157 | "nbformat_minor": 0 158 | } -------------------------------------------------------------------------------- /demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# USGS dataretrieval Python Package `get_discharge_peaks()` Examples\n", 12 | "\n", 13 | "This notebook provides examples of using the Python dataretrieval package to retrieve streamflow peak data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Install the Package\n", 21 | "\n", 22 | "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false, 30 | "jupyter": { 31 | "outputs_hidden": false 32 | }, 33 | "pycharm": { 34 | "name": "#%%\n" 35 | } 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "!pip install dataretrieval" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Load the package so you can use it along with other packages used in this notebook." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false, 54 | "jupyter": { 55 | "outputs_hidden": false 56 | }, 57 | "pycharm": { 58 | "name": "#%%\n" 59 | } 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "from dataretrieval import nwis\n", 64 | "from IPython.display import display" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Basic Usage\n", 72 | "\n", 73 | "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_discharge_peaks()` function to retrieve peak streamflow data for a USGS monitoring site from NWIS. The function has the following arguments:\n", 74 | "\n", 75 | "Arguments (Additional parameters, if supplied, will be used as query parameters)\n", 76 | "\n", 77 | "* **sites** (list of strings): A list of USGS site identifiers for which data will be retrieved. If the waterdata parameter site_no is supplied, it will overwrite the sites parameter.\n", 78 | "* **start** (string): A beginning date for the period for which data will be retrieved. If the waterdata parameter begin_date is supplied, it will overwrite the start parameter.\n", 79 | "* **end** (string): An ending date for the period for which data will be retrieved. If the waterdata parameter end_date is supplied, it will overwrite the end parameter." 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Example 1: Retrieve streamflow peak data for two USGS monitoring sites" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false, 94 | "jupyter": { 95 | "outputs_hidden": false 96 | }, 97 | "pycharm": { 98 | "name": "#%%\n" 99 | } 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "site_ids = ['01594440', '040851325']\n", 104 | "peak_data = nwis.get_discharge_peaks(site_ids)\n", 105 | "print(\"Retrieved \" + str(len(peak_data[0])) + \" data values.\")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "### Interpreting the Result\n", 113 | "\n", 114 | "The result of calling the `get_discharge_peaks()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the discharge peak values for the requested site(s).\n", 115 | "\n", 116 | "Once you've got the data frame, there's several useful things you can do to explore the data.\n", 117 | "\n", 118 | "Display the data frame as a table." 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false, 126 | "jupyter": { 127 | "outputs_hidden": false 128 | }, 129 | "pycharm": { 130 | "name": "#%%\n" 131 | } 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "display(peak_data[0])" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Show the data types of the columns in the resulting data frame." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false, 150 | "jupyter": { 151 | "outputs_hidden": false 152 | }, 153 | "pycharm": { 154 | "name": "#%%\n" 155 | } 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "print(peak_data[0].dtypes)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "The other part of the result returned from the `get_dv()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": false, 174 | "jupyter": { 175 | "outputs_hidden": false 176 | }, 177 | "pycharm": { 178 | "name": "#%%\n" 179 | } 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "print(\"The query URL used to retrieve the data from NWIS was: \" + peak_data[1].url)" 184 | ] 185 | }, 186 | { 187 | "metadata": {}, 188 | "cell_type": "markdown", 189 | "source": "The following example is the same as the previous example but with multi index turned off (multi_index=False)" 190 | }, 191 | { 192 | "metadata": {}, 193 | "cell_type": "code", 194 | "outputs": [], 195 | "execution_count": null, 196 | "source": [ 197 | "site_ids = ['01594440', '040851325']\n", 198 | "peak_data = nwis.get_discharge_peaks(site_ids, multi_index=False)\n", 199 | "print(\"Retrieved \" + str(len(peak_data[0])) + \" data values.\")" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "### Additional Examples\n", 207 | "\n", 208 | "Example 2: Retrieve discharge peaks for a single site." 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": false, 216 | "jupyter": { 217 | "outputs_hidden": false 218 | }, 219 | "pycharm": { 220 | "name": "#%%\n" 221 | } 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "stations = \"06011000\"\n", 226 | "data3 = nwis.get_discharge_peaks(stations)\n", 227 | "display(data3[0])" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": { 233 | "pycharm": { 234 | "name": "#%% md\n" 235 | } 236 | }, 237 | "source": [ 238 | "Example 3: Retrieve peak discharge data for a monitoring site between two dates" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false, 246 | "jupyter": { 247 | "outputs_hidden": false 248 | }, 249 | "pycharm": { 250 | "name": "#%%\n" 251 | } 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "data4 = nwis.get_discharge_peaks(stations, start='1953-01-01', end='1960-01-01')\n", 256 | "display(data4[0])" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3 (ipykernel)", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.9.7" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 4 281 | } 282 | -------------------------------------------------------------------------------- /demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# USGS dataretrieval Python Package `get_ratings()` Examples\n", 7 | "\n", 8 | "This notebook provides examples of using the Python dataretrieval package to retrieve rating curve data for a United States Geological Survey (USGS) streamflow gage. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." 9 | ], 10 | "metadata": { 11 | "collapsed": false 12 | } 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "source": [ 17 | "### Install the Package\n", 18 | "\n", 19 | "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." 20 | ], 21 | "metadata": { 22 | "collapsed": false 23 | } 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "outputs": [], 29 | "source": [ 30 | "!pip install dataretrieval" 31 | ], 32 | "metadata": { 33 | "collapsed": false, 34 | "pycharm": { 35 | "name": "#%%\n" 36 | } 37 | } 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "source": [ 42 | "Load the package so you can use it along with other packages used in this notebook." 43 | ], 44 | "metadata": { 45 | "collapsed": false 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "outputs": [], 52 | "source": [ 53 | "from dataretrieval import nwis\n", 54 | "from IPython.display import display" 55 | ], 56 | "metadata": { 57 | "collapsed": false, 58 | "pycharm": { 59 | "name": "#%%\n" 60 | } 61 | } 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "source": [ 66 | "### Basic Usage\n", 67 | "\n", 68 | "The dataretrieval package has several functions that allow you to retrieve data from different web services. This example uses the `get_ratings()` function to retrieve rating curve data for a monitoring site from USGS NWIS. The following arguments are available:\n", 69 | "\n", 70 | "Arguments (Additional arguments, if supplied, will be used as query parameters)\n", 71 | "\n", 72 | "* **site** (string): A USGS site number. This is usually an 8 digit number as a string. If the nwis parameter site_no is supplied, it will overwrite the site parameter.\n", 73 | "* **base** (string): Can be \"base\", \"corr\", or \"exsa\"\n", 74 | "* **county** (string): County IDs from county lookup or \"ALL\"\n", 75 | "* **categories** (Listlike): List or comma delimited string of Two-letter category abbreviations\n", 76 | "\n", 77 | "NOTE: Not all active USGS streamflow gages have traditional rating curves that relate stage to flow." 78 | ], 79 | "metadata": { 80 | "collapsed": false 81 | } 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "source": [ 86 | "Example 1: Get rating data for an NWIS Site" 87 | ], 88 | "metadata": { 89 | "collapsed": false, 90 | "pycharm": { 91 | "name": "#%% md\n" 92 | } 93 | } 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "# Specify the USGS site number/code\n", 104 | "site_id = \"10109000\"\n", 105 | "\n", 106 | "# Get the rating curve data\n", 107 | "ratingData = nwis.get_ratings(site=site_id, file_type=\"exsa\")\n", 108 | "print(\"Retrieved \" + str(len(ratingData[0])) + \" data values.\")" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "source": [ 114 | "### Interpreting the Result\n", 115 | "\n", 116 | "The result of calling the `get_ratings()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the rating curve data for the requested site.\n", 117 | "\n", 118 | "Once you've got the data frame, there's several useful things you can do to explore the data. You can execute the following code to display the data frame as a table.\n", 119 | "\n", 120 | "If the \"type\" parameter in the request has a value of \"base,\" then the columns in the data frame are as follows:\n", 121 | "* INDEP - typically the gage height in feet\n", 122 | "* DEP - typically the streamflow in cubic feet per second\n", 123 | "* STOR - where an \"*\" indicates that the pair are a fixed point of the rating curve\n", 124 | "\n", 125 | "If the \"type\" parameter is specified as \"exsa,\" then an additional column called SHIFT is included that indicates the current shift in the rating for that value of INDEP.\n", 126 | "\n", 127 | "If the \"type\" parameter is specified as \"corr,\" then the columns are as follows:\n", 128 | "* INDEP - typically gage height in feet\n", 129 | "* CORR - the correction for that value\n", 130 | "* CORRINDEP - the corrected value for CORR" 131 | ], 132 | "metadata": { 133 | "collapsed": false, 134 | "pycharm": { 135 | "name": "#%% md\n" 136 | } 137 | } 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "outputs": [], 143 | "source": [ 144 | "display(ratingData[0])" 145 | ], 146 | "metadata": { 147 | "collapsed": false, 148 | "pycharm": { 149 | "name": "#%%\n" 150 | } 151 | } 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "source": [ 156 | "Show the data types of the columns in the resulting data frame" 157 | ], 158 | "metadata": { 159 | "collapsed": false, 160 | "pycharm": { 161 | "name": "#%% md\n" 162 | } 163 | } 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "outputs": [], 169 | "source": [ 170 | "print(ratingData[0].dtypes)" 171 | ], 172 | "metadata": { 173 | "collapsed": false, 174 | "pycharm": { 175 | "name": "#%%\n" 176 | } 177 | } 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "source": [ 182 | "The other part of the result returned from the `get_ratings()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." 183 | ], 184 | "metadata": { 185 | "collapsed": false, 186 | "pycharm": { 187 | "name": "#%% md\n" 188 | } 189 | } 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "outputs": [], 195 | "source": [ 196 | "print(\"The query URL used to retrieve the data from NWIS was: \" + ratingData[1].url)" 197 | ], 198 | "metadata": { 199 | "collapsed": false, 200 | "pycharm": { 201 | "name": "#%%\n" 202 | } 203 | } 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "source": [ 208 | "Example 2: Get rating data for a different NWIS site by changing the site_id" 209 | ], 210 | "metadata": { 211 | "collapsed": false, 212 | "pycharm": { 213 | "name": "#%% md\n" 214 | } 215 | } 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "outputs": [], 221 | "source": [ 222 | "site_id = '01594440'\n", 223 | "data = nwis.get_ratings(site=site_id, file_type=\"base\")\n", 224 | "print(\"Retrieved \" + str(len(data[0])) + \" data values.\")" 225 | ], 226 | "metadata": { 227 | "collapsed": false, 228 | "pycharm": { 229 | "name": "#%%\n" 230 | } 231 | } 232 | } 233 | ], 234 | "metadata": { 235 | "kernelspec": { 236 | "display_name": "Python 3", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 2 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython2", 250 | "version": "2.7.6" 251 | } 252 | }, 253 | "nbformat": 4, 254 | "nbformat_minor": 0 255 | } -------------------------------------------------------------------------------- /demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# USGS dataretrieval Python Package `get_water_use()` Examples\n", 12 | "\n", 13 | "This notebook provides examples of using the Python dataretrieval package to retrieve water use data. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Install the Package\n", 21 | "\n", 22 | "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false, 30 | "jupyter": { 31 | "outputs_hidden": false 32 | }, 33 | "pycharm": { 34 | "name": "#%%\n" 35 | } 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "!pip install dataretrieval" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Load the package so you can use it along with other packages used in this notebook." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false, 54 | "jupyter": { 55 | "outputs_hidden": false 56 | }, 57 | "pycharm": { 58 | "name": "#%%\n" 59 | } 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "from dataretrieval import nwis\n", 64 | "from IPython.display import display" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Basic Usage\n", 72 | "\n", 73 | "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_water_use()` function to retrieve water use data. The following arguments are supported:\n", 74 | "\n", 75 | "Arguments (Additional arguments, if supplied, will be used as query parameters)\n", 76 | "\n", 77 | "* **years** (Listlike): List or comma delimited string of years. Must be years ending in 0 or 5 because water use data is only reported during these years, or \"ALL\", which retrieves all available years\n", 78 | "* **state** (string): Full name, abbreviation or id for a state for which to retrieve data\n", 79 | "* **county** (string or list of strings): County IDs from county lookup or \"ALL\"\n", 80 | "* **categories** (Listlike): List or comma delimited string of two-letter category abbreviations" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "pycharm": { 87 | "name": "#%% md\n" 88 | } 89 | }, 90 | "source": [ 91 | "#### Example 1: Retrieve all water use data for a state" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false, 99 | "jupyter": { 100 | "outputs_hidden": false 101 | }, 102 | "pycharm": { 103 | "name": "#%%\n" 104 | } 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "pennsylvania = nwis.get_water_use(state='PA')\n", 109 | "print('Retrieved ' + str(len(pennsylvania[0])) + ' water use records.')" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Interpreting the Result\n", 117 | "\n", 118 | "The result of calling the `get_water_use()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the water use data.\n", 119 | "\n", 120 | "Once you've got the data frame, there's several useful things you can do to explore the data." 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": { 126 | "pycharm": { 127 | "name": "#%% md\n" 128 | } 129 | }, 130 | "source": [ 131 | "Display the data frame as a table. The example request was for a whole state. The data returned are organized by county and year, with summary data reported every 5 years." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false, 139 | "jupyter": { 140 | "outputs_hidden": false 141 | }, 142 | "pycharm": { 143 | "name": "#%%\n" 144 | } 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "display(pennsylvania[0])" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "Show the data types of the columns in the resulting data frame." 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false, 163 | "jupyter": { 164 | "outputs_hidden": false 165 | }, 166 | "pycharm": { 167 | "name": "#%%\n" 168 | } 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "print(pennsylvania[0].dtypes)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "pycharm": { 179 | "name": "#%% md\n" 180 | } 181 | }, 182 | "source": [ 183 | "#### Example 2: Retrieve data for an entire state for certain years\n", 184 | "\n", 185 | "Returns data parsed by county - one row for each county for each year of interest rather than the entire state. Data are included for 5 year periods." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": false, 193 | "jupyter": { 194 | "outputs_hidden": false 195 | }, 196 | "pycharm": { 197 | "name": "#%%\n" 198 | } 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "ohio = nwis.get_water_use(years=[2000, 2005, 2010], state='OH')\n", 203 | "print('Retrieved ' + str(len(ohio[0])) + ' water use records.')\n", 204 | "display(ohio[0])" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "pycharm": { 211 | "name": "#%% md\n" 212 | } 213 | }, 214 | "source": [ 215 | "#### Example 3: Retrieve two specific water use categories for an entire state" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": false, 223 | "jupyter": { 224 | "outputs_hidden": false 225 | }, 226 | "pycharm": { 227 | "name": "#%%\n" 228 | } 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "# Get water use data for livestock (LI) and irrigation (IT)\n", 233 | "kansas = nwis.get_water_use(state='KS', categories=['IT', 'LI'])\n", 234 | "print('Retrieved ' + str(len(kansas[0])) + ' water use records.')\n", 235 | "display(kansas[0])\n" 236 | ] 237 | } 238 | ], 239 | "metadata": { 240 | "kernelspec": { 241 | "display_name": "Python 3 (ipykernel)", 242 | "language": "python", 243 | "name": "python3" 244 | }, 245 | "language_info": { 246 | "codemirror_mode": { 247 | "name": "ipython", 248 | "version": 3 249 | }, 250 | "file_extension": ".py", 251 | "mimetype": "text/x-python", 252 | "name": "python", 253 | "nbconvert_exporter": "python", 254 | "pygments_lexer": "ipython3", 255 | "version": "3.9.7" 256 | } 257 | }, 258 | "nbformat": 4, 259 | "nbformat_minor": 4 260 | } 261 | -------------------------------------------------------------------------------- /demos/nwqn_data_pull/Dockerfile_dataretrieval: -------------------------------------------------------------------------------- 1 | # Python 3.11 2 | FROM python:3.11-slim-buster 3 | 4 | 5 | RUN apt-get update \ 6 | # Install aws-lambda-cpp build dependencies 7 | && apt-get install -y \ 8 | g++ \ 9 | make \ 10 | cmake \ 11 | unzip \ 12 | # cleanup package lists, they are not used anymore in this image 13 | && rm -rf /var/lib/apt/lists/* \ 14 | && apt-cache search linux-headers-generic 15 | 16 | ARG FUNCTION_DIR="/function" 17 | 18 | # Copy function code 19 | RUN mkdir -p ${FUNCTION_DIR} 20 | 21 | # Update pip 22 | # NB botocore/boto3 are pinned due to https://github.com/boto/boto3/issues/3648 23 | # using versions from https://github.com/aio-libs/aiobotocore/blob/72b8dd5d7d4ef2f1a49a0ae0c37b47e5280e2070/setup.py 24 | # due to s3fs dependency 25 | RUN pip install --upgrade --ignore-installed pip wheel six setuptools \ 26 | && pip install --upgrade --no-cache-dir --ignore-installed \ 27 | awslambdaric \ 28 | botocore==1.29.76 \ 29 | boto3==1.26.76 \ 30 | redis \ 31 | httplib2 \ 32 | requests \ 33 | numpy \ 34 | scipy \ 35 | pandas \ 36 | pika \ 37 | kafka-python \ 38 | cloudpickle \ 39 | ps-mem \ 40 | tblib 41 | 42 | # Set working directory to function root directory 43 | WORKDIR ${FUNCTION_DIR} 44 | 45 | # Add Lithops 46 | COPY lithops_lambda.zip ${FUNCTION_DIR} 47 | RUN unzip lithops_lambda.zip \ 48 | && rm lithops_lambda.zip \ 49 | && mkdir handler \ 50 | && touch handler/__init__.py \ 51 | && mv entry_point.py handler/ 52 | 53 | # Put your dependencies here, using RUN pip install... or RUN apt install... 54 | 55 | COPY requirements.txt requirements.txt 56 | RUN pip install --no-cache-dir -r requirements.txt 57 | 58 | ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ] 59 | CMD [ "handler.entry_point.lambda_handler" ] -------------------------------------------------------------------------------- /demos/nwqn_data_pull/README.md: -------------------------------------------------------------------------------- 1 | # Retrieve data from the National Water Quality Network (NWQN) 2 | 3 | > This usage example is for demonstration and not for research or 4 | > operational use. 5 | 6 | This example uses Lithops to retrieve data from every NWQN 7 | monitoring site, then writes the results to Parquet files on S3. Each 8 | retrieval also searches the NLDI for neighboring sites with NWQN data and 9 | merges those data. In the streamflow retrieval, the neighborhood search 10 | progressively fill in gaps in the record by taking data from the 11 | nearest streamgage and rescaling it by the drainage area ratio. 12 | 13 | 1. Set up a Python environment 14 | ```bash 15 | conda create --name dataretrieval-lithops -y python=3.11 16 | conda activate dataretrieval-lithops 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | 2. Configure compute and storage backends for [lithops](https://lithops-cloud.github.io/docs/source/configuration.html). 21 | The configuration in `lithops.yaml` uses AWS Lambda for [compute](https://lithops-cloud.github.io/docs/source/compute_config/aws_lambda.html) and AWS S3 for [storage](https://lithops-cloud.github.io/docs/source/storage_config/aws_s3.html). 22 | To use those backends, simply edit `lithops.yaml` with your `bucket` and `execution_role`. 23 | 24 | 3. Build a runtime image for Cubed 25 | ```bash 26 | export LITHOPS_CONFIG_FILE=$(pwd)/lithops.yaml 27 | lithops runtime build -b aws_lambda -f Dockerfile_dataretrieval dataretrieval-runtime 28 | ``` 29 | 30 | 4. Download the site list from ScienceBase using `wget` or navigate to the URL and copy the CVS into `nwqn_data_pull/`. 31 | ```bash 32 | wget https://www.sciencebase.gov/catalog/file/get/655d2063d34ee4b6e05cc9e6?f=__disk__b3%2F3e%2F5b%2Fb33e5b0038f004c2a48818d0fcc88a0921f3f689 -O NWQN_sites.csv 33 | ``` 34 | 35 | 5. Create a s3 bucket for the output, then set it as an environmental variable 36 | ```bash 37 | export DESTINATION_BUCKET= 38 | ``` 39 | 40 | 6. Run the scripts 41 | ```bash 42 | python retrieve_nwqn_samples.py 43 | 44 | python retrieve_nwqn_streamflow.py 45 | ``` 46 | 47 | ## Cleaning up 48 | To rebuild the Lithops image, delete the existing one by running 49 | ```bash 50 | lithops runtime delete -b aws_lambda -d dataretrieval-runtime 51 | ``` 52 | -------------------------------------------------------------------------------- /demos/nwqn_data_pull/lithops.yaml: -------------------------------------------------------------------------------- 1 | lithops: 2 | backend: aws_lambda 3 | storage: aws_s3 4 | 5 | aws: 6 | region: us-west-2 7 | 8 | aws_lambda: 9 | execution_role: arn:aws:iam::account-id:role/lambdaLithopsExecutionRole 10 | runtime: dataretrieval-runtime 11 | runtime_memory: 1024 12 | runtime_timeout: 900 13 | 14 | aws_s3: 15 | bucket: arn:aws:s3:::the-name-of-your-bucket 16 | -------------------------------------------------------------------------------- /demos/nwqn_data_pull/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | dataretrieval[nldi] 3 | lithops 4 | pika 5 | ps_mem 6 | pyarrow 7 | s3fs 8 | tblib 9 | -------------------------------------------------------------------------------- /demos/nwqn_data_pull/retrieve_nwqn_samples.py: -------------------------------------------------------------------------------- 1 | # Retrieve data from the National Water Quality Assessment Program (NAWQA) 2 | 3 | import lithops 4 | import math 5 | import os 6 | import pandas as pd 7 | 8 | from random import randint 9 | from time import sleep 10 | from dataretrieval import nldi, nwis, wqp 11 | 12 | DESTINATION_BUCKET = os.environ.get('DESTINATION_BUCKET') 13 | PROJECT = "National Water Quality Assessment Program (NAWQA)" 14 | # some sites are not found in NLDI, avoid them for now 15 | NOT_FOUND_SITES = [ 16 | "15565447", # "USGS-" 17 | "15292700", 18 | ] 19 | BAD_GEOMETRY_SITES = [ 20 | "06805500", 21 | "09306200", 22 | ] 23 | 24 | BAD_NLDI_SITES = NOT_FOUND_SITES + BAD_GEOMETRY_SITES 25 | 26 | 27 | def map_retrieval(site): 28 | """Map function to pull data from NWIS and WQP""" 29 | print(f"Retrieving samples from site {site}") 30 | # skip bad sites 31 | if site in BAD_NLDI_SITES: 32 | site_list = [site] 33 | # else query slowly 34 | else: 35 | sleep(randint(0, 5)) 36 | site_list = find_neighboring_sites(site) 37 | 38 | # reformat for wqp 39 | site_list = [f"USGS-{site}" for site in site_list] 40 | 41 | df, _ = wqp_get_results(siteid=site_list, 42 | project=PROJECT, 43 | ) 44 | 45 | try: 46 | # merge sites 47 | df['MonitoringLocationIdentifier'] = f"USGS-{site}" 48 | df.astype(str).to_parquet(f's3://{DESTINATION_BUCKET}/nwqn-samples.parquet', 49 | engine='pyarrow', 50 | partition_cols=['MonitoringLocationIdentifier'], 51 | compression='zstd') 52 | # optionally, `return df` for further processing 53 | 54 | except Exception as e: 55 | print(f"No samples returned from site {site}: {e}") 56 | 57 | 58 | def exponential_backoff(max_retries=5, base_delay=1): 59 | """Exponential backoff decorator with configurable retries and base delay""" 60 | def decorator(func): 61 | def wrapper(*args, **kwargs): 62 | attempts = 0 63 | while True: 64 | try: 65 | return func(*args, **kwargs) 66 | except Exception as e: 67 | attempts += 1 68 | if attempts > max_retries: 69 | raise e 70 | wait_time = base_delay * (2 ** attempts) 71 | print(f"Retrying in {wait_time} seconds...") 72 | sleep(wait_time) 73 | return wrapper 74 | return decorator 75 | 76 | 77 | @exponential_backoff(max_retries=5, base_delay=1) 78 | def nwis_get_info(*args, **kwargs): 79 | return nwis.get_info(*args, **kwargs) 80 | 81 | 82 | @exponential_backoff(max_retries=5, base_delay=1) 83 | def wqp_get_results(*args, **kwargs): 84 | return wqp.get_results(*args, **kwargs) 85 | 86 | 87 | @exponential_backoff(max_retries=3, base_delay=1) 88 | def find_neighboring_sites(site, search_factor=0.1, fudge_factor=3.0): 89 | """Find sites upstream and downstream of the given site within a certain distance. 90 | 91 | TODO Use geoconnex to determine mainstem length 92 | 93 | Parameters 94 | ---------- 95 | site : str 96 | 8-digit site number. 97 | search_factor : float, optional 98 | The factor by which to multiply the watershed length to determine the 99 | search distance. 100 | fudge_factor : float, optional 101 | An additional fudge factor to apply to the search distance, because 102 | watersheds are not circular. 103 | """ 104 | site_df, _ = nwis_get_info(sites=site) 105 | drain_area_sq_mi = site_df["drain_area_va"].values[0] 106 | length = _estimate_watershed_length_km(drain_area_sq_mi) 107 | search_distance = length * search_factor * fudge_factor 108 | # clip between 1 and 9999km 109 | search_distance = max(1.0, min(9999.0, search_distance)) 110 | 111 | # get upstream and downstream sites 112 | gdfs = [ 113 | nldi.get_features( 114 | feature_source="WQP", 115 | feature_id=f"USGS-{site}", 116 | navigation_mode=mode, 117 | distance=search_distance, 118 | data_source="nwissite", 119 | ) 120 | for mode in ["UM", "DM"] # upstream and downstream 121 | ] 122 | 123 | features = pd.concat(gdfs, ignore_index=True) 124 | 125 | df, _ = nwis_get_info(sites=list(features.identifier.str.strip('USGS-'))) 126 | # drop sites with disimilar different drainage areas 127 | df = df.where( 128 | (df["drain_area_va"] / drain_area_sq_mi) > search_factor, 129 | ).dropna(how="all") 130 | 131 | site_list = df["site_no"].to_list() 132 | 133 | # include the original search site among the neighbors 134 | if site not in site_list: 135 | site_list.append(site) 136 | 137 | return site_list 138 | 139 | 140 | def _estimate_watershed_length_km(drain_area_sq_mi): 141 | """Estimate the diameter assuming a circular watershed. 142 | 143 | Parameters 144 | ---------- 145 | drain_area_sq_mi : float 146 | The drainage area in square miles. 147 | 148 | Returns 149 | ------- 150 | float 151 | The diameter of the watershed in kilometers. 152 | """ 153 | # assume a circular watershed 154 | length_miles = 2 * (drain_area_sq_mi / math.pi) ** 0.5 155 | # convert from miles to km 156 | return length_miles * 1.60934 157 | 158 | 159 | if __name__ == "__main__": 160 | project = "National Water Quality Assessment Program (NAWQA)" 161 | 162 | site_df = pd.read_csv( 163 | 'NWQN_sites.csv', 164 | comment='#', 165 | dtype={'SITE_QW_ID': str, 'SITE_FLOW_ID': str}, 166 | ) 167 | 168 | site_list = site_df['SITE_QW_ID'].to_list() 169 | #site_list = site_list[:2] # prune for testing 170 | 171 | fexec = lithops.FunctionExecutor(config_file="lithops.yaml") 172 | futures = fexec.map(map_retrieval, site_list) 173 | 174 | futures.get_result() 175 | -------------------------------------------------------------------------------- /demos/nwqn_data_pull/retrieve_nwqn_streamflow.py: -------------------------------------------------------------------------------- 1 | # Retrieve data from the National Water Quality Assessment Program (NAWQA) 2 | 3 | import lithops 4 | import os 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | from dataretrieval import nwis 10 | from random import randint 11 | from time import sleep 12 | 13 | from retrieve_nwqn_samples import find_neighboring_sites, BAD_NLDI_SITES 14 | 15 | DESTINATION_BUCKET = os.environ.get('DESTINATION_BUCKET') 16 | START_DATE = "1991-01-01" 17 | END_DATE = "2023-12-31" 18 | 19 | def map_retrieval(site): 20 | """Map function to pull data from NWIS and WQP""" 21 | print(f"Retrieving daily streamflow from site {site}") 22 | 23 | if site in BAD_NLDI_SITES: 24 | site_list = [site] 25 | # else query slowly 26 | else: 27 | sleep(randint(0, 5)) 28 | site_list = find_neighboring_sites(site) 29 | 30 | df, _ = nwis.get_dv( 31 | sites=site_list, 32 | start=START_DATE, 33 | end=END_DATE, 34 | parameterCd="00060", 35 | ) 36 | 37 | # by default, site_no is not in the index if a single site is queried 38 | if "site_no" in df.columns: 39 | index_name = df.index.names[0] 40 | df.set_index(["site_no", df.index], inplace=True) 41 | df.index.names = ["site_no", index_name] 42 | 43 | print(len(df), "records retrieved") 44 | # process the results 45 | if not df.empty: 46 | # drop rows with missing values; neglect other 00060_* columns 47 | df = df.dropna(subset=["00060_Mean"]) 48 | # fill missing codes to enable string operations 49 | df["00060_Mean_cd"] = df["00060_Mean_cd"].fillna("M") 50 | df = df[df["00060_Mean_cd"].str.contains("A")] 51 | df['00060_Mean'] = df['00060_Mean'].replace(-999999, np.nan) 52 | 53 | site_info, _ = nwis.get_info(sites=site_list) 54 | # USACE sites may have same site_no, which creates index conflicts later 55 | site_info = site_info[site_info["agency_cd"] == "USGS"] # keep only USGS sites 56 | site_info = site_info.set_index("site_no") 57 | 58 | main_site = site_info.loc[site] 59 | main_site_drainage_area = main_site["drain_area_va"] 60 | 61 | # compute fraction of drainage area 62 | site_info = site_info[["drain_area_va"]].copy() 63 | site_info["drain_fraction"] = site_info["drain_area_va"] / main_site_drainage_area 64 | site_info["fraction_diff"] = np.abs(1 - site_info["drain_fraction"]) 65 | 66 | # apply drainage area fraction 67 | df = pd.merge(df, site_info, left_index=True, right_index=True) 68 | df["00060_Mean"] *= site_info.loc[df.index.get_level_values("site_no"), "drain_fraction"].values 69 | 70 | # order sites by the difference in drainage area fraction 71 | fill_order = site_info.sort_values("fraction_diff", ascending=True) 72 | fill_order = fill_order.index.values 73 | 74 | flow_sites = df.index.get_level_values("site_no").values 75 | fill_order = set(fill_order).intersection(flow_sites) 76 | 77 | output = pd.DataFrame() 78 | 79 | # loop through sites and fill in missing flow values 80 | # going from most to least-similar drainage areas. 81 | for fill_site in fill_order: 82 | fill_data = df.loc[fill_site] 83 | output = update_dataframe(output, fill_data) 84 | 85 | output = output.drop(columns=["drain_area_va", "drain_fraction", "fraction_diff"]) 86 | output["site_no"] = site 87 | 88 | else: 89 | print(f"No data retrieved for site {site}") 90 | return 91 | 92 | try: 93 | # merge sites 94 | output.astype(str).to_parquet(f's3://{DESTINATION_BUCKET}/nwqn-streamflow.parquet', 95 | engine='pyarrow', 96 | partition_cols=['site_no'], 97 | compression='zstd') 98 | # optionally, `return df` for further processing 99 | 100 | except Exception as e: 101 | print(f"Failed to write parquet: {e}") 102 | 103 | 104 | def update_dataframe( 105 | original_df: pd.DataFrame, 106 | new_df: pd.DataFrame, 107 | overwrite: bool = False, 108 | ) -> pd.DataFrame: 109 | """Update a DataFrame with values from another DataFrame. 110 | 111 | NOTE: this fuction does not handle MultiIndex DataFrames. 112 | """ 113 | # Identify new rows in new_df that are not in original_df 114 | new_rows = new_df[~new_df.index.isin(original_df.index)] 115 | 116 | # Concatenate new rows to original_df 117 | original_df = pd.concat([original_df, new_rows]).sort_index() 118 | 119 | return original_df 120 | 121 | 122 | if __name__ == "__main__": 123 | project = "National Water Quality Assessment Program (NAWQA)" 124 | 125 | site_df = pd.read_csv( 126 | 'NWQN_sites.csv', 127 | comment='#', 128 | dtype={'SITE_QW_ID': str, 'SITE_FLOW_ID': str}, 129 | ) 130 | 131 | site_list = site_df['SITE_QW_ID'].to_list() 132 | # site_list = site_list[:4] # prune for testing 133 | 134 | fexec = lithops.FunctionExecutor(config_file="lithops.yaml") 135 | futures = fexec.map(map_retrieval, site_list) 136 | 137 | futures.get_result() 138 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | 3 | # You can set these variables from the command line. 4 | SPHINXOPTS ?= 5 | SPHINXBUILD ?= sphinx-build 6 | SPHINXPROJ = dataretrieval 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help docs Makefile 15 | 16 | 17 | docs: test clean html 18 | 19 | 20 | clean : 21 | rm -rf ./build/ 22 | 23 | 24 | test : clean 25 | @$(SPHINXBUILD) -b doctest "$(SOURCEDIR)" "$(BUILDDIR)" 26 | @$(SPHINXBUILD) -b linkcheck "$(SOURCEDIR)" "$(BUILDDIR)" 27 | 28 | 29 | # Catch-all target: route all unknown targets to Sphinx using the new 30 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 31 | %: clean Makefile 32 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 33 | -------------------------------------------------------------------------------- /docs/source/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DOI-USGS/dataretrieval-python/4b3a3e8fa408e8d01a3147f1cba8d5be4e1a0a09/docs/source/.nojekyll -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # If extensions (or modules to document with autodoc) are in another directory, 2 | # add these directories to sys.path here. If the directory is relative to the 3 | # documentation root, use os.path.abspath to make it absolute, like shown here. 4 | # Since we aren't installing package here, we mock imports of the dependencies. 5 | 6 | # Relative paths so documentation can reference and include demos folder 7 | import os 8 | import sys 9 | from importlib.metadata import version 10 | 11 | # path to repository head 12 | sys.path.insert(0, os.path.abspath('../..')) 13 | 14 | # Project Information 15 | project = 'dataretrieval' 16 | release = version(project) 17 | version = '.'.join(release.split('.')[:2]) 18 | author = 'Hodson et al' 19 | 20 | # -- General configuration ------------------------------------------------ 21 | 22 | # Add any Sphinx extension module names here, as strings. 23 | extensions = [ 24 | 'sphinx.ext.autodoc', 25 | 'sphinx.ext.doctest', 26 | 'sphinx.ext.autosummary', 27 | 'sphinx.ext.napoleon', 28 | 'sphinx.ext.todo', 29 | 'sphinx.ext.coverage', 30 | 'sphinx.ext.viewcode', 31 | 'sphinx.ext.githubpages', 32 | 'nbsphinx', 33 | 'nbsphinx_link', 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # suffix of source documents 40 | source_suffix = '.rst' 41 | 42 | # The main toctree document. 43 | main_doc = 'index' 44 | 45 | # The version info for the project you're documenting, acts as replacement for 46 | # |version| and |release|, also used in various other places throughout the 47 | # built documents. 48 | 49 | # The language for content autogenerated by Sphinx. Refer to documentation 50 | # for a list of supported languages. 51 | # 52 | # This is also used if you do content translation via gettext catalogs. 53 | # Usually you set "language" from the command line for these cases. 54 | language = 'en' 55 | 56 | # List of patterns, relative to source directory, that match files and 57 | # directories to ignore when looking for source files. 58 | # This patterns also effect to html_static_path and html_extra_path 59 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 60 | 61 | # The name of the Pygments (syntax highlighting) style to use. 62 | pygments_style = 'default' 63 | 64 | # If true, `todo` and `todoList` produce output, else they produce nothing. 65 | todo_include_todos = True 66 | 67 | # Napoleon settings 68 | napoleon_google_docstring = False 69 | napoleon_numpy_docstring = True 70 | napoleon_include_init_with_doc = False 71 | napoleon_include_private_with_doc = False 72 | napoleon_include_special_with_doc = True 73 | napoleon_use_admonition_for_examples = False 74 | napoleon_use_admonition_for_notes = False 75 | napoleon_use_admonition_for_references = False 76 | napoleon_use_ivar = False 77 | napoleon_use_param = True 78 | napoleon_use_rtype = True 79 | 80 | # Autosummary / Automodapi settings 81 | autosummary_generate = True 82 | automodapi_inheritance_diagram = False 83 | autodoc_default_options = { 84 | 'members': True, 85 | 'inherited-members': False, 86 | 'private-members': True, 87 | } 88 | 89 | # doctest 90 | doctest_global_setup = ''' 91 | import dataretrieval 92 | import numpy as np 93 | import pandas as pd 94 | import matplotlib 95 | ''' 96 | 97 | # -- Options for HTML output ---------------------------------------------- 98 | 99 | # The theme to use for HTML and HTML Help pages. See the documentation for 100 | # a list of builtin themes. 101 | 102 | html_theme = 'sphinx_rtd_theme' 103 | 104 | # Theme options are theme-specific and customize the look and feel of a theme 105 | # further. For a list of options available for each theme, see the 106 | # documentation. 107 | 108 | html_theme_options = { 109 | 'logo_only': False, 110 | 'display_version': True, 111 | } 112 | 113 | # Add any paths that contain custom static files (such as style sheets) here, 114 | # relative to this directory. They are copied after the builtin static files, 115 | # so a file named "default.css" will overwrite the builtin "default.css". 116 | html_static_path = ['_static'] 117 | 118 | # -- Options for linkcheck ------------------------------------------- 119 | 120 | # Links to not "check" because they are problematic for the link checker 121 | linkcheck_ignore = [ 122 | r'https://streamstats.usgs.gov/streamstatsservices/#/', 123 | r'https://www.waterqualitydata.us/public_srsnames/', 124 | r'https://waterqualitydata.us', 125 | r'https://github.com/USGS-python/dataretrieval/tree/main/demos/hydroshare', 126 | ] 127 | 128 | # Some notebooks have warnings, which nbsphinx should ignore 129 | nbsphinx_allow_errors = True 130 | -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_DailyValues_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_DailyValues_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_GroundwaterLevels_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_Measurements_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_ParameterCodes_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_Peaks_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_Ratings_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_SiteInfo_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_SiteInfo_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_SiteInventory_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_SiteInventory_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_Statistics_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_UnitValues_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_UnitValues_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_WaterSamples_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/USGS_dataretrieval_WaterUse_Examples.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/index.rst: -------------------------------------------------------------------------------- 1 | .. examples: 2 | 3 | ======== 4 | Examples 5 | ======== 6 | 7 | Simple uses of the ``dataretrieval`` package 8 | -------------------------------------------- 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | 13 | readme_examples 14 | siteinfo_examples 15 | 16 | 17 | Example Notebooks from Hydroshare 18 | --------------------------------- 19 | A set of Jupyter Notebooks with Python code examples on how to use the 20 | ``dataretrieval`` package are available on the `Hydroshare`_ platform. 21 | We provide executed versions of these notebooks below; to download the 22 | ``.ipynb`` files for your own use, either visit the `Hydroshare`_ repository, 23 | or navigate to the `demos/hydroshare`_ subdirectory of the ``dataretrieval`` 24 | project repository. 25 | 26 | .. _Hydroshare: https://www.hydroshare.org/resource/c97c32ecf59b4dff90ef013030c54264/ 27 | 28 | .. _demos/hydroshare: https://github.com/DOI-USGS/dataretrieval-python/tree/main/demos/hydroshare 29 | 30 | .. toctree:: 31 | :maxdepth: 1 32 | 33 | USGS_dataretrieval_DailyValues_Examples 34 | USGS_dataretrieval_GroundwaterLevels_Examples 35 | USGS_dataretrieval_Measurements_Examples 36 | USGS_dataretrieval_ParameterCodes_Examples 37 | USGS_dataretrieval_Peaks_Examples 38 | USGS_dataretrieval_Ratings_Examples 39 | USGS_dataretrieval_SiteInfo_Examples 40 | USGS_dataretrieval_SiteInventory_Examples 41 | USGS_dataretrieval_Statistics_Examples 42 | USGS_dataretrieval_UnitValues_Examples 43 | USGS_dataretrieval_WaterSamples_Examples 44 | USGS_dataretrieval_WaterUse_Examples 45 | 46 | 47 | Using ``dataretrieval`` to obtain nation trends in peak annual streamflow 48 | ------------------------------------------------------------------------- 49 | 50 | .. toctree:: 51 | :maxdepth: 2 52 | 53 | nwisdemo01 54 | 55 | 56 | Duplicating the R ``dataRetrieval`` vignettes functionality 57 | ----------------------------------------------------------- 58 | 59 | .. note:: 60 | 61 | Some of the larger (e.g., state-wide) examples have been commented out 62 | in the interest of run-time for the notebook. 63 | 64 | .. toctree:: 65 | :maxdepth: 2 66 | 67 | rvignettes -------------------------------------------------------------------------------- /docs/source/examples/nwisdemo01.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/NWIS_demo_1.ipynb", 3 | "extra-media": [ 4 | "../../../demos/datasets" 5 | ] 6 | } -------------------------------------------------------------------------------- /docs/source/examples/readme_examples.rst: -------------------------------------------------------------------------------- 1 | 2 | Examples from the Readme file on retrieving NWIS data 3 | ----------------------------------------------------- 4 | 5 | .. note:: 6 | 7 | NWIS stands for the National Water Information System 8 | 9 | 10 | .. doctest:: 11 | 12 | >>> # first import the functions for downloading data from NWIS 13 | >>> import dataretrieval.nwis as nwis 14 | 15 | >>> # specify the USGS site code for which we want data. 16 | >>> site = '03339000' 17 | 18 | >>> # get instantaneous values (iv) 19 | >>> df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01') 20 | 21 | >>> df.head() 22 | 00010 00010_cd site_no 00060 00060_cd ... 63680_ysi), [discontinued 10/5/21_cd 63680_hach 63680_hach_cd 99133 99133_cd 23 | datetime ... 24 | 2017-12-31 06:00:00+00:00 1.0 A 03339000 140.0 A ... A 3.6 A 4.61 A 25 | 2017-12-31 06:15:00+00:00 1.0 A 03339000 138.0 A ... A 3.6 A 4.61 A 26 | 2017-12-31 06:30:00+00:00 1.0 A 03339000 139.0 A ... A 3.4 A 4.61 A 27 | 2017-12-31 06:45:00+00:00 1.0 A 03339000 139.0 A ... A 3.4 A 4.61 A 28 | 2017-12-31 07:00:00+00:00 1.0 A 03339000 139.0 A ... A 3.5 A 4.61 A 29 | 30 | [5 rows x 21 columns] 31 | 32 | >>> # get water quality samples (qwdata) 33 | >>> df2 = nwis.get_record(sites=site, service='qwdata', start='2018-12-01', end='2019-01-01') 34 | 35 | >>> print(df2) 36 | agency_cd site_no sample_dt sample_tm sample_end_dt sample_end_tm ... p80154 p82398 p84164 p91157 p91158 p91159 37 | datetime ... 38 | 2018-12-10 17:30:00+00:00 USGS 03339000 2018-12-10 11:30 NaN NaN ... 16 50 3060 0.0165 0.0141 0.0024 39 | 40 | [1 rows x 33 columns] 41 | 42 | >>> # get basic info about the site 43 | >>> df3 = nwis.get_record(sites=site, service='site') 44 | 45 | >>> print(df3) 46 | agency_cd site_no station_nm site_tp_cd lat_va long_va ... aqfr_cd aqfr_type_cd well_depth_va hole_depth_va depth_src_cd project_no 47 | 0 USGS 03339000 VERMILION RIVER NEAR DANVILLE, IL ST 400603 873550 ... NaN NaN NaN NaN NaN 100 48 | 49 | [1 rows x 42 columns] -------------------------------------------------------------------------------- /docs/source/examples/rvignettes.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../demos/R Python Vignette equivalents.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/source/examples/siteinfo_examples.rst: -------------------------------------------------------------------------------- 1 | 2 | Retrieving site information 3 | --------------------------- 4 | 5 | By default ``dataretrieval`` fetches the so-called "expanded" site date from 6 | the NWIS web service. However there is an optional keyword parameter called 7 | ``seriesCatalogOutput`` that can be set to "True" if you wish to retrieve the 8 | detailed period of record information for a site instead. Refer to the 9 | `NWIS water services documentation`_ for additional information. The below 10 | example illustrates the use of the ``seriesCatalogOutput`` switch and displays 11 | the resulting column names for the output dataframes (example prompted by 12 | `GitHub Issue #34`_). 13 | 14 | .. _NWIS water services documentation: https://waterservices.usgs.gov/docs/site-service/site-service-details/ 15 | 16 | .. _GitHub Issue #34: https://github.com/DOI-USGS/dataretrieval-python/issues/34 17 | 18 | .. doctest:: 19 | 20 | # first import the functions for downloading data from NWIS 21 | >>> import dataretrieval.nwis as nwis 22 | 23 | # fetch data from a major HUC basin with seriesCatalogOutput set to True 24 | >>> df = nwis.get_record(huc='20', parameterCd='00060', 25 | ... service='site', seriesCatalogOutput='True') 26 | 27 | >>> print(df.columns) 28 | Index(['agency_cd', 'site_no', 'station_nm', 'site_tp_cd', 'dec_lat_va', 29 | 'dec_long_va', 'coord_acy_cd', 'dec_coord_datum_cd', 'alt_va', 30 | 'alt_acy_va', 'alt_datum_cd', 'huc_cd', 'data_type_cd', 'parm_cd', 31 | 'stat_cd', 'ts_id', 'loc_web_ds', 'medium_grp_cd', 'parm_grp_cd', 32 | 'srs_id', 'access_cd', 'begin_date', 'end_date', 'count_nu'], 33 | dtype='object') 34 | 35 | # repeat the same query with seriesCatalogOutput set as False 36 | >>> df = nwis.get_record(huc='20', parameterCd='00060', 37 | ... service='site', seriesCatalogOutput='False') 38 | 39 | >>> print(df.columns) 40 | Index(['agency_cd', 'site_no', 'station_nm', 'site_tp_cd', 'lat_va', 'long_va', 41 | 'dec_lat_va', 'dec_long_va', 'coord_meth_cd', 'coord_acy_cd', 42 | 'coord_datum_cd', 'dec_coord_datum_cd', 'district_cd', 'state_cd', 43 | 'county_cd', 'country_cd', 'land_net_ds', 'map_nm', 'map_scale_fc', 44 | 'alt_va', 'alt_meth_cd', 'alt_acy_va', 'alt_datum_cd', 'huc_cd', 45 | 'basin_cd', 'topo_cd', 'instruments_cd', 'construction_dt', 46 | 'inventory_dt', 'drain_area_va', 'contrib_drain_area_va', 'tz_cd', 47 | 'local_time_fg', 'reliability_cd', 'gw_file_cd', 'nat_aqfr_cd', 48 | 'aqfr_cd', 'aqfr_type_cd', 'well_depth_va', 'hole_depth_va', 49 | 'depth_src_cd', 'project_no'], 50 | dtype='object') 51 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome 2 | ======= 3 | 4 | Welcome to the documentation for the Python ``dataretrieval`` package. 5 | ``dataretrieval`` is a Python alternative to the `USGS R dataRetrieval package`_ 6 | and is used to obtain USGS and EPA water quality data, streamflow data, and 7 | metadata directly from webservices (see the 8 | :doc:`data portals documentation ` for additional 9 | details about specific data sources). 10 | 11 | .. _USGS R dataRetrieval package: https://github.com/DOI-USGS/dataRetrieval 12 | 13 | 14 | Table of Contents 15 | ----------------- 16 | 17 | .. toctree:: 18 | :maxdepth: 1 19 | 20 | meta/installing 21 | userguide/index 22 | examples/index 23 | meta/contributing 24 | meta/license 25 | reference/index 26 | -------------------------------------------------------------------------------- /docs/source/meta/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Contributions to ``dataretrieval`` are welcome and greatly appreciated, but 5 | please read this document before doing so. 6 | 7 | 8 | Ways to contribute 9 | ------------------ 10 | 11 | Reporting Bugs: 12 | ^^^^^^^^^^^^^^^ 13 | 14 | Report bugs at https://github.com/DOI-USGS/dataretrieval-python/issues 15 | 16 | When reporting a bug, please include: 17 | 18 | - Detailed steps to reproduce the bug 19 | - Your operating system name and version. 20 | - Any details about your local setup that might be helpful in troubleshooting. 21 | 22 | Fixing Bugs: 23 | ^^^^^^^^^^^^ 24 | 25 | Look through the GitHub issues for bugs. Anything tagged as a "bug" is open to 26 | whomever wants to fix it. 27 | 28 | 29 | Implementing Features: 30 | ^^^^^^^^^^^^^^^^^^^^^^ 31 | 32 | Look through the GitHub issues for features. Anything tagged with "enhancement" 33 | and "please-help" is open to whomever wants to implement it. 34 | 35 | Please do not combine multiple feature enhancements into a single pull request. 36 | 37 | Writing Documentation: 38 | ^^^^^^^^^^^^^^^^^^^^^^ 39 | 40 | ``dataretrieval`` could always use more documentation, whether as part of the 41 | official docs, in docstrings, or even in blog posts or articles. 42 | 43 | Submitting Feedback: 44 | ^^^^^^^^^^^^^^^^^^^^ 45 | 46 | The best way to send feedback is to file an issue at 47 | https://github.com/DOI-USGS/dataretrieval-python/issues 48 | 49 | If you are proposing a feature: 50 | 51 | - Explain in detail how it would work. 52 | - Keep the scope as narrow as possible, to make it easier to implement. 53 | 54 | Contributor Guidelines 55 | ---------------------- 56 | 57 | Pull Request Guidelines: 58 | ^^^^^^^^^^^^^^^^^^^^^^^^ 59 | 60 | Before you submit a pull request, check that it meets these guidelines: 61 | 62 | 1. Any pull request should include tests. However, a contribution with 63 | no tests is preferable to no contribution at all. 64 | 2. If the pull request adds functionality, the docs should be updated. Put 65 | your new functionality into a function with a docstring, and add the 66 | feature to the list in README.md. 67 | 3. The pull request should work for Python 3.6, 3.7, 3.8, and pass the GitHub 68 | Actions continuous integration pipelines. 69 | 70 | 71 | Updating Package Version: 72 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 73 | 74 | Follow semantic versioning as best as possible. This means that changing the 75 | first digit of the version indicates a breaking change. Any smaller changes 76 | should attempt to maintain backwards-compatibility with previous code and 77 | issue deprecation warnings for features or functionality that will be removed 78 | or no longer be backwards-compatible in future releases. 79 | 80 | When updating the package version, there are currently two places where this 81 | must be done: 82 | 83 | 1. In the `setup.py` file the version field should be updated 84 | 2. In the `conf.py` file both the version and release fields can be updated 85 | 86 | 87 | Coding Standards 88 | ---------------- 89 | 90 | - PEP8 (https://peps.python.org/pep-0008/) 91 | - Doc-strings should follow the NumPy standard (`example`_): 92 | 93 | .. _example: https://www.sphinx-doc.org/en/master/usage/extensions/example_numpy.html 94 | 95 | - Example: 96 | 97 | .. code:: python 98 | 99 | def foo(param1, param2): 100 | """Example function with types documented in the docstring. 101 | 102 | A more detailed description of the function and its implementation. 103 | 104 | Parameters 105 | ---------- 106 | param1 : int 107 | The first parameter. 108 | param2 : str 109 | The second parameter. 110 | 111 | Returns 112 | ------- 113 | bool 114 | True if successful, False otherwise. 115 | 116 | Examples 117 | -------- 118 | Examples should be written in doctest format and should demonstrate basic usage. 119 | 120 | .. doctest:: 121 | 122 | >>> type(1) is int 123 | True 124 | 125 | """ 126 | 127 | - The public interface should emphasize functions over classes; however, classes can and should be used internally and in tests. 128 | - Functions for downloading data from a specific web portal must be grouped within their own submodule. 129 | - For example, all NWIS functions are located at :obj:`dataretrieval.nwis`. 130 | 131 | - Quotes via http://stackoverflow.com/a/56190/5549: 132 | 133 | - Use double quotes around strings that are used for interpolation or that are natural language messages 134 | - Use single quotes for small symbol-like strings (but break the rules if the strings contain quotes) 135 | - Use triple double quotes for doc-strings and raw string literals for regular expressions even if they aren't needed. 136 | 137 | - Example: 138 | 139 | .. code:: python 140 | 141 | LIGHT_MESSAGES = { 142 | 'English': "There are %(number_of_lights)s lights.", 143 | 'Pirate': "Arr! Thar be %(number_of_lights)s lights." 144 | } 145 | 146 | def lights_message(language, number_of_lights): 147 | """Return a language-appropriate string reporting the light count.""" 148 | return LIGHT_MESSAGES[language] % locals() 149 | 150 | def is_pirate(message): 151 | """Return True if the given message sounds piratical.""" 152 | return re.search(r"(?i)(arr|avast|yohoho)!", message) is not None 153 | 154 | 155 | Acknowledgements 156 | ---------------- 157 | This document was adapted from the ``cookiecutter`` project's CONTRIBUTING file, which resides at 158 | https://github.com/cookiecutter/cookiecutter/blob/main/CONTRIBUTING.md 159 | Thank you to the ``cookiecutter`` team for helping streamline open-source development for the masses. -------------------------------------------------------------------------------- /docs/source/meta/installing.rst: -------------------------------------------------------------------------------- 1 | Installation Guide 2 | ================== 3 | 4 | Whether you are a user or developer we recommend installing ``dataretrieval`` 5 | in a virtual environment. This can be done using something like ``virtualenv`` 6 | or ``conda``. Package dependencies are listed in the `requirements.txt`_ file, 7 | a full list of dependencies necessary for development are listed in the 8 | `requirements-dev.txt`_ file. 9 | 10 | .. _requirements.txt: https://github.com/DOI-USGS/dataretrieval-python/blob/main/requirements.txt 11 | 12 | .. _requirements-dev.txt: https://github.com/DOI-USGS/dataretrieval-python/blob/main/requirements-dev.txt 13 | 14 | 15 | User Installation 16 | ----------------- 17 | 18 | Via ``pip``: 19 | ^^^^^^^^^^^^ 20 | To install the latest stable release of ``dataretrieval`` from `PyPI`_, run the 21 | following commands: 22 | 23 | .. code-block:: bash 24 | 25 | $ pip install dataretrieval 26 | 27 | .. _PyPI: https://pypi.org/project/dataretrieval 28 | 29 | 30 | Via ``conda``: 31 | ^^^^^^^^^^^^^^ 32 | To install the latest stable release of ``dataretrieval`` from the 33 | `conda-forge channel`_, run the following commands: 34 | 35 | .. code-block:: bash 36 | 37 | $ conda -c conda-forge install dataretrieval 38 | 39 | .. _conda-forge channel: https://anaconda.org/conda-forge/dataretrieval 40 | 41 | 42 | Developer Installation 43 | ---------------------- 44 | 45 | To install ``dataretrieval`` for development, we recommend first forking 46 | the repository on GitHub. This will allow you to develop on your own 47 | feature branch, and propose changes as pull requests to the main branch of 48 | the repository. 49 | 50 | The first step is to clone your fork of the repository: 51 | 52 | .. code-block:: bash 53 | 54 | $ git clone https://github.com/DOI-USGS/dataretrieval-python.git 55 | 56 | Then, set the cloned repository as your current working directory in your 57 | terminal and run the following commands to get an "editable" installation of 58 | the package for development: 59 | 60 | .. code-block:: bash 61 | 62 | $ pip install -r requirements-dev.txt 63 | $ pip install -e . 64 | 65 | To check your installation you can run the tests with the following commands: 66 | 67 | .. code-block:: bash 68 | 69 | $ cd tests 70 | $ pytest 71 | 72 | In order to fetch the latest version of ``dataretrieval``, we recommend 73 | defining the main repository as a remote `upstream` repository: 74 | 75 | .. code-block:: bash 76 | 77 | $ git remote add upstream https://github.com/DOI-USGS/dataretrieval-python.git 78 | 79 | You can also build the documentation locally by running the following commands: 80 | 81 | .. code-block:: bash 82 | 83 | $ cd docs 84 | $ make docs 85 | 86 | This both tests the documentation (runs code blocks and checks links), and also 87 | locally *builds* the documentation, placing the HTML files within the 88 | ``docs/build/html`` directory. You can then open the ``index.html`` file in 89 | your browser to view the documentation. -------------------------------------------------------------------------------- /docs/source/meta/license.rst: -------------------------------------------------------------------------------- 1 | License and Disclaimer 2 | ====================== 3 | 4 | Unless otherwise noted, this project is in the public domain in the United 5 | States because it contains materials that originally came from the United 6 | States Geological Survey, an agency of the United States Department of 7 | Interior. For more information, see the `LICENSE.md`_ file. See the 8 | `Disclaimer.md`_ file for more information about the disclaimer. 9 | 10 | .. _LICENSE.md: https://github.com/DOI-USGS/dataretrieval-python/blob/main/LICENSE.md 11 | 12 | .. _Disclaimer.md: https://github.com/DOI-USGS/dataretrieval-python/blob/main/DISCLAIMER.md -------------------------------------------------------------------------------- /docs/source/reference/index.rst: -------------------------------------------------------------------------------- 1 | .. api: 2 | 3 | ============= 4 | API reference 5 | ============= 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | nadp 11 | nwis 12 | samples 13 | streamstats 14 | utils 15 | wqp 16 | -------------------------------------------------------------------------------- /docs/source/reference/nadp.rst: -------------------------------------------------------------------------------- 1 | .. _nadp 2 | 3 | dataretrieval.nadp 4 | ------------------ 5 | 6 | .. automodule:: dataretrieval.nadp 7 | :members: 8 | :special-members: -------------------------------------------------------------------------------- /docs/source/reference/nwis.rst: -------------------------------------------------------------------------------- 1 | .. _nwis 2 | 3 | dataretrieval.nwis 4 | ------------------ 5 | 6 | .. automodule:: dataretrieval.nwis 7 | :members: 8 | :special-members: -------------------------------------------------------------------------------- /docs/source/reference/samples.rst: -------------------------------------------------------------------------------- 1 | .. _samples 2 | 3 | dataretrieval.samples 4 | ------------------------- 5 | 6 | .. automodule:: dataretrieval.samples 7 | :members: 8 | :special-members: -------------------------------------------------------------------------------- /docs/source/reference/streamstats.rst: -------------------------------------------------------------------------------- 1 | .. _streamstats 2 | 3 | dataretrieval.streamstats 4 | ------------------------- 5 | 6 | .. automodule:: dataretrieval.streamstats 7 | :members: 8 | :special-members: -------------------------------------------------------------------------------- /docs/source/reference/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utils 2 | 3 | dataretrieval.utils 4 | ------------------- 5 | 6 | .. automodule:: dataretrieval.utils 7 | :members: 8 | :special-members: -------------------------------------------------------------------------------- /docs/source/reference/wqp.rst: -------------------------------------------------------------------------------- 1 | .. _wqp 2 | 3 | dataretrieval.wqp 4 | ----------------- 5 | 6 | .. automodule:: dataretrieval.wqp 7 | :members: 8 | :special-members: -------------------------------------------------------------------------------- /docs/source/userguide/dataportals.rst: -------------------------------------------------------------------------------- 1 | .. dataportals: 2 | 3 | ============ 4 | Data Portals 5 | ============ 6 | 7 | ``dataretrieval`` provides a number of functions to retrieve data from several 8 | data portals, a table listing the portals and corresponding web addresses is 9 | provided below. 10 | 11 | +-----------------------------------+---------------------------------------------------------------+ 12 | | Data Portal | Uniform Resource Locator (URL) | 13 | +===================================+===============================================================+ 14 | | National Water Information System | https://waterdata.usgs.gov/nwis | 15 | +-----------------------------------+---------------------------------------------------------------+ 16 | | National Trends Network | https://nadp.slh.wisc.edu/networks/national-trends-network | 17 | +-----------------------------------+---------------------------------------------------------------+ 18 | | Mercury Deposition Network | https://nadp.slh.wisc.edu/networks/mercury-deposition-network | 19 | +-----------------------------------+---------------------------------------------------------------+ 20 | | USGS Samples | https://waterdata.usgs.gov/download-samples/ | 21 | +-----------------------------------+---------------------------------------------------------------+ 22 | | Streamstats | https://streamstats.usgs.gov | 23 | +-----------------------------------+---------------------------------------------------------------+ 24 | | Water Quality Portal | https://waterqualitydata.us | 25 | +-----------------------------------+---------------------------------------------------------------+ 26 | | Water Services | https://waterservices.usgs.gov | 27 | +-----------------------------------+---------------------------------------------------------------+ 28 | -------------------------------------------------------------------------------- /docs/source/userguide/index.rst: -------------------------------------------------------------------------------- 1 | .. userguide: 2 | 3 | ========== 4 | User Guide 5 | ========== 6 | 7 | Topic guides to provide additional information about various aspects of 8 | ``dataretrieval``. 9 | 10 | Contents 11 | -------- 12 | 13 | .. toctree:: 14 | :maxdepth: 1 15 | 16 | timeconventions 17 | dataportals 18 | -------------------------------------------------------------------------------- /docs/source/userguide/timeconventions.rst: -------------------------------------------------------------------------------- 1 | .. timeconventions: 2 | 3 | Datetime Information 4 | -------------------- 5 | 6 | ``dataretrieval`` attempts to normalize time data to UTC time when converting 7 | web service data into dataframes. To do this, in-built pandas functions are 8 | used; either :obj:`pandas.to_datetime()` during the initial datetime object 9 | conversion, or :obj:`pandas.DataFrame.tz_localize()` if the datetime objects 10 | exist but are not UTC-localized. In most cases (single-site and multi-site), 11 | ``dataretrieval`` assigns the datetime information as the dataframe *index*, 12 | the exception to this is when incomplete datetime information is available, in 13 | these cases integers are used as the dataframe index (see `PR#58`_ for more 14 | details). 15 | 16 | .. _PR#58: https://github.com/DOI-USGS/dataretrieval-python/pull/58 17 | 18 | 19 | Inspecting Timestamps 20 | ********************* 21 | 22 | For single sites, the index of the returned dataframe contains pandas 23 | timestamps. 24 | 25 | .. code:: python 26 | 27 | >>> import dataretrieval.nwis as nwis 28 | >>> site = '03339000' 29 | >>> df = nwis.get_record(sites=site, service='peaks', 30 | ... start='2015-01-01', end='2017-12-31') 31 | >>> print(df) 32 | agency_cd site_no peak_tm peak_va peak_cd gage_ht gage_ht_cd year_last_pk ag_dt ag_tm ag_gage_ht ag_gage_ht_cd 33 | datetime 34 | 2015-06-08 00:00:00+00:00 USGS 03339000 17:30 25100 C 22.83 NaN NaN NaN NaN NaN NaN 35 | 2015-12-29 00:00:00+00:00 USGS 03339000 18:45 37600 C 26.66 NaN NaN NaN NaN NaN NaN 36 | 2017-05-05 00:00:00+00:00 USGS 03339000 04:45 17000 C 18.47 NaN NaN NaN NaN NaN NaN 37 | 38 | Here the index of the dataframe ``df`` is a set of datetime objects. Each has 39 | the format, ``YYYY-MM-DD HH:MM:SS+HH:MM``. Because these timestamps are 40 | localized to be in UTC, the expected offset (``+HH:MM``) is ``+00:00``. 41 | These values can be converted to a local timezone of your choosing using 42 | :obj:`pandas` functionality. 43 | 44 | .. code:: python 45 | 46 | >>> df.index = df.index.tz_convert(tz='America/New_York') 47 | >>> print(df) 48 | agency_cd site_no peak_tm peak_va peak_cd gage_ht gage_ht_cd year_last_pk ag_dt ag_tm ag_gage_ht ag_gage_ht_cd 49 | datetime 50 | 2015-06-07 20:00:00-04:00 USGS 03339000 17:30 25100 C 22.83 NaN NaN NaN NaN NaN NaN 51 | 2015-12-28 19:00:00-05:00 USGS 03339000 18:45 37600 C 26.66 NaN NaN NaN NaN NaN NaN 52 | 2017-05-04 20:00:00-04:00 USGS 03339000 04:45 17000 C 18.47 NaN NaN NaN NaN NaN NaN 53 | 54 | Above, the index was converted to localize the timestamps to New York. 55 | In the updated dataframe index, the resulting timestamps now have offsets of 56 | ``-04:00`` and ``-05:00`` as New York is either 4 or 5 hours behind UTC 57 | depending on the time of year (due to daylight savings). 58 | 59 | When information for multiple sites is requested, ``dataretrieval`` creates a 60 | dataframe with a multi-index, with the first entry containing the site number, 61 | and the second containing the datetime information. 62 | 63 | .. doctest:: 64 | 65 | >>> import dataretrieval.nwis as nwis 66 | >>> sites = ['180049066381200', '290000095192602'] 67 | >>> df = nwis.get_record(sites=sites, service='gwlevels', 68 | ... start='2021-10-01', end='2022-01-01') 69 | >>> df 70 | agency_cd site_tp_cd lev_dt lev_tm lev_tz_cd ... lev_dt_acy_cd lev_acy_cd lev_src_cd lev_meth_cd lev_age_cd 71 | site_no datetime ... 72 | 180049066381200 2021-10-04 19:54:00+00:00 USGS GW 2021-10-04 19:54 +0000 ... m NaN S S A 73 | 2021-11-16 14:28:00+00:00 USGS GW 2021-11-16 14:28 +0000 ... m NaN S S A 74 | 2021-12-09 10:43:00+00:00 USGS GW 2021-12-09 10:43 +0000 ... m NaN S S A 75 | 290000095192602 2021-12-08 19:07:00+00:00 USGS GW 2021-12-08 19:07 +0000 ... m NaN S S P 76 | 77 | [4 rows x 15 columns] 78 | 79 | Here note that the default datetime index information returned is also UTC 80 | localized, and therefore the offset values are ``+00:00``. -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "dataretrieval" 7 | description = "Discover and retrieve water data from U.S. federal hydrologic web services." 8 | readme = "README.md" 9 | requires-python = ">=3.8" 10 | keywords = ["USGS", "water data"] 11 | license = {file = "LICENSE.md"} 12 | authors = [ 13 | {name = "Timothy Hodson", email = "thodson@usgs.gov"}, 14 | ] 15 | maintainers = [ 16 | {name = "Elise Hinman", email = "ehinman@usgs.gov"}, 17 | ] 18 | classifiers = [ 19 | "Programming Language :: Python :: 3", 20 | ] 21 | dependencies = [ 22 | "requests", 23 | "pandas==2.*", 24 | ] 25 | dynamic = ["version"] 26 | 27 | [tool.setuptools] 28 | packages = ["dataretrieval", "dataretrieval.codes"] 29 | 30 | [project.optional-dependencies] 31 | test = [ 32 | "pytest > 5.0.0", 33 | "pytest-cov[all]", 34 | "coverage", 35 | "requests-mock", 36 | "flake8", 37 | ] 38 | doc = [ 39 | "sphinx", 40 | "sphinx-rtd-theme", 41 | "nbsphinx", 42 | "nbsphinx_link", 43 | "ipython", 44 | "ipykernel", 45 | "matplotlib", 46 | ] 47 | nldi = [ 48 | 'geopandas>=0.10' 49 | ] 50 | 51 | [project.urls] 52 | homepage = "https://github.com/DOI-USGS/dataretrieval-python" 53 | documentation = "https://doi-usgs.github.io/dataretrieval-python/" 54 | repository = "https://github.com/DOI-USGS/dataretrieval-python.git" 55 | 56 | [tool.setuptools_scm] 57 | write_to = "dataretrieval/_version.py" 58 | 59 | [tool.isort] 60 | profile = "black" 61 | 62 | [tool.black] 63 | skip-string-normalization = true 64 | 65 | [tool.ruff.format] 66 | quote-style = "double" 67 | docstring-code-format = true 68 | docstring-code-line-length = 72 69 | 70 | [tool.ruff.lint] 71 | preview = true 72 | # Default ["E4", "E7", "E9", and "F"] --> Pyflakes ("F") and pycodestyle ("E") 73 | extend-select = [ 74 | "B", "I", "Q", 75 | "W291", "W292", "W293", "W605", 76 | "E231", "E252", "E261", "E262", "E303", "E501", 77 | ] 78 | 79 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | numpy<2 2 | pandas==2.* 3 | geopandas==0.14.* 4 | scipy 5 | python-dateutil 6 | requests 7 | requests-mock 8 | coverage 9 | pytest 10 | flake8 11 | sphinx 12 | sphinx-rtd-theme 13 | ipython 14 | ipykernel 15 | nbsphinx 16 | nbsphinx_link 17 | matplotlib 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DOI-USGS/dataretrieval-python/4b3a3e8fa408e8d01a3147f1cba8d5be4e1a0a09/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/nldi_get_basin.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "Polygon", 8 | "coordinates": [ 9 | [ 10 | [-89.467166934, 43.120532162], 11 | [-89.461615766, 43.12632345], 12 | [-89.457260835, 43.127037725], 13 | [-89.452061009, 43.124187365], 14 | [-89.4476819, 43.119429024], 15 | [-89.439928469, 43.119445644], 16 | [-89.428664489, 43.113435904], 17 | [-89.410715151, 43.106836358], 18 | [-89.413577818, 43.100214712], 19 | [-89.414991109, 43.089543698], 20 | [-89.422806579, 43.085870913], 21 | [-89.430063704, 43.088309393], 22 | [-89.441315037, 43.083364081], 23 | [-89.444278818, 43.084620004], 24 | [-89.450179911, 43.081715309], 25 | [-89.451427898, 43.079513559], 26 | [-89.464561163, 43.078447595], 27 | [-89.466686537, 43.076682871], 28 | [-89.4622099, 43.07306458], 29 | [-89.465497169, 43.073234059], 30 | [-89.468704754, 43.07134039], 31 | [-89.469622381, 43.068424824], 32 | [-89.467935617, 43.067497217], 33 | [-89.470726914, 43.06540292], 34 | [-89.470430177, 43.062692826], 35 | [-89.466400073, 43.056302895], 36 | [-89.469802035, 43.053666055], 37 | [-89.47601985, 43.057901927], 38 | [-89.476585982, 43.060280486], 39 | [-89.478603327, 43.060411566], 40 | [-89.483458574, 43.058438858], 41 | [-89.484967442, 43.056130059], 42 | [-89.491406587, 43.054388451], 43 | [-89.494069541, 43.055509411], 44 | [-89.493868228, 43.06153445], 45 | [-89.500475724, 43.063815698], 46 | [-89.506329775, 43.06379093], 47 | [-89.507540669, 43.061129535], 48 | [-89.516487667, 43.05889596], 49 | [-89.524196291, 43.0484005], 50 | [-89.527027161, 43.049865572], 51 | [-89.531212693, 43.048578393], 52 | [-89.53168683, 43.05078274], 53 | [-89.537781776, 43.052965206], 54 | [-89.537977928, 43.05550807], 55 | [-89.544353411, 43.058384424], 56 | [-89.545783506, 43.061283656], 57 | [-89.551286859, 43.061283754], 58 | [-89.554899419, 43.062989677], 59 | [-89.555177648, 43.065189554], 60 | [-89.55939716, 43.069584622], 61 | [-89.552552004, 43.070032995], 62 | [-89.551027329, 43.072160878], 63 | [-89.55664221, 43.078164337], 64 | [-89.561552454, 43.080518638], 65 | [-89.557979773, 43.081411202], 66 | [-89.553540642, 43.086194967], 67 | [-89.548701193, 43.086177316], 68 | [-89.546825331, 43.088023965], 69 | [-89.543205962, 43.087800221], 70 | [-89.540831467, 43.089363501], 71 | [-89.536587878, 43.095690791], 72 | [-89.536402562, 43.103900066], 73 | [-89.539222509, 43.106589488], 74 | [-89.543754931, 43.106648012], 75 | [-89.545473151, 43.108651969], 76 | [-89.551215165, 43.105435169], 77 | [-89.562937764, 43.104929008], 78 | [-89.571631233, 43.102745105], 79 | [-89.577430373, 43.106944886], 80 | [-89.575279549, 43.112421282], 81 | [-89.585534254, 43.110302501], 82 | [-89.590268184, 43.11100234], 83 | [-89.591233389, 43.112975078], 84 | [-89.590180668, 43.11496155], 85 | [-89.593396468, 43.118692324], 86 | [-89.590911252, 43.118300423], 87 | [-89.585126608, 43.12232665], 88 | [-89.588527844, 43.125843725], 89 | [-89.594046461, 43.126263171], 90 | [-89.584750406, 43.134728013], 91 | [-89.58703379, 43.136667616], 92 | [-89.58646025, 43.139891225], 93 | [-89.58311069, 43.140790363], 94 | [-89.58050643, 43.147945499], 95 | [-89.577277976, 43.149190149], 96 | [-89.577524762, 43.155001208], 97 | [-89.575687513, 43.156361384], 98 | [-89.574702434, 43.160718603], 99 | [-89.575498624, 43.163477344], 100 | [-89.572475709, 43.166520978], 101 | [-89.573423699, 43.16805986], 102 | [-89.571498421, 43.168773113], 103 | [-89.561668082, 43.160869482], 104 | [-89.556299248, 43.163934156], 105 | [-89.5530407, 43.163969446], 106 | [-89.553533801, 43.154687139], 107 | [-89.542983929, 43.149942779], 108 | [-89.543949065, 43.14881666], 109 | [-89.541736611, 43.147335113], 110 | [-89.544166951, 43.145507198], 111 | [-89.540220508, 43.141656916], 112 | [-89.536084993, 43.147942882], 113 | [-89.529435603, 43.145701478], 114 | [-89.526609809, 43.142382532], 115 | [-89.530611508, 43.138913019], 116 | [-89.526215839, 43.137656712], 117 | [-89.525317304, 43.134190086], 118 | [-89.521417176, 43.136858837], 119 | [-89.515079469, 43.136375804], 120 | [-89.512331543, 43.131812042], 121 | [-89.509033557, 43.130035294], 122 | [-89.499032624, 43.13174139], 123 | [-89.495385036, 43.1363024], 124 | [-89.476329648, 43.136222116], 125 | [-89.475062917, 43.133358252], 126 | [-89.479607077, 43.13215007], 127 | [-89.482220611, 43.126289637], 128 | [-89.475491468, 43.126823191], 129 | [-89.471635553, 43.125462527], 130 | [-89.471517511, 43.122731655], 131 | [-89.467166934, 43.120532162] 132 | ] 133 | ] 134 | }, 135 | "properties": {} 136 | } 137 | ] 138 | } 139 | -------------------------------------------------------------------------------- /tests/data/nldi_get_features_by_feature_source_with_nav_mode.json: -------------------------------------------------------------------------------- 1 | { 2 | "features": [ 3 | { 4 | "geometry": { 5 | "coordinates": [-89.5361111, 43.1111111], 6 | "type": "Point" 7 | }, 8 | "type": "Feature", 9 | "properties": { 10 | "identifier": "USGS-05427943", 11 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427943/navigation", 12 | "measure": 0, 13 | "reachcode": "07090002007651", 14 | "name": "PHEASANT BRANCH AT AIRPORT ROAD NEAR MIDDLETON, WI", 15 | "source": "nwissite", 16 | "sourceName": "NWIS Surface Water Sites", 17 | "comid": "13293676", 18 | "type": "hydrolocation", 19 | "uri": "https://waterdata.usgs.gov/monitoring-location/05427943", 20 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 21 | } 22 | }, 23 | { 24 | "geometry": { 25 | "coordinates": [-89.4555556, 43.0998611], 26 | "type": "Point" 27 | }, 28 | "type": "Feature", 29 | "properties": { 30 | "identifier": "USGS-430600089272001", 31 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-430600089272001/navigation", 32 | "measure": 59.3341, 33 | "reachcode": "07090002008384", 34 | "name": "LAKE MENDOTA, WEST BAY, AT MADISON, WI", 35 | "source": "nwissite", 36 | "sourceName": "NWIS Surface Water Sites", 37 | "comid": "13294314", 38 | "type": "hydrolocation", 39 | "uri": "https://waterdata.usgs.gov/monitoring-location/430600089272001", 40 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 41 | } 42 | }, 43 | { 44 | "geometry": { 45 | "coordinates": [-89.52151, 43.09860617], 46 | "type": "Point" 47 | }, 48 | "type": "Feature", 49 | "properties": { 50 | "identifier": "USGS-054279465", 51 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-054279465/navigation", 52 | "measure": 53.7958, 53 | "reachcode": "07090002007650", 54 | "name": "S FORK PHEASANT BRANCH @ DEMING WAY @ MIDDLETON,WI", 55 | "source": "nwissite", 56 | "sourceName": "NWIS Surface Water Sites", 57 | "comid": "13294264", 58 | "type": "hydrolocation", 59 | "uri": "https://waterdata.usgs.gov/monitoring-location/054279465", 60 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 61 | } 62 | }, 63 | { 64 | "geometry": { 65 | "coordinates": [-89.493454, 43.10443947], 66 | "type": "Point" 67 | }, 68 | "type": "Feature", 69 | "properties": { 70 | "identifier": "USGS-05427950", 71 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427950/navigation", 72 | "measure": 2.32298, 73 | "reachcode": "07090002007650", 74 | "name": "PHEASANT BRANCH AT CENTURY AVE AT MIDDLETON, WI", 75 | "source": "nwissite", 76 | "sourceName": "NWIS Surface Water Sites", 77 | "comid": "13294264", 78 | "type": "hydrolocation", 79 | "uri": "https://waterdata.usgs.gov/monitoring-location/05427950", 80 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 81 | } 82 | }, 83 | { 84 | "geometry": { 85 | "coordinates": [-89.5287322, 43.1044393], 86 | "type": "Point" 87 | }, 88 | "type": "Feature", 89 | "properties": { 90 | "identifier": "USGS-054279435", 91 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-054279435/navigation", 92 | "measure": 72.0559, 93 | "reachcode": "07090002007650", 94 | "name": "PHEASANT BR W. OF CONFLUENCE POND @ MIDDLETON, WI", 95 | "source": "nwissite", 96 | "sourceName": "NWIS Surface Water Sites", 97 | "comid": "13294264", 98 | "type": "hydrolocation", 99 | "uri": "https://waterdata.usgs.gov/monitoring-location/054279435", 100 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 101 | } 102 | }, 103 | { 104 | "geometry": { 105 | "coordinates": [-89.5116667, 43.1033333], 106 | "type": "Point" 107 | }, 108 | "type": "Feature", 109 | "properties": { 110 | "identifier": "USGS-05427948", 111 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427948/navigation", 112 | "measure": 40.6051151144, 113 | "reachcode": "07090002007650", 114 | "name": "PHEASANT BRANCH AT MIDDLETON, WI", 115 | "source": "nwissite", 116 | "sourceName": "NWIS Surface Water Sites", 117 | "comid": "13294264", 118 | "type": "hydrolocation", 119 | "uri": "https://waterdata.usgs.gov/monitoring-location/05427948", 120 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 121 | } 122 | }, 123 | { 124 | "geometry": { 125 | "coordinates": [-89.5167877, 43.1030505], 126 | "type": "Point" 127 | }, 128 | "type": "Feature", 129 | "properties": { 130 | "identifier": "USGS-054279475", 131 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-054279475/navigation", 132 | "measure": 48.974, 133 | "reachcode": "07090002007650", 134 | "name": "PHEASANT BRANCH UPSTREAM OF HWY 12 @ MIDDLETON, WI", 135 | "source": "nwissite", 136 | "sourceName": "NWIS Surface Water Sites", 137 | "comid": "13294264", 138 | "type": "hydrolocation", 139 | "uri": "https://waterdata.usgs.gov/monitoring-location/054279475", 140 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 141 | } 142 | }, 143 | { 144 | "geometry": { 145 | "coordinates": [-89.5138889, 43.10777778], 146 | "type": "Point" 147 | }, 148 | "type": "Feature", 149 | "properties": { 150 | "identifier": "USGS-430628089305001", 151 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-430628089305001/navigation", 152 | "measure": 43.532, 153 | "reachcode": "07090002007650", 154 | "name": "GW QUALITY ASSURANCE-NAWQA WAREHOUSE-MIDDLETON, WI", 155 | "source": "nwissite", 156 | "sourceName": "NWIS Surface Water Sites", 157 | "comid": "13294264", 158 | "type": "hydrolocation", 159 | "uri": "https://waterdata.usgs.gov/monitoring-location/430628089305001", 160 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 161 | } 162 | }, 163 | { 164 | "geometry": { 165 | "coordinates": [-89.4837316, 43.10607834], 166 | "type": "Point" 167 | }, 168 | "type": "Feature", 169 | "properties": { 170 | "identifier": "USGS-05427952", 171 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/nwissite/USGS-05427952/navigation", 172 | "measure": 46.391, 173 | "reachcode": "07090002007647", 174 | "name": "PHEASANT BRANCH AT MOUTH AT MIDDLETON, WI", 175 | "source": "nwissite", 176 | "sourceName": "NWIS Surface Water Sites", 177 | "comid": "13293696", 178 | "type": "hydrolocation", 179 | "uri": "https://waterdata.usgs.gov/monitoring-location/05427952", 180 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 181 | } 182 | } 183 | ], 184 | "type": "FeatureCollection" 185 | } 186 | -------------------------------------------------------------------------------- /tests/data/nldi_get_features_by_feature_source_without_nav_mode.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "Point", 8 | "coordinates": [-89.5098433, 43.0872176] 9 | }, 10 | "properties": { 11 | "identifier": "USGS-054279485", 12 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/WQP/USGS-054279485/navigation", 13 | "name": "STRICKER'S POND AT MIDDLETON, WI", 14 | "source": "WQP", 15 | "sourceName": "Water Quality Portal", 16 | "comid": "13294314", 17 | "type": "varies", 18 | "uri": "https://www.waterqualitydata.us/provider/NWIS/USGS-WI/USGS-054279485/", 19 | "mainstem": "https://geoconnex.us/ref/mainstems/575519" 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /tests/data/nldi_get_features_by_lat_long.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "LineString", 8 | "coordinates": [ 9 | [-89.482287705, 43.1049596], 10 | [-89.482173502, 43.104764298], 11 | [-89.481962003, 43.104193598], 12 | [-89.478003301, 43.1009003], 13 | [-89.474274702, 43.0990991], 14 | [-89.471394107, 43.098147698], 15 | [-89.470654801, 43.098110899], 16 | [-89.469584204, 43.097653598], 17 | [-89.455723904, 43.096961297], 18 | [-89.4536274, 43.097529493], 19 | [-89.451938607, 43.097714297], 20 | [-89.450211607, 43.098300897], 21 | [-89.449472405, 43.098263897], 22 | [-89.448879704, 43.098638095], 23 | [-89.448325306, 43.098610297], 24 | [-89.4471654, 43.099090695], 25 | [-89.446375206, 43.099589594], 26 | [-89.445820704, 43.0995619], 27 | [-89.441537805, 43.101635799], 28 | [-89.435227506, 43.105492599], 29 | [-89.433882602, 43.105963595], 30 | [-89.433328107, 43.105935797], 31 | [-89.432537705, 43.106434599], 32 | [-89.430083804, 43.106849998], 33 | [-89.429293305, 43.1073488], 34 | [-89.426310502, 43.107468195], 35 | [-89.410033204, 43.106784396] 36 | ] 37 | }, 38 | "properties": { 39 | "identifier": "13294314", 40 | "navigation": "https://api.water.usgs.gov/nldi/linked-data/comid/13294314/navigation", 41 | "source": "comid", 42 | "sourceName": "NHDPlus comid", 43 | "comid": "13294314" 44 | } 45 | } 46 | ] 47 | } 48 | -------------------------------------------------------------------------------- /tests/data/nldi_get_flowlines.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "geometry": { 7 | "type": "LineString", 8 | "coordinates": [ 9 | [-89.482287705, 43.1049596], 10 | [-89.482173502, 43.104764298], 11 | [-89.481962003, 43.104193598], 12 | [-89.478003301, 43.1009003], 13 | [-89.474274702, 43.0990991], 14 | [-89.471394107, 43.098147698], 15 | [-89.470654801, 43.098110899], 16 | [-89.469584204, 43.097653598], 17 | [-89.455723904, 43.096961297], 18 | [-89.4536274, 43.097529493], 19 | [-89.451938607, 43.097714297], 20 | [-89.450211607, 43.098300897], 21 | [-89.449472405, 43.098263897], 22 | [-89.448879704, 43.098638095], 23 | [-89.448325306, 43.098610297], 24 | [-89.4471654, 43.099090695], 25 | [-89.446375206, 43.099589594], 26 | [-89.445820704, 43.0995619], 27 | [-89.441537805, 43.101635799], 28 | [-89.435227506, 43.105492599], 29 | [-89.433882602, 43.105963595], 30 | [-89.433328107, 43.105935797], 31 | [-89.432537705, 43.106434599], 32 | [-89.430083804, 43.106849998], 33 | [-89.429293305, 43.1073488], 34 | [-89.426310502, 43.107468195], 35 | [-89.410033204, 43.106784396] 36 | ] 37 | }, 38 | "properties": { 39 | "nhdplus_comid": "13294314" 40 | } 41 | } 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /tests/data/water_use_national.txt: -------------------------------------------------------------------------------- 1 | # 2 | # File created on 2020-02-28 18:01:44 EST 3 | # Refresh Date: 2018-06 4 | # 5 | # U.S. Geological Survey 6 | # 7 | # This file contains selected WaterUse data 8 | # 9 | # The data you have secured from the USGS NWISWeb database may include data that have 10 | # not received Director's approval and as such are provisional and subject to revision. 11 | # The data are released on the condition that neither the USGS nor the United States 12 | # Government may be held liable for any damages resulting from its authorized or 13 | # unauthorized use. 14 | # 15 | # * References to sources of water-use data can be found here. - https://water.usgs.gov/watuse 16 | # 17 | # Search Criteria: 18 | # Year(s) - ALL 19 | # Area - 20 | # County Codes(s) - ALL 21 | # County Name(s) - 22 | # Category Code(s) - ALL 23 | # Category Name(s) - 24 | # 25 | # Columns: 26 | # National Totals - Summary 27 | # 28 | # The following years are included: 29 | # 1950 30 | # 1955 31 | # 1960 32 | # 1965 33 | # 1970 34 | # 1975 35 | # 1980 36 | # 1985 37 | # 1990 38 | # 1995 39 | # 2000 40 | # 2005 41 | # 2010 42 | # 2015 43 | # 44 | National Totals 1950 1955 1960 1965 1970 1975 1980 1985 1990 1995 2000 2005 2010 2015 45 | 100s 16s 16s 16s 16s 16s 16s 16s 16s 16s 16s 16s 16s 16s 16s 46 | Population, in millions 150.7 164.0 179.3 193.8 205.9 216.4 229.6 242.4 252.3 267.1 285.3 300.7 312.6 325.0 47 | Total withdrawals, in Bgal/d 180 240 270 310 370 420 430 397 404 398 413 410a 354a 322 48 | Public supply, in Bgal/d 14 17 21 24 27 29 33 36.6 38.7 40.2 43.3 44.4a 42.0 39.0 49 | Self-supplied domestic, in Bgal/d 2.1 2.1 2.0 2.3 2.6 2.8 3.4 3.32 3.39 3.39 3.58 3.73a 3.53a 3.26 50 | Livestock, in Bgal/d 1.5 1.5 1.6 1.7 1.9 2.1 2.2 2.23 2.25 2.28 2.37a 2.15 2.00 2.00 51 | Irrigation, in Bgal/d 89 110 110 120 130 140 150 135 134 130 139 127 116a 118 52 | Thermoelectric power, in Bgal/d 40 72 100 130 170 200 210 187 194 190 195 201 162a 133 53 | Self-supplied industrial, in Bgal/d 37 39 38 46 47 45 45 25.8 22.4a 21.6 19.5a 18.1 16.2a 14.8 54 | Mining, In Bgal/d b b b b b b b 3.44 4.93 3.59 4.13a 3.83 3.97a 4.00 55 | Commercial, in Bgal/d b b b b b b b 1.23 2.39 2.89 c c c c 56 | Aquaculture, in Bgal/d b b b b b b b 2.24 2.24 3.27a 5.79a 8.83a 8.96a 7.55 57 | Total Groundwater, fresh, in Bgal/d 34 47 50 60 68 82 83 73.4 79.4 76.4a 84.3a 78.9 75.9a 82.3 58 | Total Groundwater, saline, in Bgal/d c 0.6 0.4 0.5 1.0 1.0 0.93 0.66 1.30a 1.11 2.47a 1.51 2.22a 2.34 59 | Total Surface water, fresh, in Bgal/d 140 180 190 210 250 260 280 263 255a 261 265 270 231a 198 60 | Total Surface water, saline, in Bgal/d 10 18 31 43 53 69 71 59.6 68.7a 59.7 61.0 59.8a 45.0 38.6 61 | -------------------------------------------------------------------------------- /tests/data/waterdata_gwlevels.txt: -------------------------------------------------------------------------------- 1 | # ---------------------------------- WARNING ---------------------------------------- 2 | # Some of the data that you have obtained from this U.S. Geological Survey database may not 3 | # have received Director's approval. Any such data values are qualified as provisional and 4 | # are subject to revision. Provisional data are released on the condition that neither the 5 | # USGS nor the United States Government may be held liable for any damages resulting from its use. 6 | # Additional info: http://help.waterdata.usgs.gov/policies/provisional-data-statement 7 | # 8 | # File-format description: http://help.waterdata.usgs.gov/faq/about-tab-delimited-output 9 | # Automated-retrieval info: http://help.waterdata.usgs.gov/faq/automated-retrievals 10 | # 11 | # Contact: gs-w_support_nwisweb@usgs.gov 12 | # retrieved: 2020-02-14 17:37:13 -05:00 (natwebsdas01) 13 | # 14 | # US Geological Survey groundwater levels 15 | # 16 | # Data for the following 1 site(s) are contained in this file 17 | # USGS 434400121275801 21S/11E-19CCC 18 | # ----------------------------------------------------------------------------------- 19 | # 20 | # The fields in this file include: 21 | # --------------------------------- 22 | # agency_cd Agency code 23 | # site_no USGS site number 24 | # site_tp_cd Site type code 25 | # lev_dt Date level measured 26 | # lev_tm Time level measured 27 | # lev_tz_cd Time datum 28 | # lev_va Water-level value in feet below land surface 29 | # sl_lev_va Water-level value in feet above specific vertical datum 30 | # sl_datum_cd Referenced vertical datum 31 | # lev_status_cd Status 32 | # lev_agency_cd Measuring agency 33 | # lev_dt_acy_cd Water-level date-time accuracy 34 | # lev_acy_cd Water-level accuracy 35 | # lev_src_cd Source of measurement 36 | # lev_meth_cd Method of measurement 37 | # lev_age_cd Water-level approval status 38 | # 39 | # Referenced agency codes (lev_agency_cd) included in this output 40 | # 41 | # USGS U.S. Geological Survey 42 | # 43 | # Referenced site type codes (site_tp_cd) included in this output 44 | # 45 | # GW Well 46 | # 47 | # Referenced water-level site status codes (lev_status_cd) included in this output 48 | # 49 | # "" The reported water-level measurement represents a static level 50 | # 51 | # 52 | # Referenced water-level date-time accuracy codes (lev_dt_acy_cd) included in this output 53 | # 54 | # m Date is accurate to the Minute 55 | # 56 | # Referenced water-level accuracy codes (lev_acy_cd) included in this output 57 | # 58 | # 2 Water level accuracy to nearest hundredth of a foot 59 | # 60 | # Referenced source of measurement codes (lev_src_cd) included in this output 61 | # 62 | # S Measured by personnel of reporting agency. 63 | # 64 | # Referenced method of measurement codes (lev_meth_cd) included in this output 65 | # 66 | # S Steel-tape measurement. 67 | # 68 | # Referenced water-level approval-status codes (lev_age_cd) included in this output 69 | # 70 | # A Approved for publication -- Processing and review completed. 71 | # 72 | agency_cd site_no site_tp_cd lev_dt lev_tm lev_tz_cd lev_va sl_lev_va sl_datum_cd lev_status_cd lev_agency_cd lev_dt_acy_cd lev_acy_cd lev_src_cd lev_meth_cd lev_age_cd 73 | 5s 15s 6s 10d 5d 5s 12s 12s 10s 1s 5s 1s 1s 1s 1s 1s 74 | USGS 434400121275801 GW 2016-10-26 09:22 PDT 28.33 USGS m 2 S S A 75 | -------------------------------------------------------------------------------- /tests/data/waterdata_pmcodes.txt: -------------------------------------------------------------------------------- 1 | # 2 | # National Water Information System 3 | # 2022/06/08 4 | # 5 | # 6 | # Date Retrieved: USGS Water Data for the Nation Help System 7 | # 8 | parameter_cd group parm_nm epa_equivalence result_statistical_basis result_time_basis result_weight_basis result_particle_size_basis result_sample_fraction result_temperature_basis CASRN SRSName parm_unit 9 | 5s 8s 58s 5s 0s 0s 0s 0s 9s 0s 10s 7s 9s 10 | 00618 Nutrient Nitrate, water, filtered, milligrams per liter as nitrogen Agree Dissolved 14797-55-8 Nitrate mg/l as N 11 | -------------------------------------------------------------------------------- /tests/data/waterservices_peaks.txt: -------------------------------------------------------------------------------- 1 | # 2 | # U.S. Geological Survey 3 | # National Water Information System 4 | # Retrieved: 2020-02-20 16:35:50 EST 5 | # 6 | # ---------------------------------- WARNING ---------------------------------------- 7 | # Some of the data that you have obtained from this U.S. Geological Survey database 8 | # may not have received Director's approval. Any such data values are qualified 9 | # as provisional and are subject to revision. Provisional data are released on the 10 | # condition that neither the USGS nor the United States Government may be held liable 11 | # for any damages resulting from its use. 12 | # 13 | # More data may be available offline. 14 | # For more information on these data, contact USGS Water Data Inquiries. 15 | # This file contains the annual peak streamflow data. 16 | # 17 | # This information includes the following fields: 18 | # 19 | # agency_cd Agency Code 20 | # site_no USGS station number 21 | # peak_dt Date of peak streamflow (format YYYY-MM-DD) 22 | # peak_tm Time of peak streamflow (24 hour format, 00:00 - 23:59) 23 | # peak_va Annual peak streamflow value in cfs 24 | # peak_cd Peak Discharge-Qualification codes (see explanation below) 25 | # gage_ht Gage height for the associated peak streamflow in feet 26 | # gage_ht_cd Gage height qualification codes 27 | # year_last_pk Peak streamflow reported is the highest since this year 28 | # ag_dt Date of maximum gage-height for water year (if not concurrent with peak) 29 | # ag_tm Time of maximum gage-height for water year (if not concurrent with peak 30 | # ag_gage_ht maximum Gage height for water year in feet (if not concurrent with peak 31 | # ag_gage_ht_cd maximum Gage height code 32 | # 33 | # Sites in this file include: 34 | # USGS 01594440 PATUXENT RIVER NEAR BOWIE, MD 35 | # 36 | # Peak Streamflow-Qualification Codes(peak_cd): 37 | # 1 ... Discharge is a Maximum Daily Average 38 | # 2 ... Discharge is an Estimate 39 | # 3 ... Discharge affected by Dam Failure 40 | # 4 ... Discharge less than indicated value, 41 | # which is Minimum Recordable Discharge at this site 42 | # 5 ... Discharge affected to unknown degree by 43 | # Regulation or Diversion 44 | # 6 ... Discharge affected by Regulation or Diversion 45 | # 7 ... Discharge is an Historic Peak 46 | # 8 ... Discharge actually greater than indicated value 47 | # 9 ... Discharge due to Snowmelt, Hurricane, 48 | # Ice-Jam or Debris Dam breakup 49 | # A ... Year of occurrence is unknown or not exact 50 | # Bd ... Day of occurrence is unknown or not exact 51 | # Bm ... Month of occurrence is unknown or not exact 52 | # C ... All or part of the record affected by Urbanization, 53 | # Mining, Agricultural changes, Channelization, or other 54 | # F ... Peak supplied by another agency 55 | # O ... Opportunistic value not from systematic data collection 56 | # R ... Revised 57 | # 58 | # Gage height qualification codes(gage_ht_cd,ag_gage_ht_cd): 59 | # 1 ... Gage height affected by backwater 60 | # 2 ... Gage height not the maximum for the year 61 | # 3 ... Gage height at different site and(or) datum 62 | # 4 ... Gage height below minimum recordable elevation 63 | # 5 ... Gage height is an estimate 64 | # 6 ... Gage datum changed during this year 65 | # 7 ... Debris, mud, or hyper-concentrated flow 66 | # 8 ... Gage height tidally affected 67 | # Bd ... Day of occurrence is unknown or not exact 68 | # Bm ... Month of occurrence is unknown or not exact 69 | # F ... Peak supplied by another agency 70 | # R ... Revised 71 | # 72 | # 73 | agency_cd site_no peak_dt peak_tm peak_va peak_cd gage_ht gage_ht_cd year_last_pk ag_dt ag_tm ag_gage_ht ag_gage_ht_cd 74 | 5s 15s 10d 6s 8s 33s 8s 27s 4s 10d 6s 8s 27s 75 | USGS 01594440 2000-03-22 3640 5 11.90 76 | USGS 01594440 2001-06-08 06:30 3800 5 12.05 77 | USGS 01594440 2002-04-29 1510 2,5,8 78 | USGS 01594440 2003-02-23 19:30 6990 5 15.08 79 | USGS 01594440 2003-12-12 10:45 5790 5 13.99 80 | USGS 01594440 2005-04-03 19:15 5210 5 13.42 81 | USGS 01594440 2006-06-26 23:00 12700 5 19.20 82 | USGS 01594440 2007-04-16 11:15 5520 5 13.73 83 | USGS 01594440 2008-05-13 02:00 7860 5 15.80 84 | USGS 01594440 2009-06-19 05:45 4130 5 12.35 85 | USGS 01594440 2010-03-14 13:00 5780 5 13.98 86 | USGS 01594440 2011-09-08 13:15 16800 5 21.10 87 | USGS 01594440 2011-12-08 20:30 4900 5 13.74 88 | USGS 01594440 2012-10-30 23:00 10800 5 18.02 89 | USGS 01594440 2014-05-01 17:30 15600 5 20.56 90 | USGS 01594440 2015-06-28 18:15 6610 5 15.03 91 | USGS 01594440 2016-08-01 00:30 6140 5 14.64 92 | USGS 01594440 2017-07-30 01:15 4960 5 13.60 93 | USGS 01594440 2018-06-04 18:30 8360 5 16.32 94 | USGS 01594440 2018-12-16 23:30 7220 5 15.50 95 | -------------------------------------------------------------------------------- /tests/data/waterservices_ratings.txt: -------------------------------------------------------------------------------- 1 | # //UNITED STATES GEOLOGICAL SURVEY http://water.usgs.gov/ 2 | # //NATIONAL WATER INFORMATION SYSTEM http://water.usgs.gov/data.html 3 | # //DATA ARE PROVISIONAL AND SUBJECT TO CHANGE UNTIL PUBLISHED BY USGS 4 | # //RETRIEVED: 2018-02-28 01:11:02 5 | # //WARNING 6 | # //WARNING The stage-discharge rating provided in this file should be 7 | # //WARNING considered provisional and subject to change. Stage-discharge 8 | # //WARNING ratings change over time as the channel features that control 9 | # //WARNING the relation between stage and discharge vary. Users are 10 | # //WARNING cautioned to consider carefully the applicability of this 11 | # //WARNING rating before using it for decisions that concern personal or 12 | # //WARNING public safety or operational consequences. 13 | # //WARNING 14 | # //WARNING This rating does not include any shifts that may have been 15 | # //WARNING used along with this base rating in converting stage to 16 | # //WARNING discharge at this site. Stage data processed with the rating 17 | # //WARNING thus may not match that displayed or published by the USGS. 18 | # //WARNING 19 | # //FILE TYPE="NWIS RATING" 20 | # //DATABASE NUMBER=01 DESCRIPTION=" Standard data base for this site." 21 | # //STATION AGENCY="USGS " NUMBER="01594440 " TIME_ZONE="EST" DST_FLAG=N 22 | # //STATION NAME="PATUXENT RIVER NEAR BOWIE, MD" 23 | # //LABEL="Discharge ft^3/s" 24 | # //PARAMETER CODE="00060" 25 | # //RATING ID="20.0" TYPE="STGQ" NAME="stage-discharge" AGING=Working 26 | # //RATING REMARKS="" 27 | # //RATING EXPANSION="logarithmic" 28 | # //RATING OFFSET1=2.000000E+00 29 | # //RATING_INDEP ROUNDING="????" PARAMETER="Gage height (ft)" 30 | # //RATING_DEP ROUNDING="????" PARAMETER="Discharge (ft^3/s)" 31 | # //RATING_DATETIME BEGIN=20151001000000 BZONE=-05:00 END=20170206000000 EZONE=-05:00 AGING=None 32 | # //RATING_DATETIME COMMENT="Adjust high end to Meas 32C. Begin on WY change" 33 | # //RATING_DATETIME BEGIN=20170206000000 BZONE=-05:00 END=-------------- EZONE=--- AGING=None 34 | # //RATING_DATETIME COMMENT="Adjust high end to Meas 32C. Begin on WY change" 35 | INDEP DEP STOR 36 | 16N 16N 1S 37 | 2.9900000E+00 3.0000000E+01 * 38 | 4.0000000E+00 1.1000000E+02 * 39 | 5.0000000E+00 2.2500000E+02 * 40 | 5.5000000E+00 3.0000000E+02 * 41 | 6.0000000E+00 3.9000000E+02 * 42 | 6.5000000E+00 4.9000000E+02 * 43 | 7.0000000E+00 6.0000000E+02 * 44 | 9.0000000E+00 1.1750000E+03 * 45 | 1.3000000E+01 4.3500000E+03 * 46 | 2.0850000E+01 1.6497750E+04 * 47 | 2.7900000E+01 3.1100000E+04 * 48 | -------------------------------------------------------------------------------- /tests/data/waterservices_site.txt: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # US Geological Survey 4 | # retrieved: 2020-02-14 13:17:02 -05:00 (sdas01) 5 | # 6 | # The Site File stores location and general information about groundwater, 7 | # surface water, and meteorological sites 8 | # for sites in USA. 9 | # 10 | # File-format description: http://help.waterdata.usgs.gov/faq/about-tab-delimited-output 11 | # Automated-retrieval info: https://waterservices.usgs.gov/docs/site-service/site-service-details/ 12 | # 13 | # Contact: gs-w_support_nwisweb@usgs.gov 14 | # 15 | # The following selected fields are included in this output: 16 | # 17 | # agency_cd -- Agency 18 | # site_no -- Site identification number 19 | # station_nm -- Site name 20 | # site_tp_cd -- Site type 21 | # dec_lat_va -- Decimal latitude 22 | # dec_long_va -- Decimal longitude 23 | # coord_acy_cd -- Latitude-longitude accuracy 24 | # dec_coord_datum_cd -- Decimal Latitude-longitude datum 25 | # alt_va -- Altitude of Gage/land surface 26 | # alt_acy_va -- Altitude accuracy 27 | # alt_datum_cd -- Altitude datum 28 | # huc_cd -- Hydrologic unit code 29 | # 30 | agency_cd site_no station_nm site_tp_cd dec_lat_va dec_long_va coord_acy_cd dec_coord_datum_cd alt_va alt_acy_va alt_datum_cd huc_cd 31 | 5s 15s 50s 7s 16s 16s 1s 10s 8s 3s 10s 16s 32 | USGS 01491000 CHOPTANK RIVER NEAR GREENSBORO, MD ST 38.99719444 -75.7858056 S NAD83 2.73 .1 NAVD88 02060005 33 | USGS 01645000 SENECA CREEK AT DAWSONVILLE, MD ST 39.1280833 -77.33577778 S NAD83 213.31 .1 NAVD88 02070008 34 | -------------------------------------------------------------------------------- /tests/data/wqp3_results.txt: -------------------------------------------------------------------------------- 1 | Org_Identifier,Org_FormalName,Project_Identifier,Project_Name,Project_QAPPApproved,Project_QAPPApprovalAgency,ProjectAttachment_FileName,ProjectAttachment_FileType,Location_Identifier,Location_Name,Location_Type,Location_Description,Location_State,Location_CountryName,Location_CountyName,Location_CountryCode,Location_StatePostalCode,Location_CountyCode,Location_HUCEightDigitCode,Location_HUCTwelveDigitCode,Location_TribalLandIndicator,Location_TribalLand,Location_Latitude,Location_Longitude,Location_HorzCoordReferenceSystemDatum,Location_LatitudeStandardized,Location_LongitudeStandardized,Location_HorzCoordStandardizedDatum,AlternateLocation_IdentifierCount,Activity_ActivityIdentifier,Activity_ActivityIdentifierUserSupplied,Activity_TypeCode,Activity_Media,Activity_MediaSubdivisionName,Activity_BottomDepthSamplingComponent,ActivityBiological_AssemblageSampled,ActivityBiological_ToxicityTestType,Activity_ConductingOrganization,Activity_Comment,ActivityLocation_Latitude,ActivityLocation_Longitude,ActivityLocation_HorzCoordReferenceSystemDatum,ActivityLocation_SourceMapScale,ActivityLocation_LatitudeStandardized,ActivityLocation_LongitudeStandardized,ActivityLocation_HorzCoordStandardizedDatum,ActivityLocation_HorzAccuracyMeasure,ActivityLocation_HorzAccuracyMeasureUnit,ActivityLocation_HorizontalAccuracyHorzCollectionMethod,ActivityLocation_Description,Activity_StartDate,Activity_StartTime,Activity_StartTimeZone,Activity_EndDate,Activity_EndTime,Activity_EndTimeZone,Activity_DepthHeightMeasure,Activity_DepthHeightMeasureUnit,Activity_BottomDepthAltitudeReferencePoint,Activity_ActivityRelativeDepth,Activity_TopDepthMeasure,Activity_TopDepthMeasureUnit,Activity_BottomDepthMeasure,Activity_BottomDepthMeasureUnit,SampleCollectionMethod_Identifier,SampleCollectionMethod_IdentifierContext,SampleCollectionMethod_Name,SampleCollectionMethod_QualifierTypeName,SampleCollectionMethod_Description,SampleCollectionMethod_EquipmentName,SampleCollectionMethod_EquipmentComment,SamplePrepMethod_Identifier,SamplePrepMethod_IdentifierContext,SamplePrepMethod_Name,SamplePrepMethod_QualifierType,SamplePrepMethod_Description,SamplePrepMethod_ContainerLabel,SamplePrepMethod_ContainerType,SamplePrepMethod_ContainerColor,SamplePrepMethod_ChemicalPreservativeUsed,SamplePrepMethod_ThermalPreservativeUsed,SamplePrepMethod_TransportStorageDescription,Activity_HydrologicCondition,Activity_HydrologicEvent,ActivityAttachment_FileName,ActivityAttachment_FileType,ActivityAttachment_FileDownload,Result_DataLoggerLine,Result_ResultDetectionCondition,Result_Characteristic,Result_CharacteristicUserSupplied,Result_CASNumber,Result_MethodSpeciation,Result_SampleFraction,ResultBiological_Intent,ResultBiological_IndividualIdentifier,ResultBiological_Taxon,ResultBiological_TaxonUserSupplied,ResultBiological_TaxonUserSuppliedReference,ResultBiological_UnidentifiedSpeciesIdentifier,ResultBiological_SampleTissueAnatomy,ResultBiological_GroupSummaryCount,GroupSummaryWeight_Measure,GroupSummaryWeightMeasure_Unit,ResultDepthHeight_Measure,ResultDepthHeight_MeasureUnit,ResultDepthHeight_AltitudeReferencePoint,ResultDepthHeight_SamplingPointName,ResultDepthHeight_SamplingPointType,ResultDepthHeight_SamplingPointPlaceInSeries,ResultDepthHeight_SamplingPointComment,ResultDepthHeight_RecordIdentifierUserSupplied,Result_MeasureIdentifier,Result_Measure,Result_MeasureUnit,Result_MeasureQualifierCode,Result_MeasureStatusIdentifier,Result_StatisticalBase,Result_StatisticalNValue,Result_MeasureType,Result_WeightBasis,Result_TimeBasis,Result_MeasureTemperatureBasis,Result_MeasureParticleSizeBasis,DataQuality_PrecisionValue,DataQuality_BiasValue,DataQuality_ConfidenceIntervalValue,DataQuality_UpperConfidenceLimitValue,DataQuality_LowerConfidenceLimitValue,DataQuality_ResultComment,DetectionLimit_TypeA,DetectionLimit_MeasureA,DetectionLimit_MeasureUnitA,DetectionLimit_CommentA,DetectionLimit_TypeB,DetectionLimit_MeasureB,DetectionLimit_MeasureUnitB,DetectionLimit_CommentB,LabInfo_LabSampleSplitRatio,LabInfo_LabAccreditationIndicator,LabInfo_LabAccreditationAuthority,LabInfo_TaxonAccreditationIndicator,LabInfo_TaxonAccreditationAuthority,ResultAnalyticalMethod_Identifier,ResultAnalyticalMethod_IdentifierContext,ResultAnalyticalMethod_Name,ResultAnalyticalMethod_QualifierType,ResultAnalyticalMethod_Description,Result_ComparableMethodIdentifier,Result_ComparableMethodIdentifierContext,Result_ComparableMethodModification,LabInfo_Name,LabInfo_AnalysisStartDate,LabInfo_AnalysisStartTime,LabInfo_AnalysisStartTimeZone,LabInfo_AnalysisEndDate,LabInfo_AnalysisEndTime,LabInfo_AnalysisEndTimeZone,LabInfo_LaboratoryComment,LabSamplePrepMethod_Identifier,LabSamplePrepMethod_IdentifierContext,LabSamplePrepMethod_Name,LabSamplePrepMethod_QualifierType,LabSamplePrepMethod_Description,LabSamplePrepMethod_StartDate,LabSamplePrepMethod_StartTime,LabSamplePrepMethod_StartTimeZone,LabSamplePrepMethod_EndDate,LabSamplePrepMethod_EndTime,LabSamplePrepMethod_EndTimeZone,LabSamplePrepMethod_DilutionFactor,ResultAttachment_FileName,ResultAttachment_FileType,ResultAttachment_FileDownload,ProviderName,Result_CharacteristicComparable,Result_CharacteristicGroup,Org_Type,LastChangeDate,USGSpcode 2 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-49176537,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-08-08,13:55:00,CDT,2011-08-08,14:05:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777847,471,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, 3 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-47619240,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-07-06,08:35:00,CDT,2011-07-06,08:45:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777841,860,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, 4 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-45822640,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-05-09,12:20:00,CDT,2011-05-09,12:30:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777835,1000,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, 5 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-46495059,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-06-05,14:45:00,CDT,2011-06-05,14:55:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777838,800,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, 6 | WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-50689894,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-09-11,16:10:00,CDT,2011-09-11,16:20:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777850,750,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, 7 | -------------------------------------------------------------------------------- /tests/data/wqp_activity_metrics.txt: -------------------------------------------------------------------------------- 1 | OrganizationIdentifier,OrganizationFormalName,MonitoringLocationIdentifier,ActivityIdentifier,ActivityMetricType/MetricTypeIdentifier,ActivityMetricType/MetricTypeIdentifierContext,ActivityMetricType/MetricTypeName,MetricTypeCitation/ResourceTitleName,MetricTypeCitation/ResourceCreatorName,MetricTypeCitation/ResourceSubjectText,MetricTypeCitation/ResourcePublisherName,MetricTypeCitation/ResourceDate,MetricTypeCitation/ResourceIdentifier,MetricTypeCitation/MetricTypeScaleText,MetricTypeCitation/FormulaDescriptionText,MetricValueMeasure/MeasureValue,MetricValueMeasure/MeasureUnitCode,MetricValueMeasure/MetricScoreNumeric,MetricValueMeasure/MetricCommentText,MetricValueMeasure/IndexIdentifier,ProviderName 2 | GSWA,Great Swamp Watershed Association(Volunteer)*,GSWA-LB2,GSWA-V95068SC,WATER_ODOR,GSWA,Water Odor,,,,,,,,,0.0,None,0.0,Sewage,,STORET 3 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0178,NARS_WQX-QWCH:OWW04440-0178:040811,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,330.05,ueq/L,330.05,,,STORET 4 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0178,NARS_WQX-QWCH:OWW04440-0178:040811,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,6.08,%,6.08,,,STORET 5 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0178,NARS_WQX-QWCH:OWW04440-0178:040811,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,45.52,uS/cm,45.52,,,STORET 6 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0114,NARS_WQX-QWCH:OWW04440-0114:040813,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,2139.84,ueq/L,2139.84,,,STORET 7 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0114,NARS_WQX-QWCH:OWW04440-0114:040813,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,0.3,%,0.3,,,STORET 8 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0114,NARS_WQX-QWCH:OWW04440-0114:040813,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,250.99,uS/cm,250.99,,,STORET 9 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0458,NARS_WQX-QWCH:OWW04440-0458:040816,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,1908.46,ueq/L,1908.46,,,STORET 10 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0458,NARS_WQX-QWCH:OWW04440-0458:040816,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,-1.56,%,-1.56,,,STORET 11 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0458,NARS_WQX-QWCH:OWW04440-0458:040816,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,208.89,uS/cm,208.89,,,STORET 12 | GSWA,Great Swamp Watershed Association(Volunteer)*,GSWA-HLT,GSWA-V504943SC,WATER_ODOR,GSWA,Water Odor,,,,,,,,,0.0,None,0.0,Normal,,STORET 13 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0138,NARS_WQX-QWCH:OWW04440-0138:040810,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,1606.47,ueq/L,1606.47,,,STORET 14 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0138,NARS_WQX-QWCH:OWW04440-0138:040810,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,0.18,%,0.18,,,STORET 15 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0138,NARS_WQX-QWCH:OWW04440-0138:040810,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,191.85,uS/cm,191.85,,,STORET 16 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0554,NARS_WQX-QWCH:OWW04440-0554:040815,SOBC,NARS_WQX,Sum of Base Cations (ueq/L),,,,,,,,,3191.21,ueq/L,3191.21,,,STORET 17 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0554,NARS_WQX-QWCH:OWW04440-0554:040815,BALANCE,NARS_WQX,Ion Balance [C-A]/[C+A/2] (%),,,,,,,,,-0.36,%,-0.36,,,STORET 18 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),NARS_WQX-OWW04440-0554,NARS_WQX-QWCH:OWW04440-0554:040815,CONDHO,NARS_WQX,Debye-Huckel-Onsager Calc. Cond. (uS/cm),,,,,,,,,330.12,uS/cm,330.12,,,STORET 19 | GSWA,Great Swamp Watershed Association(Volunteer)*,GSWA-CMA,GSWA-V504945SC,WATER_ODOR,GSWA,Water Odor,,,,,,,,,0.0,None,0.0,Normal,,STORET 20 | -------------------------------------------------------------------------------- /tests/data/wqp_organizations.txt: -------------------------------------------------------------------------------- 1 | OrganizationIdentifier,OrganizationFormalName,OrganizationDescriptionText,OrganizationType,TribalCode,ElectronicAddress,Telephonic,OrganizationAddress/AddressTypeName_1,OrganizationAddress/AddressText_1,OrganizationAddress/SupplementalAddressText_1,OrganizationAddress/LocalityName_1,OrganizationAddress/StateCode_1,OrganizationAddress/PostalCode_1,OrganizationAddress/CountryCode_1,OrganizationAddress/CountyCode_1,OrganizationAddress/AddressTypeName_2,OrganizationAddress/AddressText_2,OrganizationAddress/SupplementalAddressText_2,OrganizationAddress/LocalityName_2,OrganizationAddress/StateCode_2,OrganizationAddress/PostalCode_2,OrganizationAddress/CountryCode_2,OrganizationAddress/CountyCode_2,OrganizationAddress/AddressTypeName_3,OrganizationAddress/AddressText_3,OrganizationAddress/SupplementalAddressText_3,OrganizationAddress/LocalityName_3,OrganizationAddress/StateCode_3,OrganizationAddress/PostalCode_3,OrganizationAddress/CountryCode_3,OrganizationAddress/CountyCode_3,ProviderName 2 | USGS-GA,USGS Georgia Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS 3 | USGS-NY,USGS New York Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS 4 | USGS-NJ,USGS New Jersey Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS 5 | USGS-PA,USGS Pennsylvania Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS 6 | USGS-MD,USGS Maryland Water Science Center,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NWIS 7 | NWRSFWS_WQX,"National Wildlife Refuge System, Fish and Wildlife Service",,Federal/US Government,,maritza_mallek@fws.gov (Email),413-253-8783 (Office),Location,300 Westgate Center Dr,,Hadley,MA,01035,US,,,,,,,,,,,,,,,,,,STORET 8 | DRBC,Delaware River Basin Commission,Interstate River Basin Commission,State/US Government,,Elaine Panuccio@drbc.nj.gov (Email),(609)883-9500 x307 (Office),Location,25 State Police Drive,,West Trenton,NJ,08628,US,21.0,,,,,,,,,,,,,,,,,STORET 9 | NARS_WQX,EPA National Aquatic Resources Survey (NARS),,Federal/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET 10 | GSWA,Great Swamp Watershed Association(Volunteer)*,Great Swamp Watershed Association,Private Non-Industrial,,Danielle.Donkersloot@dep.state.nj.us (Email),609-633-9241 (Office),,,,,,,,,,,,,,,,,,,,,,,,,STORET 11 | BTMUA,Brick Utilities,Drinking water provider,Private Industry,,rkarl@brickmua.com (Email),732-458-7000 (Office),Location,1551 Rt 88 W.,,Brick,NJ,08724,US,,Mailing,1551 Rt 88 W.,,Brick,NJ,8724.0,US,,,,,,,,,,STORET 12 | 31DELRBC_WQX,Delaware River Basin Commission,,State/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET 13 | MERI,Meadowlands Environmental Research Institute,"Our mission is to provide the scientific community, policy makers, and the public with the knowledge and predictive understanding necessary to conserve, protect, and manage the District ecosystems and the services they provide.",Local/US Government,,Cheryl.Yao@njmeadowlands.gov (Email);http://meri/njmeadowlands.gov/ (Internet),201-460-4604 (Office),Mailing,One Dekorte Park Plaza,,Lyndhurst,NJ,07071,US,,Location,One Dekorte Park Plaza,,Lyndhurst,NJ,7071.0,US,,,,,,,,,,STORET 14 | NJDEP_BFBM,NJDEP Bureau of Freshwater and Biological Monitoring,"The Bureau is responsible for monitoring the ambient conditions of the state's fresh and ground water resources. This monitoring includes regular statewide sampling through of 115 surface water monitoring stations, 820 benthic macroinvertebrate stream monitoring stations, 100 fish assemblage stream monitoring stations, and 150 ground water stations.",State/US Government,,http://www.nj.gov/dep/wms/bfbm/ (Internet);leigh.lager@dep.nj.gov (Email),609-943-3266 (Office),Mailing,PO Box 427,,Trenton,NJ,08625,US,,Location,35 Arctic Pkwy,,Ewing,NJ,8638.0,US,,,,,,,,,,STORET 15 | 11NPSWRD_WQX,National Park Service Water Resources Division,,Federal/US Government,,dean_tucker@nps.gov (Email),970-225-3516 (Office),Location,"1201 Oakridge Drive, Suite 250",,Fort Collins,CO,80525-5596,US,,,,,,,,,,,,,,,,,,STORET 16 | 31DRBCSP,Delaware River Basin Commission,Water Quality Monitoring Data,Interstate Comsn/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET 17 | 31DELRBC,Delaware River Basin Commission,Water Quality Monitoring Data,Interstate Comsn/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET 18 | NARS,EPA National Aquatic Resource Survey Data,Wadeable Streams Assessment Data,Federal/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET 19 | 21NJDEP1,NJ Department of Environmental Protection,Ambient Water Quality Monitoring Data,State/US Government,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET 20 | -------------------------------------------------------------------------------- /tests/data/wqp_results.txt: -------------------------------------------------------------------------------- 1 | OrganizationIdentifier,OrganizationFormalName,ActivityIdentifier,ActivityTypeCode,ActivityMediaName,ActivityMediaSubdivisionName,ActivityStartDate,ActivityStartTime/Time,ActivityStartTime/TimeZoneCode,ActivityEndDate,ActivityEndTime/Time,ActivityEndTime/TimeZoneCode,ActivityDepthHeightMeasure/MeasureValue,ActivityDepthHeightMeasure/MeasureUnitCode,ActivityDepthAltitudeReferencePointText,ActivityTopDepthHeightMeasure/MeasureValue,ActivityTopDepthHeightMeasure/MeasureUnitCode,ActivityBottomDepthHeightMeasure/MeasureValue,ActivityBottomDepthHeightMeasure/MeasureUnitCode,ProjectIdentifier,ActivityConductingOrganizationText,MonitoringLocationIdentifier,ActivityCommentText,SampleAquifer,HydrologicCondition,HydrologicEvent,SampleCollectionMethod/MethodIdentifier,SampleCollectionMethod/MethodIdentifierContext,SampleCollectionMethod/MethodName,SampleCollectionEquipmentName,ResultDetectionConditionText,CharacteristicName,ResultSampleFractionText,ResultMeasureValue,ResultMeasure/MeasureUnitCode,MeasureQualifierCode,ResultStatusIdentifier,StatisticalBaseCode,ResultValueTypeName,ResultWeightBasisText,ResultTimeBasisText,ResultTemperatureBasisText,ResultParticleSizeBasisText,PrecisionValue,ResultCommentText,USGSPCode,ResultDepthHeightMeasure/MeasureValue,ResultDepthHeightMeasure/MeasureUnitCode,ResultDepthAltitudeReferencePointText,SubjectTaxonomicName,SampleTissueAnatomyName,ResultAnalyticalMethod/MethodIdentifier,ResultAnalyticalMethod/MethodIdentifierContext,ResultAnalyticalMethod/MethodName,MethodDescriptionText,LaboratoryName,AnalysisStartDate,ResultLaboratoryCommentText,DetectionQuantitationLimitTypeName,DetectionQuantitationLimitMeasure/MeasureValue,DetectionQuantitationLimitMeasure/MeasureUnitCode,PreparationStartDate,ProviderName 2 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-45822640,Field Msr/Obs,Water,,2011-05-09,12:20:00,CDT,2011-05-09,12:30:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,1000,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET 3 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-49176537,Field Msr/Obs,Water,,2011-08-08,13:55:00,CDT,2011-08-08,14:05:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,471,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET 4 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-47619240,Field Msr/Obs,Water,,2011-07-06,08:35:00,CDT,2011-07-06,08:45:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,860,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET 5 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-50689894,Field Msr/Obs,Water,,2011-09-11,16:10:00,CDT,2011-09-11,16:20:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,750,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET 6 | WIDNR_WQX,Wisconsin Department of Natural Resources,WIDNR_WQX-46495059,Field Msr/Obs,Water,,2011-06-05,14:45:00,CDT,2011-06-05,14:55:00,CDT,,,,,,,,CBSM_URSS_Madison,WIDNR_WQX,WIDNR_WQX-10032762,,,,,,,,,,Specific conductance,,800,uS/cm,,Final,,Actual,,,,,,,,,,,,,,,,,,,,,,,,STORET -------------------------------------------------------------------------------- /tests/nadp_test.py: -------------------------------------------------------------------------------- 1 | """Tests for NADP functions.""" 2 | 3 | import os 4 | 5 | import dataretrieval.nadp as nadp 6 | 7 | 8 | class TestMDNmap: 9 | """Testing the mercury deposition network map functions. 10 | 11 | This set of tests actually queries the services themselves to ensure there 12 | have been no upstream changes to paths or file names. Tests created 13 | because there was an upstream change to paths that broke ``dataretrieval`` 14 | functionality. 15 | """ 16 | 17 | def test_get_annual_MDN_map_zip(self, tmp_path): 18 | """Test the get_annual_MDN_map function zip return.""" 19 | z_path = nadp.get_annual_MDN_map( 20 | measurement_type="conc", year="2010", path=tmp_path 21 | ) 22 | exp_path = os.path.join(tmp_path, "Hg_conc_2010.zip") 23 | # assert path matches expectation 24 | assert z_path == str(exp_path) 25 | # assert unpacked zip exists as a directory 26 | assert os.path.exists(exp_path[:-4]) 27 | # assert tif exists in directory 28 | assert os.path.exists(os.path.join(z_path[:-4], "conc_Hg_2010.tif")) 29 | 30 | 31 | class TestNTNmap: 32 | """Testing the national trends network map functions.""" 33 | 34 | def test_get_annual_NTN_map_zip(self, tmp_path): 35 | """Test the get_annual_NTN_map function zip return.""" 36 | z_path = nadp.get_annual_NTN_map( 37 | measurement_type="Precip", year="2015", path=tmp_path 38 | ) 39 | exp_path = os.path.join(tmp_path, "Precip_2015.zip") 40 | # assert path matches expectation 41 | assert z_path == str(exp_path) 42 | # assert unpacked zip exists as a directory 43 | assert os.path.exists(exp_path[:-4]) 44 | # assert tif exists in directory 45 | assert os.path.exists(os.path.join(z_path[:-4], "Precip_2015.tif")) 46 | -------------------------------------------------------------------------------- /tests/samples_test.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | from pandas import DataFrame 5 | 6 | from dataretrieval.samples import ( 7 | _check_profiles, 8 | get_usgs_samples 9 | ) 10 | 11 | def mock_request(requests_mock, request_url, file_path): 12 | """Mock request code""" 13 | with open(file_path) as text: 14 | requests_mock.get( 15 | request_url, text=text.read(), headers={"mock_header": "value"} 16 | ) 17 | 18 | def test_mock_get_usgs_samples(requests_mock): 19 | """Tests USGS Samples query""" 20 | request_url = ( 21 | "https://api.waterdata.usgs.gov/samples-data/results/fullphyschem?" 22 | "activityMediaName=Water&activityStartDateLower=2020-01-01" 23 | "&activityStartDateUpper=2024-12-31&monitoringLocationIdentifier=USGS-05406500&mimeType=text%2Fcsv" 24 | ) 25 | response_file_path = "data/samples_results.txt" 26 | mock_request(requests_mock, request_url, response_file_path) 27 | df, md = get_usgs_samples( 28 | service="results", 29 | profile="fullphyschem", 30 | activityMediaName="Water", 31 | activityStartDateLower="2020-01-01", 32 | activityStartDateUpper="2024-12-31", 33 | monitoringLocationIdentifier="USGS-05406500", 34 | ) 35 | assert type(df) is DataFrame 36 | assert df.size == 12127 37 | assert md.url == request_url 38 | assert isinstance(md.query_time, datetime.timedelta) 39 | assert md.header == {"mock_header": "value"} 40 | assert md.comment is None 41 | 42 | def test_check_profiles(): 43 | """Tests that correct errors are raised for invalid profiles.""" 44 | with pytest.raises(ValueError): 45 | _check_profiles(service="foo", profile="bar") 46 | with pytest.raises(ValueError): 47 | _check_profiles(service="results", profile="foo") 48 | 49 | def test_samples_results(): 50 | """Test results call for proper columns""" 51 | df,_ = get_usgs_samples( 52 | service="results", 53 | profile="narrow", 54 | monitoringLocationIdentifier="USGS-05288705", 55 | activityStartDateLower="2024-10-01", 56 | activityStartDateUpper="2025-04-24" 57 | ) 58 | assert all(col in df.columns for col in ["Location_Identifier", "Activity_ActivityIdentifier"]) 59 | assert len(df) > 0 60 | 61 | def test_samples_activity(): 62 | """Test activity call for proper columns""" 63 | df,_ = get_usgs_samples( 64 | service="activities", 65 | profile="sampact", 66 | monitoringLocationIdentifier="USGS-06719505" 67 | ) 68 | assert len(df) > 0 69 | assert len(df.columns) == 95 70 | assert "Location_HUCTwelveDigitCode" in df.columns 71 | 72 | def test_samples_locations(): 73 | """Test locations call for proper columns""" 74 | df,_ = get_usgs_samples( 75 | service="locations", 76 | profile="site", 77 | stateFips="US:55", 78 | activityStartDateLower="2024-10-01", 79 | activityStartDateUpper="2025-04-24", 80 | usgsPCode="00010" 81 | ) 82 | assert all(col in df.columns for col in ["Location_Identifier", "Location_Latitude"]) 83 | assert len(df) > 0 84 | 85 | def test_samples_projects(): 86 | """Test projects call for proper columns""" 87 | df,_ = get_usgs_samples( 88 | service="projects", 89 | profile="project", 90 | stateFips="US:15", 91 | activityStartDateLower="2024-10-01", 92 | activityStartDateUpper="2025-04-24" 93 | ) 94 | assert all(col in df.columns for col in ["Org_Identifier", "Project_Identifier"]) 95 | assert len(df) > 0 96 | 97 | def test_samples_organizations(): 98 | """Test organizations call for proper columns""" 99 | df,_ = get_usgs_samples( 100 | service="organizations", 101 | profile="count", 102 | stateFips="US:01" 103 | ) 104 | assert len(df) == 1 105 | assert df.size == 3 106 | -------------------------------------------------------------------------------- /tests/utils_test.py: -------------------------------------------------------------------------------- 1 | """Unit tests for functions in utils.py""" 2 | 3 | import unittest.mock as mock 4 | 5 | import pytest 6 | 7 | import dataretrieval.nwis as nwis 8 | from dataretrieval import utils 9 | 10 | 11 | class Test_query: 12 | """Tests of the query function.""" 13 | 14 | def test_url_too_long(self): 15 | """Test to confirm more useful error when query URL too long. 16 | 17 | Test based on GitHub Issue #64 18 | """ 19 | # all sites in MD 20 | sites, _ = nwis.what_sites(stateCd="MD") 21 | # expected error message 22 | _msg = "Request URL too long. Modify your query to use fewer sites. API response reason: Request-URI Too Long" 23 | # raise error by trying to query them all, so URL is way too long 24 | with pytest.raises(ValueError, match=_msg): 25 | nwis.get_iv(sites=sites.site_no.values.tolist()) 26 | 27 | def test_header(self): 28 | """Test checking header info with user-agent is part of query.""" 29 | url = "https://waterservices.usgs.gov/nwis/dv" 30 | payload = { 31 | "format": "json", 32 | "startDT": "2010-10-01", 33 | "endDT": "2010-10-10", 34 | "sites": "01646500", 35 | "multi_index": True, 36 | } 37 | response = utils.query(url, payload) 38 | assert response.status_code == 200 # GET was successful 39 | assert "user-agent" in response.request.headers 40 | 41 | 42 | class Test_BaseMetadata: 43 | """Tests of BaseMetadata""" 44 | 45 | def test_init_with_response(self): 46 | response = mock.MagicMock() 47 | md = utils.BaseMetadata(response) 48 | 49 | ## Test parameters initialized from the API response 50 | assert md.url is not None 51 | assert md.query_time is not None 52 | assert md.header is not None 53 | 54 | ## Test NotImplementedError parameters 55 | with pytest.raises(NotImplementedError): 56 | md.site_info 57 | with pytest.raises(NotImplementedError): 58 | md.variable_info 59 | -------------------------------------------------------------------------------- /tests/wqp_test.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | from pandas import DataFrame 5 | 6 | from dataretrieval.wqp import ( 7 | _check_kwargs, 8 | get_results, 9 | what_activities, 10 | what_activity_metrics, 11 | what_detection_limits, 12 | what_habitat_metrics, 13 | what_organizations, 14 | what_project_weights, 15 | what_projects, 16 | what_sites, 17 | ) 18 | 19 | 20 | def test_get_results(requests_mock): 21 | """Tests water quality portal ratings query""" 22 | request_url = ( 23 | "https://www.waterqualitydata.us/data/Result/Search?siteid=WIDNR_WQX-10032762" 24 | "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" 25 | "&mimeType=csv" 26 | ) 27 | response_file_path = "data/wqp_results.txt" 28 | mock_request(requests_mock, request_url, response_file_path) 29 | df, md = get_results( 30 | siteid="WIDNR_WQX-10032762", 31 | characteristicName="Specific conductance", 32 | startDateLo="05-01-2011", 33 | startDateHi="09-30-2011", 34 | ) 35 | assert type(df) is DataFrame 36 | assert df.size == 315 37 | assert md.url == request_url 38 | assert isinstance(md.query_time, datetime.timedelta) 39 | assert md.header == {"mock_header": "value"} 40 | assert md.comment is None 41 | 42 | 43 | def test_get_results_WQX3(requests_mock): 44 | """Tests water quality portal results query with new WQX3.0 profile""" 45 | request_url = ( 46 | "https://www.waterqualitydata.us/wqx3/Result/search?siteid=WIDNR_WQX-10032762" 47 | "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" 48 | "&mimeType=csv" 49 | "&dataProfile=fullPhysChem" 50 | ) 51 | response_file_path = "data/wqp3_results.txt" 52 | mock_request(requests_mock, request_url, response_file_path) 53 | df, md = get_results( 54 | legacy=False, 55 | siteid="WIDNR_WQX-10032762", 56 | characteristicName="Specific conductance", 57 | startDateLo="05-01-2011", 58 | startDateHi="09-30-2011", 59 | ) 60 | assert type(df) is DataFrame 61 | assert df.size == 900 62 | assert md.url == request_url 63 | assert isinstance(md.query_time, datetime.timedelta) 64 | assert md.header == {"mock_header": "value"} 65 | assert md.comment is None 66 | 67 | 68 | def test_what_sites(requests_mock): 69 | """Tests Water quality portal sites query""" 70 | request_url = ( 71 | "https://www.waterqualitydata.us/data/Station/Search?statecode=US%3A34&characteristicName=Chloride" 72 | "&mimeType=csv" 73 | ) 74 | response_file_path = "data/wqp_sites.txt" 75 | mock_request(requests_mock, request_url, response_file_path) 76 | df, md = what_sites(statecode="US:34", characteristicName="Chloride") 77 | assert type(df) is DataFrame 78 | assert df.size == 239868 79 | assert md.url == request_url 80 | assert isinstance(md.query_time, datetime.timedelta) 81 | assert md.header == {"mock_header": "value"} 82 | assert md.comment is None 83 | 84 | 85 | def test_what_organizations(requests_mock): 86 | """Tests Water quality portal organizations query""" 87 | request_url = ( 88 | "https://www.waterqualitydata.us/data/Organization/Search?statecode=US%3A34&characteristicName=Chloride" 89 | "&mimeType=csv" 90 | ) 91 | response_file_path = "data/wqp_organizations.txt" 92 | mock_request(requests_mock, request_url, response_file_path) 93 | df, md = what_organizations(statecode="US:34", characteristicName="Chloride") 94 | assert type(df) is DataFrame 95 | assert df.size == 576 96 | assert md.url == request_url 97 | assert isinstance(md.query_time, datetime.timedelta) 98 | assert md.header == {"mock_header": "value"} 99 | assert md.comment is None 100 | 101 | 102 | def test_what_projects(requests_mock): 103 | """Tests Water quality portal projects query""" 104 | request_url = ( 105 | "https://www.waterqualitydata.us/data/Project/Search?statecode=US%3A34&characteristicName=Chloride" 106 | "&mimeType=csv" 107 | ) 108 | response_file_path = "data/wqp_projects.txt" 109 | mock_request(requests_mock, request_url, response_file_path) 110 | df, md = what_projects(statecode="US:34", characteristicName="Chloride") 111 | assert type(df) is DataFrame 112 | assert df.size == 530 113 | assert md.url == request_url 114 | assert isinstance(md.query_time, datetime.timedelta) 115 | assert md.header == {"mock_header": "value"} 116 | assert md.comment is None 117 | 118 | 119 | def test_what_activities(requests_mock): 120 | """Tests Water quality portal activities query""" 121 | request_url = ( 122 | "https://www.waterqualitydata.us/data/Activity/Search?statecode=US%3A34&characteristicName=Chloride" 123 | "&mimeType=csv" 124 | ) 125 | response_file_path = "data/wqp_activities.txt" 126 | mock_request(requests_mock, request_url, response_file_path) 127 | df, md = what_activities(statecode="US:34", characteristicName="Chloride") 128 | assert type(df) is DataFrame 129 | assert df.size == 5087443 130 | assert md.url == request_url 131 | assert isinstance(md.query_time, datetime.timedelta) 132 | assert md.header == {"mock_header": "value"} 133 | assert md.comment is None 134 | 135 | 136 | def test_what_detection_limits(requests_mock): 137 | """Tests Water quality portal detection limits query""" 138 | request_url = ( 139 | "https://www.waterqualitydata.us/data/ResultDetectionQuantitationLimit/Search?statecode=US%3A34&characteristicName=Chloride" 140 | "&mimeType=csv" 141 | ) 142 | response_file_path = "data/wqp_detection_limits.txt" 143 | mock_request(requests_mock, request_url, response_file_path) 144 | df, md = what_detection_limits(statecode="US:34", characteristicName="Chloride") 145 | assert type(df) is DataFrame 146 | assert df.size == 98770 147 | assert md.url == request_url 148 | assert isinstance(md.query_time, datetime.timedelta) 149 | assert md.header == {"mock_header": "value"} 150 | assert md.comment is None 151 | 152 | 153 | def test_what_habitat_metrics(requests_mock): 154 | """Tests Water quality portal habitat metrics query""" 155 | request_url = ( 156 | "https://www.waterqualitydata.us/data/BiologicalMetric/Search?statecode=US%3A34&characteristicName=Chloride" 157 | "&mimeType=csv" 158 | ) 159 | response_file_path = "data/wqp_habitat_metrics.txt" 160 | mock_request(requests_mock, request_url, response_file_path) 161 | df, md = what_habitat_metrics(statecode="US:34", characteristicName="Chloride") 162 | assert type(df) is DataFrame 163 | assert df.size == 48114 164 | assert md.url == request_url 165 | assert isinstance(md.query_time, datetime.timedelta) 166 | assert md.header == {"mock_header": "value"} 167 | assert md.comment is None 168 | 169 | 170 | def test_what_project_weights(requests_mock): 171 | """Tests Water quality portal project weights query""" 172 | request_url = ( 173 | "https://www.waterqualitydata.us/data/ProjectMonitoringLocationWeighting/Search?statecode=US%3A34&characteristicName=Chloride" 174 | "&mimeType=csv" 175 | ) 176 | response_file_path = "data/wqp_project_weights.txt" 177 | mock_request(requests_mock, request_url, response_file_path) 178 | df, md = what_project_weights(statecode="US:34", characteristicName="Chloride") 179 | assert type(df) is DataFrame 180 | assert df.size == 33098 181 | assert md.url == request_url 182 | assert isinstance(md.query_time, datetime.timedelta) 183 | assert md.header == {"mock_header": "value"} 184 | assert md.comment is None 185 | 186 | 187 | def test_what_activity_metrics(requests_mock): 188 | """Tests Water quality portal activity metrics query""" 189 | request_url = ( 190 | "https://www.waterqualitydata.us/data/ActivityMetric/Search?statecode=US%3A34&characteristicName=Chloride" 191 | "&mimeType=csv" 192 | ) 193 | response_file_path = "data/wqp_activity_metrics.txt" 194 | mock_request(requests_mock, request_url, response_file_path) 195 | df, md = what_activity_metrics(statecode="US:34", characteristicName="Chloride") 196 | assert type(df) is DataFrame 197 | assert df.size == 378 198 | assert md.url == request_url 199 | assert isinstance(md.query_time, datetime.timedelta) 200 | assert md.header == {"mock_header": "value"} 201 | assert md.comment is None 202 | 203 | 204 | def mock_request(requests_mock, request_url, file_path): 205 | with open(file_path) as text: 206 | requests_mock.get( 207 | request_url, text=text.read(), headers={"mock_header": "value"} 208 | ) 209 | 210 | 211 | def test_check_kwargs(): 212 | """Tests that correct errors are raised for invalid mimetypes.""" 213 | kwargs = {"mimeType": "geojson"} 214 | with pytest.raises(NotImplementedError): 215 | kwargs = _check_kwargs(kwargs) 216 | kwargs = {"mimeType": "foo"} 217 | with pytest.raises(ValueError): 218 | kwargs = _check_kwargs(kwargs) 219 | --------------------------------------------------------------------------------