├── py_noaa ├── __init__.py └── coops.py ├── requirements.txt ├── pytest.ini ├── .travis.yml ├── .gitignore ├── tests └── test_py_noaa.py ├── setup.py ├── LICENSE └── README.md /py_noaa/__init__.py: -------------------------------------------------------------------------------- 1 | import py_noaa.coops 2 | 3 | __version__ = 1.0 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest>=4.0 2 | pytest-cov 3 | pandas 4 | numpy 5 | requests 6 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths= ./py_noaa ./tests ./README.md 3 | addopts= --cov=py_noaa --cov=tests --doctest-modules --cov-report term-missing --doctest-glob='*.md' -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | language: python 3 | python: 4 | - 3.5 5 | - 3.6 6 | - 2.7 7 | 8 | install: 9 | - pip install --upgrade pip 10 | - pip install -r requirements.txt 11 | 12 | script: 13 | - pytest 14 | 15 | notifications: 16 | email: false 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | 4 | # Distribution / packaging 5 | build/ 6 | dist/ 7 | *.egg-info/ 8 | 9 | # Jupyter Notebook 10 | .ipynb_checkpoints 11 | 12 | # Testing & Coverage 13 | .cache 14 | .coverage 15 | .pytest_cache 16 | 17 | # Example .csv files 18 | .csv -------------------------------------------------------------------------------- /tests/test_py_noaa.py: -------------------------------------------------------------------------------- 1 | 2 
| from __future__ import absolute_import 3 | from py_noaa import coops 4 | 5 | import pytest 6 | 7 | def test_error_handling(): 8 | with pytest.raises(ValueError): 9 | coops.get_data( 10 | begin_date="20150101", 11 | end_date="20150331", 12 | stationid="9442396", 13 | product="water_level", 14 | datum="navd88", # this is an invalid datum 15 | units="metric", 16 | time_zone="gmt") 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import py_noaa 3 | 4 | setup(name='py_noaa', 5 | version='1.0', 6 | description='Python wrapper to fetch data from NOAA APIs', 7 | url='https://github.com/GClunies/py_noaa', 8 | author='Greg Clunies', 9 | author_email='greg.clunies@gmail.com', 10 | license='MIT', 11 | classifiers=[ 12 | 'Development Status :: 3 - Alpha', 13 | 'Intended Audience :: Science/Research', 14 | 'Topic :: Scientific/Engineering', 15 | 'License :: OSI Approved :: MIT License', 16 | 'Programming Language :: Python :: 2.7', 17 | 'Programming Language :: Python :: 3.5', 18 | 'Programming Language :: Python :: 3.6', 19 | ], 20 | packages=['py_noaa'], 21 | install_requires=['requests', 'numpy', 'pandas'], 22 | zip_safe=False) 23 | 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Gregory Clunies 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # py_noaa 2 | 3 | [![Build Status](https://travis-ci.org/GClunies/py_noaa.svg?branch=master)](https://travis-ci.org/GClunies/py_noaa) 4 | [![PyPI](https://img.shields.io/pypi/v/py_noaa.svg)](https://pypi.python.org/pypi/py-noaa) 5 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/py_noaa.svg)](https://pypi.python.org/pypi/py-noaa) 6 | 7 | ## NOTE: THIS PACKAGE HAS BEEN REPLACED BY [`noaa_coops`](https://github.com/GClunies/noaa_coops). NO FURTHER DEVELOPMENT IS PLANNED. 8 | 9 | 10 | `py_noaa` is a Python package that wraps around the NOAA CO-OPS Tides & Currents API and returns data in convenient formats (i.e., pandas dataframe) for further analysis in python. Analysis of the data is left up to the end user. 11 | 12 | **NOTE:** 13 | 14 | This package is under development, additional functionality will be added over time. 
15 | 16 | ## Installation 17 | 18 | ```bash 19 | pip install py_noaa 20 | ``` 21 | 22 | You can update `py_noaa` using: 23 | 24 | ```bash 25 | pip install py_noaa --upgrade 26 | ``` 27 | 28 | ## NOAA CO-OPS Tides & Currents 29 | 30 | NOAA records tides, currents, and other meteoroligical observations at various locations across the United States and the Great Lakes regions. Predictions are also available for [tides](https://tidesandcurrents.noaa.gov/tide_predictions.html) and [currents](https://tidesandcurrents.noaa.gov/noaacurrents/Help). 31 | 32 | `py_noaa` accesses data following the [NOAA CO-OPS API](https://tidesandcurrents.noaa.gov/api/) documentation. 33 | 34 | ## Available Data 35 | 36 | A list of available data products is provided in the [API documentation](https://tidesandcurrents.noaa.gov/api/#products) 37 | 38 | ### CO-OPS module basics 39 | 40 | 1. Get the station ID for your station of interest, a summary of available stations, by data type, can be found through the following links: 41 | 42 | - [Water Level Observation Stations](https://tidesandcurrents.noaa.gov/stations.html?type=Water+Levels) 43 | - [Tidal Prediction Stations](https://tidesandcurrents.noaa.gov/tide_predictions.html) 44 | - [Current Observation Stations](https://tidesandcurrents.noaa.gov/cdata/StationList?type=Current+Data&filter=active) 45 | - [Meteorological Observation Stations](https://tidesandcurrents.noaa.gov/stations.html?type=Meteorological%20Observations) 46 | 47 | 2. Read the station info if available! Useful station info is typically available based on the datatype recorded at a station. Station info for current stations are **NOT** the same for water level and tide stations (see examples below). 48 | 49 | - Exmaple [current station info](https://tidesandcurrents.noaa.gov/cdata/StationInfo?id=PUG1515) 50 | - Example [water level & tide station info](https://tidesandcurrents.noaa.gov/stationhome.html?id=9447130) 51 | 52 | 3. 
Fetch data using the `coops.get_data()` function for various data products, listed [here](https://tidesandcurrents.noaa.gov/api/#products). The currently supported data types are: 53 | 54 | - Currents 55 | - Observed water levels 56 | - Observered daily high and low water levels (use `product="high_low"`) 57 | - Predicted water levels 58 | - Predicted high and low water levels 59 | - Winds 60 | - Air pressure 61 | - Air temperature 62 | - Water temperature 63 | 64 | Compatibility with other data products listed on the [NOAA CO-OPS API](https://tidesandcurrents.noaa.gov/api/#products) may exist, but is not guaranteed at this time. 65 | 66 | ### Examples data requests are shown below: 67 | 68 | **Observed Currents** 69 | 70 | ```python 71 | >>> from py_noaa import coops 72 | >>> df_currents = coops.get_data( 73 | ... begin_date="20150727", 74 | ... end_date="20150910", 75 | ... stationid="PUG1515", 76 | ... product="currents", 77 | ... bin_num=1, 78 | ... units="metric", 79 | ... time_zone="gmt") 80 | ... 81 | >>> df_currents.head() # doctest: +NORMALIZE_WHITESPACE 82 | bin direction speed 83 | date_time 84 | 2015-07-27 20:06:00 1.0 255.0 32.1 85 | 2015-07-27 20:12:00 1.0 255.0 30.1 86 | 2015-07-27 20:18:00 1.0 261.0 29.3 87 | 2015-07-27 20:24:00 1.0 260.0 27.3 88 | 2015-07-27 20:30:00 1.0 261.0 23.0 89 | 90 | ``` 91 | 92 | **Observed Water Levels** 93 | 94 | ```python 95 | >>> from py_noaa import coops 96 | >>> df_water_levels = coops.get_data( 97 | ... begin_date="20150101", 98 | ... end_date="20150331", 99 | ... stationid="9447130", 100 | ... product="water_level", 101 | ... datum="MLLW", 102 | ... units="metric", 103 | ... time_zone="gmt") 104 | ... 
105 | >>> df_water_levels.head() # doctest: +NORMALIZE_WHITESPACE 106 | flags QC sigma water_level 107 | date_time 108 | 2015-01-01 00:00:00 0,0,0,0 v 0.023 1.799 109 | 2015-01-01 01:00:00 0,0,0,0 v 0.014 0.977 110 | 2015-01-01 02:00:00 0,0,0,0 v 0.009 0.284 111 | 2015-01-01 03:00:00 0,0,0,0 v 0.010 -0.126 112 | 2015-01-01 04:00:00 0,0,0,0 v 0.013 -0.161 113 | 114 | ``` 115 | 116 | **Predicted Water Levels (Tides)** 117 | 118 | Note the use of the `interval` parameter to specify only hourly data be returned. The `interval` parameter works with, water level, currents, predictions, and meteorological data types. 119 | 120 | ```python 121 | >>> from py_noaa import coops 122 | >>> df_predictions = coops.get_data( 123 | ... begin_date="20121115", 124 | ... end_date="20121217", 125 | ... stationid="9447130", 126 | ... product="predictions", 127 | ... datum="MLLW", 128 | ... interval="h", 129 | ... units="metric", 130 | ... time_zone="gmt") 131 | ... 132 | >>> df_predictions.head() # doctest: +NORMALIZE_WHITESPACE 133 | predicted_wl 134 | date_time 135 | 2012-11-15 00:00:00 3.660 136 | 2012-11-15 01:00:00 3.431 137 | 2012-11-15 02:00:00 2.842 138 | 2012-11-15 03:00:00 1.974 139 | 2012-11-15 04:00:00 0.953 140 | 141 | ``` 142 | 143 | Also available for the `interval` parameter is the `hilo` key which returns High and Low tide predictions. 144 | 145 | ```python 146 | >>> from py_noaa import coops 147 | >>> df_predictions = coops.get_data( 148 | ... begin_date="20121115", 149 | ... end_date="20121217", 150 | ... stationid="9447130", 151 | ... product="predictions", 152 | ... datum="MLLW", 153 | ... interval="hilo", 154 | ... units="metric", 155 | ... time_zone="gmt") 156 | ... 
157 | >>> df_predictions.head() # doctest: +NORMALIZE_WHITESPACE 158 | hi_lo predicted_wl 159 | date_time 160 | 2012-11-15 06:57:00 L -1.046 161 | 2012-11-15 14:11:00 H 3.813 162 | 2012-11-15 19:36:00 L 2.037 163 | 2012-11-16 00:39:00 H 3.573 164 | 2012-11-16 07:44:00 L -1.049 165 | 166 | ``` 167 | 168 | **Filtering Data by date** 169 | 170 | All data is returned as a pandas dataframe, with a DatimeIndex which allows for easy filtering of the data by dates. 171 | 172 | ```python 173 | >>> from py_noaa import coops 174 | >>> df_predictions = coops.get_data( 175 | ... begin_date="20121115", 176 | ... end_date="20121217", 177 | ... stationid="9447130", 178 | ... product="predictions", 179 | ... datum="MLLW", 180 | ... interval="h", 181 | ... units="metric", 182 | ... time_zone="gmt") 183 | ... 184 | >>> df_predictions['201211150000':'201211151200'] # doctest: +NORMALIZE_WHITESPACE 185 | predicted_wl 186 | date_time 187 | 2012-11-15 00:00:00 3.660 188 | 2012-11-15 01:00:00 3.431 189 | 2012-11-15 02:00:00 2.842 190 | 2012-11-15 03:00:00 1.974 191 | 2012-11-15 04:00:00 0.953 192 | 2012-11-15 05:00:00 -0.047 193 | 2012-11-15 06:00:00 -0.787 194 | 2012-11-15 07:00:00 -1.045 195 | 2012-11-15 08:00:00 -0.740 196 | 2012-11-15 09:00:00 0.027 197 | 2012-11-15 10:00:00 1.053 198 | 2012-11-15 11:00:00 2.114 199 | 2012-11-15 12:00:00 3.006 200 | 201 | ``` 202 | 203 | ### Exporting Data 204 | --- 205 | Since data is returned in a pandas dataframe, exporting the data is simple using the `.to_csv` method on the returned pandas dataframe. This requires the [pandas](https://pandas.pydata.org/) package, which should be taken care of if you installed `py_noaa` with `pip`. 206 | 207 | ```python 208 | >>> df_currents = coops.get_data( 209 | ... begin_date="20150727", 210 | ... end_date="20150910", 211 | ... stationid="PUG1515", 212 | ... product="currents", 213 | ... bin_num=1, 214 | ... units="metric", 215 | ... time_zone="gmt") 216 | ... 217 | >>> df_currents.to_csv( 218 | ... 
'example.csv', 219 | ... sep='\t', 220 | ... encoding='utf-8') 221 | 222 | ``` 223 | 224 | As shown above, you can set the delimeter type using the `sep=` argument in the `.to_csv` method and control the file encoding using the `encoding=` argument. 225 | 226 | ## Requirements 227 | 228 | For use: 229 | 230 | - requests 231 | - numpy 232 | - pandas 233 | 234 | Suggested for development/contributions: 235 | 236 | - pytest 237 | - pytest-cov 238 | 239 | 240 | ## TODO 241 | 242 | See [issues](https://github.com/GClunies/py_noaa/issues) for a list of issues and to add issues of your own. 243 | 244 | ## Contribution 245 | 246 | All contributions are welcome, feel free to submit a pull request if you feel you have a valuable addition to the package or constructive feedback. 247 | 248 | The development of `py_noaa` was originally intended to help me ([@GClunies](https://github.com/GClunies)) learn Python packaging, git, and GitHub while also helping to alleviate the pain of downloading NOAA Tides and Current data as part of my day job as a Coastal engineer. 249 | 250 | As this project started as a learning exercise, please be patient and willing to teach/learn. 
"""
Access data from the NOAA CO-OPS Tides & Currents API
(https://tidesandcurrents.noaa.gov/api/) and return it as pandas
DataFrames for further analysis.
"""
import math
from datetime import datetime, timedelta

import pandas as pd

try:  # Python 3
    from urllib.parse import urlencode
except ImportError:  # pragma: no cover - Python 2 fallback
    from urllib import urlencode

try:  # pandas >= 1.0
    _json_normalize = pd.json_normalize
except AttributeError:  # pragma: no cover - older pandas
    from pandas.io.json import json_normalize as _json_normalize

# All query URLs are built on top of this endpoint
BASE_URL = 'https://tidesandcurrents.noaa.gov/api/datagetter?'

# Water-level style products: these require a vertical datum and allow
# up to 365 days of data per API request (vs 31 days for other products)
_DATUM_PRODUCTS = ('water_level', 'hourly_height', 'high_low')

# Error text returned by the API when a requested period has no data; for
# multi-block requests this is treated as a data gap, not a fatal error.
_LARGE_DATA_GAP_ERROR = (
    'No data was found. This product may not be offered at this station at '
    'the requested time.')

# product -> (raw-to-friendly column renames, columns kept non-numeric)
_PRODUCT_COLUMNS = {
    'water_level': ({'f': 'flags', 'q': 'QC', 's': 'sigma',
                     't': 'date_time', 'v': 'water_level'},
                    ['flags', 'QC', 'date_time']),
    'hourly_height': ({'f': 'flags', 's': 'sigma',
                       't': 'date_time', 'v': 'water_level'},
                      ['flags', 'date_time']),
    'currents': ({'b': 'bin', 'd': 'direction',
                  's': 'speed', 't': 'date_time'},
                 ['date_time']),
    'wind': ({'d': 'dir', 'dr': 'compass', 'f': 'flags',
              'g': 'gust_spd', 's': 'spd', 't': 'date_time'},
             ['date_time', 'flags', 'compass']),
    'air_pressure': ({'f': 'flags', 't': 'date_time', 'v': 'air_press'},
                     ['date_time', 'flags']),
    'air_temperature': ({'f': 'flags', 't': 'date_time', 'v': 'air_temp'},
                        ['date_time', 'flags']),
    'water_temperature': ({'f': 'flags', 't': 'date_time',
                           'v': 'water_temp'},
                          ['date_time', 'flags']),
}


def build_query_url(
        begin_date, end_date, stationid, product, datum=None, bin_num=None,
        interval=None, units='metric', time_zone='gmt'):
    """
    Build a URL to be used to fetch data from the NOAA CO-OPS API
    (see https://tidesandcurrents.noaa.gov/api/).

    Raises ValueError if a water-level style product is requested without a
    datum, or a currents product without a bin number.
    """
    # Water-level style products are meaningless without a vertical datum
    if product in _DATUM_PRODUCTS and datum is None:
        raise ValueError('No datum specified for water level data.See'
                         ' https://tidesandcurrents.noaa.gov/api/#datum '
                         'for list of available datums')

    # Currents data is reported per bin; a bin number is mandatory
    if product == 'currents' and bin_num is None:
        raise ValueError(
            'No bin specified for current data. Bin info can be '
            'found on the station info page'
            ' (e.g., https://tidesandcurrents.noaa.gov/cdata/StationInfo?id=PUG1515)')

    # Parameters shared by every product type
    parameters = {'begin_date': begin_date,
                  'end_date': end_date,
                  'station': stationid,
                  'product': product,
                  'units': units,
                  'time_zone': time_zone,
                  'application': 'py_noaa',
                  'format': 'json'}

    # Product-specific parameters
    if product in _DATUM_PRODUCTS:
        parameters['datum'] = datum
    elif product == 'predictions':
        parameters['datum'] = datum        # optional; dropped below if None
        parameters['interval'] = interval  # optional; dropped below if None
    elif product == 'currents':
        parameters['bin'] = str(bin_num)
    else:
        # Meteorological products accept an optional interval
        parameters['interval'] = interval

    # Mirror requests' behaviour: parameters whose value is None are omitted
    parameters = {k: v for k, v in parameters.items() if v is not None}

    return BASE_URL + urlencode(parameters)


def url2pandas(data_url, product, num_request_blocks):
    """
    Fetch JSON from a NOAA CO-OPS API URL (see
    https://tidesandcurrents.noaa.gov/api/) and convert it to a DataFrame.

    For multi-block requests (num_request_blocks > 1) a "no data found"
    error from the API is treated as a data gap and yields an empty
    DataFrame, so the remaining blocks can still be collected; any other
    API error raises ValueError.
    """
    # Deferred import so the pure URL/date helpers work without requests
    import requests

    response = requests.get(data_url)  # Get JSON data from URL
    json_dict = response.json()        # Create a dictionary from JSON data

    if 'error' in json_dict:
        message = json_dict['error'].get('message', 'Error retrieving data')
        message = message.strip()

        # A data gap inside a large request is not fatal; skip the block
        if num_request_blocks > 1 and message == _LARGE_DATA_GAP_ERROR:
            return pd.DataFrame()

        raise ValueError(message)

    # Predictions come back under a different top-level JSON key
    key = 'predictions' if product == 'predictions' else 'data'
    return _json_normalize(json_dict[key])


def parse_known_date_formats(dt_string):
    """Attempt to parse CO-OPS accepted date formats; raise on failure."""
    for fmt in ('%Y%m%d', '%Y%m%d %H:%M', '%m/%d/%Y', '%m/%d/%Y %H:%M'):
        try:
            return datetime.strptime(dt_string, fmt)
        except ValueError:
            continue
    raise ValueError("No valid date format found. "
                     "See https://tidesandcurrents.noaa.gov/api/ "
                     "for list of accepted date formats.")


def _fetch_in_blocks(begin_datetime, end_datetime, block_days, stationid,
                     product, datum, bin_num, interval, units, time_zone):
    """
    Fetch a long request as consecutive block_days-day API requests and
    concatenate the results (the API caps the period per request).
    """
    delta = end_datetime - begin_datetime
    # Number of whole blocks; the loop runs num_blocks + 1 times so the
    # final partial block is included
    num_blocks = int(math.floor(delta.days / block_days))

    frames = []
    for i in range(num_blocks + 1):
        block_begin = begin_datetime + timedelta(days=(i * block_days))
        block_end = block_begin + timedelta(days=block_days)

        # Clamp the final block to the user-supplied end date
        if block_end > end_datetime:
            block_end = end_datetime

        data_url = build_query_url(
            block_begin.strftime('%Y%m%d'), block_end.strftime('%Y%m%d'),
            stationid, product, datum, bin_num, interval, units, time_zone)

        frames.append(url2pandas(data_url, product, num_blocks))

    return pd.concat(frames, ignore_index=True)


def _clean_high_low(df):
    """
    Reshape raw high/low records into one row per calendar date with
    columns for HH, H, L and LL water levels and their timestamps.
    """
    df.rename(columns={'f': 'flags', 'ty': 'high_low',
                       't': 'date_time', 'v': 'water_level'},
              inplace=True)

    # Split into one frame per tide type; raw 'ty' values carry trailing
    # spaces ('HH', 'H ', 'L ', 'LL')
    split = {}
    for label, tide_type in (('HH', 'HH'), ('H', 'H '),
                             ('L', 'L '), ('LL', 'LL')):
        sub = df[df['high_low'] == tide_type].copy()
        sub.rename(columns={'date_time': 'date_time_' + label,
                            'water_level': label + '_water_level'},
                   inplace=True)

        # Index each frame by calendar date so the frames can be joined
        dates = [x.date() for x in pd.to_datetime(sub['date_time_' + label])]
        sub['date_time'] = dates
        sub.index = sub['date_time']

        drop_cols = ['flags', 'high_low']
        if label != 'HH':  # keep one 'date_time' column for the joined frame
            drop_cols.append('date_time')
        sub = sub.drop(columns=drop_cols)

        # If a date has multiple entries keep one extreme per date
        # (max for every type — matches the original implementation)
        col = label + '_water_level'
        extremes = sub.groupby(sub.index)[col].transform(max)
        split[label] = sub.loc[sub[col] == extremes]

    out = split['HH'].join(split['H'], how='outer')
    out = out.join(split['L'], how='outer')
    out = out.join(split['LL'], how='outer')

    # Convert water-level columns to numeric values
    data_cols = out.columns.drop(
        ['date_time', 'date_time_HH', 'date_time_H', 'date_time_L',
         'date_time_LL'])
    out[data_cols] = out[data_cols].apply(pd.to_numeric, axis=1,
                                          errors='coerce')

    # Convert date & time strings to datetime objects
    out['date_time'] = pd.to_datetime(out.index)
    out['date_time_HH'] = pd.to_datetime(out['date_time_HH'])
    out['date_time_H'] = pd.to_datetime(out['date_time_H'])
    out['date_time_L'] = pd.to_datetime(out['date_time_L'])
    out['date_time_LL'] = pd.to_datetime(out['date_time_LL'])

    return out


def _clean_columns(df, product, interval):
    """
    Rename raw API columns for clarity, convert data columns to numeric
    values, and parse timestamps for the given product.
    """
    if product == 'high_low':
        return _clean_high_low(df)

    if product == 'predictions':
        renames = {'t': 'date_time', 'v': 'predicted_wl'}
        non_numeric = ['date_time']
        if interval == 'hilo':
            renames['type'] = 'hi_lo'  # high/low marker column
            non_numeric.append('hi_lo')
    elif product in _PRODUCT_COLUMNS:
        renames, non_numeric = _PRODUCT_COLUMNS[product]
    else:
        # Unknown/unsupported product: hand the raw columns back untouched
        return df

    df.rename(columns=renames, inplace=True)

    # Convert everything except flag/label columns to numeric values.
    # NOTE: the original code skipped this step for predictions, leaving
    # predicted_wl as strings; it is applied uniformly here.
    data_cols = df.columns.drop(non_numeric)
    df[data_cols] = df[data_cols].apply(pd.to_numeric, axis=1,
                                        errors='coerce')

    # Convert date & time strings to datetime objects
    df['date_time'] = pd.to_datetime(df['date_time'])
    return df


def get_data(
        begin_date, end_date, stationid, product, datum=None, bin_num=None,
        interval=None, units='metric', time_zone='gmt'):
    """
    Get data from the NOAA CO-OPS API and convert it to a pandas DataFrame
    indexed by timestamp for convenient analysis.

    Info on the NOAA CO-OPS API can be found at
    https://tidesandcurrents.noaa.gov/api/; the arguments listed below
    generally follow the same (or a very similar) format.

    Arguments:
    begin_date -- the starting date of request (yyyyMMdd, yyyyMMdd HH:mm,
        MM/dd/yyyy, or MM/dd/yyyy HH:mm), string
    end_date -- the ending date of request (same formats), string
    stationid -- station at which you want data, string
    product -- the product type you would like, string
    datum -- the datum to be used for water level data, string (default None)
    bin_num -- the bin number you would like your currents data at, int
        (default None)
    interval -- the interval you would like data returned, string
        (default None)
    units -- units to be used for data output, string (default 'metric')
    time_zone -- time zone to be used for data output, string
        (default 'gmt')

    Raises ValueError for unparseable dates, missing datum/bin_num, or
    errors reported by the API.
    """
    # Convert dates to datetime objects so deltas can be calculated
    begin_datetime = parse_known_date_formats(begin_date)
    end_datetime = parse_known_date_formats(end_date)
    delta = end_datetime - begin_datetime

    # Products that allow up to 365 days per request (others allow 31)
    long_products = ('hourly_height', 'high_low')

    if delta.days <= 31:
        # Short request: a single API call covers the whole period
        data_url = build_query_url(
            begin_datetime.strftime('%Y%m%d %H:%M'),
            end_datetime.strftime('%Y%m%d %H:%M'),
            stationid, product, datum, bin_num, interval, units, time_zone)
        df = url2pandas(data_url, product, num_request_blocks=1)
    elif delta.days <= 365 and product in long_products:
        # hourly_height/high_low allow up to a year in one request
        data_url = build_query_url(
            begin_date, end_date, stationid, product, datum, bin_num,
            interval, units, time_zone)
        df = url2pandas(data_url, product, num_request_blocks=1)
    else:
        # Longer periods must be split into API-sized blocks
        block_days = 365 if product in long_products else 31
        df = _fetch_in_blocks(
            begin_datetime, end_datetime, block_days, stationid, product,
            datum, bin_num, interval, units, time_zone)

    # Rename columns, coerce numeric data and parse timestamps
    df = _clean_columns(df, product, interval)

    # Set datetime to index (for use in date slicing and resampling)
    df.index = df['date_time']
    df = df.drop(columns=['date_time'])

    # Handle hourly requests for water_level and currents client-side
    # (build_query_url does not send `interval` for these products).
    # NOTE: the original condition used `|`/`&`, whose precedence made
    # water_level data resample hourly regardless of `interval`.
    if product in ('water_level', 'currents') and interval == 'h':
        df = df.resample('H').first()  # Only return the hourly data

    return df