├── py_noaa
    ├── __init__.py
    └── coops.py
├── requirements.txt
├── pytest.ini
├── .travis.yml
├── .gitignore
├── tests
    └── test_py_noaa.py
├── setup.py
├── LICENSE
└── README.md


/py_noaa/__init__.py:
--------------------------------------------------------------------------------
1 | import py_noaa.coops
2 | 
3 | __version__ = 1.0


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest>=4.0
2 | pytest-cov
3 | pandas
4 | numpy
5 | requests
6 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | testpaths= ./py_noaa ./tests ./README.md
3 | addopts= --cov=py_noaa --cov=tests --doctest-modules --cov-report term-missing --doctest-glob='*.md'


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | language: python
 3 | python:
 4 | - 3.5
 5 | - 3.6
 6 | - 2.7
 7 | 
 8 | install:
 9 |   - pip install --upgrade pip
10 |   - pip install -r requirements.txt
11 | 
12 | script:
13 |   - pytest
14 | 
15 | notifications:
16 |   email: false
17 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | 
 4 | # Distribution / packaging
 5 | build/
 6 | dist/
 7 | *.egg-info/
 8 | 
 9 | # Jupyter Notebook
10 | .ipynb_checkpoints
11 | 
12 | # Testing & Coverage
13 | .cache
14 | .coverage
15 | .pytest_cache
16 | 
17 | # Example .csv files
18 | .csv


--------------------------------------------------------------------------------
/tests/test_py_noaa.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from __future__ import absolute_import
 3 | from py_noaa import coops
 4 | 
 5 | import pytest
 6 | 
 7 | def test_error_handling():
 8 |     with pytest.raises(ValueError):
 9 |         coops.get_data(
10 |             begin_date="20150101",
11 |             end_date="20150331",
12 |             stationid="9442396",
13 |             product="water_level",
14 |             datum="navd88", # this is an invalid datum
15 |             units="metric",
16 |             time_zone="gmt")
17 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | import py_noaa
 3 | 
 4 | setup(name='py_noaa',
 5 |       version='1.0',
 6 |       description='Python wrapper to fetch data from NOAA APIs',
 7 |       url='https://github.com/GClunies/py_noaa',
 8 |       author='Greg Clunies',
 9 |       author_email='greg.clunies@gmail.com',
10 |       license='MIT',
11 |       classifiers=[
12 |           'Development Status :: 3 - Alpha',
13 |           'Intended Audience :: Science/Research',
14 |           'Topic :: Scientific/Engineering',
15 |           'License :: OSI Approved :: MIT License',
16 |           'Programming Language :: Python :: 2.7',
17 |           'Programming Language :: Python :: 3.5',
18 |           'Programming Language :: Python :: 3.6',
19 |       ],
20 |       packages=['py_noaa'],
21 |       install_requires=['requests', 'numpy', 'pandas'],
22 |       zip_safe=False)
23 |       
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Gregory Clunies
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # py_noaa
  2 | 
  3 | [![Build Status](https://travis-ci.org/GClunies/py_noaa.svg?branch=master)](https://travis-ci.org/GClunies/py_noaa)
  4 | [![PyPI](https://img.shields.io/pypi/v/py_noaa.svg)](https://pypi.python.org/pypi/py-noaa)
  5 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/py_noaa.svg)](https://pypi.python.org/pypi/py-noaa)
  6 | 
  7 | ## NOTE: THIS PACKAGE HAS BEEN REPLACED BY [`noaa_coops`](https://github.com/GClunies/noaa_coops). NO FURTHER DEVELOPMENT IS PLANNED.
  8 |  
  9 | 
 10 | `py_noaa` is a Python package that wraps around the NOAA CO-OPS Tides & Currents API and returns data in convenient formats (i.e., pandas dataframe) for further analysis in python. Analysis of the data is left up to the end user.
 11 | 
 12 | **NOTE:**
 13 | 
 14 | This package is under development, additional functionality will be added over time.
 15 | 
 16 | ## Installation
 17 | 
 18 | ```bash
 19 | pip install py_noaa
 20 | ```
 21 | 
 22 | You can update `py_noaa` using:
 23 | 
 24 | ```bash
 25 | pip install py_noaa --upgrade
 26 | ``` 
 27 | 
 28 | ## NOAA CO-OPS Tides & Currents
 29 | 
 30 | NOAA records tides, currents, and other meteoroligical observations at various locations across the United States and the Great Lakes regions. Predictions are also available for [tides](https://tidesandcurrents.noaa.gov/tide_predictions.html) and [currents](https://tidesandcurrents.noaa.gov/noaacurrents/Help).
 31 | 
 32 | `py_noaa` accesses data following the [NOAA CO-OPS API](https://tidesandcurrents.noaa.gov/api/) documentation.
 33 | 
 34 | ## Available Data
 35 | 
 36 | A list of available data products is provided in the [API documentation](https://tidesandcurrents.noaa.gov/api/#products)
 37 | 
 38 | ### CO-OPS module basics
 39 | 
 40 | 1. Get the station ID for your station of interest, a summary of available stations, by data type, can be found through the following links:
 41 | 
 42 |    - [Water Level Observation Stations](https://tidesandcurrents.noaa.gov/stations.html?type=Water+Levels)
 43 |    - [Tidal Prediction Stations](https://tidesandcurrents.noaa.gov/tide_predictions.html)
 44 |    - [Current Observation Stations](https://tidesandcurrents.noaa.gov/cdata/StationList?type=Current+Data&filter=active)
 45 |    - [Meteorological Observation Stations](https://tidesandcurrents.noaa.gov/stations.html?type=Meteorological%20Observations)
 46 | 
 47 | 2. Read the station info if available! Useful station info is typically available based on the datatype recorded at a station.  Station info for current stations are **NOT** the same for water level and tide stations (see examples below).
 48 | 
 49 |     - Exmaple [current station info](https://tidesandcurrents.noaa.gov/cdata/StationInfo?id=PUG1515)
 50 |     - Example [water level & tide station info](https://tidesandcurrents.noaa.gov/stationhome.html?id=9447130)
 51 | 
 52 | 3. Fetch data using the `coops.get_data()` function for various data products, listed [here](https://tidesandcurrents.noaa.gov/api/#products). The currently supported data types are:
 53 | 
 54 |     - Currents
 55 |     - Observed water levels
 56 |     - Observered daily high and low water levels (use `product="high_low"`)
 57 |     - Predicted water levels
 58 |     - Predicted high and low water levels
 59 |     - Winds
 60 |     - Air pressure
 61 |     - Air temperature
 62 |     - Water temperature
 63 | 
 64 | Compatibility with other data products listed on the [NOAA CO-OPS API](https://tidesandcurrents.noaa.gov/api/#products) may exist, but is not guaranteed at this time.
 65 | 
 66 | ### Examples data requests are shown below:
 67 | 
 68 | **Observed Currents**
 69 | 
 70 | ```python
 71 | >>> from py_noaa import coops
 72 | >>> df_currents = coops.get_data(
 73 | ...     begin_date="20150727",
 74 | ...     end_date="20150910",
 75 | ...     stationid="PUG1515",
 76 | ...     product="currents",
 77 | ...     bin_num=1,
 78 | ...     units="metric",
 79 | ...     time_zone="gmt")
 80 | ...
 81 | >>> df_currents.head() # doctest: +NORMALIZE_WHITESPACE
 82 |                      bin  direction  speed
 83 | date_time
 84 | 2015-07-27 20:06:00  1.0      255.0   32.1
 85 | 2015-07-27 20:12:00  1.0      255.0   30.1
 86 | 2015-07-27 20:18:00  1.0      261.0   29.3
 87 | 2015-07-27 20:24:00  1.0      260.0   27.3
 88 | 2015-07-27 20:30:00  1.0      261.0   23.0
 89 | 
 90 | ```
 91 | 
 92 | **Observed Water Levels**
 93 | 
 94 | ```python
 95 | >>> from py_noaa import coops
 96 | >>> df_water_levels = coops.get_data(
 97 | ...     begin_date="20150101",
 98 | ...     end_date="20150331",
 99 | ...     stationid="9447130",
100 | ...     product="water_level",
101 | ...     datum="MLLW",
102 | ...     units="metric",
103 | ...     time_zone="gmt")
104 | ...
105 | >>> df_water_levels.head() # doctest: +NORMALIZE_WHITESPACE
106 |                        flags QC  sigma  water_level
107 | date_time
108 | 2015-01-01 00:00:00  0,0,0,0  v  0.023        1.799
109 | 2015-01-01 01:00:00  0,0,0,0  v  0.014        0.977
110 | 2015-01-01 02:00:00  0,0,0,0  v  0.009        0.284
111 | 2015-01-01 03:00:00  0,0,0,0  v  0.010       -0.126
112 | 2015-01-01 04:00:00  0,0,0,0  v  0.013       -0.161
113 | 
114 | ```
115 | 
116 | **Predicted Water Levels (Tides)**
117 | 
118 | Note the use of the `interval` parameter to specify only hourly data be returned. The `interval` parameter works with, water level, currents, predictions, and meteorological data types.
119 | 
120 | ```python
121 | >>> from py_noaa import coops
122 | >>> df_predictions = coops.get_data(
123 | ...     begin_date="20121115",
124 | ...     end_date="20121217",
125 | ...     stationid="9447130",
126 | ...     product="predictions",
127 | ...     datum="MLLW",
128 | ...     interval="h",
129 | ...     units="metric",
130 | ...     time_zone="gmt")
131 | ...
132 | >>> df_predictions.head() # doctest: +NORMALIZE_WHITESPACE
133 |                      predicted_wl
134 | date_time
135 | 2012-11-15 00:00:00         3.660
136 | 2012-11-15 01:00:00         3.431
137 | 2012-11-15 02:00:00         2.842
138 | 2012-11-15 03:00:00         1.974
139 | 2012-11-15 04:00:00         0.953
140 | 
141 | ```
142 | 
143 | Also available for the `interval` parameter is the `hilo` key which returns High and Low tide predictions.
144 | 
145 | ```python
146 | >>> from py_noaa import coops
147 | >>> df_predictions = coops.get_data(
148 | ...     begin_date="20121115",
149 | ...     end_date="20121217",
150 | ...     stationid="9447130",
151 | ...     product="predictions",
152 | ...     datum="MLLW",
153 | ...     interval="hilo",
154 | ...     units="metric",
155 | ...     time_zone="gmt")
156 | ...
157 | >>> df_predictions.head() # doctest: +NORMALIZE_WHITESPACE
158 |                     hi_lo  predicted_wl
159 | date_time
160 | 2012-11-15 06:57:00     L        -1.046
161 | 2012-11-15 14:11:00     H         3.813
162 | 2012-11-15 19:36:00     L         2.037
163 | 2012-11-16 00:39:00     H         3.573
164 | 2012-11-16 07:44:00     L        -1.049
165 | 
166 | ```
167 | 
168 | **Filtering Data by date**
169 | 
170 | All data is returned as a pandas dataframe, with a DatimeIndex which allows for easy filtering of the data by dates.
171 | 
172 | ```python
173 | >>> from py_noaa import coops
174 | >>> df_predictions = coops.get_data(
175 | ...     begin_date="20121115",
176 | ...     end_date="20121217",
177 | ...     stationid="9447130",
178 | ...     product="predictions",
179 | ...     datum="MLLW",
180 | ...     interval="h",
181 | ...     units="metric",
182 | ...     time_zone="gmt")
183 | ...
184 | >>> df_predictions['201211150000':'201211151200'] # doctest: +NORMALIZE_WHITESPACE
185 |                      predicted_wl
186 | date_time
187 | 2012-11-15 00:00:00         3.660
188 | 2012-11-15 01:00:00         3.431
189 | 2012-11-15 02:00:00         2.842
190 | 2012-11-15 03:00:00         1.974
191 | 2012-11-15 04:00:00         0.953
192 | 2012-11-15 05:00:00        -0.047
193 | 2012-11-15 06:00:00        -0.787
194 | 2012-11-15 07:00:00        -1.045
195 | 2012-11-15 08:00:00        -0.740
196 | 2012-11-15 09:00:00         0.027
197 | 2012-11-15 10:00:00         1.053
198 | 2012-11-15 11:00:00         2.114
199 | 2012-11-15 12:00:00         3.006
200 | 
201 | ```
202 | 
203 | ### Exporting Data 
204 | ---
205 | Since data is returned in a pandas dataframe, exporting the data is simple using the `.to_csv` method on the returned pandas dataframe. This requires the [pandas](https://pandas.pydata.org/) package, which should be taken care of if you installed `py_noaa` with `pip`.
206 | 
207 | ```python
208 | >>> df_currents = coops.get_data(
209 | ...     begin_date="20150727",
210 | ...     end_date="20150910",
211 | ...     stationid="PUG1515",
212 | ...     product="currents",
213 | ...     bin_num=1,
214 | ...     units="metric",
215 | ...     time_zone="gmt")
216 | ...
217 | >>> df_currents.to_csv(
218 | ...     'example.csv',
219 | ...     sep='\t',
220 | ...     encoding='utf-8')
221 | 
222 | ```
223 | 
224 | As shown above, you can set the delimeter type using the `sep=` argument in the `.to_csv` method and control the file encoding using the `encoding=` argument.
225 | 
226 | ## Requirements
227 | 
228 | For use:
229 | 
230 | - requests
231 | - numpy
232 | - pandas
233 | 
234 | Suggested for development/contributions:
235 | 
236 | - pytest
237 | - pytest-cov
238 | 
239 | 
240 | ## TODO
241 | 
242 | See [issues](https://github.com/GClunies/py_noaa/issues) for a list of issues and to add issues of your own.
243 | 
244 | ## Contribution
245 | 
246 | All contributions are welcome, feel free to submit a pull request if you feel you have a valuable addition to the package or constructive feedback. 
247 | 
248 | The development of `py_noaa` was originally intended to help me ([@GClunies](https://github.com/GClunies)) learn Python packaging, git, and GitHub while also helping to alleviate the pain of downloading NOAA Tides and Current data as part of my day job as a Coastal engineer.
249 | 
250 | As this project started as a learning exercise, please be patient and willing to teach/learn.
251 | 
252 | 
253 | **Many thanks to the following contributors!**
254 | 
255 | - [@delgadom](https://github.com/delgadom)
256 | - [@CraigHarter](https://github.com/CraigHarter)
257 | - [@jcconnel](https://github.com/jcconnell)
258 | - [@fabaff](https://github.com/fabaff)
259 | 


--------------------------------------------------------------------------------
/py_noaa/coops.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from datetime import datetime, timedelta
  3 | 
  4 | import pandas as pd
  5 | import requests
  6 | from pandas.io.json import json_normalize
  7 | 
  8 | 
  9 | def build_query_url(
 10 |         begin_date, end_date, stationid, product, datum=None, bin_num=None,
 11 |         interval=None, units='metric', time_zone='gmt'):
 12 |     """
 13 |     Build an URL to be used to fetch data from the NOAA CO-OPS API
 14 |     (see https://tidesandcurrents.noaa.gov/api/)
 15 |     """
 16 |     base_url = 'http://tidesandcurrents.noaa.gov/api/datagetter?'
 17 | 
 18 |     # If the data product is water levels, check that a datum is specified
 19 |     if product == 'water_level':
 20 |         if datum is None:
 21 |             raise ValueError('No datum specified for water level data.See'
 22 |                              ' https://tidesandcurrents.noaa.gov/api/#datum '
 23 |                              'for list of available datums')
 24 |         else:
 25 |             # Compile parameter string for use in URL
 26 |             parameters = {'begin_date': begin_date,
 27 |                           'end_date': end_date,
 28 |                           'station': stationid,
 29 |                           'product': product,
 30 |                           'datum': datum,
 31 |                           'units': units,
 32 |                           'time_zone': time_zone,
 33 |                           'application': 'py_noaa',
 34 |                           'format': 'json'}
 35 | 
 36 |     elif product == 'hourly_height':
 37 |         if datum is None:
 38 |             raise ValueError('No datum specified for water level data.See'
 39 |                              ' https://tidesandcurrents.noaa.gov/api/#datum '
 40 |                              'for list of available datums')
 41 |         else:
 42 |             # Compile parameter string for use in URL
 43 |             parameters = {'begin_date': begin_date,
 44 |                           'end_date': end_date,
 45 |                           'station': stationid,
 46 |                           'product': product,
 47 |                           'datum': datum,
 48 |                           'units': units,
 49 |                           'time_zone': time_zone,
 50 |                           'application': 'py_noaa',
 51 |                           'format': 'json'}
 52 |     elif product == 'high_low':
 53 |         if datum is None:
 54 |             raise ValueError('No datum specified for water level data.See'
 55 |                              ' https://tidesandcurrents.noaa.gov/api/#datum '
 56 |                              'for list of available datums')
 57 |         else:
 58 |             # Compile parameter string for use in URL
 59 |             parameters = {'begin_date': begin_date,
 60 |                           'end_date': end_date,
 61 |                           'station': stationid,
 62 |                           'product': product,
 63 |                           'datum': datum,
 64 |                           'units': units,
 65 |                           'time_zone': time_zone,
 66 |                           'application': 'py_noaa',
 67 |                           'format': 'json'}
 68 | 
 69 |     elif product == 'predictions':
 70 |         # If no interval provided, return 6-min predictions data
 71 |         if interval is None:
 72 |             # Compile parameter string for use in URL
 73 |             parameters = {'begin_date': begin_date,
 74 |                           'end_date': end_date,
 75 |                           'station': stationid,
 76 |                           'product': product,
 77 |                           'datum': datum,
 78 |                           'units': units,
 79 |                           'time_zone': time_zone,
 80 |                           'application': 'py_noaa',
 81 |                           'format': 'json'}
 82 | 
 83 |         else:
 84 |             # Compile parameter string, including interval, for use in URL
 85 |             parameters = {'begin_date': begin_date,
 86 |                           'end_date': end_date,
 87 |                           'station': stationid,
 88 |                           'product': product,
 89 |                           'datum': datum,
 90 |                           'interval': interval,
 91 |                           'units': units,
 92 |                           'time_zone': time_zone,
 93 |                           'application': 'py_noaa',
 94 |                           'format': 'json'}
 95 | 
 96 |     # If the data product is currents, check that a bin number is specified
 97 |     elif product == 'currents':
 98 |         if bin_num is None:
 99 |             raise ValueError(
100 |                 'No bin specified for current data. Bin info can be '
101 |                 'found on the station info page'
102 |                 ' (e.g., https://tidesandcurrents.noaa.gov/cdata/StationInfo?id=PUG1515)')
103 |         else:
104 |             # Compile parameter string for use in URL
105 |             parameters = {'begin_date': begin_date,
106 |                           'end_date': end_date,
107 |                           'station': stationid,
108 |                           'product': product,
109 |                           'bin': str(bin_num),
110 |                           'units': units,
111 |                           'time_zone': time_zone,
112 |                           'application': 'py_noaa',
113 |                           'format': 'json'}
114 | 
115 |     # For all other data types (e.g., meteoroligcal conditions)
116 |     else:
117 |         # If no interval provided, return 6-min met data
118 |         if interval is None:
119 |             # Compile parameter string for use in URL
120 |             parameters = {'begin_date': begin_date,
121 |                           'end_date': end_date,
122 |                           'station': stationid,
123 |                           'product': product,
124 |                           'units': units,
125 |                           'time_zone': time_zone,
126 |                           'application': 'py_noaa',
127 |                           'format': 'json'}
128 |         else:
129 |             # Compile parameter string, including interval, for use in URL
130 |             parameters = {'begin_date': begin_date,
131 |                           'end_date': end_date,
132 |                           'station': stationid,
133 |                           'product': product,
134 |                           'interval': interval,
135 |                           'units': units,
136 |                           'time_zone': time_zone,
137 |                           'application': 'py_noaa',
138 |                           'format': 'json'}
139 | 
140 |     # Build URL with requests library
141 |     query_url = requests.Request(
142 |         'GET', base_url, params=parameters).prepare().url
143 | 
144 |     return query_url
145 | 
146 | 
147 | def url2pandas(data_url, product, num_request_blocks):
148 |     """
149 |     Takes in a provided URL using the NOAA CO-OPS API conventions
150 |     (see https://tidesandcurrents.noaa.gov/api/) and converts the corresponding
151 |     JSON data into a pandas dataframe.
152 |     """
153 | 
154 |     response = requests.get(data_url)  # Get JSON data from URL
155 |     json_dict = response.json()  # Create a dictionary from JSON data
156 | 
157 |     df = pd.DataFrame()  # Initialize a empty DataFrame
158 | 
159 |     # Error when the requested begin_date and/or end_date does not have data
160 |     large_data_gap_error = 'No data was found. This product may not be offered at this station at the requested time.'
161 | 
162 |     # Handle coops.get_data() request size & errors from COOPS API, cases below:
163 |         # 1. coops.get_data() makes a large request (i.e. >1 block requests)
164 |         #    and an error occurs in one of the individual blocks of data
165 | 
166 |         # 2. coops.get_data() makes a large request (i.e. >1 block requests)
167 |         #    and an error does not occur in one of the individual blocks of data
168 | 
169 |         # 3. coops.get_data() makes a small request (i.e. 1 request)
170 |         #    and an error occurs in the data requested
171 | 
172 |         # 4. coops.get_data() makes a small request (i.e. 1 request)
173 |         #    and an error does not occur in the data requested
174 | 
175 |     # Case 1
176 |     if (num_request_blocks > 1) and ('error' in json_dict): 
177 |         error_message = json_dict['error'].get('message',
178 |                                                'Error retrieving data')
179 |         error_message = error_message.lstrip()
180 |         error_message = error_message.rstrip()
181 | 
182 |         if error_message == large_data_gap_error:
183 |             return df  # Return the empty DataFrame
184 |         else:
185 |             raise ValueError(
186 |                 json_dict['error'].get('message', 'Error retrieving data'))
187 | 
188 |     # Case 2
189 |     elif (num_request_blocks > 1) and ('error' not in json_dict):
190 |         if product == 'predictions':
191 |             key = 'predictions'
192 |         else:
193 |             key = 'data'
194 | 
195 |         df = json_normalize(json_dict[key])  # Parse JSON dict into dataframe
196 | 
197 |         return df
198 | 
199 |     # Case 3
200 |     elif (num_request_blocks == 1) and ('error' in json_dict):
201 |         raise ValueError(
202 |                 json_dict['error'].get('message', 'Error retrieving data'))
203 |     
204 |     # Case 4
205 |     else:
206 |         if product == 'predictions':
207 |             key = 'predictions'
208 |         else:
209 |             key = 'data'
210 | 
211 |         df = json_normalize(json_dict[key])  # Parse JSON dict into dataframe
212 | 
213 |         return df
214 | 
215 | 
216 | def parse_known_date_formats(dt_string):
217 |     """Attempt to parse CO-OPS accepted date formats."""
218 |     for fmt in ('%Y%m%d', '%Y%m%d %H:%M', '%m/%d/%Y', '%m/%d/%Y %H:%M'):
219 |         try:
220 |             return datetime.strptime(dt_string, fmt)
221 |         except ValueError:
222 |             pass
223 |     raise ValueError("No valid date format found."
224 |                      "See https://tidesandcurrents.noaa.gov/api/ "
225 |                      "for list of accepted date formats.")
226 | 
227 | 
228 | def get_data(
229 |         begin_date, end_date, stationid, product, datum=None, bin_num=None,
230 |         interval=None, units='metric', time_zone='gmt'):
231 |     """
232 |     Function to get data from NOAA CO-OPS API and convert it to a pandas
233 |     dataframe for convenient analysis.
234 | 
235 |     Info on the NOOA CO-OPS API can be found at https://tidesandcurrents.noaa.gov/api/,
236 |     the arguments listed below generally follow the same (or a very similar) format.
237 | 
238 |     Arguments:
239 |     begin_date -- the starting date of request (yyyyMMdd, yyyyMMdd HH:mm, MM/dd/yyyy, or MM/dd/yyyy HH:mm), string
240 |     end_date -- the ending date of request (yyyyMMdd, yyyyMMdd HH:mm, MM/dd/yyyy, or MM/dd/yyyy HH:mm), string
241 |     stationid -- station at which you want data, string
242 |     product -- the product type you would like, string
243 |     datum -- the datum to be used for water level data, string  (default None)
244 |     bin_num -- the bin number you would like your currents data at, int (default None)
245 |     interval -- the interval you would like data returned, string
246 |     units -- units to be used for data output, string (default metric)
247 |     time_zone -- time zone to be used for data output, string (default gmt)
248 |     """
249 |     # Convert dates to datetime objects so deltas can be calculated
250 |     begin_datetime = parse_known_date_formats(begin_date)
251 |     end_datetime = parse_known_date_formats(end_date)
252 |     delta = end_datetime - begin_datetime
253 | 
254 |     # If the length of our data request is less or equal to 31 days,
255 |     # we can pull the data from API in one request
256 |     if delta.days <= 31:
257 |         data_url = build_query_url(
258 |             begin_datetime.strftime("%Y%m%d %H:%M"),
259 |             end_datetime.strftime("%Y%m%d %H:%M"),
260 |             stationid, product, datum, bin_num, interval, units, time_zone)
261 | 
262 |         df = url2pandas(data_url, product, num_request_blocks=1)
263 | 
264 |     # If the length of the user specified data request is less than 365 days
265 |     # AND the product is hourly_height or high_low, we can pull data directly
266 |     # from the API in one request
267 |     elif delta.days <= 365 and (
268 |             product == 'hourly_height' or product == 'high_low'):
269 |         data_url = build_query_url(
270 |             begin_date, end_date, stationid, product, datum, bin_num, interval,
271 |             units, time_zone)
272 | 
273 |         df = url2pandas(data_url, product, num_request_blocks=1)
274 | 
275 |     # If the length of the user specified data request is greater than 365 days
276 |     # AND the product is hourly_height or high_low, we need to load data from
277 |     # the API in365 day blocks.
278 |     elif product == 'hourly_height' or product == 'high_low':
279 |         # Find the number of 365 day blocks in our desired period,
280 |         # constrain the upper limit of index in the for loop to follow
281 |         num_365day_blocks = int(math.floor(delta.days / 365))
282 | 
283 |         df = pd.DataFrame([])  # Empty dataframe for data from API requests
284 | 
285 |         # Loop through in 365 day blocks,
286 |         # adjust the begin_datetime and end_datetime accordingly,
287 |         # make a request to the NOAA CO-OPS API
288 |         for i in range(num_365day_blocks + 1):
289 |             begin_datetime_loop = begin_datetime + timedelta(days=(i * 365))
290 |             end_datetime_loop = begin_datetime_loop + timedelta(days=365)
291 | 
292 |             # If end_datetime_loop of the current 365 day block is greater
293 |             # than end_datetime specified by user, use end_datetime
294 |             if end_datetime_loop > end_datetime:
295 |                 end_datetime_loop = end_datetime
296 | 
297 |             # Build url for each API request as we proceed through the loop
298 |             data_url = build_query_url(
299 |                 begin_datetime_loop.strftime('%Y%m%d'),
300 |                 end_datetime_loop.strftime('%Y%m%d'),
301 |                 stationid, product, datum, bin_num, interval, units, time_zone)
302 |             
303 |             df_new = url2pandas(data_url, product, num_365day_blocks)  # Get dataframe for block
304 |             df = df.append(df_new)  # Append to existing dataframe
305 |             
306 |     # If the length of the user specified data request is greater than 31 days
307 |     # for any other products, we need to load data from the API in 31 day
308 |     # blocks
309 |     else:
310 |         # Find the number of 31 day blocks in our desired period,
311 |         # constrain the upper limit of index in the for loop to follow
312 |         num_31day_blocks = int(math.floor(delta.days / 31))
313 | 
314 |         df = pd.DataFrame([])  # Empty dataframe for data from API requests
315 | 
316 |         # Loop through in 31 day blocks,
317 |         # adjust the begin_datetime and end_datetime accordingly,
318 |         # make a request to the NOAA CO-OPS API
319 |         for i in range(num_31day_blocks + 1):
320 |             begin_datetime_loop = begin_datetime + timedelta(days=(i * 31))
321 |             end_datetime_loop = begin_datetime_loop + timedelta(days=31)
322 | 
323 |             # If end_datetime_loop of the current 31 day block is greater
324 |             # than end_datetime specified by user, use end_datetime
325 |             if end_datetime_loop > end_datetime:
326 |                 end_datetime_loop = end_datetime
327 | 
328 |             # Build URL for each API request as we proceed through the loop
329 |             data_url = build_query_url(
330 |                 begin_datetime_loop.strftime('%Y%m%d'),
331 |                 end_datetime_loop.strftime('%Y%m%d'),
332 |                 stationid, product, datum, bin_num, interval, units, time_zone)
333 |             
334 |             df_new = url2pandas(data_url, product, num_31day_blocks)  # Get dataframe for block
335 |             df = df.append(df_new)  # Append to existing dataframe
336 |             
337 |     # Rename output dataframe columns based on requested product
338 |     # and convert to useable data types
339 |     if product == 'water_level':
340 |         # Rename columns for clarity
341 |         df.rename(columns={'f': 'flags', 'q': 'QC', 's': 'sigma',
342 |                            't': 'date_time', 'v': 'water_level'},
343 |                   inplace=True)
344 | 
345 |         # Convert columns to numeric values
346 |         data_cols = df.columns.drop(['flags', 'QC', 'date_time'])
347 |         df[data_cols] = df[data_cols].apply(
348 |             pd.to_numeric, axis=1, errors='coerce')
349 | 
350 |         # Convert date & time strings to datetime objects
351 |         df['date_time'] = pd.to_datetime(df['date_time'])
352 | 
353 |     elif product == 'hourly_height':
354 |         # Rename columns for clarity
355 |         df.rename(columns={'f': 'flags', 's': 'sigma',
356 |                            't': 'date_time', 'v': 'water_level'},
357 |                   inplace=True)
358 | 
359 |         # Convert columns to numeric values
360 |         data_cols = df.columns.drop(['flags', 'date_time'])
361 |         df[data_cols] = df[data_cols].apply(
362 |             pd.to_numeric, axis=1, errors='coerce')
363 | 
364 |         # Convert date & time strings to datetime objects
365 |         df['date_time'] = pd.to_datetime(df['date_time'])
366 | 
367 |     elif product == 'high_low':
368 |         # Rename columns for clarity
369 |         df.rename(columns={'f': 'flags', 'ty': 'high_low',
370 |                            't': 'date_time', 'v': 'water_level'},
371 |                   inplace=True)
372 | 
373 |         # Separate to high and low dataframes
374 |         df_HH = df[df['high_low'] == "HH"].copy()
375 |         df_HH.rename(columns={'date_time': 'date_time_HH',
376 |                               'water_level': 'HH_water_level'},
377 |                      inplace=True)
378 | 
379 |         df_H = df[df['high_low'] == "H "].copy()
380 |         df_H.rename(columns={'date_time': 'date_time_H',
381 |                              'water_level': 'H_water_level'},
382 |                     inplace=True)
383 | 
384 |         df_L = df[df['high_low'].str.contains("L ")].copy()
385 |         df_L.rename(columns={'date_time': 'date_time_L',
386 |                              'water_level': 'L_water_level'},
387 |                     inplace=True)
388 | 
389 |         df_LL = df[df['high_low'].str.contains("LL")].copy()
390 |         df_LL.rename(columns={'date_time': 'date_time_LL',
391 |                               'water_level': 'LL_water_level'},
392 |                      inplace=True)
393 | 
394 |         # Extract dates (without time) for each entry
395 |         dates_HH = [x.date() for x in pd.to_datetime(df_HH['date_time_HH'])]
396 |         dates_H = [x.date() for x in pd.to_datetime(df_H['date_time_H'])]
397 |         dates_L = [x.date() for x in pd.to_datetime(df_L['date_time_L'])]
398 |         dates_LL = [x.date() for x in pd.to_datetime(df_LL['date_time_LL'])]
399 | 
400 |         # Set indices to datetime
401 |         df_HH['date_time'] = dates_HH
402 |         df_HH.index = df_HH['date_time']
403 |         df_H['date_time'] = dates_H
404 |         df_H.index = df_H['date_time']
405 |         df_L['date_time'] = dates_L
406 |         df_L.index = df_L['date_time']
407 |         df_LL['date_time'] = dates_LL
408 |         df_LL.index = df_LL['date_time']
409 | 
410 |         # Remove flags and combine to single dataframe
411 |         df_HH = df_HH.drop(
412 |             columns=['flags', 'high_low'])
413 |         df_H = df_H.drop(columns=['flags', 'high_low',
414 |                                   'date_time'])
415 |         df_L = df_L.drop(columns=['flags', 'high_low',
416 |                                   'date_time'])
417 |         df_LL = df_LL.drop(columns=['flags', 'high_low',
418 |                                     'date_time'])
419 | 
420 |         # Keep only one instance per date (based on max/min)
421 |         maxes = df_HH.groupby(df_HH.index).HH_water_level.transform(max)
422 |         df_HH = df_HH.loc[df_HH.HH_water_level == maxes]
423 |         maxes = df_H.groupby(df_H.index).H_water_level.transform(max)
424 |         df_H = df_H.loc[df_H.H_water_level == maxes]
425 |         mins = df_L.groupby(df_L.index).L_water_level.transform(max)
426 |         df_L = df_L.loc[df_L.L_water_level == mins]
427 |         mins = df_LL.groupby(df_LL.index).LL_water_level.transform(max)
428 |         df_LL = df_LL.loc[df_LL.LL_water_level == mins]
429 | 
430 |         df = df_HH.join(df_H, how='outer')
431 |         df = df.join(df_L, how='outer')
432 |         df = df.join(df_LL, how='outer')
433 | 
434 |         # Convert columns to numeric values
435 |         data_cols = df.columns.drop(
436 |             ['date_time', 'date_time_HH', 'date_time_H', 'date_time_L',
437 |              'date_time_LL'])
438 |         df[data_cols] = df[data_cols].apply(pd.to_numeric, axis=1,
439 |                                             errors='coerce')
440 | 
441 |         # Convert date & time strings to datetime objects
442 |         df['date_time'] = pd.to_datetime(df.index)
443 |         df['date_time_HH'] = pd.to_datetime(df['date_time_HH'])
444 |         df['date_time_H'] = pd.to_datetime(df['date_time_H'])
445 |         df['date_time_L'] = pd.to_datetime(df['date_time_L'])
446 |         df['date_time_LL'] = pd.to_datetime(df['date_time_LL'])
447 | 
448 |     elif product == 'predictions':
449 |         if interval == 'h':
450 |             # Rename columns for clarity
451 |             df.rename(columns={'t': 'date_time', 'v': 'predicted_wl'},
452 |                       inplace=True)
453 | 
454 |             # Convert columns to numeric values
455 |             data_cols = df.columns.drop(['date_time'])
456 | 
457 |         elif interval == 'hilo':
458 |             # Rename columns for clarity
459 |             df.rename(columns={'t': 'date_time', 'v': 'predicted_wl',
460 |                                'type': 'hi_lo'},
461 |                       inplace=True)
462 | 
463 |             # Convert columns to numeric values
464 |             data_cols = df.columns.drop(['date_time', 'hi_lo'])
465 | 
466 |         # Convert date & time strings to datetime objects
467 |         df['date_time'] = pd.to_datetime(df['date_time'])
468 | 
469 |     elif product == 'currents':
470 |         # Rename columns for clarity
471 |         df.rename(columns={'b': 'bin', 'd': 'direction',
472 |                            's': 'speed', 't': 'date_time'},
473 |                   inplace=True)
474 | 
475 |         # Convert columns to numeric values
476 |         data_cols = df.columns.drop(['date_time'])
477 |         df[data_cols] = df[data_cols].apply(pd.to_numeric, axis=1,
478 |                                             errors='coerce')
479 | 
480 |         # Convert date & time strings to datetime objects
481 |         df['date_time'] = pd.to_datetime(df['date_time'])
482 | 
483 |     elif product == 'wind':
484 |         # Rename columns for clarity
485 |         df.rename(columns={'d': 'dir', 'dr': 'compass',
486 |                            'f': 'flags', 'g': 'gust_spd',
487 |                            's': 'spd', 't': 'date_time'},
488 |                   inplace=True)
489 | 
490 |         # Convert columns to numeric values
491 |         data_cols = df.columns.drop(['date_time', 'flags', 'compass'])
492 |         df[data_cols] = df[data_cols].apply(pd.to_numeric, axis=1,
493 |                                             errors='coerce')
494 | 
495 |         # Convert date & time strings to datetime objects
496 |         df['date_time'] = pd.to_datetime(df['date_time'])
497 | 
498 |     elif product == 'air_pressure':
499 |         # Rename columns for clarity
500 |         df.rename(columns={'f': 'flags', 't': 'date_time', 'v': 'air_press'},
501 |                   inplace=True)
502 | 
503 |         # Convert columns to numeric values
504 |         data_cols = df.columns.drop(['date_time', 'flags'])
505 |         df[data_cols] = df[data_cols].apply(pd.to_numeric, axis=1,
506 |                                             errors='coerce')
507 | 
508 |         # Convert date & time strings to datetime objects
509 |         df['date_time'] = pd.to_datetime(df['date_time'])
510 | 
511 |     elif product == 'air_temperature':
512 |         # Rename columns for clarity
513 |         df.rename(columns={'f': 'flags', 't': 'date_time', 'v': 'air_temp'},
514 |                   inplace=True)
515 | 
516 |         # Convert columns to numeric values
517 |         data_cols = df.columns.drop(['date_time', 'flags'])
518 |         df[data_cols] = df[data_cols].apply(pd.to_numeric, axis=1,
519 |                                             errors='coerce')
520 | 
521 |         # Convert date & time strings to datetime objects
522 |         df['date_time'] = pd.to_datetime(df['date_time'])
523 | 
524 |     elif product == 'water_temperature':
525 |         # Rename columns for clarity
526 |         df.rename(columns={'f': 'flags', 't': 'date_time', 'v': 'water_temp'},
527 |                   inplace=True)
528 | 
529 |         # Convert columns to numeric values
530 |         data_cols = df.columns.drop(['date_time', 'flags'])
531 |         df[data_cols] = df[data_cols].apply(pd.to_numeric, axis=1,
532 |                                             errors='coerce')
533 | 
534 |         # Convert date & time strings to datetime objects
535 |         df['date_time'] = pd.to_datetime(df['date_time'])
536 | 
537 |     # Set datetime to index (for use in resampling)
538 |     df.index = df['date_time']
539 |     df = df.drop(columns=['date_time'])
540 | 
541 |     # Handle hourly requests for water_level and currents data
542 |     if (product == 'water_level') | (product == 'currents') & (
543 |             interval == 'h'):
544 |         df = df.resample('H').first()  # Only return the hourly data
545 | 
546 |     return df
547 | 


--------------------------------------------------------------------------------