├── .gitignore ├── AORC_Temperature_async_download.py ├── INTENT.md ├── LICENSE.md ├── MRMS_GageCorr_qpe_async_download.py ├── MRMS_multiSensor_qpe_async_download.py ├── NLDAS2_filelist.py ├── README.md ├── RTMA_Pressure_async_download.py ├── RTMA_Temp_async_download.py ├── async_ftp_cmip5_projections.py ├── async_ftp_snodas.py ├── livneh_historical_psl.py ├── requirements.txt ├── retrieve_hrrr_subhourly.py ├── retrieve_hrrr_surface_hourly.py ├── retrieve_qpe_gagecorr_01h.py └── retrieve_rtma_temperature.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | venv 3 | -------------------------------------------------------------------------------- /AORC_Temperature_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | from dateutil.relativedelta import relativedelta as relativeDelta 11 | 12 | async def download_coroutine(url, session, destination): 13 | async with async_timeout.timeout(1200): 14 | async with session.get(url) as response: 15 | if response.status == 200: 16 | fp = destination + os.sep + os.path.basename(url) 17 | with open(fp, 'wb') as f_handle: 18 | while True: 19 | chunk = await response.content.read(1024) 20 | if not chunk: 21 | break 22 | f_handle.write(chunk) 23 | else: 24 | print(url) 25 | return await response.release() 26 | 27 | async def main(loop, tmp, destination): 28 | 29 | async with aiohttp.ClientSession() as session: 30 | tasks = [download_coroutine(url, session, destination) for url in tmp] 31 | return await asyncio.gather(*tasks) 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | start = datetime(2021, 10, 1, 0, 0) 37 | end = datetime(2021, 12, 1, 0, 0) 38 | destination = r"D:\FY22\SanLorenzo\data\AORC" 39 | office = 'CNRFC' 40 | 41 | hour = relativeDelta(months=1) #AORC archive files are monthly zips, so the loop steps by one month 42 | os.makedirs(destination, exist_ok=True) 43 | 44 | #loop through and see if you already have the file locally 45 | date = start 46 | urls = [] 47 | opath = [] 48 | while date <= end: 49 | 50 | #need date like 197902 51 | url = f"https://hydrology.nws.noaa.gov/aorc-historic/AORC_{office}_4km/AORC_TMPR_4KM_{office}_{date.strftime('%Y%m')}.zip" 52 | 53 | filename = url.split("/")[-1] 54 | if not os.path.isfile(destination + os.sep + filename): 55 | urls.append(url) 56 | opath.append(destination + os.sep + filename) 57 | date += hour 58 | 59 | #Split urls into chunks so you won't overwhelm the NOAA AORC server with asynchronous downloads 60 | chunk_size = 50 61 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 62 | 63 | for tmp in chunked_urls: 64 | loop = asyncio.get_event_loop() 65 | results = loop.run_until_complete(main(loop, tmp, destination)) 66 | del loop, results 67 | -------------------------------------------------------------------------------- /INTENT.md: -------------------------------------------------------------------------------- 1 | Licensing Intent 2 | 3 | The intent is that this software and documentation ("Project") should be treated as if it is licensed under the license associated with the Project ("License") in the LICENSE.md file. However, because we are part of the United States (U.S.) Federal Government, it is not that simple.
4 | 5 | The portions of this Project written by United States (U.S.) Federal government employees within the scope of their federal employment are ineligible for copyright protection in the U.S.; this is generally understood to mean that these portions of the Project are placed in the public domain. 6 | 7 | In countries where copyright protection is available (which does not include the U.S.), contributions made by U.S. Federal government employees are released under the License. Merged contributions from private contributors are released under the License. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 U.S. Federal Government (in countries where recognized) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /MRMS_GageCorr_qpe_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | 11 | 12 | async def download_coroutine(url, session, destination): 13 | async with async_timeout.timeout(1200): 14 | async with session.get(url) as response: 15 | if response.status == 200: 16 | fp = destination + os.sep + os.path.basename(url) 17 | with open(fp, 'wb') as f_handle: 18 | while True: 19 | chunk = await response.content.read(1024) 20 | if not chunk: 21 | break 22 | f_handle.write(chunk) 23 | else: 24 | print(url) 25 | return await response.release() 26 | 27 | async def main(loop, tmp, destination): 28 | 29 | async with aiohttp.ClientSession() as session: 30 | tasks = [download_coroutine(url, session, destination) for url in tmp] 31 | return await asyncio.gather(*tasks) 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | start = datetime(2018, 12, 1, 0, 0) 37 | end = datetime(2019,1, 1, 0, 0) 38 | hour = timedelta(hours=1) 39 | destination = r"C:\workspace\ririe\HMS\data\precip" 40 | os.makedirs(destination, exist_ok=True) 41 | assert start < datetime(2020,10,15), "GageCorr MRMS data on or after 2020-10-15 does not exist, consider looking for MultiSensor qpe grids" 42 | assert end < datetime(2020,10,15), "GageCorr MRMS data on or after 2020-10-15 does not exist, consider looking for MultiSensor qpe grids" 43 | 44 | #loop through and see if you already have the file locally 45 | date = start 46 | urls = [] 47 | opath = [] 48 | while date < end: 49 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/GaugeCorr_QPE_01H/GaugeCorr_QPE_01H_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz".format( 50 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 51 | 52 | filename = url.split("/")[-1] 53 | if not os.path.isfile(destination + os.sep + filename): 54 | urls.append(url) 55 | opath.append(destination + os.sep + filename) 56 | date += hour 57 | 58 | #Split urls into chunks so you won't overwhelm the IA mesonet archive with asynchronous downloads 59 | chunk_size = 50 60 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 61 | 62 | for tmp in chunked_urls: 63 | loop = asyncio.get_event_loop() 64 | results = loop.run_until_complete(main(loop, tmp, destination)) 65 | del loop, results 66 | -------------------------------------------------------------------------------- /MRMS_multiSensor_qpe_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | 11 | 12 | async def download_coroutine(url, session, destination): 13 | async with async_timeout.timeout(1200): 14 | async with session.get(url) as response: 15 | if response.status == 200: 16 | fp = destination + os.sep + os.path.basename(url) 17 | with open(fp, 'wb') as f_handle: 18 | while True: 19 | chunk = await response.content.read(1024) 20 | if not chunk: 21 | break 22 | f_handle.write(chunk) 23 | else: 24 | print(url) 25 | return await response.release() 26 | 27 | async def main(loop, tmp, destination): 28 | 29 |
async with aiohttp.ClientSession() as session: 30 | tasks = [download_coroutine(url, session, destination) for url in tmp] 31 | return await asyncio.gather(*tasks) 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | start = datetime(2021, 10, 1, 0, 0) 37 | end = datetime(2021, 10, 2, 0, 0) 38 | destination = r"C:\workspace\ririe\HMS\data\precip" 39 | 40 | 41 | assert start >= datetime(2020,10,15), "MultiSensor MRMS data before 2020-10-15 does not exist, consider looking for GageCorr qpe grids" 42 | assert end >= datetime(2020,10,15), "MultiSensor MRMS data before 2020-10-15 does not exist, consider looking for GageCorr qpe grids" 43 | 44 | 45 | """ 46 | https://inside.nssl.noaa.gov/mrms/past-code-updates/ 47 | 48 | In the MRMS v12.0 update to NCO section, the third bullet from the bottom reads: 49 | - Multi-sensor QPE scheme using gauges and model QPFs to fill radar coverage gaps 50 | This suggests the MultiSensor_QPE product is gage corrected, so the data may simply have changed names in the MRMS migration to v12.0. 51 | """ 52 | 53 | hour = timedelta(hours=1) 54 | os.makedirs(destination, exist_ok=True) 55 | 56 | #loop through and see if you already have the file locally 57 | date = start 58 | urls = [] 59 | opath = [] 60 | while date < end: 61 | 62 | url = 'https://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/MultiSensor_QPE_01H_Pass2/MultiSensor_QPE_01H_Pass2_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz'.format( 63 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 64 | 65 | filename = url.split("/")[-1] 66 | if not os.path.isfile(destination + os.sep + filename): 67 | urls.append(url) 68 | opath.append(destination + os.sep + filename) 69 | date += hour 70 | 71 | #Split urls into chunks so you won't overwhelm the IA mesonet archive with asynchronous downloads 72 | chunk_size = 50 73 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 74 | 75 | for tmp in chunked_urls: 76 | loop = asyncio.get_event_loop() 77 | results = loop.run_until_complete(main(loop, tmp, destination)) 78 | del loop, results 79 | -------------------------------------------------------------------------------- /NLDAS2_filelist.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | 4 | ### The following steps only need to be done once 5 | # First, create a NASA EarthData account 6 | # Next, link GES DISC with your EarthData account: https://disc.gsfc.nasa.gov/earthdata-login 7 | # Then on your local machine, create a .netrc file placed at C:\Users\<username>\.netrc 8 | # .netrc is a text file with one line using your EarthData login: machine urs.earthdata.nasa.gov login <login> password <password> 9 | 10 | ### The following script works for link lists provided by EarthData GES DISC 11 | # Make sure to update the URLList string to the path of the downloaded link list file 12 | # Also make sure to update the string in FILENAME on line 21 to the folder path for the download 13 | 14 | URLList = "C:/Users/HEC/Downloads/subset_NLDAS_FORA0125_H_2.0_20220208_144246.txt" 15 | 16 | with open(URLList) as file: 17 | lines = file.readlines() 18 | 19 | for URL in lines: 20 | label = re.search('&LABEL=(.*)&SHORTNAME=', URL) 21 | FILENAME = 'C:/Users/HEC/Downloads/NLDAS2/' + label.group(1) 22 | result = requests.get(URL.strip()) #strip the trailing newline left by readlines() 23 | 24 | try: 25 | result.raise_for_status() 26 | f = open(FILENAME, 'wb') 27 | f.write(result.content) 28 | f.close() 29 | print('contents of URL written to ' + FILENAME) 30 | except requests.exceptions.HTTPError: 31 |
print('requests.get() returned an error code ' + str(result.status_code)) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Simple example scripts for retrieving hydrometeorological datasets (AORC, MRMS QPE, RTMA, NLDAS-2, HRRR, SNODAS, Livneh, and CMIP5/LOCA projections) from public archives. -------------------------------------------------------------------------------- /RTMA_Pressure_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from datetime import datetime 5 | from datetime import timedelta 6 | import os 7 | import nest_asyncio 8 | nest_asyncio.apply() 9 | import asyncio 10 | import aiohttp 11 | import async_timeout 12 | 13 | 14 | async def download_coroutine(url, session, destination): 15 | async with async_timeout.timeout(1200): 16 | async with session.get(url) as response: 17 | if response.status == 200: 18 | fp = destination + os.sep + os.path.basename(url) 19 | with open(fp, 'wb') as f_handle: 20 | while True: 21 | chunk = await response.content.read(1024) 22 | if not chunk: 23 | break 24 | f_handle.write(chunk) 25 | else: 26 | print(url) 27 | return await response.release() 28 | 29 | async def main(loop, tmp, destination): 30 | 31 | async with aiohttp.ClientSession() as session: 32 | tasks = [download_coroutine(url, session, destination) for url in tmp] 33 | return await asyncio.gather(*tasks) 34 | 35 | 36 | 37 | 38 | if __name__ == '__main__': 39 | 40 | start = datetime(2018, 12, 1, 0, 0) 41 | end = datetime(2019,1, 1, 0, 0) 42 | hour = timedelta(hours=1) 43 | destination = r"C:\Users\RDCRLDDH\Desktop\RTMA_Pressure_WY19" 44 | os.makedirs(destination, exist_ok=True) 45 | #loop through and see if you already have the file locally 46 | date = start 47 | urls = [] 48 | opath = [] 49 | while date < end: 50 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/grib2/ncep/RTMA/{:04d}{:02d}{:02d}{:02d}00_PRES.grib2".format( 51 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 52 | 53 | filename = url.split("/")[-1] 54 | if not os.path.isfile(destination + os.sep + filename): 55 | urls.append(url) 56 | opath.append(destination + os.sep + filename) 57 | date += hour 58 | 59 | #Split urls into chunks so you won't overwhelm the IA mesonet archive with asynchronous downloads 60 | chunk_size = 50 61 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 62 | 63 | for tmp in chunked_urls: 64 | loop = asyncio.get_event_loop() 65 | results = loop.run_until_complete(main(loop, tmp, destination)) 66 | del loop, results -------------------------------------------------------------------------------- /RTMA_Temp_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | 11 | async def download_coroutine(url, session, destination): 12 | async with async_timeout.timeout(1200): 13 | async with session.get(url) as response: 14 | if response.status == 200: 15 | fp = destination + os.sep + os.path.basename(url) 16 | with open(fp, 'wb') as f_handle: 17 | while True: 18 | chunk = await response.content.read(1024) 19 | if not chunk: 20 | break 21 | f_handle.write(chunk) 22 | else: 23 | print(url) 24 | return await response.release() 25 | 26 | async def main(loop, tmp, destination): 27 | 28 | async with
aiohttp.ClientSession() as session: 29 | tasks = [download_coroutine(url, session, destination) for url in tmp] 30 | return await asyncio.gather(*tasks) 31 | 32 | 33 | 34 | 35 | if __name__ == '__main__': 36 | 37 | start = datetime(2018, 12, 1, 0, 0) 38 | end = datetime(2019,1, 1, 0, 0) 39 | hour = timedelta(hours=1) 40 | destination = r"C:\Users\RDCRLDDH\Desktop\RTMA_Pressure_WY19" 41 | 42 | #loop through and see if you already have the file locally 43 | date = start 44 | urls = [] 45 | opath = [] 46 | while date < end: 47 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/grib2/ncep/RTMA/{:04d}{:02d}{:02d}{:02d}00_TMPK.grib2".format( 48 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 49 | 50 | filename = url.split("/")[-1] 51 | if not os.path.isfile(destination + os.sep + filename): 52 | urls.append(url) 53 | opath.append(destination + os.sep + filename) 54 | date += hour 55 | 56 | #Split urls into chunks so you wont overwhelm IA mesonet with asyncronous downloads 57 | chunk_size = 50 58 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 59 | 60 | for tmp in chunked_urls: 61 | loop = asyncio.get_event_loop() 62 | results = loop.run_until_complete(main(loop, tmp, destination)) 63 | del loop, results 64 | -------------------------------------------------------------------------------- /async_ftp_cmip5_projections.py: -------------------------------------------------------------------------------- 1 | # Tested on python 3.9 2 | 3 | import asyncio 4 | import aioftp 5 | import os 6 | import pathlib 7 | import aiofiles 8 | import logging 9 | import pathlib 10 | import aiofiles.os as aos 11 | import async_timeout 12 | import sys 13 | from pprint import pprint 14 | 15 | async def dowload_files(paths, dest_dir): 16 | """ 17 | Function to dowload files from ftp server. 18 | Lots of error catching to ensure complete file 19 | download. 
20 | 21 | Args: 22 | paths (list(pathlib.PurePath)): List of paths to download 23 | dest_dir (pathlib.PurePath): Output Directory 24 | """ 25 | for path in paths: 26 | remote_size = None 27 | dest = dest_dir.joinpath(path.name) 28 | logging.info(f'File to download: {path} \n') 29 | logging.info(f'Should download to {dest} \n') 30 | max_attempts = 0 31 | 32 | while max_attempts<30: 33 | 34 | try: 35 | logging.info(f'Starting Attempt {max_attempts}') 36 | async with async_timeout.timeout(20): 37 | async with aioftp.Client.context('192.12.137.7', user='anonymous', port=21) as client: 38 | 39 | if remote_size is None: 40 | logging.info(f'Getting remote stats for file {path}') 41 | remote_stat = await client.stat(path) 42 | remote_size = int(remote_stat['size']) 43 | logging.info(f' Remote file has size {remote_size}') 44 | 45 | async with aiofiles.open(dest, mode='ab', ) as local_file: 46 | 47 | #Check to see if local_file exists 48 | if await aos.path.exists(dest): 49 | stat = await aos.stat(dest) 50 | size = stat.st_size 51 | else: 52 | size = 0 53 | logging.info(f'Starting at position {size}') 54 | await local_file.seek(size) #aiofiles file methods are coroutines, so seek must be awaited 55 | 56 | if remote_size == size: 57 | break 58 | elif size > remote_size: 59 | pathlib.Path(dest).unlink() 60 | logging.info('local file larger than remote file, removing now') 61 | max_attempts +=1 62 | size = 0 63 | 64 | async with client.download_stream(path, offset=size) as stream: 65 | async for block in stream.iter_by_block(): 66 | await local_file.write(block) 67 | 68 | except aioftp.StatusCodeError as ftp_e: 69 | 70 | max_attempts +=1 71 | logging.info(f'Found aioftp error, trying another attempt') 72 | if ftp_e.received_codes ==( '426',): 73 | logging.info(f'Forced timeout error, trying another attempt') 74 | 75 | if ftp_e.received_codes != ( '426',): 76 | logging.info('new code') 77 | await asyncio.sleep(1) 78 | continue 79 | 80 | except asyncio.exceptions.TimeoutError as asy_e: 81 | logging.info(f'found time out exception') 82 | max_attempts +=1 83 | continue 84 | 85 | 86 | 87 | async def list_files(model, var, rcp ): 88 | """ 89 | Function to recursively search FTP for desired files.
90 | Args: 91 | model (str): CMIP5 GCM Model name 92 | var (str): Variable to download ['pr','tasmax','tasmin','DTR'] 93 | rcp (str): rcp name ['rcp45', 'rcp85'] 94 | Returns: 95 | paths (list(pathlib.PurePath)): List of paths found to dowload 96 | """ 97 | 98 | async with aioftp.Client.context('192.12.137.7', user='anonymous', port=21) as client: 99 | 100 | await client.change_directory(f'pub/dcp/archive/cmip5/loca/LOCA_2016-04-02/{model}')#/archive/cmip5/loca/LOCA_2016-04-02/ 101 | _path = await client.get_current_directory() 102 | 103 | paths = [] 104 | 105 | async for path, info in client.list(_path, recursive=True): 106 | 107 | if rcp in path.name and '16th' in path.name and var in path.name and path.suffix =='.nc': 108 | paths.append(path) 109 | 110 | 111 | 112 | return paths 113 | 114 | async def main( model, var, rcp, out_dir): 115 | 116 | paths = await list_files(model, var, rcp) 117 | 118 | pprint(paths) 119 | 120 | await dowload_files(paths, pathlib.PurePath(out_dir)) 121 | 122 | if __name__ == '__main__': 123 | 124 | logging.basicConfig( 125 | level=logging.INFO, 126 | format="%(asctime)s [%(name)s] %(message)s", 127 | datefmt="[%H:%M:%S]:", 128 | ) 129 | 130 | model ='ACCESS1-3' 131 | print(model) 132 | 133 | for rcp in ['rcp45', 'rcp85']: 134 | for var in ['pr','tasmin','tasmax']: 135 | 136 | out_dir = f'output/{model}/{rcp}/{var}' 137 | 138 | os.makedirs(out_dir, exist_ok=True) 139 | 140 | loop = asyncio.get_event_loop() 141 | loop.set_debug(True) 142 | loop.run_until_complete(main(model, var, rcp, out_dir)) -------------------------------------------------------------------------------- /async_ftp_snodas.py: -------------------------------------------------------------------------------- 1 | # Tested on python 3.9 2 | import pandas as pd 3 | import asyncio 4 | import aioftp 5 | import os 6 | import pathlib 7 | import aiofiles 8 | import logging 9 | import pathlib 10 | import aiofiles.os as aos 11 | import async_timeout 12 | import sys 13 | from pprint import pprint 14 | 15 | async def dowload_files(paths, dest_dir): 16 | """ 17 | Function to dowload files from ftp server. 18 | Lots of error catching to ensure complete file 19 | download. 
20 | 21 | Args: 22 | paths (list(pathlib.PurePath)): List of server paths to download 23 | dest_dir (pathlib.PurePath): Output Directory 24 | """ 25 | for path in paths: 26 | remote_size = None 27 | dest = dest_dir.joinpath(path.name) 28 | logging.info(f'File to download: {path} \n') 29 | logging.info(f'Should download to {dest} \n') 30 | max_attempts = 0 31 | 32 | while max_attempts<30: 33 | 34 | try: 35 | logging.info(f'Starting Attempt {max_attempts}') 36 | async with async_timeout.timeout(20): 37 | async with aioftp.Client.context('128.138.135.20', user='anonymous', port=21) as client: 38 | 39 | if remote_size is None: 40 | logging.info(f'Getting remote stats for file {path}') 41 | remote_stat = await client.stat(path) 42 | remote_size = int(remote_stat['size']) 43 | logging.info(f' Remote file has size {remote_size}') 44 | 45 | async with aiofiles.open(dest, mode='ab', ) as local_file: 46 | 47 | #Check to see if local_file exists 48 | if await aos.path.exists(dest): 49 | stat = await aos.stat(dest) 50 | size = stat.st_size 51 | else: 52 | size = 0 53 | logging.info(f'Starting at position {size}') 54 | await local_file.seek(size) #aiofiles file methods are coroutines, so seek must be awaited 55 | 56 | if remote_size == size: 57 | break 58 | elif size > remote_size: 59 | pathlib.Path(dest).unlink() 60 | logging.info('local file larger than remote file, removing now') 61 | max_attempts +=1 62 | size = 0 63 | 64 | async with client.download_stream(path, offset=size) as stream: 65 | async for block in stream.iter_by_block(): 66 | await local_file.write(block) 67 | 68 | except aioftp.StatusCodeError as ftp_e: 69 | 70 | max_attempts +=1 71 | logging.info(f'Found aioftp error, trying another attempt') 72 | if ftp_e.received_codes ==( '426',): 73 | logging.info(f'Forced timeout error, trying another attempt') 74 | 75 | if ftp_e.received_codes != ( '426',): 76 | logging.info('new code') 77 | await asyncio.sleep(1) 78 | continue 79 | 80 | except asyncio.exceptions.TimeoutError as asy_e: 81 | logging.info(f'found time out exception') 82 | max_attempts +=1 83 | continue 84 | 85 | 86 | 87 | async def list_files(month_year ): 88 | """ 89 | Function to recursively search FTP for desired files.
90 | Args: 91 | month_year (list(str))): list of dates of interest (yyyymmdd) 92 | Returns: 93 | paths (list(pathlib.PurePath)): List of paths found to dowload 94 | """ 95 | 96 | async with aioftp.Client.context('128.138.135.20', user='anonymous', port=21) as client: 97 | 98 | await client.change_directory(f'DATASETS/NOAA/G02158/masked/') 99 | _path = await client.get_current_directory() 100 | 101 | paths = [] 102 | 103 | async for path, info in client.list(_path, recursive=True): 104 | 105 | if "SNODAS" in path.name and path.suffix =='.tar' and path.name.split('_')[1][:-4] in month_year: 106 | paths.append(path) 107 | 108 | 109 | 110 | return paths 111 | 112 | async def main( month_year, out_dir): 113 | 114 | paths = await list_files(month_year) 115 | 116 | pprint(paths) 117 | 118 | await dowload_files(paths, pathlib.PurePath(out_dir)) 119 | 120 | if __name__ == '__main__': 121 | 122 | logging.basicConfig( 123 | level=logging.INFO, 124 | format="%(asctime)s [%(name)s] %(message)s", 125 | datefmt="[%H:%M:%S]:", 126 | ) 127 | 128 | start_date = '2008-10-01' 129 | end_date = '2010-10-01' 130 | drange = pd.date_range(start_date, end_date, freq ='D') 131 | 132 | #list of dates to download 133 | month_year = [f'{i.strftime("%Y%m%d")}' for i in drange if i.month not in [8,9]] 134 | 135 | out_dir = f'output/SNODAS' 136 | 137 | os.makedirs(out_dir, exist_ok=True) 138 | 139 | loop = asyncio.get_event_loop() 140 | loop.set_debug(True) 141 | loop.run_until_complete(main(month_year, out_dir)) -------------------------------------------------------------------------------- /livneh_historical_psl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from datetime import datetime 4 | import asyncio 5 | import aiohttp 6 | import async_timeout 7 | import nest_asyncio 8 | nest_asyncio.apply() 9 | 10 | async def download_coroutine(url, session, destination): 11 | 12 | async with async_timeout.timeout(1200): 13 | async with session.get(url) as response: 14 | if response.status == 200: 15 | fp = destination + os.sep + os.path.basename(url) 16 | with open(fp, 'wb') as f_handle: 17 | while True: 18 | chunk = await response.content.read(1024) 19 | if not chunk: 20 | break 21 | f_handle.write(chunk) 22 | else: 23 | print(url) 24 | return await response.release() 25 | 26 | 27 | async def main(loop, tmp, destination): 28 | session_timeout = aiohttp.ClientTimeout(total=None) 29 | 30 | async with aiohttp.ClientSession(timeout=session_timeout) as session: 31 | tasks = [download_coroutine(url, session, destination) for url in tmp] 32 | return await asyncio.gather(*tasks) 33 | 34 | 35 | if __name__ == '__main__': 36 | logging.basicConfig( 37 | level=logging.INFO, 38 | format="%(asctime)s [%(name)s] %(message)s", 39 | datefmt="[%H:%M:%S]:", 40 | ) 41 | 42 | start = datetime(1972, 1, 1, 0, 0) 43 | end = datetime(2018, 1,1, 0, 0) 44 | yearIncrement = 1 45 | outRoot = r"C:\workspace\prospectHmsAdvanced\output" 46 | 47 | variables = ['tmin','tmax','prec'] 48 | 49 | for variable in variables: 50 | destination = rf'{outRoot}\{variable}' 51 | os.makedirs(destination, exist_ok=True) 52 | 53 | #loop through and see if you already have the file locally 54 | date = start 55 | urls = [] 56 | opath = [] 57 | while date < end: 58 | url = f"https://psl.noaa.gov/thredds/fileServer/Datasets/livneh/metvars/{variable}.{date.year}.nc" 59 | 60 | filename = url.split("/")[-1] 61 | # if not os.path.isfile(destination + os.sep + filename): 62 | urls.append(url) 63 | 
opath.append(destination + os.sep + filename) 64 | date = datetime(date.year + yearIncrement, date.month, date.day) 65 | 66 | #Split urls into chunks so you won't overwhelm the server 67 | chunk_size = 3 68 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 69 | 70 | for tmp in chunked_urls: 71 | loop = asyncio.get_event_loop() 72 | loop.set_debug(True) 73 | results = loop.run_until_complete(main(loop, tmp, destination)) 74 | del loop, results 75 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydrologicEngineeringCenter/data-retrieval-scripts/f9b38563470b33c6c1e0f4798a7141c7e039ddc4/requirements.txt -------------------------------------------------------------------------------- /retrieve_hrrr_subhourly.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from datetime import datetime 3 | import os 4 | 5 | date = datetime.today().strftime('%Y%m%d') 6 | cycle = 0 7 | 8 | for hour in range(1, 19): 9 | url = "http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.{date}/conus/hrrr.t{:02d}z.wrfsubhf{:02d}.grib2".format(cycle, hour, date=date) 10 | print(url) 11 | 12 | filename = url.split("/")[-1] 13 | destination = "C:/Temp/hrrr/" + date + os.sep + "{:02d}".format(cycle) 14 | if not os.path.isdir(os.path.split(destination)[0]): 15 | os.makedirs(os.path.split(destination)[0]) 16 | if not os.path.isdir(destination): 17 | os.mkdir(destination) 18 | with open(destination + os.sep + filename, 'wb') as f: 19 | f.write(urllib.request.urlopen(url).read()) 20 | -------------------------------------------------------------------------------- /retrieve_hrrr_surface_hourly.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from datetime import datetime 3 | import os 4 | 5 | date = datetime.today().strftime('%Y%m%d') 6 | cycle = 0 7 | 8 | for hour in range(1, 37): 9 | url = "http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.{date}/conus/hrrr.t{:02d}z.wrfsfcf{:02d}.grib2".format( 10 | cycle, hour, date=date) 11 | print(url) 12 | 13 | filename = url.split("/")[-1] 14 | 15 | if not os.path.isdir("C:/Temp/hrrr/" + date + os.sep): 16 | os.makedirs("C:/Temp/hrrr/" + date + os.sep) 17 | with open("C:/Temp/hrrr/" + date + os.sep + filename, 'wb') as f: 18 | f.write(urllib.request.urlopen(url).read()) 19 | -------------------------------------------------------------------------------- /retrieve_qpe_gagecorr_01h.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from urllib.request import HTTPError 3 | from datetime import datetime 4 | from datetime import timedelta 5 | import os 6 | 7 | start = datetime(2018, 9, 1, 0, 0) 8 | end = datetime(2018, 10, 1, 0, 0) 9 | hour = timedelta(hours=1) 10 | 11 | missing_dates = [] 12 | fallback_to_radaronly = True #Enables a post-processing step that will go through the list of missing dates for gage-corrected 13 | ############################# and tries to retrieve the radar-only values if they exist.
14 | 15 | destination = "C:/Temp/qpe" 16 | 17 | date = start 18 | 19 | while date <= end: 20 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/GaugeCorr_QPE_01H/GaugeCorr_QPE_01H_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz".format( 21 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 22 | filename = url.split("/")[-1] 23 | try: 24 | fetched_request = urllib.request.urlopen(url) 25 | except HTTPError as e: 26 | missing_dates.append(date) 27 | else: 28 | with open(destination + os.sep + filename, 'wb') as f: 29 | f.write(fetched_request.read()) 30 | finally: 31 | date += hour 32 | 33 | if fallback_to_radaronly: 34 | radar_also_missing = [] 35 | for date in missing_dates: 36 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/RadarOnly_QPE_01H/RadarOnly_QPE_01H_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz".format( 37 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 38 | filename = url.split("/")[-1] 39 | try: 40 | fetched_request = urllib.request.urlopen(url) 41 | except HTTPError as e: 42 | radar_also_missing.append(date) 43 | else: 44 | with open(destination + os.sep + filename, 'wb') as f: 45 | f.write(fetched_request.read()) 46 | -------------------------------------------------------------------------------- /retrieve_rtma_temperature.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from urllib.request import HTTPError 3 | from datetime import datetime 4 | from datetime import timedelta 5 | import os 6 | 7 | start = datetime(2017, 1, 1, 0, 0) 8 | end = datetime(2017, 1, 3, 0, 0) 9 | hour = timedelta(hours=1) 10 | 11 | missing_dates = [] 12 | 13 | destination = "C:/Temp" 14 | 15 | date = start 16 | 17 | while date < end: 18 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/grib2/ncep/RTMA/{:04d}{:02d}{:02d}{:02d}00_TMPK.grib2".format( 19 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 20 | filename = url.split("/")[-1] 21 | try: 22 | fetched_request = urllib.request.urlopen(url) 23 | print("") 24 | print("opening: " + url) 25 | except HTTPError as e: 26 | missing_dates.append(date) 27 | else: 28 | with open(destination + os.sep + filename, 'wb') as f: 29 | f.write(fetched_request.read()) 30 | finally: 31 | date += hour --------------------------------------------------------------------------------
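Note on the two urllib-based scripts above: retrieve_qpe_gagecorr_01h.py and retrieve_rtma_temperature.py accumulate the hours that returned HTTP errors in missing_dates (and radar_also_missing) but never report them. A minimal sketch of a summary step that could be appended to either script, assuming the variable names used above, is:

    # Hypothetical reporting step (not part of the original scripts):
    # summarize any hours whose grids could not be retrieved.
    if missing_dates:
        print(f"{len(missing_dates)} file(s) could not be retrieved:")
        for d in missing_dates:
            print("  " + d.strftime("%Y-%m-%d %H:00"))
    else:
        print("All requested files were retrieved.")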