├── .gitignore ├── AORC_Temperature_async_download.py ├── INTENT.md ├── LICENSE.md ├── MRMS_GageCorr_qpe_async_download.py ├── MRMS_multiSensor_qpe_async_download.py ├── NLDAS2_filelist.py ├── README.md ├── RTMA_Pressure_async_download.py ├── RTMA_Temp_async_download.py ├── async_ftp_cmip5_projections.py ├── async_ftp_snodas.py ├── livneh_historical_psl.py ├── requirements.txt ├── retrieve_hrrr_subhourly.py ├── retrieve_hrrr_surface_hourly.py ├── retrieve_qpe_gagecorr_01h.py └── retrieve_rtma_temperature.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | venv 3 | -------------------------------------------------------------------------------- /AORC_Temperature_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | from dateutil.relativedelta import relativedelta as relativeDelta 11 | 12 | async def download_coroutine(url, session, destination): 13 | async with async_timeout.timeout(1200): 14 | async with session.get(url) as response: 15 | if response.status == 200: 16 | fp = destination + os.sep + os.path.basename(url) 17 | with open(fp, 'wb') as f_handle: 18 | while True: 19 | chunk = await response.content.read(1024) 20 | if not chunk: 21 | break 22 | f_handle.write(chunk) 23 | else: 24 | print(url) 25 | return await response.release() 26 | 27 | async def main(loop, tmp, destination): 28 | 29 | async with aiohttp.ClientSession() as session: 30 | tasks = [download_coroutine(url, session, destination) for url in tmp] 31 | return await asyncio.gather(*tasks) 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | start = datetime(2021, 10, 1, 0, 0) 37 | end = datetime(2021, 12, 1, 0, 0) 38 | destination = r"D:\FY22\SanLorenzo\data\AORC" 39 | office = 'CNRFC' 40 | 41 | hour = relativeDelta(months=1) #AORC archive files are monthly zips, so the loop steps by one month 42 | os.makedirs(destination, exist_ok=True) 43 | 44 | #loop through and see if you already have the file locally 45 | date = start 46 | urls = [] 47 | opath = [] 48 | while date <= end: 49 | 50 | #need date like 197902 51 | url = f"https://hydrology.nws.noaa.gov/aorc-historic/AORC_{office}_4km/AORC_TMPR_4KM_{office}_{date.strftime('%Y%m')}.zip" 52 | 53 | filename = url.split("/")[-1] 54 | if not os.path.isfile(destination + os.sep + filename): 55 | urls.append(url) 56 | opath.append(destination + os.sep + filename) 57 | date += hour 58 | 59 | #Split urls into chunks so you won't overwhelm the NOAA AORC server with asynchronous downloads 60 | chunk_size = 50 61 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 62 | 63 | for tmp in chunked_urls: 64 | loop = asyncio.get_event_loop() 65 | results = loop.run_until_complete(main(loop, tmp, destination)) 66 | del loop, results 67 | -------------------------------------------------------------------------------- /INTENT.md: -------------------------------------------------------------------------------- 1 | Licensing Intent 2 | 3 | The intent is that this software and documentation ("Project") should be treated as if it is licensed under the license associated with the Project ("License") in the LICENSE.md file. However, because we are part of the United States (U.S.) Federal Government, it is not that simple.
4 | 5 | The portions of this Project written by United States (U.S.) Federal government employees within the scope of their federal employment are ineligible for copyright protection in the U.S.; this is generally understood to mean that these portions of the Project are placed in the public domain. 6 | 7 | In countries where copyright protection is available (which does not include the U.S.), contributions made by U.S. Federal government employees are released under the License. Merged contributions from private contributors are released under the License. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 U.S. Federal Government (in countries where recognized) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /MRMS_GageCorr_qpe_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | 11 | 12 | async def download_coroutine(url, session, destination): 13 | async with async_timeout.timeout(1200): 14 | async with session.get(url) as response: 15 | if response.status == 200: 16 | fp = destination + os.sep + os.path.basename(url) 17 | with open(fp, 'wb') as f_handle: 18 | while True: 19 | chunk = await response.content.read(1024) 20 | if not chunk: 21 | break 22 | f_handle.write(chunk) 23 | else: 24 | print(url) 25 | return await response.release() 26 | 27 | async def main(loop, tmp, destination): 28 | 29 | async with aiohttp.ClientSession() as session: 30 | tasks = [download_coroutine(url, session, destination) for url in tmp] 31 | return await asyncio.gather(*tasks) 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | start = datetime(2018, 12, 1, 0, 0) 37 | end = datetime(2019,1, 1, 0, 0) 38 | hour = timedelta(hours=1) 39 | destination = r"C:\workspace\ririe\HMS\data\precip" 40 | os.makedirs(destination, exist_ok=True) 41 | assert start < datetime(2020,10,15), "GageCorr MRMS data on or after 2020-10-15 does not exist, consider looking for MultiSensor qpe grids" 42 | assert end < datetime(2020,10,15), "GageCorr MRMS data on or after 2020-10-15 does not exist, consider looking for MultiSensor qpe grids" 43 | 44 | #loop through and see if you already have the file locally 45 | date = start 46 | urls = [] 47 | opath = [] 48 | while date < end: 49 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/GaugeCorr_QPE_01H/GaugeCorr_QPE_01H_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz".format( 50 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 51 | 52 | filename = url.split("/")[-1] 53 | if not os.path.isfile(destination + os.sep + filename): 54 | urls.append(url) 55 | opath.append(destination + os.sep + filename) 56 | date += hour 57 | 58 | #Split urls into chunks so you won't overwhelm the IA mesonet archive with asynchronous downloads 59 | chunk_size = 50 60 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 61 | 62 | for tmp in chunked_urls: 63 | loop = asyncio.get_event_loop() 64 | results = loop.run_until_complete(main(loop, tmp, destination)) 65 | del loop, results 66 | -------------------------------------------------------------------------------- /MRMS_multiSensor_qpe_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | 11 | 12 | async def download_coroutine(url, session, destination): 13 | async with async_timeout.timeout(1200): 14 | async with session.get(url) as response: 15 | if response.status == 200: 16 | fp = destination + os.sep + os.path.basename(url) 17 | with open(fp, 'wb') as f_handle: 18 | while True: 19 | chunk = await response.content.read(1024) 20 | if not chunk: 21 | break 22 | f_handle.write(chunk) 23 | else: 24 | print(url) 25 | return await response.release() 26 | 27 | async def main(loop, tmp, destination): 28 | 29 |
async with aiohttp.ClientSession() as session: 30 | tasks = [download_coroutine(url, session, destination) for url in tmp] 31 | return await asyncio.gather(*tasks) 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | start = datetime(2021, 10, 1, 0, 0) 37 | end = datetime(2021, 10, 2, 0, 0) 38 | destination = r"C:\workspace\ririe\HMS\data\precip" 39 | 40 | 41 | assert start >= datetime(2020,10,15), "MultiSensor MRMS data before 2020-10-15 does not exist, consider looking for GageCorr qpe grids" 42 | assert end >= datetime(2020,10,15), "MultiSensor MRMS data before 2020-10-15 does not exist, consider looking for GageCorr qpe grids" 43 | 44 | 45 | """ 46 | https://inside.nssl.noaa.gov/mrms/past-code-updates/ 47 | 48 | In the MRMS v12.0 update to NCO section, the third bullet from the bottom reads: 49 | - Multi-sensor QPE scheme using gauges and model QPFs to fill radar coverage gaps 50 | This suggests the MultiSensor_QPE product is gage corrected, so the data may simply have changed names in the MRMS migration to v12.0. 51 | """ 52 | 53 | hour = timedelta(hours=1) 54 | os.makedirs(destination, exist_ok=True) 55 | 56 | #loop through and see if you already have the file locally 57 | date = start 58 | urls = [] 59 | opath = [] 60 | while date < end: 61 | 62 | url = 'https://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/MultiSensor_QPE_01H_Pass2/MultiSensor_QPE_01H_Pass2_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz'.format( 63 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 64 | 65 | filename = url.split("/")[-1] 66 | if not os.path.isfile(destination + os.sep + filename): 67 | urls.append(url) 68 | opath.append(destination + os.sep + filename) 69 | date += hour 70 | 71 | #Split urls into chunks so you won't overwhelm the IA mesonet archive with asynchronous downloads 72 | chunk_size = 50 73 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 74 | 75 | for tmp in chunked_urls: 76 | loop = asyncio.get_event_loop() 77 | results = loop.run_until_complete(main(loop, tmp, destination)) 78 | del loop, results 79 | -------------------------------------------------------------------------------- /NLDAS2_filelist.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | 4 | ### The following steps only need to be done once 5 | # First, create a NASA EarthData account 6 | # Next, link GES DISC with your EarthData account: https://disc.gsfc.nasa.gov/earthdata-login 7 | # Then on your local machine, create a .netrc file placed at C:\Users\<username>\.netrc 8 | # .netrc is a text file with one line using your EarthData login: machine urs.earthdata.nasa.gov login <login> password <password> 9 | 10 | ### The following script works for link lists provided by EarthData GES DISC 11 | # Make sure to update the URLList string to the path of the downloaded link list file 12 | # Also make sure to update the string in FILENAME on line 21 to the folder path for the download 13 | 14 | URLList = "C:/Users/HEC/Downloads/subset_NLDAS_FORA0125_H_2.0_20220208_144246.txt" 15 | 16 | with open(URLList) as file: 17 | lines = file.readlines() 18 | 19 | for URL in lines: 20 | label = re.search('&LABEL=(.*)&SHORTNAME=', URL) 21 | FILENAME = 'C:/Users/HEC/Downloads/NLDAS2/' + label.group(1) 22 | result = requests.get(URL.strip()) #strip the trailing newline left by readlines() 23 | 24 | try: 25 | result.raise_for_status() 26 | f = open(FILENAME, 'wb') 27 | f.write(result.content) 28 | f.close() 29 | print('contents of URL written to ' + FILENAME) 30 | except requests.exceptions.HTTPError: 31 |
print('requests.get() returned an error code ' + str(result.status_code)) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Simple example scripts for retrieving hydrometeorological datasets (AORC, MRMS QPE, RTMA, NLDAS-2, HRRR, SNODAS, Livneh, and CMIP5/LOCA projections) from public archives. -------------------------------------------------------------------------------- /RTMA_Pressure_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from datetime import datetime 5 | from datetime import timedelta 6 | import os 7 | import nest_asyncio 8 | nest_asyncio.apply() 9 | import asyncio 10 | import aiohttp 11 | import async_timeout 12 | 13 | 14 | async def download_coroutine(url, session, destination): 15 | async with async_timeout.timeout(1200): 16 | async with session.get(url) as response: 17 | if response.status == 200: 18 | fp = destination + os.sep + os.path.basename(url) 19 | with open(fp, 'wb') as f_handle: 20 | while True: 21 | chunk = await response.content.read(1024) 22 | if not chunk: 23 | break 24 | f_handle.write(chunk) 25 | else: 26 | print(url) 27 | return await response.release() 28 | 29 | async def main(loop, tmp, destination): 30 | 31 | async with aiohttp.ClientSession() as session: 32 | tasks = [download_coroutine(url, session, destination) for url in tmp] 33 | return await asyncio.gather(*tasks) 34 | 35 | 36 | 37 | 38 | if __name__ == '__main__': 39 | 40 | start = datetime(2018, 12, 1, 0, 0) 41 | end = datetime(2019,1, 1, 0, 0) 42 | hour = timedelta(hours=1) 43 | destination = r"C:\Users\RDCRLDDH\Desktop\RTMA_Pressure_WY19" 44 | os.makedirs(destination, exist_ok=True) 45 | #loop through and see if you already have the file locally 46 | date = start 47 | urls = [] 48 | opath = [] 49 | while date < end: 50 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/grib2/ncep/RTMA/{:04d}{:02d}{:02d}{:02d}00_PRES.grib2".format( 51 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 52 | 53 | filename = url.split("/")[-1] 54 | if not os.path.isfile(destination + os.sep + filename): 55 | urls.append(url) 56 | opath.append(destination + os.sep + filename) 57 | date += hour 58 | 59 | #Split urls into chunks so you won't overwhelm the IA mesonet archive with asynchronous downloads 60 | chunk_size = 50 61 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 62 | 63 | for tmp in chunked_urls: 64 | loop = asyncio.get_event_loop() 65 | results = loop.run_until_complete(main(loop, tmp, destination)) 66 | del loop, results -------------------------------------------------------------------------------- /RTMA_Temp_async_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import datetime 3 | from datetime import timedelta 4 | import os 5 | import nest_asyncio 6 | nest_asyncio.apply() 7 | import asyncio 8 | import aiohttp 9 | import async_timeout 10 | 11 | async def download_coroutine(url, session, destination): 12 | async with async_timeout.timeout(1200): 13 | async with session.get(url) as response: 14 | if response.status == 200: 15 | fp = destination + os.sep + os.path.basename(url) 16 | with open(fp, 'wb') as f_handle: 17 | while True: 18 | chunk = await response.content.read(1024) 19 | if not chunk: 20 | break 21 | f_handle.write(chunk) 22 | else: 23 | print(url) 24 | return await response.release() 25 | 26 | async def main(loop, tmp, destination): 27 | 28 | async with
aiohttp.ClientSession() as session: 29 | tasks = [download_coroutine(url, session, destination) for url in tmp] 30 | return await asyncio.gather(*tasks) 31 | 32 | 33 | 34 | 35 | if __name__ == '__main__': 36 | 37 | start = datetime(2018, 12, 1, 0, 0) 38 | end = datetime(2019,1, 1, 0, 0) 39 | hour = timedelta(hours=1) 40 | destination = r"C:\Users\RDCRLDDH\Desktop\RTMA_Pressure_WY19" 41 | 42 | #loop through and see if you already have the file locally 43 | date = start 44 | urls = [] 45 | opath = [] 46 | while date < end: 47 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/grib2/ncep/RTMA/{:04d}{:02d}{:02d}{:02d}00_TMPK.grib2".format( 48 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 49 | 50 | filename = url.split("/")[-1] 51 | if not os.path.isfile(destination + os.sep + filename): 52 | urls.append(url) 53 | opath.append(destination + os.sep + filename) 54 | date += hour 55 | 56 | #Split urls into chunks so you wont overwhelm IA mesonet with asyncronous downloads 57 | chunk_size = 50 58 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 59 | 60 | for tmp in chunked_urls: 61 | loop = asyncio.get_event_loop() 62 | results = loop.run_until_complete(main(loop, tmp, destination)) 63 | del loop, results 64 | -------------------------------------------------------------------------------- /async_ftp_cmip5_projections.py: -------------------------------------------------------------------------------- 1 | # Tested on python 3.9 2 | 3 | import asyncio 4 | import aioftp 5 | import os 6 | import pathlib 7 | import aiofiles 8 | import logging 9 | import pathlib 10 | import aiofiles.os as aos 11 | import async_timeout 12 | import sys 13 | from pprint import pprint 14 | 15 | async def dowload_files(paths, dest_dir): 16 | """ 17 | Function to dowload files from ftp server. 18 | Lots of error catching to ensure complete file 19 | download. 
20 | 21 | Args: 22 | paths (list(pathlib.PurePath)): List of paths to download 23 | dest_dir (pathlib.PurePath): Output Directory 24 | """ 25 | for path in paths: 26 | remote_size = None 27 | dest = dest_dir.joinpath(path.name) 28 | logging.info(f'File to download: {path} \n') 29 | logging.info(f'Should download to {dest} \n') 30 | max_attempts = 0 31 | 32 | while max_attempts<30: 33 | 34 | try: 35 | logging.info(f'Starting Attempt {max_attempts}') 36 | async with async_timeout.timeout(20): 37 | async with aioftp.Client.context('192.12.137.7', user='anonymous', port=21) as client: 38 | 39 | if remote_size is None: 40 | logging.info(f'Getting remote stats for file {path}') 41 | remote_stat = await client.stat(path) 42 | remote_size = int(remote_stat['size']) 43 | logging.info(f' Remote file has size {remote_size}') 44 | 45 | async with aiofiles.open(dest, mode='ab', ) as local_file: 46 | 47 | #Check to see if local_file exists 48 | if await aos.path.exists(dest): 49 | stat = await aos.stat(dest) 50 | size = stat.st_size 51 | else: 52 | size = 0 53 | logging.info(f'Starting at position {size}') 54 | await local_file.seek(size) #aiofiles file methods are coroutines, so seek must be awaited 55 | 56 | if remote_size == size: 57 | break 58 | elif size > remote_size: 59 | pathlib.Path(dest).unlink() 60 | logging.info('local file larger than remote file, removing now') 61 | max_attempts +=1 62 | size = 0 63 | 64 | async with client.download_stream(path, offset=size) as stream: 65 | async for block in stream.iter_by_block(): 66 | await local_file.write(block) 67 | 68 | except aioftp.StatusCodeError as ftp_e: 69 | 70 | max_attempts +=1 71 | logging.info(f'Found aioftp error, trying another attempt') 72 | if ftp_e.received_codes ==( '426',): 73 | logging.info(f'Forced timeout error, trying another attempt') 74 | 75 | if ftp_e.received_codes != ( '426',): 76 | logging.info('new code') 77 | await asyncio.sleep(1) 78 | continue 79 | 80 | except asyncio.exceptions.TimeoutError as asy_e: 81 | logging.info(f'found time out exception') 82 | max_attempts +=1 83 | continue 84 | 85 | 86 | 87 | async def list_files(model, var, rcp ): 88 | """ 89 | Function to recursively search FTP for desired files.
90 | Args: 91 | model (str): CMIP5 GCM Model name 92 | var (str): Variable to download ['pr','tasmax','tasmin','DTR'] 93 | rcp (str): rcp name ['rcp45', 'rcp85'] 94 | Returns: 95 | paths (list(pathlib.PurePath)): List of paths found to dowload 96 | """ 97 | 98 | async with aioftp.Client.context('192.12.137.7', user='anonymous', port=21) as client: 99 | 100 | await client.change_directory(f'pub/dcp/archive/cmip5/loca/LOCA_2016-04-02/{model}')#/archive/cmip5/loca/LOCA_2016-04-02/ 101 | _path = await client.get_current_directory() 102 | 103 | paths = [] 104 | 105 | async for path, info in client.list(_path, recursive=True): 106 | 107 | if rcp in path.name and '16th' in path.name and var in path.name and path.suffix =='.nc': 108 | paths.append(path) 109 | 110 | 111 | 112 | return paths 113 | 114 | async def main( model, var, rcp, out_dir): 115 | 116 | paths = await list_files(model, var, rcp) 117 | 118 | pprint(paths) 119 | 120 | await dowload_files(paths, pathlib.PurePath(out_dir)) 121 | 122 | if __name__ == '__main__': 123 | 124 | logging.basicConfig( 125 | level=logging.INFO, 126 | format="%(asctime)s [%(name)s] %(message)s", 127 | datefmt="[%H:%M:%S]:", 128 | ) 129 | 130 | model ='ACCESS1-3' 131 | print(model) 132 | 133 | for rcp in ['rcp45', 'rcp85']: 134 | for var in ['pr','tasmin','tasmax']: 135 | 136 | out_dir = f'output/{model}/{rcp}/{var}' 137 | 138 | os.makedirs(out_dir, exist_ok=True) 139 | 140 | loop = asyncio.get_event_loop() 141 | loop.set_debug(True) 142 | loop.run_until_complete(main(model, var, rcp, out_dir)) -------------------------------------------------------------------------------- /async_ftp_snodas.py: -------------------------------------------------------------------------------- 1 | # Tested on python 3.9 2 | import pandas as pd 3 | import asyncio 4 | import aioftp 5 | import os 6 | import pathlib 7 | import aiofiles 8 | import logging 9 | import pathlib 10 | import aiofiles.os as aos 11 | import async_timeout 12 | import sys 13 | from pprint import pprint 14 | 15 | async def dowload_files(paths, dest_dir): 16 | """ 17 | Function to dowload files from ftp server. 18 | Lots of error catching to ensure complete file 19 | download. 
20 | 21 | Args: 22 | paths (list(pathlib.PurePath)): List of server paths to download 23 | dest_dir (pathlib.PurePath): Output Directory 24 | """ 25 | for path in paths: 26 | remote_size = None 27 | dest = dest_dir.joinpath(path.name) 28 | logging.info(f'File to download: {path} \n') 29 | logging.info(f'Should download to {dest} \n') 30 | max_attempts = 0 31 | 32 | while max_attempts<30: 33 | 34 | try: 35 | logging.info(f'Starting Attempt {max_attempts}') 36 | async with async_timeout.timeout(20): 37 | async with aioftp.Client.context('128.138.135.20', user='anonymous', port=21) as client: 38 | 39 | if remote_size is None: 40 | logging.info(f'Getting remote stats for file {path}') 41 | remote_stat = await client.stat(path) 42 | remote_size = int(remote_stat['size']) 43 | logging.info(f' Remote file has size {remote_size}') 44 | 45 | async with aiofiles.open(dest, mode='ab', ) as local_file: 46 | 47 | #Check to see if local_file exists 48 | if await aos.path.exists(dest): 49 | stat = await aos.stat(dest) 50 | size = stat.st_size 51 | else: 52 | size = 0 53 | logging.info(f'Starting at position {size}') 54 | await local_file.seek(size) #aiofiles file methods are coroutines, so seek must be awaited 55 | 56 | if remote_size == size: 57 | break 58 | elif size > remote_size: 59 | pathlib.Path(dest).unlink() 60 | logging.info('local file larger than remote file, removing now') 61 | max_attempts +=1 62 | size = 0 63 | 64 | async with client.download_stream(path, offset=size) as stream: 65 | async for block in stream.iter_by_block(): 66 | await local_file.write(block) 67 | 68 | except aioftp.StatusCodeError as ftp_e: 69 | 70 | max_attempts +=1 71 | logging.info(f'Found aioftp error, trying another attempt') 72 | if ftp_e.received_codes ==( '426',): 73 | logging.info(f'Forced timeout error, trying another attempt') 74 | 75 | if ftp_e.received_codes != ( '426',): 76 | logging.info('new code') 77 | await asyncio.sleep(1) 78 | continue 79 | 80 | except asyncio.exceptions.TimeoutError as asy_e: 81 | logging.info(f'found time out exception') 82 | max_attempts +=1 83 | continue 84 | 85 | 86 | 87 | async def list_files(month_year ): 88 | """ 89 | Function to recursively search FTP for desired files.
90 | Args: 91 | month_year (list(str))): list of dates of interest (yyyymmdd) 92 | Returns: 93 | paths (list(pathlib.PurePath)): List of paths found to dowload 94 | """ 95 | 96 | async with aioftp.Client.context('128.138.135.20', user='anonymous', port=21) as client: 97 | 98 | await client.change_directory(f'DATASETS/NOAA/G02158/masked/') 99 | _path = await client.get_current_directory() 100 | 101 | paths = [] 102 | 103 | async for path, info in client.list(_path, recursive=True): 104 | 105 | if "SNODAS" in path.name and path.suffix =='.tar' and path.name.split('_')[1][:-4] in month_year: 106 | paths.append(path) 107 | 108 | 109 | 110 | return paths 111 | 112 | async def main( month_year, out_dir): 113 | 114 | paths = await list_files(month_year) 115 | 116 | pprint(paths) 117 | 118 | await dowload_files(paths, pathlib.PurePath(out_dir)) 119 | 120 | if __name__ == '__main__': 121 | 122 | logging.basicConfig( 123 | level=logging.INFO, 124 | format="%(asctime)s [%(name)s] %(message)s", 125 | datefmt="[%H:%M:%S]:", 126 | ) 127 | 128 | start_date = '2008-10-01' 129 | end_date = '2010-10-01' 130 | drange = pd.date_range(start_date, end_date, freq ='D') 131 | 132 | #list of dates to download 133 | month_year = [f'{i.strftime("%Y%m%d")}' for i in drange if i.month not in [8,9]] 134 | 135 | out_dir = f'output/SNODAS' 136 | 137 | os.makedirs(out_dir, exist_ok=True) 138 | 139 | loop = asyncio.get_event_loop() 140 | loop.set_debug(True) 141 | loop.run_until_complete(main(month_year, out_dir)) -------------------------------------------------------------------------------- /livneh_historical_psl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from datetime import datetime 4 | import asyncio 5 | import aiohttp 6 | import async_timeout 7 | import nest_asyncio 8 | nest_asyncio.apply() 9 | 10 | async def download_coroutine(url, session, destination): 11 | 12 | async with async_timeout.timeout(1200): 13 | async with session.get(url) as response: 14 | if response.status == 200: 15 | fp = destination + os.sep + os.path.basename(url) 16 | with open(fp, 'wb') as f_handle: 17 | while True: 18 | chunk = await response.content.read(1024) 19 | if not chunk: 20 | break 21 | f_handle.write(chunk) 22 | else: 23 | print(url) 24 | return await response.release() 25 | 26 | 27 | async def main(loop, tmp, destination): 28 | session_timeout = aiohttp.ClientTimeout(total=None) 29 | 30 | async with aiohttp.ClientSession(timeout=session_timeout) as session: 31 | tasks = [download_coroutine(url, session, destination) for url in tmp] 32 | return await asyncio.gather(*tasks) 33 | 34 | 35 | if __name__ == '__main__': 36 | logging.basicConfig( 37 | level=logging.INFO, 38 | format="%(asctime)s [%(name)s] %(message)s", 39 | datefmt="[%H:%M:%S]:", 40 | ) 41 | 42 | start = datetime(1972, 1, 1, 0, 0) 43 | end = datetime(2018, 1,1, 0, 0) 44 | yearIncrement = 1 45 | outRoot = r"C:\workspace\prospectHmsAdvanced\output" 46 | 47 | variables = ['tmin','tmax','prec'] 48 | 49 | for variable in variables: 50 | destination = rf'{outRoot}\{variable}' 51 | os.makedirs(destination, exist_ok=True) 52 | 53 | #loop through and see if you already have the file locally 54 | date = start 55 | urls = [] 56 | opath = [] 57 | while date < end: 58 | url = f"https://psl.noaa.gov/thredds/fileServer/Datasets/livneh/metvars/{variable}.{date.year}.nc" 59 | 60 | filename = url.split("/")[-1] 61 | # if not os.path.isfile(destination + os.sep + filename): 62 | urls.append(url) 63 | 
opath.append(destination + os.sep + filename) 64 | date = datetime(date.year + yearIncrement, date.month, date.day) 65 | 66 | #Split urls into chunks so you won't overwhelm the server 67 | chunk_size = 3 68 | chunked_urls = [urls[i * chunk_size:(i + 1) * chunk_size] for i in range((len(urls) + chunk_size - 1) // chunk_size )] 69 | 70 | for tmp in chunked_urls: 71 | loop = asyncio.get_event_loop() 72 | loop.set_debug(True) 73 | results = loop.run_until_complete(main(loop, tmp, destination)) 74 | del loop, results 75 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HydrologicEngineeringCenter/data-retrieval-scripts/f9b38563470b33c6c1e0f4798a7141c7e039ddc4/requirements.txt -------------------------------------------------------------------------------- /retrieve_hrrr_subhourly.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from datetime import datetime 3 | import os 4 | 5 | date = datetime.today().strftime('%Y%m%d') 6 | cycle = 0 7 | 8 | for hour in range(1, 19): 9 | url = "http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.{date}/conus/hrrr.t{:02d}z.wrfsubhf{:02d}.grib2".format(cycle, hour, date=date) 10 | print(url) 11 | 12 | filename = url.split("/")[-1] 13 | destination = "C:/Temp/hrrr/" + date + os.sep + "{:02d}".format(cycle) 14 | if not os.path.isdir(os.path.split(destination)[0]): 15 | os.makedirs(os.path.split(destination)[0]) 16 | if not os.path.isdir(destination): 17 | os.mkdir(destination) 18 | with open(destination + os.sep + filename, 'wb') as f: 19 | f.write(urllib.request.urlopen(url).read()) 20 | -------------------------------------------------------------------------------- /retrieve_hrrr_surface_hourly.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from datetime import datetime 3 | import os 4 | 5 | date = datetime.today().strftime('%Y%m%d') 6 | cycle = 0 7 | 8 | for hour in range(1, 37): 9 | url = "http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.{date}/conus/hrrr.t{:02d}z.wrfsfcf{:02d}.grib2".format( 10 | cycle, hour, date=date) 11 | print(url) 12 | 13 | filename = url.split("/")[-1] 14 | 15 | if not os.path.isdir("C:/Temp/hrrr/" + date + os.sep): 16 | os.makedirs("C:/Temp/hrrr/" + date + os.sep) 17 | with open("C:/Temp/hrrr/" + date + os.sep + filename, 'wb') as f: 18 | f.write(urllib.request.urlopen(url).read()) 19 | -------------------------------------------------------------------------------- /retrieve_qpe_gagecorr_01h.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from urllib.request import HTTPError 3 | from datetime import datetime 4 | from datetime import timedelta 5 | import os 6 | 7 | start = datetime(2018, 9, 1, 0, 0) 8 | end = datetime(2018, 10, 1, 0, 0) 9 | hour = timedelta(hours=1) 10 | 11 | missing_dates = [] 12 | fallback_to_radaronly = True #Enables a post-processing step that will go through the list of missing dates for gage-corrected 13 | ############################# and tries to retrieve the radar-only values if they exist.
14 | 15 | destination = "C:/Temp/qpe" 16 | 17 | date = start 18 | 19 | while date <= end: 20 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/GaugeCorr_QPE_01H/GaugeCorr_QPE_01H_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz".format( 21 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 22 | filename = url.split("/")[-1] 23 | try: 24 | fetched_request = urllib.request.urlopen(url) 25 | except HTTPError as e: 26 | missing_dates.append(date) 27 | else: 28 | with open(destination + os.sep + filename, 'wb') as f: 29 | f.write(fetched_request.read()) 30 | finally: 31 | date += hour 32 | 33 | if fallback_to_radaronly: 34 | radar_also_missing = [] 35 | for date in missing_dates: 36 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/mrms/ncep/RadarOnly_QPE_01H/RadarOnly_QPE_01H_00.00_{:04d}{:02d}{:02d}-{:02d}0000.grib2.gz".format( 37 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 38 | filename = url.split("/")[-1] 39 | try: 40 | fetched_request = urllib.request.urlopen(url) 41 | except HTTPError as e: 42 | radar_also_missing.append(date) 43 | else: 44 | with open(destination + os.sep + filename, 'wb') as f: 45 | f.write(fetched_request.read()) 46 | -------------------------------------------------------------------------------- /retrieve_rtma_temperature.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | from urllib.request import HTTPError 3 | from datetime import datetime 4 | from datetime import timedelta 5 | import os 6 | 7 | start = datetime(2017, 1, 1, 0, 0) 8 | end = datetime(2017, 1, 3, 0, 0) 9 | hour = timedelta(hours=1) 10 | 11 | missing_dates = [] 12 | 13 | destination = "C:/Temp" 14 | 15 | date = start 16 | 17 | while date < end: 18 | url = "http://mtarchive.geol.iastate.edu/{:04d}/{:02d}/{:02d}/grib2/ncep/RTMA/{:04d}{:02d}{:02d}{:02d}00_TMPK.grib2".format( 19 | date.year, date.month, date.day, date.year, date.month, date.day, date.hour) 20 | filename = url.split("/")[-1] 21 | try: 22 | fetched_request = urllib.request.urlopen(url) 23 | print("") 24 | print("opening: " + url) 25 | except HTTPError as e: 26 | missing_dates.append(date) 27 | else: 28 | with open(destination + os.sep + filename, 'wb') as f: 29 | f.write(fetched_request.read()) 30 | finally: 31 | date += hour --------------------------------------------------------------------------------
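Note on the two urllib-based scripts above: retrieve_qpe_gagecorr_01h.py and retrieve_rtma_temperature.py accumulate the hours that returned HTTP errors in missing_dates (and radar_also_missing) but never report them. A minimal sketch of a summary step that could be appended to either script, assuming the variable names used above, is:

    # Hypothetical reporting step (not part of the original scripts):
    # summarize any hours whose grids could not be retrieved.
    if missing_dates:
        print(f"{len(missing_dates)} file(s) could not be retrieved:")
        for d in missing_dates:
            print("  " + d.strftime("%Y-%m-%d %H:00"))
    else:
        print("All requested files were retrieved.")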