├── .gitignore ├── LICENSE ├── README.md ├── main.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | __pycache__ 3 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Muhamad Reza Pahlevi and Agung Baruna SN 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyWRF-Automation 2 | Python automation script to download the Global Forecast System (GFS) data from NOMADS NOAA with spatial resolution 0.250 and execute Weather Research & Forecasting (WRF) model. 
3 | 4 | ## Prerequisites 5 | To use this script, you must complete the following prerequisites: 6 | 1. Linux/Unix distribution 7 | 2. Python 3.7+ 8 | 3. MPI Package (OpenMPI/Intel MPI/MPICH) 9 | 4. WRF-ARW Model with `em_real` case using `dmpar` compiler selection. 10 | 11 | This repository includes two scripts: `main.py`, the executable entry point, and `utils.py`, a collection of functions used by `main.py`. 12 | 13 | ## How to use 14 | 1. Open the `main.py` file and change the values from line 10 to 31: 15 | 16 | ```python 17 | # Folder path 18 | base_dir = "/home/your_username/wrf_model" 19 | gfs_dir = f"{base_dir}/gfs_dataset" # Path to GFS dataset folder 20 | wps_dir = f"{base_dir}/wps" # Path to compiled WPS folder 21 | wrf_dir = f"{base_dir}/wrf/test/em_real" # Path to compiled WRF em_real folder 22 | wrfout_dir = f"{base_dir}/wrf_output" # Path to wrfout folder 23 | namelist_wps_file = f"{wps_dir}/namelist.wps" # Path to namelist.wps file 24 | namelist_wrf_file = f"{wrf_dir}/namelist.input" # Path to namelist.input file 25 | 26 | # GFS Downloader parameters 27 | gfs_num_workers = 4 # Number of workers will be assigned to download gfs concurrently 28 | gfs_download_increment = 3 # set to 1 if you want to download gfs dataset for every forecast hours 29 | gfs_left_lon = 110 # -180 to 180 30 | gfs_right_lon = 115 # -180 to 180 31 | gfs_top_lat = -2 # -90 to 90 32 | gfs_bottom_lat = -5 # -90 to 90 33 | 34 | # WPS and WRF parameters 35 | forecast_duration = 1 # length of simulation days 36 | max_dom = 3 # Maximum WPS and WRF domain 37 | num_proc = 4 # number of CPU cores will be used to execute real.exe & wrf.exe 38 | wrfout_saved_domain = 3 # which wrfout file will be saved 39 | ``` 40 | 41 | > Note: in order to download the GFS dataset for every n hours (e.g. every 3 hours), set `gfs_download_increment` to 3. 42 | 2. Export the libraries path `LD_LIBRARY_PATH` that will be used by wps.exe and wrf.exe. 43 | 3. 
Execute the program by typing `python main.py` 44 | 45 | ## Credit 46 | Copyright (c) 2020-present Reza Pahlevi and Agung Baruna Setiawan Noor. -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | WRF-ARW Model & GFS Automation System using Python 3 3 | Credit : Muhamad Reza Pahlevi (@elpahlevi) & Agung Baruna Setiawan Noor (@agungbaruna) 4 | If you find any trouble, reach the author via email : mr.levipahlevi@gmail.com 5 | """ 6 | 7 | from datetime import datetime, timedelta 8 | import utils, time 9 | 10 | # Folder path 11 | base_dir = "/home/your_username/wrf_model" 12 | gfs_dir = f"{base_dir}/gfs_dataset" # Path to GFS dataset folder 13 | wps_dir = f"{base_dir}/wps" # Path to compiled WPS folder 14 | wrf_dir = f"{base_dir}/wrf/test/em_real" # Path to compiled WRF em_real folder 15 | wrfout_dir = f"{base_dir}/wrf_output" # Path to wrfout folder 16 | namelist_wps_file = f"{wps_dir}/namelist.wps" # Path to namelist.wps file 17 | namelist_wrf_file = f"{wrf_dir}/namelist.input" # Path to namelist.input file 18 | 19 | # GFS Downloader parameters 20 | gfs_num_workers = 4 # Number of workers will be assigned to download gfs concurrently 21 | gfs_download_increment = 3 # set to 1 if you want to download gfs dataset for every forecast hours 22 | gfs_left_lon = 110 # -180 to 180 23 | gfs_right_lon = 115 # -180 to 180 24 | gfs_top_lat = -2 # -90 to 90 25 | gfs_bottom_lat = -5 # -90 to 90 26 | 27 | # WPS and WRF parameters 28 | forecast_duration = 1 # length of simulation days 29 | max_dom = 3 # Maximum WPS and WRF domain 30 | num_proc = 4 # number of CPU cores will be used to execute real.exe & wrf.exe 31 | wrfout_saved_domain = 3 # which wrfout file will be saved 32 | 33 | # Time parameters 34 | start_time = time.time() 35 | current_date = datetime.today() 36 | start_date = (current_date - timedelta(days = 1)).replace(hour = 0, minute = 0, 
second = 0) 37 | end_date = start_date + timedelta(days = forecast_duration) 38 | forecast_time = int((end_date - start_date).total_seconds() / 3600) + 6 39 | 40 | # Automation Sequences 41 | for cycle_time in ["00", "06", "12", "18"]: 42 | utils.download_gfs(gfs_dir, gfs_num_workers, start_date, forecast_time, gfs_download_increment, cycle_time, gfs_left_lon, gfs_right_lon, gfs_top_lat, gfs_bottom_lat) 43 | utils.run_wps(wps_dir, gfs_dir, namelist_wps_file, max_dom, start_date, end_date) 44 | utils.run_wrf(wps_dir, wrf_dir, wrfout_dir, namelist_wrf_file, forecast_duration, max_dom, start_date, end_date, num_proc, wrfout_saved_domain) 45 | utils.calculate_execution_time(start_time, time.time()) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | from concurrent.futures import ThreadPoolExecutor 3 | from pathlib import Path 4 | import os, sys, re, subprocess, requests, time, logging 5 | 6 | # For logging purposes 7 | logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) 8 | 9 | # Setup download worker 10 | def gfs_download_worker(data): 11 | if not os.path.exists(data[1]): 12 | start_time = time.time() 13 | response = requests.get(data[0]) 14 | with open(data[1], "wb") as f: 15 | f.write(response.content) 16 | end_time = time.time() 17 | logging.info(f"INFO: GFS Downloader - {Path(data[1]).name} has been downloaded in {int(end_time - start_time)} seconds") 18 | else: 19 | logging.info(f"INFO: GFS Downlaoder - File {Path(data[1]).name} is already exist, skipped") 20 | 21 | # Function to download GFS dataset concurrently 22 | def download_gfs(path: str, n_worker: int, start_date: datetime, forecast_time: int, increment: int, cycle_time: str, left_lon: float, right_lon: float, top_lat: float, bottom_lat: float): 23 | if forecast_time > 384: 24 | sys.exit("ERROR: GFS Downloader - 
Forecast time can't be more than 384") 25 | 26 | folder_path = f"{path}/{start_date.strftime('%Y-%m-%d')}" 27 | base_url = "https://nomads.ncep.noaa.gov/cgi-bin" 28 | year = str(start_date.year) 29 | month = str("%02d" % (start_date.month)) 30 | day = str("%02d" % (start_date.day)) 31 | 32 | if not(os.path.isdir(folder_path)): 33 | os.makedirs(folder_path) 34 | logging.info(f"INFO: GFS Downloader - Dataset will be saved in {folder_path}") 35 | 36 | list_url = [f"{base_url}/filter_gfs_0p25.pl?file=gfs.t{cycle_time}z.pgrb2.0p25.f{'%03d' % hour}&all_lev=on&all_var=on&subregion=&leftlon={str(left_lon)}&rightlon={str(right_lon)}&toplat={str(top_lat)}&bottomlat={str(bottom_lat)}&dir=%2Fgfs.{year}{month}{day}%2F{cycle_time}%2Fatmos" for hour in range(0, forecast_time + 1, increment)] 37 | list_filepath = [f"{folder_path}/gfs_4_{year}{month}{day}_{cycle_time}00_{'%03d' % hour}.grb2" for hour in range(0, forecast_time + 1, increment)] 38 | 39 | with ThreadPoolExecutor(max_workers = n_worker) as executor: 40 | executor.map(gfs_download_worker, zip(list_url, list_filepath)) 41 | logging.info(f"INFO: GFS Downloader - Dataset with cycle time {cycle_time} has been downloaded") 42 | 43 | # Function to execute WPS sequences 44 | def run_wps(wps_path: str, gfs_path: str, namelist_wps_path: str, max_dom: int, start_date: datetime, end_date: datetime, opts = None): 45 | wps_params = { 46 | "max_dom": str(max_dom), 47 | "start_date": start_date.strftime("%Y-%m-%d_%H:%M:%S"), 48 | "end_date": end_date.strftime("%Y-%m-%d_%H:%M:%S") 49 | } 50 | 51 | if opts: 52 | wps_params.update(opts) 53 | 54 | for key in ["parent_id", "parent_grid_ratio", "i_parent_start", "j_parent_start", "e_we", "e_sn"]: 55 | value = wps_params.get(key) 56 | if value != None and len(value.split(",")) != max_dom: 57 | sys.exit(f"Error: WPS - length of {key} value mismatched to max_dom parameter") 58 | 59 | with open(namelist_wps_path, "r") as file: 60 | lines = file.readlines() 61 | for i, line in enumerate(lines): 
62 | for variable, value in wps_params.items(): 63 | matched = re.search(rf"{variable}\s*=\s*[^,]+,", line) 64 | if matched: 65 | index_of_equal_sign = line.find("=") 66 | 67 | if variable in ["wrf_core", "map_proj", "geog_data_path", "out_format", "prefix", "fg_name"]: 68 | lines[i] = f"{line[:index_of_equal_sign + 1]} '{value}',\n" 69 | continue 70 | 71 | if variable in ["start_date", "end_date", "geog_data_res"]: 72 | formatted = f"'{value}'," 73 | lines[i] = f"{line[:index_of_equal_sign + 1]} {formatted * max_dom}\n" 74 | continue 75 | 76 | lines[i] = f"{line[:index_of_equal_sign + 1]} {str(value)},\n" 77 | 78 | with open(namelist_wps_path, "w") as file: 79 | file.writelines(lines) 80 | 81 | logging.info(f"INFO: WPS - Configuration file updated") 82 | 83 | # Delete FILE* and met_em* files from previous run 84 | subprocess.run([f"rm {wps_path}/FILE*"],shell=True) 85 | subprocess.run([f"rm {wps_path}/PFILE*"], shell=True) 86 | subprocess.run([f"rm {wps_path}/met_em*"], shell=True) 87 | subprocess.run([f"rm {wps_path}/GRIBFILE*"], shell=True) 88 | subprocess.run([f"rm {wps_path}/geo_em*"], shell=True) 89 | 90 | # Execute geogrid.exe 91 | subprocess.run("./geogrid.exe", cwd=wps_path) 92 | logging.info("INFO: WPS - geogrid.exe completed") 93 | 94 | # Create a link to GFS dataset 95 | subprocess.run(["./link_grib.csh", f"{gfs_path}/{start_date.strftime('%Y-%m-%d')}/*"], cwd=wps_path) 96 | logging.info("INFO: WPS - GFS dataset linked successfully") 97 | 98 | # Create a symlink to GFS Variable Table 99 | if os.path.exists(f"{wps_path}/Vtable"): 100 | logging.info("INFO: WPS - Vtable.GFS is already linked") 101 | else: 102 | subprocess.run(["ln", "-sf" ,f"{wps_path}/ungrib/Variable_Tables/Vtable.GFS", "Vtable"], cwd=wps_path) 103 | logging.info("INFO: WPS - Symlink of Vtable.GFS created") 104 | 105 | # Execute ungrib.exe 106 | subprocess.run("./ungrib.exe", cwd=wps_path) 107 | logging.info("INFO: WPS - ungrib.exe completed") 108 | 109 | # Execute metgrid.exe 110 | 
subprocess.run("./metgrid.exe", cwd=wps_path) 111 | logging.info("INFO: WPS - metgrid.exe completed") 112 | 113 | logging.info("INFO: WPS - Process completed. met_em files is ready") 114 | 115 | # Function to execute WRF model 116 | def run_wrf(wps_path: str, wrf_path: str, wrfout_path: str, namelist_input_path: str, run_days: int, max_dom: int, start_date: datetime, end_date: datetime, num_proc: int, wrfout_saved_domain: int, opts = None): 117 | wrf_params = { 118 | "run_days": str(run_days), 119 | "start_year": str(start_date.year), 120 | "start_month": "%02d" % start_date.month, 121 | "start_day": "%02d" % start_date.day, 122 | "start_hour": "%02d" % start_date.hour, 123 | "end_year": str(end_date.year), 124 | "end_month": "%02d" % end_date.month, 125 | "end_day": "%02d" % end_date.day, 126 | "end_hour": "%02d" % end_date.hour, 127 | "max_dom": str(max_dom) 128 | } 129 | if opts: 130 | wrf_params.update(opts) 131 | 132 | for key in ["e_we", "e_sn", "e_vert", "dx", "dy", "grid_id", "parent_id", "i_parent_start", "j_parent_start", "parent_grid_ratio", "parent_time_step_ratio"]: 133 | value = wrf_params.get(key) 134 | if value != None and len(value.split(",")) != max_dom: 135 | sys.exit(f"Error: WRF Model - length of {key} value mismatched to max_dom parameter") 136 | 137 | if wrfout_saved_domain > max_dom: 138 | sys.exit(f"Error: WRF Model - Maximum saved WRF output file domain must be equal or lower to max_domain parameter") 139 | 140 | with open(namelist_input_path, "r") as file: 141 | lines = file.readlines() 142 | 143 | for i, line in enumerate(lines): 144 | for variable, value in wrf_params.items(): 145 | matched = re.search(rf"{variable}\s*=\s*[^,]+,", line) 146 | if matched: 147 | index_of_equal_sign = line.find("=") 148 | 149 | # Change time_control parameter 150 | if variable in ["start_year", "start_month", "start_day", "start_hour", "end_year", "end_month", "end_day", "end_hour"]: 151 | lines[i] = f"{line[:index_of_equal_sign + 1]} {((value + ', ') * 
max_dom)}\n" 152 | continue 153 | 154 | lines[i] = f"{line[:index_of_equal_sign + 1]} {value},\n" 155 | 156 | with open(namelist_input_path, "w") as file: 157 | file.writelines(lines) 158 | 159 | logging.info("INFO: WRF Model - Configuration file updated") 160 | logging.info(f"INFO: WRF Model - Model will take a simulation from {start_date.strftime('%Y-%m-%d_%H:%M:%S')} to {end_date.strftime('%Y-%m-%d_%H:%M:%S')}") 161 | 162 | # Delete unused files from previous run 163 | subprocess.run([f"rm {wrf_path}/met_em*"], shell=True) 164 | subprocess.run([f"rm {wrf_path}/wrfout*"], shell=True) 165 | subprocess.run([f"rm {wrf_path}/wrfrst*"], shell=True) 166 | 167 | # Create a new symlink to all metgrid files from WPS folder 168 | subprocess.run([f"ln -sf {wps_path}/met_em* ."], shell=True, cwd=wrf_path) 169 | logging.info("INFO: WRF Model - met_em* files has been linked") 170 | 171 | # Execute real.exe 172 | subprocess.run([f"mpirun -np {num_proc} ./real.exe"], shell=True, cwd=wrf_path) 173 | logging.info("INFO: WRF Model - real.exe executed") 174 | 175 | # Check the output from real.exe before execute wrf.exe 176 | rsl_error = subprocess.check_output(["tail --lines 1 rsl.error.0000"], shell=True, cwd=wrf_path) 177 | if re.search("SUCCESS COMPLETE REAL_EM INIT", str(rsl_error)): 178 | # Execute wrf.exe 179 | subprocess.run([f"mpirun -np {num_proc} ./wrf.exe"], shell=True, cwd=wrf_path) 180 | logging.info("INFO: WRF Model - Simulation completed") 181 | else: 182 | sys.exit("Error: WRF Model - Check namelist.input configuration") 183 | 184 | # Move output to assigned location 185 | wrfout_folder_path = f"{wrfout_path}/{start_date.strftime('%Y-%m-%d')}" 186 | if not(os.path.isdir(wrfout_folder_path)): 187 | os.makedirs(wrfout_folder_path) 188 | 189 | subprocess.run([f"mv {wrf_path}/wrfout_d0{wrfout_saved_domain}* {wrfout_folder_path}/wrfout_d0{wrfout_saved_domain}_{start_date.strftime('%Y-%m-%d')}.nc"], shell=True, cwd=wrf_path) 190 | logging.info(f"INFO: WRF Model - 
Simulation files on domain {wrfout_saved_domain} has been saved to {wrfout_folder_path}") 191 | 192 | # Calculate execution time 193 | def calculate_execution_time(start: float, stop: float): 194 | if stop - start < 60: 195 | execution_duration = ("%1d" % (stop - start)) 196 | logging.info(f"INFO: Automation - Process completed in {execution_duration} seconds") 197 | sys.exit(0) 198 | elif stop - start < 3600: 199 | execution_duration = ("%1d" % ((stop - start) / 60)) 200 | logging.info(f"INFO: Automation - Process completed in {execution_duration} minutes") 201 | sys.exit(0) 202 | else: 203 | execution_duration = ("%1d" % ((stop - start) / 3600)) 204 | logging.info(f"INFO: Automation - Process complete in {execution_duration} hours") 205 | sys.exit(0) --------------------------------------------------------------------------------