├── .gitignore
├── LICENSE
├── README.md
├── main.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__
3 | .DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Muhamad Reza Pahlevi and Agung Baruna SN
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyWRF-Automation
2 | Python automation script to download Global Forecast System (GFS) data from NOMADS NOAA at a spatial resolution of 0.25° and execute the Weather Research & Forecasting (WRF) model.
3 |
4 | ## Prerequisites
5 | To use this script, you must satisfy the following prerequisites:
6 | 1. Linux/Unix distribution
7 | 2. Python 3.7+
8 | 3. MPI Package (OpenMPI/Intel MPI/MPICH)
9 | 4. WRF-ARW Model with `em_real` case using `dmpar` compiler selection.
10 |
11 | This repository includes two scripts: `main.py`, the executable entry point, and `utils.py`, a collection of functions used by `main.py`.
12 |
13 | ## How to use
14 | 1. Open `main.py` file and change the values from line 10 to 31:
15 |
16 | ```python
17 | # Folder path
18 | base_dir = "/home/your_username/wrf_model"
19 | gfs_dir = f"{base_dir}/gfs_dataset" # Path to GFS dataset folder
20 | wps_dir = f"{base_dir}/wps" # Path to compiled WPS folder
21 | wrf_dir = f"{base_dir}/wrf/test/em_real" # Path to compiled WRF em_real folder
22 | wrfout_dir = f"{base_dir}/wrf_output" # Path to wrfout folder
23 | namelist_wps_file = f"{wps_dir}/namelist.wps" # Path to namelist.wps file
24 | namelist_wrf_file = f"{wrf_dir}/namelist.input" # Path to namelist.input file
25 |
26 | # GFS Downloader parameters
27 | gfs_num_workers = 4 # Number of workers will be assigned to download gfs concurrently
28 | gfs_download_increment = 3 # set to 1 if you want to download gfs dataset for every forecast hours
29 | gfs_left_lon = 110 # -180 to 180
30 | gfs_right_lon = 115 # -180 to 180
31 | gfs_top_lat = -2 # -90 to 90
32 | gfs_bottom_lat = -5 # -90 to 90
33 |
34 | # WPS and WRF parameters
35 | forecast_duration = 1 # length of simulation days
36 | max_dom = 3 # Maximum WPS and WRF domain
37 | num_proc = 4 # number of CPU cores will be used to execute real.exe & wrf.exe
38 | wrfout_saved_domain = 3 # which wrfout file will be saved
39 | ```
40 |
41 | > Note: to download the GFS dataset every *n* hours (e.g. every 3 hours), set `gfs_download_increment` to *n* (e.g. 3).
42 | 2. Export libraries path `LD_LIBRARY_PATH` that will be used by wps.exe and wrf.exe.
43 | 3. Execute the program by typing `python main.py`
44 |
45 | ## Credit
46 | Copyright (c) 2020-present Reza Pahlevi and Agung Baruna Setiawan Noor.
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | WRF-ARW Model & GFS Automation System using Python 3
3 | Credit : Muhamad Reza Pahlevi (@elpahlevi) & Agung Baruna Setiawan Noor (@agungbaruna)
4 | If you find any trouble, reach the author via email : mr.levipahlevi@gmail.com
5 | """
6 |
7 | from datetime import datetime, timedelta
8 | import utils, time
9 |
10 | # Folder path
11 | base_dir = "/home/your_username/wrf_model"
12 | gfs_dir = f"{base_dir}/gfs_dataset" # Path to GFS dataset folder
13 | wps_dir = f"{base_dir}/wps" # Path to compiled WPS folder
14 | wrf_dir = f"{base_dir}/wrf/test/em_real" # Path to compiled WRF em_real folder
15 | wrfout_dir = f"{base_dir}/wrf_output" # Path to wrfout folder
16 | namelist_wps_file = f"{wps_dir}/namelist.wps" # Path to namelist.wps file
17 | namelist_wrf_file = f"{wrf_dir}/namelist.input" # Path to namelist.input file
18 |
19 | # GFS Downloader parameters
20 | gfs_num_workers = 4 # Number of workers will be assigned to download gfs concurrently
21 | gfs_download_increment = 3 # set to 1 if you want to download gfs dataset for every forecast hours
22 | gfs_left_lon = 110 # -180 to 180
23 | gfs_right_lon = 115 # -180 to 180
24 | gfs_top_lat = -2 # -90 to 90
25 | gfs_bottom_lat = -5 # -90 to 90
26 |
27 | # WPS and WRF parameters
28 | forecast_duration = 1 # length of simulation days
29 | max_dom = 3 # Maximum WPS and WRF domain
30 | num_proc = 4 # number of CPU cores will be used to execute real.exe & wrf.exe
31 | wrfout_saved_domain = 3 # which wrfout file will be saved
32 |
33 | # Time parameters
34 | start_time = time.time()
35 | current_date = datetime.today()
36 | start_date = (current_date - timedelta(days = 1)).replace(hour = 0, minute = 0, second = 0)
37 | end_date = start_date + timedelta(days = forecast_duration)
38 | forecast_time = int((end_date - start_date).total_seconds() / 3600) + 6
39 |
40 | # Automation Sequences
41 | for cycle_time in ["00", "06", "12", "18"]:
42 | utils.download_gfs(gfs_dir, gfs_num_workers, start_date, forecast_time, gfs_download_increment, cycle_time, gfs_left_lon, gfs_right_lon, gfs_top_lat, gfs_bottom_lat)
43 | utils.run_wps(wps_dir, gfs_dir, namelist_wps_file, max_dom, start_date, end_date)
44 | utils.run_wrf(wps_dir, wrf_dir, wrfout_dir, namelist_wrf_file, forecast_duration, max_dom, start_date, end_date, num_proc, wrfout_saved_domain)
45 | utils.calculate_execution_time(start_time, time.time())
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | from concurrent.futures import ThreadPoolExecutor
3 | from pathlib import Path
4 | import os, sys, re, subprocess, requests, time, logging
5 |
6 | # For logging purposes
7 | logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
8 |
# Setup download worker
def gfs_download_worker(data):
    """Download one GFS file to disk, skipping files that already exist.

    Args:
        data: Pair of ``(url, destination_path)`` as produced by
            ``download_gfs`` (delivered via ``executor.map`` over a zip).

    Raises:
        requests.HTTPError: When NOMADS answers with an error status, so a
            failed request is never silently saved as a ``.grb2`` file.
    """
    url, file_path = data
    if os.path.exists(file_path):
        # Fixed typo ("Downlaoder") and grammar in the original message.
        logging.info(f"INFO: GFS Downloader - File {Path(file_path).name} already exists, skipped")
        return

    start_time = time.time()
    response = requests.get(url)
    # Abort on HTTP errors; otherwise the NOMADS error page would be written
    # to disk and the corrupt file would be skipped as "existing" forever.
    response.raise_for_status()
    with open(file_path, "wb") as f:
        f.write(response.content)
    end_time = time.time()
    logging.info(f"INFO: GFS Downloader - {Path(file_path).name} has been downloaded in {int(end_time - start_time)} seconds")
20 |
# Function to download GFS dataset concurrently
def download_gfs(path: str, n_worker: int, start_date: datetime, forecast_time: int, increment: int, cycle_time: str, left_lon: float, right_lon: float, top_lat: float, bottom_lat: float):
    """Download a spatial subset of the 0.25-degree GFS dataset from NOMADS.

    Files are fetched concurrently with a thread pool and saved under
    ``{path}/{YYYY-MM-DD}``; the worker skips files that already exist.

    Args:
        path: Root folder for the dated dataset sub-folder.
        n_worker: Number of concurrent download threads.
        start_date: GFS run date used to build URLs and file names.
        forecast_time: Last forecast hour to fetch (GFS maximum is 384).
        increment: Step in hours between fetched forecast files.
        cycle_time: GFS cycle as a two-digit string ("00", "06", "12", "18").
        left_lon: Western longitude bound (-180 to 180).
        right_lon: Eastern longitude bound (-180 to 180).
        top_lat: Northern latitude bound (-90 to 90).
        bottom_lat: Southern latitude bound (-90 to 90).

    Exits the process when forecast_time exceeds 384.
    """
    if forecast_time > 384:
        sys.exit("ERROR: GFS Downloader - Forecast time can't be more than 384")

    folder_path = f"{path}/{start_date.strftime('%Y-%m-%d')}"
    base_url = "https://nomads.ncep.noaa.gov/cgi-bin"
    year = str(start_date.year)
    month = "%02d" % start_date.month
    day = "%02d" % start_date.day

    if not os.path.isdir(folder_path):
        os.makedirs(folder_path)
    logging.info(f"INFO: GFS Downloader - Dataset will be saved in {folder_path}")

    hours = range(0, forecast_time + 1, increment)
    list_url = [f"{base_url}/filter_gfs_0p25.pl?file=gfs.t{cycle_time}z.pgrb2.0p25.f{'%03d' % hour}&all_lev=on&all_var=on&subregion=&leftlon={str(left_lon)}&rightlon={str(right_lon)}&toplat={str(top_lat)}&bottomlat={str(bottom_lat)}&dir=%2Fgfs.{year}{month}{day}%2F{cycle_time}%2Fatmos" for hour in hours]
    list_filepath = [f"{folder_path}/gfs_4_{year}{month}{day}_{cycle_time}00_{'%03d' % hour}.grb2" for hour in hours]

    with ThreadPoolExecutor(max_workers = n_worker) as executor:
        # Drain the map iterator: executor.map raises worker exceptions only
        # when results are consumed, so the original (unconsumed) call silently
        # discarded any download failure.
        list(executor.map(gfs_download_worker, zip(list_url, list_filepath)))
    logging.info(f"INFO: GFS Downloader - Dataset with cycle time {cycle_time} has been downloaded")
42 |
# Function to execute WPS sequences
def run_wps(wps_path: str, gfs_path: str, namelist_wps_path: str, max_dom: int, start_date: datetime, end_date: datetime, opts = None):
    """Rewrite namelist.wps and run the WPS chain (geogrid -> ungrib -> metgrid).

    Args:
        wps_path: Compiled WPS folder containing the executables and link_grib.csh.
        gfs_path: Root GFS dataset folder; the dated sub-folder for start_date
            is linked into WPS via link_grib.csh.
        namelist_wps_path: namelist.wps file, rewritten in place.
        max_dom: Number of WPS domains.
        start_date: Simulation start, written as the namelist start_date.
        end_date: Simulation end, written as the namelist end_date.
        opts: Optional dict of extra namelist variables to add or override.

    Exits the process when a per-domain option length does not match max_dom.
    """
    # Base namelist values; opts may extend or override them.
    wps_params = {
        "max_dom": str(max_dom),
        "start_date": start_date.strftime("%Y-%m-%d_%H:%M:%S"),
        "end_date": end_date.strftime("%Y-%m-%d_%H:%M:%S")
    }

    if opts:
        wps_params.update(opts)

    # Per-domain options must supply exactly one comma-separated value per domain.
    for key in ["parent_id", "parent_grid_ratio", "i_parent_start", "j_parent_start", "e_we", "e_sn"]:
        value = wps_params.get(key)
        if value != None and len(value.split(",")) != max_dom:
            sys.exit(f"Error: WPS - length of {key} value mismatched to max_dom parameter")

    # Rewrite each matching "variable = value," line in place.
    # NOTE(review): assumes every namelist variable sits on a single line whose
    # assignment ends with a comma -- confirm against the namelist template.
    with open(namelist_wps_path, "r") as file:
        lines = file.readlines()
    for i, line in enumerate(lines):
        for variable, value in wps_params.items():
            matched = re.search(rf"{variable}\s*=\s*[^,]+,", line)
            if matched:
                index_of_equal_sign = line.find("=")

                # String-typed options are written once, single-quoted.
                if variable in ["wrf_core", "map_proj", "geog_data_path", "out_format", "prefix", "fg_name"]:
                    lines[i] = f"{line[:index_of_equal_sign + 1]} '{value}',\n"
                    continue

                # Date/resolution options are repeated (quoted) once per domain.
                if variable in ["start_date", "end_date", "geog_data_res"]:
                    formatted = f"'{value}',"
                    lines[i] = f"{line[:index_of_equal_sign + 1]} {formatted * max_dom}\n"
                    continue

                # Everything else is written verbatim, unquoted.
                lines[i] = f"{line[:index_of_equal_sign + 1]} {str(value)},\n"

    with open(namelist_wps_path, "w") as file:
        file.writelines(lines)

    logging.info(f"INFO: WPS - Configuration file updated")

    # Delete FILE* and met_em* files from previous run
    # (shell glob removal; prints a harmless error when nothing matches)
    subprocess.run([f"rm {wps_path}/FILE*"],shell=True)
    subprocess.run([f"rm {wps_path}/PFILE*"], shell=True)
    subprocess.run([f"rm {wps_path}/met_em*"], shell=True)
    subprocess.run([f"rm {wps_path}/GRIBFILE*"], shell=True)
    subprocess.run([f"rm {wps_path}/geo_em*"], shell=True)

    # Execute geogrid.exe
    subprocess.run("./geogrid.exe", cwd=wps_path)
    logging.info("INFO: WPS - geogrid.exe completed")

    # Create a link to GFS dataset
    subprocess.run(["./link_grib.csh", f"{gfs_path}/{start_date.strftime('%Y-%m-%d')}/*"], cwd=wps_path)
    logging.info("INFO: WPS - GFS dataset linked successfully")

    # Create a symlink to GFS Variable Table (only when not already present)
    if os.path.exists(f"{wps_path}/Vtable"):
        logging.info("INFO: WPS - Vtable.GFS is already linked")
    else:
        subprocess.run(["ln", "-sf" ,f"{wps_path}/ungrib/Variable_Tables/Vtable.GFS", "Vtable"], cwd=wps_path)
        logging.info("INFO: WPS - Symlink of Vtable.GFS created")

    # Execute ungrib.exe
    subprocess.run("./ungrib.exe", cwd=wps_path)
    logging.info("INFO: WPS - ungrib.exe completed")

    # Execute metgrid.exe
    subprocess.run("./metgrid.exe", cwd=wps_path)
    logging.info("INFO: WPS - metgrid.exe completed")

    logging.info("INFO: WPS - Process completed. met_em files is ready")
114 |
# Function to execute WRF model
def run_wrf(wps_path: str, wrf_path: str, wrfout_path: str, namelist_input_path: str, run_days: int, max_dom: int, start_date: datetime, end_date: datetime, num_proc: int, wrfout_saved_domain: int, opts = None):
    """Rewrite namelist.input, run real.exe and wrf.exe, then archive the output.

    Args:
        wps_path: Compiled WPS folder; met_em* files are linked from here.
        wrf_path: Compiled WRF em_real folder containing real.exe and wrf.exe.
        wrfout_path: Root folder where the saved wrfout file is moved.
        namelist_input_path: namelist.input file, rewritten in place.
        run_days: Simulation length in days.
        max_dom: Number of WRF domains.
        start_date: Simulation start, fills the start_* namelist entries.
        end_date: Simulation end, fills the end_* namelist entries.
        num_proc: Number of MPI ranks for real.exe and wrf.exe.
        wrfout_saved_domain: Domain whose wrfout file is kept; must be <= max_dom.
        opts: Optional dict of extra namelist variables to add or override.

    Exits the process on parameter mismatches or when real.exe did not finish
    successfully (checked via the last line of rsl.error.0000).
    """
    # Base time_control values; opts may extend or override them.
    wrf_params = {
        "run_days": str(run_days),
        "start_year": str(start_date.year),
        "start_month": "%02d" % start_date.month,
        "start_day": "%02d" % start_date.day,
        "start_hour": "%02d" % start_date.hour,
        "end_year": str(end_date.year),
        "end_month": "%02d" % end_date.month,
        "end_day": "%02d" % end_date.day,
        "end_hour": "%02d" % end_date.hour,
        "max_dom": str(max_dom)
    }
    if opts:
        wrf_params.update(opts)

    # Per-domain options must supply exactly one comma-separated value per domain.
    for key in ["e_we", "e_sn", "e_vert", "dx", "dy", "grid_id", "parent_id", "i_parent_start", "j_parent_start", "parent_grid_ratio", "parent_time_step_ratio"]:
        value = wrf_params.get(key)
        if value != None and len(value.split(",")) != max_dom:
            sys.exit(f"Error: WRF Model - length of {key} value mismatched to max_dom parameter")

    if wrfout_saved_domain > max_dom:
        sys.exit(f"Error: WRF Model - Maximum saved WRF output file domain must be equal or lower to max_domain parameter")

    # Rewrite each matching "variable = value," line in place.
    # NOTE(review): assumes one namelist variable per line, assignment ending
    # with a comma -- confirm against the namelist.input template.
    with open(namelist_input_path, "r") as file:
        lines = file.readlines()

    for i, line in enumerate(lines):
        for variable, value in wrf_params.items():
            matched = re.search(rf"{variable}\s*=\s*[^,]+,", line)
            if matched:
                index_of_equal_sign = line.find("=")

                # Change time_control parameter (repeated once per domain)
                if variable in ["start_year", "start_month", "start_day", "start_hour", "end_year", "end_month", "end_day", "end_hour"]:
                    lines[i] = f"{line[:index_of_equal_sign + 1]} {((value + ', ') * max_dom)}\n"
                    continue

                # Everything else is written once, verbatim.
                lines[i] = f"{line[:index_of_equal_sign + 1]} {value},\n"

    with open(namelist_input_path, "w") as file:
        file.writelines(lines)

    logging.info("INFO: WRF Model - Configuration file updated")
    logging.info(f"INFO: WRF Model - Model will take a simulation from {start_date.strftime('%Y-%m-%d_%H:%M:%S')} to {end_date.strftime('%Y-%m-%d_%H:%M:%S')}")

    # Delete unused files from previous run
    subprocess.run([f"rm {wrf_path}/met_em*"], shell=True)
    subprocess.run([f"rm {wrf_path}/wrfout*"], shell=True)
    subprocess.run([f"rm {wrf_path}/wrfrst*"], shell=True)

    # Create a new symlink to all metgrid files from WPS folder
    subprocess.run([f"ln -sf {wps_path}/met_em* ."], shell=True, cwd=wrf_path)
    logging.info("INFO: WRF Model - met_em* files has been linked")

    # Execute real.exe
    subprocess.run([f"mpirun -np {num_proc} ./real.exe"], shell=True, cwd=wrf_path)
    logging.info("INFO: WRF Model - real.exe executed")

    # Check the output from real.exe before execute wrf.exe
    rsl_error = subprocess.check_output(["tail --lines 1 rsl.error.0000"], shell=True, cwd=wrf_path)
    if re.search("SUCCESS COMPLETE REAL_EM INIT", str(rsl_error)):
        # Execute wrf.exe
        subprocess.run([f"mpirun -np {num_proc} ./wrf.exe"], shell=True, cwd=wrf_path)
        logging.info("INFO: WRF Model - Simulation completed")
    else:
        sys.exit("Error: WRF Model - Check namelist.input configuration")

    # Move output to assigned location
    # NOTE(review): the d0{n} pattern assumes single-digit domain numbers (d01-d09).
    wrfout_folder_path = f"{wrfout_path}/{start_date.strftime('%Y-%m-%d')}"
    if not(os.path.isdir(wrfout_folder_path)):
        os.makedirs(wrfout_folder_path)

    subprocess.run([f"mv {wrf_path}/wrfout_d0{wrfout_saved_domain}* {wrfout_folder_path}/wrfout_d0{wrfout_saved_domain}_{start_date.strftime('%Y-%m-%d')}.nc"], shell=True, cwd=wrf_path)
    logging.info(f"INFO: WRF Model - Simulation files on domain {wrfout_saved_domain} has been saved to {wrfout_folder_path}")
191 |
# Calculate execution time
def calculate_execution_time(start: float, stop: float):
    """Log the total automation runtime in human units and exit with status 0.

    Args:
        start: Start timestamp from time.time().
        stop: Stop timestamp from time.time().

    Note:
        Terminates the process via sys.exit(0), so it must be the last call
        in the automation sequence (as it is in main.py).
    """
    elapsed = stop - start
    # Pick the coarsest unit; "%1d" truncates to whole units as before.
    if elapsed < 60:
        duration, unit = elapsed, "seconds"
    elif elapsed < 3600:
        duration, unit = elapsed / 60, "minutes"
    else:
        # Original hours branch said "Process complete"; unified to "completed".
        duration, unit = elapsed / 3600, "hours"
    logging.info(f"INFO: Automation - Process completed in {'%1d' % duration} {unit}")
    sys.exit(0)
--------------------------------------------------------------------------------