├── .idea
│   ├── .gitignore
│   ├── PyRain.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── README.md
├── analysis
│   ├── climatology
│   │   └── climatology.py
│   ├── precip_estimation
│   │   ├── __init__.py
│   │   ├── erasim_sample.py
│   │   ├── erasim_sample_unbalanced.py
│   │   ├── lightgbm_reg.py
│   │   ├── make_histograms.py
│   │   ├── normalise
│   │   │   ├── 5625__00-06-01_12:00to17-12-31_11:00.json
│   │   │   ├── 5625__04-01-01_12:00to17-12-31_11:00.json
│   │   │   ├── 5625__16-04-01_12:00to17-12-31_11:00.json
│   │   │   ├── 5625__18-01-06_12:00to18-12-31_11:00.json
│   │   │   ├── 5625__19-01-06_12:00to19-12-31_11:00.json
│   │   │   ├── 5625__79-01-01_07:00to17-12-31_11:00.json
│   │   │   └── __init__.py
│   │   ├── pred_conf_matrix.py
│   │   ├── sample.py
│   │   ├── sample_unbalanced.py
│   │   ├── sim_sample_balanced.py
│   │   └── sim_sample_unbalanced.py
│   ├── precip_histogram
│   │   ├── __init__.py
│   │   ├── hist.py
│   │   ├── plot.py
│   │   ├── plot_classhist.py
│   │   └── results
│   │       ├── era140625.json
│   │       ├── era5625.json
│   │       ├── imerg140625.json
│   │       ├── imerg5625.json
│   │       └── imerg_25bi.json
│   └── variable_correlations
│       ├── __init__.py
│       ├── corr.py
│       ├── out.json
│       └── plot.py
├── config.yml
├── run_benchmark.py
└── src
    ├── __init__.py
    ├── benchmark
    │   ├── __init__.py
    │   ├── advanced_normalisation.py
    │   ├── baseline_data.py
    │   ├── collect_data.py
    │   ├── graphics.py
    │   ├── metrics.py
    │   ├── models.py
    │   ├── normalisations
    │   │   └── normalisations_sample_datasets.dill
    │   ├── normalise.py
    │   ├── plot_outputs.py
    │   └── utils.py
    ├── convert
    │   ├── convert_era5625_aaai.py
    │   ├── convert_era5625_aaai_sample.py
    │   ├── convert_imerg5625.py
    │   ├── convert_imerg5625_sample.py
    │   ├── convert_simsat5625.py
    │   ├── convert_simsat5625_sample.py
    │   └── test_samples.py
    └── dataloader
        ├── __init__.py
        └── memmap_dataloader.py
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Update: Data now publicly available!
2 |
3 | We are very happy to announce that the memmap datasets are now publicly available at:
4 | https://console.cloud.google.com/storage/browser/aaai_release
5 | You only need an ordinary Google account to access them.
6 |
7 | The data comes in two different resolutions: `5.625` degrees and `1.40625` degrees.
8 | To see which variables (and time ranges) are contained in each dataset, simply download the associated `.dill` file and read it out as follows (Python 3):
9 |
10 | ```
11 | import dill
12 | import pprint
13 | with open("path-to-dill-file", "rb") as f:
14 |     pprint.pprint(dill.load(f))
15 | ```
16 |
17 | Please let us know if you have any questions or issues; for technical problems, please use the GitHub issues.
18 | Many thanks, and we hope you will find RainBench useful!
19 |
20 | # RainBench - Getting Started
21 |
22 | ## Downloading the Dataset
23 | Please register [here](https://forms.gle/3AdMJsKtuJ8M1E1Y8) to download the RainBench dataset.
24 |
25 | After downloading, you should update the data paths in config.yml.
26 |
27 | ## Forecasting Precipitation from ERA
28 | Specify `source` as {'simsat', 'era16_3', 'simsat_era'} to use data (*from 2016*) from Simsat alone, ERA5 alone, or both Simsat and ERA5, respectively.
29 |
30 | To use all data available in ERA5 for training (*from 1979*), set `source` as 'era'.
31 |
32 | Set `inc_time` to concatenate inputs with hour, day, month.
33 |
34 | For example, to train, run
35 |
36 | ```
37 | python3 run_benchmark.py --sources simsat_era --inc_time --config_file config.yml
38 | ```
39 |
40 | ## Forecasting Precipitation from IMERG
41 | Again, specify `source` as {'simsat', 'era16_3', 'simsat_era'} to use data (*from 2016*) from Simsat alone, ERA5 alone, or both Simsat and ERA5, respectively.
42 |
43 | To use all data available in ERA5 for training (*from 2000*), set `source` as 'era'.
44 |
45 | For predicting IMERG precipitation, we found empirically that removing the ReLU at the end of the ConvLSTM works better.
46 |
47 | Set `inc_time` to concatenate inputs with hour, day, month.
48 |
49 | ```
50 | python3 run_benchmark.py --sources simsat_era --no_relu --imerg --inc_time --config_file config.yml
51 | ```
52 |
53 | ## Evaluating trained models
54 |
55 | To evaluate trained models on the test set, run the following.
56 |
57 | ```
58 | python3 run_benchmark.py --test --phase test --load {MODEL_PATH}
59 | ```
60 |
61 |
62 | ## Visualizing Predictions
63 |
64 | To visualize the predictions, run the following.
65 |
66 | ```
67 | python3 -m src.benchmark.plot_outputs --load {MODEL_PATH} --nc_file {ANY_NC_FILE_PATH}
68 | ```
69 |
70 | Example predictions for a random test date (12 July 2019) are shown below:
71 |
72 | ### Truth
73 | 
74 |
75 | ### Simsat
76 | 
77 |
78 | ### ERA
79 | 
80 |
81 | ### Simsat & ERA
82 | 
83 |
84 | # Advanced Topics
85 |
86 | ## Going to higher spatial resolution
87 |
88 | RainBench contains memmap datasets at two different spatial resolutions: 5.625 degrees and 1.40625 degrees.
89 | Fortunately, the NetCDF-->Memmap conversion scripts for 5.625 degrees that come with RainBench can easily be adjusted to NetCDF datasets at higher - or native - resolution. The main change needed is to adjust the pixel width and height of the different variable channels. As the conversion scripts use multiprocessing in order to saturate I/O during dataset conversion, even very high-resolution datasets can be converted to memmaps efficiently.
90 |
91 | ## Generating normalisation files
92 | Under `src/benchmark/normalise.py`, you can generate your own normalisation files to be used for on-the-fly normalisation of training data. Simply insert your own sample configuration and partitioning setup into the marked section and run the file using python3. This will generate a pickled `dill` file, which contains a dictionary with normalisation entries (and, indeed, packaged functions) for each variable field across each partition. Partitions of type `repeat` are expressly supported. As with data conversion, normalisation supports multiprocessing (and out-of-core computation), meaning even datasets at large resolutions can be handled. It is also easy to add new normalisation routines in the fields provided (also have a look at `src/benchmark/transforms.py` for patch-wise local normalisation techniques).
93 |
94 |
--------------------------------------------------------------------------------
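For reference, a minimal sketch (not repository code) of applying the per-variable statistics stored in the normalisation JSON files under `analysis/precip_estimation/normalise/` further down. The `standardise` and `log_precip` helper names are hypothetical, the path assumes you run from the repository root, and the `log(1 + x/std)` precipitation transform mirrors the one used in the `sim_sample_*` scripts below:

```
import json

import numpy as np

# per-variable {"mean", "std"} statistics for the training period
with open("analysis/precip_estimation/normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f:
    stats = json.load(f)

def standardise(x, vbl):
    # plain z-score using the stored mean/std
    return (x - stats[vbl]["mean"]) / stats[vbl]["std"]

def log_precip(x, vbl):
    # log(1 + x/std): the precipitation transform whose statistics are
    # stored under the "1plog::"-prefixed keys
    return np.log(np.maximum(x, 0.0) / stats[vbl]["std"] + 1.0)

t2m = standardise(np.array([275.0, 290.0]), "era5625/t2m")
tp = log_precip(np.array([0.0, 1.2]), "imerg5625/precipitationcal")
print(t2m, tp)
```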
/analysis/climatology/climatology.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta
2 | import numpy as np
3 | import os
4 | import pickle
5 | import sys
6 | import torch as th
7 |
8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))  # repo root, so that src/ is importable
9 | from src.dataloader.memmap_dataloader import Dataset
10 | from src.benchmark.utils import compute_latitude_weighting, compute_weighted_mse
11 |
12 | if __name__ == "__main__":
13 |
14 |     memmap_root = "SET_THIS"  # SET MEMMAP DATA ROOT PATH HERE
15 |     memmap_root2 = "SET_THIS"
16 |     datapath = [os.path.join(memmap_root, "imerg_5625", "imerg_5625.dill"),
17 |                 os.path.join(memmap_root2, "era5625_mf", "era5625_mf.dill"),
18 |                 ]
19 |
20 |     daterange_imerg = (datetime(2000, 6, 1, 0), datetime(2017, 12, 31, 23))
21 |     daterange_era = (datetime(1979, 1, 1, 7), datetime(2017, 12, 31, 23))
22 |
23 |     daterange_val = (datetime(2018, 1, 6, 0), datetime(2018, 12, 31, 23))
24 |
25 |     partition_conf = {"era":
26 |                           {"timerange": (daterange_era[0].timestamp(), daterange_era[1].timestamp()),
27 |                            "increment_s": 60 * 60},
28 |                       "imerg":
29 |                           {"timerange": (daterange_imerg[0].timestamp(), daterange_imerg[1].timestamp()),
30 |                            "increment_s": 60 * 60},
31 |                       "val":
32 |                           {"timerange": (daterange_val[0].timestamp(), daterange_val[1].timestamp()),
33 |                            "increment_s": 60 * 60}
34 |                       }
35 |
36 |     partition_type = "range"
37 |     sample_conf_era = {"m0": {"era": {"tp_era": {"vbl": "era5625/tp"}, "lat2d": {"vbl": "era5625/lat2d"}}}}  # sample modes
38 |     sample_conf_imerg = {"m0": {"era": {"imerg": {"vbl": "imerg5625/precipitationcal"}}}}  # sample modes
39 |
40 |     dataset_era = Dataset(datapath=datapath,
41 |                           partition_conf=partition_conf,
42 |                           partition_type=partition_type,
43 |                           partition_selected="era",
44 |                           sample_conf=sample_conf_era,
45 |                           )
46 |
47 |     grid = dataset_era["era5625/lat2d"]
48 |     lat_grid = compute_latitude_weighting(grid)
49 |
50 |     era_dict = {}
51 |     era_dict_ctr = {}
52 |
53 |     # calculate weekly climatology for ERA5 (incremental mean per ISO calendar week)
54 |     for i, d in enumerate(dataset_era[(daterange_era[0].timestamp(), daterange_era[1].timestamp(), 3600), ["era5625/tp"], {}]):
55 |         t = daterange_era[0] + timedelta(seconds=i * 3600)
56 |         week = t.isocalendar()[1]
57 |         if week in era_dict_ctr:
58 |             era_dict_ctr[week] += 1
59 |         else:
60 |             era_dict_ctr[week] = 1
61 |         if week in era_dict:
62 |             era_dict[week] += (np.array(d) - era_dict[week]) / float(era_dict_ctr[week])
63 |         else:
64 |             era_dict[week] = np.array(d)
65 |     print(sorted(era_dict.keys()))
66 |
67 |     era_annual_climatology = dataset_era[(daterange_era[0].timestamp(), daterange_era[1].timestamp(), 3600), ["era5625/tp"], {}].mean(axis=0)
68 |     del dataset_era
69 |
70 |     with open("era_climatology.pickle", "wb") as f:
71 |         pickle.dump(era_dict, f)
72 |
73 |     dataset_imerg = Dataset(datapath=datapath,
74 |                             partition_conf=partition_conf,
75 |                             partition_type=partition_type,
76 |                             partition_selected="imerg",
77 |                             sample_conf=sample_conf_imerg,
78 |                             )
79 |
80 |     imerg_dict = {}
81 |     imerg_dict_ctr = {}
82 |     # calculate weekly climatology for IMERG
83 |     for i, d in enumerate(dataset_imerg[(daterange_imerg[0].timestamp(), daterange_imerg[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}]):
84 |         t = daterange_imerg[0] + timedelta(seconds=i * 3600)
85 |         week = t.isocalendar()[1]
86 |         if week in imerg_dict_ctr:
87 |             imerg_dict_ctr[week] += 1
88 |         else:
89 |             imerg_dict_ctr[week] = 1
90 |         if week in imerg_dict:
91 |             imerg_dict[week] += (np.array(d) - imerg_dict[week]) / float(imerg_dict_ctr[week])
92 |         else:
93 |             imerg_dict[week] = np.array(d)
94 |     print(sorted(imerg_dict.keys()))
95 |
96 |     imerg_annual_climatology = dataset_imerg[(daterange_imerg[0].timestamp(), daterange_imerg[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}].mean(axis=0)
97 |     del dataset_imerg
98 |     with open("imerg_climatology.pickle", "wb") as f:
99 |         pickle.dump(imerg_dict, f)
100 |
101 |     ########################## Predict ERA
102 |     dataset_era = Dataset(datapath=datapath,
103 |                           partition_conf=partition_conf,
104 |                           partition_type=partition_type,
105 |                           partition_selected="val",
106 |                           sample_conf=sample_conf_era,
107 |                           )
108 |     re = 0
109 |     for i, d in enumerate(dataset_era[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["era5625/tp"], {}]):
110 |         t = daterange_val[0] + timedelta(seconds=i * 3600)
111 |         week = t.isocalendar()[1]
112 |         rms_error = compute_weighted_mse(th.from_numpy(d) * 1000, th.from_numpy(era_dict[week]) * 1000, th.from_numpy(lat_grid)) ** 0.5
113 |         re += (rms_error - re) / float(i + 1)
114 |
115 |     print("ERA WEEKLY RMS:", re)
116 |
117 |     re = 0
118 |     for i, d in enumerate(dataset_era[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["era5625/tp"], {}]):
119 |         rms_error = compute_weighted_mse(th.from_numpy(d) * 1000, th.from_numpy(era_annual_climatology) * 1000, th.from_numpy(lat_grid)) ** 0.5
120 |         re += (rms_error - re) / float(i + 1)
121 |     del dataset_era
122 |
123 |     print("ERA ANNUAL RMS:", re)
124 |
125 |     ########################## Predict IMERG
126 |     dataset_imerg = Dataset(datapath=datapath,
127 |                             partition_conf=partition_conf,
128 |                             partition_type=partition_type,
129 |                             partition_selected="val",
130 |                             sample_conf=sample_conf_imerg,
131 |                             )
132 |
133 |     re = 0
134 |     for i, d in enumerate(dataset_imerg[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}]):
135 |         t = daterange_val[0] + timedelta(seconds=i * 3600)
136 |         week = t.isocalendar()[1]
137 |         rms_error = compute_weighted_mse(th.from_numpy(d), th.from_numpy(imerg_dict[week]), th.from_numpy(lat_grid)) ** 0.5
138 |         re += (rms_error - re) / float(i + 1)
139 |
140 |     print("IMERG WEEKLY RMS:", re)
141 |
142 |     re = 0
143 |     for i, d in enumerate(dataset_imerg[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}]):
144 |         rms_error = compute_weighted_mse(th.from_numpy(d), th.from_numpy(imerg_annual_climatology), th.from_numpy(lat_grid)) ** 0.5
145 |         re += (rms_error - re) / float(i + 1)
146 |     del dataset_imerg
147 |
148 |     print("IMERG ANNUAL RMS:", re)
149 |
--------------------------------------------------------------------------------
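A note on the update rule in `climatology.py` above: `era_dict[week] += (d - era_dict[week]) / n` is a streaming arithmetic mean, so after a full pass each entry holds the mean of all frames falling into that ISO week. A minimal self-contained check (dummy data, not repository code):

```
import numpy as np

rng = np.random.default_rng(0)
frames = rng.random((10, 32, 64))  # dummy stand-ins for hourly tp fields

m, n = None, 0
for x in frames:
    n += 1
    # same incremental update as era_dict[week] above
    m = x.copy() if m is None else m + (x - m) / n

assert np.allclose(m, frames.mean(axis=0))
```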
/analysis/precip_estimation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/precip_estimation/__init__.py
--------------------------------------------------------------------------------
/analysis/precip_estimation/lightgbm_reg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import Counter
3 | import pandas as pd
4 | import lightgbm as lgb
5 | import json
6 | from sklearn.model_selection import train_test_split
7 | from sklearn.preprocessing import StandardScaler
8 | from sklearn.ensemble import GradientBoostingClassifier
9 | from sklearn.metrics import mean_squared_error, roc_auc_score, precision_score
10 |
11 | ##### DEFINE TAG
12 | tag = "sim_bal"
13 | ######
14 |
15 | with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f:
16 |     nl_train = json.load(f)
17 |
18 | train_path = "./sim_samples_bal_train.json"
19 | test_path = "./sim_samples_unb_test.json"
20 | val_path = "./sim_samples_unb_val.json"
21 |
22 | with open(train_path, "r") as f:
23 |     train = json.load(f)
24 | y_train = np.concatenate([np.array(t[1]) for t in train])  # use t[2] for classification
25 | X_train = np.stack([t[0] for t in train])
26 |
27 | with open(test_path, "r") as f:
28 |     test = json.load(f)
29 | y_test = np.concatenate([np.array(t[1]) for t in test])
30 | X_test = np.stack([t[0] for t in test])
31 |
32 | with open(val_path, "r") as f:
33 |     val = json.load(f)
34 | y_val = np.concatenate([np.array(t[1]) for t in val])
35 | X_val = np.stack([t[0] for t in val])
36 | y_val_lst = [[], [], [], []]
37 | X_val_lst = [[], [], [], []]
38 | for c in range(4):
39 |     y_val_lst[c] = np.concatenate([np.array(t[1]) for t in val if t[2][0] == c])
40 |     X_val_lst[c] = np.stack([t[0] for t in val if t[2][0] == c])
41 |
42 | hyper_params = {
43 |     'task': 'train',
44 |     'boosting_type': 'gbdt',
45 |     'objective': 'regression',
46 |     'metric': ['rmse'],
47 |     'learning_rate': 0.005,
48 |     'feature_fraction': 0.9,
49 |     'bagging_fraction': 0.7,
50 |     'bagging_freq': 10,
51 |     'verbose': 0,
52 |     "max_depth": 8,
53 |     "num_leaves": 128,
54 |     "max_bin": 512,
55 |     "num_iterations": 100000,
56 |     "n_estimators": 1000
57 | }
58 |
59 | # train
60 | print("Setting up regressor...")
61 | gbm = lgb.LGBMRegressor(**hyper_params)
62 |
63 | print("Setting up fit... {},{} -- {},{}".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
64 | gbm.fit(X_train, y_train,
65 |         eval_set=[(X_test, y_test)],
66 |         eval_metric='rmse',
67 |         early_stopping_rounds=1000)
68 |
69 | # invert the log(1 + x/std) transform applied at sampling time
70 | undo_tp_train = lambda x: (np.exp(x) - 1) * nl_train["imerg5625/precipitationcal"]["std"]
71 | undo_tp_test = lambda x: (np.exp(x) - 1) * nl_train["imerg5625/precipitationcal"]["std"]
72 | undo_tp_val = lambda x: (np.exp(x) - 1) * nl_train["imerg5625/precipitationcal"]["std"]
73 |
74 | y_pred = gbm.predict(X_train, num_iteration=gbm.best_iteration_)
75 | rmse_train = mean_squared_error(undo_tp_train(y_pred), undo_tp_train(y_train)) ** 0.5
76 | rmse_train_log = mean_squared_error(y_pred, y_train) ** 0.5
77 | print('The rmse of train is:', rmse_train)
78 |
79 | y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
80 | rmse_test = mean_squared_error(undo_tp_test(y_pred), undo_tp_test(y_test)) ** 0.5
81 | rmse_test_log = mean_squared_error(y_pred, y_test) ** 0.5
82 | print('The rmse of test is:', rmse_test)
83 |
84 | y_pred = gbm.predict(X_val, num_iteration=gbm.best_iteration_)
85 | rmse_val = mean_squared_error(undo_tp_val(y_pred), undo_tp_val(y_val)) ** 0.5
86 | rmse_val_log = mean_squared_error(y_pred, y_val) ** 0.5
87 | print('The rmse of val is:', rmse_val)
88 |
89 | rmse_valc_lst = []
90 | rmse_valc_log_lst = []
91 | for c in range(4):
92 |     y_pred = gbm.predict(X_val_lst[c], num_iteration=gbm.best_iteration_)
93 |     rmse_valc = mean_squared_error(undo_tp_val(y_pred), undo_tp_val(y_val_lst[c])) ** 0.5
94 |     rmse_valc_log = mean_squared_error(y_pred, y_val_lst[c]) ** 0.5
95 |     print('The rmse of val-{} is:'.format(c), rmse_valc)
96 |     rmse_valc_lst.append(rmse_valc)
97 |     rmse_valc_log_lst.append(rmse_valc_log)
98 |
99 | # Finished
100 | print("Finished!")
101 | res = {"rmse_train": rmse_train,
102 |        "rmse_test": rmse_test,
103 |        "rmse_val": rmse_val,
104 |        "rmse_train_log": rmse_train_log,
105 |        "rmse_test_log": rmse_test_log,
106 |        "rmse_val_log": rmse_val_log,
107 |        "rmse_valc": rmse_valc_lst,
108 |        "rmse_valc_log": rmse_valc_log_lst}
109 |
110 | print("RES: ", res)
111 |
112 | with open("{}.json".format(tag), "w") as f:
113 |     json.dump(res, f)
114 |
115 | gbm.booster_.save_model('{}.txt'.format(tag), num_iteration=gbm.best_iteration_)
116 |
--------------------------------------------------------------------------------
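The `undo_tp_*` lambdas in `lightgbm_reg.py` above invert the `log(1 + x/std)` transform that the sampling scripts apply to precipitation, so the reported RMSE is in the original units. A quick round-trip check (the `std` value is copied from the 16-04-01 training normalisation file; `fwd`/`inv` are hypothetical names, not repository code):

```
import numpy as np

std = 0.7168794870376587  # imerg5625/precipitationcal std from the train-period file
fwd = lambda x: np.log(np.maximum(x, 0.0) / std + 1.0)  # transform at sampling time
inv = lambda y: (np.exp(y) - 1.0) * std                 # undo_tp_* above

x = np.array([0.0, 0.5, 5.0, 50.0])
assert np.allclose(inv(fwd(x)), x)
```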
/analysis/precip_estimation/make_histograms.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import numpy as np
3 | import os, sys
4 | import json
5 | from scipy import stats
6 | from multiprocessing import Pool, TimeoutError
7 | from functools import partial
8 |
9 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))  # repo root, so that src/ is importable
10 | from src.dataloader.memmap_dataloader import Dataset
11 |
12 |
13 | if __name__ == "__main__":
14 |
15 |     # set up dataloader with any dataset type you can think of
16 |     memmap_root = ""  # SET MEMMAP DATA ROOT PATH HERE
17 |     datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
18 |                 os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
19 |                 os.path.join(memmap_root, "era5625", "era5625.dill"),
20 |                 ]
21 |
22 |     daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 23).timestamp())
23 |     daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 23).timestamp())
24 |     daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 23).timestamp())
25 |
26 |     partition_conf = {"train":
27 |                           {"timerange": daterange_train,
28 |                            "increment_s": 60 * 60},
29 |                       "val":
30 |                           {"timerange": daterange_val,
31 |                            "increment_s": 60 * 60},
32 |                       "test":
33 |                           {"timerange": daterange_test,
34 |                            "increment_s": 60 * 60}}
35 |
36 |     partition_type = "range"
37 |
38 |     sample_conf = {"mode0":  # sample modes
39 |                    {
40 |                        "sample":  # sample sections
41 |                            {
42 |                                "lsm": {"vbl": "era5625/lsm"},
43 |                            },
44 |                    }
45 |                    }
46 |
47 |     dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp())
48 |
49 |     part = "test"
50 |
51 |     # read in every imerg frame, create a rain class histogram for each and save them to a file at the end
52 |     def get_histograms(args):
53 |         dataset_indices, i = args
54 |         print("Starting process {} indices at iteration {}...".format(len(dataset_indices), i))
55 |
56 |         dataset = Dataset(datapath=datapath,
57 |                           partition_conf=partition_conf,
58 |                           partition_type=partition_type,
59 |                           partition_selected=part,
60 |                           sample_conf=sample_conf,
61 |                           )
62 |
63 |         res = []
64 |
65 |         def seg_rain_imerg(frame):
66 |             c0 = np.count_nonzero((frame >= 0.0) & (frame < 2.5))
67 |             c1 = np.count_nonzero((frame >= 2.5) & (frame < 10.0))
68 |             c2 = np.count_nonzero((frame >= 10.0) & (frame < 50.0))
69 |             c3 = np.count_nonzero((frame >= 50.0) & (frame < 500000.0))
70 |             return c0, c1, c2, c3
71 |
72 |         for data_idx in dataset_indices:
73 |             data = dataset.dataset[((*partition_conf[part]["timerange"], 3600), ["imerg5625/precipitationcal"], {})][data_idx]
74 |             segger = seg_rain_imerg(data)
75 |             res.append(segger)
76 |
77 |         return res
78 |
79 |     dataset = Dataset(datapath=datapath,
80 |                       partition_conf=partition_conf,
81 |                       partition_type=partition_type,
82 |                       partition_selected=part,
83 |                       sample_conf=sample_conf,
84 |                       )
85 |     num_idx_shp = dataset.dataset[((*partition_conf[part]["timerange"], 3600), ["imerg5625/precipitationcal"], {})].shape
86 |     num_idx = num_idx_shp[0]
87 |     print("Num idx: {}".format(num_idx))
88 |     n_proc = 60
89 |
90 |     idxs = np.array_split(np.array(list(range(num_idx))), n_proc)
91 |     print("IDXS:", idxs)
92 |     with Pool(processes=n_proc) as pool:
93 |         res = pool.map(get_histograms, [(idxlst, i) for idxlst, i in zip(idxs, range(len(idxs)))])
94 |
95 |     totres = []
96 |     for r in res:
97 |         totres += r
98 |
99 |     with open("histo_{}.json".format(part), "w") as f:
100 |         json.dump(totres, f)
--------------------------------------------------------------------------------
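The four-class split in `seg_rain_imerg` above uses fixed rain-rate boundaries; assuming only that the frame holds non-negative rain rates, the same counts can be obtained in one call with `np.histogram` (dummy frame, not repository code):

```
import numpy as np

frame = np.array([[0.0, 1.0], [3.0, 12.0], [55.0, 0.2]])  # dummy rain rates
edges = [0.0, 2.5, 10.0, 50.0, 500000.0]                  # class boundaries from seg_rain_imerg
counts, _ = np.histogram(frame, bins=edges)
print(counts)  # [3 1 1 1] -> (c0, c1, c2, c3)
```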
/analysis/precip_estimation/normalise/5625__00-06-01_12:00to17-12-31_11:00.json:
--------------------------------------------------------------------------------
1 | {
2 | "1plog::era5625/tp": {
3 | "mean": 0.00010117772035300732,
4 | "std": 0.00038926879642531276
5 | },
6 | "1plog::imerg5625/precipitationcal": {
7 | "mean": 0.047896530479192734,
8 | "std": 0.21623234450817108
9 | },
10 | "era5625/ciwc_300hPa": {
11 | "mean": 4.038937731820624e-06,
12 | "std": 1.9641447579488158e-05
13 | },
14 | "era5625/ciwc_500hPa": {
15 | "mean": 3.880424173985375e-06,
16 | "std": 1.3040525118412916e-05
17 | },
18 | "era5625/ciwc_850hPa": {
19 | "mean": 1.7265290352952434e-06,
20 | "std": 5.4465622270072345e-06
21 | },
22 | "era5625/clwc_300hPa": {
23 | "mean": 2.3328139420186744e-08,
24 | "std": 1.963040404007188e-06
25 | },
26 | "era5625/clwc_500hPa": {
27 | "mean": 3.470940100669395e-06,
28 | "std": 1.809844070521649e-05
29 | },
30 | "era5625/clwc_850hPa": {
31 | "mean": 1.7431688320357352e-05,
32 | "std": 4.5697161112912e-05
33 | },
34 | "era5625/lat2d": {
35 | "mean": 0.0,
36 | "std": 51.93614196777344
37 | },
38 | "era5625/lon2d": {
39 | "mean": 177.1875,
40 | "std": 103.91035461425781
41 | },
42 | "era5625/lsm": {
43 | "mean": 0.3370782732963562,
44 | "std": 0.459003746509552
45 | },
46 | "era5625/orography": {
47 | "mean": 379.4975891113281,
48 | "std": 859.8722534179688
49 | },
50 | "era5625/q_300hPa": {
51 | "mean": 0.00013051266432739794,
52 | "std": 0.00017297286831308156
53 | },
54 | "era5625/q_500hPa": {
55 | "mean": 0.0008699031313881278,
56 | "std": 0.0011031731264665723
57 | },
58 | "era5625/q_850hPa": {
59 | "mean": 0.004596292041242123,
60 | "std": 0.004117097705602646
61 | },
62 | "era5625/slt": {
63 | "mean": 0.6792043447494507,
64 | "std": 1.1688841581344604
65 | },
66 | "era5625/sp": {
67 | "mean": 96672.1328125,
68 | "std": 9646.748046875
69 | },
70 | "era5625/t2m": {
71 | "mean": 278.7690734863281,
72 | "std": 21.161643981933594
73 | },
74 | "era5625/t_300hPa": {
75 | "mean": 229.04635620117188,
76 | "std": 10.736078262329102
77 | },
78 | "era5625/t_500hPa": {
79 | "mean": 253.13241577148438,
80 | "std": 13.03946304321289
81 | },
82 | "era5625/t_850hPa": {
83 | "mean": 274.7433166503906,
84 | "std": 15.543451309204102
85 | },
86 | "era5625/tp": {
87 | "mean": 0.00010125964035978541,
88 | "std": 0.00039031924097798765
89 | },
90 | "era5625/z_300hPa": {
91 | "mean": 89486.6640625,
92 | "std": 5084.31396484375
93 | },
94 | "era5625/z_500hPa": {
95 | "mean": 54157.7734375,
96 | "std": 3348.290771484375
97 | },
98 | "era5625/z_850hPa": {
99 | "mean": 13764.474609375,
100 | "std": 1467.3331298828125
101 | },
102 | "imerg5625/precipitationcal": {
103 | "mean": 0.09633872658014297,
104 | "std": 0.7651622295379639
105 | },
106 | "simsat5625/clbt:0": {
107 | "mean": 236.9783477783203,
108 | "std": 7.888313293457031
109 | },
110 | "simsat5625/clbt:1": {
111 | "mean": 251.77040100097656,
112 | "std": 12.534334182739258
113 | },
114 | "simsat5625/clbt:2": {
115 | "mean": 268.89752197265625,
116 | "std": 22.21075439453125
117 | }
118 | }
--------------------------------------------------------------------------------
/analysis/precip_estimation/normalise/5625__04-01-01_12:00to17-12-31_11:00.json:
--------------------------------------------------------------------------------
1 | {
2 | "1plog::era5625/tp": {
3 | "mean": 0.00010132892930414528,
4 | "std": 0.000391952256904915
5 | },
6 | "1plog::imerg5625/precipitationcal": {
7 | "mean": 0.04824421554803848,
8 | "std": 0.21689410507678986
9 | },
10 | "era5625/ciwc_300hPa": {
11 | "mean": 4.024744157504756e-06,
12 | "std": 1.9663053535623476e-05
13 | },
14 | "era5625/ciwc_500hPa": {
15 | "mean": 3.8813036553619895e-06,
16 | "std": 1.3047414540778846e-05
17 | },
18 | "era5625/ciwc_850hPa": {
19 | "mean": 1.7356674106849823e-06,
20 | "std": 5.472066732181702e-06
21 | },
22 | "era5625/clwc_300hPa": {
23 | "mean": 2.4725551384108257e-08,
24 | "std": 2.0352265437395545e-06
25 | },
26 | "era5625/clwc_500hPa": {
27 | "mean": 3.4699708066909807e-06,
28 | "std": 1.8168990209233016e-05
29 | },
30 | "era5625/clwc_850hPa": {
31 | "mean": 1.7463022231822833e-05,
32 | "std": 4.569104203255847e-05
33 | },
34 | "era5625/lat2d": {
35 | "mean": 0.0,
36 | "std": 51.93614196777344
37 | },
38 | "era5625/lon2d": {
39 | "mean": 177.1875,
40 | "std": 103.91035461425781
41 | },
42 | "era5625/lsm": {
43 | "mean": 0.3370782732963562,
44 | "std": 0.459003746509552
45 | },
46 | "era5625/orography": {
47 | "mean": 379.4975891113281,
48 | "std": 859.8722534179688
49 | },
50 | "era5625/q_300hPa": {
51 | "mean": 0.00013101613149046898,
52 | "std": 0.00017383853264618665
53 | },
54 | "era5625/q_500hPa": {
55 | "mean": 0.0008753924630582333,
56 | "std": 0.001109140575863421
57 | },
58 | "era5625/q_850hPa": {
59 | "mean": 0.004605869762599468,
60 | "std": 0.004122736398130655
61 | },
62 | "era5625/slt": {
63 | "mean": 0.6792043447494507,
64 | "std": 1.1688841581344604
65 | },
66 | "era5625/sp": {
67 | "mean": 96670.375,
68 | "std": 9653.7138671875
69 | },
70 | "era5625/t2m": {
71 | "mean": 278.7900390625,
72 | "std": 21.1552734375
73 | },
74 | "era5625/t_300hPa": {
75 | "mean": 229.08236694335938,
76 | "std": 10.754820823669434
77 | },
78 | "era5625/t_500hPa": {
79 | "mean": 253.1505126953125,
80 | "std": 13.043668746948242
81 | },
82 | "era5625/t_850hPa": {
83 | "mean": 274.7486267089844,
84 | "std": 15.535614967346191
85 | },
86 | "era5625/tp": {
87 | "mean": 0.00010141224629478529,
88 | "std": 0.000393031194107607
89 | },
90 | "era5625/z_300hPa": {
91 | "mean": 89490.7578125,
92 | "std": 5091.87841796875
93 | },
94 | "era5625/z_500hPa": {
95 | "mean": 54158.9453125,
96 | "std": 3355.55419921875
97 | },
98 | "era5625/z_850hPa": {
99 | "mean": 13763.185546875,
100 | "std": 1474.3167724609375
101 | },
102 | "imerg5625/precipitationcal": {
103 | "mean": 0.09697847068309784,
104 | "std": 0.7662108540534973
105 | },
106 | "simsat5625/clbt:0": {
107 | "mean": 236.9783477783203,
108 | "std": 7.888313293457031
109 | },
110 | "simsat5625/clbt:1": {
111 | "mean": 251.77040100097656,
112 | "std": 12.534334182739258
113 | },
114 | "simsat5625/clbt:2": {
115 | "mean": 268.89752197265625,
116 | "std": 22.21075439453125
117 | }
118 | }
--------------------------------------------------------------------------------
/analysis/precip_estimation/normalise/5625__16-04-01_12:00to17-12-31_11:00.json:
--------------------------------------------------------------------------------
1 | {
2 | "1plog::era5625/tp": {
3 | "mean": 0.00010312546510249376,
4 | "std": 0.0004055481986142695
5 | },
6 | "1plog::imerg5625/precipitationcal": {
7 | "mean": 0.048683226108551025,
8 | "std": 0.21575742959976196
9 | },
10 | "era5625/ciwc_300hPa": {
11 | "mean": 4.029411684314255e-06,
12 | "std": 1.9714751033461653e-05
13 | },
14 | "era5625/ciwc_500hPa": {
15 | "mean": 3.892440417985199e-06,
16 | "std": 1.3066020983387716e-05
17 | },
18 | "era5625/ciwc_850hPa": {
19 | "mean": 1.7111583474616054e-06,
20 | "std": 5.420844900072552e-06
21 | },
22 | "era5625/clwc_300hPa": {
23 | "mean": 3.145902383039356e-08,
24 | "std": 2.359818154218374e-06
25 | },
26 | "era5625/clwc_500hPa": {
27 | "mean": 3.6120443382969825e-06,
28 | "std": 1.864848491095472e-05
29 | },
30 | "era5625/clwc_850hPa": {
31 | "mean": 1.7701739125186577e-05,
32 | "std": 4.603979687090032e-05
33 | },
34 | "era5625/lat2d": {
35 | "mean": 0.0,
36 | "std": 51.93614196777344
37 | },
38 | "era5625/lon2d": {
39 | "mean": 177.1875,
40 | "std": 103.91035461425781
41 | },
42 | "era5625/lsm": {
43 | "mean": 0.3370782732963562,
44 | "std": 0.459003746509552
45 | },
46 | "era5625/orography": {
47 | "mean": 379.4975891113281,
48 | "std": 859.8722534179688
49 | },
50 | "era5625/q_300hPa": {
51 | "mean": 0.00013589927402790636,
52 | "std": 0.00018083321629092097
53 | },
54 | "era5625/q_500hPa": {
55 | "mean": 0.0009118292946368456,
56 | "std": 0.0011509027099236846
57 | },
58 | "era5625/q_850hPa": {
59 | "mean": 0.00473902840167284,
60 | "std": 0.004170333035290241
61 | },
62 | "era5625/slt": {
63 | "mean": 0.6792043447494507,
64 | "std": 1.1688841581344604
65 | },
66 | "era5625/sp": {
67 | "mean": 96647.2734375,
68 | "std": 9662.501953125
69 | },
70 | "era5625/t2m": {
71 | "mean": 279.39154052734375,
72 | "std": 21.024553298950195
73 | },
74 | "era5625/t_300hPa": {
75 | "mean": 229.49632263183594,
76 | "std": 10.80582332611084
77 | },
78 | "era5625/t_500hPa": {
79 | "mean": 253.59103393554688,
80 | "std": 12.996684074401855
81 | },
82 | "era5625/t_850hPa": {
83 | "mean": 275.28448486328125,
84 | "std": 15.46550464630127
85 | },
86 | "era5625/tp": {
87 | "mean": 0.00010321472655050457,
88 | "std": 0.00040673979674465954
89 | },
90 | "era5625/z_300hPa": {
91 | "mean": 89640.09375,
92 | "std": 5132.85693359375
93 | },
94 | "era5625/z_500hPa": {
95 | "mean": 54243.48828125,
96 | "std": 3392.837158203125
97 | },
98 | "era5625/z_850hPa": {
99 | "mean": 13771.6240234375,
100 | "std": 1512.511474609375
101 | },
102 | "imerg5625/precipitationcal": {
103 | "mean": 0.09606033563613892,
104 | "std": 0.7168794870376587
105 | },
106 | "simsat5625/clbt:0": {
107 | "mean": 236.88621520996094,
108 | "std": 8.104928970336914
109 | },
110 | "simsat5625/clbt:1": {
111 | "mean": 251.5996551513672,
112 | "std": 12.920832633972168
113 | },
114 | "simsat5625/clbt:2": {
115 | "mean": 268.940673828125,
116 | "std": 22.654830932617188
117 | }
118 | }
--------------------------------------------------------------------------------
/analysis/precip_estimation/normalise/5625__18-01-06_12:00to18-12-31_11:00.json:
--------------------------------------------------------------------------------
1 | {
2 | "1plog::era5625/tp": {
3 | "mean": 0.00010232715430902317,
4 | "std": 0.000401621509809047
5 | },
6 | "1plog::imerg5625/precipitationcal": {
7 | "mean": 0.04807817563414574,
8 | "std": 0.21407566964626312
9 | },
10 | "era5625/ciwc_300hPa": {
11 | "mean": 4.074635398865212e-06,
12 | "std": 1.996555511141196e-05
13 | },
14 | "era5625/ciwc_500hPa": {
15 | "mean": 3.881124939653091e-06,
16 | "std": 1.3079568816465326e-05
17 | },
18 | "era5625/ciwc_850hPa": {
19 | "mean": 1.712335915726726e-06,
20 | "std": 5.445999249786837e-06
21 | },
22 | "era5625/clwc_300hPa": {
23 | "mean": 2.9467773998703706e-08,
24 | "std": 2.243578819616232e-06
25 | },
26 | "era5625/clwc_500hPa": {
27 | "mean": 3.5341997772775358e-06,
28 | "std": 1.8467679183231667e-05
29 | },
30 | "era5625/clwc_850hPa": {
31 | "mean": 1.715613871056121e-05,
32 | "std": 4.508942583925091e-05
33 | },
34 | "era5625/lat2d": {
35 | "mean": 0.0,
36 | "std": 51.93614196777344
37 | },
38 | "era5625/lon2d": {
39 | "mean": 177.1875,
40 | "std": 103.91035461425781
41 | },
42 | "era5625/lsm": {
43 | "mean": 0.3370782732963562,
44 | "std": 0.459003746509552
45 | },
46 | "era5625/orography": {
47 | "mean": 379.4975891113281,
48 | "std": 859.8722534179688
49 | },
50 | "era5625/q_300hPa": {
51 | "mean": 0.0001317433052463457,
52 | "std": 0.00017504238348919898
53 | },
54 | "era5625/q_500hPa": {
55 | "mean": 0.0008897155057638884,
56 | "std": 0.001122326240874827
57 | },
58 | "era5625/q_850hPa": {
59 | "mean": 0.00467934412881732,
60 | "std": 0.004145464394241571
61 | },
62 | "era5625/slt": {
63 | "mean": 0.6792043447494507,
64 | "std": 1.1688841581344604
65 | },
66 | "era5625/sp": {
67 | "mean": 96661.5546875,
68 | "std": 9642.8544921875
69 | },
70 | "era5625/t2m": {
71 | "mean": 279.09906005859375,
72 | "std": 20.908159255981445
73 | },
74 | "era5625/t_300hPa": {
75 | "mean": 229.20350646972656,
76 | "std": 10.7034273147583
77 | },
78 | "era5625/t_500hPa": {
79 | "mean": 253.34141540527344,
80 | "std": 12.95738697052002
81 | },
82 | "era5625/t_850hPa": {
83 | "mean": 275.06121826171875,
84 | "std": 15.381122589111328
85 | },
86 | "era5625/tp": {
87 | "mean": 0.00010241532436339185,
88 | "std": 0.00040276843355968595
89 | },
90 | "era5625/z_300hPa": {
91 | "mean": 89554.65625,
92 | "std": 5082.7568359375
93 | },
94 | "era5625/z_500hPa": {
95 | "mean": 54198.89453125,
96 | "std": 3353.39453125
97 | },
98 | "era5625/z_850hPa": {
99 | "mean": 13767.50390625,
100 | "std": 1486.5418701171875
101 | },
102 | "imerg5625/precipitationcal": {
103 | "mean": 0.09452106058597565,
104 | "std": 0.7058719992637634
105 | },
106 | "simsat5625/clbt:0": {
107 | "mean": 237.0115203857422,
108 | "std": 7.813484191894531
109 | },
110 | "simsat5625/clbt:1": {
111 | "mean": 251.81748962402344,
112 | "std": 12.39886474609375
113 | },
114 | "simsat5625/clbt:2": {
115 | "mean": 269.04754638671875,
116 | "std": 22.19377326965332
117 | }
118 | }
--------------------------------------------------------------------------------
/analysis/precip_estimation/normalise/5625__19-01-06_12:00to19-12-31_11:00.json:
--------------------------------------------------------------------------------
1 | {
2 | "1plog::era5625/tp": {
3 | "mean": 0.0001017941176542081,
4 | "std": 0.00040879662265069783
5 | },
6 | "1plog::imerg5625/precipitationcal": {
7 | "mean": 0.047552842646837234,
8 | "std": 0.21335327625274658
9 | },
10 | "era5625/ciwc_300hPa": {
11 | "mean": 4.032832293887623e-06,
12 | "std": 2.0058865629835054e-05
13 | },
14 | "era5625/ciwc_500hPa": {
15 | "mean": 3.87251657230081e-06,
16 | "std": 1.3076591130811721e-05
17 | },
18 | "era5625/ciwc_850hPa": {
19 | "mean": 1.6551554153920733e-06,
20 | "std": 5.288734882924473e-06
21 | },
22 | "era5625/clwc_300hPa": {
23 | "mean": 3.580080232268301e-08,
24 | "std": 2.60587444245175e-06
25 | },
26 | "era5625/clwc_500hPa": {
27 | "mean": 3.5812354326481e-06,
28 | "std": 1.859859003161546e-05
29 | },
30 | "era5625/clwc_850hPa": {
31 | "mean": 1.7308886526734568e-05,
32 | "std": 4.5802691602148116e-05
33 | },
34 | "era5625/lat2d": {
35 | "mean": 0.0,
36 | "std": 51.93614196777344
37 | },
38 | "era5625/lon2d": {
39 | "mean": 177.1875,
40 | "std": 103.91035461425781
41 | },
42 | "era5625/lsm": {
43 | "mean": 0.3370782732963562,
44 | "std": 0.459003746509552
45 | },
46 | "era5625/orography": {
47 | "mean": 379.4975891113281,
48 | "std": 859.8722534179688
49 | },
50 | "era5625/q_300hPa": {
51 | "mean": 0.0001348510995740071,
52 | "std": 0.00017957847740035504
53 | },
54 | "era5625/q_500hPa": {
55 | "mean": 0.0009002968436107039,
56 | "std": 0.0011349129490554333
57 | },
58 | "era5625/q_850hPa": {
59 | "mean": 0.004709702916443348,
60 | "std": 0.004195161163806915
61 | },
62 | "era5625/slt": {
63 | "mean": 0.6792043447494507,
64 | "std": 1.1688841581344604
65 | },
66 | "era5625/sp": {
67 | "mean": 96690.9296875,
68 | "std": 9623.365234375
69 | },
70 | "era5625/t2m": {
71 | "mean": 279.14581298828125,
72 | "std": 21.12673568725586
73 | },
74 | "era5625/t_300hPa": {
75 | "mean": 229.43798828125,
76 | "std": 10.761752128601074
77 | },
78 | "era5625/t_500hPa": {
79 | "mean": 253.47357177734375,
80 | "std": 13.040901184082031
81 | },
82 | "era5625/t_850hPa": {
83 | "mean": 275.2270202636719,
84 | "std": 15.513626098632812
85 | },
86 | "era5625/tp": {
87 | "mean": 0.00010188626765739173,
88 | "std": 0.0004100216319784522
89 | },
90 | "era5625/z_300hPa": {
91 | "mean": 89640.796875,
92 | "std": 5096.0185546875
93 | },
94 | "era5625/z_500hPa": {
95 | "mean": 54256.75390625,
96 | "std": 3352.83251953125
97 | },
98 | "era5625/z_850hPa": {
99 | "mean": 13802.462890625,
100 | "std": 1460.9244384765625
101 | },
102 | "imerg5625/precipitationcal": {
103 | "mean": 0.09389674663543701,
104 | "std": 0.7125973701477051
105 | },
106 | "simsat5625/clbt:0": {
107 | "mean": 237.13368225097656,
108 | "std": 7.68720817565918
109 | },
110 | "simsat5625/clbt:1": {
111 | "mean": 252.09471130371094,
112 | "std": 12.202248573303223
113 | },
114 | "simsat5625/clbt:2": {
115 | "mean": 269.1205139160156,
116 | "std": 21.72414779663086
117 | }
118 | }
--------------------------------------------------------------------------------
/analysis/precip_estimation/normalise/5625__79-01-01_07:00to17-12-31_11:00.json:
--------------------------------------------------------------------------------
1 | {
2 | "1plog::era5625/tp": {
3 | "mean": 9.981045877793804e-05,
4 | "std": 0.0003737492661457509
5 | },
6 | "1plog::imerg5625/precipitationcal": {
7 | "mean": 0.04788779839873314,
8 | "std": 0.21598206460475922
9 | },
10 | "era5625/ciwc_300hPa": {
11 | "mean": 3.962032224080758e-06,
12 | "std": 1.9191713363397866e-05
13 | },
14 | "era5625/ciwc_500hPa": {
15 | "mean": 3.849920176435262e-06,
16 | "std": 1.2940173292008694e-05
17 | },
18 | "era5625/ciwc_850hPa": {
19 | "mean": 1.7343708123007673e-06,
20 | "std": 5.451817742141429e-06
21 | },
22 | "era5625/clwc_300hPa": {
23 | "mean": 1.805662108722572e-08,
24 | "std": 1.6793291024441714e-06
25 | },
26 | "era5625/clwc_500hPa": {
27 | "mean": 3.3815351798693882e-06,
28 | "std": 1.7503703929833136e-05
29 | },
30 | "era5625/clwc_850hPa": {
31 | "mean": 1.7382071746396832e-05,
32 | "std": 4.577460640575737e-05
33 | },
34 | "era5625/lat2d": {
35 | "mean": 0.0,
36 | "std": 51.93614196777344
37 | },
38 | "era5625/lon2d": {
39 | "mean": 177.1875,
40 | "std": 103.91035461425781
41 | },
42 | "era5625/lsm": {
43 | "mean": 0.3370782732963562,
44 | "std": 0.459003746509552
45 | },
46 | "era5625/orography": {
47 | "mean": 379.4975891113281,
48 | "std": 859.8722534179688
49 | },
50 | "era5625/q_300hPa": {
51 | "mean": 0.00012742435501422733,
52 | "std": 0.00016826670616865158
53 | },
54 | "era5625/q_500hPa": {
55 | "mean": 0.0008531111525371671,
56 | "std": 0.0010778913274407387
57 | },
58 | "era5625/q_850hPa": {
59 | "mean": 0.004570512101054192,
60 | "std": 0.004106701351702213
61 | },
62 | "era5625/slt": {
63 | "mean": 0.6792043447494507,
64 | "std": 1.1688841581344604
65 | },
66 | "era5625/sp": {
67 | "mean": 96696.9609375,
68 | "std": 9652.5927734375
69 | },
70 | "era5625/t2m": {
71 | "mean": 278.5038757324219,
72 | "std": 21.239084243774414
73 | },
74 | "era5625/t_300hPa": {
75 | "mean": 228.8600616455078,
76 | "std": 10.72099781036377
77 | },
78 | "era5625/t_500hPa": {
79 | "mean": 252.9261474609375,
80 | "std": 13.068572998046875
81 | },
82 | "era5625/t_850hPa": {
83 | "mean": 274.57611083984375,
84 | "std": 15.585567474365234
85 | },
86 | "era5625/tp": {
87 | "mean": 9.988588863052428e-05,
88 | "std": 0.0003746792790479958
89 | },
90 | "era5625/z_300hPa": {
91 | "mean": 89407.40625,
92 | "std": 5094.4228515625
93 | },
94 | "era5625/z_500hPa": {
95 | "mean": 54111.0625,
96 | "std": 3355.026123046875
97 | },
98 | "era5625/z_850hPa": {
99 | "mean": 13748.427734375,
100 | "std": 1472.0947265625
101 | },
102 | "imerg5625/precipitationcal": {
103 | "mean": 0.09612467885017395,
104 | "std": 0.7596922516822815
105 | },
106 | "simsat5625/clbt:0": {
107 | "mean": 236.9783477783203,
108 | "std": 7.888313293457031
109 | },
110 | "simsat5625/clbt:1": {
111 | "mean": 251.77040100097656,
112 | "std": 12.534334182739258
113 | },
114 | "simsat5625/clbt:2": {
115 | "mean": 268.89752197265625,
116 | "std": 22.21075439453125
117 | }
118 | }
--------------------------------------------------------------------------------
/analysis/precip_estimation/normalise/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/precip_estimation/normalise/__init__.py
--------------------------------------------------------------------------------
/analysis/precip_estimation/pred_conf_matrix.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import Counter
3 | import pandas as pd
4 | import lightgbm as lgb
5 | import json
6 | from sklearn.model_selection import train_test_split
7 | from sklearn.preprocessing import StandardScaler
8 | from sklearn.ensemble import GradientBoostingClassifier
9 | from sklearn.metrics import mean_squared_error, roc_auc_score, precision_score
10 |
11 | ##### DEFINE TAG
12 | tag = "sim_bal"
13 | ######
14 |
15 | with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f:
16 |     nl_train = json.load(f)
17 |
18 | train_path = "./sim_samples_bal_train.json"
19 | test_path = "./sim_samples_unb_test.json"
20 | val_path = "./sim_samples_bal_val.json"
21 |
22 | with open(val_path, "r") as f:
23 |     val = json.load(f)
24 | y_val = np.concatenate([np.array(t[1]) for t in val])
25 | X_val = np.stack([t[0] for t in val])
26 | y_val_lst = [[], [], [], []]
27 | X_val_lst = [[], [], [], []]
28 | for c in range(4):
29 |     y_val_lst[c] = np.concatenate([np.array(t[1]) for t in val if t[2][0] == c])
30 |     X_val_lst[c] = np.stack([t[0] for t in val if t[2][0] == c])
31 |
32 | print(X_val_lst[0].shape)
33 |
34 | print("Open model...")
35 | import joblib
36 | mod = joblib.load("gbmsim_bal.pkl")
37 | print("Loading done...")
38 |
39 | conf_matrix = np.zeros((4, 4))
40 | for i in range(4):
41 |     print("Predict class {}".format(i))
42 |     ypred = mod.predict(X_val_lst[i], num_iteration=mod.best_iteration_)
43 |     print("Done predicting...")
44 |     for p, t in zip(ypred, y_val_lst[i]):
45 |         if p < 2.5:
46 |             c = 0
47 |         elif p >= 2.5 and p < 10.0:
48 |             c = 1
49 |         elif p >= 10.0 and p < 50.0:
50 |             c = 2
51 |         elif p >= 50.0:
52 |             c = 3
53 |         conf_matrix[i, c] += 1.0 / float(len(ypred))
54 |
55 | print("CONF MATRIX:")
56 | print(conf_matrix)
57 |
58 | print("All done...")
59 |
--------------------------------------------------------------------------------
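The per-prediction if/elif chain in `pred_conf_matrix.py` above maps a predicted rain rate to one of four classes; an equivalent vectorised formulation with `np.digitize` (dummy predictions, not repository code):

```
import numpy as np

bounds = np.array([2.5, 10.0, 50.0])           # same class boundaries as above
ypred = np.array([0.3, 2.5, 9.9, 10.0, 80.0])  # dummy predictions
classes = np.digitize(ypred, bounds)
print(classes)  # [0 1 1 2 3]
```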
/analysis/precip_estimation/sim_sample_balanced.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import numpy as np
3 | import os, sys
4 | import json
5 | from scipy import stats
6 | from multiprocessing import Pool, TimeoutError
7 | from functools import partial
8 |
9 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))  # repo root, so that src/ is importable
10 | from src.dataloader.memmap_dataloader import Dataset
11 |
12 |
13 | if __name__ == "__main__":
14 |     with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f:
15 |         nl_train = json.load(f)
16 |
17 |     nl_train["__const__lon2d"] = {"mean": 0.5, "std": 0.28980498288430995}
18 |     nl_train["__const__lat2d"] = {"mean": 0.5, "std": 0.29093928798176877}
19 |     nl_train["era5625/slt"] = {"mean": 1.1389103, "std": 0.6714027}
20 |
21 |     # set up dataloader with any dataset type you can think of
22 |     memmap_root = ""  # SET MEMMAP DATA ROOT PATH HERE
23 |     datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
24 |                 os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
25 |                 os.path.join(memmap_root, "era5625", "era5625.dill"),
26 |                 ]
27 |
28 |     daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 21).timestamp())
29 |     daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp())
30 |     daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 21).timestamp())
31 |
32 |     partition_conf = {"train":
33 |                           {"timerange": daterange_train,
34 |                            "increment_s": 60 * 60},
35 |                       "val":
36 |                           {"timerange": daterange_val,
37 |                            "increment_s": 60 * 60},
38 |                       "test":
39 |                           {"timerange": daterange_test,
40 |                            "increment_s": 60 * 60}}
41 |
42 |     partition_type = "range"
43 |
44 |     dlt = 0
45 |     lt = 0
46 |     grid_shape = (32, 64)
47 |     sample_conf = {"mode0":  # sample modes
48 |                    {
49 |                        "sample":  # sample sections
50 |                            {
51 |                                "lsm": {"vbl": "era5625/lsm"},
52 |                                "orography": {"vbl": "era5625/orography"},
53 |                                "slt": {"vbl": "era5625/slt"},
54 |                                "__const__lat2d": {"vbl": "__const__lat2d",
55 |                                                   "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[0]), axis=1),
56 |                                                                    grid_shape[1], axis=1)},
57 |                                "__const__lon2d": {"vbl": "__const__lon2d",
58 |                                                   "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[1]), axis=0),
59 |                                                                    grid_shape[0], axis=0)},
60 |                                "clbt:0": {"vbl": "simsat5625/clbt:0",
61 |                                           "t": np.array([dlt]) * 3600,
62 |                                           "interpolate": ["nan", "nearest_past", "nearest_future"][1]},
63 |                                "clbt:1": {"vbl": "simsat5625/clbt:1",
64 |                                           "t": np.array([dlt]) * 3600,
65 |                                           "interpolate": ["nan", "nearest_past", "nearest_future"][1]},
66 |                                "clbt:2": {"vbl": "simsat5625/clbt:2",
67 |                                           "t": np.array([dlt]) * 3600,
68 |                                           "interpolate": ["nan", "nearest_past", "nearest_future"][1]}
69 |                            },
70 |                        "label": {"tp": {"vbl": "imerg5625/precipitationcal",
71 |                                         "t": np.array([lt]) * 3600,
72 |                                         "interpolate": ["nan", "nearest_past", "nearest_future"][1]}}}
73 |                    }
74 |
75 |     dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp())
76 |
77 |     part = "train"
78 |     with open("histo_{}.json".format(part), "r") as f:
79 |         histo = np.array(json.load(f))
80 |     histo = histo[slice(None, None, 3)][:-1]
81 |     print("HISTO NOW: ", histo.shape)
82 |
83 |     histo_trans = histo.transpose()
84 |
85 |     n_samples = 250000 * 4
86 |
87 |     from collections import defaultdict
88 |     id_dct = defaultdict(list)
89 |
90 |     # calc frequencies
91 |     f = []
92 |     for j in range(4):
93 |         fc = np.sum(histo_trans[j]) / float(32 * 64 * histo.shape[0])
94 |         f.append(fc)
95 |
96 |     # draw an equal number of frame idxs for each class, weighted by the per-frame class counts
97 |     for c in range(4):
98 |         ch = np.random.choice(np.array(list(range(histo.shape[0]))),
99 |                               int(n_samples / 4.0),
100 |                               p=histo_trans[c] / np.sum(histo_trans[c]))
101 |         id_dct[c] = ch
102 |
103 |     # sort indices by frame
104 |     bcts = []
105 |     for c in range(4):
106 |         print("ch_c: {}".format(id_dct[c]))
107 |         print("max id: {} n frames: {}".format(max(id_dct[c]), histo.shape[0]))
108 |         ct = np.bincount(id_dct[c], minlength=histo.shape[0])
109 |         bcts.append(ct)
110 |         print("Bin {} sum: {}".format(c, np.sum(ct)))
111 |
112 |     print("ID_DCT:", id_dct)
113 |     print("BCTS: ", bcts)
114 |
115 |     b = np.stack(bcts)
116 |     print("b:", b)
117 |
118 |     print("bincount list: {}".format(b))
119 |
120 |     # read in every imerg frame, draw the requested pixels per class and save them to a file at the end
121 |     def get_pixels(args):
122 |         dataset_indices, frame_idxs, i = args
123 |         print("Starting process {} indices at iteration {}...".format(len(dataset_indices), i))
124 |
125 |         def choose_pixel(coord, frame, c):
126 |             sample = frame
127 |             X = None
128 |             y = None
129 |             latid, lonid = coord
130 |             sample_keys = frame[0]["sample"].keys()
131 |             label_keys = frame[0]["label"].keys()
132 |             sample_lst = []
133 |             for sk in sample_keys:
134 |                 if sk[-4:] == "__ts":
135 |                     continue
136 |                 s = sample[0]["sample"][sk][..., latid, lonid]
137 |                 vn = sample_conf["mode0"]["sample"][sk]["vbl"]
138 |                 if sk in ["tp"]:
139 |                     s = np.log(max(s, 0.0) / nl_train[vn]["std"] + 1)
140 |                 else:
141 |                     s = (s - nl_train[vn]["mean"]) / nl_train[vn]["std"]
142 |                 sample_lst.append(s.flatten())
143 |             X = np.concatenate(sample_lst)
144 |             label_lst = []
145 |             for sk in label_keys:
146 |                 if sk[-4:] == "__ts":
147 |                     continue
148 |                 s = sample[0]["label"][sk][..., latid, lonid]
149 |                 vn = sample_conf["mode0"]["label"][sk]["vbl"]
150 |                 if sk in ["tp"]:
151 |                     s = np.log(max(s, 0.0) / nl_train[vn]["std"] + 1)
152 |                 else:
153 |                     s = (s - nl_train[vn]["mean"]) / nl_train[vn]["std"]
154 |                 label_lst.append(s.flatten())
155 |             y = np.concatenate(label_lst)
156 |
157 |             return X.tolist(), y.tolist(), [c]
158 |
159 |         dataset = Dataset(datapath=datapath,
160 |                           partition_conf=partition_conf,
161 |                           partition_type=partition_type,
162 |                           partition_selected=part,
163 |                           sample_conf=sample_conf,
164 |                           )
165 |
166 |         res = []
167 |         for j, frame_idx in enumerate(frame_idxs):
168 |
169 |             data_idx = dataset_indices[:, j]
170 |
171 |             if not sum(data_idx):
172 |                 continue
173 |
174 |             # compile my own sample
175 |             sam = [{"sample": {}, "label": {}}]
176 |
177 |             for k, v in sample_conf["mode0"]["sample"].items():
178 |                 if k[:3] == "__c":
179 |                     sam[0]["sample"][k] = v["val"]
180 |                 else:
181 |                     g = dataset.dataset[((dr[0], dr[1], 3600), [v["vbl"]], {})]
182 |                     if len(g.shape) == 3:
183 |                         sam[0]["sample"][k] = g
184 |                     else:
185 |                         fidx = frame_idx * 3 if k[:4] != "clbt" else frame_idx
186 |                         sam[0]["sample"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][fidx]
187 |             for k, v in sample_conf["mode0"]["label"].items():
188 |                 sam[0]["label"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][frame_idx * 3]
189 |
190 |             frame = sam[0]["label"]["tp"][0]
191 |
192 |             bounds = [(0.0, 2.5),
193 |                       (2.5, 10.0),
194 |                       (10.0, 50.0),
195 |                       (50.0, 500000.0)]
196 |             for c in range(4):
197 |                 # pixel coordinates falling into class c
198 |                 idxs = np.where((frame >= bounds[c][0]) & (frame < bounds[c][1]))
199 |                 if data_idx[c] == 0:
200 |                     continue
201 |                 try:
202 |                     ch = np.random.choice(np.array(list(range(len(idxs[0])))),
203 |                                           data_idx[c])
204 |                 except Exception as e:
205 |                     raise Exception("{}: {}, {}".format(e, idxs[0], data_idx[c]))
206 |
207 |                 if ch.size == 0:
208 |                     continue
209 |                 cl = [(idxs[0][h], idxs[1][h]) for h in ch]
210 |                 for cl_idx in cl:
211 |                     spl = choose_pixel(cl_idx, sam, c)
212 |                     res.append(spl)
213 |
214 |         return res
215 |
216 |     n_proc = 40
217 |     idxs = np.array_split(b, n_proc, axis=1)
218 |     print("IDXS: ", idxs)
219 |     frame_idxs = np.array_split(np.array(range(b[0].shape[0])), n_proc)
220 |     print("FRAMEIDXS: ", frame_idxs)
221 |     with Pool(processes=n_proc) as pool:
222 |         res = pool.map(get_pixels, [(idxlst, fidxs, i) for idxlst, fidxs, i in zip(idxs, frame_idxs, range(len(idxs)))])
223 |
224 |     totres = []
225 |     for r in res:
226 |         totres += r
227 |
228 |     with open("sim_samples_bal_{}.json".format(part), "w") as f:
229 |         json.dump(totres, f)
--------------------------------------------------------------------------------
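The balanced draw in `sim_sample_balanced.py` above samples, for each rain class, frame indices with probability proportional to how many pixels of that class each frame contains, then regroups the draws per frame. A minimal sketch with dummy counts (variable names mirror the script, but this is not repository code):

```
import numpy as np

rng = np.random.default_rng(0)
histo_trans = rng.integers(1, 100, size=(4, 500))  # dummy per-frame class pixel counts
n_per_class = 1000

bcts = []
for c in range(4):
    # frames are drawn with probability proportional to their class-c pixel count
    p = histo_trans[c] / histo_trans[c].sum()
    ch = rng.choice(histo_trans.shape[1], size=n_per_class, p=p)
    # regroup the draws per frame, as done before spawning the worker pool
    bcts.append(np.bincount(ch, minlength=histo_trans.shape[1]))

b = np.stack(bcts)  # b[c, j] = number of class-c pixels to sample from frame j
assert b.sum() == 4 * n_per_class
```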
/analysis/precip_estimation/sim_sample_unbalanced.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import numpy as np
3 | import os, sys
4 | import json
5 | from scipy import stats
6 | from multiprocessing import Pool, TimeoutError
7 | from functools import partial
8 |
9 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10 | from dataloader.memmap_dataloader import Dataset
11 |
12 |
13 | if __name__ == "__main__":
14 | with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f:
15 | nl_train = json.load(f)
16 |
17 | nl_train["__const__lon2d"] = {"mean": 0.5, "std":0.28980498288430995}
18 | nl_train["__const__lat2d"] = {"mean": 0.5, "std":0.29093928798176877}
19 | nl_train["era5625/slt"] = {"mean": 1.1389103, "std":0.6714027}
20 |
21 |
22 | # set up dataloader with any dataset type you can think of
23 | memmap_root = "" # SET MEMMAP DATA ROOT PATH HERE
24 | datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
25 | os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
26 | os.path.join(memmap_root, "era5625", "era5625.dill"),
27 | ]
28 |
29 | daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 21).timestamp())
30 | daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp())
31 | daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 21).timestamp())
32 |
33 | partition_conf = {"train":
34 | {"timerange": daterange_train,
35 | "increment_s": 60 * 60},
36 | "val":
37 | {"timerange": daterange_val,
38 | "increment_s": 60 * 60},
39 | "test":
40 | {"timerange": daterange_test,
41 | "increment_s": 60 * 60}}
42 |
43 | partition_type = "range"
44 |
45 | dlt = 0
46 | lt = 0
47 | grid_shape = (32,64)
48 | sample_conf = {"mode0": # sample modes
49 | {
50 | "sample": # sample sections
51 | {
52 | "lsm": {"vbl": "era5625/lsm"}, # sample variables
53 | "orography": {"vbl": "era5625/orography"}, # sample variables
54 | "slt": {"vbl": "era5625/slt"},
55 | "__const__lat2d": {"vbl": "__const__lat2d",
56 | "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[0]), axis=1),
57 | grid_shape[1], axis=1)},
58 | "__const__lon2d": {"vbl": "__const__lon2d",
59 | "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[1]), axis=0),
60 | grid_shape[0], axis=0)},
61 | "clbt:0": {"vbl": "simsat5625/clbt:0",
62 | "t": np.array([dlt]) * 3600,
63 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]},
64 | "clbt:1": {"vbl": "simsat5625/clbt:1",
65 | "t": np.array([dlt]) * 3600,
66 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]},
67 | "clbt:2": {"vbl": "simsat5625/clbt:2",
68 | "t": np.array([dlt]) * 3600,
69 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}
70 |
71 | },
72 | "label": {"tp": {"vbl": "imerg5625/precipitationcal",
73 | "t": np.array([lt]) * 3600,
74 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}}}
75 | }
76 |
77 | dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp())
78 |
79 | part = "train"
80 | with open("histo_{}.json".format(part), "r") as f:
81 | histo = np.array(json.load(f))
82 | histo = histo[slice(None, None, 3)][:-1]
83 | print("HISTO NOW: ", histo.shape)
84 |
85 | histo_trans = histo.transpose()
86 |
87 | n_samples = 250000*4
88 |
89 | from collections import defaultdict
90 | id_dct = defaultdict(lambda x: [])
91 |
92 | # calc frequencies
93 | f = []
94 | for j in range(4):
95 | fc = np.sum(histo_trans[j])/ float(32*64*histo.shape[0])
96 | f.append(fc)
97 |
98 | # draw equal number of idxs from each class
99 | for c in range(4):
100 | idx_lst = []
101 | ch = np.random.choice(np.array(list(range(histo.shape[0]))),
102 | int(n_samples * f[c] + 0.5),
103 | p=histo_trans[c]/np.sum(histo_trans[c]))
104 | id_dct[c] = ch
105 |
106 | # sort indices by frame
107 | bcts = []
108 | for c in range(4):
109 | print("ch_c: {}".format(id_dct[c]))
110 | print("minlen: {} max: {}".format(max(id_dct[c]), histo.shape[0]))
111 | ct = np.bincount(id_dct[c], minlength=histo.shape[0])
112 | bcts.append(ct)
113 | print("Bin {} sum: {}".format(c, np.sum(ct)))
114 |
115 | print("ID_DCT:", id_dct)
116 | print("BCTS: ", bcts)
117 |
118 | b = np.stack(bcts)
119 | print("b:", b)
120 |
121 | print("bincount list: {}".format(b))
122 |
123 | # read in every imerg frame and create a rain class histogram for each and save in a file in the end
124 | def get_pixels(args):
125 | dataset_indices, frame_idxs, i = args
126 | print ("Starting process {} indices at iteration {}...".format(len(dataset_indices), i))
127 |
128 | def choose_pixel(coord, frame, c):
129 | sample = frame
130 | X = None
131 | y = None
132 | latid, lonid = coord
133 | sample_keys = frame[0]["sample"].keys()
134 | label_keys = frame[0]["label"].keys()
135 | sample_lst = []
136 | for sk in sample_keys:
137 | if sk[-4:] == "__ts":
138 | continue
139 | s = sample[0]["sample"][sk][...,latid, lonid]
140 | vn = sample_conf["mode0"]["sample"][sk]["vbl"]
141 | if sk in ["tp"]:
142 | s = np.log(max(s, 0.0)/nl_train[vn]["std"] + 1)
143 | else:
144 | s = (s-nl_train[vn]["mean"])/nl_train[vn]["std"]
145 | sample_lst.append(s.flatten())
146 | X = np.concatenate(sample_lst)
147 | label_lst = []
148 | for sk in label_keys:
149 | if sk[-4:] == "__ts":
150 | continue
151 | s = sample[0]["label"][sk][...,latid,lonid]
152 | vn = sample_conf["mode0"]["label"][sk]["vbl"]
153 | if sk in ["tp"]:
154 | s = np.log(max(s, 0.0) / nl_train[vn]["std"] + 1)
155 | else:
156 | s = (s-nl_train[vn]["mean"])/nl_train[vn]["std"]
157 | label_lst.append(s.flatten())
158 | y = np.concatenate(label_lst)
159 |
160 | return X.tolist(), y.tolist(), [c]
161 |
162 | dataset = Dataset(datapath=datapath,
163 | partition_conf=partition_conf,
164 | partition_type=partition_type,
165 | partition_selected=part,
166 | sample_conf=sample_conf,
167 | )
168 |
169 | res = []
170 | for j, frame_idx in enumerate(frame_idxs):
171 |
172 | data_idx = dataset_indices[:, j]
173 |
174 |         if not sum(data_idx):  # nothing requested from this frame
176 |             continue
177 |
179 |         # assemble the sample for this frame manually (constants, inputs, label)
180 |
181 | sam = [{"sample":{}, "label":{}}]
182 |
183 | for k,v in sample_conf["mode0"]["sample"].items():
184 | if k[:3] == "__c":
185 | sam[0]["sample"][k] = v["val"]
186 | else:
187 | g = dataset.dataset[((dr[0], dr[1], 3600), [v["vbl"]], {})]
188 | if len(g.shape) == 3:
189 | sam[0]["sample"][k] = g
190 | else:
191 | fidx = frame_idx*3 if k[:4] != "clbt" else frame_idx
192 | sam[0]["sample"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][fidx]
193 | for k,v in sample_conf["mode0"]["label"].items():
194 | sam[0]["label"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][frame_idx*3]
195 |
196 |         frame = sam[0]["label"]["tp"][0]
197 |
198 | bounds = [(0.0, 2.5),
199 | (2.5, 10.0),
200 | (10.0, 50.0),
201 | (50.0, 500000.0)]
202 | for c in range(4):
203 |             # pixel coordinates whose rain rate falls into class c's bounds
204 | idxs = np.where((frame >= bounds[c][0]) & (frame < bounds[c][1]))
205 |             if data_idx[c] == 0:  # no pixels of this class requested for this frame
206 | continue
207 |
208 | try:
209 | ch = np.random.choice(np.array(list(range(len(idxs[0])))),
210 | data_idx[c])
211 | except Exception as e:
212 | raise Exception("{}: {}, {}".format(e, idxs[0], data_idx[c]))
213 |
214 | if ch.size == 0:
215 | continue
216 | cl = [(idxs[0][h], idxs[1][h]) for h in ch]
217 |
218 |
219 | for cl_idx in cl:
220 | spl = choose_pixel(cl_idx, sam, c)
221 | res.append(spl)
222 |
223 | return res
224 |
225 | n_proc = 40
226 | idxs = np.array_split(b, n_proc, axis=1)
227 | print("IDXS: ", idxs)
228 | frame_idxs = np.array_split(np.array(range(b[0].shape[0])), n_proc)
229 | print("FRAMEIDXS: ", frame_idxs)
230 | with Pool(processes=n_proc) as pool:
231 | res = pool.map(get_pixels, [(idxlst, fidxs, i) for idxlst, fidxs, i in zip(idxs, frame_idxs, range(len(idxs)))])
232 |
233 | totres = []
234 | for r in res:
235 | totres += r
236 |
237 | with open("sim_samples_unb_{}.json".format(part), "w") as f:
238 | json.dump(totres, f)
239 |
--------------------------------------------------------------------------------
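Note on the sampling scheme above: for each rain class the script draws frame indices in proportion to that class's overall frequency, weighting individual frames by how many pixels of the class they contain, then bincounts the draws so each worker knows how many pixels per class to extract from every frame. A minimal self-contained sketch of that pattern, with a toy histogram and invented sizes (none of these names are the repository's API):

import numpy as np

rng = np.random.default_rng(0)
n_frames, n_classes = 1000, 4

# toy per-frame class histogram: histo[f, c] = number of pixels of class c in frame f
histo = rng.integers(1, 100, size=(n_frames, n_classes)).astype(float)
histo_trans = histo.transpose()

n_samples = 10000
# empirical class frequencies over the whole partition
f = histo_trans.sum(axis=1) / histo.sum()

counts = []
for c in range(n_classes):
    # frames with more class-c pixels are proportionally more likely to be drawn
    ch = rng.choice(n_frames,
                    size=int(n_samples * f[c] + 0.5),
                    p=histo_trans[c] / histo_trans[c].sum())
    counts.append(np.bincount(ch, minlength=n_frames))

b = np.stack(counts)                    # (n_classes, n_frames), like `b` above
chunks = np.array_split(b, 4, axis=1)   # one column block per worker process
print(b.shape, [c.shape for c in chunks])

Splitting `b` column-wise, as `np.array_split(b, n_proc, axis=1)` does above, gives each process a contiguous block of frames together with the per-class pixel counts it has to satisfy.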
/analysis/precip_histogram/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/precip_histogram/__init__.py
--------------------------------------------------------------------------------
/analysis/precip_histogram/hist.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import numpy as np
3 | import os, sys
4 | import json
5 |
6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7 |
9 | from dataloader.memmap_dataloader import Dataset
10 |
11 | if __name__ == "__main__":
12 |
13 |
14 | # set up dataloader with any dataset type you can think of
15 | memmap_root = "" # SET MEMMAP DATA ROOT PATH HERE
16 |     datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
17 |                 os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
18 |                 os.path.join(memmap_root, "era5625", "era5625.dill"),
19 |                 # 1.40625 deg memmaps (file names assumed by analogy with the 5.625 deg ones);
20 |                 # required for the era140625/imerg140625 reads below
21 |                 os.path.join(memmap_root, "era140625", "era140625.dill"),
22 |                 os.path.join(memmap_root, "imerg140625", "imerg140625.dill"),
23 |                 ]
20 |
21 | daterange_train = (datetime(2004, 1, 1).timestamp(), datetime(2009, 12, 31, 23).timestamp())
22 | daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp())
23 | daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 23).timestamp())
24 |
25 | partition_conf = {"train":
26 | {"timerange": daterange_train,
27 | "increment_s": 60 * 60},
28 | "val":
29 | {"timerange": daterange_val,
30 | "increment_s": 60 * 60},
31 | "test":
32 | {"timerange": daterange_test,
33 | "increment_s": 60 * 60}}
34 |
35 | partition_type = "range"
36 |
37 | sample_conf = {"mode0": # sample modes
38 | {
39 | "sample": # sample sections
40 | {
41 | "lsm": {"vbl": "era140625/lsm"},
42 | },
43 | }
44 | }
45 |
46 | dataset = Dataset(datapath=datapath,
47 | partition_conf=partition_conf,
48 | partition_type=partition_type,
49 | partition_selected="train",
50 | sample_conf=sample_conf,
51 | )
52 |
53 |     # reuse the bin edges computed from the native-resolution (0.25 deg) IMERG histogram
54 |     with open("results/imerg_25bi.json", "r") as f:
55 |         bins = json.load(f)["hist_den"][1]
57 |
58 |
59 | # era 5625 histogram
60 | print("era 5625...")
61 | era5_tp = dataset.dataset[((*daterange_train, 3600), ["era5625/tp"], {})]
62 | hist_den = np.histogram(era5_tp.flatten()*1000.0, bins=bins, density=True)
63 | hist_noden = np.histogram(era5_tp.flatten()*1000.0, bins=bins, density=False)
64 | res5 = {"hist_den": [x.tolist() for x in hist_den],
65 | "hist_noden": [x.tolist() for x in hist_noden]}
66 |
67 | with open("./results/era5625.json", "w") as f:
68 | json.dump(res5, f)
69 |
70 | # era 140625 histogram
71 | print("era 140625...")
72 | era1_tp = dataset.dataset[((*daterange_train, 3600), ["era140625/tp"], {})]
73 | hist_den = np.histogram(era1_tp.flatten()*1000, bins=bins, density=True)
74 | hist_noden = np.histogram(era1_tp.flatten()*1000, bins=bins, density=False)
75 | res1 = {"hist_den": [x.tolist() for x in hist_den],
76 | "hist_noden": [x.tolist() for x in hist_noden]}
77 |
78 | with open("./results/era140625.json", "w") as f:
79 | json.dump(res1, f)
80 |
81 | # imerg 140625 histogram
82 | print("imerg 140625...")
83 | imerg1_pre = dataset.dataset[((*daterange_train, 3600), ["imerg140625/precipitationcal"], {})]
84 | hist_den = np.histogram(imerg1_pre.flatten(), bins=bins, density=True)
85 | hist_noden = np.histogram(imerg1_pre.flatten(), bins=bins, density=False)
86 | imerg1 = {"hist_den": [x.tolist() for x in hist_den],
87 | "hist_noden": [x.tolist() for x in hist_noden]}
88 |
89 | with open("./results/imerg140625.json", "w") as f:
90 | json.dump(imerg1, f)
91 |
92 | print("imerg 5625...")
93 |     imerg5625_pre = dataset.dataset[((*daterange_train, 3600), ["imerg5625/precipitationcal"], {})]
94 | hist_den = np.histogram(imerg5625_pre.flatten(), bins=bins, density=True)
95 | hist_noden = np.histogram(imerg5625_pre.flatten(), bins=bins, density=False)
96 | imerg5625 = {"hist_den": [x.tolist() for x in hist_den],
97 | "hist_noden": [x.tolist() for x in hist_noden]}
98 | with open("./results/imerg5625.json", "w") as f:
99 | json.dump(imerg5625, f)
100 |
101 |
--------------------------------------------------------------------------------
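The results files written by hist.py store the two arrays returned by np.histogram as plain lists: "hist_den" holds [densities, bin_edges] (density=True) and "hist_noden" holds [counts, bin_edges]. A small stand-alone sketch of writing and re-reading such a file, using made-up data and a hypothetical file name:

import json
import numpy as np

rng = np.random.default_rng(0)
tp = rng.exponential(scale=0.5, size=100000)   # stand-in precipitation [mm/hour]

bins = np.linspace(0.0, 205.0, 101)            # 100 bins, roughly as in the results files
hist_den = np.histogram(tp, bins=bins, density=True)
hist_noden = np.histogram(tp, bins=bins, density=False)

res = {"hist_den": [x.tolist() for x in hist_den],
       "hist_noden": [x.tolist() for x in hist_noden]}
with open("toy_hist.json", "w") as fh:
    json.dump(res, fh)

# reload and, for example, compute the fraction of pixels above 2.5 mm/hour
with open("toy_hist.json", "r") as fh:
    res = json.load(fh)
counts = np.array(res["hist_noden"][0])
edges = np.array(res["hist_noden"][1])
print("fraction above 2.5 mm/hour:", counts[edges[:-1] >= 2.5].sum() / counts.sum())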
/analysis/precip_histogram/plot.py:
--------------------------------------------------------------------------------
1 | import json
5 |
6 | from matplotlib import pyplot as plt
7 | plt.style.use('ggplot')
8 |
9 | # make plots
10 | with open("./results/era5625.json", "r") as f:
11 | era5625 = json.load(f)
12 |
13 | with open("./results/era140625.json", "r") as f:
14 | era140625 = json.load(f)
15 |
16 | with open("./results/imerg5625.json", "r") as f:
17 | imerg5625 = json.load(f)
18 |
19 | with open("./results/imerg140625.json", "r") as f:
20 | imerg140625 = json.load(f)
21 |
22 | with open("./results/imerg_25bi.json", "r") as f:
23 | imerg25 = json.load(f)
24 | markers = ('+', '1', '+', '1', 'x')
25 |
26 | # create plot
27 | tcks = era5625["hist_den"][1]
28 |
29 | fig = plt.figure(figsize=(8, 3))
30 | plt.ylabel('probability density')
31 | plt.xlabel('precipitation [mm/hour]')
32 | ax = fig.gca()
33 | ax.set_yscale("log")
34 | ax.axvline(0, linestyle="-", color="white")
35 | ax.axvline(2, linestyle="--", color="white")
36 | ax.axvline(10, linestyle="--", color="white")
37 | ax.axvline(50, linestyle="--", color="white")
38 | ax.plot(tcks[:-1], era140625["hist_den"][0], label="ERA:tp $1.40625^\circ$", linestyle='None', marker=markers[1])
39 | ax.plot(tcks[:-1], era5625["hist_den"][0], label="ERA:tp $5.625^\circ$", linestyle='None', marker=markers[0])
40 | ax.plot(tcks[:-1], imerg25["hist_den"][0], label="IMERG $0.25^\circ$", linestyle='None', marker=markers[4])
41 | ax.plot(tcks[:-1], imerg140625["hist_den"][0], label="IMERG $1.40625^\circ$", linestyle='None', marker=markers[3])
42 | ax.plot(tcks[:-1], imerg5625["hist_den"][0], label="IMERG $5.625^\circ$", linestyle='None', marker=markers[2])
43 | ax.legend()
44 | plt.grid()
45 | fig.savefig("hist_den.pdf", bbox_inches='tight')
46 |
47 |
--------------------------------------------------------------------------------
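plot.py plots each density against the left bin edges (tcks[:-1]), since np.histogram returns one more edge than it has bins. A minimal reproduction of the same log-scale density scatter on invented data (file name and data are toys, not the repository's):

import numpy as np
from matplotlib import pyplot as plt

rng = np.random.default_rng(0)
tp = rng.exponential(scale=0.5, size=100000)   # toy precipitation [mm/hour]
den, edges = np.histogram(tp, bins=100, density=True)

fig = plt.figure(figsize=(8, 3))
ax = fig.gca()
ax.set_yscale("log")
# one marker per bin, placed at the bin's left edge, as in plot.py
ax.plot(edges[:-1], den, linestyle="None", marker="+", label="toy tp")
ax.set_xlabel("precipitation [mm/hour]")
ax.set_ylabel("probability density")
ax.legend()
fig.savefig("toy_hist_den.pdf", bbox_inches="tight")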
/analysis/precip_histogram/plot_classhist.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import skimage.measure
4 | import matplotlib.pyplot as plt
10 |
11 |
12 | with open("/home/cs/Desktop/fracres.pkl", "rb") as f:
13 | res = pickle.load(f)
14 |
15 | print("Pickle loaded!")
16 |
18 |
19 | f1_red = skimage.measure.block_reduce(res["f1"], (8,8), np.max)
20 | fig, ax = plt.subplots(figsize=(10, 20))
21 | im = ax.imshow(f1_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
22 | vmin=0.95, vmax=1.0, extent=[0, 360,0,180])
23 |
24 | ax.set_xticks(np.linspace(0, 360, 11))
25 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
26 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
27 | lones = lonw + [0] + lone
28 | ax.set_yticks(np.linspace(0, 180, 11))
29 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
30 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
31 | lates = latn + [0] + late
32 | ax.set_xticklabels(lones)
33 | ax.set_yticklabels(lates)
34 | from mpl_toolkits.axes_grid1 import make_axes_locatable
35 | divider = make_axes_locatable(ax)
36 | cax = divider.append_axes("right", size="5%", pad=0.1)
37 |
38 | ls = np.linspace(0.95, 1.0, 5)
39 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
40 | cbar.set_ticklabels(["{:d}%".format(int(c*100.0)) for c in ls])
41 | print("saving ...")
42 | plt.savefig("f1.pdf", bbox_inches='tight')
43 |
44 | #########################
45 | f2_red = skimage.measure.block_reduce(res["f2"], (8,8), np.max)
46 | fig, ax = plt.subplots(figsize=(10, 20))
47 | im = ax.imshow(f2_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
48 | vmin=0, vmax=0.05, extent=[0, 360,0,180])
49 |
50 | ax.set_xticks(np.linspace(0, 360, 11))
51 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
52 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
53 | lones = lonw + [0] + lone
54 | ax.set_yticks(np.linspace(0, 180, 11))
55 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
56 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
57 | lates = latn + [0] + late
58 | ax.set_xticklabels(lones)
59 | ax.set_yticklabels(lates)
60 | from mpl_toolkits.axes_grid1 import make_axes_locatable
61 | divider = make_axes_locatable(ax)
62 | cax = divider.append_axes("right", size="5%", pad=0.1)
63 |
64 | ls = np.linspace(0, 0.05, 5)
65 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
66 | cbar.set_ticklabels(["{:.2f}%".format(c*100.0) for c in ls])
67 | print("saving ...")
68 | #plt.show()
69 | plt.savefig("f2.pdf", bbox_inches='tight')
70 |
71 |
72 | #########################
73 | f3_red = skimage.measure.block_reduce(res["f3"], (8,8), np.max)
74 | fig, ax = plt.subplots(figsize=(10, 20))
75 | im = ax.imshow(f3_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
76 | vmin=0, vmax=0.01, extent=[0, 360,0,180])
77 |
78 | ax.set_xticks(np.linspace(0, 360, 11))
79 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
80 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
81 | lones = lonw + [0] + lone
82 | ax.set_yticks(np.linspace(0, 180, 11))
83 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
84 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
85 | lates = latn + [0] + late
86 | ax.set_xticklabels(lones)
87 | ax.set_yticklabels(lates)
88 | from mpl_toolkits.axes_grid1 import make_axes_locatable
89 | divider = make_axes_locatable(ax)
90 | cax = divider.append_axes("right", size="5%", pad=0.1)
91 |
92 | ls = np.linspace(0, 0.01, 5)
93 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
94 | cbar.set_ticklabels(["{:.2f}%".format(c*100.0) for c in ls])
95 | print("saving ...")
96 | #plt.show()
97 | plt.savefig("f3.pdf", bbox_inches='tight')
98 |
99 | #########################
100 | f4_red = skimage.measure.block_reduce(res["f4"], (8,8), np.max)
101 | fig, ax = plt.subplots(figsize=(10, 20))
102 | im = ax.imshow(f4_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
103 | vmin=0, vmax=0.001, extent=[0, 360,0,180])
104 |
105 | ax.set_xticks(np.linspace(0, 360, 11))
106 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
107 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
108 | lones = lonw + [0] + lone
109 | ax.set_yticks(np.linspace(0, 180, 11))
110 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
111 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
112 | lates = latn + [0] + late
113 | ax.set_xticklabels(lones)
114 | ax.set_yticklabels(lates)
115 | from mpl_toolkits.axes_grid1 import make_axes_locatable
116 | divider = make_axes_locatable(ax)
117 | cax = divider.append_axes("right", size="5%", pad=0.1)
118 |
119 | ls = np.linspace(0, 0.001, 5)
120 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
121 | cbar.set_ticklabels(["{:.2f}%".format(c*100.0) for c in ls])
122 | print("saving ...")
123 | plt.savefig("f4.pdf", bbox_inches='tight')
124 |
--------------------------------------------------------------------------------
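The four panels in plot_classhist.py repeat the same block-reduce, tick-label, and colorbar boilerplate with only vmin/vmax and the output name changing. One possible refactor, sketched here under invented names (plot_fraction_map is not part of the repository):

import pickle
import numpy as np
import skimage.measure
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

def plot_fraction_map(field, vmin, vmax, outfile,
                      fmt=lambda c: "{:.2f}%".format(c * 100.0)):
    """Block-reduce a (lon, lat) fraction field and render it with geographic ticks."""
    red = skimage.measure.block_reduce(field, (8, 8), np.max)
    fig, ax = plt.subplots(figsize=(10, 20))
    im = ax.imshow(red.transpose(), cmap=plt.get_cmap("hot"),
                   interpolation="nearest", vmin=vmin, vmax=vmax,
                   extent=[0, 360, 0, 180])

    ax.set_xticks(np.linspace(0, 360, 11))
    lonw = [r"${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
    lone = [r"${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
    ax.set_xticklabels(lonw + [0] + lone)

    ax.set_yticks(np.linspace(0, 180, 11))
    latn = [r"${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
    lats = [r"${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
    ax.set_yticklabels(latn + [0] + lats)

    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    ticks = np.linspace(vmin, vmax, 5)
    cbar = fig.colorbar(im, cax=cax, ticks=ticks)
    cbar.set_ticklabels([fmt(c) for c in ticks])
    fig.savefig(outfile, bbox_inches="tight")

# hypothetical usage with the pickle loaded in the script above:
# with open("fracres.pkl", "rb") as f:
#     res = pickle.load(f)
# plot_fraction_map(res["f1"], 0.95, 1.0, "f1.pdf")
# plot_fraction_map(res["f4"], 0.0, 0.001, "f4.pdf")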
/analysis/precip_histogram/results/era140625.json:
--------------------------------------------------------------------------------
1 | {"hist_den": [[0.4836122399791112, 0.002774826335797002, 0.00044091698864900903, 0.00012748867450560223, 5.171670783780497e-05, 2.5393532555904468e-05, 1.368843847723224e-05, 7.72077678789198e-06, 4.505650517042691e-06, 2.5703115215379565e-06, 1.3940167152086806e-06, 7.53079205370543e-07, 4.3156617736016433e-07, 1.980374623231844e-07, 9.603623925913461e-08, 4.861461801005883e-08, 3.131613588308035e-08, 1.5508972300631698e-08, 9.245716308338006e-09, 4.771991477117446e-09, 1.7894934790331625e-09, 3.2807441405182438e-09, 2.38599130537755e-09, 1.4912473365992017e-09, 1.7894934790331625e-09, 1.4912445658609687e-09, 8.94748401959521e-10, 8.947467395165813e-10, 5.964989346396807e-10, 8.947467395165813e-10, 8.94748401959521e-10, 8.947467395165813e-10, 2.9824946731984036e-10, 0.0, 2.9824946731984036e-10, 2.9824946731984036e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[1621504138, 9303719, 1478351, 427457, 173401, 85142, 45896, 25887, 15107, 8618, 4674, 2525, 1447, 664, 322, 163, 105, 52, 31, 16, 6, 11, 8, 5, 6, 5, 3, 3, 2, 3, 3, 3, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 
10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]}
--------------------------------------------------------------------------------
/analysis/precip_histogram/results/era5625.json:
--------------------------------------------------------------------------------
1 | {"hist_den": [[0.48363464168047676, 0.0027648189212509558, 0.0004341750084180838, 0.000123436087429969, 5.0164254438357146e-05, 2.5191822774615183e-05, 1.3979022766940212e-05, 7.85515564365604e-06, 4.2542798021500835e-06, 2.6613095623315545e-06, 1.4212728487003644e-06, 6.915589364481638e-07, 4.197043439386092e-07, 1.478505312406419e-07, 1.526199032161465e-07, 3.338560382853205e-08, 9.538735089513845e-09, 9.538752812520931e-09, 4.769367544756922e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[101404273, 579703, 91034, 25881, 10518, 5282, 2931, 1647, 892, 558, 298, 145, 88, 31, 32, 7, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 
45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]}
--------------------------------------------------------------------------------
/analysis/precip_histogram/results/imerg140625.json:
--------------------------------------------------------------------------------
1 | {"hist_den": [[0.48163964033791573, 0.0033633184512914353, 0.0010042788763038406, 0.00042414273407464056, 0.00021545917019077625, 0.0001231492337569391, 7.654794497221805e-05, 5.1089250137649246e-05, 3.565674552927242e-05, 2.5751732959447667e-05, 1.95288895991721e-05, 1.511831091891867e-05, 1.1822374227710223e-05, 9.495035012806957e-06, 7.696050102033505e-06, 6.417801674777481e-06, 5.24776436430529e-06, 4.460028772955226e-06, 3.6719940601147664e-06, 3.1184882842639746e-06, 2.6517131620634873e-06, 2.252476995755405e-06, 1.914827927469819e-06, 1.6780558050415643e-06, 1.401720724240486e-06, 1.2302140764650426e-06, 1.0813132793221193e-06, 9.146079387958986e-07, 7.815292737405232e-07, 6.614449134140249e-07, 5.682051227375965e-07, 4.972845141429662e-07, 4.181718456746111e-07, 3.7352778763122335e-07, 2.932853890609772e-07, 2.6616072880100247e-07, 2.271690296772887e-07, 1.8309077638807314e-07, 1.6444325282609707e-07, 1.4749034016361283e-07, 1.206482284480128e-07, 9.550105311600113e-08, 9.493631090991171e-08, 7.628810698117905e-08, 5.763968886291192e-08, 5.198893216495164e-08, 4.2664830200585314e-08, 3.531856804684215e-08, 3.1080224386864276e-08, 2.7407208804349506e-08, 2.4864271904976874e-08, 1.667030217113629e-08, 1.610526702936002e-08, 1.412742721873686e-08, 9.041553419991591e-09, 1.2714637249171748e-08, 9.606650508741065e-09, 8.193907786867379e-09, 4.80330740524266e-09, 0.0, 0.0, 0.0, 0.0, 2.8254854437473723e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": 
[[1704627942, 11903519, 3554362, 1501134, 762557, 435852, 270920, 180816, 126197, 91141, 69117, 53507, 41842, 33605, 27238, 22714, 18573, 15785, 12996, 11037, 9385, 7972, 6777, 5939, 4961, 4354, 3827, 3237, 2766, 2341, 2011, 1760, 1480, 1322, 1038, 942, 804, 648, 582, 522, 427, 338, 336, 270, 204, 184, 151, 125, 110, 97, 88, 59, 57, 50, 32, 45, 34, 29, 17, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]}
--------------------------------------------------------------------------------
/analysis/precip_histogram/results/imerg5625.json:
--------------------------------------------------------------------------------
1 | {"hist_den": [[0.48159319008157137, 0.0033696811649768705, 0.0010224177598011873, 0.0004368893837773687, 0.00022320061866130475, 0.00012752896144073257, 7.934121933386373e-05, 5.278149638146249e-05, 3.611168677626202e-05, 2.6015320749277806e-05, 1.909138586963083e-05, 1.4503136105447005e-05, 1.1430438899310496e-05, 8.988093137084743e-06, 7.235636629931197e-06, 5.699293336308051e-06, 4.620108317682766e-06, 4.000868849348633e-06, 3.3386689791950426e-06, 2.677866857960841e-06, 2.2941227508493736e-06, 1.9588741526935483e-06, 1.6194622558834044e-06, 1.3230019913877924e-06, 1.2440351632021362e-06, 1.0112980725362578e-06, 8.824631083916479e-07, 6.926699126960669e-07, 6.261747645102431e-07, 5.167317548712659e-07, 4.654750461846056e-07, 4.0590456883989516e-07, 3.643450510314026e-07, 2.9646217180706595e-07, 2.438202622871743e-07, 1.9948930550768806e-07, 1.7455314231922708e-07, 1.3714838789205384e-07, 1.3160752793909977e-07, 1.163687615461514e-07, 9.974465275384403e-08, 7.75788860803537e-08, 8.312054396153669e-08, 5.956972317243464e-08, 4.1560117543046624e-08, 5.125766877628097e-08, 2.909219038653785e-08, 3.601890238333257e-08, 2.7706745028697747e-08, 3.047753278589679e-08, 2.3550820789102067e-08, 1.662404701721865e-08, 9.697396795512615e-09, 8.31205439615367e-09, 6.9267119967947245e-09, 6.926686257174437e-09, 0.0, 4.156027198076835e-09, 8.312023508609325e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[347635136, 2432384, 738026, 315366, 
161116, 92056, 57272, 38100, 26067, 18779, 13781, 10469, 8251, 6488, 5223, 4114, 3335, 2888, 2410, 1933, 1656, 1414, 1169, 955, 898, 730, 637, 500, 452, 373, 336, 293, 263, 214, 176, 144, 126, 99, 95, 84, 72, 56, 60, 43, 30, 37, 21, 26, 20, 22, 17, 12, 7, 6, 5, 5, 0, 3, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]}
--------------------------------------------------------------------------------
/analysis/precip_histogram/results/imerg_25bi.json:
--------------------------------------------------------------------------------
1 | {"hist_den": [[0.48164910687763596, 0.0033566947441984927, 0.0010020441351742298, 0.0004237454064031667, 0.00021557650069177283, 0.00012330520229372353, 7.665047569833547e-05, 5.080823084502584e-05, 3.5499118857840555e-05, 2.58202775503848e-05, 1.9418797832557096e-05, 1.4984095830457264e-05, 1.1824551323726532e-05, 9.46412543833123e-06, 7.713581649442637e-06, 6.347892280497019e-06, 5.273013222306187e-06, 4.403853802670343e-06, 3.706227310458301e-06, 3.133613364922706e-06, 2.6687251237798176e-06, 2.276526112690999e-06, 1.9457463439024665e-06, 1.6640428342992278e-06, 1.427312256438571e-06, 1.2240980957272723e-06, 1.0580693506259627e-06, 9.109855883150046e-07, 7.817468708324833e-07, 6.712755680955512e-07, 5.79255643072758e-07, 4.911180037263263e-07, 4.28682037389018e-07, 3.636541769386662e-07, 3.120861668272464e-07, 2.6543710289106696e-07, 2.2678910351974528e-07, 1.959092669643797e-07, 1.6604887899947486e-07, 1.4040260954604696e-07, 1.2075713851626234e-07, 1.0276329113569813e-07, 8.557388585382994e-08, 7.34829914109534e-08, 6.299028985672703e-08, 5.2265882698786037e-08, 4.4389834767754206e-08, 3.742462231173967e-08, 3.1959490699918636e-08, 2.6932154829922894e-08, 2.1940419236445804e-08, 1.9207841434904995e-08, 1.5636008987796745e-08, 1.270704785244704e-08, 1.0510327000934763e-08, 9.251196525598129e-09, 7.742101537646932e-09, 7.009861253809507e-09, 6.357965179754699e-09, 4.4648797794965e-11, 6.2508316912951e-11, 1.7859519117986002e-11, 2.6789179128179914e-11, 1.7859519117986002e-11, 2.6789278676979004e-11, 8.929759558993001e-12, 8.929759558993001e-12, 1.785938638674711e-11, 0.0, 1.7859519117986002e-11, 8.929759558993001e-12, 0.0, 8.929759558993001e-12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.929759558993001e-12, 0.0, 0.0, 8.929759558993001e-12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.929759558993001e-12], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 
182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[53937578843, 375900183, 112214115, 47453226, 24141378, 13808358, 8583723, 5689770, 3975376, 2891489, 2174618, 1677997, 1324176, 1059842, 863807, 710870, 590500, 493166, 415043, 350918, 298858, 254937, 217895, 186348, 159838, 137081, 118488, 102017, 87544, 75173, 64868, 54998, 48006, 40724, 34949, 29725, 25397, 21939, 18595, 15723, 13523, 11508, 9583, 8229, 7054, 5853, 4971, 4191, 3579, 3016, 2457, 2151, 1751, 1423, 1177, 1036, 867, 785, 712, 5, 7, 2, 3, 2, 3, 1, 1, 2, 0, 2, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]}
--------------------------------------------------------------------------------
/analysis/variable_correlations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/variable_correlations/__init__.py
--------------------------------------------------------------------------------
/analysis/variable_correlations/corr.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import numpy as np
3 | import os, sys
4 | import json
5 | from scipy import stats
6 | from multiprocessing import Pool, TimeoutError
7 | from functools import partial
8 |
9 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10 | from dataloader.memmap_dataloader import Dataset
11 |
12 |
13 | if __name__ == "__main__":
14 |
15 | # set up dataloader with any dataset type you can think of
16 | memmap_root = "" # SET MEMMAP DATA ROOT PATH HERE
17 | datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
18 | os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
19 | os.path.join(memmap_root, "era5625", "era5625.dill"),
20 | ]
21 |
22 | daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 23).timestamp())
23 | daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp())
24 | daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 23).timestamp())
25 |
26 | partition_conf = {"train":
27 | {"timerange": daterange_train,
28 | "increment_s": 60 * 60},
29 | "val":
30 | {"timerange": daterange_val,
31 | "increment_s": 60 * 60},
32 | "test":
33 | {"timerange": daterange_test,
34 | "increment_s": 60 * 60}}
35 |
36 | partition_type = "range"
37 |
38 | sample_conf = {"mode0": # sample modes
39 | {
40 | "sample": # sample sections
41 | {
42 | "lsm": {"vbl": "era140625/lsm"},
43 | },
44 | }
45 | }
46 |
47 | dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp())
48 |
49 | def get_corr(args):
50 | curr_vbl, vbls, x, dr = args
51 | print ("Starting process {}...".format(x))
52 |
53 | dataset = Dataset(datapath=datapath,
54 | partition_conf=partition_conf,
55 | partition_type=partition_type,
56 | partition_selected="train",
57 | sample_conf=sample_conf,
58 | )
59 | res = []
60 | try:
61 | dx1 = dataset.dataset[((*dr, 3600*3), [curr_vbl], {})]
62 | except Exception as e:
63 |
64 | raise Exception("{}: vbl: {} dr: {}".format(e, curr_vbl, dr))
65 | ds1 = dx1[..., int(dx1.shape[-2]/4):-int(dx1.shape[-2]/4), int(dx1.shape[-1]/4):-int(dx1.shape[-1]/4)]
66 | for y, v2 in enumerate(vbls):
67 | print("Process {}, v1: {} v2: {} it:{} start corr...".format(x, curr_vbl, v2, y))
68 | dx2 = dataset.dataset[((*dr, 3600*3), [v2], {})]
69 | ds2 = dx2[..., int(dx2.shape[-2]/4):-int(dx2.shape[-2]/4), int(dx2.shape[-1]/4):-int(dx2.shape[-1]/4)]
70 |             if len(ds1.shape) < len(ds2.shape):
71 |                 # constant fields (e.g. lsm, orography) have no time axis; repeat along time
72 |                 try:
73 |                     a = np.expand_dims(ds1, axis=0).repeat(ds2.shape[0], axis=0)
74 |                 except Exception as e:
75 |                     raise Exception("{}: broadcast failed, shp1: {} shp2: {}".format(e, ds1.shape, ds2.shape))
76 |                 print("WARNING: broadcast shp1: {} shp2: {} to new_shp: {}".format(ds1.shape, ds2.shape, a.shape))
77 | else:
78 | a = ds1
79 | corr = stats.spearmanr(a.flatten(), ds2.flatten())[0]
80 | print("Process {}, v1: {} v2: {} it:{} found corr: {}!".format(x, curr_vbl, v2, y, corr))
81 | res.append(corr)
82 | return res
83 |
84 | pressure_levels = [300, 500, 850]
85 | era_lst = ["era5625"]
86 | simsat_lst = ["simsat5625"]
87 | imerg_lst = ["imerg5625"]
88 | reso_lst = ["5625"]
89 |
90 | resdct = {}
91 | for pl in pressure_levels:
92 | print("pressure level: {}".format(pl))
93 | resdct[pl] = {}
94 | for reso, era, simsat, imerg in zip(reso_lst, era_lst, simsat_lst, imerg_lst):
95 |
96 | vbl_list = ['{}/lon2d'.format(era), '{}/lat2d'.format(era), '{}/lsm'.format(era), '{}/orography'.format(era), '{}/slt'.format(era)]
97 |             # the original if/else branches here were byte-identical, so a single list suffices
98 |             vbl_list += ['{}/z_{}hPa'.format(era, pl), '{}/t_{}hPa'.format(era, pl), '{}/q_{}hPa'.format(era, pl),
99 |                          '{}/sp'.format(era), '{}/clwc_{}hPa'.format(era, pl), '{}/ciwc_{}hPa'.format(era, pl), '{}/t2m'.format(era),
100 |                          '{}/clbt:0'.format(simsat), '{}/clbt:1'.format(simsat), '{}/clbt:2'.format(simsat),
101 |                          "{}/tp".format(era), "{}/precipitationcal".format(imerg)]
107 |
108 | resdct[pl]["vbl_lst"] = vbl_list
109 |
110 |
111 | vbl_args = [vbl_list[i:] for i in range(1, len(vbl_list))]
112 | with Pool(processes=len(vbl_list)) as pool:
113 | res = pool.map(get_corr, [(v, va, x, dr) for v, va, x in zip(vbl_list[:-1], vbl_args, range(1,len(vbl_list)))])
114 |
116 |             resdct[pl][reso] = np.zeros((len(vbl_list), len(vbl_list)))
120 |
121 |             for j, r in enumerate(res):
122 |                 try:
123 |                     resdct[pl][reso][j, j+1:] = np.array(r)
124 |                 except Exception as e:
125 |                     raise Exception("{}: row {} result {}".format(e, j, r))
126 | resdct[pl][reso] = resdct[pl][reso].tolist()
127 |
128 |
129 | with open("out.json", "w") as f:
130 | json.dump(resdct, f)
131 |
--------------------------------------------------------------------------------
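corr.py fills only the upper triangle of each correlation matrix: worker x receives one variable and correlates it against every later variable in the list, and its row lands in resdct[pl][reso][j, j+1:]. A compact, self-contained illustration of the same pattern on random data (toy shapes and worker count; the repository's dataloader is not involved):

import numpy as np
from scipy import stats
from multiprocessing import Pool

rng = np.random.default_rng(0)
n_vbls, n_obs = 6, 5000
data = rng.normal(size=(n_vbls, n_obs))   # stand-in for flattened fields

def upper_row(args):
    i, data = args
    # Spearman rank correlation of variable i with every later variable
    return [stats.spearmanr(data[i], data[j])[0]
            for j in range(i + 1, data.shape[0])]

if __name__ == "__main__":
    with Pool(processes=4) as pool:
        rows = pool.map(upper_row, [(i, data) for i in range(n_vbls - 1)])

    corr = np.zeros((n_vbls, n_vbls))
    for i, r in enumerate(rows):
        corr[i, i + 1:] = r          # upper triangle only, as in out.json
    print(np.round(corr, 3))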
/analysis/variable_correlations/out.json:
--------------------------------------------------------------------------------
1 | {"300": {"vbl_lst": ["era5625/lon2d", "era5625/lat2d", "era5625/lsm", "era5625/orography", "era5625/slt", "era5625/z_300hPa", "era5625/t_300hPa", "era5625/q_300hPa", "era5625/sp", "era5625/clwc_300hPa", "era5625/ciwc_300hPa", "era5625/t2m", "simsat5625/clbt:0", "simsat5625/clbt:1", "simsat5625/clbt:2", "era5625/tp", "imerg5625/precipitationcal"], "5625": [[0.0, 0.0, -0.3177168700168456, -0.09549379829619742, -0.2553785868697466, -0.08715137388046382, -0.21546466295065664, -0.08788077149974781, 0.26045042498838405, -0.026643450825193513, -0.07545864149389221, -0.1050581860011756, 0.08557432278786276, 0.14595313156696713, 0.10649916565759408, 0.008616773509850075, -0.0594204300858448], [0.0, 0.0, 0.19648095479842195, 0.16594718360057314, 0.20859422570752606, 0.1774073524831399, 0.1445060759301099, 0.14803531699837114, -0.18280435432396444, 0.018903555007841, 0.06408965804712029, 0.07415423943217579, -0.08295954371421065, -0.07742337885964642, -0.02292048643260018, -0.03003015419210857, 0.007400616884911511], [0.0, 0.0, 0.0, 0.5960805898339152, 0.8828652529574451, -0.016555773911789876, 0.03643880187738786, 0.041648396825674526, -0.5886072913329173, 0.01867149437345957, 0.0894967009046487, -0.04132121153194514, -0.08813560319544973, -0.13140470629079276, -0.1606952928020179, -0.18602379824595836, 0.008825231456441849], [0.0, 0.0, 0.0, 0.0, 0.6222101968877777, -0.06524898842515552, -0.04513418916142937, -0.009602088616926446, -0.4027445408715066, 0.00851510297907061, 0.0416073859428534, -0.09878319758779724, -0.04491396447895362, -0.07214148838972995, -0.1109931577769699, -0.15149513280567695, -0.01651932122553308], [0.0, 0.0, 0.0, 0.0, 0.0, -0.08466183573680075, -0.043904406268225504, -0.015619121977354438, -0.5847020123643477, 0.010442814337223239, 0.06417416667365214, -0.13347877330431604, -0.06588782452096424, -0.11103598712040974, -0.17317177964851715, -0.21814332763844557, -0.020403097138447266], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.870395365557898, 0.4741300934644555, -0.1686565192882331, 0.05802765211020303, 0.03161067125691434, 0.8096201492748264, 0.0866937327603289, 0.16539684250902525, 0.34405584391580946, 0.14973397928213353, 0.08291304320188556], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46949470487370476, -0.3193456769908918, 0.09071948381206275, 0.08458356116806756, 0.7729447158309221, 0.06414631333930076, 0.07751064783524136, 0.23537215151427512, 0.1994797489858496, 0.14967374492947585], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.16223620952994544, 0.11599272216765051, 0.5793451938685434, 0.4563222944365399, -0.639465327724407, -0.48891281821011773, -0.23903668932388403, 0.24079477570776295, 0.2887785048447469], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.05310070147842954, -0.13896539517119438, -0.24979722469976873, 0.12520888537096275, 0.19071848073711964, 0.11374917438499219, -0.06327363178512428, -0.14415551968348903], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13555550373067665, 0.048712612153705405, -0.09875471172563402, -0.10166281462323232, -0.09662254013746377, 0.08932089461632568, 0.10645078231569388], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06899419821944712, -0.6965412139561395, -0.673840264132455, -0.59141734340533, 0.3098650015670476, 0.4138022349919871], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.030458100504734537, 0.10151221246880085, 0.39661005200476473, 0.16271949476451364, 0.0962131604460685], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9228549043141981, 0.7031292764562275, 
-0.2744093519211432, -0.37311167206662427], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8167429535972578, -0.337723268537943, -0.4296875955861106], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29751518293664203, -0.41526437133709004], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46592336412190316], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}, "500": {"vbl_lst": ["era5625/lon2d", "era5625/lat2d", "era5625/lsm", "era5625/orography", "era5625/slt", "era5625/z_500hPa", "era5625/t_500hPa", "era5625/q_500hPa", "era5625/sp", "era5625/clwc_500hPa", "era5625/ciwc_500hPa", "era5625/t2m", "simsat5625/clbt:0", "simsat5625/clbt:1", "simsat5625/clbt:2", "era5625/tp", "imerg5625/precipitationcal"], "5625": [[0.0, 0.0, -0.3177168700168456, -0.09549379829619742, -0.2553785868697466, 0.05295723434411872, -0.07096749404556527, -0.1395804711203429, 0.26045042498838405, -0.13934431890333882, -0.13255467433478846, -0.1050581860011756, 0.08557432278786276, 0.14595313156696713, 0.10649916565759408, 0.008616773509850075, -0.0594204300858448], [0.0, 0.0, 0.19648095479842195, 0.16594718360057314, 0.20859422570752606, 0.1849560365355781, 0.14086822222010553, 0.1612160685014734, -0.18280435432396444, 0.08343025610629015, 0.0640334782607752, 0.07415423943217579, -0.08295954371421065, -0.07742337885964642, -0.02292048643260018, -0.03003015419210857, 0.007400616884911511], [0.0, 0.0, 0.0, 0.5960805898339152, 0.8828652529574451, -0.07053868078362067, -0.0353426816237076, 0.0946652966686655, -0.5886072913329173, 0.13117349577679238, 0.1349178709952073, -0.04132121153194514, -0.08813560319544973, -0.13140470629079276, -0.1606952928020179, -0.18602379824595836, 0.008825231456441849], [0.0, 0.0, 0.0, 0.0, 0.6222101968877777, -0.07782733550406129, -0.0728198418711775, 0.02334874808387871, -0.4027445408715066, 0.05300645437631226, 0.061588621336249774, -0.09878319758779724, -0.04491396447895362, -0.07214148838972995, -0.1109931577769699, -0.15149513280567695, -0.01651932122553308], [0.0, 0.0, 0.0, 0.0, 0.0, -0.12057457593686746, -0.09723632235121875, 0.03701763358225301, -0.5847020123643477, 0.09126944851014936, 0.10470629274591992, -0.13347877330431604, -0.06588782452096424, -0.11103598712040974, -0.17317177964851715, -0.21814332763844557, -0.020403097138447266], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7389038526687302, 0.3029610591486723, 0.040999093062291096, 0.11022764448490562, -0.06456875432403025, 0.7091234084271889, 0.15059945767295227, 0.2667852254508393, 0.42573787240788796, 0.0552571878987345, -0.012375669527371354], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33758457292969346, -0.25298222748088, 0.2080965300320602, 0.01741792123602956, 0.7725253030915625, 0.13305239701129024, 0.21878328190202853, 0.34918226590519535, 0.16426890133685013, 0.08462757704359743], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25603352086757586, 0.5324475696377969, 0.573169727057861, 0.4319934016918736, -0.58557909026585, -0.6049808012315643, -0.2902801902456115, 0.3230616649396378, 0.35845314517542576], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.24836678437428797, -0.25676220583509257, -0.24979722469976873, 0.12520888537096275, 0.19071848073711964, 0.11374917438499219, -0.06327363178512428, -0.14415551968348903], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6763803371855069, 0.2588962827909601, -0.36372555377681476, -0.4290842938757065, -0.32330973010071273, 0.3943629942693203, 
0.3835579865501589], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09426505527839732, -0.5335649363606867, -0.6275946043762992, -0.5346292985387815, 0.43573602200899975, 0.4837899533757049], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.030458100504734537, 0.10151221246880085, 0.39661005200476473, 0.16271949476451364, 0.0962131604460685], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9228549043141981, 0.7031292764562275, -0.2744093519211432, -0.37311167206662427], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8167429535972578, -0.337723268537943, -0.4296875955861106], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29751518293664203, -0.41526437133709004], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46592336412190316], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}, "850": {"vbl_lst": ["era5625/lon2d", "era5625/lat2d", "era5625/lsm", "era5625/orography", "era5625/slt", "era5625/z_850hPa", "era5625/t_850hPa", "era5625/q_850hPa", "era5625/sp", "era5625/clwc_850hPa", "era5625/ciwc_850hPa", "era5625/t2m", "simsat5625/clbt:0", "simsat5625/clbt:1", "simsat5625/clbt:2", "era5625/tp", "imerg5625/precipitationcal"], "5625": [[0.0, 0.0, -0.3177168700168456, -0.09549379829619742, -0.2553785868697466, 0.13673565999279566, -0.11354189049566119, -0.0830713108568745, 0.26045042498838405, 0.08406407874550167, -0.013547487933883794, -0.1050581860011756, 0.08557432278786276, 0.14595313156696713, 0.10649916565759408, 0.008616773509850075, -0.0594204300858448], [0.0, 0.0, 0.19648095479842195, 0.16594718360057314, 0.20859422570752606, 0.020309037494548158, 0.21426105568084475, 0.10278705265181116, -0.18280435432396444, -0.14993650243285478, -0.04299503392766058, 0.07415423943217579, -0.08295954371421065, -0.07742337885964642, -0.02292048643260018, -0.03003015419210857, 0.007400616884911511], [0.0, 0.0, 0.0, 0.5960805898339152, 0.8828652529574451, -0.07392622056763892, 0.14900044637439597, -0.009355606612847452, -0.5886072913329173, -0.25986102534891825, -0.01979780736656416, -0.04132121153194514, -0.08813560319544973, -0.13140470629079276, -0.1606952928020179, -0.18602379824595836, 0.008825231456441849], [0.0, 0.0, 0.0, 0.0, 0.6222101968877777, -0.02820542710667479, 0.0578947732508045, -0.06223938384586625, -0.4027445408715066, -0.18336782530457968, -0.00874963621248447, -0.09878319758779724, -0.04491396447895362, -0.07214148838972995, -0.1109931577769699, -0.15149513280567695, -0.01651932122553308], [0.0, 0.0, 0.0, 0.0, 0.0, -0.0666547446381641, 0.08676858436921331, -0.07953531333677089, -0.5847020123643477, -0.2611174624626138, -0.008413986938549294, -0.13347877330431604, -0.06588782452096424, -0.11103598712040974, -0.17317177964851715, -0.21814332763844557, -0.020403097138447266], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.04876465231406704, -0.11216393724093153, 0.5924683963446897, -0.06654244379845295, -0.11049577189139291, -0.005204861453155439, 0.16079626395998406, 0.2450619306266719, 0.20723626096400036, -0.20048439192230355, -0.17781427775569789], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5996293243895688, -0.40464331116955, -0.2207390818312782, -0.208760630702854, 0.8160776386669425, -0.005403439088521789, 0.06425036750511218, 0.29448679683825923, 0.05295582721334499, 0.07542305814954041], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29449651628271595, 0.2825151657351957, -0.15665414096263358, 0.7165337182949488, 
-0.1703944514860449, -0.16688462995254363, 0.022057433430265002, 0.5146004222176873, 0.34589495178162705], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1511589886138849, 0.01603127506563371, -0.24979722469976873, 0.12520888537096275, 0.19071848073711964, 0.11374917438499219, -0.06327363178512428, -0.14415551968348903], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02791321168121268, 0.04078532940188567, -0.005846554190264432, -0.032370415846414764, -0.03643399791087186, 0.4761089617000506, 0.15927126742474546], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19513110740495518, -0.032408296637920185, -0.08372859202754664, -0.0938051783051143, -0.012270082330155929, 0.0005665961536950896], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.030458100504734537, 0.10151221246880085, 0.39661005200476473, 0.16271949476451364, 0.0962131604460685], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9228549043141981, 0.7031292764562275, -0.2744093519211432, -0.37311167206662427], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8167429535972578, -0.337723268537943, -0.4296875955861106], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29751518293664203, -0.41526437133709004], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46592336412190316], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}}
--------------------------------------------------------------------------------
/analysis/variable_correlations/plot.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 | import seaborn as sns
4 | 
5 | from matplotlib import pyplot as plt
6 | 
7 | # open the correlation results produced by corr.py
8 | with open("./out.json") as f:
9 |     res = json.load(f)
10 | 
11 | # correlation matrices at 300 hPa and 850 hPa (stored upper-triangular)
12 | d300 = np.array(res["300"]["5625"])
13 | d850 = np.array(res["850"]["5625"])
14 | 
15 | # combine both levels into one matrix: 300 hPa above the diagonal,
16 | # 850 hPa below it, ones on the diagonal
17 | t_tot = d300 + d850.transpose() + np.eye(d300.shape[0])
18 | 
19 | # zero out NaN entries (NaN != NaN)
20 | t_tot[t_tot != t_tot] = 0.0
21 | 
22 | tick_labels = ["longitude (lon)",
23 |                "latitude (lat)",
24 |                "land-sea mask (lsm)",
25 |                "orography (oro)",
26 |                "soil type (slt)",
27 |                "geopotential height (z)",
28 |                "temperature (t)",
29 |                "specific humidity (q)",
30 |                "surface pressure (sp)",
31 |                "cloud liquid water content (clwc)",
32 |                "cloud ice water content (ciwc)",
33 |                "temperature at 2m (t2m)",
34 |                "SimSat channel 0 (clbt:0)",
35 |                "SimSat channel 1 (clbt:1)",
36 |                "SimSat channel 2 (clbt:2)",
37 |                "ERA5 total precipitation (tp)",
38 |                "IMERG precipitation"]
39 | tick_labels_short = ["lon", "lat", "lsm", "oro", "slt",
40 |                      "z", "t", "q", "sp", "clwc", "ciwc",
41 |                      "t2m", "clbt:0", "clbt:1", "clbt:2",
42 |                      "tp", "imerg"]
43 | sns.heatmap(t_tot,
44 |             xticklabels=tick_labels_short,
45 |             yticklabels=tick_labels_short,
46 |             cmap='coolwarm', vmin=-1, vmax=1,
47 |             annot=False,
48 |             center=0)
49 | 
50 | plt.savefig("corr_matrix.pdf", bbox_inches='tight')
--------------------------------------------------------------------------------
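The out.json file read above stores, for each pressure-level key ("300", "500" and "850"), the list of variable names ("vbl_lst") and an upper-triangular correlation matrix ("5625"). A minimal sketch for inspecting it, assuming out.json sits in the working directory:

import json
import numpy as np

with open("out.json") as f:
    res = json.load(f)

for level, entry in res.items():
    names = entry["vbl_lst"]
    m = np.nan_to_num(np.array(entry["5625"]))
    # matrices are stored upper-triangular; locate the strongest pair
    i, j = np.unravel_index(np.abs(m).argmax(), m.shape)
    print("{} hPa: strongest correlation {:.3f} between {} and {}".format(
        level, m[i, j], names[i], names[j]))
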
/config.yml:
--------------------------------------------------------------------------------
1 | data_paths: ["/mnt/disks/train-data-era5625-aaai/era5625_aaai.dill", "/mnt/disks/train-data-imerg5625/imerg_5625/imerg_5625.dill", "/mnt/disks/train-data5/mmap/simsat5625/simsat5625.dill"]
2 | norm_path: "/home/egctong/precip-forecasting-e2e-baselines/src/test/normalisation/5625__16-04-01_12:00to17-12-31_11:00.json"
3 | log_path: "/data/forecast_benchmark/"
--------------------------------------------------------------------------------
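This file is presumably consumed by run_benchmark.py through the add_yml_params helper in src/benchmark/utils.py, which merges the YAML keys into the parsed argparse namespace. A minimal sketch of that flow; the --config_file flag name is an assumption here, not necessarily what run_benchmark.py defines:

import yaml
from argparse import ArgumentParser, FileType

parser = ArgumentParser()
parser.add_argument("--config_file", type=FileType("r"), default="config.yml")  # hypothetical flag
args = parser.parse_args()

# mirrors add_yml_params: copy every YAML key onto the namespace
data = yaml.load(args.config_file, Loader=yaml.Loader)
for key, value in data.items():
    setattr(args, key, value)

print(args.data_paths, args.norm_path, args.log_path)
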
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/src/__init__.py
--------------------------------------------------------------------------------
/src/benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/src/benchmark/__init__.py
--------------------------------------------------------------------------------
/src/benchmark/baseline_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.utils.data import DataLoader
4 | from src.benchmark.utils import get_vbl_name
5 | from src.benchmark.collect_data import write_partition_conf, read_normalization_stats
6 | from src.dataloader.memmap_dataloader import Dataset
7 |
8 |
9 | def collate_fn_persistence(x_list, v):
10 | """
11 | return
12 | inputs = [bsz, channels, lat, lon]
13 | output = [bsz, channels, lat, lon]
14 | """
15 | categories={'input': [v], 'input_temporal': [v], 'input_static': [], 'output': [v]}
16 | output = []
17 | inputs = []
18 | lead_times = []
19 |
20 | for sample in x_list:
21 |         output.append(np.concatenate([sample[0]['target'][var] for var in categories['output']], 1))
22 |         inputs.append([sample[0]['label'][var] for var in categories['input']])
23 | lead_times.append(int(sample[0]['__sample_modes__'].split('_')[-1]))
24 |
25 | inputs[-1] = np.concatenate(inputs[-1], 1)
26 |
27 | inputs = torch.Tensor(np.concatenate(inputs))
28 | output = torch.Tensor(np.concatenate(output))
29 | lead_times = torch.Tensor(lead_times).long()
30 | return inputs, output, lead_times
31 |
32 |
33 | def write_sample_conf_persistence(v: str,
34 | lead_times: list,
35 |                                   interpolation: str = "nearest_past",
36 | grid: float = 5.625):
37 | """
38 | Write a sample configuration dictionary for calculating baselines.
39 | """
40 | sample_conf = {}
41 | samples = {var: \
42 | {"vbl": get_vbl_name(var, grid), \
43 | "t": np.array([0]), \
44 |                 "interpolate": interpolation} \
45 | for var in [v]}
46 |
47 | for lt in lead_times:
48 | sample_conf["lead_time_{}".format(int(lt/3600))] = {
49 | "label": samples,
50 |             "target": {var: {"vbl": get_vbl_name(var, grid), "t": np.array([lt]), "interpolate": interpolation} \
51 | for var in [v]}
52 | }
53 | return sample_conf
54 |
55 |
56 | def get_persistence_data(hparams):
57 | """Main function to get data for computing climatology baseline according to hparams"""
58 | # get data
59 | target_v = 'precipitationcal' if hparams['imerg'] else 'tp'
60 | phase = hparams['phase']
61 | datapath = hparams['data_paths']
62 | lead_times = np.arange(hparams['forecast_freq'], hparams['forecast_time_window'] + hparams['forecast_freq'], hparams['forecast_freq']) * 3600
63 | partition_conf = write_partition_conf(hparams['sources'], hparams['imerg'])
64 | sample_conf = write_sample_conf_persistence(target_v, lead_times)
65 | loaderDict = {p: Dataset(datapath=datapath,
66 | partition_conf=partition_conf,
67 | partition_type="range",
68 | partition_selected=p,
69 | sample_conf=sample_conf) for p in [phase]}
70 | # define collate and dataloader
71 |     lead_times = lead_times // 3600  # convert lead times from seconds to hours
72 | collate = lambda x: collate_fn_persistence(x, target_v)
73 | dataloader = DataLoader(loaderDict[phase], batch_size=hparams['batch_size'], \
74 | num_workers=hparams['num_workers'], collate_fn=collate, shuffle=False)
75 | return loaderDict, dataloader, target_v, lead_times
76 |
77 |
78 |
79 | def get_climatology_data(hparams):
80 | """Main function to get data for computing climatology baseline according to hparams"""
81 | loaderDict, trd, target_v, lead_times = get_persistence_data(hparams)
82 |     # get climatology value (mean over all training data)
83 |     normalizer = read_normalization_stats(hparams['norm_path'])
84 | mean_pred_v = normalizer[target_v]['mean']
85 | # get prediction matrix
86 | latlon = (32, 64) if hparams['grid'] == 5.625 else (128, 256)
87 | pred = torch.ones((hparams['batch_size'], 1, *latlon)) * mean_pred_v
88 | return pred, loaderDict, trd, target_v, lead_times
89 |
--------------------------------------------------------------------------------
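A rough sketch of driving get_persistence_data; the hparams keys match the ones read in the functions above, but every concrete value below is an illustrative assumption:

from src.benchmark.baseline_data import get_persistence_data

hparams = {
    "imerg": False,                            # target era5625/tp rather than IMERG
    "phase": "test",
    "data_paths": ["/path/to/era5625.dill"],   # placeholder path
    "forecast_freq": 24,                       # hours between lead times
    "forecast_time_window": 120,               # forecast horizon in hours
    "sources": "era",
    "batch_size": 32,
    "num_workers": 4,
}

loaderDict, dataloader, target_v, lead_times = get_persistence_data(hparams)
for inputs, output, lts in dataloader:
    # persistence baseline: the "forecast" is the current field itself
    break
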
/src/benchmark/collect_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Collect data for benchmark tasks.
3 | """
4 | import json
5 | import numpy as np
6 | from tqdm import tqdm
7 | import torch
8 | from datetime import datetime
9 | from torch.utils.data import DataLoader
10 | from pathlib import Path
11 | # from src.datasets.scaler import Scaler
12 | from src.benchmark.utils import local_time_shift, collate_fn, get_local_shift, is_vbl_const, get_var_name, get_vbl_name
13 | from src.dataloader.memmap_dataloader import Dataset
14 |
15 |
16 | def get_data(hparams: dict, tvt: str = 'train_valid'):
17 | """Main function to get data according to hparams"""
18 | datapath, partition_conf, sample_conf = write_data_config(hparams)
19 |
20 | # Collect datasets
21 | loaderDict = {p: Dataset(datapath=datapath,
22 | partition_conf=partition_conf,
23 | partition_type="range",
24 | partition_selected=p,
25 | sample_conf=sample_conf) for p in tvt.split('_')}
26 |
27 | # Define collate function
28 | normalizer = read_normalization_stats(hparams['norm_path'])
29 |     # the local-time shift is only needed when time features are included
30 |     time_shift = get_local_shift(hparams['grid'], loaderDict['train'].dataset) if hparams['inc_time'] else None
31 |     collate = lambda x: collate_fn(x, hparams, normalizer, time_shift)
32 |
33 | return hparams, loaderDict, normalizer, collate
34 |
35 |
36 | def get_checkpoint_path(model_dir):
37 | """Return path of latest checkpoint found in the model directory."""
38 | chkpt = str(list(Path(model_dir).glob('checkpoints/*'))[-1])
39 | return chkpt
40 |
41 |
42 | def read_normalization_stats(path):
43 | """Read json file storing normalization statistics."""
44 | with open(path) as f:
45 | tmp = json.load(f)
46 | n_dict = {}
47 | for vbl in tmp:
48 | n_dict[get_var_name(vbl)] = tmp[vbl]
49 | return n_dict
50 |
51 |
52 | def write_partition_conf(sources: str, imerg: bool):
53 | """
54 | Write a time partition configuration dictionary.
55 | """
56 | if sources in ['simsat', 'simsat_era', 'era16_3']:
57 | train_timerange = (datetime(2016,4,1,0).timestamp(), datetime(2017, 12, 31,23).timestamp())
58 | sample_stride = 3
59 |
60 | elif sources == 'era':
61 | if imerg:
62 | train_timerange = (datetime(2000,6,1,0).timestamp(), datetime(2017, 12,31,23).timestamp())
63 | else:
64 | train_timerange = (datetime(1979,1,1,7).timestamp(), datetime(2017, 12,31,23).timestamp())
65 | sample_stride = 1
66 |
67 | val_timerange = (datetime(2018,1,6,0).timestamp(), datetime(2018, 12,31,23).timestamp())
68 | test_timerange = (datetime(2019,1,6,0).timestamp(), datetime(2019, 12,31,23).timestamp())
69 |
70 | increments = int(sample_stride * 60 * 60)
71 |
72 | partition_conf = {
73 | "train":
74 | {"timerange": train_timerange,
75 | "increment_s": increments},
76 | "valid":
77 | {"timerange": val_timerange,
78 | "increment_s": increments},
79 | "test":
80 | {"timerange": test_timerange,
81 | "increment_s": increments}
82 | }
83 | return partition_conf
84 |
85 |
86 | def write_sample_conf(
87 | categories: dict,
88 | history: list,
89 | lead_times: list,
90 |         interpolation: str = "nearest_past",
91 | grid: float = 5.625):
92 | """
93 | Write a sample configuration dictionary.
94 | """
95 | sample_conf = {}
96 |
97 | if 'clbt-0' in categories['input']:
98 | samples = {}
99 | for var in categories['input']:
100 | if is_vbl_const(var):
101 | samples[var] = {"vbl": get_vbl_name(var, grid)}
102 | elif var not in ['hour', 'day', 'month', 'clbt-1', 'clbt-2', 'clbt-0']:
103 |                 samples[var] = {"vbl": get_vbl_name(var, grid), "t": history, "interpolate": interpolation}
104 | elif var == 'clbt-0':
105 |                 samples['clbt'] = {"vbl": get_vbl_name('clbt', grid), "t": history, "interpolate": interpolation}
106 | else:
107 | samples = {var: {"vbl": get_vbl_name(var, grid)} if is_vbl_const(var) else \
108 |                     {"vbl": get_vbl_name(var, grid), "t": history, "interpolate": interpolation} \
109 | for var in categories['input'] if var not in ['hour', 'day', 'month']}
110 |
111 | for lt in lead_times:
112 | sample_conf["lead_time_{}".format(int(lt/3600))] = {
113 | "label": samples,
114 |             "target": {var: {"vbl": get_vbl_name(var, grid), "t": np.array([lt]), "interpolate": interpolation} \
115 | for var in categories['output']}
116 | }
117 |
118 | return sample_conf
119 |
120 |
121 | def define_categories(sources: str, inc_time: bool, imerg: bool):
122 | """
123 | Write a dictionary which holds lists specifying the model input / output variables.
124 | """
125 |     simsat_vars_list = ['clbt-0', 'clbt-1', 'clbt-2'] if 'simsat' in sources else []
126 |     era_vars_list = ['sp', 't2m', 'z-300', 'z-500', 'z-850', 't-300', 't-500', 't-850', \
127 |         'q-300', 'q-500', 'q-850', 'clwc-300', 'clwc-500', 'ciwc-500', 'clwc-850', 'ciwc-850'] if 'era' in sources else []
128 |     simsat_vars_list_clbt = ['clbt'] if 'simsat' in sources else []
129 | 
130 | input_temporal = simsat_vars_list + era_vars_list
131 | input_temporal_clbt = simsat_vars_list_clbt + era_vars_list
132 |
133 | constants = ['lsm','orography', 'lat2d', 'lon2d', 'slt']
134 | inputs = input_temporal + (['hour', 'day', 'month'] if inc_time else []) + constants
135 | output = ['precipitationcal'] if imerg else ['tp']
136 |
137 | categories = {
138 | 'input': inputs,
139 | 'input_temporal': input_temporal,
140 | 'input_temporal_clbt': input_temporal_clbt,
141 | 'input_static': constants,
142 | 'output': output}
143 |
144 | return categories
145 |
146 |
147 | def write_data_config(hparams: dict):
148 | """
149 | Define configurations for collecting data.
150 | """
151 | hparams['latlon'] = (32, 64) if hparams['grid'] == 5.625 else (128, 256)
152 |
153 | # define paths
154 | datapath = hparams['data_paths']
155 |
156 | # define data configurations
157 | categories = define_categories(hparams['sources'], inc_time=hparams['inc_time'], imerg=hparams['imerg'])
158 | history = np.flip(np.arange(0, hparams['sample_time_window'] + hparams['sample_freq'], hparams['sample_freq']) * -1 * 3600)
159 | lead_times = np.arange(hparams['forecast_freq'], hparams['forecast_time_window'] + hparams['forecast_freq'], hparams['forecast_freq']) * 3600
160 | partition_conf = write_partition_conf(hparams['sources'], hparams['imerg'])
161 | sample_conf = write_sample_conf(categories, history, lead_times, grid=hparams['grid'])
162 |
163 | # define new parameters in hparams
164 | hparams['categories'] = categories
165 | hparams['seq_len'] = len(history)
166 | hparams['forecast_n_steps'] = len(lead_times)
167 | hparams['out_channels'] = len(categories['output'])
168 | hparams['num_channels'] = len(categories['input']) + hparams['forecast_n_steps']
169 | hparams['lead_times'] = lead_times // 3600
170 | return datapath, partition_conf, sample_conf
171 |
--------------------------------------------------------------------------------
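write_partition_conf above pins the train/validation/test splits to fixed timestamp ranges; a short check of the 'era' + IMERG branch might look like this:

from datetime import datetime
from src.benchmark.collect_data import write_partition_conf

conf = write_partition_conf("era", imerg=True)
# training starts where IMERG coverage begins (June 2000)
assert conf["train"]["timerange"][0] == datetime(2000, 6, 1, 0).timestamp()
# hourly sampling stride, stored in seconds
assert conf["train"]["increment_s"] == 3600
print(conf["valid"]["timerange"], conf["test"]["timerange"])
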
/src/benchmark/graphics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision.utils import make_grid
3 |
4 | def plot_random_outputs_multi_ts(sample_X, sample_y, pred_y,
5 | idx_dictionary, normalizer, order):
6 | """
7 | X of shape [N, seq_len, channels, lat, lon]
8 | y of shape [N, channels, lat, lon]
9 | """
10 | num_lead_times = len(sample_X)
11 | sample_images = []
12 | for v in order:
13 | _, cat_ind_y = idx_dictionary[v]
14 | truth_v = sample_y[:, cat_ind_y]
15 | pred_v = pred_y[:, cat_ind_y]
16 | diff_v = (truth_v - pred_v).abs()
17 |
18 | # scale for the image
19 | vmin = min([pred_v.min(), truth_v.min()])
20 | vmax = max([pred_v.max(), truth_v.max()])
21 | scale = lambda x: (x - vmin) / (vmax - vmin)
22 |
23 | # truth
24 | # sample_images += [scale(sample_X_v[:, ts]) for ts in range(seq_len)]
25 | sample_images += [scale(torch.unsqueeze(truth_v[i], 0)) for i in range(num_lead_times)]
26 | sample_images += [scale(torch.unsqueeze(pred_v[i], 0)) for i in range(num_lead_times)]
27 | sample_images += [scale(torch.unsqueeze(diff_v[i], 0)) for i in range(num_lead_times)]
28 |
29 | nrow = num_lead_times
30 | grid = make_grid(sample_images, nrow=nrow)
31 | return grid
32 |
33 |
--------------------------------------------------------------------------------
/src/benchmark/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 |
5 | def collect_outputs(outputs, multi_gpu):
6 | log_dict = {}
7 | for loss_type in outputs[0]:
8 | if multi_gpu:
9 | collect = []
10 | for output in outputs:
11 | for v in output[loss_type]:
12 |                 if v == v:  # NaN != NaN, so this filters out NaNs
13 | collect.append(v)
14 | else:
15 |         collect = [v[loss_type] for v in outputs if v[loss_type] == v[loss_type]]  # drop NaN entries
16 | if collect:
17 | log_dict[loss_type] = torch.stack(collect).mean()
18 | else:
19 | log_dict[loss_type] = float('nan')
20 | return log_dict
21 |
22 |
23 | def define_loss_fn(lat2d):
24 | weights_lat = compute_latitude_weighting(lat2d)
25 | loss = lambda x, y: compute_weighted_mse(x, y, weights_lat)
26 | return weights_lat, loss
27 |
28 |
29 | def compute_latitude_weighting(lat):
30 | weights_lat = np.cos(np.deg2rad(lat))
31 | weights_lat /= weights_lat.mean()
32 | return weights_lat
33 |
34 |
35 | def compute_weighted_mse(pred, truth, weights_lat, flat_weights=False):
36 | """
37 | Compute the MSE with latitude weighting.
38 | Args:
39 | pred : Forecast. Torch tensor.
40 | truth: Truth. Torch tensor.
41 | weights_lat: Latitude weighting, 2d Torch tensor.
42 | Returns:
43 |         mse: Latitude-weighted mean squared error.
44 | """
45 | if not flat_weights:
46 | weights_lat = truth.new(weights_lat).expand_as(truth)
47 | error = (pred - truth)**2
48 | out = error * weights_lat
49 | return out.mean()
50 |
51 |
52 | def eval_loss(pred, output, lts, loss_function, possible_lead_times, phase='val', target_v=None, normalizer=None):
53 | results = {}
54 |     # Determine which batch samples correspond to which lead time
55 | lead_time_dist = {t: lts == t for t in possible_lead_times}
56 | results[f'{phase}_loss'] = loss_function(pred, output)
57 |     # Calculate loss per lead time
58 | for t, cond in lead_time_dist.items():
59 | if any(cond):
60 | results[f'{phase}_loss_{t}hrs'] = loss_function(pred[cond], output[cond])
61 | else:
62 | results[f'{phase}_loss_{t}hrs'] = pred.new([float('nan')])[0]
63 |
64 |     # Undo normalization
65 |     if normalizer:
66 |         scaled_pred_v = (torch.exp(pred[:, 0, :, :]) - 1) * normalizer[target_v]['std']
67 |         scaled_output_v = (torch.exp(output[:, 0, :, :]) - 1) * normalizer[target_v]['std']
68 |         results[f'{phase}_loss_' + target_v] = loss_function(scaled_pred_v, scaled_output_v)
69 |         # Calculate loss per lead time
70 | for t, cond in lead_time_dist.items():
71 | if any(cond):
72 | results[f'{phase}_loss_{target_v}_{t}hrs'] = loss_function(scaled_pred_v[cond], scaled_output_v[cond])
73 | else:
74 | results[f'{phase}_loss_{target_v}_{t}hrs'] = scaled_pred_v.new([float('nan')])[0]
75 | return results
76 |
77 |
78 | def convert_precip_to_mm(output, target_v, normalizer):
79 | converted = (np.exp(output) - 1) * normalizer[target_v]['std']
80 | if target_v == 'tp':
81 | converted *= 1e3
82 | return converted
83 |
--------------------------------------------------------------------------------
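compute_latitude_weighting scales each grid row by cos(latitude), normalised to mean one, so equatorial cells contribute more to the loss than polar ones. A small numeric illustration on a toy grid:

import numpy as np
import torch
from src.benchmark.metrics import compute_latitude_weighting, compute_weighted_mse

# toy 3x2 grid with rows at 60N, the equator and 60S
lat2d = np.array([[60.0, 60.0], [0.0, 0.0], [-60.0, -60.0]])
w = compute_latitude_weighting(lat2d)
print(w[:, 0])  # approx. [0.75, 1.5, 0.75]: cos(60)=0.5, cos(0)=1, mean=2/3

pred = torch.zeros(3, 2)
truth = torch.ones(3, 2)
print(compute_weighted_mse(pred, truth, w))  # 1.0, because the weights average to one
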
/src/benchmark/models.py:
--------------------------------------------------------------------------------
1 | """
2 | Define ConvLSTM model as forecasting baseline.
3 | """
4 | import torch
5 | import torch.nn as nn
6 | from torch.nn import functional as F
7 | from operator import itemgetter
8 |
9 |
10 | class ConvLSTMForecaster(nn.Module):
11 | def __init__(self,
12 | in_channels: int,
13 | output_shape: tuple,
14 | channels: tuple,
15 | last_ts: bool = True,
16 | kernel_size: int = 3,
17 | last_relu: bool = True):
18 | super().__init__()
19 |
20 | self.last_ts = last_ts
21 | self.rnn = ConvLSTM(in_channels=in_channels, num_filter=channels[0], kernel_size=kernel_size,
22 | patch_h=output_shape[1], patch_w=output_shape[2])
23 | self.out_layer1 = nn.Conv2d(channels[0], channels[1], kernel_size=1)
24 | self.out_layer2 = nn.Conv2d(channels[1], output_shape[0], 1)
25 | self.latlon = output_shape[1:]
26 | self.last_relu = last_relu
27 | self.relu = torch.nn.ReLU()
28 |
29 | def forward(self, inputs):
30 | inputs = inputs.permute(1,0,2,3,4) # seq_first
31 | out, _ = self.rnn(inputs)
32 |
33 | if self.last_ts:
34 | out = out[-1]
35 | else:
36 | out = out.permute(1,0,2,3,4) # bsz_first
37 | bsz = len(out)
38 | out = out.contiguous().view(bsz, -1, *self.latlon) # use all time steps
39 |
40 | out = self.out_layer1(out)
41 | out = self.out_layer2(out)
42 | if self.last_relu:
43 | out = self.relu(out)
44 | return out
45 |
46 |
47 | class ConvLSTM(nn.Module):
48 | """
49 | ConvLSTM based on https://github.com/Hzzone/Precipitation-Nowcasting/blob/master/nowcasting/models/convLSTM.py"""
50 |
51 | def __init__(self, in_channels: int, num_filter: int, kernel_size: int, patch_h: int, patch_w: int):
52 | super().__init__()
53 | self._state_height, self._state_width = patch_h, patch_w # patch dimensions after SpatialDownsampler
54 | self._conv = nn.Conv2d(in_channels=in_channels + num_filter,
55 | out_channels=num_filter*4,
56 | kernel_size=kernel_size,
57 | stride=1,
58 | padding=1)
59 |
60 |         # Register the peephole weights as nn.Parameter so that torch.save
61 |         # persists them and optimizers constructed from model.parameters()
62 |         # (e.g. Adam) update them during training.
63 | self.Wci = nn.Parameter(torch.zeros(1, num_filter, self._state_height, self._state_width))
64 | self.Wcf = nn.Parameter(torch.zeros(1, num_filter, self._state_height, self._state_width))
65 | self.Wco = nn.Parameter(torch.zeros(1, num_filter, self._state_height, self._state_width))
66 |
67 | self._input_channel = in_channels
68 | self._num_filter = num_filter
69 |
70 | def init_hidden(self, inputs):
71 |         c = inputs.new_zeros((inputs.size(1), self._num_filter, self._state_height, self._state_width))
72 |         h = inputs.new_zeros((inputs.size(1), self._num_filter, self._state_height, self._state_width))
73 | return h, c
74 |
75 | # inputs and states should not be all none
76 | # inputs: S*B*C*H*W
77 | def forward(self, inputs):
78 | """
79 |         Expected input shape [seq_len, bsz, channels, height, width].
80 |         Returns outputs of shape [seq_len, bsz, num_filter, height, width]
81 |         together with the final (h, c) state.
82 | """
83 |
84 | seq_len = len(inputs)
85 | self.hidden = self.init_hidden(inputs)
86 | h, c = self.hidden
87 |
88 | outputs = []
89 | for index in range(seq_len):
90 | # initial inputs
91 | if inputs is None:
92 |                 x = torch.zeros((h.size(0), self._input_channel, self._state_height, self._state_width), dtype=torch.float)
93 | else:
94 | x = inputs[index, ...]
95 | cat_x = torch.cat([x, h], dim=1)
96 | conv_x = self._conv(cat_x)
97 | i, f, tmp_c, o = torch.chunk(conv_x, 4, dim=1)
98 |
99 | # lstm equations
100 | i = torch.sigmoid(i+self.Wci*c)
101 | f = torch.sigmoid(f+self.Wcf*c)
102 | c = f*c + i*torch.tanh(tmp_c)
103 | o = torch.sigmoid(o+self.Wco*c)
104 | h = o*torch.tanh(c)
105 |
106 | outputs.append(h)
107 | outputs = torch.stack(outputs)
108 |
109 | return outputs, (h, c)
--------------------------------------------------------------------------------
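A quick shape check for the forecaster; the channel counts and grid size below are small arbitrary values rather than the benchmark configuration:

import torch
from src.benchmark.models import ConvLSTMForecaster

model = ConvLSTMForecaster(in_channels=8,
                           output_shape=(1, 32, 64),  # (out_channels, lat, lon)
                           channels=(16, 8))
x = torch.randn(2, 4, 8, 32, 64)  # [bsz, seq_len, channels, lat, lon]
y = model(x)
print(y.shape)  # torch.Size([2, 1, 32, 64]) since last_ts=True keeps only the final step
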
/src/benchmark/normalisations/normalisations_sample_datasets.dill:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/src/benchmark/normalisations/normalisations_sample_datasets.dill
--------------------------------------------------------------------------------
/src/benchmark/normalise.py:
--------------------------------------------------------------------------------
1 | # Use this script in order to generate normalisation .json files to use with the dataloader
2 |
3 | # First, set up the dataloader as you would for your application.
4 |
5 | ########################################################################################################################
6 | # User-modification START
7 | ########################################################################################################################
8 |
9 | import numpy as np
10 | import dill
11 | import datetime
12 | import multiprocessing
13 | import os
14 |
15 | from src.dataloader import Dataset
16 |
17 | n_procs = 4 # Set to number of available CPUs
18 | expname = "sample_datasets"
19 |
20 | datapath = ["PATH TO ERA5625 SAMPLES DILL FILE",
21 | "PATH TO IMERG5625 SAMPLES DILL FILE",
22 | "PATH TO SIMSAT5625 SAMPLES DILL FILE"]
23 |
24 | # partition_conf = {"train":
25 | # {"timerange": (datetime.datetime(2010, 1, 1, 0).timestamp(),
26 | # datetime.datetime(2010, 12, 31, 0).timestamp()),
27 | # "increment_s": 60 * 60},
28 | # "test":
29 | # {"timerange": (datetime.datetime(2017, 1, 15, 0).timestamp(),
30 | # datetime.datetime(2018, 12, 31, 0).timestamp()),
31 | # "increment_s": 60 * 60}}
32 | #partition_type = "range"
33 |
34 | partition_conf = {"timerange": (datetime.datetime(2018, 1, 1, 0).timestamp(),
35 | datetime.datetime(2019, 12, 31, 23).timestamp()),
36 | # Define partition elements
37 | "partitions": [{"name": "train", "len_s": 12 * 24 * 60 * 60, "increment_s": 60 * 60},
38 | {"name": "val", "len_s": 2 * 24 * 60 * 60, "increment_s": 60 * 60},
39 | {"name": "test", "len_s": 2 * 24 * 60 * 60, "increment_s": 60 * 60}]}
40 |
41 |
42 | partition_type = "repeat"
43 |
44 | sample_conf = {"lead_time_{}".format(int(lt / 3600)): # sample modes
45 | {
46 | "sample": # sample sections
47 | {
48 | "lat2d": {"vbl": "era5625/lat2d"},
49 | "lon2d": {"vbl": "era5625/lon2d"},
50 | "orography": {"vbl": "era5625/orography"},
51 | "slt": {"vbl": "era5625/slt"},
52 | "lsm": {"vbl": "era5625/lsm"}, # sample variables
53 | # "lat": {"vbl": "era5625/lat2d"},
54 | "tp": {"vbl": "era5625/tp",
55 | "t": np.array([lt]),
56 | "interpolate": ["nan", "nearest_past", "nearest_future"][1],
57 | "normalise": ["log"]},
58 | "imerg": {"vbl": "imerg5625/precipitationcal",
59 | "t": np.array([lt]),
60 | "interpolate": ["nan", "nearest_past", "nearest_future"][1],
61 | "normalise": ["log"]},
62 | "clbt0": {"vbl": "simsat5625/clbt:0",
63 | "t": np.array([lt]),
64 | "interpolate": ["nan", "nearest_past", "nearest_future"][1],
65 | "normalise": ["log"]},
66 | "clbt1": {"vbl": "simsat5625/clbt:1",
67 | "t": np.array([lt]),
68 | "interpolate": ["nan", "nearest_past", "nearest_future"][1],
69 | "normalise": ["log"]},
70 | "clbt2": {"vbl": "simsat5625/clbt:2",
71 | "t": np.array([lt]),
72 | "interpolate": ["nan", "nearest_past", "nearest_future"][1],
73 | "normalise": ["log"]},
74 | }
75 | }
76 | for lt in np.array([3, 7]) * 3600} # np.array([1, 3, 6, 9]) * 3600}
77 |
78 | # choose a default normalisation method
79 | default_normalisation = "stdmean_global"
80 |
81 | ########################################################################################################################
82 | # User-modification STOP
83 | ########################################################################################################################
84 |
85 | if partition_type == "repeat":
86 | partition_labels = [v["name"] for v in partition_conf["partitions"]]
87 | else:
88 | partition_labels = list(partition_conf.keys())
89 |
90 | dataset = Dataset(datapath=datapath,
91 | partition_conf=partition_conf,
92 | partition_type=partition_type,
93 | partition_selected="train",
94 | sample_conf=sample_conf,
95 | )
96 | dataset_conf = dict(datapath=datapath,
97 | partition_conf=partition_conf,
98 | partition_type=partition_type,
99 | partition_selected="train",
100 | sample_conf=sample_conf)
101 |
102 | # Go through all partitions and select all variables in use
103 | vbls = {}
104 | for i, partition in enumerate(partition_labels):
105 | vbls[partition] = set()
106 |     print("Generating normalisation data for partition: {} ({}/{})".format(partition, i + 1, len(partition_labels)))
107 | dataset.select_partition(partition)
108 | for mode, mode_v in sample_conf.items():
109 | for section, section_v in mode_v.items():
110 | for k, v in section_v.items():
111 | for n in v.get("normalise", [default_normalisation]):
112 | vbls[partition].add((v["vbl"], n, "t" in v))
113 |
114 | # Retrieve the dataset time segments for all partitions
115 | timesegments = {}
116 | for i, partition in enumerate(partition_labels):
117 | timesegments[partition] = dataset.get_partition_ts_segments(partition)
118 |
119 | # TODO: const normalisation!
120 |
121 | # create a list of jobs to be done
122 | joblist = []
123 | for partition in partition_labels:
124 | for vbl in list(vbls[partition]):
125 | joblist.append({"timesegments": timesegments[partition],
126 | "vbl_name": vbl[0],
127 | "normalise": vbl[1],
128 | "has_t": vbl[2],
129 | "dataset_conf": dataset_conf,
130 | "partition": partition})
131 |
132 |
133 | def worker(args):
134 |     # create a separate dataset per worker process; this avoids issues with memmaps and multiprocessing
135 | dataset = Dataset(**args["dataset_conf"])
136 | dataset.select_partition(args["partition"])
137 |
138 | fi = None
139 | if args["has_t"]:
140 | # expand timesegments
141 | for ts in args["timesegments"]:
142 | ret = dataset.get_file_indices_from_ts_range(ts, args["vbl_name"])
143 | if fi is None:
144 | fi = ret
145 | else:
146 | fi = np.concatenate([fi, ret])
147 | else:
148 | fi = None
149 |
150 | vals = None
151 | if fi is not None:
152 | vals = dataset[args["vbl_name"]][fi]
153 | else: # constant value
154 | vals = dataset[args["vbl_name"]]
155 |
156 | results = {args["vbl_name"]: {}}
157 | n = args["normalise"]
158 | if n in ["stdmean_global"]:
159 | mean = np.nanmean(vals) # will be done out-of-core automagically by numpy memmap
160 | std = np.nanstd(vals) # will be done out-of-core automagically by numpy memmap
161 | fn = lambda x: (x-mean) / std if std != 0.0 else (x-mean)
162 | results[args["vbl_name"]]["stdmean_global"] = {"mean": mean, "std": std, "fn": fn}
163 | elif n in ["log"]:
164 | std = np.nanstd(vals) # will be done out-of-core automagically by numpy memmap
165 |         fn = lambda x: np.log(np.maximum(x, 0.0) / std + 1)  # np.maximum works elementwise on arrays
166 | results[args["vbl_name"]]["log"] = {"std": std, "fn": fn}
167 | else:
168 | print("Unknown normalisation: {}".format(n))
169 |
170 | return dill.dumps({args["partition"]: results})
171 |
172 |
173 | pool = multiprocessing.Pool(processes=n_procs)
174 | results = pool.map(worker, joblist)
175 |
176 | results_dct = {}
177 | for r in results:
178 | loadr = dill.loads(r)
179 | partition = list(loadr.keys())[0]
180 | if partition not in results_dct:
181 | results_dct[partition] = loadr[partition]
182 | else:
183 | results_dct[partition].update(loadr[partition])
184 |
185 | # save to normalisation file
186 | with open(os.path.join("normalisations", "normalisations_{}.dill".format(expname)), "wb") as f:
187 | dill.dump(results_dct, f)
188 |
--------------------------------------------------------------------------------
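The resulting file holds, per partition and variable, the computed statistics together with a ready-to-apply normalisation function. A sketch of reading them back; the path follows the dill.dump call above, and the variable/method keys follow sample_conf:

import dill

with open("normalisations/normalisations_sample_datasets.dill", "rb") as f:
    norms = dill.load(f)

# e.g. the "log" normalisation computed for ERA5 total precipitation
entry = norms["train"]["era5625/tp"]["log"]
print(entry["std"])
print(entry["fn"](0.001))  # apply the stored normalisation to a raw value
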
/src/benchmark/plot_outputs.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | import matplotlib.colors as colors
5 | import iris
6 | import iris.plot as iplt
7 | import cartopy.crs as ccrs
8 | from datetime import datetime
9 | from run_benchmark import RegressionModel
10 |
11 |
12 | def collect_predictions(model_path, date):
13 | """
14 | Given model path, load learnt model to predict for all lead times, using an input sequence from a specific date.
15 | Output has shapes (no. of lead times, lat, lon)
16 | """
17 | # Collect data to plot
18 | model, hparams, loaderDict, normalizer, collate = RegressionModel.load_model(model_path)
19 |     samples = []
20 | lead_times = hparams['lead_times']
21 | for i in lead_times:
22 | samplet = f'lead_time_{i}'
23 | sample_lt = loaderDict['test'].get_sample_at(samplet, date.timestamp())
24 | sample_lt['__sample_modes__'] = samplet
25 | samples.append([sample_lt])
26 | # make predictions
27 | sample_x, truth, lts = collate(samples)
28 | model.eval()
29 | out = model(sample_x).detach().numpy()
30 | truth = truth.numpy()
31 | return out[:, 0, :, :], truth[:, 0, :, :], hparams
32 |
33 |
34 | def determine_bounds(img: list):
35 | """
36 | determine min and max values found in samples
37 | """
38 | vmin = min([im.min() for im in img])
39 | vmax = max([im.max() for im in img])
40 | return vmin, vmax
41 |
42 |
43 | def make_use_of_cube_data(nc_path: str):
44 | """
45 | load any nc file (e.g. 'total_precipitation_2019_5.625deg.nc') in order to make use of its structure for plotting later.
46 | """
47 | cube = iris.load_cube(nc_path)
48 |     cube = cube[0, :, :]  # remove the time dimension
49 | cube.coord('lat').rename('latitude')
50 | cube.coord('lon').rename('longitude')
51 | cube.coord('latitude').guess_bounds()
52 | cube.coord('longitude').guess_bounds()
53 | return cube
54 |
55 |
56 | def plot_one_img(cube, inarray, vmin, vmax, path=None):
57 | """
58 | Given the cube structure, the data array, together with min/ max values for scaling, output a projected plot over earth.
59 | """
60 | # replace data in cube with incoming array.
61 | cube.data = inarray[:,:]
62 |
63 |     fig = plt.figure(dpi=400, figsize=(8, 4))
64 | # determine colour bounds
65 | delta = (vmax - vmin) / 100
66 | bounds = np.arange(vmin, vmax+delta,delta)
67 | norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)
68 | cmap = plt.get_cmap('viridis')
69 | # project image
70 |     projection = ccrs.Mollweide
71 |     ax = plt.axes(projection=projection(central_longitude=0))
72 |     iplt.pcolormesh(cube, norm=norm, cmap=cmap)
73 | ax.coastlines()
74 | # transparent background
75 | ax.patch.set_facecolor('#ababab')
76 | ax.patch.set_alpha(0)
77 |     if path is not None:
78 |         fig.savefig(path, facecolor=fig.get_facecolor(), edgecolor='none')
79 |     fig.clf()  # clear only after saving, otherwise the output is blank
80 |
81 |
82 | def main(hparams):
83 | """
84 | Main function for plotting truth and predictions.
85 | """
86 |     date = datetime(2019, 7, 12)  # arbitrary example date
87 | load_path = hparams.load
88 | nc_file = hparams.nc_file
89 | hparams = vars(hparams)
90 | out, truth, hparams = collect_predictions(load_path, date)
91 | cube = make_use_of_cube_data(nc_file)
92 | vmin, vmax = determine_bounds([out, truth])
93 | for t, _ in enumerate(hparams['lead_times']):
94 | # plot predictions
95 | plot_one_img(cube, out[t], vmin, vmax)
96 | # plot ground truth
97 | plot_one_img(cube, truth[t], vmin, vmax)
98 |
99 |
100 | if __name__ == '__main__':
101 | parser = ArgumentParser()
102 | parser.add_argument("--load", required=True, type=str, help='Path of checkpoint directory to load')
103 | parser.add_argument("--nc_file", required=True, type=str, help='Path of a random .nc file')
104 | hparams = parser.parse_args()
105 | main(hparams)
--------------------------------------------------------------------------------
/src/benchmark/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions for benchmark tasks.
3 | """
4 | import torch
5 | import numpy as np
6 | from datetime import datetime, timedelta
7 | import yaml
8 | import random
9 | import os
10 |
11 | def seed_everything(seed):
12 | random.seed(seed)
13 | os.environ['PYTHONHASHSEED'] = str(seed)
14 | np.random.seed(seed)
15 | torch.manual_seed(seed)
16 | torch.cuda.manual_seed(seed)
17 | torch.cuda.manual_seed_all(seed)
18 | torch.backends.cudnn.deterministic = True
19 | torch.backends.cudnn.benchmark = False
20 |
21 |
22 | def add_yml_params(args):
23 | data = yaml.load(args.config_file, Loader=yaml.Loader)
24 | delattr(args, 'config_file')
25 | arg_dict = args.__dict__
26 | for key, value in data.items():
27 | arg_dict[key] = value
28 |
29 |
30 | def get_lat2d(grid, dataset=None):
31 | if grid == 5.625:
32 | lat2d = dataset['era5625/lat2d']
33 | else:
34 | lat = np.linspace(-89.296875, 89.296875, 128)
35 | lat2d = np.expand_dims(lat, axis=1).repeat(256, 1)
36 | return lat2d
37 |
38 |
39 | def add_device_hparams(hparams):
40 | num_gpus = torch.cuda.device_count() if hparams['gpus'] == -1 else hparams['gpus']
41 | if num_gpus > 0:
42 | hparams['batch_size'] *= num_gpus
43 | hparams['num_workers'] *= num_gpus
44 | hparams['multi_gpu'] = num_gpus > 1
45 |
46 |
47 | def get_vbl_name(var:str, grid: float):
48 | if grid == 5.625:
49 | if var == 'clbt':
50 | return 'simsat5625/clbt'
51 | if var == 'precipitationcal':
52 | return 'imerg5625/precipitationcal'
53 |         # pressure-level variables (e.g. 't-850') map to 'era5625/t_850hPa';
54 |         # surface and constant variables map to 'era5625/<var>' unchanged
55 |         return "era5625/" + (var.replace('-', '_') + 'hPa' if '-' in var else var)
56 | else:
57 | if var == 'precipitationcal':
58 | return 'imerg140625/precipitationcal'
59 | if var == 'clbt':
60 | return 'simsat140625/clbt'
61 |         # same mapping at 1.40625 degrees: pressure-level variables get
62 |         # the 'hPa' suffix, all others are passed through unchanged
63 |         return "era140625/" + (var.replace('-', '_') + 'hPa' if '-' in var else var)
64 |
65 |
66 | def get_var_name(vbl: str):
67 | return vbl.split('/')[1].replace(':', '-').replace('_', '-').replace('hPa', '')
68 |
69 |
70 | def is_vbl_const(var: str):
71 | if var in ['lat', 'lon', 'orography', 'lsm', 'slt', 'lat2d', 'lon2d']:
72 | return True
73 | return False
74 |
75 |
76 | def local_time_shift(longitude: float):
77 | return timedelta(hours=(np.mod(longitude + 180, 360) - 180) / 180 * 12)
78 |
79 |
80 | def get_local_shift(grid, dataset):
81 | if grid == 5.625:
82 | lon2d = dataset['era5625/lon2d']
83 | else:
84 | lon = np.linspace(0, 358.59375, 256)
85 | lon2d = np.expand_dims(lon, axis=1).repeat(128, 1).T
86 | time_shift = np.vectorize(local_time_shift)(lon2d)
87 | return time_shift
88 |
89 |
90 | def apply_normalization(inputs, output, categories, normalizer):
91 | for i, v in enumerate(categories['input']):
92 | if v not in ['hour', 'day', 'month']:
93 | inputs[:, :, i, :, :] = (inputs[:, :, i, :, :] - normalizer[v]['mean']) / normalizer[v]['std']
94 |
95 | target_v = categories['output'][0]
96 | output[:, 0, :, :] = np.log(output[:, 0, :, :] / normalizer[target_v]['std'] + 1)
97 | return inputs, output
98 |
99 |
100 | def leadtime_into_matrix(lead_times: list,
101 | seq_len: int,
102 | forecast_freq: int,
103 | forecast_n_steps: int,
104 | latlon: tuple):
105 | """
106 | return shape of [bsz, seq_len, forecast_n_steps, lat, lon]
107 | """
108 | bsz = len(lead_times)
109 | leadtime = np.zeros((bsz, seq_len, forecast_n_steps, latlon[0], latlon[1]))
110 | for batch_i, lt in enumerate(lead_times):
111 | leadtime[batch_i, :, lt // forecast_freq-1, :, :] = 1
112 | return leadtime
113 |
114 |
115 |
116 | def collate_fn(x_list, hparams, normalizer, time_shift):
117 | """
118 | return
119 | inputs = [bsz, seq_len, channels, lat, lon] (constants are repeated per timestep)
120 | output = [bsz, channels, lat, lon]
121 | lead_time = [bsz]
122 | """
123 | output = []
124 | inputs = []
125 | lead_times = []
126 | categories = hparams['categories']
127 | latlon = hparams['latlon']
128 | compute_time = [v for v in categories['input'] if v in ['hour', 'day', 'month']]
129 | tmp = 'input_temporal_clbt' if 'clbt-0' in categories['input_temporal'] else 'input_temporal'
130 |
131 | for sample in x_list:
132 | output.append(np.concatenate([sample[0]['target'][v] for v in categories['output']], 1))
133 | lead_times.append(int(sample[0]['__sample_modes__'].split('_')[-1]))
134 |
135 | # temporal
136 | inputs.append([sample[0]['label'][v] for v in categories[tmp]])
137 |
138 | # hour, day, month
139 | if compute_time:
140 | time_scaling = {'hour': 24, 'day': 31, 'month': 12}
141 | timestamps = [datetime.fromtimestamp(t) for t in sample[0]['label'][categories[tmp][0]+ '__ts']]
142 | timestamps = np.transpose(np.tile(timestamps, (1, *latlon, 1)), (3,0,1,2))
143 | if time_shift is not None:
144 | timestamps -= time_shift
145 | for m in ['hour', 'day', 'month']:
146 | tfunc = np.vectorize(lambda t: getattr(t, m))
147 |                 inputs[-1] += [tfunc(timestamps) / time_scaling[m]]
148 |
149 | if categories['input_static']:
150 | inputs[-1] += [np.repeat(sample[0]['label'][v][None, :, :], hparams['seq_len'], 0) for v in categories['input_static']]
151 | inputs[-1] = np.concatenate(inputs[-1], 1)
152 |
153 | inputs = torch.Tensor(np.stack(inputs))
154 | output = torch.Tensor(np.concatenate(output))
155 | lead_times = torch.Tensor(lead_times).long()
156 |
157 | # apply normalization
158 | if normalizer is not None:
159 | inputs, output = apply_normalization(inputs, output, categories, normalizer)
160 |
161 | # concatenate lead times to inputs.
162 |     one_hot_lt = leadtime_into_matrix(lead_times, hparams['seq_len'], hparams['forecast_freq'], hparams['forecast_n_steps'], latlon)
163 | one_hot_lt = torch.Tensor(one_hot_lt)
164 | inputs = torch.cat([inputs, one_hot_lt], 2)
165 |
166 | return inputs, output, lead_times
--------------------------------------------------------------------------------
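leadtime_into_matrix one-hot encodes each sample's lead time over the forecast steps, producing planes that collate_fn concatenates onto the input channels. A tiny illustration:

import numpy as np
from src.benchmark.utils import leadtime_into_matrix

# two samples with lead times of 6 and 12 hours, forecasts every 6 hours
m = leadtime_into_matrix(lead_times=[6, 12], seq_len=1,
                         forecast_freq=6, forecast_n_steps=2, latlon=(2, 2))
print(m.shape)           # (2, 1, 2, 2, 2): [bsz, seq_len, steps, lat, lon]
print(m[0, 0, :, 0, 0])  # [1. 0.] -> first forecast step
print(m[1, 0, :, 0, 0])  # [0. 1.] -> second forecast step
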
/src/convert/convert_era5625_aaai.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from netCDF4 import Dataset as netcdf_Dataset
3 | import os
4 | import numpy as np
5 |
6 | if __name__ == "__main__":
7 |
8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12}
9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()}
10 |
11 | years=list(range(1979,2020))
12 | dataset_name = "era5625"
13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER"
14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name)
15 | if not os.path.exists(output_path):
16 | os.makedirs(output_path)
17 |
18 | variables_const = [
19 | {"name": "lat2d",
20 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
21 | "dims": (32, 64)},
22 | {"name": "lon2d",
23 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
24 | "dims": (32, 64)},
25 | {"name": "lsm",
26 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
27 | "dims": (32, 64)},
28 | {"name": "orography",
29 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
30 | "dims": (32, 64)},
31 | {"name": "slt",
32 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
33 | "dims": (32, 64)},
34 | ]
35 |
36 | variables_era = [
37 | {"name": "t2m",
38 | "ftemplate": os.path.join(input_path, "2m_temperature_{}_5.625deg.nc"),
39 | "dims": (32, 64),
40 | "levels": list(range(1))},
41 | {"name": "sp",
42 | "ftemplate": os.path.join(input_path, "surface_pressure_{}_5.625deg.nc"),
43 | "dims": (32, 64),
44 | "levels": list(range(1))},
45 | {"name": "tp",
46 | "ftemplate": os.path.join(input_path, "total_precipitation_{}_5.625deg.nc"),
47 | "dims": (32, 64),
48 | "levels": list(range(1))}, #]#,
49 | ]
50 |
51 | from copy import deepcopy
52 | variables_era_2019 = deepcopy(variables_era)
53 |
54 |     era_extra_pressure_levels = [300, 500, 850]
55 |     for i, p in enumerate(era_extra_pressure_levels):
56 |         variables_era.append({"name": "ciwc",
57 |             "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
58 |             "dims": (32, 64),
59 |             "levels": list((pressure_to_idx[p],)),
60 |             "p_level": p})
61 |         variables_era.append({"name": "clwc",
62 |             "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
63 |             "dims": (32, 64),
64 |             "levels": list((pressure_to_idx[p],)),
65 |             "p_level": p})
66 |         variables_era.append({"name": "t",
67 |             "ftemplate": os.path.join(input_path, "temperature_{}_5.625deg.nc"),
68 |             "dims": (32, 64),
69 |             "levels": list((pressure_to_idx[p],)),
70 |             "p_level": p})
71 |         variables_era.append({"name": "z",
72 |             "ftemplate": os.path.join(input_path, "geopotential_{}_5.625deg.nc"),
73 |             "dims": (32, 64),
74 |             "levels": list((pressure_to_idx[p],)), "p_level": p})
75 |         variables_era.append({"name": "q",
76 |             "ftemplate": os.path.join(input_path, "specific_humidity_{}_5.625deg.nc"),
77 |             "dims": (32, 64),
78 |             "levels": list((pressure_to_idx[p],)), "p_level": p})
79 |
80 |     era_extra_pressure_levels = [300, 500, 850]
81 |     for i, p in enumerate(era_extra_pressure_levels):
82 |         variables_era_2019.append({"name": "ciwc",
83 |             "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
84 |             "dims": (32, 64),
85 |             "levels": list((pressure_to_idx[p],)),
86 |             "p_level": p})
87 |         variables_era_2019.append({"name": "clwc",
88 |             "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
89 |             "dims": (32, 64),
90 |             "levels": list((pressure_to_idx[p],)),
91 |             "p_level": p})
92 |         variables_era_2019.append({"name": "t",
93 |             "ftemplate": os.path.join(input_path, "temperature_{}_"+str(int(p))+"_5.625deg.nc"),
94 |             "dims": (32, 64),
95 |             "levels": list((pressure_to_idx[p],)),
96 |             "p_level": p})
97 |         variables_era_2019.append({"name": "z",
98 |             "ftemplate": os.path.join(input_path, "geopotential_{}_"+str(int(p))+"_5.625deg.nc"),
99 |             "dims": (32, 64),
100 |             "levels": list((pressure_to_idx[p],)), "p_level": p})
101 |         variables_era_2019.append({"name": "q",
102 |             "ftemplate": os.path.join(input_path, "specific_humidity_{}_"+str(int(p))+"_5.625deg.nc"),
103 |             "dims": (32, 64),
104 |             "levels": list((pressure_to_idx[p],)), "p_level": p})
105 |
106 | era_const_path = os.path.join(output_path, "{}__era5625_const.mmap".format(dataset_name))
107 | print("Writing const values...")
108 | const_dims = (sum([1 for vg in variables_const]), 32, 64)
109 | era_const_dims = const_dims
110 |
111 | if os.path.exists(era_const_path):
112 | print("Skipping ERA CONST as file exists... ")
113 | else:
114 | # write const variables
115 | mmap = np.memmap(era_const_path, dtype='float32', mode='w+', shape=const_dims)
116 | def write_const(vbls):
117 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbls[0]["ftemplate"]), "r", format="NETCDF4")
118 | for i, vbl in enumerate(vbls):
119 | print("WRITING CONST VBL ", vbl["name"])
120 | root_channel = 0 if not i else sum([1 for vg in variables_const[:i]])
121 | print("ROOT CHANNEL: ", root_channel)
122 | mmap[root_channel] = rootgrp[vbl["name"]][:]
123 | write_const(variables_const)
124 | mmap.flush()
125 | del mmap
126 |
127 | # write temporal ERA variables
128 | n_rec_dim = (32, 64)
129 | n_recs = (datetime.datetime(min(max(years), 2019), 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 + 1
130 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_era])
131 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim)
132 | era_dims = dims
133 | if not os.path.exists(output_path):
134 | os.makedirs(output_path)
135 |
136 | era_path = os.path.join(output_path, "{}__era5625.mmap".format(dataset_name))
137 | if os.path.exists(era_path):
138 | print("Skipping ERA as file exists... ")
139 | else:
140 | mmap = np.memmap(era_path, dtype='float32', mode='w+', shape=dims)
141 |
142 | def write_year(y, vbls):
143 | vbls, vbls_2019 = vbls
144 |
145 | if y > 2019:
146 | print("ERA: no data available for year {}".format(y))
147 | return
148 | t_offset = int((datetime.datetime(y, 1, 1, 0).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600)
149 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) + 1
150 | for i, vbl in enumerate(vbls):
151 | if y == 2019:
152 | vbl = vbls_2019[i]
153 | print("ERA5625 writing year {} vbl {}...".format(y, vbl["name"]))
154 | netcdf_fname = vbl["ftemplate"].format(y)
155 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]])
156 | if vbl["name"] in ["tcwv"] and y > 2000:
157 | mmap[t_offset:t_end, root_channel] = float("nan")
158 | else:
159 | rootgrp = netcdf_Dataset(os.path.join(input_path, netcdf_fname), "r", format="NETCDF4")
160 | print(t_offset, t_end, root_channel, len(vbl["levels"]))
161 | if vbl["name"] in ["tisr", "tp"] and y == 1979:
162 |                     mmap[t_offset+7:t_end, root_channel] = rootgrp[vbl["name"]][:]  # tisr and tp start at 07:00 in 1979
163 | mmap[t_offset:t_offset+7, root_channel] = float("nan")
164 | else:
165 |
166 | if len(vbl["levels"]) == 1:
167 | mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:] #[:, vbl["levels"]]
168 | else:
169 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]]
170 | #mmap.flush()
171 |
172 | from multiprocessing import Pool
173 | from functools import partial
174 | with Pool(40) as p:
175 | p.map(partial(write_year,vbls=(variables_era, variables_era_2019)), years)
176 | mmap.flush()
177 | del mmap
178 |
179 |
180 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency
181 | print("Done converting. Generating dataset pickle file...")
182 | import dill
183 | import json
184 | dct = {}
185 | dct["variables"] = {}
186 | for i, v in enumerate(variables_const):
187 | vbl_dict = {"name":v["name"],
188 | "mmap_name":"{}__era5625_const.mmap".format(dataset_name),
189 | "type":"const",
190 | "dims": v["dims"],
191 | "offset": 0 if not i else sum([1 for vg in variables_const[:i]]),
192 | "first_ts": None,
193 | "last_ts": None,
194 | "tfreq_s": None,
195 | "levels": None}
196 | dct["variables"]["era5625/{}".format(v["name"])] = vbl_dict
197 |
198 | for i, v in enumerate(variables_era):
199 |         vbl_dict = {"name": "{}_{}hPa".format(v["name"], v["p_level"]) if "p_level" in v else v["name"],
200 | "mmap_name":"{}__era5625.mmap".format(dataset_name),
201 | "type":"temp",
202 | "dims": v["dims"],
203 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]]),
204 | "first_ts": datetime.datetime(1979, 1, 1, 0).timestamp() if v["name"] not in ["tisr", "tp"] else datetime.datetime(1979, 1, 1, 7).timestamp(),
205 | "last_ts": datetime.datetime(2019, 12, 31, 23).timestamp(),# if v["name"] not in ["ciwc", "clwc"] else datetime.datetime(2000,12,31,23).timestamp(),
206 | "tfreq_s": 3600,
207 | "levels": v["levels"]}
208 |
209 | if "p_level" in v:
210 | vbl_dict["index2pressure"] = {i:int(v["p_level"]) for i, vl in enumerate(v["levels"])}
211 | else:
212 | vbl_dict["index2pressure"] = {i:int(idx_to_pressure[vl]) for i, vl in enumerate(v["levels"])}
213 | dct["variables"]["era5625/{}".format(vbl_dict["name"])] = vbl_dict
214 |
215 | dct["memmap"] = {"{}__era5625_const.mmap".format(dataset_name): {"dims": era_const_dims,
216 | "dtype": "float32",
217 | "daterange": None,
218 | "tfreq_s": None},
219 | "{}__era5625.mmap".format(dataset_name): {"dims": era_dims,
220 | "dtype": "float32",
221 |                                                               "daterange": (datetime.datetime(1979, 1, 1, 0).timestamp(), datetime.datetime(2019, 12, 31, 23).timestamp()),
222 | "tfreq_s": 3600},
223 | }
224 |
225 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb'))
226 |
227 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile:
228 | json.dump(dct, outfile, indent=4, sort_keys=True)
229 |
--------------------------------------------------------------------------------
/src/convert/convert_era5625_aaai_sample.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from netCDF4 import Dataset as netcdf_Dataset
3 | import os
4 | import numpy as np
5 |
6 | if __name__ == "__main__":
7 |
8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12}
9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()}
10 |
11 | years=list(range(2018,2020))
12 | dataset_name = "era5625_sample"
13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER"
14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name)
15 | if not os.path.exists(output_path):
16 | os.makedirs(output_path)
17 |
18 | variables_const = [
19 | {"name": "lat2d",
20 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
21 | "dims": (32, 64)},
22 | {"name": "lon2d",
23 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
24 | "dims": (32, 64)},
25 | {"name": "lsm",
26 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
27 | "dims": (32, 64)},
28 | {"name": "orography",
29 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
30 | "dims": (32, 64)},
31 | {"name": "slt",
32 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"),
33 | "dims": (32, 64)},
34 | ]
35 |
36 | variables_era = [
37 | {"name": "t2m",
38 | "ftemplate": os.path.join(input_path, "2m_temperature_{}_5.625deg.nc"),
39 | "dims": (32, 64),
40 | "levels": list(range(1))},
41 | {"name": "sp",
42 | "ftemplate": os.path.join(input_path, "surface_pressure_{}_5.625deg.nc"),
43 | "dims": (32, 64),
44 | "levels": list(range(1))},
45 | {"name": "tp",
46 | "ftemplate": os.path.join(input_path, "total_precipitation_{}_5.625deg.nc"),
47 | "dims": (32, 64),
48 | "levels": list(range(1))}, #]#,
49 | ]
50 |
51 | from copy import deepcopy
52 | variables_era_2019 = deepcopy(variables_era)
53 |
54 | # era_extra_pressure_levels = [300, 500, 850] #, 850]
55 | # for i, p in enumerate(era_extra_pressure_levels):
56 | # variables_era.append({"name": "ciwc".format(p),
57 | # "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
58 | # "dims": (32, 64),
59 | # "levels": list((pressure_to_idx[p],)),
60 | # "p_level": p})
61 | # variables_era.append({"name": "clwc".format(p),
62 | # "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
63 | # "dims": (32, 64),
64 | # "levels": list((pressure_to_idx[p],)),
65 | # "p_level": p})
66 | # variables_era.append({"name": "t",
67 | # "ftemplate": os.path.join(input_path, "temperature_{}_5.625deg.nc"),
68 | # "dims": (32, 64),
69 | # "levels": list((pressure_to_idx[p],)),
70 | # "p_level": p}),
71 | # variables_era.append({"name": "z",
72 | # "ftemplate": os.path.join(input_path, "geopotential_{}_5.625deg.nc"),
73 | # "dims": (32, 64),
74 | # "levels": list((pressure_to_idx[p],))}),
75 | # variables_era.append({"name": "q",
76 | # "ftemplate": os.path.join(input_path, "specific_humidity_{}_5.625deg.nc"),
77 | # "dims": (32, 64),
78 | # "levels": list((pressure_to_idx[p],))}),
79 |
80 | # era_extra_pressure_levels = [300, 500, 850] #, 850]
81 | # for i, p in enumerate(era_extra_pressure_levels):
82 | # variables_era_2019.append({"name": "ciwc".format(p),
83 | # "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
84 | # "dims": (32, 64),
85 | # "levels": list((pressure_to_idx[p],)),
86 | # "p_level": p})
87 | # variables_era_2019.append({"name": "clwc".format(p),
88 | # "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"),
89 | # "dims": (32, 64),
90 | # "levels": list((pressure_to_idx[p],)),
91 | # "p_level": p})
92 | # variables_era_2019.append({"name": "t",
93 | # "ftemplate": os.path.join(input_path, "temperature_{}_"+str(int(p))+"_5.625deg.nc"),
94 | # "dims": (32, 64),
95 | # "levels": list((pressure_to_idx[p],)),
96 | # "p_level": p}),
97 | # variables_era_2019.append({"name": "z",
98 | # "ftemplate": os.path.join(input_path, "geopotential_{}_"+str(int(p))+"_5.625deg.nc"),
99 | # "dims": (32, 64),
100 | # "levels": list((pressure_to_idx[p],))}),
101 | # variables_era_2019.append({"name": "q",
102 | # "ftemplate": os.path.join(input_path, "specific_humidity_{}_"+str(int(p))+"_5.625deg.nc"),
103 | # "dims": (32, 64),
104 | # "levels": list((pressure_to_idx[p],))}),
105 |
106 | era_const_path = os.path.join(output_path, "{}__era5625_const.mmap".format(dataset_name))
107 | print("Writing const values...")
108 | const_dims = (sum([1 for vg in variables_const]), 32, 64)
109 | era_const_dims = const_dims
110 |
111 | if os.path.exists(era_const_path):
112 | print("Skipping ERA CONST as file exists... ")
113 | else:
114 | # write const variables
115 | mmap = np.memmap(era_const_path, dtype='float32', mode='w+', shape=const_dims)
116 | def write_const(vbls):
117 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbls[0]["ftemplate"]), "r", format="NETCDF4")
118 | for i, vbl in enumerate(vbls):
119 | print("WRITING CONST VBL ", vbl["name"])
120 | root_channel = 0 if not i else sum([1 for vg in variables_const[:i]])
121 | print("ROOT CHANNEL: ", root_channel)
122 | mmap[root_channel] = rootgrp[vbl["name"]][:]
123 | write_const(variables_const)
124 | mmap.flush()
125 | del mmap
126 |
127 | # write temporal ERA variables
128 | n_rec_dim = (32, 64)
129 | n_recs = (datetime.datetime(min(max(years), 2019), 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 + 1
130 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_era])
131 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim)
132 | era_dims = dims
133 | if not os.path.exists(output_path):
134 | os.makedirs(output_path)
135 |
136 | era_path = os.path.join(output_path, "{}__era5625.mmap".format(dataset_name))
137 | if os.path.exists(era_path):
138 | print("Skipping ERA as file exists... ")
139 | else:
140 | mmap = np.memmap(era_path, dtype='float32', mode='w+', shape=dims)
141 |
142 | def write_year(y, vbls):
143 | vbls, vbls_2019 = vbls
144 |
145 | if y > 2019:
146 | print("ERA: no data available for year {}".format(y))
147 | return
148 | t_offset = int((datetime.datetime(y, 1, 1, 0).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600)
149 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) + 1
150 | for i, vbl in enumerate(vbls):
151 | if y == 2019:
152 | vbl = vbls_2019[i]
153 | print("ERA5625 writing year {} vbl {}...".format(y, vbl["name"]))
154 | netcdf_fname = vbl["ftemplate"].format(y)
155 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]])
156 | if vbl["name"] in ["tcwv"] and y > 2000:
157 | mmap[t_offset:t_end, root_channel] = float("nan")
158 | else:
159 | rootgrp = netcdf_Dataset(os.path.join(input_path, netcdf_fname), "r", format="NETCDF4")
160 | print(t_offset, t_end, root_channel, len(vbl["levels"]))
161 | if vbl["name"] in ["tisr", "tp"] and y == 1979:
162 | mmap[t_offset+7:t_end, root_channel] = rootgrp[vbl["name"]][:]  # tisr and tp only start at 07:00 in 1979
163 | mmap[t_offset:t_offset+7, root_channel] = float("nan")
164 | else:
165 |
166 | if len(vbl["levels"]) == 1:
167 | mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:] #[:, vbl["levels"]]
168 | else:
169 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]]
170 | #mmap.flush()
171 |
172 | from multiprocessing import Pool
173 | from functools import partial
174 | with Pool(40) as p:
175 | p.map(partial(write_year,vbls=(variables_era, variables_era_2019)), years)
176 | mmap.flush()
177 | del mmap
178 |
179 |
180 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency
181 | print("Done converting. Generating dataset pickle file...")
182 | import dill
183 | import json
184 | dct = {}
185 | dct["variables"] = {}
186 | for i, v in enumerate(variables_const):
187 | vbl_dict = {"name":v["name"],
188 | "mmap_name":"{}__era5625_const.mmap".format(dataset_name),
189 | "type":"const",
190 | "dims": v["dims"],
191 | "offset": 0 if not i else sum([1 for vg in variables_const[:i]]),
192 | "first_ts": None,
193 | "last_ts": None,
194 | "tfreq_s": None,
195 | "levels": None}
196 | dct["variables"]["era5625/{}".format(v["name"])] = vbl_dict
197 |
198 | for i, v in enumerate(variables_era):
199 | vbl_dict = {"name": "{}_{}hPa".format(v["name"], v["p_level"]) if v["name"] in ["ciwc","clwc"] else v["name"],
200 | "mmap_name":"{}__era5625.mmap".format(dataset_name),
201 | "type":"temp",
202 | "dims": v["dims"],
203 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]]),
204 | "first_ts": datetime.datetime(years[0], 1, 1, 0).timestamp(),
205 | "last_ts": datetime.datetime(years[1], 12, 31, 23).timestamp(),# if v["name"] not in ["ciwc", "clwc"] else datetime.datetime(2000,12,31,23).timestamp(),
206 | "tfreq_s": 3600,
207 | "levels": v["levels"]}
208 |
209 | if "p_level" in v:
210 | vbl_dict["index2pressure"] = {i:int(v["p_level"]) for i, vl in enumerate(v["levels"])}
211 | else:
212 | vbl_dict["index2pressure"] = {i:int(idx_to_pressure[vl]) for i, vl in enumerate(v["levels"])}
213 | dct["variables"]["era5625/{}".format(vbl_dict["name"])] = vbl_dict
214 |
215 | dct["memmap"] = {"{}__era5625_const.mmap".format(dataset_name): {"dims": era_const_dims,
216 | "dtype": "float32",
217 | "daterange": None,
218 | "tfreq_s": None},
219 | "{}__era5625.mmap".format(dataset_name): {"dims": era_dims,
220 | "dtype": "float32",
221 | "daterange": (datetime.datetime(years[0], 1, 1, 0).timestamp(), datetime.datetime(years[1], 12, 31, 23).timestamp()),
222 | "tfreq_s": 3600},
223 | }
224 |
225 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb'))
226 |
227 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile:
228 | json.dump(dct, outfile, indent=4, sort_keys=True)
229 |
--------------------------------------------------------------------------------
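Note: with years = [2018, 2019] hard-coded above, the temporal memmap should hold one record per hour from 2018-01-01 00:00 through 2019-12-31 23:00 inclusive, i.e. 17520 records. A quick sanity check against the recorded dims, assuming the .dill was written to the placeholder path below:

import datetime
import dill

meta = dill.load(open("era5625_sample/era5625_sample.dill", "rb"))  # placeholder path
dims = meta["memmap"]["era5625_sample__era5625.mmap"]["dims"]

start = datetime.datetime(2018, 1, 1, 0).timestamp()
end = datetime.datetime(2019, 12, 31, 23).timestamp()
expected_recs = int((end - start) // 3600) + 1  # 17520
assert dims[0] == expected_recs
--------------------------------------------------------------------------------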
/src/convert/convert_imerg5625.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from netCDF4 import Dataset as netcdf_Dataset
3 | import os
4 | import numpy as np
5 |
6 | if __name__ == "__main__":
7 |
8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12}
9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()}
10 | dataset_range=(datetime.datetime(2000, 6, 1, 0), datetime.datetime(2019, 12, 31, 23))
11 |
12 |
13 | years=list(range(2000,2020))
14 | dataset_name = "imerg5625"
15 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER"
16 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name)
17 |
18 | if not os.path.exists(output_path):
19 | os.makedirs(output_path)
20 |
21 | variables_imerg25bi = [
22 | {"name": "precipitationcal",
23 | "ftemplate": os.path.join(input_path, "imerg{}{:02d}{:02d}.nc"),
24 | "dims": (32, 64),
25 | "levels": list(range(1))},
26 | ]
27 |
28 | imerg25bi_path = os.path.join(output_path, "{}__imerg5625bi.mmap".format(dataset_name))
29 | n_rec_dim = variables_imerg25bi[0]["dims"]
30 | imerg25bi_sample_freq = 1
31 | n_recs = ((datetime.datetime(2019, 12, 31, 23).timestamp()-datetime.datetime(2000, 6, 1, 0).timestamp()) // 3600 ) // imerg25bi_sample_freq + 1
32 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_imerg25bi])
33 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim)
34 | print("dims: ", dims)
35 | print("nrecs: ", n_recs)
36 | imerg25bi_dims = dims
37 | if os.path.exists(imerg25bi_path):
38 | print("Skipping iMERG25bi as file exists... ")
39 | else:
40 | # write temporal iMERG variables
41 | if not os.path.exists(output_path):
42 | os.makedirs(output_path)
43 |
44 | mmap = np.memmap(imerg25bi_path, dtype='float32', mode='w+', shape=dims)
45 |
46 | def write_day(ymd, vbls):
47 | y, m, d = ymd
48 | if y < dataset_range[0].year:
49 | print("iMERG25bi: no data available for year {}".format(y))
50 | return
51 | t_offset = int((datetime.datetime(y, m, d, 0).timestamp() - datetime.datetime(max(min(years), dataset_range[0].year), 6, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq
52 | t_end = int((datetime.datetime(y, m, d, 23).timestamp() - datetime.datetime(max(min(years), dataset_range[0].year), 6, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq + 1
53 | for i, vbl in enumerate(vbls):
54 | print("SimSat writing year {} month {} day {} vbl {}...".format(y, m, d, vbl["name"]))
55 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y, m, d)), "r", format="NETCDF4")
56 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]])
57 | print(t_offset, t_end, root_channel, len(vbl["levels"]))
58 | try:
59 | mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:]
60 | except Exception as e:
61 | print(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape)
62 | raise Exception("{} {} {} {} {} {} {} {} {} {} ".format(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape))
63 |
64 | ymd = []
65 | dd = dataset_range[0]
66 | while dd <= dataset_range[1]:
67 | ymd.append((dd.year, dd.month, dd.day))
68 | dd += datetime.timedelta(days=1)
69 |
70 | from multiprocessing import Pool
71 | from functools import partial
72 | with Pool(40) as p:
73 | p.map(partial(write_day,vbls=variables_imerg25bi), ymd)
74 | mmap.flush()
75 | del mmap
76 |
77 |
78 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency
79 | print("Done converting. Generating dataset pickle file...")
80 | import dill
81 | import json
82 | dct = {}
83 | dct["variables"] = {}
84 | for i, v in enumerate(variables_imerg25bi):
85 | vbl_dict = {"name":v["name"],
86 | "mmap_name":"{}__imerg5625.mmap".format(dataset_name),
87 | "type":"temp",
88 | "dims": v["dims"],
89 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]]),
90 | "first_ts": dataset_range[0].timestamp(),
91 | "last_ts": dataset_range[1].timestamp(),
92 | "tfreq_s": 3600,
93 | "levels": v["levels"]}
94 | dct["variables"]["imerg5625/{}".format(v["name"])] = vbl_dict
95 |
96 | dct["memmap"] = {"{}__imerg5625.mmap".format(dataset_name): {"dims": imerg25bi_dims,
97 | "dtype": "float32",
98 | "daterange": (dataset_range[0].timestamp(), dataset_range[1].timestamp()),
99 | "tfreq_s": 3600}
100 | }
101 |
102 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb'))
103 |
104 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile:
105 | json.dump(dct, outfile, indent=4, sort_keys=True)
106 |
--------------------------------------------------------------------------------
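Note: write_day above addresses the memmap in whole days -- a date maps to the half-open row slice [t_offset, t_end) of hourly records counted from the dataset start, 2000-06-01 00:00. A worked check of that arithmetic (illustrative only, sample_freq == 1):

import datetime

start = datetime.datetime(2000, 6, 1, 0).timestamp()

def day_slice(y, m, d):
    # mirrors the offset computation in write_day
    t_offset = int((datetime.datetime(y, m, d, 0).timestamp() - start) // 3600)
    t_end = int((datetime.datetime(y, m, d, 23).timestamp() - start) // 3600) + 1
    return t_offset, t_end

print(day_slice(2000, 6, 1))  # (0, 24): the first day fills rows 0..23
print(day_slice(2000, 6, 2))  # (24, 48)
--------------------------------------------------------------------------------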
/src/convert/convert_imerg5625_sample.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from netCDF4 import Dataset as netcdf_Dataset
3 | import os
4 | import numpy as np
5 |
6 | if __name__ == "__main__":
7 |
8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12}
9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()}
10 | dataset_range=(datetime.datetime(2000, 6, 1, 0), datetime.datetime(2019, 12, 31, 23))
11 |
12 |
13 | years=list(range(2018,2020))
14 | dataset_name = "imerg5625_sample"
15 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER"
16 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name)
17 |
18 | if not os.path.exists(output_path):
19 | os.makedirs(output_path)
20 |
21 | variables_imerg25bi = [
22 | {"name": "precipitationcal",
23 | "ftemplate": os.path.join(input_path, "imerg{}{:02d}{:02d}.nc"),
24 | "dims": (32, 64),
25 | "levels": list(range(1))},
26 | ]
27 |
28 | imerg25bi_path = os.path.join(output_path, "{}__imerg5625.mmap".format(dataset_name))
29 | n_rec_dim = variables_imerg25bi[0]["dims"]
30 | imerg25bi_sample_freq = 1
31 | n_recs = ((datetime.datetime(2019, 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 ) // imerg25bi_sample_freq + 1
32 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_imerg25bi])
33 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim)
34 | print("dims: ", dims)
35 | print("nrecs: ", n_recs)
36 | imerg25bi_dims = dims
37 | if os.path.exists(imerg25bi_path):
38 | print("Skipping iMERG25bi as file exists... ")
39 | else:
40 | # write temporal iMERG variables
41 | if not os.path.exists(output_path):
42 | os.makedirs(output_path)
43 |
44 | mmap = np.memmap(imerg25bi_path, dtype='float32', mode='w+', shape=dims)
45 |
46 | def write_day(ymd, vbls):
47 | y, m, d = ymd
48 | if y < dataset_range[0].year:
49 | print("iMERG25bi: no data available for year {}".format(y))
50 | return
51 | t_offset = int((datetime.datetime(y, m, d, 0).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq
52 | t_end = int((datetime.datetime(y, m, d, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq + 1
53 | for i, vbl in enumerate(vbls):
54 | print("SimSat writing year {} month {} day {} vbl {}...".format(y, m, d, vbl["name"]))
55 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y, m, d)), "r", format="NETCDF4")
56 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]])
57 | print(t_offset, t_end, root_channel, len(vbl["levels"]))
58 | try:
59 | mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:]
60 | except Exception as e:
61 | print(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape)
62 | raise Exception("{} {} {} {} {} {} {} {} {} {} ".format(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape))
63 |
64 | ymd = []
65 | dd = datetime.datetime(years[0], 1, 1, 0)
66 | while dd <= datetime.datetime(years[1], 12, 31, 23):
67 | ymd.append((dd.year, dd.month, dd.day))
68 | dd += datetime.timedelta(days=1)
69 |
70 | from multiprocessing import Pool
71 | from functools import partial
72 | with Pool(40) as p:
73 | p.map(partial(write_day,vbls=variables_imerg25bi), ymd)
74 | mmap.flush()
75 | del mmap
76 |
77 |
78 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency
79 | print("Done converting. Generating dataset pickle file...")
80 | import dill
81 | import json
82 | dct = {}
83 | dct["variables"] = {}
84 | for i, v in enumerate(variables_imerg25bi):
85 | vbl_dict = {"name":v["name"],
86 | "mmap_name":"{}__imerg5625.mmap".format(dataset_name),
87 | "type":"temp",
88 | "dims": v["dims"],
89 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]]),
90 | "first_ts": datetime.datetime(years[0], 1, 1, 0).timestamp(),
91 | "last_ts": datetime.datetime(years[1], 12, 31, 23).timestamp(),
92 | "tfreq_s": 3600,
93 | "levels": v["levels"]}
94 | dct["variables"]["imerg5625/{}".format(v["name"])] = vbl_dict
95 |
96 | dct["memmap"] = {"{}__imerg5625.mmap".format(dataset_name): {"dims": imerg25bi_dims,
97 | "dtype": "float32",
98 | "daterange": (datetime.datetime(years[0], 1, 1, 0).timestamp(),
99 | datetime.datetime(years[1], 12, 31, 23).timestamp()),
100 | "tfreq_s": 3600}
101 | }
102 |
103 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb'))
104 |
105 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile:
106 | json.dump(dct, outfile, indent=4, sort_keys=True)
107 |
--------------------------------------------------------------------------------
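Note: once the sample memmap exists it can be opened read-only, taking the shape from the metadata rather than re-deriving it. A sketch that pulls one day of precipitationcal; the paths are placeholders:

import datetime
import dill
import numpy as np

meta = dill.load(open("imerg5625_sample/imerg5625_sample.dill", "rb"))  # placeholder path
name = "imerg5625_sample__imerg5625.mmap"
dims = meta["memmap"][name]["dims"]
mmap = np.memmap("imerg5625_sample/" + name, dtype="float32", mode="r", shape=tuple(dims))

start = datetime.datetime(2018, 1, 1, 0).timestamp()
row = int((datetime.datetime(2018, 7, 1, 0).timestamp() - start) // 3600)
day = mmap[row:row + 24, 0]  # 24 hourly (32, 64) precipitation fields
print(day.shape, np.nanmean(day))
--------------------------------------------------------------------------------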
/src/convert/convert_simsat5625.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from netCDF4 import Dataset as netcdf_Dataset
3 | import os
4 | import numpy as np
5 |
6 | if __name__ == "__main__":
7 |
8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12}
9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()}
10 |
11 | years=list(range(2016,2021))
12 | dataset_name = "simsat5625"
13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER"
14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name)
15 | if not os.path.exists(output_path):
16 | os.makedirs(output_path)
17 |
18 | variables_simsat = [
19 | {"name": "clbt",
20 | "ftemplate": os.path.join(input_path, "sat{}.nc"),
21 | "dims": (32, 64),
22 | "levels": list(range(3))},
23 | ]
24 |
25 | ds_daterange = (datetime.datetime(2016, 4, 1, 0), datetime.datetime(2020, 3, 31, 21))
26 | ts_daterange = ds_daterange
27 |
28 | simsat_path = os.path.join(output_path, "{}__simsat5625.mmap".format(dataset_name))
29 | n_rec_dim = (32, 64)
30 | simsat_sample_freq = 3 # every 3 hours
31 | n_recs = ((ds_daterange[1].timestamp()-ds_daterange[0].timestamp()) // 3600 ) // simsat_sample_freq + 1
32 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_simsat])
33 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim)
34 | simsat_dims = dims
35 | if os.path.exists(simsat_path):
36 | print("Skipping SimSat as file exists... ")
37 | else:
38 | # write temporal SimSat variables
39 | if not os.path.exists(output_path):
40 | os.makedirs(output_path)
41 |
42 | mmap = np.memmap(simsat_path, dtype='float32', mode='w+', shape=dims)
43 | print("MMAP DIMS: ", dims)
44 |
45 | def write_year(y, vbls):
46 | if y < 2016:
47 | print("SimSat: no data available for year {}".format(y))
48 | return
49 | if y == 2016:
50 | t_offset = 0
51 | else:
52 | t_offset = int((datetime.datetime(y,1,1,0).timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq
53 | if y == 2020:
54 | t_end = int((ts_daterange[1].timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq + 1
55 | else:
56 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq + 1
57 | print("year: ", y, " t_offset: ", t_offset, "t_end:", t_end)
58 | for i, vbl in enumerate(vbls):
59 | print("SimSat writing year {} vbl {}...".format(y, vbl["name"]))
60 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y)), "r", format="NETCDF4")
61 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]])
62 | print("hello:", t_offset, t_end, root_channel, len(vbl["levels"]), rootgrp[vbl["name"]].shape)
63 | try:
64 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]]
65 | except Exception:
66 | print("EXCEPTION", rootgrp[vbl["name"]].shape, t_offset, t_end, root_channel, mmap.shape)
67 | raise
68 |
69 | from multiprocessing import Pool
70 | from functools import partial
71 | with Pool(1) as p:
72 | p.map(partial(write_year,vbls=variables_simsat), years)
73 | mmap.flush()
74 | del mmap
75 |
76 |
77 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency
78 | print("Done converting. Generating dataset pickle file...")
79 | import dill
80 | import json
81 | dct = {}
82 | dct["variables"] = {}
83 | for i, v in enumerate(variables_simsat):
84 | vbl_dict = {"name":v["name"],
85 | "mmap_name":"{}__simsat5625.mmap".format(dataset_name),
86 | "type":"temp",
87 | "dims": v["dims"],
88 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]]),
89 | "first_ts": ts_daterange[0].timestamp(),
90 | "last_ts": ts_daterange[1].timestamp(),
91 | "tfreq_s": 3600*3,
92 | "levels": v["levels"]}
93 | dct["variables"]["simsat5625/{}".format(v["name"])] = vbl_dict
94 |
95 | dct["memmap"] = {"{}__simsat5625.mmap".format(dataset_name): {"dims": simsat_dims,
96 | "dtype": "float32",
97 | "daterange": (ts_daterange[0].timestamp(),
98 | ts_daterange[1].timestamp()),
99 | "tfreq_s": 3600*3}
100 | }
101 |
102 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb'))
103 |
104 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile:
105 | json.dump(dct, outfile, indent=4, sort_keys=True)
106 |
--------------------------------------------------------------------------------
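Note: SimSat is stored at a 3-hourly cadence, so write_year divides the hour offset by simsat_sample_freq before indexing. A worked check of that row arithmetic, counting from the dataset start 2016-04-01 00:00 (illustrative only):

import datetime

start = datetime.datetime(2016, 4, 1, 0).timestamp()

def simsat_row(ts):
    # only valid for timestamps on the 3-hour grid
    return int((ts - start) // 3600) // 3

# 2016-04-01 to 2017-01-01 spans 275 days = 6600 hours = 2200 records
print(simsat_row(datetime.datetime(2017, 1, 1, 0).timestamp()))  # 2200
--------------------------------------------------------------------------------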
/src/convert/convert_simsat5625_sample.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from netCDF4 import Dataset as netcdf_Dataset
3 | import os
4 | import numpy as np
5 |
6 | if __name__ == "__main__":
7 |
8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12}
9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()}
10 |
11 | years=list(range(2018,2020))
12 | dataset_name = "simsat5625"
13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER"
14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name)
15 |
16 | if not os.path.exists(output_path):
17 | os.makedirs(output_path)
18 |
19 | variables_simsat = [
20 | {"name": "clbt",
21 | "ftemplate": os.path.join(input_path, "sat{}.nc"),
22 | "dims": (32, 64),
23 | "levels": list(range(3))},
24 | ]
25 |
26 | ds_daterange = (datetime.datetime(2016, 4, 1, 0), datetime.datetime(2020, 3, 31, 21))
27 | ts_daterange = ds_daterange
28 |
29 | simsat_path = os.path.join(output_path, "{}__simsat5625.mmap".format(dataset_name))
30 | n_rec_dim = (32, 64)
31 | simsat_sample_freq = 3 # every 3 hours
32 | n_recs = ((datetime.datetime(2019, 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 ) // simsat_sample_freq + 1
33 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_simsat])
34 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim)
35 | simsat_dims = dims
36 | if os.path.exists(simsat_path):
37 | print("Skipping SimSat as file exists... ")
38 | else:
39 | # write temporal SimSat variables
40 | if not os.path.exists(output_path):
41 | os.makedirs(output_path)
42 |
43 | mmap = np.memmap(simsat_path, dtype='float32', mode='w+', shape=dims)
44 | print("MMAP DIMS: ", dims)
45 |
46 | def write_year(y, vbls):
47 | if y < years[0]:
48 | print("SimSat: no data available for year {}".format(y))
49 | return
50 | if y == years[0]:
51 | t_offset = 0
52 | else:
53 | t_offset = int((datetime.datetime(y, 1, 1, 0).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // simsat_sample_freq  # offsets relative to the sample start (years[0]), matching n_recs above
54 | if y == 2020:
55 | t_end = int((ts_daterange[1].timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // simsat_sample_freq + 1
56 | else:
57 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // simsat_sample_freq + 1
58 | print("year: ", y, " t_offset: ", t_offset, "t_end:", t_end)
59 | for i, vbl in enumerate(vbls):
60 | print("SimSat writing year {} vbl {}...".format(y, vbl["name"]))
61 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y)), "r", format="NETCDF4")
62 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]])
63 | print("hello:", t_offset, t_end, root_channel, len(vbl["levels"]), rootgrp[vbl["name"]].shape)
64 | try:
65 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]]
66 | except Exception:
67 | print("EXCEPTION", rootgrp[vbl["name"]].shape, t_offset, t_end, root_channel, mmap.shape)
68 | raise
69 |
70 | from multiprocessing import Pool
71 | from functools import partial
72 | with Pool(1) as p:
73 | p.map(partial(write_year,vbls=variables_simsat), years)
74 | mmap.flush()
75 | del mmap
76 |
77 |
78 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency
79 | print("Done converting. Generating dataset pickle file...")
80 | import dill
81 | import json
82 | dct = {}
83 | dct["variables"] = {}
84 | for i, v in enumerate(variables_simsat):
85 | vbl_dict = {"name":v["name"],
86 | "mmap_name":"{}__simsat5625.mmap".format(dataset_name),
87 | "type":"temp",
88 | "dims": v["dims"],
89 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]]),
90 | "first_ts": datetime.datetime(years[0], 1, 1, 0).timestamp(),
91 | "last_ts": datetime.datetime(years[1], 12, 31, 23).timestamp(),
92 | "tfreq_s": 3600*3,
93 | "levels": v["levels"]}
94 | dct["variables"]["simsat5625/{}".format(v["name"])] = vbl_dict
95 |
96 | dct["memmap"] = {"{}__simsat5625.mmap".format(dataset_name): {"dims": simsat_dims,
97 | "dtype": "float32",
98 | "daterange": (datetime.datetime(years[0], 1, 1, 0).timestamp(),
99 | datetime.datetime(years[1], 12, 31, 23).timestamp()),
100 | "tfreq_s": 3600*3}
101 | }
102 |
103 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb'))
104 |
105 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile:
106 | json.dump(dct, outfile, indent=4, sort_keys=True)
107 |
--------------------------------------------------------------------------------
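Note: all three converters emit metadata with the same schema, so the sample datasets can be inventoried uniformly. A sketch, with the three placeholder paths standing in for wherever the .dill files were written:

import dill

paths = ["era5625_sample/era5625_sample.dill",      # placeholder paths
         "imerg5625_sample/imerg5625_sample.dill",
         "simsat5625_sample/simsat5625_sample.dill"]

for p in paths:
    meta = dill.load(open(p, "rb"))
    for key, v in sorted(meta["variables"].items()):
        print(key, v["type"], v["tfreq_s"], v["levels"])
--------------------------------------------------------------------------------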
/src/convert/test_samples.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import datetime
3 |
4 | from src.dataloader import Dataset
5 |
6 | datapath = ["PATH TO ERA5625 SAMPLES DILL FILE",
7 | "PATH TO IMERG5625 SAMPLES DILL FILE",
8 | "PATH TO SIMSAT5625 SAMPLES DILL FILE"]
9 |
10 | partition_conf = {"train":
11 | {"timerange": (
12 | datetime.datetime(2010, 1, 1, 0).timestamp(), datetime.datetime(2010, 12, 31, 0).timestamp()),
13 | "increment_s": 60 * 60},
14 | "test":
15 | {"timerange": (datetime.datetime(2017, 1, 15, 0).timestamp(),
16 | datetime.datetime(2018, 12, 31, 0).timestamp()),
17 | "increment_s": 60 * 60}}
18 | partition_type = "range"
19 |
20 | sample_conf = {"lead_time_{}".format(int(lt / 3600)): # sample modes
21 | {
22 | "sample": # sample sections
23 | {
24 | "lsm": {"vbl": "yera5625/lsm"}, # sample variables
25 | # "lat": {"vbl": "era5625/lat2d"},
26 | "t_300hPa": {"vbl": "yera5625/t:600hPa",
27 | "t": np.array([0, -1, -2, -3, ]) * 3600,
28 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]},
29 | "t_500hPa": {"vbl": "yera5625/t",
30 | "t": np.array([0, -1, -2, -3, ]) * 3600,
31 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]},
32 | # "t1000": {"vbl": "xera5625/t:1000hPa",
33 | # "t": np.array([0, -1, -2, -3, -4]) * 3600,
34 | # "interpolate": ["nan", "nearest_past", "nearest_future"][1]}
35 | },
36 | "label":
37 | {
38 | "tp": {"vbl": "yera5625/tp",
39 | "t": np.array([lt]),
40 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}}
41 | }
42 | for lt in np.array([3, 7]) * 3600} # np.array([1, 3, 6, 9]) * 3600}
43 |
44 | # MetNet-style option: each sample mode (lead time) can carry its own label targets
45 |
46 | dataset = Dataset(datapath=datapath,
47 | partition_conf=partition_conf,
48 | partition_type=partition_type,
49 | partition_selected="train",
50 | sample_conf=sample_conf,
51 | )
52 |
53 | tp = dataset[((datetime.datetime(2018,1,1,0).timestamp(), datetime.datetime(2019,12,31,23).timestamp(), 3600), ["era5625/tp"], None)]
54 | imerg = dataset[((datetime.datetime(2018,1,1,0).timestamp(), datetime.datetime(2019,12,31,23).timestamp(), 3600), ["imerg5625/precipitationcal"], None)]
55 | simsat = dataset[((datetime.datetime(2018,1,1,0).timestamp(), datetime.datetime(2019,12,31,23).timestamp(), 3*3600), ["simsat5625/clbt:0"], {"interpolate":"nearest_past"})]
56 | simsat2 = dataset[([datetime.datetime(2018,1,1,0).timestamp(), datetime.datetime(2019,12,31,23).timestamp()], ["simsat5625/clbt:0"], {})]
57 |
--------------------------------------------------------------------------------
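Note: the tuple passed to dataset[...] above is (timerange, variable list, options). Assuming the dataloader returns one record per increment with both endpoints inclusive (as the converters do), the hourly queries over 2018-01-01 00:00 to 2019-12-31 23:00 should yield 17520 steps and the 3-hourly simsat query a third of that:

import datetime

start = datetime.datetime(2018, 1, 1, 0).timestamp()
end = datetime.datetime(2019, 12, 31, 23).timestamp()

n_hourly = int((end - start) // 3600) + 1         # 17520 steps (tp, imerg)
n_3hourly = int((end - start) // (3 * 3600)) + 1  # 5840 steps (simsat)
print(n_hourly, n_3hourly)
--------------------------------------------------------------------------------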
/src/dataloader/__init__.py:
--------------------------------------------------------------------------------
1 | from .memmap_dataloader import Dataset
--------------------------------------------------------------------------------