├── .idea
│   ├── .gitignore
│   ├── PyRain.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── README.md
├── analysis
│   ├── climatology
│   │   └── climatology.py
│   ├── precip_estimation
│   │   ├── __init__.py
│   │   ├── erasim_sample.py
│   │   ├── erasim_sample_unbalanced.py
│   │   ├── lightgbm_reg.py
│   │   ├── make_histograms.py
│   │   ├── normalise
│   │   │   ├── 5625__00-06-01_12:00to17-12-31_11:00.json
│   │   │   ├── 5625__04-01-01_12:00to17-12-31_11:00.json
│   │   │   ├── 5625__16-04-01_12:00to17-12-31_11:00.json
│   │   │   ├── 5625__18-01-06_12:00to18-12-31_11:00.json
│   │   │   ├── 5625__19-01-06_12:00to19-12-31_11:00.json
│   │   │   ├── 5625__79-01-01_07:00to17-12-31_11:00.json
│   │   │   └── __init__.py
│   │   ├── pred_conf_matrix.py
│   │   ├── sample.py
│   │   ├── sample_unbalanced.py
│   │   ├── sim_sample_balanced.py
│   │   └── sim_sample_unbalanced.py
│   ├── precip_histogram
│   │   ├── __init__.py
│   │   ├── hist.py
│   │   ├── plot.py
│   │   ├── plot_classhist.py
│   │   └── results
│   │       ├── era140625.json
│   │       ├── era5625.json
│   │       ├── imerg140625.json
│   │       ├── imerg5625.json
│   │       └── imerg_25bi.json
│   └── variable_correlations
│       ├── __init__.py
│       ├── corr.py
│       ├── out.json
│       └── plot.py
├── config.yml
├── run_benchmark.py
└── src
    ├── __init__.py
    ├── benchmark
    │   ├── __init__.py
    │   ├── advanced_normalisation.py
    │   ├── baseline_data.py
    │   ├── collect_data.py
    │   ├── graphics.py
    │   ├── metrics.py
    │   ├── models.py
    │   ├── normalisations
    │   │   └── normalisations_sample_datasets.dill
    │   ├── normalise.py
    │   ├── plot_outputs.py
    │   └── utils.py
    ├── convert
    │   ├── convert_era5625_aaai.py
    │   ├── convert_era5625_aaai_sample.py
    │   ├── convert_imerg5625.py
    │   ├── convert_imerg5625_sample.py
    │   ├── convert_simsat5625.py
    │   ├── convert_simsat5625_sample.py
    │   └── test_samples.py
    └── dataloader
        ├── __init__.py
        └── memmap_dataloader.py

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/workspace.xml

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Update: Data now publicly available!

We are very happy to announce that the memmap datasets are now publicly available at:
https://console.cloud.google.com/storage/browser/aaai_release
You only need an ordinary Google account to access them.

The data comes in two resolutions: `5.625` degrees and `1.40625` degrees.
To see what variables (and timeranges) are contained in each dataset, simply download the associated .dill file and read it out as follows (Python 3):

```python
import dill
import pprint
with open("path-to-dill-file", "rb") as f:
    info = dill.load(f)
    pprint.pprint(info)
```

Please let us know if you have any questions or issues; for technical problems, please use the GitHub issues.
Many thanks, and we hope you will find RainBench useful!

# RainBench - Getting Started

## Downloading the Dataset
Please register [here](https://forms.gle/3AdMJsKtuJ8M1E1Y8) to download the RainBench dataset.

After downloading, you should update the data paths in config.yml.

## Forecasting Precipitation from ERA
Specify `source` as one of {'simsat', 'era16_3', 'simsat_era'} to use data (*from 2016*) from Simsat alone, from ERA5 alone, or from both Simsat and ERA5, respectively.

To use all data available in ERA5 for training (*from 1979*), set `source` to 'era'.

Set `--inc_time` to concatenate the inputs with hour, day and month.

For example, to train, run

```
python3 run_benchmark.py --sources simsat_era --inc_time --config_file config.yml
```

## Forecasting Precipitation from IMERG
Again, specify `source` as one of {'simsat', 'era16_3', 'simsat_era'} to use data (*from 2016*) from Simsat alone, from ERA5 alone, or from both Simsat and ERA5, respectively.

To use all data available in ERA5 for training (*from 2000*), set `source` to 'era'.

For predicting IMERG precipitation, we found empirically that removing the ReLU at the end of the ConvLSTM works better.

Set `--inc_time` to concatenate the inputs with hour, day and month.

```
python3 run_benchmark.py --sources simsat_era --no_relu --imerg --inc_time --config_file config.yml
```

## Evaluating trained models

To evaluate trained models on the test set, run the following.

```
python3 run_benchmark.py --test --phase test --load {MODEL_PATH}
```


# Visualizing Predictions

To visualize the predictions, run the following.

```
python3 -m src.benchmark.plot_outputs --load {MODEL_PATH} --nc_file {ANY_NC_FILE_PATH}
```

Example predictions for a random test date (12 July 2019) are shown below:

### Truth
![](https://i.imgur.com/O1Fk0XS.gif)

### Simsat
![](https://i.imgur.com/uMvodFI.gif)

### ERA
![](https://i.imgur.com/UbOe0Ia.gif)

### Simsat & ERA
![](https://i.imgur.com/tX5pmLP.gif)

# Advanced Topics

## Going to higher spatial resolution

RainBench contains memmap datasets at two spatial resolutions: 5.625 degrees and 1.40625 degrees.
Fortunately, the NetCDF-to-memmap conversion scripts for 5.625 degrees that come with RainBench can easily be adjusted to NetCDF datasets at higher (or native) resolution. The main change required is to adjust the pixel width and height of the different variable channels. Since the conversion scripts use multiprocessing to saturate I/O during dataset conversion, even very high resolution datasets can be converted to memmaps efficiently; a minimal sketch of the core change is given below.
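The sketch below is illustrative only: the output path, shapes and frame loop are hypothetical stand-ins, not the actual API of the scripts in `src/convert/`. The essential change is the target grid shape used when allocating and filling the output memmap (for 1.40625 degrees, 180/1.40625 = 128 latitude pixels and 360/1.40625 = 256 longitude pixels):

```python
import numpy as np

# Hypothetical sketch: converting one variable channel to a memmap at a
# higher target resolution. Only the grid shape needs to change.
n_frames = 8760                  # e.g. one year of hourly frames
lat_px, lon_px = 128, 256        # adjust pixel height/width here

out = np.memmap("tp_140625.mmap", dtype=np.float32, mode="w+",
                shape=(n_frames, lat_px, lon_px))
for i in range(n_frames):
    # placeholder frame; in practice, read frame i from the NetCDF source
    frame = np.zeros((lat_px, lon_px), dtype=np.float32)
    out[i] = frame
out.flush()
```

In the actual conversion scripts, the frame loop is distributed over a `multiprocessing.Pool`, which is what keeps I/O saturated during conversion.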
## Generating normalisation files
Under `src/benchmark/normalise.py`, you can generate your own normalisation files to be used for on-the-fly normalisation of training data. Simply insert your own sample configuration and partitioning setup into the marked section and run the file with python3. This generates a pickled `dill` file containing a dictionary with normalisation entries (including packaged functions) for each variable field across each partition. Partitions of type `repeat` are expressly supported. As with data conversion, normalisation supports multiprocessing (and out-of-core computation), so even datasets at large resolutions can be handled. It is also easy to add new normalisation routines in the fields provided (also have a look at `src/benchmark/transforms.py` for patch-wise local normalisation techniques).

--------------------------------------------------------------------------------
/analysis/climatology/climatology.py:
--------------------------------------------------------------------------------
from datetime import datetime, timedelta
import numpy as np
import os
import pickle
import sys
import torch as th

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.dataloader.memmap_dataloader import Dataset
from src.benchmark.utils import compute_latitude_weighting, compute_weighted_mse

if __name__ == "__main__":

    memmap_root = "SET_THIS"  # SET MEMMAP DATA ROOT PATH HERE
    memmap_root2 = "SET_THIS"
    datapath = [os.path.join(memmap_root, "imerg_5625", "imerg_5625.dill"),
                os.path.join(memmap_root2, "era5625_mf", "era5625_mf.dill"),
                ]

    daterange_imerg = (datetime(2000, 6, 1, 0), datetime(2017, 12, 31, 23))
    daterange_era = (datetime(1979, 1, 1, 7), datetime(2017, 12, 31, 23))

    daterange_val = (datetime(2018, 1, 6, 0), datetime(2018, 12, 31, 23))

    partition_conf = {"era":
                          {"timerange": (daterange_era[0].timestamp(), daterange_era[1].timestamp()),
                           "increment_s": 60 * 60},
                      "imerg":
                          {"timerange": (daterange_imerg[0].timestamp(), daterange_imerg[1].timestamp()),
                           "increment_s": 60 * 60},
                      "val":
                          {"timerange": (daterange_val[0].timestamp(), daterange_val[1].timestamp()),
                           "increment_s": 60 * 60}
                      }

    partition_type = "range"
    sample_conf_era = {"m0": {"era": {"tp_era": {"vbl": "era5625/tp"}, "lat2d": {"vbl": "era5625/lat2d"}}}}  # sample modes
    sample_conf_imerg = {"m0": {"era": {"imerg": {"vbl": "imerg5625/precipitationcal"}}}}  # sample modes

    dataset_era = Dataset(datapath=datapath,
                          partition_conf=partition_conf,
                          partition_type=partition_type,
                          partition_selected="era",
                          sample_conf=sample_conf_era,
                          )

    grid = dataset_era["era5625/lat2d"]
    lat_grid = compute_latitude_weighting(grid)

    era_dict = {}
    era_dict_ctr = {}

    # calculate weekly climatology for ERA5 (incremental running mean per ISO week)
    for i, d in enumerate(dataset_era[(daterange_era[0].timestamp(), daterange_era[1].timestamp(), 3600), ["era5625/tp"], {}]):
        t = daterange_era[0] + timedelta(seconds=i*3600)
        week = t.isocalendar()[1]
        if week in era_dict_ctr:
            era_dict_ctr[week] += 1
        else:
            era_dict_ctr[week] = 1
        if week in era_dict:
            era_dict[week] += (np.array(d) - era_dict[week]) / float(era_dict_ctr[week])
        else:
            era_dict[week] = np.array(d)
    print(sorted(era_dict.keys()))

    era_annual_climatology = dataset_era[(daterange_era[0].timestamp(), daterange_era[1].timestamp(), 3600), ["era5625/tp"], {}].mean(axis=0)
    del dataset_era

    with open("era_climatology.pickle", "wb") as f:
        pickle.dump(era_dict, f)

    dataset_imerg = Dataset(datapath=datapath,
                            partition_conf=partition_conf,
                            partition_type=partition_type,
                            partition_selected="imerg",
                            sample_conf=sample_conf_imerg,
                            )

    imerg_dict = {}
    imerg_dict_ctr = {}
    # calculate weekly climatology for IMERG (same running-mean scheme as above)
    for i, d in enumerate(dataset_imerg[(daterange_imerg[0].timestamp(), daterange_imerg[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}]):
        t = daterange_imerg[0] + timedelta(seconds=i*3600)
        week = t.isocalendar()[1]
        if week in imerg_dict_ctr:
            imerg_dict_ctr[week] += 1
        else:
            imerg_dict_ctr[week] = 1
        if week in imerg_dict:
            imerg_dict[week] += (np.array(d) - imerg_dict[week]) / float(imerg_dict_ctr[week])
        else:
            imerg_dict[week] = np.array(d)
    print(sorted(imerg_dict.keys()))

    imerg_annual_climatology = dataset_imerg[(daterange_imerg[0].timestamp(), daterange_imerg[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}].mean(axis=0)
    del dataset_imerg
    with open("imerg_climatology.pickle", "wb") as f:
        pickle.dump(imerg_dict, f)

    ########################## Predict ERA
    dataset_era = Dataset(datapath=datapath,
                          partition_conf=partition_conf,
                          partition_type=partition_type,
                          partition_selected="val",
                          sample_conf=sample_conf_era,
                          )
    re = 0
    for i, d in enumerate(dataset_era[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["era5625/tp"], {}]):
        t = daterange_val[0] + timedelta(seconds=i*3600)
        week = t.isocalendar()[1]
        rms_error = compute_weighted_mse(th.from_numpy(d)*1000, th.from_numpy(era_dict[week])*1000, th.from_numpy(lat_grid))
        rms_error = rms_error**0.5
        re += (rms_error - re) / float(i+1)

    print("ERA WEEKLY RMS:", re)

    re = 0
    for i, d in enumerate(dataset_era[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["era5625/tp"], {}]):
        rms_error = compute_weighted_mse(th.from_numpy(d)*1000, th.from_numpy(era_annual_climatology)*1000, th.from_numpy(lat_grid))
        rms_error = rms_error**0.5
        re += (rms_error - re) / float(i+1)
    del dataset_era

    print("ERA ANNUAL RMS:", re)

    ########################## Predict IMERG
    dataset_imerg = Dataset(datapath=datapath,
                            partition_conf=partition_conf,
                            partition_type=partition_type,
                            partition_selected="val",
                            sample_conf=sample_conf_imerg,
                            )

    re = 0
    for i, d in enumerate(dataset_imerg[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}]):
        t = daterange_val[0] + timedelta(seconds=i * 3600)
        week = t.isocalendar()[1]
        rms_error = compute_weighted_mse(th.from_numpy(d), th.from_numpy(imerg_dict[week]), th.from_numpy(lat_grid))
        rms_error = rms_error**0.5
        re += (rms_error - re) / float(i+1)

    print("IMERG WEEKLY RMS:", re)

    re = 0
    for i, d in enumerate(dataset_imerg[(daterange_val[0].timestamp(), daterange_val[1].timestamp(), 3600), ["imerg5625/precipitationcal"], {}]):
        rms_error = compute_weighted_mse(th.from_numpy(d), th.from_numpy(imerg_annual_climatology), th.from_numpy(lat_grid))
        rms_error = rms_error**0.5
        re += (rms_error - re) / float(i+1)
    del dataset_imerg

    print("IMERG ANNUAL RMS:", re)
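For reference, a minimal sketch of the latitude-weighted error convention assumed above. The cos-latitude weighting normalised to unit mean is an assumption of this sketch; the repository's actual implementations are `compute_latitude_weighting` and `compute_weighted_mse` in `src/benchmark/utils.py`:

```python
import numpy as np

def latitude_weights(lat2d):
    # cos-latitude weights, normalised to mean 1 (assumed convention)
    w = np.cos(np.deg2rad(lat2d))
    return w / w.mean()

def weighted_mse(pred, target, weights):
    # latitude-weighted mean squared error; the RMSE above is its square root
    return float((weights * (pred - target) ** 2).mean())
```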
-------------------------------------------------------------------------------- /analysis/precip_estimation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/precip_estimation/__init__.py -------------------------------------------------------------------------------- /analysis/precip_estimation/lightgbm_reg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import Counter 3 | import pandas as pd 4 | import lightgbm as lgb 5 | import json 6 | from sklearn.datasets import load_breast_cancer,load_boston,load_wine 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.ensemble import GradientBoostingClassifier 10 | from sklearn.metrics import mean_squared_error,roc_auc_score,precision_score 11 | 12 | ##### DEFINE TAG 13 | tag = "sim_bal" 14 | ###### 15 | 16 | with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f: 17 | nl_train = json.load(f) 18 | 19 | import json 20 | 21 | train_path = "./sim_samples_bal_train.json" 22 | test_path = "./sim_samples_unb_test.json" 23 | val_path = "./sim_samples_unb_val.json" 24 | 25 | with open(train_path, "r") as f: 26 | train = json.load(f) 27 | y_train = np.concatenate([np.array(t[1]) for t in train]) # use 2 for classification 28 | X_train = np.stack([t[0] for t in train]) 29 | 30 | with open(test_path, "r") as f: 31 | test = json.load(f) 32 | y_test = np.concatenate([np.array(t[1]) for t in test]) 33 | X_test = np.stack([t[0] for t in test]) 34 | 35 | with open(val_path, "r") as f: 36 | val = json.load(f) 37 | y_val = np.concatenate([np.array(t[1]) for t in val]) 38 | X_val = np.stack([t[0] for t in val]) 39 | y_val_lst = [[],[],[],[]] 40 | X_val_lst = [[],[],[],[]] 41 | for c in range(4): 42 | y_val_lst[c] = np.concatenate([np.array(t[1]) for t in val if t[2][0]==c]) 43 | X_val_lst[c] = np.stack(t[0] for t in val if t[2][0]==c) 44 | 45 | hyper_params = { 46 | 'task': 'train', 47 | 'boosting_type': 'gbdt', 48 | 'objective': 'regression', 49 | 'metric': ['rmse'], 50 | 'learning_rate': 0.005, 51 | 'feature_fraction': 0.9, 52 | 'bagging_fraction': 0.7, 53 | 'bagging_freq': 10, 54 | 'verbose': 0, 55 | "max_depth": 8, 56 | "num_leaves": 128, 57 | "max_bin": 512, 58 | "num_iterations": 100000, 59 | "n_estimators": 1000 60 | } 61 | 62 | # train 63 | print("Setting up regressor...") 64 | gbm = lgb.LGBMRegressor(**hyper_params) 65 | 66 | print("Setting up fit... 
{},{} -- {},{}".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='rmse',
        early_stopping_rounds=1000)

# invert the 1+log transform applied to the precipitation labels
undo_tp_train = lambda x: (np.exp(x) - 1) * nl_train["imerg5625/precipitationcal"]["std"]
undo_tp_test = lambda x: (np.exp(x) - 1) * nl_train["imerg5625/precipitationcal"]["std"]
undo_tp_val = lambda x: (np.exp(x) - 1) * nl_train["imerg5625/precipitationcal"]["std"]

y_pred = gbm.predict(X_train, num_iteration=gbm.best_iteration_)
rmse_train = mean_squared_error(undo_tp_train(y_pred), undo_tp_train(y_train)) ** 0.5
rmse_train_log = mean_squared_error(y_pred, y_train) ** 0.5
print('The rmse of train is:', rmse_train)

y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
rmse_test = mean_squared_error(undo_tp_test(y_pred), undo_tp_test(y_test)) ** 0.5
rmse_test_log = mean_squared_error(y_pred, y_test) ** 0.5
print('The rmse of test is:', rmse_test)

y_pred = gbm.predict(X_val, num_iteration=gbm.best_iteration_)
rmse_val = mean_squared_error(undo_tp_val(y_pred), undo_tp_val(y_val)) ** 0.5
rmse_val_log = mean_squared_error(y_pred, y_val) ** 0.5
print('The rmse of val is:', rmse_val)

rmse_valc_lst = []
rmse_valc_log_lst = []
for c in range(4):
    y_pred = gbm.predict(X_val_lst[c], num_iteration=gbm.best_iteration_)
    rmse_valc = mean_squared_error(undo_tp_val(y_pred), undo_tp_val(y_val_lst[c])) ** 0.5
    rmse_valc_log = mean_squared_error(y_pred, y_val_lst[c]) ** 0.5
    print('The rmse of val-{} is:'.format(c), rmse_valc)
    rmse_valc_lst.append(rmse_valc)
    rmse_valc_log_lst.append(rmse_valc_log)

# Finished
print("Finished!")
res = {"rmse_train": rmse_train,
       "rmse_test": rmse_test,
       "rmse_val": rmse_val,
       "rmse_train_log": rmse_train_log,
       "rmse_test_log": rmse_test_log,
       "rmse_val_log": rmse_val_log,
       "rmse_valc": rmse_valc_lst,
       "rmse_valc_log": rmse_valc_log_lst}


print("RES: ", res)

with open("{}.json".format(tag), "w") as f:
    json.dump(res, f)

# save the trained model via the underlying booster
gbm.booster_.save_model('{}.txt'.format(tag), num_iteration=gbm.best_iteration_)

--------------------------------------------------------------------------------
/analysis/precip_estimation/make_histograms.py:
--------------------------------------------------------------------------------
from datetime import datetime
import numpy as np
import os, sys
import json
from scipy import stats
from multiprocessing import Pool, TimeoutError
from functools import partial

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dataloader.memmap_dataloader import Dataset


if __name__ == "__main__":

    # set up dataloader with any dataset type you can think of
    memmap_root = ""  # SET MEMMAP DATA ROOT PATH HERE
    datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
                os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
                os.path.join(memmap_root, "era5625", "era5625.dill"),
                ]

    daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 23).timestamp())
    daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 23).timestamp())
    daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 23).timestamp())

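    # split the timeline into "range"-type train/val/test partitions, each sampled at hourly increments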
partition_conf = {"train": 27 | {"timerange": daterange_train, 28 | "increment_s": 60 * 60}, 29 | "val": 30 | {"timerange": daterange_val, 31 | "increment_s": 60 * 60}, 32 | "test": 33 | {"timerange": daterange_test, 34 | "increment_s": 60 * 60}} 35 | 36 | partition_type = "range" 37 | 38 | sample_conf = {"mode0": # sample modes 39 | { 40 | "sample": # sample sections 41 | { 42 | "lsm": {"vbl": "era140625/lsm"}, 43 | }, 44 | } 45 | } 46 | 47 | dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp()) 48 | 49 | part = "test" 50 | # read in every imerg frame and create a rain class histogram for each and save in a file in the end 51 | def get_histograms(args): 52 | dataset_indices, i = args 53 | print ("Starting process {} indices at iteration {}...".format(len(dataset_indices), i)) 54 | 55 | dataset = Dataset(datapath=datapath, 56 | partition_conf=partition_conf, 57 | partition_type=partition_type, 58 | partition_selected=part, 59 | sample_conf=sample_conf, 60 | ) 61 | 62 | res = [] 63 | def seg_rain_imerg(frame): 64 | c0 = np.count_nonzero( (frame >= 0.0) & (frame < 2.5)) 65 | c1 = np.count_nonzero((frame >= 2.5) & (frame < 10.0)) 66 | c2 = np.count_nonzero((frame >= 10.0) & (frame < 50.0)) 67 | c3 = np.count_nonzero((frame >= 50.0) & (frame < 500000.0)) 68 | return c0, c1, c2, c3 69 | 70 | for data_idx in dataset_indices: 71 | data = dataset.dataset[((*partition_conf[part]["timerange"], 3600), ["imerg5625/precipitationcal"], {})][data_idx] 72 | segger = seg_rain_imerg(data) 73 | res.append(segger) 74 | 75 | return res 76 | 77 | 78 | dataset = Dataset(datapath=datapath, 79 | partition_conf=partition_conf, 80 | partition_type=partition_type, 81 | partition_selected="val", 82 | sample_conf=sample_conf, 83 | ) 84 | num_idx_shp = dataset.dataset[((*partition_conf[part]["timerange"], 3600), ["imerg5625/precipitationcal"], {})].shape 85 | num_idx = num_idx_shp[0] 86 | print("Num idx: {}".format(num_idx)) 87 | n_proc = 60 88 | 89 | idxs = np.array_split(np.array(list(range(num_idx))), n_proc) 90 | print("IDXS:", idxs) 91 | with Pool(processes=n_proc) as pool: 92 | res = pool.map(get_histograms, [(idxlst, i) for idxlst, i in zip(idxs, range(len(idxs)))]) 93 | 94 | totres = [] 95 | for r in res: 96 | totres += r 97 | 98 | with open("histo_{}.json".format(part), "w") as f: 99 | json.dump(totres, f) 100 | -------------------------------------------------------------------------------- /analysis/precip_estimation/normalise/5625__00-06-01_12:00to17-12-31_11:00.json: -------------------------------------------------------------------------------- 1 | { 2 | "1plog::era5625/tp": { 3 | "mean": 0.00010117772035300732, 4 | "std": 0.00038926879642531276 5 | }, 6 | "1plog::imerg5625/precipitationcal": { 7 | "mean": 0.047896530479192734, 8 | "std": 0.21623234450817108 9 | }, 10 | "era5625/ciwc_300hPa": { 11 | "mean": 4.038937731820624e-06, 12 | "std": 1.9641447579488158e-05 13 | }, 14 | "era5625/ciwc_500hPa": { 15 | "mean": 3.880424173985375e-06, 16 | "std": 1.3040525118412916e-05 17 | }, 18 | "era5625/ciwc_850hPa": { 19 | "mean": 1.7265290352952434e-06, 20 | "std": 5.4465622270072345e-06 21 | }, 22 | "era5625/clwc_300hPa": { 23 | "mean": 2.3328139420186744e-08, 24 | "std": 1.963040404007188e-06 25 | }, 26 | "era5625/clwc_500hPa": { 27 | "mean": 3.470940100669395e-06, 28 | "std": 1.809844070521649e-05 29 | }, 30 | "era5625/clwc_850hPa": { 31 | "mean": 1.7431688320357352e-05, 32 | "std": 4.5697161112912e-05 33 | }, 34 | "era5625/lat2d": { 35 | "mean": 0.0, 36 | "std": 51.93614196777344 37 | 
}, 38 | "era5625/lon2d": { 39 | "mean": 177.1875, 40 | "std": 103.91035461425781 41 | }, 42 | "era5625/lsm": { 43 | "mean": 0.3370782732963562, 44 | "std": 0.459003746509552 45 | }, 46 | "era5625/orography": { 47 | "mean": 379.4975891113281, 48 | "std": 859.8722534179688 49 | }, 50 | "era5625/q_300hPa": { 51 | "mean": 0.00013051266432739794, 52 | "std": 0.00017297286831308156 53 | }, 54 | "era5625/q_500hPa": { 55 | "mean": 0.0008699031313881278, 56 | "std": 0.0011031731264665723 57 | }, 58 | "era5625/q_850hPa": { 59 | "mean": 0.004596292041242123, 60 | "std": 0.004117097705602646 61 | }, 62 | "era5625/slt": { 63 | "mean": 0.6792043447494507, 64 | "std": 1.1688841581344604 65 | }, 66 | "era5625/sp": { 67 | "mean": 96672.1328125, 68 | "std": 9646.748046875 69 | }, 70 | "era5625/t2m": { 71 | "mean": 278.7690734863281, 72 | "std": 21.161643981933594 73 | }, 74 | "era5625/t_300hPa": { 75 | "mean": 229.04635620117188, 76 | "std": 10.736078262329102 77 | }, 78 | "era5625/t_500hPa": { 79 | "mean": 253.13241577148438, 80 | "std": 13.03946304321289 81 | }, 82 | "era5625/t_850hPa": { 83 | "mean": 274.7433166503906, 84 | "std": 15.543451309204102 85 | }, 86 | "era5625/tp": { 87 | "mean": 0.00010125964035978541, 88 | "std": 0.00039031924097798765 89 | }, 90 | "era5625/z_300hPa": { 91 | "mean": 89486.6640625, 92 | "std": 5084.31396484375 93 | }, 94 | "era5625/z_500hPa": { 95 | "mean": 54157.7734375, 96 | "std": 3348.290771484375 97 | }, 98 | "era5625/z_850hPa": { 99 | "mean": 13764.474609375, 100 | "std": 1467.3331298828125 101 | }, 102 | "imerg5625/precipitationcal": { 103 | "mean": 0.09633872658014297, 104 | "std": 0.7651622295379639 105 | }, 106 | "simsat5625/clbt:0": { 107 | "mean": 236.9783477783203, 108 | "std": 7.888313293457031 109 | }, 110 | "simsat5625/clbt:1": { 111 | "mean": 251.77040100097656, 112 | "std": 12.534334182739258 113 | }, 114 | "simsat5625/clbt:2": { 115 | "mean": 268.89752197265625, 116 | "std": 22.21075439453125 117 | } 118 | } -------------------------------------------------------------------------------- /analysis/precip_estimation/normalise/5625__04-01-01_12:00to17-12-31_11:00.json: -------------------------------------------------------------------------------- 1 | { 2 | "1plog::era5625/tp": { 3 | "mean": 0.00010132892930414528, 4 | "std": 0.000391952256904915 5 | }, 6 | "1plog::imerg5625/precipitationcal": { 7 | "mean": 0.04824421554803848, 8 | "std": 0.21689410507678986 9 | }, 10 | "era5625/ciwc_300hPa": { 11 | "mean": 4.024744157504756e-06, 12 | "std": 1.9663053535623476e-05 13 | }, 14 | "era5625/ciwc_500hPa": { 15 | "mean": 3.8813036553619895e-06, 16 | "std": 1.3047414540778846e-05 17 | }, 18 | "era5625/ciwc_850hPa": { 19 | "mean": 1.7356674106849823e-06, 20 | "std": 5.472066732181702e-06 21 | }, 22 | "era5625/clwc_300hPa": { 23 | "mean": 2.4725551384108257e-08, 24 | "std": 2.0352265437395545e-06 25 | }, 26 | "era5625/clwc_500hPa": { 27 | "mean": 3.4699708066909807e-06, 28 | "std": 1.8168990209233016e-05 29 | }, 30 | "era5625/clwc_850hPa": { 31 | "mean": 1.7463022231822833e-05, 32 | "std": 4.569104203255847e-05 33 | }, 34 | "era5625/lat2d": { 35 | "mean": 0.0, 36 | "std": 51.93614196777344 37 | }, 38 | "era5625/lon2d": { 39 | "mean": 177.1875, 40 | "std": 103.91035461425781 41 | }, 42 | "era5625/lsm": { 43 | "mean": 0.3370782732963562, 44 | "std": 0.459003746509552 45 | }, 46 | "era5625/orography": { 47 | "mean": 379.4975891113281, 48 | "std": 859.8722534179688 49 | }, 50 | "era5625/q_300hPa": { 51 | "mean": 0.00013101613149046898, 52 | "std": 0.00017383853264618665 
53 | }, 54 | "era5625/q_500hPa": { 55 | "mean": 0.0008753924630582333, 56 | "std": 0.001109140575863421 57 | }, 58 | "era5625/q_850hPa": { 59 | "mean": 0.004605869762599468, 60 | "std": 0.004122736398130655 61 | }, 62 | "era5625/slt": { 63 | "mean": 0.6792043447494507, 64 | "std": 1.1688841581344604 65 | }, 66 | "era5625/sp": { 67 | "mean": 96670.375, 68 | "std": 9653.7138671875 69 | }, 70 | "era5625/t2m": { 71 | "mean": 278.7900390625, 72 | "std": 21.1552734375 73 | }, 74 | "era5625/t_300hPa": { 75 | "mean": 229.08236694335938, 76 | "std": 10.754820823669434 77 | }, 78 | "era5625/t_500hPa": { 79 | "mean": 253.1505126953125, 80 | "std": 13.043668746948242 81 | }, 82 | "era5625/t_850hPa": { 83 | "mean": 274.7486267089844, 84 | "std": 15.535614967346191 85 | }, 86 | "era5625/tp": { 87 | "mean": 0.00010141224629478529, 88 | "std": 0.000393031194107607 89 | }, 90 | "era5625/z_300hPa": { 91 | "mean": 89490.7578125, 92 | "std": 5091.87841796875 93 | }, 94 | "era5625/z_500hPa": { 95 | "mean": 54158.9453125, 96 | "std": 3355.55419921875 97 | }, 98 | "era5625/z_850hPa": { 99 | "mean": 13763.185546875, 100 | "std": 1474.3167724609375 101 | }, 102 | "imerg5625/precipitationcal": { 103 | "mean": 0.09697847068309784, 104 | "std": 0.7662108540534973 105 | }, 106 | "simsat5625/clbt:0": { 107 | "mean": 236.9783477783203, 108 | "std": 7.888313293457031 109 | }, 110 | "simsat5625/clbt:1": { 111 | "mean": 251.77040100097656, 112 | "std": 12.534334182739258 113 | }, 114 | "simsat5625/clbt:2": { 115 | "mean": 268.89752197265625, 116 | "std": 22.21075439453125 117 | } 118 | } -------------------------------------------------------------------------------- /analysis/precip_estimation/normalise/5625__16-04-01_12:00to17-12-31_11:00.json: -------------------------------------------------------------------------------- 1 | { 2 | "1plog::era5625/tp": { 3 | "mean": 0.00010312546510249376, 4 | "std": 0.0004055481986142695 5 | }, 6 | "1plog::imerg5625/precipitationcal": { 7 | "mean": 0.048683226108551025, 8 | "std": 0.21575742959976196 9 | }, 10 | "era5625/ciwc_300hPa": { 11 | "mean": 4.029411684314255e-06, 12 | "std": 1.9714751033461653e-05 13 | }, 14 | "era5625/ciwc_500hPa": { 15 | "mean": 3.892440417985199e-06, 16 | "std": 1.3066020983387716e-05 17 | }, 18 | "era5625/ciwc_850hPa": { 19 | "mean": 1.7111583474616054e-06, 20 | "std": 5.420844900072552e-06 21 | }, 22 | "era5625/clwc_300hPa": { 23 | "mean": 3.145902383039356e-08, 24 | "std": 2.359818154218374e-06 25 | }, 26 | "era5625/clwc_500hPa": { 27 | "mean": 3.6120443382969825e-06, 28 | "std": 1.864848491095472e-05 29 | }, 30 | "era5625/clwc_850hPa": { 31 | "mean": 1.7701739125186577e-05, 32 | "std": 4.603979687090032e-05 33 | }, 34 | "era5625/lat2d": { 35 | "mean": 0.0, 36 | "std": 51.93614196777344 37 | }, 38 | "era5625/lon2d": { 39 | "mean": 177.1875, 40 | "std": 103.91035461425781 41 | }, 42 | "era5625/lsm": { 43 | "mean": 0.3370782732963562, 44 | "std": 0.459003746509552 45 | }, 46 | "era5625/orography": { 47 | "mean": 379.4975891113281, 48 | "std": 859.8722534179688 49 | }, 50 | "era5625/q_300hPa": { 51 | "mean": 0.00013589927402790636, 52 | "std": 0.00018083321629092097 53 | }, 54 | "era5625/q_500hPa": { 55 | "mean": 0.0009118292946368456, 56 | "std": 0.0011509027099236846 57 | }, 58 | "era5625/q_850hPa": { 59 | "mean": 0.00473902840167284, 60 | "std": 0.004170333035290241 61 | }, 62 | "era5625/slt": { 63 | "mean": 0.6792043447494507, 64 | "std": 1.1688841581344604 65 | }, 66 | "era5625/sp": { 67 | "mean": 96647.2734375, 68 | "std": 9662.501953125 69 | }, 70 | 
"era5625/t2m": { 71 | "mean": 279.39154052734375, 72 | "std": 21.024553298950195 73 | }, 74 | "era5625/t_300hPa": { 75 | "mean": 229.49632263183594, 76 | "std": 10.80582332611084 77 | }, 78 | "era5625/t_500hPa": { 79 | "mean": 253.59103393554688, 80 | "std": 12.996684074401855 81 | }, 82 | "era5625/t_850hPa": { 83 | "mean": 275.28448486328125, 84 | "std": 15.46550464630127 85 | }, 86 | "era5625/tp": { 87 | "mean": 0.00010321472655050457, 88 | "std": 0.00040673979674465954 89 | }, 90 | "era5625/z_300hPa": { 91 | "mean": 89640.09375, 92 | "std": 5132.85693359375 93 | }, 94 | "era5625/z_500hPa": { 95 | "mean": 54243.48828125, 96 | "std": 3392.837158203125 97 | }, 98 | "era5625/z_850hPa": { 99 | "mean": 13771.6240234375, 100 | "std": 1512.511474609375 101 | }, 102 | "imerg5625/precipitationcal": { 103 | "mean": 0.09606033563613892, 104 | "std": 0.7168794870376587 105 | }, 106 | "simsat5625/clbt:0": { 107 | "mean": 236.88621520996094, 108 | "std": 8.104928970336914 109 | }, 110 | "simsat5625/clbt:1": { 111 | "mean": 251.5996551513672, 112 | "std": 12.920832633972168 113 | }, 114 | "simsat5625/clbt:2": { 115 | "mean": 268.940673828125, 116 | "std": 22.654830932617188 117 | } 118 | } -------------------------------------------------------------------------------- /analysis/precip_estimation/normalise/5625__18-01-06_12:00to18-12-31_11:00.json: -------------------------------------------------------------------------------- 1 | { 2 | "1plog::era5625/tp": { 3 | "mean": 0.00010232715430902317, 4 | "std": 0.000401621509809047 5 | }, 6 | "1plog::imerg5625/precipitationcal": { 7 | "mean": 0.04807817563414574, 8 | "std": 0.21407566964626312 9 | }, 10 | "era5625/ciwc_300hPa": { 11 | "mean": 4.074635398865212e-06, 12 | "std": 1.996555511141196e-05 13 | }, 14 | "era5625/ciwc_500hPa": { 15 | "mean": 3.881124939653091e-06, 16 | "std": 1.3079568816465326e-05 17 | }, 18 | "era5625/ciwc_850hPa": { 19 | "mean": 1.712335915726726e-06, 20 | "std": 5.445999249786837e-06 21 | }, 22 | "era5625/clwc_300hPa": { 23 | "mean": 2.9467773998703706e-08, 24 | "std": 2.243578819616232e-06 25 | }, 26 | "era5625/clwc_500hPa": { 27 | "mean": 3.5341997772775358e-06, 28 | "std": 1.8467679183231667e-05 29 | }, 30 | "era5625/clwc_850hPa": { 31 | "mean": 1.715613871056121e-05, 32 | "std": 4.508942583925091e-05 33 | }, 34 | "era5625/lat2d": { 35 | "mean": 0.0, 36 | "std": 51.93614196777344 37 | }, 38 | "era5625/lon2d": { 39 | "mean": 177.1875, 40 | "std": 103.91035461425781 41 | }, 42 | "era5625/lsm": { 43 | "mean": 0.3370782732963562, 44 | "std": 0.459003746509552 45 | }, 46 | "era5625/orography": { 47 | "mean": 379.4975891113281, 48 | "std": 859.8722534179688 49 | }, 50 | "era5625/q_300hPa": { 51 | "mean": 0.0001317433052463457, 52 | "std": 0.00017504238348919898 53 | }, 54 | "era5625/q_500hPa": { 55 | "mean": 0.0008897155057638884, 56 | "std": 0.001122326240874827 57 | }, 58 | "era5625/q_850hPa": { 59 | "mean": 0.00467934412881732, 60 | "std": 0.004145464394241571 61 | }, 62 | "era5625/slt": { 63 | "mean": 0.6792043447494507, 64 | "std": 1.1688841581344604 65 | }, 66 | "era5625/sp": { 67 | "mean": 96661.5546875, 68 | "std": 9642.8544921875 69 | }, 70 | "era5625/t2m": { 71 | "mean": 279.09906005859375, 72 | "std": 20.908159255981445 73 | }, 74 | "era5625/t_300hPa": { 75 | "mean": 229.20350646972656, 76 | "std": 10.7034273147583 77 | }, 78 | "era5625/t_500hPa": { 79 | "mean": 253.34141540527344, 80 | "std": 12.95738697052002 81 | }, 82 | "era5625/t_850hPa": { 83 | "mean": 275.06121826171875, 84 | "std": 15.381122589111328 85 | }, 86 | 
"era5625/tp": { 87 | "mean": 0.00010241532436339185, 88 | "std": 0.00040276843355968595 89 | }, 90 | "era5625/z_300hPa": { 91 | "mean": 89554.65625, 92 | "std": 5082.7568359375 93 | }, 94 | "era5625/z_500hPa": { 95 | "mean": 54198.89453125, 96 | "std": 3353.39453125 97 | }, 98 | "era5625/z_850hPa": { 99 | "mean": 13767.50390625, 100 | "std": 1486.5418701171875 101 | }, 102 | "imerg5625/precipitationcal": { 103 | "mean": 0.09452106058597565, 104 | "std": 0.7058719992637634 105 | }, 106 | "simsat5625/clbt:0": { 107 | "mean": 237.0115203857422, 108 | "std": 7.813484191894531 109 | }, 110 | "simsat5625/clbt:1": { 111 | "mean": 251.81748962402344, 112 | "std": 12.39886474609375 113 | }, 114 | "simsat5625/clbt:2": { 115 | "mean": 269.04754638671875, 116 | "std": 22.19377326965332 117 | } 118 | } -------------------------------------------------------------------------------- /analysis/precip_estimation/normalise/5625__19-01-06_12:00to19-12-31_11:00.json: -------------------------------------------------------------------------------- 1 | { 2 | "1plog::era5625/tp": { 3 | "mean": 0.0001017941176542081, 4 | "std": 0.00040879662265069783 5 | }, 6 | "1plog::imerg5625/precipitationcal": { 7 | "mean": 0.047552842646837234, 8 | "std": 0.21335327625274658 9 | }, 10 | "era5625/ciwc_300hPa": { 11 | "mean": 4.032832293887623e-06, 12 | "std": 2.0058865629835054e-05 13 | }, 14 | "era5625/ciwc_500hPa": { 15 | "mean": 3.87251657230081e-06, 16 | "std": 1.3076591130811721e-05 17 | }, 18 | "era5625/ciwc_850hPa": { 19 | "mean": 1.6551554153920733e-06, 20 | "std": 5.288734882924473e-06 21 | }, 22 | "era5625/clwc_300hPa": { 23 | "mean": 3.580080232268301e-08, 24 | "std": 2.60587444245175e-06 25 | }, 26 | "era5625/clwc_500hPa": { 27 | "mean": 3.5812354326481e-06, 28 | "std": 1.859859003161546e-05 29 | }, 30 | "era5625/clwc_850hPa": { 31 | "mean": 1.7308886526734568e-05, 32 | "std": 4.5802691602148116e-05 33 | }, 34 | "era5625/lat2d": { 35 | "mean": 0.0, 36 | "std": 51.93614196777344 37 | }, 38 | "era5625/lon2d": { 39 | "mean": 177.1875, 40 | "std": 103.91035461425781 41 | }, 42 | "era5625/lsm": { 43 | "mean": 0.3370782732963562, 44 | "std": 0.459003746509552 45 | }, 46 | "era5625/orography": { 47 | "mean": 379.4975891113281, 48 | "std": 859.8722534179688 49 | }, 50 | "era5625/q_300hPa": { 51 | "mean": 0.0001348510995740071, 52 | "std": 0.00017957847740035504 53 | }, 54 | "era5625/q_500hPa": { 55 | "mean": 0.0009002968436107039, 56 | "std": 0.0011349129490554333 57 | }, 58 | "era5625/q_850hPa": { 59 | "mean": 0.004709702916443348, 60 | "std": 0.004195161163806915 61 | }, 62 | "era5625/slt": { 63 | "mean": 0.6792043447494507, 64 | "std": 1.1688841581344604 65 | }, 66 | "era5625/sp": { 67 | "mean": 96690.9296875, 68 | "std": 9623.365234375 69 | }, 70 | "era5625/t2m": { 71 | "mean": 279.14581298828125, 72 | "std": 21.12673568725586 73 | }, 74 | "era5625/t_300hPa": { 75 | "mean": 229.43798828125, 76 | "std": 10.761752128601074 77 | }, 78 | "era5625/t_500hPa": { 79 | "mean": 253.47357177734375, 80 | "std": 13.040901184082031 81 | }, 82 | "era5625/t_850hPa": { 83 | "mean": 275.2270202636719, 84 | "std": 15.513626098632812 85 | }, 86 | "era5625/tp": { 87 | "mean": 0.00010188626765739173, 88 | "std": 0.0004100216319784522 89 | }, 90 | "era5625/z_300hPa": { 91 | "mean": 89640.796875, 92 | "std": 5096.0185546875 93 | }, 94 | "era5625/z_500hPa": { 95 | "mean": 54256.75390625, 96 | "std": 3352.83251953125 97 | }, 98 | "era5625/z_850hPa": { 99 | "mean": 13802.462890625, 100 | "std": 1460.9244384765625 101 | }, 102 | 
"imerg5625/precipitationcal": { 103 | "mean": 0.09389674663543701, 104 | "std": 0.7125973701477051 105 | }, 106 | "simsat5625/clbt:0": { 107 | "mean": 237.13368225097656, 108 | "std": 7.68720817565918 109 | }, 110 | "simsat5625/clbt:1": { 111 | "mean": 252.09471130371094, 112 | "std": 12.202248573303223 113 | }, 114 | "simsat5625/clbt:2": { 115 | "mean": 269.1205139160156, 116 | "std": 21.72414779663086 117 | } 118 | } -------------------------------------------------------------------------------- /analysis/precip_estimation/normalise/5625__79-01-01_07:00to17-12-31_11:00.json: -------------------------------------------------------------------------------- 1 | { 2 | "1plog::era5625/tp": { 3 | "mean": 9.981045877793804e-05, 4 | "std": 0.0003737492661457509 5 | }, 6 | "1plog::imerg5625/precipitationcal": { 7 | "mean": 0.04788779839873314, 8 | "std": 0.21598206460475922 9 | }, 10 | "era5625/ciwc_300hPa": { 11 | "mean": 3.962032224080758e-06, 12 | "std": 1.9191713363397866e-05 13 | }, 14 | "era5625/ciwc_500hPa": { 15 | "mean": 3.849920176435262e-06, 16 | "std": 1.2940173292008694e-05 17 | }, 18 | "era5625/ciwc_850hPa": { 19 | "mean": 1.7343708123007673e-06, 20 | "std": 5.451817742141429e-06 21 | }, 22 | "era5625/clwc_300hPa": { 23 | "mean": 1.805662108722572e-08, 24 | "std": 1.6793291024441714e-06 25 | }, 26 | "era5625/clwc_500hPa": { 27 | "mean": 3.3815351798693882e-06, 28 | "std": 1.7503703929833136e-05 29 | }, 30 | "era5625/clwc_850hPa": { 31 | "mean": 1.7382071746396832e-05, 32 | "std": 4.577460640575737e-05 33 | }, 34 | "era5625/lat2d": { 35 | "mean": 0.0, 36 | "std": 51.93614196777344 37 | }, 38 | "era5625/lon2d": { 39 | "mean": 177.1875, 40 | "std": 103.91035461425781 41 | }, 42 | "era5625/lsm": { 43 | "mean": 0.3370782732963562, 44 | "std": 0.459003746509552 45 | }, 46 | "era5625/orography": { 47 | "mean": 379.4975891113281, 48 | "std": 859.8722534179688 49 | }, 50 | "era5625/q_300hPa": { 51 | "mean": 0.00012742435501422733, 52 | "std": 0.00016826670616865158 53 | }, 54 | "era5625/q_500hPa": { 55 | "mean": 0.0008531111525371671, 56 | "std": 0.0010778913274407387 57 | }, 58 | "era5625/q_850hPa": { 59 | "mean": 0.004570512101054192, 60 | "std": 0.004106701351702213 61 | }, 62 | "era5625/slt": { 63 | "mean": 0.6792043447494507, 64 | "std": 1.1688841581344604 65 | }, 66 | "era5625/sp": { 67 | "mean": 96696.9609375, 68 | "std": 9652.5927734375 69 | }, 70 | "era5625/t2m": { 71 | "mean": 278.5038757324219, 72 | "std": 21.239084243774414 73 | }, 74 | "era5625/t_300hPa": { 75 | "mean": 228.8600616455078, 76 | "std": 10.72099781036377 77 | }, 78 | "era5625/t_500hPa": { 79 | "mean": 252.9261474609375, 80 | "std": 13.068572998046875 81 | }, 82 | "era5625/t_850hPa": { 83 | "mean": 274.57611083984375, 84 | "std": 15.585567474365234 85 | }, 86 | "era5625/tp": { 87 | "mean": 9.988588863052428e-05, 88 | "std": 0.0003746792790479958 89 | }, 90 | "era5625/z_300hPa": { 91 | "mean": 89407.40625, 92 | "std": 5094.4228515625 93 | }, 94 | "era5625/z_500hPa": { 95 | "mean": 54111.0625, 96 | "std": 3355.026123046875 97 | }, 98 | "era5625/z_850hPa": { 99 | "mean": 13748.427734375, 100 | "std": 1472.0947265625 101 | }, 102 | "imerg5625/precipitationcal": { 103 | "mean": 0.09612467885017395, 104 | "std": 0.7596922516822815 105 | }, 106 | "simsat5625/clbt:0": { 107 | "mean": 236.9783477783203, 108 | "std": 7.888313293457031 109 | }, 110 | "simsat5625/clbt:1": { 111 | "mean": 251.77040100097656, 112 | "std": 12.534334182739258 113 | }, 114 | "simsat5625/clbt:2": { 115 | "mean": 268.89752197265625, 116 | "std": 
22.21075439453125 117 | } 118 | } -------------------------------------------------------------------------------- /analysis/precip_estimation/normalise/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/precip_estimation/normalise/__init__.py -------------------------------------------------------------------------------- /analysis/precip_estimation/pred_conf_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import Counter 3 | import pandas as pd 4 | import lightgbm as lgb 5 | import json 6 | from sklearn.datasets import load_breast_cancer,load_boston,load_wine 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.ensemble import GradientBoostingClassifier 10 | from sklearn.metrics import mean_squared_error,roc_auc_score,precision_score 11 | 12 | ##### DEFINE TAG 13 | tag = "sim_bal" 14 | ###### 15 | 16 | with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f: 17 | nl_train = json.load(f) 18 | 19 | import json 20 | 21 | train_path = "./sim_samples_bal_train.json" 22 | test_path = "./sim_samples_unb_test.json" 23 | val_path = "./sim_samples_bal_val.json" 24 | 25 | 26 | with open(val_path, "r") as f: 27 | val = json.load(f) 28 | y_val = np.concatenate([np.array(t[1]) for t in val]) 29 | X_val = np.stack([t[0] for t in val]) 30 | y_val_lst = [[],[],[],[]] 31 | X_val_lst = [[],[],[],[]] 32 | for c in range(4): 33 | y_val_lst[c] = np.concatenate([np.array(t[1]) for t in val if t[2][0]==c]) 34 | X_val_lst[c] = np.stack(t[0] for t in val if t[2][0]==c) 35 | 36 | print(X_val_lst[0].shape) 37 | 38 | print("Open model...") 39 | import joblib 40 | mod = joblib.load("gbmsim_bal.pkl") 41 | print("Loading done...") 42 | 43 | conf_matrix = np.zeros((4,4)) 44 | for i in range(4): 45 | print("Predict class {}".format(i)) 46 | ypred = mod.predict(X_val_lst[i], num_iteration=mod.best_iteration_) 47 | print("Done predicting...") 48 | for p, t in zip(ypred, y_val_lst[i]): 49 | if p < 2.5: 50 | c = 0 51 | elif p >= 2.5 and p < 10.0: 52 | c = 1 53 | elif p >= 10.0 and p < 50.0: 54 | c = 2 55 | elif p >= 50.0: 56 | c = 3 57 | conf_matrix[i, c] += 1.0 / float(len(ypred)) 58 | 59 | print("CONF MATRIX:") 60 | print(conf_matrix) 61 | 62 | print("All done...") 63 | 64 | -------------------------------------------------------------------------------- /analysis/precip_estimation/sim_sample_balanced.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import numpy as np 3 | import os, sys 4 | import json 5 | from scipy import stats 6 | from multiprocessing import Pool, TimeoutError 7 | from functools import partial 8 | 9 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | from dataloader.memmap_dataloader import Dataset 11 | 12 | 13 | if __name__ == "__main__": 14 | with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f: 15 | nl_train = json.load(f) 16 | 17 | nl_train["__const__lon2d"] = {"mean": 0.5, "std":0.28980498288430995} 18 | nl_train["__const__lat2d"] = {"mean": 0.5, "std":0.29093928798176877} 19 | nl_train["era5625/slt"] = {"mean": 1.1389103, "std":0.6714027} 20 | 21 | 22 | # set up dataloader with any dataset type you can think of 23 | memmap_root = "" # SET MEMMAP 
DATA ROOT PATH HERE 24 | datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"), 25 | os.path.join(memmap_root, "imerg5625", "imerg5625.dill"), 26 | os.path.join(memmap_root, "era5625", "era5625.dill"), 27 | ] 28 | 29 | daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 21).timestamp()) 30 | daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp()) 31 | daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 21).timestamp()) 32 | 33 | partition_conf = {"train": 34 | {"timerange": daterange_train, 35 | "increment_s": 60 * 60}, 36 | "val": 37 | {"timerange": daterange_val, 38 | "increment_s": 60 * 60}, 39 | "test": 40 | {"timerange": daterange_test, 41 | "increment_s": 60 * 60}} 42 | 43 | partition_type = "range" 44 | 45 | dlt = 0 46 | lt = 0 47 | grid_shape = (32,64) 48 | sample_conf = {"mode0": # sample modes 49 | { 50 | "sample": # sample sections 51 | { 52 | "lsm": {"vbl": "era5625/lsm"}, 53 | "orography": {"vbl": "era5625/orography"}, 54 | "slt": {"vbl": "era5625/slt"}, 55 | "__const__lat2d": {"vbl": "__const__lat2d", 56 | "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[0]), axis=1), 57 | grid_shape[1], axis=1)}, 58 | "__const__lon2d": {"vbl": "__const__lon2d", 59 | "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[1]), axis=0), 60 | grid_shape[0], axis=0)}, 61 | 62 | "clbt:0": {"vbl": "simsat5625/clbt:0", 63 | "t": np.array([dlt]) * 3600, 64 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}, 65 | "clbt:1": {"vbl": "simsat5625/clbt:1", 66 | "t": np.array([dlt]) * 3600, 67 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}, 68 | "clbt:2": {"vbl": "simsat5625/clbt:2", 69 | "t": np.array([dlt]) * 3600, 70 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]} 71 | 72 | }, 73 | "label": {"tp": {"vbl": "imerg5625/precipitationcal", 74 | "t": np.array([lt]) * 3600, 75 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}}} 76 | } 77 | 78 | dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp()) 79 | 80 | part = "train" 81 | with open("histo_{}.json".format(part), "r") as f: 82 | histo = np.array(json.load(f)) 83 | histo = histo[slice(None, None, 3)][:-1] 84 | print("HISTO NOW: ", histo.shape) 85 | 86 | histo_trans = histo.transpose() 87 | 88 | n_samples = 250000*4 89 | 90 | from collections import defaultdict 91 | id_dct = defaultdict(lambda x: []) 92 | 93 | # calc frequencies 94 | f = [] 95 | for j in range(4): 96 | fc = np.sum(histo_trans[j])/ float(32*64*histo.shape[0]) 97 | f.append(fc) 98 | 99 | # draw equal number of idxs from each class 100 | for c in range(4): 101 | idx_lst = [] 102 | ch = np.random.choice(np.array(list(range(histo.shape[0]))), 103 | int(n_samples / 4.0), 104 | p=histo_trans[c]/np.sum(histo_trans[c])) 105 | id_dct[c] = ch 106 | 107 | # sort indices by frame 108 | bcts = [] 109 | for c in range(4): 110 | print("ch_c: {}".format(id_dct[c])) 111 | print("minlen: {} max: {}".format(max(id_dct[c]), histo.shape[0])) 112 | ct = np.bincount(id_dct[c], minlength=histo.shape[0]) 113 | bcts.append(ct) 114 | print("Bin {} sum: {}".format(c, np.sum(ct))) 115 | 116 | print("ID_DCT:", id_dct) 117 | print("BCTS: ", bcts) 118 | 119 | b = np.stack(bcts) 120 | print("b:", b) 121 | 122 | print("bincount list: {}".format(b)) 123 | 124 | # read in every imerg frame and create a rain class histogram for each and save in a file in the end 125 | def get_pixels(args): 126 | dataset_indices, 
frame_idxs, i = args 127 | print ("Starting process {} indices at iteration {}...".format(len(dataset_indices), i)) 128 | 129 | def choose_pixel(coord, frame, c): 130 | sample = frame 131 | X = None 132 | y = None 133 | latid, lonid = coord 134 | sample_keys = frame[0]["sample"].keys() 135 | label_keys = frame[0]["label"].keys() 136 | sample_lst = [] 137 | for sk in sample_keys: 138 | if sk[-4:] == "__ts": 139 | continue 140 | s = sample[0]["sample"][sk][...,latid, lonid] 141 | vn = sample_conf["mode0"]["sample"][sk]["vbl"] 142 | if sk in ["tp"]: 143 | s = np.log(max(s, 0.0)/nl_train[vn]["std"] + 1) 144 | else: 145 | s = (s-nl_train[vn]["mean"])/nl_train[vn]["std"] 146 | sample_lst.append(s.flatten()) 147 | X = np.concatenate(sample_lst) 148 | label_lst = [] 149 | for sk in label_keys: 150 | if sk[-4:] == "__ts": 151 | continue 152 | s = sample[0]["label"][sk][...,latid,lonid] 153 | vn = sample_conf["mode0"]["label"][sk]["vbl"] 154 | if sk in ["tp"]: 155 | s = np.log(max(s, 0.0) / nl_train[vn]["std"] + 1) 156 | else: 157 | s = (s-nl_train[vn]["mean"])/nl_train[vn]["std"] 158 | label_lst.append(s.flatten()) 159 | y = np.concatenate(label_lst) 160 | 161 | return X.tolist(), y.tolist(), [c] 162 | 163 | dataset = Dataset(datapath=datapath, 164 | partition_conf=partition_conf, 165 | partition_type=partition_type, 166 | partition_selected=part, 167 | sample_conf=sample_conf, 168 | ) 169 | 170 | res = [] 171 | for j, frame_idx in enumerate(frame_idxs): 172 | 173 | data_idx = dataset_indices[:, j] 174 | 175 | if not sum(data_idx): 176 | 177 | continue 178 | 179 | # compile my own sample 180 | 181 | sam = [{"sample":{}, "label":{}}] 182 | 183 | for k,v in sample_conf["mode0"]["sample"].items(): 184 | if k[:3] == "__c": 185 | sam[0]["sample"][k] = v["val"] 186 | else: 187 | g = dataset.dataset[((dr[0], dr[1], 3600), [v["vbl"]], {})] 188 | if len(g.shape) == 3: 189 | sam[0]["sample"][k] = g 190 | else: 191 | fidx = frame_idx*3 if k[:4] != "clbt" else frame_idx 192 | sam[0]["sample"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][fidx] 193 | for k,v in sample_conf["mode0"]["label"].items(): 194 | sam[0]["label"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][frame_idx*3] 195 | 196 | frame = sam[0]["label"]["tp"][0] 197 | 198 | bounds = [(0.0, 2.5), 199 | (2.5, 10.0), 200 | (10.0, 50.0), 201 | (50.0, 500000.0)] 202 | for c in range(4): 203 | # class 0 204 | idxs = np.where((frame >= bounds[c][0]) & (frame < bounds[c][1])) 205 | if data_idx[c].size == 0.0: 206 | continue 207 | try: 208 | ch = np.random.choice(np.array(list(range(len(idxs[0])))), 209 | data_idx[c]) 210 | except Exception as e: 211 | raise Exception("{}: {}, {}".format(e, idxs[0], data_idx[c])) 212 | 213 | if ch.size == 0: 214 | continue 215 | cl = [(idxs[0][h], idxs[1][h]) for h in ch] 216 | for cl_idx in cl: 217 | spl = choose_pixel(cl_idx, sam, c) 218 | res.append(spl) 219 | 220 | return res 221 | 222 | n_proc = 40 223 | idxs = np.array_split(b, n_proc, axis=1) 224 | print("IDXS: ", idxs) 225 | frame_idxs = np.array_split(np.array(range(b[0].shape[0])), n_proc) 226 | print("FRAMEIDXS: ", frame_idxs) 227 | with Pool(processes=n_proc) as pool: 228 | res = pool.map(get_pixels, [(idxlst, fidxs, i) for idxlst, fidxs, i in zip(idxs, frame_idxs, range(len(idxs)))]) 229 | 230 | totres = [] 231 | for r in res: 232 | totres += r 233 | 234 | with open("sim_samples_bal_{}.json".format(part), "w") as f: 235 | json.dump(totres, f) 236 | 
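The script above writes each sample as a JSON triple `(features, log-normalised label, [rain class])`. A minimal sketch of reading these back, mirroring the loading code in `lightgbm_reg.py` (the filename assumes the `train` partition was sampled):

```python
import json
import numpy as np

with open("sim_samples_bal_train.json") as f:
    samples = json.load(f)

X = np.stack([s[0] for s in samples])                   # per-pixel feature vectors
y = np.concatenate([np.array(s[1]) for s in samples])   # log-normalised precipitation labels
classes = np.array([s[2][0] for s in samples])          # rain class 0-3 per sample
```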
-------------------------------------------------------------------------------- /analysis/precip_estimation/sim_sample_unbalanced.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import numpy as np 3 | import os, sys 4 | import json 5 | from scipy import stats 6 | from multiprocessing import Pool, TimeoutError 7 | from functools import partial 8 | 9 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | from dataloader.memmap_dataloader import Dataset 11 | 12 | 13 | if __name__ == "__main__": 14 | with open("./normalise/5625__16-04-01_12:00to17-12-31_11:00.json") as f: 15 | nl_train = json.load(f) 16 | 17 | nl_train["__const__lon2d"] = {"mean": 0.5, "std":0.28980498288430995} 18 | nl_train["__const__lat2d"] = {"mean": 0.5, "std":0.29093928798176877} 19 | nl_train["era5625/slt"] = {"mean": 1.1389103, "std":0.6714027} 20 | 21 | 22 | # set up dataloader with any dataset type you can think of 23 | memmap_root = "" # SET MEMMAP DATA ROOT PATH HERE 24 | datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"), 25 | os.path.join(memmap_root, "imerg5625", "imerg5625.dill"), 26 | os.path.join(memmap_root, "era5625", "era5625.dill"), 27 | ] 28 | 29 | daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 21).timestamp()) 30 | daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp()) 31 | daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 21).timestamp()) 32 | 33 | partition_conf = {"train": 34 | {"timerange": daterange_train, 35 | "increment_s": 60 * 60}, 36 | "val": 37 | {"timerange": daterange_val, 38 | "increment_s": 60 * 60}, 39 | "test": 40 | {"timerange": daterange_test, 41 | "increment_s": 60 * 60}} 42 | 43 | partition_type = "range" 44 | 45 | dlt = 0 46 | lt = 0 47 | grid_shape = (32,64) 48 | sample_conf = {"mode0": # sample modes 49 | { 50 | "sample": # sample sections 51 | { 52 | "lsm": {"vbl": "era5625/lsm"}, # sample variables 53 | "orography": {"vbl": "era5625/orography"}, # sample variables 54 | "slt": {"vbl": "era5625/slt"}, 55 | "__const__lat2d": {"vbl": "__const__lat2d", 56 | "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[0]), axis=1), 57 | grid_shape[1], axis=1)}, 58 | "__const__lon2d": {"vbl": "__const__lon2d", 59 | "val": np.repeat(np.expand_dims(np.linspace(0.0, 1.0, grid_shape[1]), axis=0), 60 | grid_shape[0], axis=0)}, 61 | "clbt:0": {"vbl": "simsat5625/clbt:0", 62 | "t": np.array([dlt]) * 3600, 63 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}, 64 | "clbt:1": {"vbl": "simsat5625/clbt:1", 65 | "t": np.array([dlt]) * 3600, 66 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}, 67 | "clbt:2": {"vbl": "simsat5625/clbt:2", 68 | "t": np.array([dlt]) * 3600, 69 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]} 70 | 71 | }, 72 | "label": {"tp": {"vbl": "imerg5625/precipitationcal", 73 | "t": np.array([lt]) * 3600, 74 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}}} 75 | } 76 | 77 | dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp()) 78 | 79 | part = "train" 80 | with open("histo_{}.json".format(part), "r") as f: 81 | histo = np.array(json.load(f)) 82 | histo = histo[slice(None, None, 3)][:-1] 83 | print("HISTO NOW: ", histo.shape) 84 | 85 | histo_trans = histo.transpose() 86 | 87 | n_samples = 250000*4 88 | 89 | from collections import defaultdict 90 | id_dct = defaultdict(lambda x: 
[])

    # calc frequencies
    f = []
    for j in range(4):
        fc = np.sum(histo_trans[j]) / float(32*64*histo.shape[0])
        f.append(fc)

    # draw a number of indices from each class in proportion to its overall frequency
    for c in range(4):
        idx_lst = []
        ch = np.random.choice(np.array(list(range(histo.shape[0]))),
                              int(n_samples * f[c] + 0.5),
                              p=histo_trans[c]/np.sum(histo_trans[c]))
        id_dct[c] = ch

    # sort indices by frame
    bcts = []
    for c in range(4):
        print("ch_c: {}".format(id_dct[c]))
        print("minlen: {} max: {}".format(max(id_dct[c]), histo.shape[0]))
        ct = np.bincount(id_dct[c], minlength=histo.shape[0])
        bcts.append(ct)
        print("Bin {} sum: {}".format(c, np.sum(ct)))

    print("ID_DCT:", id_dct)
    print("BCTS: ", bcts)

    b = np.stack(bcts)
    print("b:", b)

    print("bincount list: {}".format(b))

    # sample the drawn per-class pixel counts from each frame and save them to a file at the end
    def get_pixels(args):
        dataset_indices, frame_idxs, i = args
        print("Starting process {} indices at iteration {}...".format(len(dataset_indices), i))

        def choose_pixel(coord, frame, c):
            sample = frame
            X = None
            y = None
            latid, lonid = coord
            sample_keys = frame[0]["sample"].keys()
            label_keys = frame[0]["label"].keys()
            sample_lst = []
            for sk in sample_keys:
                if sk[-4:] == "__ts":
                    continue
                s = sample[0]["sample"][sk][..., latid, lonid]
                vn = sample_conf["mode0"]["sample"][sk]["vbl"]
                if sk in ["tp"]:
                    s = np.log(max(s, 0.0)/nl_train[vn]["std"] + 1)
                else:
                    s = (s-nl_train[vn]["mean"])/nl_train[vn]["std"]
                sample_lst.append(s.flatten())
            X = np.concatenate(sample_lst)
            label_lst = []
            for sk in label_keys:
                if sk[-4:] == "__ts":
                    continue
                s = sample[0]["label"][sk][..., latid, lonid]
                vn = sample_conf["mode0"]["label"][sk]["vbl"]
                if sk in ["tp"]:
                    s = np.log(max(s, 0.0) / nl_train[vn]["std"] + 1)
                else:
                    s = (s-nl_train[vn]["mean"])/nl_train[vn]["std"]
                label_lst.append(s.flatten())
            y = np.concatenate(label_lst)

            return X.tolist(), y.tolist(), [c]

        dataset = Dataset(datapath=datapath,
                          partition_conf=partition_conf,
                          partition_type=partition_type,
                          partition_selected=part,
                          sample_conf=sample_conf,
                          )

        res = []
        for j, frame_idx in enumerate(frame_idxs):

            data_idx = dataset_indices[:, j]

            if not sum(data_idx):
                continue

            # compile my own sample
            sam = [{"sample": {}, "label": {}}]

            for k, v in sample_conf["mode0"]["sample"].items():
                if k[:3] == "__c":
                    sam[0]["sample"][k] = v["val"]
                else:
                    g = dataset.dataset[((dr[0], dr[1], 3600), [v["vbl"]], {})]
                    if len(g.shape) == 3:
                        sam[0]["sample"][k] = g
                    else:
                        fidx = frame_idx*3 if k[:4] != "clbt" else frame_idx
                        sam[0]["sample"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][fidx]
            for k, v in sample_conf["mode0"]["label"].items():
                sam[0]["label"][k] = dataset.dataset[((*partition_conf[part]["timerange"], 3600), [v["vbl"]], {})][frame_idx*3]

            frame = sam[0]["label"]["tp"][0]

            bounds = [(0.0, 2.5),
                      (2.5, 10.0),
                      (10.0, 50.0),
                      (50.0, 500000.0)]
            for c in range(4):
                # pixel coordinates falling into rain class c
                idxs = 
202 |             for c in range(4):
203 |                 # pixels of this frame falling into rain class c
204 |                 idxs = np.where((frame >= bounds[c][0]) & (frame < bounds[c][1]))
205 |                 if data_idx[c] == 0:  # nothing requested for this class
206 |                     continue
207 | 
208 |                 try:
209 |                     ch = np.random.choice(np.array(list(range(len(idxs[0])))),
210 |                                           data_idx[c])
211 |                 except Exception as e:
212 |                     raise Exception("{}: {}, {}".format(e, idxs[0], data_idx[c]))
213 | 
214 |                 if ch.size == 0:
215 |                     continue
216 |                 cl = [(idxs[0][h], idxs[1][h]) for h in ch]
217 | 
218 | 
219 |                 for cl_idx in cl:
220 |                     spl = choose_pixel(cl_idx, sam, c)
221 |                     res.append(spl)
222 | 
223 |         return res
224 | 
225 |     n_proc = 40
226 |     idxs = np.array_split(b, n_proc, axis=1)
227 |     print("IDXS: ", idxs)
228 |     frame_idxs = np.array_split(np.array(range(b[0].shape[0])), n_proc)
229 |     print("FRAMEIDXS: ", frame_idxs)
230 |     with Pool(processes=n_proc) as pool:
231 |         res = pool.map(get_pixels, [(idxlst, fidxs, i) for idxlst, fidxs, i in zip(idxs, frame_idxs, range(len(idxs)))])
232 | 
233 |     totres = []
234 |     for r in res:
235 |         totres += r
236 | 
237 |     with open("sim_samples_unb_{}.json".format(part), "w") as f:
238 |         json.dump(totres, f)
239 | 
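The script writes a flat JSON list of (X, y, [class]) triples. A minimal sketch of reading such a file back into arrays, e.g. before fitting the regressor in lightgbm_reg.py — the file name follows the `sim_samples_unb_{part}.json` pattern above; everything else here is illustrative:

```
import json
import numpy as np

# Load the pixel samples written by sim_sample_unbalanced.py.
with open("sim_samples_unb_train.json", "r") as f:
    triples = json.load(f)  # list of [X, y, [c]] entries

X = np.array([t[0] for t in triples])          # features, one row per pixel sample
y = np.array([t[1] for t in triples]).ravel()  # log-normalised precipitation target
c = np.array([t[2][0] for t in triples])       # rain-class id in {0, 1, 2, 3}
```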
--------------------------------------------------------------------------------
/analysis/precip_histogram/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/precip_histogram/__init__.py
--------------------------------------------------------------------------------
/analysis/precip_histogram/hist.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import numpy as np
3 | import os, sys
4 | import json
5 | 
6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7 | 
8 | 
9 | from dataloader.memmap_dataloader import Dataset
10 | 
11 | if __name__ == "__main__":
12 | 
13 | 
14 |     # set up the dataloader
15 |     memmap_root = ""  # SET MEMMAP DATA ROOT PATH HERE
16 |     datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
17 |                 os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
18 |                 os.path.join(memmap_root, "era5625", "era5625.dill"),
19 |                 ]  # NB: the era140625/imerg140625 reads below also require the corresponding 1.40625-degree dill files on this list
20 | 
21 |     daterange_train = (datetime(2004, 1, 1).timestamp(), datetime(2009, 12, 31, 23).timestamp())
22 |     daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp())
23 |     daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 23).timestamp())
24 | 
25 |     partition_conf = {"train":
26 |                           {"timerange": daterange_train,
27 |                            "increment_s": 60 * 60},
28 |                       "val":
29 |                           {"timerange": daterange_val,
30 |                            "increment_s": 60 * 60},
31 |                       "test":
32 |                           {"timerange": daterange_test,
33 |                            "increment_s": 60 * 60}}
34 | 
35 |     partition_type = "range"
36 | 
37 |     sample_conf = {"mode0":  # sample modes
38 |                    {
39 |                        "sample":  # sample sections
40 |                        {
41 |                            "lsm": {"vbl": "era140625/lsm"},
42 |                        },
43 |                    }
44 |                    }
45 | 
46 |     dataset = Dataset(datapath=datapath,
47 |                       partition_conf=partition_conf,
48 |                       partition_type=partition_type,
49 |                       partition_selected="train",
50 |                       sample_conf=sample_conf,
51 |                       )
52 | 
53 |     n_bins = 100
54 | 
55 |     with open("results/imerg_25bi.json", "r") as f:
56 |         bins = json.load(f)["hist_den"][1]  # reuse the 0.25-degree IMERG bin edges for all histograms
57 | 
58 | 
59 |     # era 5625 histogram
60 |     print("era 5625...")
61 |     era5_tp = dataset.dataset[((*daterange_train, 3600), ["era5625/tp"], {})]
62 |     hist_den = np.histogram(era5_tp.flatten()*1000.0, bins=bins, density=True)  # ERA5 tp is in metres; *1000 converts to mm
63 |     hist_noden = np.histogram(era5_tp.flatten()*1000.0, bins=bins, density=False)
64 |     res5 = {"hist_den": [x.tolist() for x in hist_den],
65 |             "hist_noden": [x.tolist() for x in hist_noden]}
66 | 
67 |     with open("./results/era5625.json", "w") as f:
68 |         json.dump(res5, f)
69 | 
70 |     # era 140625 histogram
71 |     print("era 140625...")
72 |     era1_tp = dataset.dataset[((*daterange_train, 3600), ["era140625/tp"], {})]
73 |     hist_den = np.histogram(era1_tp.flatten()*1000, bins=bins, density=True)
74 |     hist_noden = np.histogram(era1_tp.flatten()*1000, bins=bins, density=False)
75 |     res1 = {"hist_den": [x.tolist() for x in hist_den],
76 |             "hist_noden": [x.tolist() for x in hist_noden]}
77 | 
78 |     with open("./results/era140625.json", "w") as f:
79 |         json.dump(res1, f)
80 | 
81 |     # imerg 140625 histogram
82 |     print("imerg 140625...")
83 |     imerg1_pre = dataset.dataset[((*daterange_train, 3600), ["imerg140625/precipitationcal"], {})]
84 |     hist_den = np.histogram(imerg1_pre.flatten(), bins=bins, density=True)
85 |     hist_noden = np.histogram(imerg1_pre.flatten(), bins=bins, density=False)
86 |     imerg1 = {"hist_den": [x.tolist() for x in hist_den],
87 |               "hist_noden": [x.tolist() for x in hist_noden]}
88 | 
89 |     with open("./results/imerg140625.json", "w") as f:
90 |         json.dump(imerg1, f)
91 | 
92 |     print("imerg 5625...")
93 |     imerg5625_pre = dataset.dataset["imerg5625/precipitationcal"]
94 |     hist_den = np.histogram(imerg5625_pre.flatten(), bins=bins, density=True)
95 |     hist_noden = np.histogram(imerg5625_pre.flatten(), bins=bins, density=False)
96 |     imerg5625 = {"hist_den": [x.tolist() for x in hist_den],
97 |                  "hist_noden": [x.tolist() for x in hist_noden]}
98 |     with open("./results/imerg5625.json", "w") as f:
99 |         json.dump(imerg5625, f)
100 | 
101 | 
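Each results file above stores a [values, bin_edges] pair under "hist_den" (densities) and "hist_noden" (raw counts). A minimal sketch of reading one back, using the path conventions from the script:

```
import json
import numpy as np

with open("./results/era5625.json", "r") as f:
    res = json.load(f)

densities, edges = (np.array(x) for x in res["hist_den"])  # len(edges) == len(densities) + 1
counts = np.array(res["hist_noden"][0])
print(densities.shape, edges.shape, int(counts.sum()))
```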
$5.625^\circ$", linestyle='None', marker=markers[2]) 43 | ax.legend() 44 | plt.grid() 45 | fig.savefig("hist_den.pdf", bbox_inches='tight') 46 | 47 | -------------------------------------------------------------------------------- /analysis/precip_histogram/plot_classhist.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from matplotlib import pyplot as plt 3 | import numpy as np 4 | import skimage.measure 5 | from argparse import ArgumentParser 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import matplotlib.colors as colors 9 | from datetime import datetime 10 | 11 | 12 | with open("/home/cs/Desktop/fracres.pkl", "rb") as f: 13 | res = pickle.load(f) 14 | 15 | print("Pickle loaded!") 16 | 17 | import seaborn as sns 18 | 19 | f1_red = skimage.measure.block_reduce(res["f1"], (8,8), np.max) 20 | fig, ax = plt.subplots(figsize=(10, 20)) 21 | im = ax.imshow(f1_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest', 22 | vmin=0.95, vmax=1.0, extent=[0, 360,0,180]) 23 | 24 | ax.set_xticks(np.linspace(0, 360, 11)) 25 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))] 26 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)] 27 | lones = lonw + [0] + lone 28 | ax.set_yticks(np.linspace(0, 180, 11)) 29 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))] 30 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)] 31 | lates = latn + [0] + late 32 | ax.set_xticklabels(lones) 33 | ax.set_yticklabels(lates) 34 | from mpl_toolkits.axes_grid1 import make_axes_locatable 35 | divider = make_axes_locatable(ax) 36 | cax = divider.append_axes("right", size="5%", pad=0.1) 37 | 38 | ls = np.linspace(0.95, 1.0, 5) 39 | cbar = fig.colorbar(im, cax=cax, ticks=ls) 40 | cbar.set_ticklabels(["{:d}%".format(int(c*100.0)) for c in ls]) 41 | print("saving ...") 42 | plt.savefig("f1.pdf", bbox_inches='tight') 43 | 44 | ######################### 45 | f2_red = skimage.measure.block_reduce(res["f2"], (8,8), np.max) 46 | fig, ax = plt.subplots(figsize=(10, 20)) 47 | im = ax.imshow(f2_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest', 48 | vmin=0, vmax=0.05, extent=[0, 360,0,180]) 49 | 50 | ax.set_xticks(np.linspace(0, 360, 11)) 51 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))] 52 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)] 53 | lones = lonw + [0] + lone 54 | ax.set_yticks(np.linspace(0, 180, 11)) 55 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))] 56 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)] 57 | lates = latn + [0] + late 58 | ax.set_xticklabels(lones) 59 | ax.set_yticklabels(lates) 60 | from mpl_toolkits.axes_grid1 import make_axes_locatable 61 | divider = make_axes_locatable(ax) 62 | cax = divider.append_axes("right", size="5%", pad=0.1) 63 | 64 | ls = np.linspace(0, 0.05, 5) 65 | cbar = fig.colorbar(im, cax=cax, ticks=ls) 66 | cbar.set_ticklabels(["{:d}%".format(int(c*100.0)) for c in ls]) 67 | print("saving ...") 68 | #plt.show() 69 | plt.savefig("f2.pdf", bbox_inches='tight') 70 | 71 | 72 | ######################### 73 | f3_red = skimage.measure.block_reduce(res["f3"], (8,8), np.max) 74 | fig, ax = plt.subplots(figsize=(10, 20)) 75 | im = ax.imshow(f3_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest', 76 | vmin=0, vmax=0.01, extent=[0, 360,0,180]) 77 | 78 | 
--------------------------------------------------------------------------------
/analysis/precip_histogram/plot_classhist.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from matplotlib import pyplot as plt
3 | import numpy as np
4 | import skimage.measure
5 | from argparse import ArgumentParser
6 | 
7 | 
8 | import matplotlib.colors as colors
9 | from datetime import datetime
10 | 
11 | 
12 | with open("/home/cs/Desktop/fracres.pkl", "rb") as f:  # SET PATH TO THE CLASS-FREQUENCY PICKLE HERE
13 |     res = pickle.load(f)
14 | 
15 | print("Pickle loaded!")
16 | 
17 | import seaborn as sns
18 | 
19 | f1_red = skimage.measure.block_reduce(res["f1"], (8,8), np.max)
20 | fig, ax = plt.subplots(figsize=(10, 20))
21 | im = ax.imshow(f1_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
22 |                vmin=0.95, vmax=1.0, extent=[0, 360,0,180])
23 | 
24 | ax.set_xticks(np.linspace(0, 360, 11))
25 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
26 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
27 | lones = lonw + [0] + lone
28 | ax.set_yticks(np.linspace(0, 180, 11))
29 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
30 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
31 | lates = latn + [0] + late
32 | ax.set_xticklabels(lones)
33 | ax.set_yticklabels(lates)
34 | from mpl_toolkits.axes_grid1 import make_axes_locatable
35 | divider = make_axes_locatable(ax)
36 | cax = divider.append_axes("right", size="5%", pad=0.1)
37 | 
38 | ls = np.linspace(0.95, 1.0, 5)
39 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
40 | cbar.set_ticklabels(["{:d}%".format(int(c*100.0)) for c in ls])
41 | print("saving ...")
42 | plt.savefig("f1.pdf", bbox_inches='tight')
43 | 
44 | #########################
45 | f2_red = skimage.measure.block_reduce(res["f2"], (8,8), np.max)
46 | fig, ax = plt.subplots(figsize=(10, 20))
47 | im = ax.imshow(f2_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
48 |                vmin=0, vmax=0.05, extent=[0, 360,0,180])
49 | 
50 | ax.set_xticks(np.linspace(0, 360, 11))
51 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
52 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
53 | lones = lonw + [0] + lone
54 | ax.set_yticks(np.linspace(0, 180, 11))
55 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
56 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
57 | lates = latn + [0] + late
58 | ax.set_xticklabels(lones)
59 | ax.set_yticklabels(lates)
60 | from mpl_toolkits.axes_grid1 import make_axes_locatable
61 | divider = make_axes_locatable(ax)
62 | cax = divider.append_axes("right", size="5%", pad=0.1)
63 | 
64 | ls = np.linspace(0, 0.05, 5)
65 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
66 | cbar.set_ticklabels(["{:d}%".format(int(c*100.0)) for c in ls])
67 | print("saving ...")
68 | #plt.show()
69 | plt.savefig("f2.pdf", bbox_inches='tight')
70 | 
71 | 
72 | #########################
73 | f3_red = skimage.measure.block_reduce(res["f3"], (8,8), np.max)
74 | fig, ax = plt.subplots(figsize=(10, 20))
75 | im = ax.imshow(f3_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
76 |                vmin=0, vmax=0.01, extent=[0, 360,0,180])
77 | 
78 | ax.set_xticks(np.linspace(0, 360, 11))
79 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
80 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
81 | lones = lonw + [0] + lone
82 | ax.set_yticks(np.linspace(0, 180, 11))
83 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
84 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
85 | lates = latn + [0] + late
86 | ax.set_xticklabels(lones)
87 | ax.set_yticklabels(lates)
88 | from mpl_toolkits.axes_grid1 import make_axes_locatable
89 | divider = make_axes_locatable(ax)
90 | cax = divider.append_axes("right", size="5%", pad=0.1)
91 | 
92 | ls = np.linspace(0, 0.01, 5)
93 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
94 | cbar.set_ticklabels(["{:d}%".format(int(c*100.0)) for c in ls])
95 | print("saving ...")
96 | #plt.show()
97 | plt.savefig("f3.pdf", bbox_inches='tight')
98 | 
99 | #########################
100 | f4_red = skimage.measure.block_reduce(res["f4"], (8,8), np.max)
101 | fig, ax = plt.subplots(figsize=(10, 20))
102 | im = ax.imshow(f4_red.transpose(), cmap=plt.get_cmap('hot'), interpolation='nearest',
103 |                vmin=0, vmax=0.001, extent=[0, 360,0,180])
104 | 
105 | ax.set_xticks(np.linspace(0, 360, 11))
106 | lonw = ["${:d}^\circ$W".format(int(c)) for c in reversed(np.linspace(36, 180, 5))]
107 | lone = ["${:d}^\circ$E".format(int(c)) for c in np.linspace(36, 180, 5)]
108 | lones = lonw + [0] + lone
109 | ax.set_yticks(np.linspace(0, 180, 11))
110 | latn = ["${:d}^\circ$N".format(int(c)) for c in reversed(np.linspace(18, 90, 5))]
111 | late = ["${:d}^\circ$S".format(int(c)) for c in np.linspace(18, 90, 5)]
112 | lates = latn + [0] + late
113 | ax.set_xticklabels(lones)
114 | ax.set_yticklabels(lates)
115 | from mpl_toolkits.axes_grid1 import make_axes_locatable
116 | divider = make_axes_locatable(ax)
117 | cax = divider.append_axes("right", size="5%", pad=0.1)
118 | 
119 | ls = np.linspace(0, 0.001, 5)
120 | cbar = fig.colorbar(im, cax=cax, ticks=ls)
121 | cbar.set_ticklabels(["{:.2f}%".format(c*100.0) for c in ls])
122 | print("saving ...")
123 | plt.savefig("f4.pdf", bbox_inches='tight')
124 | 
--------------------------------------------------------------------------------
/analysis/precip_histogram/results/era140625.json:
--------------------------------------------------------------------------------
1 | {"hist_den": [[0.4836122399791112, 0.002774826335797002, 0.00044091698864900903, 0.00012748867450560223, 5.171670783780497e-05, 2.5393532555904468e-05, 1.368843847723224e-05, 7.72077678789198e-06, 4.505650517042691e-06, 2.5703115215379565e-06, 1.3940167152086806e-06, 7.53079205370543e-07, 4.3156617736016433e-07, 1.980374623231844e-07, 9.603623925913461e-08, 4.861461801005883e-08, 3.131613588308035e-08, 1.5508972300631698e-08, 9.245716308338006e-09, 4.771991477117446e-09, 1.7894934790331625e-09, 3.2807441405182438e-09, 2.38599130537755e-09, 1.4912473365992017e-09, 1.7894934790331625e-09, 1.4912445658609687e-09, 8.94748401959521e-10, 8.947467395165813e-10, 5.964989346396807e-10, 8.947467395165813e-10, 8.94748401959521e-10, 8.947467395165813e-10, 2.9824946731984036e-10, 0.0, 2.9824946731984036e-10, 2.9824946731984036e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[1621504138, 9303719, 1478351, 427457, 173401, 85142, 45896, 25887, 15107, 8618, 4674, 2525, 1447, 664, 322, 163, 105, 52, 31, 16, 6, 11, 8, 5, 6, 5, 3, 3, 2, 3, 3, 3, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 
135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]} -------------------------------------------------------------------------------- /analysis/precip_histogram/results/era5625.json: -------------------------------------------------------------------------------- 1 | {"hist_den": [[0.48363464168047676, 0.0027648189212509558, 0.0004341750084180838, 0.000123436087429969, 5.0164254438357146e-05, 2.5191822774615183e-05, 1.3979022766940212e-05, 7.85515564365604e-06, 4.2542798021500835e-06, 2.6613095623315545e-06, 1.4212728487003644e-06, 6.915589364481638e-07, 4.197043439386092e-07, 1.478505312406419e-07, 1.526199032161465e-07, 3.338560382853205e-08, 9.538735089513845e-09, 9.538752812520931e-09, 4.769367544756922e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 
195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[101404273, 579703, 91034, 25881, 10518, 5282, 2931, 1647, 892, 558, 298, 145, 88, 31, 32, 7, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]} -------------------------------------------------------------------------------- /analysis/precip_histogram/results/imerg140625.json: -------------------------------------------------------------------------------- 1 | {"hist_den": [[0.48163964033791573, 0.0033633184512914353, 0.0010042788763038406, 0.00042414273407464056, 0.00021545917019077625, 0.0001231492337569391, 7.654794497221805e-05, 5.1089250137649246e-05, 3.565674552927242e-05, 2.5751732959447667e-05, 1.95288895991721e-05, 1.511831091891867e-05, 1.1822374227710223e-05, 9.495035012806957e-06, 7.696050102033505e-06, 6.417801674777481e-06, 5.24776436430529e-06, 4.460028772955226e-06, 3.6719940601147664e-06, 3.1184882842639746e-06, 2.6517131620634873e-06, 2.252476995755405e-06, 1.914827927469819e-06, 1.6780558050415643e-06, 1.401720724240486e-06, 1.2302140764650426e-06, 1.0813132793221193e-06, 9.146079387958986e-07, 7.815292737405232e-07, 6.614449134140249e-07, 5.682051227375965e-07, 4.972845141429662e-07, 4.181718456746111e-07, 3.7352778763122335e-07, 2.932853890609772e-07, 2.6616072880100247e-07, 2.271690296772887e-07, 1.8309077638807314e-07, 1.6444325282609707e-07, 
1.4749034016361283e-07, 1.206482284480128e-07, 9.550105311600113e-08, 9.493631090991171e-08, 7.628810698117905e-08, 5.763968886291192e-08, 5.198893216495164e-08, 4.2664830200585314e-08, 3.531856804684215e-08, 3.1080224386864276e-08, 2.7407208804349506e-08, 2.4864271904976874e-08, 1.667030217113629e-08, 1.610526702936002e-08, 1.412742721873686e-08, 9.041553419991591e-09, 1.2714637249171748e-08, 9.606650508741065e-09, 8.193907786867379e-09, 4.80330740524266e-09, 0.0, 0.0, 0.0, 0.0, 2.8254854437473723e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[1704627942, 11903519, 3554362, 1501134, 762557, 435852, 270920, 180816, 126197, 91141, 69117, 53507, 41842, 33605, 27238, 22714, 18573, 15785, 12996, 11037, 9385, 7972, 6777, 5939, 4961, 4354, 3827, 3237, 2766, 2341, 2011, 1760, 1480, 1322, 1038, 942, 804, 648, 582, 522, 427, 338, 336, 270, 204, 184, 151, 125, 110, 97, 88, 59, 57, 50, 32, 45, 34, 29, 17, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 
49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]} -------------------------------------------------------------------------------- /analysis/precip_histogram/results/imerg5625.json: -------------------------------------------------------------------------------- 1 | {"hist_den": [[0.48159319008157137, 0.0033696811649768705, 0.0010224177598011873, 0.0004368893837773687, 0.00022320061866130475, 0.00012752896144073257, 7.934121933386373e-05, 5.278149638146249e-05, 3.611168677626202e-05, 2.6015320749277806e-05, 1.909138586963083e-05, 1.4503136105447005e-05, 1.1430438899310496e-05, 8.988093137084743e-06, 7.235636629931197e-06, 5.699293336308051e-06, 4.620108317682766e-06, 4.000868849348633e-06, 3.3386689791950426e-06, 2.677866857960841e-06, 2.2941227508493736e-06, 1.9588741526935483e-06, 1.6194622558834044e-06, 1.3230019913877924e-06, 1.2440351632021362e-06, 1.0112980725362578e-06, 8.824631083916479e-07, 6.926699126960669e-07, 6.261747645102431e-07, 5.167317548712659e-07, 4.654750461846056e-07, 4.0590456883989516e-07, 3.643450510314026e-07, 2.9646217180706595e-07, 2.438202622871743e-07, 1.9948930550768806e-07, 1.7455314231922708e-07, 1.3714838789205384e-07, 1.3160752793909977e-07, 1.163687615461514e-07, 9.974465275384403e-08, 7.75788860803537e-08, 8.312054396153669e-08, 5.956972317243464e-08, 4.1560117543046624e-08, 5.125766877628097e-08, 2.909219038653785e-08, 3.601890238333257e-08, 2.7706745028697747e-08, 3.047753278589679e-08, 2.3550820789102067e-08, 1.662404701721865e-08, 9.697396795512615e-09, 8.31205439615367e-09, 6.9267119967947245e-09, 6.926686257174437e-09, 0.0, 4.156027198076835e-09, 8.312023508609325e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 
30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[347635136, 2432384, 738026, 315366, 161116, 92056, 57272, 38100, 26067, 18779, 13781, 10469, 8251, 6488, 5223, 4114, 3335, 2888, 2410, 1933, 1656, 1414, 1169, 955, 898, 730, 637, 500, 452, 373, 336, 293, 263, 214, 176, 144, 126, 99, 95, 84, 72, 56, 60, 43, 30, 37, 21, 26, 20, 22, 17, 12, 7, 6, 5, 5, 0, 3, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 
158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]} -------------------------------------------------------------------------------- /analysis/precip_histogram/results/imerg_25bi.json: -------------------------------------------------------------------------------- 1 | {"hist_den": [[0.48164910687763596, 0.0033566947441984927, 0.0010020441351742298, 0.0004237454064031667, 0.00021557650069177283, 0.00012330520229372353, 7.665047569833547e-05, 5.080823084502584e-05, 3.5499118857840555e-05, 2.58202775503848e-05, 1.9418797832557096e-05, 1.4984095830457264e-05, 1.1824551323726532e-05, 9.46412543833123e-06, 7.713581649442637e-06, 6.347892280497019e-06, 5.273013222306187e-06, 4.403853802670343e-06, 3.706227310458301e-06, 3.133613364922706e-06, 2.6687251237798176e-06, 2.276526112690999e-06, 1.9457463439024665e-06, 1.6640428342992278e-06, 1.427312256438571e-06, 1.2240980957272723e-06, 1.0580693506259627e-06, 9.109855883150046e-07, 7.817468708324833e-07, 6.712755680955512e-07, 5.79255643072758e-07, 4.911180037263263e-07, 4.28682037389018e-07, 3.636541769386662e-07, 3.120861668272464e-07, 2.6543710289106696e-07, 2.2678910351974528e-07, 1.959092669643797e-07, 1.6604887899947486e-07, 1.4040260954604696e-07, 1.2075713851626234e-07, 1.0276329113569813e-07, 8.557388585382994e-08, 7.34829914109534e-08, 6.299028985672703e-08, 5.2265882698786037e-08, 4.4389834767754206e-08, 3.742462231173967e-08, 3.1959490699918636e-08, 2.6932154829922894e-08, 2.1940419236445804e-08, 1.9207841434904995e-08, 1.5636008987796745e-08, 1.270704785244704e-08, 1.0510327000934763e-08, 9.251196525598129e-09, 7.742101537646932e-09, 7.009861253809507e-09, 6.357965179754699e-09, 4.4648797794965e-11, 6.2508316912951e-11, 1.7859519117986002e-11, 2.6789179128179914e-11, 1.7859519117986002e-11, 2.6789278676979004e-11, 8.929759558993001e-12, 8.929759558993001e-12, 1.785938638674711e-11, 0.0, 1.7859519117986002e-11, 8.929759558993001e-12, 0.0, 8.929759558993001e-12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.929759558993001e-12, 0.0, 0.0, 8.929759558993001e-12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.929759558993001e-12], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 
108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]], "hist_noden": [[53937578843, 375900183, 112214115, 47453226, 24141378, 13808358, 8583723, 5689770, 3975376, 2891489, 2174618, 1677997, 1324176, 1059842, 863807, 710870, 590500, 493166, 415043, 350918, 298858, 254937, 217895, 186348, 159838, 137081, 118488, 102017, 87544, 75173, 64868, 54998, 48006, 40724, 34949, 29725, 25397, 21939, 18595, 15723, 13523, 11508, 9583, 8229, 7054, 5853, 4971, 4191, 3579, 3016, 2457, 2151, 1751, 1423, 1177, 1036, 867, 785, 712, 5, 7, 2, 3, 2, 3, 1, 1, 2, 0, 2, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0.0, 2.0531179904937744, 4.106235980987549, 6.159353733062744, 8.212471961975098, 10.265589714050293, 12.318707466125488, 14.371826171875, 16.424943923950195, 18.47806167602539, 20.531179428100586, 22.58429718017578, 24.637414932250977, 26.690534591674805, 28.74365234375, 30.796770095825195, 32.84988784790039, 34.90300750732422, 36.95612335205078, 39.00924301147461, 41.06235885620117, 43.115478515625, 45.16859436035156, 47.22171401977539, 49.27482986450195, 51.32794952392578, 53.38106918334961, 55.43418502807617, 57.4873046875, 59.54042053222656, 61.59354019165039, 63.64665603637695, 65.69977569580078, 67.75289154052734, 69.80601501464844, 71.859130859375, 73.91224670410156, 75.96536254882812, 78.01848602294922, 80.07160186767578, 82.12471771240234, 84.1778335571289, 86.23095703125, 88.28407287597656, 90.33718872070312, 92.39031219482422, 94.44342803955078, 96.49654388427734, 98.5496597290039, 100.602783203125, 102.65589904785156, 104.70901489257812, 106.76213836669922, 108.81525421142578, 110.86837005615234, 112.9214859008789, 114.974609375, 117.02772521972656, 119.08084106445312, 121.13396453857422, 123.18708038330078, 125.24019622802734, 127.2933120727539, 129.346435546875, 131.39955139160156, 133.45266723632812, 135.5057830810547, 137.55889892578125, 139.61203002929688, 141.66514587402344, 143.71826171875, 145.77137756347656, 147.82449340820312, 149.8776092529297, 151.93072509765625, 153.98385620117188, 156.03697204589844, 158.090087890625, 160.14320373535156, 162.19631958007812, 164.2494354248047, 166.30255126953125, 168.3556671142578, 170.40879821777344, 172.4619140625, 174.51502990722656, 176.56814575195312, 178.6212615966797, 180.67437744140625, 182.7274932861328, 184.78062438964844, 186.833740234375, 188.88685607910156, 190.93997192382812, 192.9930877685547, 195.04620361328125, 197.0993194580078, 199.15245056152344, 201.20556640625, 203.25868225097656, 205.31179809570312]]} -------------------------------------------------------------------------------- 
/analysis/variable_correlations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/analysis/variable_correlations/__init__.py
--------------------------------------------------------------------------------
/analysis/variable_correlations/corr.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import numpy as np
3 | import os, sys
4 | import json
5 | from scipy import stats
6 | from multiprocessing import Pool, TimeoutError
7 | from functools import partial
8 | 
9 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10 | from dataloader.memmap_dataloader import Dataset
11 | 
12 | 
13 | if __name__ == "__main__":
14 | 
15 |     # set up the dataloader
16 |     memmap_root = ""  # SET MEMMAP DATA ROOT PATH HERE
17 |     datapath = [os.path.join(memmap_root, "simsat5625", "simsat5625.dill"),
18 |                 os.path.join(memmap_root, "imerg5625", "imerg5625.dill"),
19 |                 os.path.join(memmap_root, "era5625", "era5625.dill"),
20 |                 ]
21 | 
22 |     daterange_train = (datetime(2016, 4, 1).timestamp(), datetime(2017, 12, 31, 23).timestamp())
23 |     daterange_test = (datetime(2019, 1, 6, 0).timestamp(), datetime(2019, 12, 31, 21).timestamp())
24 |     daterange_val = (datetime(2018, 1, 6, 0).timestamp(), datetime(2018, 12, 31, 23).timestamp())
25 | 
26 |     partition_conf = {"train":
27 |                           {"timerange": daterange_train,
28 |                            "increment_s": 60 * 60},
29 |                       "val":
30 |                           {"timerange": daterange_val,
31 |                            "increment_s": 60 * 60},
32 |                       "test":
33 |                           {"timerange": daterange_test,
34 |                            "increment_s": 60 * 60}}
35 | 
36 |     partition_type = "range"
37 | 
38 |     sample_conf = {"mode0":  # sample modes
39 |                    {
40 |                        "sample":  # sample sections
41 |                        {
42 |                            "lsm": {"vbl": "era140625/lsm"},
43 |                        },
44 |                    }
45 |                    }
46 | 
47 |     dr = (datetime(2016, 4, 1).timestamp(), datetime(2019, 12, 31, 21).timestamp())
48 | 
49 |     def get_corr(args):
50 |         curr_vbl, vbls, x, dr = args
51 |         print("Starting process {}...".format(x))
52 | 
53 |         dataset = Dataset(datapath=datapath,
54 |                           partition_conf=partition_conf,
55 |                           partition_type=partition_type,
56 |                           partition_selected="train",
57 |                           sample_conf=sample_conf,
58 |                           )
59 |         res = []
60 |         try:
61 |             dx1 = dataset.dataset[((*dr, 3600*3), [curr_vbl], {})]
62 |         except Exception as e:
63 | 
64 |             raise Exception("{}: vbl: {} dr: {}".format(e, curr_vbl, dr))
65 |         ds1 = dx1[..., int(dx1.shape[-2]/4):-int(dx1.shape[-2]/4), int(dx1.shape[-1]/4):-int(dx1.shape[-1]/4)]  # keep the central half of the grid in both dimensions
66 |         for y, v2 in enumerate(vbls):
67 |             print("Process {}, v1: {} v2: {} it:{} start corr...".format(x, curr_vbl, v2, y))
68 |             dx2 = dataset.dataset[((*dr, 3600*3), [v2], {})]
69 |             ds2 = dx2[..., int(dx2.shape[-2]/4):-int(dx2.shape[-2]/4), int(dx2.shape[-1]/4):-int(dx2.shape[-1]/4)]
70 |             if len(ds1.shape) < len(ds2.shape):  # static field: broadcast along the time axis
71 |                 print("Broadcasting: {} shp1: {} shp2: {}".format(np.expand_dims(ds1, axis=0).shape, ds1.shape, ds2.shape))
72 |                 try:
73 |                     a = np.expand_dims(ds1, axis=0).repeat(ds2.shape[0], axis=0)
74 |                 except Exception as e:
75 |                     raise Exception("{}: broadcast failed: {} shp1: {} shp2: {}".format(e, np.expand_dims(ds1, axis=0).shape, ds1.shape, ds2.shape))
76 |                 print("WARNING: shp1: {} shp2: {} new_shp: {}".format(ds1.shape, ds2.shape, a.shape))
77 |             else:
78 |                 a = ds1
79 |             corr = stats.spearmanr(a.flatten(), ds2.flatten())[0]
80 |             print("Process {}, v1: {} v2: {} it:{} found corr: {}!".format(x, curr_vbl, v2, y, corr))
81 
| res.append(corr) 82 | return res 83 | 84 | pressure_levels = [300, 500, 850] 85 | era_lst = ["era5625"] 86 | simsat_lst = ["simsat5625"] 87 | imerg_lst = ["imerg5625"] 88 | reso_lst = ["5625"] 89 | 90 | resdct = {} 91 | for pl in pressure_levels: 92 | print("pressure level: {}".format(pl)) 93 | resdct[pl] = {} 94 | for reso, era, simsat, imerg in zip(reso_lst, era_lst, simsat_lst, imerg_lst): 95 | 96 | vbl_list = ['{}/lon2d'.format(era), '{}/lat2d'.format(era), '{}/lsm'.format(era), '{}/orography'.format(era), '{}/slt'.format(era)] 97 | if era in ["era5625"]: 98 | vbl_list += ['{}/z_{}hPa'.format(era, pl), '{}/t_{}hPa'.format(era, pl), '{}/q_{}hPa'.format(era, pl), 99 | '{}/sp'.format(era), '{}/clwc_{}hPa'.format(era, pl), '{}/ciwc_{}hPa'.format(era, pl), '{}/t2m'.format(era), 100 | '{}/clbt:0'.format(simsat), '{}/clbt:1'.format(simsat), '{}/clbt:2'.format(simsat), 101 | "{}/tp".format(era), "{}/precipitationcal".format(imerg)] 102 | else: 103 | vbl_list += ['{}/z_{}hPa'.format(era, pl), '{}/t_{}hPa'.format(era, pl), '{}/q_{}hPa'.format(era, pl), 104 | '{}/sp'.format(era), '{}/clwc_{}hPa'.format(era, pl), '{}/ciwc_{}hPa'.format(era, pl), '{}/t2m'.format(era), 105 | '{}/clbt:0'.format(simsat), '{}/clbt:1'.format(simsat), '{}/clbt:2'.format(simsat), 106 | "{}/tp".format(era), "{}/precipitationcal".format(imerg)] 107 | 108 | resdct[pl]["vbl_lst"] = vbl_list 109 | 110 | 111 | vbl_args = [vbl_list[i:] for i in range(1, len(vbl_list))] 112 | with Pool(processes=len(vbl_list)) as pool: 113 | res = pool.map(get_corr, [(v, va, x, dr) for v, va, x in zip(vbl_list[:-1], vbl_args, range(1,len(vbl_list)))]) 114 | 115 | try: 116 | resdct[pl][reso] = np.zeros((len(vbl_list), len(vbl_list))) 117 | except Exception as e: 118 | print("res: ", res) 119 | raise Exception(res[pl].keys()) 120 | 121 | for j, r in enumerate(res): 122 | try: 123 | resdct[pl][reso][j, j+1:] = np.array(r) 124 | except Exception as e: 125 | raise Exception(r) 126 | resdct[pl][reso] = resdct[pl][reso].tolist() 127 | 128 | 129 | with open("out.json", "w") as f: 130 | json.dump(resdct, f) 131 | -------------------------------------------------------------------------------- /analysis/variable_correlations/out.json: -------------------------------------------------------------------------------- 1 | {"300": {"vbl_lst": ["era5625/lon2d", "era5625/lat2d", "era5625/lsm", "era5625/orography", "era5625/slt", "era5625/z_300hPa", "era5625/t_300hPa", "era5625/q_300hPa", "era5625/sp", "era5625/clwc_300hPa", "era5625/ciwc_300hPa", "era5625/t2m", "simsat5625/clbt:0", "simsat5625/clbt:1", "simsat5625/clbt:2", "era5625/tp", "imerg5625/precipitationcal"], "5625": [[0.0, 0.0, -0.3177168700168456, -0.09549379829619742, -0.2553785868697466, -0.08715137388046382, -0.21546466295065664, -0.08788077149974781, 0.26045042498838405, -0.026643450825193513, -0.07545864149389221, -0.1050581860011756, 0.08557432278786276, 0.14595313156696713, 0.10649916565759408, 0.008616773509850075, -0.0594204300858448], [0.0, 0.0, 0.19648095479842195, 0.16594718360057314, 0.20859422570752606, 0.1774073524831399, 0.1445060759301099, 0.14803531699837114, -0.18280435432396444, 0.018903555007841, 0.06408965804712029, 0.07415423943217579, -0.08295954371421065, -0.07742337885964642, -0.02292048643260018, -0.03003015419210857, 0.007400616884911511], [0.0, 0.0, 0.0, 0.5960805898339152, 0.8828652529574451, -0.016555773911789876, 0.03643880187738786, 0.041648396825674526, -0.5886072913329173, 0.01867149437345957, 0.0894967009046487, -0.04132121153194514, -0.08813560319544973, 
-0.13140470629079276, -0.1606952928020179, -0.18602379824595836, 0.008825231456441849], [0.0, 0.0, 0.0, 0.0, 0.6222101968877777, -0.06524898842515552, -0.04513418916142937, -0.009602088616926446, -0.4027445408715066, 0.00851510297907061, 0.0416073859428534, -0.09878319758779724, -0.04491396447895362, -0.07214148838972995, -0.1109931577769699, -0.15149513280567695, -0.01651932122553308], [0.0, 0.0, 0.0, 0.0, 0.0, -0.08466183573680075, -0.043904406268225504, -0.015619121977354438, -0.5847020123643477, 0.010442814337223239, 0.06417416667365214, -0.13347877330431604, -0.06588782452096424, -0.11103598712040974, -0.17317177964851715, -0.21814332763844557, -0.020403097138447266], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.870395365557898, 0.4741300934644555, -0.1686565192882331, 0.05802765211020303, 0.03161067125691434, 0.8096201492748264, 0.0866937327603289, 0.16539684250902525, 0.34405584391580946, 0.14973397928213353, 0.08291304320188556], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46949470487370476, -0.3193456769908918, 0.09071948381206275, 0.08458356116806756, 0.7729447158309221, 0.06414631333930076, 0.07751064783524136, 0.23537215151427512, 0.1994797489858496, 0.14967374492947585], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.16223620952994544, 0.11599272216765051, 0.5793451938685434, 0.4563222944365399, -0.639465327724407, -0.48891281821011773, -0.23903668932388403, 0.24079477570776295, 0.2887785048447469], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.05310070147842954, -0.13896539517119438, -0.24979722469976873, 0.12520888537096275, 0.19071848073711964, 0.11374917438499219, -0.06327363178512428, -0.14415551968348903], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13555550373067665, 0.048712612153705405, -0.09875471172563402, -0.10166281462323232, -0.09662254013746377, 0.08932089461632568, 0.10645078231569388], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06899419821944712, -0.6965412139561395, -0.673840264132455, -0.59141734340533, 0.3098650015670476, 0.4138022349919871], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.030458100504734537, 0.10151221246880085, 0.39661005200476473, 0.16271949476451364, 0.0962131604460685], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9228549043141981, 0.7031292764562275, -0.2744093519211432, -0.37311167206662427], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8167429535972578, -0.337723268537943, -0.4296875955861106], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29751518293664203, -0.41526437133709004], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46592336412190316], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}, "500": {"vbl_lst": ["era5625/lon2d", "era5625/lat2d", "era5625/lsm", "era5625/orography", "era5625/slt", "era5625/z_500hPa", "era5625/t_500hPa", "era5625/q_500hPa", "era5625/sp", "era5625/clwc_500hPa", "era5625/ciwc_500hPa", "era5625/t2m", "simsat5625/clbt:0", "simsat5625/clbt:1", "simsat5625/clbt:2", "era5625/tp", "imerg5625/precipitationcal"], "5625": [[0.0, 0.0, -0.3177168700168456, -0.09549379829619742, -0.2553785868697466, 0.05295723434411872, -0.07096749404556527, -0.1395804711203429, 0.26045042498838405, -0.13934431890333882, -0.13255467433478846, -0.1050581860011756, 0.08557432278786276, 0.14595313156696713, 0.10649916565759408, 0.008616773509850075, -0.0594204300858448], [0.0, 0.0, 0.19648095479842195, 0.16594718360057314, 0.20859422570752606, 
0.1849560365355781, 0.14086822222010553, 0.1612160685014734, -0.18280435432396444, 0.08343025610629015, 0.0640334782607752, 0.07415423943217579, -0.08295954371421065, -0.07742337885964642, -0.02292048643260018, -0.03003015419210857, 0.007400616884911511], [0.0, 0.0, 0.0, 0.5960805898339152, 0.8828652529574451, -0.07053868078362067, -0.0353426816237076, 0.0946652966686655, -0.5886072913329173, 0.13117349577679238, 0.1349178709952073, -0.04132121153194514, -0.08813560319544973, -0.13140470629079276, -0.1606952928020179, -0.18602379824595836, 0.008825231456441849], [0.0, 0.0, 0.0, 0.0, 0.6222101968877777, -0.07782733550406129, -0.0728198418711775, 0.02334874808387871, -0.4027445408715066, 0.05300645437631226, 0.061588621336249774, -0.09878319758779724, -0.04491396447895362, -0.07214148838972995, -0.1109931577769699, -0.15149513280567695, -0.01651932122553308], [0.0, 0.0, 0.0, 0.0, 0.0, -0.12057457593686746, -0.09723632235121875, 0.03701763358225301, -0.5847020123643477, 0.09126944851014936, 0.10470629274591992, -0.13347877330431604, -0.06588782452096424, -0.11103598712040974, -0.17317177964851715, -0.21814332763844557, -0.020403097138447266], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7389038526687302, 0.3029610591486723, 0.040999093062291096, 0.11022764448490562, -0.06456875432403025, 0.7091234084271889, 0.15059945767295227, 0.2667852254508393, 0.42573787240788796, 0.0552571878987345, -0.012375669527371354], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33758457292969346, -0.25298222748088, 0.2080965300320602, 0.01741792123602956, 0.7725253030915625, 0.13305239701129024, 0.21878328190202853, 0.34918226590519535, 0.16426890133685013, 0.08462757704359743], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25603352086757586, 0.5324475696377969, 0.573169727057861, 0.4319934016918736, -0.58557909026585, -0.6049808012315643, -0.2902801902456115, 0.3230616649396378, 0.35845314517542576], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.24836678437428797, -0.25676220583509257, -0.24979722469976873, 0.12520888537096275, 0.19071848073711964, 0.11374917438499219, -0.06327363178512428, -0.14415551968348903], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6763803371855069, 0.2588962827909601, -0.36372555377681476, -0.4290842938757065, -0.32330973010071273, 0.3943629942693203, 0.3835579865501589], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09426505527839732, -0.5335649363606867, -0.6275946043762992, -0.5346292985387815, 0.43573602200899975, 0.4837899533757049], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.030458100504734537, 0.10151221246880085, 0.39661005200476473, 0.16271949476451364, 0.0962131604460685], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9228549043141981, 0.7031292764562275, -0.2744093519211432, -0.37311167206662427], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8167429535972578, -0.337723268537943, -0.4296875955861106], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29751518293664203, -0.41526437133709004], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46592336412190316], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}, "850": {"vbl_lst": ["era5625/lon2d", "era5625/lat2d", "era5625/lsm", "era5625/orography", "era5625/slt", "era5625/z_850hPa", "era5625/t_850hPa", "era5625/q_850hPa", "era5625/sp", "era5625/clwc_850hPa", "era5625/ciwc_850hPa", "era5625/t2m", "simsat5625/clbt:0", "simsat5625/clbt:1", "simsat5625/clbt:2", 
"era5625/tp", "imerg5625/precipitationcal"], "5625": [[0.0, 0.0, -0.3177168700168456, -0.09549379829619742, -0.2553785868697466, 0.13673565999279566, -0.11354189049566119, -0.0830713108568745, 0.26045042498838405, 0.08406407874550167, -0.013547487933883794, -0.1050581860011756, 0.08557432278786276, 0.14595313156696713, 0.10649916565759408, 0.008616773509850075, -0.0594204300858448], [0.0, 0.0, 0.19648095479842195, 0.16594718360057314, 0.20859422570752606, 0.020309037494548158, 0.21426105568084475, 0.10278705265181116, -0.18280435432396444, -0.14993650243285478, -0.04299503392766058, 0.07415423943217579, -0.08295954371421065, -0.07742337885964642, -0.02292048643260018, -0.03003015419210857, 0.007400616884911511], [0.0, 0.0, 0.0, 0.5960805898339152, 0.8828652529574451, -0.07392622056763892, 0.14900044637439597, -0.009355606612847452, -0.5886072913329173, -0.25986102534891825, -0.01979780736656416, -0.04132121153194514, -0.08813560319544973, -0.13140470629079276, -0.1606952928020179, -0.18602379824595836, 0.008825231456441849], [0.0, 0.0, 0.0, 0.0, 0.6222101968877777, -0.02820542710667479, 0.0578947732508045, -0.06223938384586625, -0.4027445408715066, -0.18336782530457968, -0.00874963621248447, -0.09878319758779724, -0.04491396447895362, -0.07214148838972995, -0.1109931577769699, -0.15149513280567695, -0.01651932122553308], [0.0, 0.0, 0.0, 0.0, 0.0, -0.0666547446381641, 0.08676858436921331, -0.07953531333677089, -0.5847020123643477, -0.2611174624626138, -0.008413986938549294, -0.13347877330431604, -0.06588782452096424, -0.11103598712040974, -0.17317177964851715, -0.21814332763844557, -0.020403097138447266], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.04876465231406704, -0.11216393724093153, 0.5924683963446897, -0.06654244379845295, -0.11049577189139291, -0.005204861453155439, 0.16079626395998406, 0.2450619306266719, 0.20723626096400036, -0.20048439192230355, -0.17781427775569789], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5996293243895688, -0.40464331116955, -0.2207390818312782, -0.208760630702854, 0.8160776386669425, -0.005403439088521789, 0.06425036750511218, 0.29448679683825923, 0.05295582721334499, 0.07542305814954041], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29449651628271595, 0.2825151657351957, -0.15665414096263358, 0.7165337182949488, -0.1703944514860449, -0.16688462995254363, 0.022057433430265002, 0.5146004222176873, 0.34589495178162705], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1511589886138849, 0.01603127506563371, -0.24979722469976873, 0.12520888537096275, 0.19071848073711964, 0.11374917438499219, -0.06327363178512428, -0.14415551968348903], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02791321168121268, 0.04078532940188567, -0.005846554190264432, -0.032370415846414764, -0.03643399791087186, 0.4761089617000506, 0.15927126742474546], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19513110740495518, -0.032408296637920185, -0.08372859202754664, -0.0938051783051143, -0.012270082330155929, 0.0005665961536950896], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.030458100504734537, 0.10151221246880085, 0.39661005200476473, 0.16271949476451364, 0.0962131604460685], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9228549043141981, 0.7031292764562275, -0.2744093519211432, -0.37311167206662427], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8167429535972578, -0.337723268537943, -0.4296875955861106], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29751518293664203, 
-0.41526437133709004], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46592336412190316], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}} -------------------------------------------------------------------------------- /analysis/variable_correlations/plot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | import json 5 | 6 | from matplotlib import pyplot as plt 7 | 8 | # open results files 9 | with open("./out.json") as f: 10 | res = json.load(f) 11 | 12 | d500 = np.array(res["300"]["5625"]) 13 | d500h = np.array(res["850"]["5625"]) 14 | 15 | 16 | t_tot = d500 + d500h.transpose() + np.eye(d500.shape[0]) 17 | 18 | t_tot[t_tot!=t_tot] = 0.0 19 | 20 | import seaborn as sns 21 | corr = t_tot 22 | tick_labels = ["longitude (lon)", 23 | "latitude (lat)", 24 | "land-sea mask (lsm)", 25 | "orography (oro)", 26 | "soil type (slt)", 27 | "geopotential height (z)", 28 | "temperature (t)", 29 | "specific humidity (q)", 30 | "surface pressure (sp)", 31 | "cloud liquid water content (clwc)", 32 | "cloud ice water content (ciwc)", 33 | "temperature at 2m (t2m)", 34 | "SimSat channel 0 (clbt:0)", 35 | "SimSat channel 1 (clbt:1)", 36 | "SimSat channel 2 (clbt:2)", 37 | "ERA5 total precipitation (tp)", 38 | "IMERG precipitation"] 39 | tick_labels_short = ["lon", "lat", "lsm", "oro", "slt", 40 | "z", "t", "q", "sp", "clwc", "ciwc", 41 | "t2m", "clbt:0", "clbt:1", "clbt:2", 42 | "tp", "imerg"] 43 | sns.heatmap(t_tot, 44 | xticklabels=tick_labels_short, 45 | yticklabels=tick_labels_short, 46 | cmap= 'coolwarm', vmin=-1, vmax=1, 47 | annot = False, 48 | center=0) 49 | 50 | plt.savefig("corr_matrix.pdf", bbox_inches='tight') 51 | 52 | 53 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | data_paths: ["/mnt/disks/train-data-era5625-aaai/era5625_aaai.dill", "/mnt/disks/train-data-imerg5625/imerg_5625/imerg_5625.dill", "/mnt/disks/train-data5/mmap/simsat5625/simsat5625.dill"] 2 | norm_path: "/home/egctong/precip-forecasting-e2e-baselines/src/test/normalisation/5625__16-04-01_12:00to17-12-31_11:00.json" 3 | log_path: "/data/forecast_benchmark/" -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/src/__init__.py -------------------------------------------------------------------------------- /src/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/src/benchmark/__init__.py -------------------------------------------------------------------------------- /src/benchmark/baseline_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.utils.data import DataLoader 4 | from src.benchmark.utils import get_vbl_name 5 | from src.benchmark.collect_data import define_data_paths, write_partition_conf, read_normalization_stats 6 | from src.dataloader.memmap_dataloader import Dataset 7 | 8 | 9 | def collate_fn_persistence(x_list, v): 10 | """ 11 | return 12 | 
inputs = [bsz, channels, lat, lon]
13 | output = [bsz, channels, lat, lon]
14 | """
15 | categories = {'input': [v], 'input_temporal': [v], 'input_static': [], 'output': [v]}
16 | output = []
17 | inputs = []
18 | lead_times = []
19 |
20 | for sample in x_list:
21 | output.append(np.concatenate([sample[0]['target'][v] for v in categories['output']], 1))
22 | inputs.append([sample[0]['label'][v] for v in categories['input']])
23 | lead_times.append(int(sample[0]['__sample_modes__'].split('_')[-1]))
24 |
25 | inputs[-1] = np.concatenate(inputs[-1], 1)
26 |
27 | inputs = torch.Tensor(np.concatenate(inputs))
28 | output = torch.Tensor(np.concatenate(output))
29 | lead_times = torch.Tensor(lead_times).long()
30 | return inputs, output, lead_times
31 |
32 |
33 | def write_sample_conf_persistence(v: str,
34 | lead_times: list,
35 | interpolation: str = "nearest_past",
36 | grid: float = 5.625):
37 | """
38 | Write a sample configuration dictionary for calculating baselines.
39 | """
40 | sample_conf = {}
41 | samples = {var: \
42 | {"vbl": get_vbl_name(var, grid), \
43 | "t": np.array([0]), \
44 | "interpolate": interpolation} \
45 | for var in [v]}
46 |
47 | for lt in lead_times:
48 | sample_conf["lead_time_{}".format(int(lt/3600))] = {
49 | "label": samples,
50 | "target": {var: {"vbl": get_vbl_name(var, grid), "t": np.array([lt]), "interpolate": interpolation} \
51 | for var in [v]}
52 | }
53 | return sample_conf
54 |
55 |
56 | def get_persistence_data(hparams):
57 | """Main function to get data for computing the persistence baseline according to hparams"""
58 | # get data
59 | target_v = 'precipitationcal' if hparams['imerg'] else 'tp'
60 | phase = hparams['phase']
61 | datapath = hparams['data_paths']
62 | lead_times = np.arange(hparams['forecast_freq'], hparams['forecast_time_window'] + hparams['forecast_freq'], hparams['forecast_freq']) * 3600
63 | partition_conf = write_partition_conf(hparams['sources'], hparams['imerg'])
64 | sample_conf = write_sample_conf_persistence(target_v, lead_times)
65 | loaderDict = {p: Dataset(datapath=datapath,
66 | partition_conf=partition_conf,
67 | partition_type="range",
68 | partition_selected=p,
69 | sample_conf=sample_conf) for p in [phase]}
70 | # define collate and dataloader
71 | lead_times = lead_times // 3600
72 | collate = lambda x: collate_fn_persistence(x, target_v)
73 | dataloader = DataLoader(loaderDict[phase], batch_size=hparams['batch_size'], \
74 | num_workers=hparams['num_workers'], collate_fn=collate, shuffle=False)
75 | return loaderDict, dataloader, target_v, lead_times
76 |
77 |
78 |
79 | def get_climatology_data(hparams):
80 | """Main function to get data for computing the climatology baseline according to hparams"""
81 | loaderDict, trd, target_v, lead_times = get_persistence_data(hparams)
82 | # get climatology value (mean over all training data)
83 | normalizer = read_normalization_stats(hparams['norm_path'])  # match the read_normalization_stats(path) signature in collect_data.py
84 | mean_pred_v = normalizer[target_v]['mean']
85 | # get prediction matrix
86 | latlon = (32, 64) if hparams['grid'] == 5.625 else (128, 256)
87 | pred = torch.ones((hparams['batch_size'], 1, *latlon)) * mean_pred_v
88 | return pred, loaderDict, trd, target_v, lead_times
--------------------------------------------------------------------------------
/src/benchmark/collect_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Collect data for benchmark tasks.
3 | """ 4 | import json 5 | import numpy as np 6 | from tqdm import tqdm 7 | import torch 8 | from datetime import datetime 9 | from torch.utils.data import DataLoader 10 | from pathlib import Path 11 | # from src.datasets.scaler import Scaler 12 | from src.benchmark.utils import local_time_shift, collate_fn, get_local_shift, is_vbl_const, get_var_name, get_vbl_name 13 | from src.dataloader.memmap_dataloader import Dataset 14 | 15 | 16 | def get_data(hparams: dict, tvt: str = 'train_valid'): 17 | """Main function to get data according to hparams""" 18 | datapath, partition_conf, sample_conf = write_data_config(hparams) 19 | 20 | # Collect datasets 21 | loaderDict = {p: Dataset(datapath=datapath, 22 | partition_conf=partition_conf, 23 | partition_type="range", 24 | partition_selected=p, 25 | sample_conf=sample_conf) for p in tvt.split('_')} 26 | 27 | # Define collate function 28 | normalizer = read_normalization_stats(hparams['norm_path']) 29 | if hparams['inc_time']: 30 | time_shift = get_local_shift(hparams['grid'], loaderDict['train'].dataset) 31 | collate = lambda x: collate_fn(x, hparams, normalizer, time_shift) 32 | 33 | return hparams, loaderDict, normalizer, collate 34 | 35 | 36 | def get_checkpoint_path(model_dir): 37 | """Return path of latest checkpoint found in the model directory.""" 38 | chkpt = str(list(Path(model_dir).glob('checkpoints/*'))[-1]) 39 | return chkpt 40 | 41 | 42 | def read_normalization_stats(path): 43 | """Read json file storing normalization statistics.""" 44 | with open(path) as f: 45 | tmp = json.load(f) 46 | n_dict = {} 47 | for vbl in tmp: 48 | n_dict[get_var_name(vbl)] = tmp[vbl] 49 | return n_dict 50 | 51 | 52 | def write_partition_conf(sources: str, imerg: bool): 53 | """ 54 | Write a time partition configuration dictionary. 55 | """ 56 | if sources in ['simsat', 'simsat_era', 'era16_3']: 57 | train_timerange = (datetime(2016,4,1,0).timestamp(), datetime(2017, 12, 31,23).timestamp()) 58 | sample_stride = 3 59 | 60 | elif sources == 'era': 61 | if imerg: 62 | train_timerange = (datetime(2000,6,1,0).timestamp(), datetime(2017, 12,31,23).timestamp()) 63 | else: 64 | train_timerange = (datetime(1979,1,1,7).timestamp(), datetime(2017, 12,31,23).timestamp()) 65 | sample_stride = 1 66 | 67 | val_timerange = (datetime(2018,1,6,0).timestamp(), datetime(2018, 12,31,23).timestamp()) 68 | test_timerange = (datetime(2019,1,6,0).timestamp(), datetime(2019, 12,31,23).timestamp()) 69 | 70 | increments = int(sample_stride * 60 * 60) 71 | 72 | partition_conf = { 73 | "train": 74 | {"timerange": train_timerange, 75 | "increment_s": increments}, 76 | "valid": 77 | {"timerange": val_timerange, 78 | "increment_s": increments}, 79 | "test": 80 | {"timerange": test_timerange, 81 | "increment_s": increments} 82 | } 83 | return partition_conf 84 | 85 | 86 | def write_sample_conf( 87 | categories: dict, 88 | history: list, 89 | lead_times: list, 90 | interporlation: str = "nearest_past", 91 | grid: float = 5.625): 92 | """ 93 | Write a sample configuration dictionary. 
94 | """ 95 | sample_conf = {} 96 | 97 | if 'clbt-0' in categories['input']: 98 | samples = {} 99 | for var in categories['input']: 100 | if is_vbl_const(var): 101 | samples[var] = {"vbl": get_vbl_name(var, grid)} 102 | elif var not in ['hour', 'day', 'month', 'clbt-1', 'clbt-2', 'clbt-0']: 103 | samples[var] = {"vbl": get_vbl_name(var, grid), "t": history, "interpolate": interporlation} 104 | elif var == 'clbt-0': 105 | samples['clbt'] = {"vbl": get_vbl_name('clbt', grid), "t": history, "interpolate": interporlation} 106 | else: 107 | samples = {var: {"vbl": get_vbl_name(var, grid)} if is_vbl_const(var) else \ 108 | {"vbl": get_vbl_name(var, grid), "t": history, "interpolate": interporlation} \ 109 | for var in categories['input'] if var not in ['hour', 'day', 'month']} 110 | 111 | for lt in lead_times: 112 | sample_conf["lead_time_{}".format(int(lt/3600))] = { 113 | "label": samples, 114 | "target": {var: {"vbl": get_vbl_name(var, grid), "t": np.array([lt]), "interpolate": interporlation} \ 115 | for var in categories['output']} 116 | } 117 | 118 | return sample_conf 119 | 120 | 121 | def define_categories(sources: str, inc_time: bool, imerg: bool): 122 | """ 123 | Write a dictionary which holds lists specifying the model input / output variables. 124 | """ 125 | simsat_vars_list = ['clbt-0', 'clbt-1', 'clbt-2'] if 'simsat' in sources else [] 126 | era_vars_list = ['sp', 't2m', 'z-300', 'z-500', 'z-850', 't-300', 't-500', 't-850', \ 127 | 'q-300', 'q-500', 'q-850', 'clwc-300', 'clwc-500', 'ciwc-500', 'clwc-850', 'ciwc-850'] if 'era' in sources else [] 128 | simsat_vars_list = ['clbt-0', 'clbt-1', 'clbt-2'] if 'simsat' in sources else [] 129 | simsat_vars_list_clbt = ['clbt'] if 'simsat' in sources else [] 130 | input_temporal = simsat_vars_list + era_vars_list 131 | input_temporal_clbt = simsat_vars_list_clbt + era_vars_list 132 | 133 | constants = ['lsm','orography', 'lat2d', 'lon2d', 'slt'] 134 | inputs = input_temporal + (['hour', 'day', 'month'] if inc_time else []) + constants 135 | output = ['precipitationcal'] if imerg else ['tp'] 136 | 137 | categories = { 138 | 'input': inputs, 139 | 'input_temporal': input_temporal, 140 | 'input_temporal_clbt': input_temporal_clbt, 141 | 'input_static': constants, 142 | 'output': output} 143 | 144 | return categories 145 | 146 | 147 | def write_data_config(hparams: dict): 148 | """ 149 | Define configurations for collecting data. 
150 | """ 151 | hparams['latlon'] = (32, 64) if hparams['grid'] == 5.625 else (128, 256) 152 | 153 | # define paths 154 | datapath = hparams['data_paths'] 155 | 156 | # define data configurations 157 | categories = define_categories(hparams['sources'], inc_time=hparams['inc_time'], imerg=hparams['imerg']) 158 | history = np.flip(np.arange(0, hparams['sample_time_window'] + hparams['sample_freq'], hparams['sample_freq']) * -1 * 3600) 159 | lead_times = np.arange(hparams['forecast_freq'], hparams['forecast_time_window'] + hparams['forecast_freq'], hparams['forecast_freq']) * 3600 160 | partition_conf = write_partition_conf(hparams['sources'], hparams['imerg']) 161 | sample_conf = write_sample_conf(categories, history, lead_times, grid=hparams['grid']) 162 | 163 | # define new parameters in hparams 164 | hparams['categories'] = categories 165 | hparams['seq_len'] = len(history) 166 | hparams['forecast_n_steps'] = len(lead_times) 167 | hparams['out_channels'] = len(categories['output']) 168 | hparams['num_channels'] = len(categories['input']) + hparams['forecast_n_steps'] 169 | hparams['lead_times'] = lead_times // 3600 170 | return datapath, partition_conf, sample_conf 171 | -------------------------------------------------------------------------------- /src/benchmark/graphics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | 4 | def plot_random_outputs_multi_ts(sample_X, sample_y, pred_y, 5 | idx_dictionary, normalizer, order): 6 | """ 7 | X of shape [N, seq_len, channels, lat, lon] 8 | y of shape [N, channels, lat, lon] 9 | """ 10 | num_lead_times = len(sample_X) 11 | sample_images = [] 12 | for v in order: 13 | _, cat_ind_y = idx_dictionary[v] 14 | truth_v = sample_y[:, cat_ind_y] 15 | pred_v = pred_y[:, cat_ind_y] 16 | diff_v = (truth_v - pred_v).abs() 17 | 18 | # scale for the image 19 | vmin = min([pred_v.min(), truth_v.min()]) 20 | vmax = max([pred_v.max(), truth_v.max()]) 21 | scale = lambda x: (x - vmin) / (vmax - vmin) 22 | 23 | # truth 24 | # sample_images += [scale(sample_X_v[:, ts]) for ts in range(seq_len)] 25 | sample_images += [scale(torch.unsqueeze(truth_v[i], 0)) for i in range(num_lead_times)] 26 | sample_images += [scale(torch.unsqueeze(pred_v[i], 0)) for i in range(num_lead_times)] 27 | sample_images += [scale(torch.unsqueeze(diff_v[i], 0)) for i in range(num_lead_times)] 28 | 29 | nrow = num_lead_times 30 | grid = make_grid(sample_images, nrow=nrow) 31 | return grid 32 | 33 | -------------------------------------------------------------------------------- /src/benchmark/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def collect_outputs(outputs, multi_gpu): 6 | log_dict = {} 7 | for loss_type in outputs[0]: 8 | if multi_gpu: 9 | collect = [] 10 | for output in outputs: 11 | for v in output[loss_type]: 12 | if v == v: 13 | collect.append(v) 14 | else: 15 | collect = [v[loss_type] for v in outputs if v[loss_type] == v[loss_type]] 16 | if collect: 17 | log_dict[loss_type] = torch.stack(collect).mean() 18 | else: 19 | log_dict[loss_type] = float('nan') 20 | return log_dict 21 | 22 | 23 | def define_loss_fn(lat2d): 24 | weights_lat = compute_latitude_weighting(lat2d) 25 | loss = lambda x, y: compute_weighted_mse(x, y, weights_lat) 26 | return weights_lat, loss 27 | 28 | 29 | def compute_latitude_weighting(lat): 30 | weights_lat = np.cos(np.deg2rad(lat)) 31 | weights_lat /= 
weights_lat.mean()
32 | return weights_lat
33 |
34 |
35 | def compute_weighted_mse(pred, truth, weights_lat, flat_weights=False):
36 | """
37 | Compute the MSE with latitude weighting.
38 | Args:
39 | pred : Forecast. Torch tensor.
40 | truth: Truth. Torch tensor.
41 | weights_lat: Latitude weighting, 2d Torch tensor.
42 | Returns:
43 | mse: Latitude-weighted mean squared error
44 | """
45 | if not flat_weights:
46 | weights_lat = truth.new(weights_lat).expand_as(truth)
47 | error = (pred - truth)**2
48 | out = error * weights_lat
49 | return out.mean()
50 |
51 |
52 | def eval_loss(pred, output, lts, loss_function, possible_lead_times, phase='val', target_v=None, normalizer=None):
53 | results = {}
54 | # Work out which of the batch samples correspond to which lead time
55 | lead_time_dist = {t: lts == t for t in possible_lead_times}
56 | results[f'{phase}_loss'] = loss_function(pred, output)
57 | # Calculate loss per lead time
58 | for t, cond in lead_time_dist.items():
59 | if any(cond):
60 | results[f'{phase}_loss_{t}hrs'] = loss_function(pred[cond], output[cond])
61 | else:
62 | results[f'{phase}_loss_{t}hrs'] = pred.new([float('nan')])[0]
63 |
64 | # Undo normalization
65 | if normalizer:
66 | scaled_pred_v = (torch.exp(pred[:, 0, :, :]) - 1) * normalizer[target_v]['std']
67 | scaled_output_v = (torch.exp(output[:, 0, :, :]) - 1) * normalizer[target_v]['std']
68 | results[f'{phase}_loss_' + target_v] = loss_function(scaled_pred_v, scaled_output_v)
69 | # Calculate loss per lead time
70 | for t, cond in lead_time_dist.items():
71 | if any(cond):
72 | results[f'{phase}_loss_{target_v}_{t}hrs'] = loss_function(scaled_pred_v[cond], scaled_output_v[cond])
73 | else:
74 | results[f'{phase}_loss_{target_v}_{t}hrs'] = scaled_pred_v.new([float('nan')])[0]
75 | return results
76 |
77 |
78 | def convert_precip_to_mm(output, target_v, normalizer):
79 | converted = (np.exp(output) - 1) * normalizer[target_v]['std']
80 | if target_v == 'tp':
81 | converted *= 1e3
82 | return converted
83 |
--------------------------------------------------------------------------------
/src/benchmark/models.py:
--------------------------------------------------------------------------------
1 | """
2 | Define the ConvLSTM model used as a forecasting baseline.
3 | """ 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | from operator import itemgetter 8 | 9 | 10 | class ConvLSTMForecaster(nn.Module): 11 | def __init__(self, 12 | in_channels: int, 13 | output_shape: tuple, 14 | channels: tuple, 15 | last_ts: bool = True, 16 | kernel_size: int = 3, 17 | last_relu: bool = True): 18 | super().__init__() 19 | 20 | self.last_ts = last_ts 21 | self.rnn = ConvLSTM(in_channels=in_channels, num_filter=channels[0], kernel_size=kernel_size, 22 | patch_h=output_shape[1], patch_w=output_shape[2]) 23 | self.out_layer1 = nn.Conv2d(channels[0], channels[1], kernel_size=1) 24 | self.out_layer2 = nn.Conv2d(channels[1], output_shape[0], 1) 25 | self.latlon = output_shape[1:] 26 | self.last_relu = last_relu 27 | self.relu = torch.nn.ReLU() 28 | 29 | def forward(self, inputs): 30 | inputs = inputs.permute(1,0,2,3,4) # seq_first 31 | out, _ = self.rnn(inputs) 32 | 33 | if self.last_ts: 34 | out = out[-1] 35 | else: 36 | out = out.permute(1,0,2,3,4) # bsz_first 37 | bsz = len(out) 38 | out = out.contiguous().view(bsz, -1, *self.latlon) # use all time steps 39 | 40 | out = self.out_layer1(out) 41 | out = self.out_layer2(out) 42 | if self.last_relu: 43 | out = self.relu(out) 44 | return out 45 | 46 | 47 | class ConvLSTM(nn.Module): 48 | """ 49 | ConvLSTM based on https://github.com/Hzzone/Precipitation-Nowcasting/blob/master/nowcasting/models/convLSTM.py""" 50 | 51 | def __init__(self, in_channels: int, num_filter: int, kernel_size: int, patch_h: int, patch_w: int): 52 | super().__init__() 53 | self._state_height, self._state_width = patch_h, patch_w # patch dimensions after SpatialDownsampler 54 | self._conv = nn.Conv2d(in_channels=in_channels + num_filter, 55 | out_channels=num_filter*4, 56 | kernel_size=kernel_size, 57 | stride=1, 58 | padding=1) 59 | 60 | # if using requires_grad flag, torch.save will not save parameters in deed although it may be updated every epoch. 61 | # Howerver, if you use declare an optimizer like Adam(model.parameters()), 62 | # parameters will not be updated forever. 63 | self.Wci = nn.Parameter(torch.zeros(1, num_filter, self._state_height, self._state_width)) 64 | self.Wcf = nn.Parameter(torch.zeros(1, num_filter, self._state_height, self._state_width)) 65 | self.Wco = nn.Parameter(torch.zeros(1, num_filter, self._state_height, self._state_width)) 66 | 67 | self._input_channel = in_channels 68 | self._num_filter = num_filter 69 | 70 | def init_hidden(self, inputs): 71 | c = inputs.new(size=(inputs.size(1), self._num_filter, self._state_height, self._state_width)) 72 | h = inputs.new(size=(inputs.size(1), self._num_filter, self._state_height, self._state_width)) 73 | return h, c 74 | 75 | # inputs and states should not be all none 76 | # inputs: S*B*C*H*W 77 | def forward(self, inputs): 78 | """ 79 | Expected input shape [seq_len, bsz, channels, height, width] 80 | input shape (seq_len, bsz, 256, 64, 64) 81 | output[0] shape (seq_len, bsz, 384, 64, 64) 82 | """ 83 | 84 | seq_len = len(inputs) 85 | self.hidden = self.init_hidden(inputs) 86 | h, c = self.hidden 87 | 88 | outputs = [] 89 | for index in range(seq_len): 90 | # initial inputs 91 | if inputs is None: 92 | x = torch.zeros((h.size(0), self.in_channels, self._state_height, self._state_width), dtype=torch.float) 93 | else: 94 | x = inputs[index, ...] 
95 | cat_x = torch.cat([x, h], dim=1) 96 | conv_x = self._conv(cat_x) 97 | i, f, tmp_c, o = torch.chunk(conv_x, 4, dim=1) 98 | 99 | # lstm equations 100 | i = torch.sigmoid(i+self.Wci*c) 101 | f = torch.sigmoid(f+self.Wcf*c) 102 | c = f*c + i*torch.tanh(tmp_c) 103 | o = torch.sigmoid(o+self.Wco*c) 104 | h = o*torch.tanh(c) 105 | 106 | outputs.append(h) 107 | outputs = torch.stack(outputs) 108 | 109 | return outputs, (h, c) -------------------------------------------------------------------------------- /src/benchmark/normalisations/normalisations_sample_datasets.dill: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FrontierDevelopmentLab/PyRain/a52e4fd7984dcabb6d908a565a4e7c6bc820d62f/src/benchmark/normalisations/normalisations_sample_datasets.dill -------------------------------------------------------------------------------- /src/benchmark/normalise.py: -------------------------------------------------------------------------------- 1 | # Use this script in order to generate normalisation .json files to use with the dataloader 2 | 3 | # First, set up the dataloader as you would for your application. 4 | 5 | ######################################################################################################################## 6 | # User-modification START 7 | ######################################################################################################################## 8 | 9 | import numpy as np 10 | import dill 11 | import datetime 12 | import multiprocessing 13 | import os 14 | 15 | from src.dataloader import Dataset 16 | 17 | n_procs = 4 # Set to number of available CPUs 18 | expname = "sample_datasets" 19 | 20 | datapath = ["PATH TO ERA5625 SAMPLES DILL FILE", 21 | "PATH TO IMERG5625 SAMPLES DILL FILE", 22 | "PATH TO SIMSAT5625 SAMPLES DILL FILE"] 23 | 24 | # partition_conf = {"train": 25 | # {"timerange": (datetime.datetime(2010, 1, 1, 0).timestamp(), 26 | # datetime.datetime(2010, 12, 31, 0).timestamp()), 27 | # "increment_s": 60 * 60}, 28 | # "test": 29 | # {"timerange": (datetime.datetime(2017, 1, 15, 0).timestamp(), 30 | # datetime.datetime(2018, 12, 31, 0).timestamp()), 31 | # "increment_s": 60 * 60}} 32 | #partition_type = "range" 33 | 34 | partition_conf = {"timerange": (datetime.datetime(2018, 1, 1, 0).timestamp(), 35 | datetime.datetime(2019, 12, 31, 23).timestamp()), 36 | # Define partition elements 37 | "partitions": [{"name": "train", "len_s": 12 * 24 * 60 * 60, "increment_s": 60 * 60}, 38 | {"name": "val", "len_s": 2 * 24 * 60 * 60, "increment_s": 60 * 60}, 39 | {"name": "test", "len_s": 2 * 24 * 60 * 60, "increment_s": 60 * 60}]} 40 | 41 | 42 | partition_type = "repeat" 43 | 44 | sample_conf = {"lead_time_{}".format(int(lt / 3600)): # sample modes 45 | { 46 | "sample": # sample sections 47 | { 48 | "lat2d": {"vbl": "era5625/lat2d"}, 49 | "lon2d": {"vbl": "era5625/lon2d"}, 50 | "orography": {"vbl": "era5625/orography"}, 51 | "slt": {"vbl": "era5625/slt"}, 52 | "lsm": {"vbl": "era5625/lsm"}, # sample variables 53 | # "lat": {"vbl": "era5625/lat2d"}, 54 | "tp": {"vbl": "era5625/tp", 55 | "t": np.array([lt]), 56 | "interpolate": ["nan", "nearest_past", "nearest_future"][1], 57 | "normalise": ["log"]}, 58 | "imerg": {"vbl": "imerg5625/precipitationcal", 59 | "t": np.array([lt]), 60 | "interpolate": ["nan", "nearest_past", "nearest_future"][1], 61 | "normalise": ["log"]}, 62 | "clbt0": {"vbl": "simsat5625/clbt:0", 63 | "t": np.array([lt]), 64 | "interpolate": ["nan", "nearest_past", 
"nearest_future"][1], 65 | "normalise": ["log"]}, 66 | "clbt1": {"vbl": "simsat5625/clbt:1", 67 | "t": np.array([lt]), 68 | "interpolate": ["nan", "nearest_past", "nearest_future"][1], 69 | "normalise": ["log"]}, 70 | "clbt2": {"vbl": "simsat5625/clbt:2", 71 | "t": np.array([lt]), 72 | "interpolate": ["nan", "nearest_past", "nearest_future"][1], 73 | "normalise": ["log"]}, 74 | } 75 | } 76 | for lt in np.array([3, 7]) * 3600} # np.array([1, 3, 6, 9]) * 3600} 77 | 78 | # choose a default normalisation method 79 | default_normalisation = "stdmean_global" 80 | 81 | ######################################################################################################################## 82 | # User-modification STOP 83 | ######################################################################################################################## 84 | 85 | if partition_type == "repeat": 86 | partition_labels = [v["name"] for v in partition_conf["partitions"]] 87 | else: 88 | partition_labels = list(partition_conf.keys()) 89 | 90 | dataset = Dataset(datapath=datapath, 91 | partition_conf=partition_conf, 92 | partition_type=partition_type, 93 | partition_selected="train", 94 | sample_conf=sample_conf, 95 | ) 96 | dataset_conf = dict(datapath=datapath, 97 | partition_conf=partition_conf, 98 | partition_type=partition_type, 99 | partition_selected="train", 100 | sample_conf=sample_conf) 101 | 102 | # Go through all partitions and select all variables in use 103 | vbls = {} 104 | for i, partition in enumerate(partition_labels): 105 | vbls[partition] = set() 106 | print("Generating normalisation data for partition: {} ({}/{})".format(partition, i, len(list(partition_conf.keys())))) 107 | dataset.select_partition(partition) 108 | for mode, mode_v in sample_conf.items(): 109 | for section, section_v in mode_v.items(): 110 | for k, v in section_v.items(): 111 | for n in v.get("normalise", [default_normalisation]): 112 | vbls[partition].add((v["vbl"], n, "t" in v)) 113 | 114 | # Retrieve the dataset idx for all all partitions 115 | timesegments = {} 116 | for i, partition in enumerate(partition_labels): 117 | timesegments[partition] = dataset.get_partition_ts_segments(partition) 118 | 119 | # TODO: const normalisation! 120 | 121 | # create a list of jobs to be done 122 | joblist = [] 123 | for partition in partition_labels: 124 | for vbl in list(vbls[partition]): 125 | joblist.append({"timesegments": timesegments[partition], 126 | "vbl_name": vbl[0], 127 | "normalise": vbl[1], 128 | "has_t": vbl[2], 129 | "dataset_conf": dataset_conf, 130 | "partition": partition}) 131 | 132 | 133 | def worker(args): 134 | # creating our own dataset per thread, alleviates any issues with memmaps and multiprocessing! 
135 | dataset = Dataset(**args["dataset_conf"]) 136 | dataset.select_partition(args["partition"]) 137 | 138 | fi = None 139 | if args["has_t"]: 140 | # expand timesegments 141 | for ts in args["timesegments"]: 142 | ret = dataset.get_file_indices_from_ts_range(ts, args["vbl_name"]) 143 | if fi is None: 144 | fi = ret 145 | else: 146 | fi = np.concatenate([fi, ret]) 147 | else: 148 | fi = None 149 | 150 | vals = None 151 | if fi is not None: 152 | vals = dataset[args["vbl_name"]][fi] 153 | else: # constant value 154 | vals = dataset[args["vbl_name"]] 155 | 156 | results = {args["vbl_name"]: {}} 157 | n = args["normalise"] 158 | if n in ["stdmean_global"]: 159 | mean = np.nanmean(vals) # will be done out-of-core automagically by numpy memmap 160 | std = np.nanstd(vals) # will be done out-of-core automagically by numpy memmap 161 | fn = lambda x: (x-mean) / std if std != 0.0 else (x-mean) 162 | results[args["vbl_name"]]["stdmean_global"] = {"mean": mean, "std": std, "fn": fn} 163 | elif n in ["log"]: 164 | std = np.nanstd(vals) # will be done out-of-core automagically by numpy memmap 165 | fn = lambda x: np.log(max(x, 0.0) / std + 1) 166 | results[args["vbl_name"]]["log"] = {"std": std, "fn": fn} 167 | else: 168 | print("Unknown normalisation: {}".format(n)) 169 | 170 | return dill.dumps({args["partition"]: results}) 171 | 172 | 173 | pool = multiprocessing.Pool(processes=n_procs) 174 | results = pool.map(worker, joblist) 175 | 176 | results_dct = {} 177 | for r in results: 178 | loadr = dill.loads(r) 179 | partition = list(loadr.keys())[0] 180 | if partition not in results_dct: 181 | results_dct[partition] = loadr[partition] 182 | else: 183 | results_dct[partition].update(loadr[partition]) 184 | 185 | # save to normalisation file 186 | with open(os.path.join("normalisations", "normalisations_{}.dill".format(expname)), "wb") as f: 187 | dill.dump(results_dct, f) 188 | -------------------------------------------------------------------------------- /src/benchmark/plot_outputs.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib.colors as colors 5 | import iris 6 | import iris.plot as iplt 7 | import cartopy.crs as ccrs 8 | from datetime import datetime 9 | from run_benchmark import RegressionModel 10 | 11 | 12 | def collect_predictions(model_path, date): 13 | """ 14 | Given model path, load learnt model to predict for all lead times, using an input sequence from a specific date. 15 | Output has shapes (no. 
of lead times, lat, lon)
16 | """
17 | # Collect data to plot
18 | model, hparams, loaderDict, normalizer, collate = RegressionModel.load_model(model_path)
19 | samples = []
20 | lead_times = hparams['lead_times']
21 | for i in lead_times:
22 | samplet = f'lead_time_{i}'
23 | sample_lt = loaderDict['test'].get_sample_at(samplet, date.timestamp())
24 | sample_lt['__sample_modes__'] = samplet
25 | samples.append([sample_lt])
26 | # make predictions
27 | sample_x, truth, lts = collate(samples)
28 | model.eval()
29 | out = model(sample_x).detach().numpy()
30 | truth = truth.numpy()
31 | return out[:, 0, :, :], truth[:, 0, :, :], hparams
32 |
33 |
34 | def determine_bounds(img: list):
35 | """
36 | Determine the min and max values found in the samples.
37 | """
38 | vmin = min([im.min() for im in img])
39 | vmax = max([im.max() for im in img])
40 | return vmin, vmax
41 |
42 |
43 | def make_use_of_cube_data(nc_path: str):
44 | """
45 | Load any nc file (e.g. 'total_precipitation_2019_5.625deg.nc') in order to make use of its structure for plotting later.
46 | """
47 | cube = iris.load_cube(nc_path)
48 | cube = cube[0, :, :]  # remove the time dimension
49 | cube.coord('lat').rename('latitude')
50 | cube.coord('lon').rename('longitude')
51 | cube.coord('latitude').guess_bounds()
52 | cube.coord('longitude').guess_bounds()
53 | return cube
54 |
55 |
56 | def plot_one_img(cube, inarray, vmin, vmax, path=None):
57 | """
58 | Given the cube structure and the data array, together with min/max values for scaling, output a projected plot over the earth.
59 | """
60 | # replace data in cube with incoming array.
61 | cube.data = inarray[:,:]
62 |
63 | fig = plt.figure(dpi=400, figsize=(8, 4))
64 | # determine colour bounds
65 | delta = (vmax - vmin) / 100
66 | bounds = np.arange(vmin, vmax + delta, delta)
67 | norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)
68 | cmap = plt.get_cmap('viridis')
69 | # project image
70 | projection = ccrs.Mollweide
71 | ax = plt.axes(projection=projection(central_longitude=0))
72 | iplt.pcolormesh(cube, norm=norm, cmap=cmap)
73 | ax.coastlines()
74 | # transparent background
75 | ax.patch.set_facecolor('#ababab')
76 | ax.patch.set_alpha(0)
77 | if path is not None:
78 | fig.savefig(path, facecolor=fig.get_facecolor(), edgecolor='none')
79 | fig.clf()  # clear the figure only after it has been saved, otherwise the output is blank
80 |
81 |
82 | def main(hparams):
83 | """
84 | Main function for plotting truth and predictions.
85 | """
86 | date = datetime(2019, 7, 12) # random
87 | load_path = hparams.load
88 | nc_file = hparams.nc_file
89 | hparams = vars(hparams)
90 | out, truth, hparams = collect_predictions(load_path, date)
91 | cube = make_use_of_cube_data(nc_file)
92 | vmin, vmax = determine_bounds([out, truth])
93 | for t, _ in enumerate(hparams['lead_times']):
94 | # plot predictions
95 | plot_one_img(cube, out[t], vmin, vmax)
96 | # plot ground truth
97 | plot_one_img(cube, truth[t], vmin, vmax)
98 |
99 |
100 | if __name__ == '__main__':
101 | parser = ArgumentParser()
102 | parser.add_argument("--load", required=True, type=str, help='Path of checkpoint directory to load')
103 | parser.add_argument("--nc_file", required=True, type=str, help='Path of a random .nc file')
104 | hparams = parser.parse_args()
105 | main(hparams)
--------------------------------------------------------------------------------
/src/benchmark/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions for the benchmark tasks.
3 | """ 4 | import torch 5 | import numpy as np 6 | from datetime import datetime, timedelta 7 | import yaml 8 | import random 9 | import os 10 | 11 | def seed_everything(seed): 12 | random.seed(seed) 13 | os.environ['PYTHONHASHSEED'] = str(seed) 14 | np.random.seed(seed) 15 | torch.manual_seed(seed) 16 | torch.cuda.manual_seed(seed) 17 | torch.cuda.manual_seed_all(seed) 18 | torch.backends.cudnn.deterministic = True 19 | torch.backends.cudnn.benchmark = False 20 | 21 | 22 | def add_yml_params(args): 23 | data = yaml.load(args.config_file, Loader=yaml.Loader) 24 | delattr(args, 'config_file') 25 | arg_dict = args.__dict__ 26 | for key, value in data.items(): 27 | arg_dict[key] = value 28 | 29 | 30 | def get_lat2d(grid, dataset=None): 31 | if grid == 5.625: 32 | lat2d = dataset['era5625/lat2d'] 33 | else: 34 | lat = np.linspace(-89.296875, 89.296875, 128) 35 | lat2d = np.expand_dims(lat, axis=1).repeat(256, 1) 36 | return lat2d 37 | 38 | 39 | def add_device_hparams(hparams): 40 | num_gpus = torch.cuda.device_count() if hparams['gpus'] == -1 else hparams['gpus'] 41 | if num_gpus > 0: 42 | hparams['batch_size'] *= num_gpus 43 | hparams['num_workers'] *= num_gpus 44 | hparams['multi_gpu'] = num_gpus > 1 45 | 46 | 47 | def get_vbl_name(var:str, grid: float): 48 | if grid == 5.625: 49 | if var == 'clbt': 50 | return 'simsat5625/clbt' 51 | if var == 'precipitationcal': 52 | return 'imerg5625/precipitationcal' 53 | if (var[:4] == 'ciwc') or (var[:4] == 'clwc'): 54 | return "era5625/" + (var.replace('-', '_') + 'hPa' if '-' in var else var) 55 | return "era5625/" + (var.replace('-', '_') + 'hPa' if '-' in var else var) 56 | else: 57 | if var == 'precipitationcal': 58 | return 'imerg140625/precipitationcal' 59 | if var == 'clbt': 60 | return 'simsat140625/clbt' 61 | if (var[:4] == 'ciwc') or (var[:4] == 'clwc'): 62 | return "era140625/" + (var.replace('-', '_') + 'hPa' if '-' in var else var) 63 | return "era140625/" + (var.replace('-', '_') + 'hPa' if '-' in var else var) 64 | 65 | 66 | def get_var_name(vbl: str): 67 | return vbl.split('/')[1].replace(':', '-').replace('_', '-').replace('hPa', '') 68 | 69 | 70 | def is_vbl_const(var: str): 71 | if var in ['lat', 'lon', 'orography', 'lsm', 'slt', 'lat2d', 'lon2d']: 72 | return True 73 | return False 74 | 75 | 76 | def local_time_shift(longitude: float): 77 | return timedelta(hours=(np.mod(longitude + 180, 360) - 180) / 180 * 12) 78 | 79 | 80 | def get_local_shift(grid, dataset): 81 | if grid == 5.625: 82 | lon2d = dataset['era5625/lon2d'] 83 | else: 84 | lon = np.linspace(0, 358.59375, 256) 85 | lon2d = np.expand_dims(lon, axis=1).repeat(128, 1).T 86 | time_shift = np.vectorize(local_time_shift)(lon2d) 87 | return time_shift 88 | 89 | 90 | def apply_normalization(inputs, output, categories, normalizer): 91 | for i, v in enumerate(categories['input']): 92 | if v not in ['hour', 'day', 'month']: 93 | inputs[:, :, i, :, :] = (inputs[:, :, i, :, :] - normalizer[v]['mean']) / normalizer[v]['std'] 94 | 95 | target_v = categories['output'][0] 96 | output[:, 0, :, :] = np.log(output[:, 0, :, :] / normalizer[target_v]['std'] + 1) 97 | return inputs, output 98 | 99 | 100 | def leadtime_into_maxtrix(lead_times: list, 101 | seq_len: int, 102 | forecast_freq: int, 103 | forecast_n_steps: int, 104 | latlon: tuple): 105 | """ 106 | return shape of [bsz, seq_len, forecast_n_steps, lat, lon] 107 | """ 108 | bsz = len(lead_times) 109 | leadtime = np.zeros((bsz, seq_len, forecast_n_steps, latlon[0], latlon[1])) 110 | for batch_i, lt in enumerate(lead_times): 111 | 
leadtime[batch_i, :, lt // forecast_freq-1, :, :] = 1 112 | return leadtime 113 | 114 | 115 | 116 | def collate_fn(x_list, hparams, normalizer, time_shift): 117 | """ 118 | return 119 | inputs = [bsz, seq_len, channels, lat, lon] (constants are repeated per timestep) 120 | output = [bsz, channels, lat, lon] 121 | lead_time = [bsz] 122 | """ 123 | output = [] 124 | inputs = [] 125 | lead_times = [] 126 | categories = hparams['categories'] 127 | latlon = hparams['latlon'] 128 | compute_time = [v for v in categories['input'] if v in ['hour', 'day', 'month']] 129 | tmp = 'input_temporal_clbt' if 'clbt-0' in categories['input_temporal'] else 'input_temporal' 130 | 131 | for sample in x_list: 132 | output.append(np.concatenate([sample[0]['target'][v] for v in categories['output']], 1)) 133 | lead_times.append(int(sample[0]['__sample_modes__'].split('_')[-1])) 134 | 135 | # temporal 136 | inputs.append([sample[0]['label'][v] for v in categories[tmp]]) 137 | 138 | # hour, day, month 139 | if compute_time: 140 | time_scaling = {'hour': 24, 'day': 31, 'month': 12} 141 | timestamps = [datetime.fromtimestamp(t) for t in sample[0]['label'][categories[tmp][0]+ '__ts']] 142 | timestamps = np.transpose(np.tile(timestamps, (1, *latlon, 1)), (3,0,1,2)) 143 | if time_shift is not None: 144 | timestamps -= time_shift 145 | for m in ['hour', 'day', 'month']: 146 | tfunc = np.vectorize(lambda t: getattr(t, m)) 147 | inputs[-1] += [tfunc(timestamps)/ time_scaling[m]] 148 | 149 | if categories['input_static']: 150 | inputs[-1] += [np.repeat(sample[0]['label'][v][None, :, :], hparams['seq_len'], 0) for v in categories['input_static']] 151 | inputs[-1] = np.concatenate(inputs[-1], 1) 152 | 153 | inputs = torch.Tensor(np.stack(inputs)) 154 | output = torch.Tensor(np.concatenate(output)) 155 | lead_times = torch.Tensor(lead_times).long() 156 | 157 | # apply normalization 158 | if normalizer is not None: 159 | inputs, output = apply_normalization(inputs, output, categories, normalizer) 160 | 161 | # concatenate lead times to inputs. 
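# Added note (not in the original source): leadtime_into_maxtrix (defined
# above) returns a one-hot lead-time encoding of shape
# [bsz, seq_len, forecast_n_steps, lat, lon]; for a sample with lead time lt it
# sets plane lt // forecast_freq - 1 to 1. E.g. with forecast_freq=1 and
# forecast_n_steps=3, a 2-hour lead time activates plane index 1. Concatenating
# along dim=2 therefore grows the per-step channel count by forecast_n_steps,
# which is why write_data_config sets num_channels = len(categories['input']) + forecast_n_steps.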
162 | one_hot_lt = leadtime_into_maxtrix(lead_times, hparams['seq_len'], hparams['forecast_freq'], hparams['forecast_n_steps'], latlon) 163 | one_hot_lt = torch.Tensor(one_hot_lt) 164 | inputs = torch.cat([inputs, one_hot_lt], 2) 165 | 166 | return inputs, output, lead_times -------------------------------------------------------------------------------- /src/convert/convert_era5625_aaai.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from netCDF4 import Dataset as netcdf_Dataset 3 | import os 4 | import numpy as np 5 | 6 | if __name__ == "__main__": 7 | 8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12} 9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()} 10 | 11 | years=list(range(1979,2020)) 12 | dataset_name = "era5625" 13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER" 14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name) 15 | if not os.path.exists(output_path): 16 | os.makedirs(output_path) 17 | 18 | variables_const = [ 19 | {"name": "lat2d", 20 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 21 | "dims": (32, 64)}, 22 | {"name": "lon2d", 23 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 24 | "dims": (32, 64)}, 25 | {"name": "lsm", 26 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 27 | "dims": (32, 64)}, 28 | {"name": "orography", 29 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 30 | "dims": (32, 64)}, 31 | {"name": "slt", 32 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 33 | "dims": (32, 64)}, 34 | ] 35 | 36 | variables_era = [ 37 | {"name": "t2m", 38 | "ftemplate": os.path.join(input_path, "2m_temperature_{}_5.625deg.nc"), 39 | "dims": (32, 64), 40 | "levels": list(range(1))}, 41 | {"name": "sp", 42 | "ftemplate": os.path.join(input_path, "surface_pressure_{}_5.625deg.nc"), 43 | "dims": (32, 64), 44 | "levels": list(range(1))}, 45 | {"name": "tp", 46 | "ftemplate": os.path.join(input_path, "total_precipitation_{}_5.625deg.nc"), 47 | "dims": (32, 64), 48 | "levels": list(range(1))}, #]#, 49 | ] 50 | 51 | from copy import deepcopy 52 | variables_era_2019 = deepcopy(variables_era) 53 | 54 | era_extra_pressure_levels = [300, 500, 850] #, 850] 55 | for i, p in enumerate(era_extra_pressure_levels): 56 | variables_era.append({"name": "ciwc".format(p), 57 | "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 58 | "dims": (32, 64), 59 | "levels": list((pressure_to_idx[p],)), 60 | "p_level": p}) 61 | variables_era.append({"name": "clwc".format(p), 62 | "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 63 | "dims": (32, 64), 64 | "levels": list((pressure_to_idx[p],)), 65 | "p_level": p}) 66 | variables_era.append({"name": "t", 67 | "ftemplate": os.path.join(input_path, "temperature_{}_5.625deg.nc"), 68 | "dims": (32, 64), 69 | "levels": list((pressure_to_idx[p],)), 70 | "p_level": p}), 71 | variables_era.append({"name": "z", 72 | "ftemplate": os.path.join(input_path, "geopotential_{}_5.625deg.nc"), 73 | "dims": (32, 64), 74 | "levels": list((pressure_to_idx[p],))}), 75 | variables_era.append({"name": "q", 76 | "ftemplate": os.path.join(input_path, "specific_humidity_{}_5.625deg.nc"), 77 | "dims": (32, 64), 78 | "levels": list((pressure_to_idx[p],))}), 79 | 80 | era_extra_pressure_levels = [300, 500, 850] #, 
850] 81 | for i, p in enumerate(era_extra_pressure_levels): 82 | variables_era_2019.append({"name": "ciwc".format(p), 83 | "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 84 | "dims": (32, 64), 85 | "levels": list((pressure_to_idx[p],)), 86 | "p_level": p}) 87 | variables_era_2019.append({"name": "clwc".format(p), 88 | "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 89 | "dims": (32, 64), 90 | "levels": list((pressure_to_idx[p],)), 91 | "p_level": p}) 92 | variables_era_2019.append({"name": "t", 93 | "ftemplate": os.path.join(input_path, "temperature_{}_"+str(int(p))+"_5.625deg.nc"), 94 | "dims": (32, 64), 95 | "levels": list((pressure_to_idx[p],)), 96 | "p_level": p}), 97 | variables_era_2019.append({"name": "z", 98 | "ftemplate": os.path.join(input_path, "geopotential_{}_"+str(int(p))+"_5.625deg.nc"), 99 | "dims": (32, 64), 100 | "levels": list((pressure_to_idx[p],))}), 101 | variables_era_2019.append({"name": "q", 102 | "ftemplate": os.path.join(input_path, "specific_humidity_{}_"+str(int(p))+"_5.625deg.nc"), 103 | "dims": (32, 64), 104 | "levels": list((pressure_to_idx[p],))}), 105 | 106 | era_const_path = os.path.join(output_path, "{}__era5625_const.mmap".format(dataset_name)) 107 | print("Writing const values...") 108 | const_dims = (sum([1 for vg in variables_const]), 32, 64) 109 | era_const_dims = const_dims 110 | 111 | if os.path.exists(era_const_path): 112 | print("Skipping ERA CONST as file exists... ") 113 | else: 114 | # write const variables 115 | mmap = np.memmap(era_const_path, dtype='float32', mode='w+', shape=const_dims) 116 | def write_const(vbls): 117 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbls[0]["ftemplate"]), "r", format="NETCDF4") 118 | for i, vbl in enumerate(vbls): 119 | print("WRITING CONST VBL ", vbl["name"]) 120 | root_channel = 0 if not i else sum([1 for vg in variables_const[:i]]) 121 | print("ROOT CHANNEL: ", root_channel) 122 | mmap[root_channel] = rootgrp[vbl["name"]][:] 123 | write_const(variables_const) 124 | mmap.flush() 125 | del mmap 126 | 127 | # write temporal ERA variables 128 | n_rec_dim = (32, 64) 129 | n_recs = (datetime.datetime(min(max(years), 2019), 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 + 1 130 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_era]) 131 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim) 132 | era_dims = dims 133 | if not os.path.exists(output_path): 134 | os.makedirs(output_path) 135 | 136 | era_path = os.path.join(output_path, "{}__era5625.mmap".format(dataset_name)) 137 | if os.path.exists(era_path): 138 | print("Skipping ERA as file exists... 
") 139 | else: 140 | mmap = np.memmap(era_path, dtype='float32', mode='w+', shape=dims) 141 | 142 | def write_year(y, vbls): 143 | vbls, vbls_2019 = vbls 144 | 145 | if y > 2019: 146 | print("ERA: no data available for year {}".format(y)) 147 | return 148 | t_offset = int((datetime.datetime(y, 1, 1, 0).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) 149 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) + 1 150 | for i, vbl in enumerate(vbls): 151 | if y == 2019: 152 | vbl = vbls_2019[i] 153 | print("ERA5625 writing year {} vbl {}...".format(y, vbl["name"])) 154 | netcdf_fname = vbl["ftemplate"].format(y) 155 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]]) 156 | if vbl["name"] in ["tcwv"] and y > 2000: 157 | mmap[t_offset:t_end, root_channel] = float("nan") 158 | else: 159 | rootgrp = netcdf_Dataset(os.path.join(input_path, netcdf_fname), "r", format="NETCDF4") 160 | print(t_offset, t_end, root_channel, len(vbl["levels"])) 161 | if vbl["name"] in ["tisr", "tp"] and y == 1979: 162 | mmap[t_offset+7:t_end, root_channel] = rootgrp[vbl["name"]][:] # tisr, tp starts at 7:00 o clock 163 | mmap[t_offset:t_offset+7, root_channel] = float("nan") 164 | else: 165 | 166 | if len(vbl["levels"]) == 1: 167 | mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:] #[:, vbl["levels"]] 168 | else: 169 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]] 170 | #mmap.flush() 171 | 172 | from multiprocessing import Pool 173 | from functools import partial 174 | with Pool(40) as p: 175 | p.map(partial(write_year,vbls=(variables_era, variables_era_2019)), years) 176 | mmap.flush() 177 | del mmap 178 | 179 | 180 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency 181 | print("Done converting. 
Generating dataset pickle file...") 182 | import dill 183 | import json 184 | dct = {} 185 | dct["variables"] = {} 186 | for i, v in enumerate(variables_const): 187 | vbl_dict = {"name":v["name"], 188 | "mmap_name":"{}__era5625_const.mmap".format(dataset_name), 189 | "type":"const", 190 | "dims": v["dims"], 191 | "offset": 0 if not i else sum([1 for vg in variables_const[:i]]), 192 | "first_ts": None, 193 | "last_ts": None, 194 | "tfreq_s": None, 195 | "levels": None} 196 | dct["variables"]["era5625/{}".format(v["name"])] = vbl_dict 197 | 198 | for i, v in enumerate(variables_era): 199 | vbl_dict = {"name": "{}_{}hPa".format(v["name"], v["p_level"]) if v["name"] in ["ciwc","clwc"] else v["name"], 200 | "mmap_name":"{}__era5625.mmap".format(dataset_name), 201 | "type":"temp", 202 | "dims": v["dims"], 203 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]]), 204 | "first_ts": datetime.datetime(1979, 1, 1, 0).timestamp() if v["name"] not in ["tisr", "tp"] else datetime.datetime(1979, 1, 1, 7).timestamp(), 205 | "last_ts": datetime.datetime(2019, 12, 31, 23).timestamp(),# if v["name"] not in ["ciwc", "clwc"] else datetime.datetime(2000,12,31,23).timestamp(), 206 | "tfreq_s": 3600, 207 | "levels": v["levels"]} 208 | 209 | if "p_level" in v: 210 | vbl_dict["index2pressure"] = {i:int(v["p_level"]) for i, vl in enumerate(v["levels"])} 211 | else: 212 | vbl_dict["index2pressure"] = {i:int(idx_to_pressure[vl]) for i, vl in enumerate(v["levels"])} 213 | dct["variables"]["era5625/{}".format(vbl_dict["name"])] = vbl_dict 214 | 215 | dct["memmap"] = {"{}__era5625_const.mmap".format(dataset_name): {"dims": era_const_dims, 216 | "dtype": "float32", 217 | "daterange": None, 218 | "tfreq_s": None}, 219 | "{}__era5625.mmap".format(dataset_name): {"dims": era_dims, 220 | "dtype": "float32", 221 | "daterange": (datetime.datetime(1979, 1, 1, 0).timestamp(), datetime.datetime(2018, 12, 31, 23).timestamp()), 222 | "tfreq_s": 3600}, 223 | } 224 | 225 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb')) 226 | 227 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile: 228 | json.dump(dct, outfile, indent=4, sort_keys=True) 229 | -------------------------------------------------------------------------------- /src/convert/convert_era5625_aaai_sample.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from netCDF4 import Dataset as netcdf_Dataset 3 | import os 4 | import numpy as np 5 | 6 | if __name__ == "__main__": 7 | 8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12} 9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()} 10 | 11 | years=list(range(2018,2020)) 12 | dataset_name = "era5625_sample" 13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER" 14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name) 15 | if not os.path.exists(output_path): 16 | os.makedirs(output_path) 17 | 18 | variables_const = [ 19 | {"name": "lat2d", 20 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 21 | "dims": (32, 64)}, 22 | {"name": "lon2d", 23 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 24 | "dims": (32, 64)}, 25 | {"name": "lsm", 26 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 27 | "dims": (32, 64)}, 28 | {"name": "orography", 29 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 30 | "dims": (32, 64)}, 31 | 
{"name": "slt", 32 | "ftemplate": os.path.join(input_path, "constants_5.625deg.nc"), 33 | "dims": (32, 64)}, 34 | ] 35 | 36 | variables_era = [ 37 | {"name": "t2m", 38 | "ftemplate": os.path.join(input_path, "2m_temperature_{}_5.625deg.nc"), 39 | "dims": (32, 64), 40 | "levels": list(range(1))}, 41 | {"name": "sp", 42 | "ftemplate": os.path.join(input_path, "surface_pressure_{}_5.625deg.nc"), 43 | "dims": (32, 64), 44 | "levels": list(range(1))}, 45 | {"name": "tp", 46 | "ftemplate": os.path.join(input_path, "total_precipitation_{}_5.625deg.nc"), 47 | "dims": (32, 64), 48 | "levels": list(range(1))}, #]#, 49 | ] 50 | 51 | from copy import deepcopy 52 | variables_era_2019 = deepcopy(variables_era) 53 | 54 | # era_extra_pressure_levels = [300, 500, 850] #, 850] 55 | # for i, p in enumerate(era_extra_pressure_levels): 56 | # variables_era.append({"name": "ciwc".format(p), 57 | # "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 58 | # "dims": (32, 64), 59 | # "levels": list((pressure_to_idx[p],)), 60 | # "p_level": p}) 61 | # variables_era.append({"name": "clwc".format(p), 62 | # "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 63 | # "dims": (32, 64), 64 | # "levels": list((pressure_to_idx[p],)), 65 | # "p_level": p}) 66 | # variables_era.append({"name": "t", 67 | # "ftemplate": os.path.join(input_path, "temperature_{}_5.625deg.nc"), 68 | # "dims": (32, 64), 69 | # "levels": list((pressure_to_idx[p],)), 70 | # "p_level": p}), 71 | # variables_era.append({"name": "z", 72 | # "ftemplate": os.path.join(input_path, "geopotential_{}_5.625deg.nc"), 73 | # "dims": (32, 64), 74 | # "levels": list((pressure_to_idx[p],))}), 75 | # variables_era.append({"name": "q", 76 | # "ftemplate": os.path.join(input_path, "specific_humidity_{}_5.625deg.nc"), 77 | # "dims": (32, 64), 78 | # "levels": list((pressure_to_idx[p],))}), 79 | 80 | # era_extra_pressure_levels = [300, 500, 850] #, 850] 81 | # for i, p in enumerate(era_extra_pressure_levels): 82 | # variables_era_2019.append({"name": "ciwc".format(p), 83 | # "ftemplate": os.path.join(input_path, "specific_cloud_ice_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 84 | # "dims": (32, 64), 85 | # "levels": list((pressure_to_idx[p],)), 86 | # "p_level": p}) 87 | # variables_era_2019.append({"name": "clwc".format(p), 88 | # "ftemplate": os.path.join(input_path, "specific_cloud_liquid_water_content_{}_"+str(int(p))+"_5.625deg.nc"), 89 | # "dims": (32, 64), 90 | # "levels": list((pressure_to_idx[p],)), 91 | # "p_level": p}) 92 | # variables_era_2019.append({"name": "t", 93 | # "ftemplate": os.path.join(input_path, "temperature_{}_"+str(int(p))+"_5.625deg.nc"), 94 | # "dims": (32, 64), 95 | # "levels": list((pressure_to_idx[p],)), 96 | # "p_level": p}), 97 | # variables_era_2019.append({"name": "z", 98 | # "ftemplate": os.path.join(input_path, "geopotential_{}_"+str(int(p))+"_5.625deg.nc"), 99 | # "dims": (32, 64), 100 | # "levels": list((pressure_to_idx[p],))}), 101 | # variables_era_2019.append({"name": "q", 102 | # "ftemplate": os.path.join(input_path, "specific_humidity_{}_"+str(int(p))+"_5.625deg.nc"), 103 | # "dims": (32, 64), 104 | # "levels": list((pressure_to_idx[p],))}), 105 | 106 | era_const_path = os.path.join(output_path, "{}__era5625_const.mmap".format(dataset_name)) 107 | print("Writing const values...") 108 | const_dims = (sum([1 for vg in variables_const]), 32, 64) 109 | era_const_dims = const_dims 110 | 111 | if 
os.path.exists(era_const_path): 112 | print("Skipping ERA CONST as file exists... ") 113 | else: 114 | # write const variables 115 | mmap = np.memmap(era_const_path, dtype='float32', mode='w+', shape=const_dims) 116 | def write_const(vbls): 117 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbls[0]["ftemplate"]), "r", format="NETCDF4") 118 | for i, vbl in enumerate(vbls): 119 | print("WRITING CONST VBL ", vbl["name"]) 120 | root_channel = 0 if not i else sum([1 for vg in variables_const[:i]]) 121 | print("ROOT CHANNEL: ", root_channel) 122 | mmap[root_channel] = rootgrp[vbl["name"]][:] 123 | write_const(variables_const) 124 | mmap.flush() 125 | del mmap 126 | 127 | # write temporal ERA variables 128 | n_rec_dim = (32, 64) 129 | n_recs = (datetime.datetime(min(max(years), 2019), 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 + 1 130 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_era]) 131 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim) 132 | era_dims = dims 133 | if not os.path.exists(output_path): 134 | os.makedirs(output_path) 135 | 136 | era_path = os.path.join(output_path, "{}__era5625.mmap".format(dataset_name)) 137 | if os.path.exists(era_path): 138 | print("Skipping ERA as file exists... ") 139 | else: 140 | mmap = np.memmap(era_path, dtype='float32', mode='w+', shape=dims) 141 | 142 | def write_year(y, vbls): 143 | vbls, vbls_2019 = vbls 144 | 145 | if y > 2019: 146 | print("ERA: no data available for year {}".format(y)) 147 | return 148 | t_offset = int((datetime.datetime(y, 1, 1, 0).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) 149 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) + 1 150 | for i, vbl in enumerate(vbls): 151 | if y == 2019: 152 | vbl = vbls_2019[i] 153 | print("ERA5625 writing year {} vbl {}...".format(y, vbl["name"])) 154 | netcdf_fname = vbl["ftemplate"].format(y) 155 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]]) 156 | if vbl["name"] in ["tcwv"] and y > 2000: 157 | mmap[t_offset:t_end, root_channel] = float("nan") 158 | else: 159 | rootgrp = netcdf_Dataset(os.path.join(input_path, netcdf_fname), "r", format="NETCDF4") 160 | print(t_offset, t_end, root_channel, len(vbl["levels"])) 161 | if vbl["name"] in ["tisr", "tp"] and y == 1979: 162 | mmap[t_offset+7:t_end, root_channel] = rootgrp[vbl["name"]][:] # tisr, tp starts at 7:00 o clock 163 | mmap[t_offset:t_offset+7, root_channel] = float("nan") 164 | else: 165 | 166 | if len(vbl["levels"]) == 1: 167 | mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:] #[:, vbl["levels"]] 168 | else: 169 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]] 170 | #mmap.flush() 171 | 172 | from multiprocessing import Pool 173 | from functools import partial 174 | with Pool(40) as p: 175 | p.map(partial(write_year,vbls=(variables_era, variables_era_2019)), years) 176 | mmap.flush() 177 | del mmap 178 | 179 | 180 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency 181 | print("Done converting. 
Generating dataset pickle file...") 182 | import dill 183 | import json 184 | dct = {} 185 | dct["variables"] = {} 186 | for i, v in enumerate(variables_const): 187 | vbl_dict = {"name":v["name"], 188 | "mmap_name":"{}__era5625_const.mmap".format(dataset_name), 189 | "type":"const", 190 | "dims": v["dims"], 191 | "offset": 0 if not i else sum([1 for vg in variables_const[:i]]), 192 | "first_ts": None, 193 | "last_ts": None, 194 | "tfreq_s": None, 195 | "levels": None} 196 | dct["variables"]["era5625/{}".format(v["name"])] = vbl_dict 197 | 198 | for i, v in enumerate(variables_era): 199 | vbl_dict = {"name": "{}_{}hPa".format(v["name"], v["p_level"]) if v["name"] in ["ciwc","clwc"] else v["name"], 200 | "mmap_name":"{}__era5625.mmap".format(dataset_name), 201 | "type":"temp", 202 | "dims": v["dims"], 203 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_era[:i]]), 204 | "first_ts": datetime.datetime(years[0], 1, 1, 0).timestamp(), 205 | "last_ts": datetime.datetime(years[1], 12, 31, 23).timestamp(),# if v["name"] not in ["ciwc", "clwc"] else datetime.datetime(2000,12,31,23).timestamp(), 206 | "tfreq_s": 3600, 207 | "levels": v["levels"]} 208 | 209 | if "p_level" in v: 210 | vbl_dict["index2pressure"] = {i:int(v["p_level"]) for i, vl in enumerate(v["levels"])} 211 | else: 212 | vbl_dict["index2pressure"] = {i:int(idx_to_pressure[vl]) for i, vl in enumerate(v["levels"])} 213 | dct["variables"]["era5625/{}".format(vbl_dict["name"])] = vbl_dict 214 | 215 | dct["memmap"] = {"{}__era5625_const.mmap".format(dataset_name): {"dims": era_const_dims, 216 | "dtype": "float32", 217 | "daterange": None, 218 | "tfreq_s": None}, 219 | "{}__era5625.mmap".format(dataset_name): {"dims": era_dims, 220 | "dtype": "float32", 221 | "daterange": (datetime.datetime(years[0], 1, 1, 0).timestamp(), datetime.datetime(years[1], 12, 31, 23).timestamp()), 222 | "tfreq_s": 3600}, 223 | } 224 | 225 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb')) 226 | 227 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile: 228 | json.dump(dct, outfile, indent=4, sort_keys=True) 229 | -------------------------------------------------------------------------------- /src/convert/convert_imerg5625.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from netCDF4 import Dataset as netcdf_Dataset 3 | import os 4 | import numpy as np 5 | 6 | if __name__ == "__main__": 7 | 8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12} 9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()} 10 | dataset_range=(datetime.datetime(2000, 6, 1, 0), datetime.datetime(2019, 12, 31, 23)) 11 | 12 | 13 | years=list(range(2000,2020)) 14 | dataset_name = "imerg5625" 15 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER" 16 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name) 17 | 18 | if not os.path.exists(output_path): 19 | os.makedirs(output_path) 20 | 21 | variables_imerg25bi = [ 22 | {"name": "precipitationcal", 23 | "ftemplate": os.path.join(input_path, "imerg{}{:02d}{:02d}.nc"), 24 | "dims": (32, 64), 25 | "levels": list(range(1))}, 26 | ] 27 | 28 | imerg25bi_path = os.path.join(output_path, "{}__imerg5625bi.mmap".format(dataset_name)) 29 | n_rec_dim = variables_imerg25bi[0]["dims"] 30 | imerg25bi_sample_freq = 1 31 | n_recs = ((datetime.datetime(2019, 12, 31, 23).timestamp()-datetime.datetime(2000, 6, 1, 0).timestamp()) 
    n_rec_channels = sum([len(vg["levels"]) for vg in variables_imerg25bi])
    dims = (int(n_recs), int(n_rec_channels), *n_rec_dim)
    print("dims: ", dims)
    print("nrecs: ", n_recs)
    imerg25bi_dims = dims
    if os.path.exists(imerg25bi_path):
        print("Skipping iMERG25bi as file exists...")
    else:
        # write temporal iMERG variables
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        mmap = np.memmap(imerg25bi_path, dtype='float32', mode='w+', shape=dims)

        def write_day(ymd, vbls):
            y, m, d = ymd
            if y < dataset_range[0].year:
                print("iMERG25bi: no data available for year {}".format(y))
                return
            t_offset = int((datetime.datetime(y, m, d, 0).timestamp() - datetime.datetime(max(min(years), dataset_range[0].year), 6, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq
            t_end = int((datetime.datetime(y, m, d, 23).timestamp() - datetime.datetime(max(min(years), dataset_range[0].year), 6, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq + 1
            for i, vbl in enumerate(vbls):
                print("iMERG writing year {} month {} day {} vbl {}...".format(y, m, d, vbl["name"]))
                rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y, m, d)), "r", format="NETCDF4")
                root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]])
                print(t_offset, t_end, root_channel, len(vbl["levels"]))
                try:
                    mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:]
                except Exception as e:
                    print(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape)
                    raise Exception("{} {} {} {} {} {} {} {} {} {}".format(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape))

        ymd = []
        dd = dataset_range[0]
        while dd <= dataset_range[1]:
            ymd.append((dd.year, dd.month, dd.day))
            dd += datetime.timedelta(days=1)

        from multiprocessing import Pool
        from functools import partial
        with Pool(40) as p:
            p.map(partial(write_day, vbls=variables_imerg25bi), ymd)
        mmap.flush()
        del mmap

    # Create pickle file describing which variables are contained in what file, at what positions, and at what frequency
    print("Done converting. Generating dataset pickle file...")
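    # Reading the result back is a plain memmap open in read mode (a minimal
    # sketch, assuming the conversion above has completed):
    #
    #   rain = np.memmap(imerg25bi_path, dtype='float32', mode='r', shape=imerg25bi_dims)
    #   first_day = rain[0:24, 0]  # 24 hourly (32, 64) fields for 2000-06-01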
Generating dataset pickle file...") 80 | import dill 81 | import json 82 | dct = {} 83 | dct["variables"] = {} 84 | for i, v in enumerate(variables_imerg25bi): 85 | vbl_dict = {"name":v["name"], 86 | "mmap_name":"{}__imerg5625.mmap".format(dataset_name), 87 | "type":"temp", 88 | "dims": v["dims"], 89 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]]), 90 | "first_ts": dataset_range[0].timestamp(), 91 | "last_ts": dataset_range[1].timestamp(), 92 | "tfreq_s": 3600, 93 | "levels": v["levels"]} 94 | dct["variables"]["imerg5625/{}".format(v["name"])] = vbl_dict 95 | 96 | dct["memmap"] = {"{}__imerg5625.mmap".format(dataset_name): {"dims": imerg25bi_dims, 97 | "dtype": "float32", 98 | "daterange": (dataset_range[0].timestamp(), dataset_range[1].timestamp()), 99 | "tfreq_s": 3600} 100 | } 101 | 102 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb')) 103 | 104 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile: 105 | json.dump(dct, outfile, indent=4, sort_keys=True) 106 | -------------------------------------------------------------------------------- /src/convert/convert_imerg5625_sample.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from netCDF4 import Dataset as netcdf_Dataset 3 | import os 4 | import numpy as np 5 | 6 | if __name__ == "__main__": 7 | 8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12} 9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()} 10 | dataset_range=(datetime.datetime(2000, 6, 1, 0), datetime.datetime(2019, 12, 31, 23)) 11 | 12 | 13 | years=list(range(2018,2020)) 14 | dataset_name = "imerg5625_sample" 15 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER" 16 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name) 17 | 18 | if not os.path.exists(output_path): 19 | os.makedirs(output_path) 20 | 21 | variables_imerg25bi = [ 22 | {"name": "precipitationcal", 23 | "ftemplate": os.path.join(input_path, "imerg{}{:02d}{:02d}.nc"), 24 | "dims": (32, 64), 25 | "levels": list(range(1))}, 26 | ] 27 | 28 | imerg25bi_path = os.path.join(output_path, "{}__imerg5625.mmap".format(dataset_name)) 29 | n_rec_dim = variables_imerg25bi[0]["dims"] 30 | imerg25bi_sample_freq = 1 31 | n_recs = ((datetime.datetime(2019, 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 ) // imerg25bi_sample_freq + 1 32 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_imerg25bi]) 33 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim) 34 | print("dims: ", dims) 35 | print("nrecs: ", n_recs) 36 | imerg25bi_dims = dims 37 | if os.path.exists(imerg25bi_path): 38 | print("Skipping iMERG25bi as file exists... 
") 39 | else: 40 | # write temporal SimSat variables 41 | if not os.path.exists(output_path): 42 | os.makedirs(output_path) 43 | 44 | mmap = np.memmap(imerg25bi_path, dtype='float32', mode='w+', shape=dims) 45 | 46 | def write_day(ymd, vbls): 47 | y, m, d = ymd 48 | if y < dataset_range[0].year: 49 | print("iMERG25bi: no data available for year {}".format(y)) 50 | return 51 | t_offset = int((datetime.datetime(y, m, d, 0).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq 52 | t_end = int((datetime.datetime(y, m, d, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // imerg25bi_sample_freq + 1 53 | for i, vbl in enumerate(vbls): 54 | print("SimSat writing year {} month {} day {} vbl {}...".format(y, m, d, vbl["name"])) 55 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y, m, d)), "r", format="NETCDF4") 56 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]]) 57 | print(t_offset, t_end, root_channel, len(vbl["levels"])) 58 | try: 59 | mmap[t_offset:t_end, root_channel] = rootgrp[vbl["name"]][:] 60 | except Exception as e: 61 | print(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape) 62 | raise Exception("{} {} {} {} {} {} {} {} {} {} ".format(y, m, d, vbl["name"], vbl["levels"], t_offset, t_end, root_channel, e, rootgrp[vbl["name"]][:].shape)) 63 | 64 | ymd = [] 65 | dd = datetime.datetime(years[0], 1, 1, 0) 66 | while dd <= datetime.datetime(years[1], 12, 31, 23): 67 | ymd.append((dd.year, dd.month, dd.day)) 68 | dd += datetime.timedelta(days=1) 69 | 70 | from multiprocessing import Pool 71 | from functools import partial 72 | with Pool(40) as p: 73 | p.map(partial(write_day,vbls=variables_imerg25bi), ymd) 74 | mmap.flush() 75 | del mmap 76 | 77 | 78 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency 79 | print("Done converting. 
Generating dataset pickle file...") 80 | import dill 81 | import json 82 | dct = {} 83 | dct["variables"] = {} 84 | for i, v in enumerate(variables_imerg25bi): 85 | vbl_dict = {"name":v["name"], 86 | "mmap_name":"{}__imerg5625.mmap".format(dataset_name), 87 | "type":"temp", 88 | "dims": v["dims"], 89 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_imerg25bi[:i]]), 90 | "first_ts": datetime.datetime(years[0], 1, 1, 0).timestamp(), 91 | "last_ts": datetime.datetime(years[1], 12, 31, 23).timestamp(), 92 | "tfreq_s": 3600, 93 | "levels": v["levels"]} 94 | dct["variables"]["imerg5625/{}".format(v["name"])] = vbl_dict 95 | 96 | dct["memmap"] = {"{}__imerg5625.mmap".format(dataset_name): {"dims": imerg25bi_dims, 97 | "dtype": "float32", 98 | "daterange": (datetime.datetime(years[0], 1, 1, 0).timestamp(), 99 | datetime.datetime(years[1], 12, 31, 23).timestamp()), 100 | "tfreq_s": 3600} 101 | } 102 | 103 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb')) 104 | 105 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile: 106 | json.dump(dct, outfile, indent=4, sort_keys=True) 107 | -------------------------------------------------------------------------------- /src/convert/convert_simsat5625.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from netCDF4 import Dataset as netcdf_Dataset 3 | import os 4 | import numpy as np 5 | 6 | if __name__ == "__main__": 7 | 8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12} 9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()} 10 | 11 | years=list(range(2016,2021)) 12 | dataset_name = "simsat5625" 13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER" 14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name) 15 | if not os.path.exists(output_path): 16 | os.makedirs(output_path) 17 | 18 | variables_simsat = [ 19 | {"name": "clbt", 20 | "ftemplate": os.path.join(input_path, "sat{}.nc"), 21 | "dims": (32, 64), 22 | "levels": list(range(3))}, 23 | ] 24 | 25 | ds_daterange = (datetime.datetime(2016, 4, 1, 0), datetime.datetime(2020, 3, 31, 21)) 26 | ts_daterange = ds_daterange 27 | 28 | simsat_path = os.path.join(output_path, "{}__simsat5625.mmap".format(dataset_name)) 29 | n_rec_dim = (32, 64) 30 | simsat_sample_freq = 3 # every 3 hours 31 | n_recs = ((ds_daterange[1].timestamp()-ds_daterange[0].timestamp()) // 3600 ) // simsat_sample_freq + 1 32 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_simsat]) 33 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim) 34 | simsat_dims = dims 35 | if os.path.exists(simsat_path): 36 | print("Skipping SimSat as file exists... 
") 37 | else: 38 | # write temporal SimSat variables 39 | if not os.path.exists(output_path): 40 | os.makedirs(output_path) 41 | 42 | mmap = np.memmap(simsat_path, dtype='float32', mode='w+', shape=dims) 43 | print("MMAP DIMS: ", dims) 44 | 45 | def write_year(y, vbls): 46 | if y < 2016: 47 | print("SimSat: no data available for year {}".format(y)) 48 | return 49 | if y == 2016: 50 | t_offset = 0 51 | else: 52 | t_offset = int((datetime.datetime(y,1,1,0).timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq 53 | if y == 2020: 54 | t_end = int((ts_daterange[1].timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq + 1 55 | else: 56 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq + 1 57 | print("year: ", y, " t_offset: ", t_offset, "t_end:", t_end) 58 | for i, vbl in enumerate(vbls): 59 | print("SimSat writing year {} vbl {}...".format(y, vbl["name"])) 60 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y)), "r", format="NETCDF4") 61 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]]) 62 | print("hello:", t_offset, t_end, root_channel, len(vbl["levels"]), rootgrp[vbl["name"]].shape) 63 | try: 64 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]] 65 | except Exception as e: 66 | print("EXCEPTION", rootgrp[vbl["name"]].shape, t_offset, t_end, root_channel, mmap.shape) 67 | raise Exception() 68 | 69 | from multiprocessing import Pool 70 | from functools import partial 71 | with Pool(1) as p: 72 | p.map(partial(write_year,vbls=variables_simsat), years) 73 | mmap.flush() 74 | del mmap 75 | 76 | 77 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency 78 | print("Done converting. 
Generating dataset pickle file...") 79 | import dill 80 | import json 81 | dct = {} 82 | dct["variables"] = {} 83 | for i, v in enumerate(variables_simsat): 84 | vbl_dict = {"name":v["name"], 85 | "mmap_name":"{}__simsat5625.mmap".format(dataset_name), 86 | "type":"temp", 87 | "dims": v["dims"], 88 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]]), 89 | "first_ts": ts_daterange[0].timestamp(), 90 | "last_ts": ts_daterange[1].timestamp(), 91 | "tfreq_s": 3600*3, 92 | "levels": v["levels"]} 93 | dct["variables"]["simsat5625/{}".format(v["name"])] = vbl_dict 94 | 95 | dct["memmap"] = {"{}__simsat5625.mmap".format(dataset_name): {"dims": simsat_dims, 96 | "dtype": "float32", 97 | "daterange": (ts_daterange[0].timestamp(), 98 | ts_daterange[1].timestamp()), 99 | "tfreq_s": 3600*3} 100 | } 101 | 102 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb')) 103 | 104 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile: 105 | json.dump(dct, outfile, indent=4, sort_keys=True) 106 | -------------------------------------------------------------------------------- /src/convert/convert_simsat5625_sample.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from netCDF4 import Dataset as netcdf_Dataset 3 | import os 4 | import numpy as np 5 | 6 | if __name__ == "__main__": 7 | 8 | pressure_to_idx = {50:0, 100:1, 150:2, 200:3, 250:4, 300:5, 400:6, 500:7, 600:8, 700:9, 850:10, 925:11, 1000:12} 9 | idx_to_pressure = {v:k for k,v in pressure_to_idx.items()} 10 | 11 | years=list(range(2018,2020)) 12 | dataset_name = "simsat5625" 13 | input_path = "EDIT INPUT PATH TO NETCDF FOLDER" 14 | output_path = os.path.join("EDIT OUTPUT PATH WHERE MEMMAPS ARE TO BE CREATED", dataset_name) 15 | 16 | if not os.path.exists(output_path): 17 | os.makedirs(output_path) 18 | 19 | variables_simsat = [ 20 | {"name": "clbt", 21 | "ftemplate": os.path.join(input_path, "sat{}.nc"), 22 | "dims": (32, 64), 23 | "levels": list(range(3))}, 24 | ] 25 | 26 | ds_daterange = (datetime.datetime(2016, 4, 1, 0), datetime.datetime(2020, 3, 31, 21)) 27 | ts_daterange = ds_daterange 28 | 29 | simsat_path = os.path.join(output_path, "{}__simsat5625.mmap".format(dataset_name)) 30 | n_rec_dim = (32, 64) 31 | simsat_sample_freq = 3 # every 3 hours 32 | n_recs = ((datetime.datetime(2019, 12, 31, 23).timestamp()-datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600 ) // simsat_sample_freq + 1 33 | n_rec_channels = sum([len(vg["levels"]) for vg in variables_simsat]) 34 | dims = (int(n_recs), int(n_rec_channels), *n_rec_dim) 35 | simsat_dims = dims 36 | if os.path.exists(simsat_path): 37 | print("Skipping SimSat as file exists... 
") 38 | else: 39 | # write temporal SimSat variables 40 | if not os.path.exists(output_path): 41 | os.makedirs(output_path) 42 | 43 | mmap = np.memmap(simsat_path, dtype='float32', mode='w+', shape=dims) 44 | print("MMAP DIMS: ", dims) 45 | 46 | def write_year(y, vbls): 47 | if y < 2016: 48 | print("SimSat: no data available for year {}".format(y)) 49 | return 50 | if y == 2016: 51 | t_offset = 0 52 | else: 53 | t_offset = int((datetime.datetime(y,1,1,0).timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq 54 | if y == 2020: 55 | t_end = int((ts_daterange[1].timestamp() - ds_daterange[0].timestamp()) // 3600) // simsat_sample_freq + 1 56 | else: 57 | t_end = int((datetime.datetime(y, 12, 31, 23).timestamp() - datetime.datetime(years[0], 1, 1, 0).timestamp()) // 3600) // simsat_sample_freq + 1 58 | print("year: ", y, " t_offset: ", t_offset, "t_end:", t_end) 59 | for i, vbl in enumerate(vbls): 60 | print("SimSat writing year {} vbl {}...".format(y, vbl["name"])) 61 | rootgrp = netcdf_Dataset(os.path.join(input_path, vbl["ftemplate"].format(y)), "r", format="NETCDF4") 62 | root_channel = 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]]) 63 | print("hello:", t_offset, t_end, root_channel, len(vbl["levels"]), rootgrp[vbl["name"]].shape) 64 | try: 65 | mmap[t_offset:t_end, root_channel:root_channel+len(vbl["levels"])] = rootgrp[vbl["name"]][:, vbl["levels"]] 66 | except Exception as e: 67 | print("EXCEPTION", rootgrp[vbl["name"]].shape, t_offset, t_end, root_channel, mmap.shape) 68 | raise Exception() 69 | 70 | from multiprocessing import Pool 71 | from functools import partial 72 | with Pool(1) as p: 73 | p.map(partial(write_year,vbls=variables_simsat), years) 74 | mmap.flush() 75 | del mmap 76 | 77 | 78 | # Create Pickle file describing which variables are contained in what file at what positions and what frequency 79 | print("Done converting. 
Generating dataset pickle file...") 80 | import dill 81 | import json 82 | dct = {} 83 | dct["variables"] = {} 84 | for i, v in enumerate(variables_simsat): 85 | vbl_dict = {"name":v["name"], 86 | "mmap_name":"{}__simsat5625.mmap".format(dataset_name), 87 | "type":"temp", 88 | "dims": v["dims"], 89 | "offset": 0 if not i else sum([len(vg["levels"]) for vg in variables_simsat[:i]]), 90 | "first_ts": datetime.datetime(years[0], 1, 1, 0).timestamp(), 91 | "last_ts": datetime.datetime(years[1], 12, 31, 23).timestamp(), 92 | "tfreq_s": 3600*3, 93 | "levels": v["levels"]} 94 | dct["variables"]["simsat5625/{}".format(v["name"])] = vbl_dict 95 | 96 | dct["memmap"] = {"{}__simsat5625.mmap".format(dataset_name): {"dims": simsat_dims, 97 | "dtype": "float32", 98 | "daterange": (datetime.datetime(years[0], 1, 1, 0).timestamp(), 99 | datetime.datetime(years[1], 12, 31, 23).timestamp()), 100 | "tfreq_s": 3600*3} 101 | } 102 | 103 | dill.dump(dct, open(os.path.join(output_path, dataset_name+".dill"),'wb')) 104 | 105 | with open(os.path.join(output_path, dataset_name+"_info.json"), 'w') as outfile: 106 | json.dump(dct, outfile, indent=4, sort_keys=True) 107 | -------------------------------------------------------------------------------- /src/convert/test_samples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | 4 | from src.dataloader import Dataset 5 | 6 | datapath = ["PATH TO ERA5625 SAMPLES DILL FILE", 7 | "PATH TO IMERG5625 SAMPLES DILL FILE", 8 | "PATH TO SIMSAT5625 SAMPLES DILL FILE"] 9 | 10 | partition_conf = {"train": 11 | {"timerange": ( 12 | datetime.datetime(2010, 1, 1, 0).timestamp(), datetime.datetime(2010, 12, 31, 0).timestamp()), 13 | "increment_s": 60 * 60}, 14 | "test": 15 | {"timerange": (datetime.datetime(2017, 1, 15, 0).timestamp(), 16 | datetime.datetime(2018, 12, 31, 0).timestamp()), 17 | "increment_s": 60 * 60}} 18 | partition_type = "range" 19 | 20 | sample_conf = {"lead_time_{}".format(int(lt / 3600)): # sample modes 21 | { 22 | "sample": # sample sections 23 | { 24 | "lsm": {"vbl": "yera5625/lsm"}, # sample variables 25 | # "lat": {"vbl": "era5625/lat2d"}, 26 | "t_300hPa": {"vbl": "yera5625/t:600hPa", 27 | "t": np.array([0, -1, -2, -3, ]) * 3600, 28 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}, 29 | "t_500hPa": {"vbl": "yera5625/t", 30 | "t": np.array([0, -1, -2, -3, ]) * 3600, 31 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}, 32 | # "t1000": {"vbl": "xera5625/t:1000hPa", 33 | # "t": np.array([0, -1, -2, -3, -4]) * 3600, 34 | # "interpolate": ["nan", "nearest_past", "nearest_future"][1]} 35 | }, 36 | "label": 37 | { 38 | "tp": {"vbl": "yera5625/tp", 39 | "t": np.array([lt]), 40 | "interpolate": ["nan", "nearest_past", "nearest_future"][1]}} 41 | } 42 | for lt in np.array([3, 7]) * 3600} # np.array([1, 3, 6, 9]) * 3600} 43 | 44 | # Met-Net style: different targets per label -- as an option 45 | 46 | dataset = Dataset(datapath=datapath, 47 | partition_conf=partition_conf, 48 | partition_type=partition_type, 49 | partition_selected="train", 50 | sample_conf=sample_conf, 51 | ) 52 | 53 | tp = dataset[((datetime.datetime(2018,1,1,0).timestamp(), datetime.datetime(2019,12,31,23).timestamp(), 3600), ["era5625/tp"], None)] 54 | imerg = dataset[((datetime.datetime(2018,1,1,0).timestamp(), datetime.datetime(2019,12,31,23).timestamp(), 3600), ["imerg5625/precipitationcal"], None)] 55 | simsat = dataset[((datetime.datetime(2018,1,1,0).timestamp(), 
simsat2 = dataset[([datetime.datetime(2018, 1, 1, 0).timestamp(), datetime.datetime(2019, 12, 31, 23).timestamp()], ["simsat5625/clbt:0"], {})]

--------------------------------------------------------------------------------
/src/dataloader/__init__.py:
--------------------------------------------------------------------------------
from .memmap_dataloader import Dataset

--------------------------------------------------------------------------------