├── .gitignore
├── README.md
├── data
│   ├── real_dataset.py
│   └── synthetic_dataset.py
├── eval.py
├── loss
│   ├── dilate_loss.py
│   ├── path_soft_dtw.py
│   └── soft_dtw.py
├── main.py
├── models
│   ├── base_models.py
│   ├── index_models.py
│   ├── inf_index_models.py
│   ├── inf_models.py
│   └── informer.py
├── requirements.txt
├── script.sh
├── train.py
└── utils.py

/.gitignore:
--------------------------------------------------------------------------------
bee

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Coherent Probabilistic Aggregate Queries on Long-horizon Forecasts
This repository contains the code accompanying the paper _Coherent Probabilistic Aggregate Queries on Long-horizon Forecasts_.

> Coherent Probabilistic Aggregate Queries on Long-horizon Forecasts.
>
> Prathamesh Deshpande and Sunita Sarawagi. IJCAI 2022. [arXiv:2111.03394v1](https://arxiv.org/pdf/2111.03394v1).

## Package Dependencies / Requirements
- Python 3.7.9 (recommended).
- All required packages are listed in [requirements.txt](requirements.txt).

## Run
```
./script.sh
```


## How to work with Command Line Arguments?
- If an optional argument is not passed, its value is taken from the configuration specified in `main.py` (based on `dataset_name` and `model_name`).
- If a valid value is passed on the command line, the code uses that value and ignores the one assigned in the configuration. (See the example after the table below.)

## Command Line Arguments Information
| Argument name | Type | Valid Assignments | Default |
| --------------| ---- | ----------------- | ------- |
| dataset_name | str | azure, ett, etthourly, Solar, taxi30min, Traffic911 | positional argument |
| saved_models_dir | str | - | None |
| output_dir | str | - | None |
| N_input | int | >0 | -1 |
| N_output | int | >0 | -1 |
| epochs | int | >0 | -1 |
| normalize | str | same, zscore_per_series, gaussian_copula, log | None |
| learning_rate | float | >0 | -1.0 |
| hidden_size | int | >0 | -1 |
| num_grulstm_layers | int | >0 | -1 |
| batch_size | int | >0 | -1 |
| v_dim | int | >0 | -1 |
| t2v_type | str | local, idx, mdh_lincomb, mdh_parti | None |
| K_list | \[int,...,int \] | \[>0,...,>0 \] | \[\] |
| device | str | - | None |
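For example, a single run on the `ett` dataset might look like the following. The flag values here are illustrative placeholders only, not the settings used in the paper (see `script.sh` for the commands actually used):

```
python main.py ett \
    --saved_models_dir saved_models \
    --output_dir Outputs \
    --N_input 336 --N_output 168 \
    --epochs 20 --learning_rate 0.0001 \
    --batch_size 64 --device cuda:0
```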
## Datasets
All the datasets can be found [here](https://drive.google.com/drive/folders/1b6xheczhJ1IwkTS5fqRf9_NkEkPf9beM?usp=sharing).

Add the dataset files/directories to the `data` directory before running the code.

## Output files

### Targets and Forecasts
The following output files are stored in the `<output_dir>/<dataset_name>/` directory.

| File name | Description |
| --------- | ----------- |
| inputs.npy | Test input values, size: `number of time-series x N_input` |
| targets.npy | Test target/ground-truth values, size: `number of time-series x N_output` |
| `<model_name>_pred_mu.npy` | Mean forecast values, size: `number of time-series x number of time-steps` |
| `<model_name>_pred_std.npy` | Standard deviation of forecast values, size: `number of time-series x number of time-steps` |

### Metrics
All the evaluation metrics on test data are stored in `<output_dir>/results_<dataset_name>.json` in the following format:

```yaml
{
  <model_name_1>:
  {
    'crps': <value>,
    'mae': <value>,
    'mse': <value>,
    'smape': <value>,
    'dtw': <value>,
    'tdi': <value>,
  }
  <model_name_2>:
  {
    'crps': <value>,
    'mae': <value>,
    'mse': <value>,
    'smape': <value>,
    'dtw': <value>,
    'tdi': <value>,
  }
  .
  .
  .
}
```
Here `<model_name_1>, <model_name_2>, ...` are the different models under consideration.

--------------------------------------------------------------------------------
/data/real_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import torch
5 | import random
6 | import json
7 | from torch.utils.data import Dataset, DataLoader
8 | from statsmodels.tsa.seasonal import seasonal_decompose, STL
9 | from pandas.tseries.frequencies import to_offset  # used by process_start_string()
10 | if os.path.exists('bee'):
11 |     DATA_DIRS = '/mnt/infonas/data/pratham/Forecasting/DILATE'
12 | else:
13 |     DATA_DIRS = '.'
14 | 
15 | def generate_train_dev_test_data(data, N_input):
16 |     train_per = 0.6
17 |     dev_per = 0.2
18 |     N = len(data)
19 | 
20 |     data_train = data[:int(train_per*N)]
21 |     data_dev = data[int(train_per*N)-N_input:int((train_per+dev_per)*N)]
22 |     data_test = data[int((train_per+dev_per)*N)-N_input:]
23 | 
24 |     return (data_train, data_dev, data_test)
25 | 
26 | def create_forecast_io_seqs(data, enc_len, dec_len, stride):
27 | 
28 |     data_in, data_out = [], []
29 |     for idx in range(0, len(data), stride):
30 |         if idx+enc_len+dec_len <= len(data):
31 |             data_in.append(data[idx:idx+enc_len])
32 |             data_out.append(data[idx+enc_len:idx+enc_len+dec_len])
33 | 
34 |     data_in = np.array(data_in)
35 |     data_out = np.array(data_out)
36 |     return data_in, data_out
37 | 
38 | 
39 | def process_start_string(start_string, freq):
40 |     '''
41 |     Source:
42 |     https://github.com/mbohlkeschneider/gluon-ts/blob/442bd4ffffa4a0fcf9ae7aa25db9632fbe58a7ea/src/gluonts/dataset/common.py#L306
43 |     '''
44 | 
45 |     timestamp = pd.Timestamp(start_string, freq=freq)
46 |     # 'W-SUN' is the standardized freqstr for W
47 |     if timestamp.freq.name in ("M", "W-SUN"):
48 |         offset = to_offset(freq)
49 |         timestamp = timestamp.replace(
50 |             hour=0, minute=0, second=0, microsecond=0, nanosecond=0
51 |         )
52 |         return pd.Timestamp(
53 |             offset.rollback(timestamp), freq=offset.freqstr
54 |         )
55 |     if timestamp.freq == 'B':
56 |         # does not floor on business day as it is not allowed
57 |         return timestamp
58 |     return pd.Timestamp(
59 |         timestamp.floor(timestamp.freq), freq=timestamp.freq
60 |     )
61 | 
62 | def shift_timestamp(ts, offset):
63 |     result = ts + offset * ts.freq
64 |     return pd.Timestamp(result, freq=ts.freq)
65 | 
66 | def get_date_range(start_string, freq, seq_len):
67 |     start = process_start_string(start_string, freq)
68 |     end = shift_timestamp(start, seq_len)
69 |     full_date_range = pd.date_range(start, end, freq=freq)
70 |     return full_date_range
71 | 
72 | 
73 | def get_list_of_dict_format(data):
74 |     data_new = list()
75 |     for entry in data:
76 |         entry_dict = dict()
77 |         entry_dict['target'] = entry
78 |         data_new.append(entry_dict)
79 |     return data_new
80 | 
81 | def prune_dev_test_sequence(data, seq_len):
82 |     for i in range(len(data)):
83 |         data[i]['target'] = data[i]['target'][-seq_len:]
84 |         data[i]['feats'] = data[i]['feats'][-seq_len:]
85 |     return data
86 | 
87 | 
88 | def decompose_seq(seq, decompose_type, period, N_output, is_train):
89 |     if is_train:
90 |         if decompose_type == 
'seasonal': 91 | components = seasonal_decompose( 92 | seq, model='additive', period=period, extrapolate_trend=True 93 | ) 94 | coeffs = torch.tensor( 95 | [components.trend, components.seasonal, components.resid] 96 | ).transpose(0,1) 97 | elif decompose_type == 'STL': 98 | stl_components = STL(seq, period=period).fit() 99 | coeffs = torch.tensor( 100 | [stl_components.trend, stl_components.seasonal, stl_components.resid] 101 | ).transpose(0,1) 102 | #coeffs = torch.log(coeffs) 103 | coeffs = (coeffs - coeffs.mean(dim=-1, keepdims=True)) / coeffs.std(dim=-1, keepdims=True) 104 | else: 105 | seq_tr = seq[:-N_output] 106 | seq_out = seq[-N_output:] 107 | if decompose_type == 'seasonal': 108 | components_tr = seasonal_decompose( 109 | seq_tr, model='additive', period=period, extrapolate_trend=True 110 | ) 111 | #components_out = seasonal_decompose( 112 | # seq_out, model='additive', period=period, extrapolate_trend=True 113 | #) 114 | coeffs_tr = torch.tensor([components_tr.trend, components_tr.seasonal, components_tr.resid]).transpose(0,1) 115 | #coeffs_out = torch.tensor([components.trend, components.seasonal, components.resid]).transpose(0,1) 116 | elif decompose_type == 'STL': 117 | stl_tr = STL(seq_tr, period=period).fit() 118 | #stl_out = STL(seq_out, period=period).fit() 119 | coeffs_tr = torch.tensor([stl_tr.trend, stl_tr.seasonal, stl_tr.resid]).transpose(0,1) 120 | #coeffs_out = torch.tensor([stl_out.trend, stl_out.seasonal, stl_out.resid]).transpose(0,1) 121 | 122 | means = coeffs_tr.mean(dim=0, keepdims=True) 123 | stds = coeffs_tr.std(dim=0, keepdims=True) 124 | coeffs_tr = (coeffs_tr - means) / stds 125 | coeffs_out = torch.zeros([seq_out.shape[0], coeffs_tr.shape[1]], dtype=torch.float) 126 | coeffs = torch.cat([coeffs_tr, coeffs_out], dim=0) 127 | #coeffs = torch.log(coeffs) 128 | 129 | return coeffs 130 | 131 | 132 | def parse_Traffic(N_input, N_output): 133 | with open(os.path.join(DATA_DIRS, 'data/traffic/traffic.txt'), 'r') as f: 134 | data = [] 135 | # Taking only first series of length 17544 136 | # TODO: Add all series to the dataset 137 | for line in f: 138 | data.append(line.rstrip().split(',')[0]) 139 | data = np.array(data).astype(np.float32) 140 | data = np.expand_dims(np.expand_dims(data, axis=-1), axis=0) 141 | 142 | #data_train, data_dev, data_test = generate_train_dev_test_data(data, N_input) 143 | 144 | train_len = int(0.6 * data.shape[1]) 145 | dev_len = int(0.2 * data.shape[1]) 146 | test_len = data.shape[1] - train_len - dev_len 147 | 148 | data_train = data[:, :train_len] 149 | 150 | data_dev, data_test = [], [] 151 | dev_tsid_map, test_tsid_map = {}, {} 152 | for i in range(data.shape[0]): 153 | for j in range(train_len, train_len+dev_len, N_output): 154 | data_dev.append(data[i, :j]) 155 | dev_tsid_map[len(data_dev)-1] = i 156 | for i in range(data.shape[0]): 157 | for j in range(train_len+dev_len, data.shape[1], N_output): 158 | data_test.append(data[i, :j]) 159 | test_tsid_map[len(data_test)-1] = i 160 | 161 | data_train = get_list_of_dict_format(data_train) 162 | data_dev = get_list_of_dict_format(data_dev) 163 | data_test = get_list_of_dict_format(data_test) 164 | 165 | return ( 166 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map 167 | ) 168 | 169 | def parse_ECG5000(N_input, N_output): 170 | with open(os.path.join(DATA_DIRS, 'data/ECG5000/ECG5000_TRAIN.tsv'), 'r') as f: 171 | data = [] 172 | for line in f: 173 | data.append(line.rstrip().split()) 174 | data = np.array(data).astype(np.float32) 175 | data = np.expand_dims(data, 
axis=-1) 176 | with open(os.path.join(DATA_DIRS, 'data/ECG5000/ECG5000_TEST.tsv'), 'r') as f: 177 | data_test = [] 178 | for line in f: 179 | data_test.append(line.rstrip().split()) 180 | data_test = np.array(data_test).astype(np.float32) 181 | data_test = np.expand_dims(data_test, axis=-1) 182 | 183 | N = data.shape[0] 184 | dev_len = int(0.2*N) 185 | train_len = N - dev_len 186 | data_train, data_dev = data[:train_len], data[train_len:train_len+dev_len] 187 | 188 | data_train_in, data_train_out = data_train[:, :N_input], data_train[:, N_input:N_input+N_output] 189 | data_dev_in, data_dev_out = data_dev[:, :N_input], data_dev[:, N_input:N_input+N_output] 190 | data_test_in, data_test_out = data_test[:, :N_input], data_test[:, N_input:N_input+N_output] 191 | 192 | train_bkp = np.ones(data_train_in.shape[0]) * N_input 193 | dev_bkp = np.ones(data_dev_in.shape[0]) * N_input 194 | test_bkp = np.ones(data_test_in.shape[0]) * N_input 195 | 196 | data_train = get_list_of_dict_format(data_train) 197 | data_dev = get_list_of_dict_format(data_dev) 198 | data_test = get_list_of_dict_format(data_test) 199 | 200 | return ( 201 | data_train_in, data_train_out, data_dev_in, data_dev_out, 202 | data_test_in, data_test_out, train_bkp, dev_bkp, test_bkp, 203 | data_train, data_dev, data_test 204 | ) 205 | 206 | def create_bins(sequence, bin_size, num_bins): 207 | #num_bins = int(np.ceil((sequence[-1] - sequence[0]) * 1. / bin_size)) 208 | counts = [0. for _ in range(num_bins)] 209 | curr_cnt = 0 210 | for ts in sequence: 211 | bin_id = int(ts // bin_size) 212 | counts[bin_id] += 1 213 | 214 | return counts 215 | 216 | def parse_Taxi(N_input, N_output): 217 | # https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-01.csv 218 | # https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-02.csv 219 | taxi_df_jan = pd.read_csv( 220 | 'data/yellow_tripdata_2019-01.csv', 221 | usecols=["tpep_pickup_datetime", "PULocationID"]) 222 | taxi_df_feb = pd.read_csv( 223 | 'data/yellow_tripdata_2019-02.csv', 224 | usecols=["tpep_pickup_datetime", "PULocationID"]) 225 | taxi_df = taxi_df_jan.append(taxi_df_feb) 226 | taxi_df['tpep_pickup_datetime'] = pd.to_datetime( 227 | taxi_df['tpep_pickup_datetime'], format='%Y-%m-%d %H:%M:%S', errors='coerce' 228 | ) 229 | ## Data cleaning 230 | # Dataset contains some spurious values, such as year 2038 and months other 231 | # than Jan and Feb. Following code purges such rows. 
232 | taxi_df = taxi_df[(taxi_df['tpep_pickup_datetime'].dt.year == 2019)] 233 | taxi_df = taxi_df[(taxi_df['tpep_pickup_datetime'].dt.month < 3)] 234 | 235 | taxi_df = taxi_df.sort_values('tpep_pickup_datetime') 236 | taxi_df['timestamp'] = pd.DatetimeIndex(taxi_df['tpep_pickup_datetime']).astype(np.int64)/1000000000 237 | del taxi_df['tpep_pickup_datetime'] 238 | taxi_df = taxi_df.sort_values(by=['timestamp']) 239 | #dataset_name = 'taxi' 240 | #if dataset_name in downsampling: 241 | # taxi_timestamps = downsampling_dataset(taxi_timestamps, dataset_name) 242 | 243 | num_hrs = int(np.ceil((taxi_df['timestamp'].values[-1] - taxi_df['timestamp'].values[0])/3600.)) 244 | loc2counts = dict() 245 | loc2numevents = dict() 246 | loc2startts = dict() 247 | for loc_id, loc_df in taxi_df.groupby(['PULocationID']): 248 | timestamps = loc_df['timestamp'].values 249 | timestamps = timestamps - timestamps[0] 250 | loc2numevents[loc_id] = len(timestamps) 251 | # Select locations in which num_events per hour is >1 252 | if (len(timestamps) >= N_input+N_output and len(timestamps) / num_hrs > 1.): 253 | counts = create_bins(timestamps, bin_size=3600., num_bins=num_hrs) 254 | print(loc_id, len(timestamps), len(timestamps) / num_hrs, len(counts)) 255 | loc2counts[loc_id] = counts 256 | 257 | #start_ts = pd.Timestamp(loc_df['timestamp'][0], unit='s') 258 | #loc2startts = start_ts 259 | 260 | data = np.array([val for val in loc2counts.values()]) 261 | data = np.expand_dims(data, axis=2) 262 | data_train, data_dev, data_test = [], [], [] 263 | data_train_in, data_train_out = [], [] 264 | data_dev_in, data_dev_out = [], [] 265 | data_test_in, data_test_out = [], [] 266 | for seq in data: 267 | seq_train, seq_dev, seq_test = generate_train_dev_test_data(seq, N_input) 268 | batch_train_in, batch_train_out = create_forecast_io_seqs(seq_train, N_input, N_output, int(N_output/3)) 269 | batch_dev_in, batch_dev_out = create_forecast_io_seqs(seq_dev, N_input, N_output, N_output) 270 | batch_test_in, batch_test_out = create_forecast_io_seqs(seq_test, N_input, N_output, N_output) 271 | data_train.append(seq_train) 272 | data_dev.append(seq_dev) 273 | data_test.append(seq_test) 274 | data_train_in.append(batch_train_in) 275 | data_train_out.append(batch_train_out) 276 | data_dev_in.append(batch_dev_in) 277 | data_dev_out.append(batch_dev_out) 278 | data_test_in.append(batch_test_in) 279 | data_test_out.append(batch_test_out) 280 | 281 | data_train_in = np.concatenate(data_train_in, axis=0) 282 | data_train_out = np.concatenate(data_train_out, axis=0) 283 | data_dev_in = np.concatenate(data_dev_in, axis=0) 284 | data_dev_out = np.concatenate(data_dev_out, axis=0) 285 | data_test_in = np.concatenate(data_test_in, axis=0) 286 | data_test_out = np.concatenate(data_test_out, axis=0) 287 | 288 | train_bkp = np.ones(data_train_in.shape[0]) * N_input 289 | dev_bkp = np.ones(data_dev_in.shape[0]) * N_input 290 | test_bkp = np.ones(data_test_in.shape[0]) * N_input 291 | 292 | data_train = get_list_of_dict_format(data_train) 293 | data_dev = get_list_of_dict_format(data_dev) 294 | data_test = get_list_of_dict_format(data_test) 295 | 296 | return ( 297 | data_train_in, data_train_out, data_dev_in, data_dev_out, 298 | data_test_in, data_test_out, train_bkp, dev_bkp, test_bkp, 299 | data_train, data_dev, data_test 300 | ) 301 | 302 | 303 | def parse_gc_datasets(dataset_name, N_input, N_output): 304 | 305 | 306 | if dataset_name in ['Exchange']: 307 | num_rolling_windows = 5 308 | num_val_rolling_windows = 2 309 | dataset_dir = 
'exchange_rate_nips' 310 | elif dataset_name in ['Wiki']: 311 | num_rolling_windows = 5 312 | num_val_rolling_windows = 2 313 | dataset_dir = 'wiki-rolling_nips' 314 | elif dataset_name in ['Solar']: 315 | num_rolling_windows = 7 316 | num_val_rolling_windows = 2 317 | dataset_dir = 'solar_nips' 318 | elif dataset_name in ['taxi30min']: 319 | num_rolling_windows = 7 320 | num_val_rolling_windows = 2 321 | dataset_dir = 'taxi_30min' 322 | 323 | data_ = [] 324 | with open(os.path.join(DATA_DIRS, 'data', dataset_dir, 'train', 'train.json')) as f: 325 | for line in f: 326 | data_.append(json.loads(line)) 327 | 328 | data_test_full_ = [] 329 | with open(os.path.join(DATA_DIRS, 'data', dataset_dir, 'test', 'test.json')) as f: 330 | for line in f: 331 | data_test_full_.append(json.loads(line)) 332 | 333 | if dataset_name in ['Wiki']: 334 | num_ts = len(data_) 335 | data = data_[ -2000 : ] 336 | data_test_full = [] 337 | for i in range(0, num_ts*num_rolling_windows, num_ts): 338 | data_test_full += data_test_full_[ i : i+num_ts ][ -2000 : ] 339 | elif dataset_name in ['taxi30min']: 340 | data = data_ 341 | num_ts = 1214 * num_rolling_windows 342 | data_test_full = data_test_full_[ -num_ts : ] 343 | for i in range(len(data_test_full)): 344 | assert data[i % len(data)]['lat'] == data_test_full[i]['lat'] 345 | assert data[i % len(data)]['lng'] == data_test_full[i]['lng'] 346 | data_test_full[i]['target'] = data[i % len(data)]['target'] + data_test_full[i]['target'] 347 | data_test_full[i]['start'] = data[i % len(data)]['start'] 348 | else: 349 | data = data_ 350 | data_test_full = data_test_full_ 351 | 352 | metadata = json.load(open(os.path.join(DATA_DIRS, 'data', dataset_dir, 'metadata', 'metadata.json'))) 353 | 354 | 355 | data_train, data_dev, data_test = [], [], [] 356 | dev_tsid_map, test_tsid_map = {}, {} 357 | data_train_in, data_train_out = [], [] 358 | data_dev_in, data_dev_out = [], [] 359 | data_test_in, data_test_out = [], [] 360 | for i, entry in enumerate(data, 0): 361 | entry_train = dict() 362 | 363 | train_len = len(entry['target']) - N_output*num_val_rolling_windows 364 | seq_train = entry['target'][ : train_len ] 365 | seq_train = np.expand_dims(seq_train, axis=-1) 366 | 367 | seq_dates = get_date_range(entry['start'], metadata['time_granularity'], len(entry['target'])) 368 | start_train = seq_dates[0] 369 | 370 | entry_train['target'] = seq_train 371 | entry_train['start'] = start_train 372 | entry_train['freq_str'] = metadata['time_granularity'] 373 | 374 | data_train.append(entry_train) 375 | 376 | for j in range(train_len+N_output, len(entry['target'])+1, N_output): 377 | entry_dev = {} 378 | seq_dev = entry['target'][:j] 379 | seq_dev = np.expand_dims(seq_dev, axis=-1) 380 | 381 | start_dev = seq_dates[0] 382 | 383 | entry_dev['target'] = seq_dev 384 | entry_dev['start'] = start_dev 385 | entry_dev['freq_str'] = metadata['time_granularity'] 386 | data_dev.append(entry_dev) 387 | dev_tsid_map[len(data_dev)-1] = i 388 | 389 | for i, entry in enumerate(data_test_full, 0): 390 | entry_test = dict() 391 | seq_test = entry['target'] 392 | seq_test = np.expand_dims(seq_test, axis=-1) 393 | 394 | seq_dates = get_date_range(entry['start'], metadata['time_granularity'], len(entry['target'])) 395 | start_test = seq_dates[0] 396 | 397 | entry_test['target'] = seq_test 398 | entry_test['start'] = start_test 399 | entry_test['freq_str'] = metadata['time_granularity'] 400 | data_test.append(entry_test) 401 | test_tsid_map[i] = i%len(data) # Multiple test instances per train series. 
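# If the loop above produced no validation windows, the check below falls back to
# using the test windows (and their series mapping) as the dev split.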
402 | 403 | if data_dev == []: 404 | data_dev = data_test 405 | dev_tsid_map = test_tsid_map 406 | return ( 407 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map 408 | ) 409 | 410 | def parse_weather(dataset_name, N_input, N_output): 411 | 412 | csv_path = os.path.join(DATA_DIRS, 'data', 'jena_climate_2009_2016.csv') 413 | df = pd.read_csv(csv_path) 414 | df = df[5::6] # Sub-sample the data from 10minute interval to 1h 415 | df = df[['T (degC)']] # Select temperature column, 'T (degC)' 416 | df = df.values.T # Retain only values in np format 417 | df = np.expand_dims(df, axis=-1) 418 | n = df.shape[1] 419 | 420 | # Split the data - train, dev, and test 421 | train_len = int(n*0.8) 422 | dev_len = int(n*0.1) 423 | test_len = n - (train_len + dev_len) 424 | data_train = df[:, 0:train_len] 425 | 426 | data_dev, data_test = [], [] 427 | dev_tsid_map, test_tsid_map = {}, {} 428 | for i in range(df.shape[0]): 429 | for j in range(train_len, train_len+dev_len, N_output): 430 | data_dev.append(df[i, :j]) 431 | dev_tsid_map[len(data_dev)-1] = i 432 | for i in range(df.shape[0]): 433 | for j in range(train_len+dev_len, n, N_output): 434 | data_test.append(df[i, :j]) 435 | test_tsid_map[len(data_test)-1] = i 436 | data_train = get_list_of_dict_format(data_train) 437 | data_dev = get_list_of_dict_format(data_dev) 438 | data_test = get_list_of_dict_format(data_test) 439 | 440 | return ( 441 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map 442 | ) 443 | 444 | 445 | def parse_bafu(dataset_name, N_input, N_output): 446 | file_path = os.path.join(DATA_DIRS, 'data', 'bafu_normal.txt') 447 | data = np.loadtxt(file_path) 448 | data = data.T 449 | data = np.expand_dims(data, axis=-1) 450 | 451 | n = data.shape[1] 452 | test_len = 48*28*7 453 | dev_len = 48*28*2 454 | train_len = n - dev_len - test_len 455 | 456 | data_train = data[:, :train_len] 457 | 458 | data_dev, data_test = [], [] 459 | dev_tsid_map, test_tsid_map = {}, {} 460 | # for i in range(data.shape[0]): 461 | # for j in range(train_len, train_len+dev_len, 1): 462 | # data_dev.append(data[i, :j]) 463 | # dev_tsid_map[len(data_dev)-1] = i 464 | # for i in range(data.shape[0]): 465 | # for j in range(train_len+dev_len, n, N_output): 466 | # data_test.append(data[i, :j]) 467 | # test_tsid_map[len(data_test)-1] = i 468 | 469 | for i in range(data.shape[0]): 470 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 471 | if j <= train_len+dev_len: 472 | data_dev.append(data[i, :j]) 473 | dev_tsid_map[len(data_dev)-1] = i 474 | #for i in range(data.shape[0]): 475 | for i in range(data.shape[0]): 476 | for j in range(train_len+dev_len+N_output, n+1, N_output): 477 | if j <= n: 478 | data_test.append(data[i, :j]) 479 | test_tsid_map[len(data_test)-1] = i 480 | 481 | data_train = get_list_of_dict_format(data_train) 482 | data_dev = get_list_of_dict_format(data_dev) 483 | data_test = get_list_of_dict_format(data_test) 484 | 485 | return ( 486 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map 487 | ) 488 | 489 | 490 | def parse_meteo(dataset_name, N_input, N_output): 491 | file_path = os.path.join(DATA_DIRS, 'data', 'meteo_normal.txt') 492 | data = np.loadtxt(file_path) 493 | data = data.T 494 | data = np.expand_dims(data, axis=-1) 495 | 496 | n = data.shape[1] 497 | test_len = 2000 498 | dev_len = 1000 499 | train_len = n - dev_len - test_len 500 | 501 | data_train = data[:, :train_len] 502 | 503 | data_dev, data_test = [], [] 504 | dev_tsid_map, test_tsid_map = {}, {} 505 | for i in 
range(data.shape[0]): 506 | for j in range(train_len, train_len+dev_len, 1): 507 | data_dev.append(data[i, :j]) 508 | dev_tsid_map[len(data_dev)-1] = i 509 | for i in range(data.shape[0]): 510 | for j in range(train_len+dev_len, n, N_output): 511 | data_test.append(data[i, :j]) 512 | test_tsid_map[len(data_test)-1] = i 513 | 514 | data_train = get_list_of_dict_format(data_train) 515 | data_dev = get_list_of_dict_format(data_dev) 516 | data_test = get_list_of_dict_format(data_test) 517 | 518 | return ( 519 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map 520 | ) 521 | 522 | 523 | def parse_azure(dataset_name, N_input, N_output, t2v_type=None): 524 | file_path = os.path.join(DATA_DIRS, 'data', 'azure.npy') 525 | data = np.load(file_path) 526 | data = torch.tensor(data, dtype=torch.float) 527 | #data = np.expand_dims(data, axis=-1) 528 | 529 | n = data.shape[1] 530 | test_len = 60*6*8 531 | dev_len = 60*6*8 532 | train_len = n - dev_len - test_len 533 | 534 | if t2v_type is None: 535 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 536 | elif 'mdh' in t2v_type: 537 | raise NotImplementedError 538 | elif 'idx' in t2v_type or 'local' in t2v_type: 539 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 540 | 541 | feats_date = np.tile(feats_date, (data.shape[0], 1, 1)) 542 | 543 | feats = np.ones((data.shape[0], 1)) * np.expand_dims(np.arange(n), axis=0) % 60 544 | feats = np.expand_dims(feats, axis=-1) 545 | 546 | feats = np.concatenate([feats, feats_date], axis=-1) 547 | feats = torch.tensor(feats, dtype=torch.float) 548 | 549 | data_train = data[:, :train_len] 550 | feats_train = feats[:, :train_len] 551 | 552 | data_dev, data_test = [], [] 553 | feats_dev, feats_test = [], [] 554 | dev_tsid_map, test_tsid_map = {}, {} 555 | for i in range(data.shape[0]): 556 | #for i in range(2, 3): 557 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 558 | if j <= train_len+dev_len: 559 | data_dev.append(data[i, :j]) 560 | dev_tsid_map[len(data_dev)-1] = i 561 | feats_dev.append(feats[i, :j]) 562 | for i in range(data.shape[0]): 563 | #for i in range(2, 3): 564 | for j in range(train_len+dev_len+N_output, n+1, N_output): 565 | if j <= n: 566 | data_test.append(data[i, :j]) 567 | test_tsid_map[len(data_test)-1] = i 568 | feats_test.append(feats[i, :j]) 569 | 570 | data_train = get_list_of_dict_format(data_train) 571 | data_dev = get_list_of_dict_format(data_dev) 572 | data_test = get_list_of_dict_format(data_test) 573 | 574 | # Add time-features 575 | for i in range(len(data_train)): 576 | data_train[i]['feats'] = feats_train[i] 577 | for i in range(len(data_dev)): 578 | data_dev[i]['feats'] = feats_dev[i] 579 | for i in range(len(data_test)): 580 | data_test[i]['feats'] = feats_test[i] 581 | 582 | feats_info = {0:(60, 32)} 583 | i = len(feats_info) 584 | for j in range(i, i+feats_date[0,0].shape[0]): 585 | feats_info[j] = (-1, -1) 586 | 587 | return ( 588 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 589 | feats_info 590 | ) 591 | 592 | def parse_ett(dataset_name, N_input, N_output, t2v_type=None): 593 | df = pd.read_csv(os.path.join(DATA_DIRS, 'data', 'ETT', 'ETTm1.csv')) 594 | # Remove incomplete data from last day 595 | df = df[:-80] 596 | 597 | 598 | data = df[['OT']].to_numpy().T 599 | #data = np.expand_dims(data, axis=-1) 600 | 601 | n = data.shape[1] 602 | units = n//N_output 603 | dev_len = int(0.2*units) * N_output 604 | test_len = int(0.2*units) * N_output 605 | train_len = n - dev_len - test_len 606 | #train_len = 
int(0.7*n) 607 | #dev_len = (int(0.2*n)//N_output) * N_output 608 | #test_len = n - train_len - dev_len 609 | 610 | #import ipdb ; ipdb.set_trace() 611 | 612 | feats_cont = np.expand_dims(df[['HUFL','HULL','MUFL','MULL','LUFL','LULL']].to_numpy(), axis=0) 613 | #feats = ((feats - np.mean(feats, axis=0, keepdims=True)) / np.std(feats, axis=0, keepdims=True)) 614 | #feats = np.ones((data.shape[0], 1)) * np.expand_dims(np.arange(n), axis=0) % 60 615 | #feats_discrete = np.abs((np.ones((data.shape[0], 1)) * np.expand_dims(np.arange(n), axis=0) % 60) // 15) 616 | feats_discrete = np.abs((np.ones((data.shape[0], 1)) * np.expand_dims(np.arange(n), axis=0) % 24*4)) 617 | feats_discrete = np.expand_dims(feats_discrete, axis=-1) 618 | 619 | cal_date = pd.to_datetime(df['date']) 620 | #import ipdb ; ipdb.set_trace() 621 | if t2v_type is None: 622 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 623 | elif 'mdh' in t2v_type: 624 | feats_date = np.stack( 625 | [ 626 | #cal_date.dt.year, 627 | cal_date.dt.month, 628 | cal_date.dt.day, 629 | cal_date.dt.hour, 630 | #cal_date.dt.minute 631 | ], axis=1 632 | ) 633 | elif 'idx' in t2v_type or 'local' in t2v_type: 634 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 635 | #import ipdb ; ipdb.set_trace() 636 | feats_date = np.expand_dims(feats_date, axis=0) 637 | 638 | #import ipdb ; ipdb.set_trace() 639 | feats_month = np.expand_dims(np.expand_dims(cal_date.dt.month-1, axis=-1), axis=0) 640 | 641 | #feats = np.concatenate([feats_discrete, feats_cont], axis=-1) 642 | #feats = feats_cont 643 | #feats = np.concatenate([feats_cont, feats_date], axis=-1) 644 | feats = np.concatenate([feats_discrete, feats_cont, feats_date], axis=-1) 645 | #feats = np.concatenate([feats_discrete, feats_month, feats_cont, feats_date], axis=-1) 646 | 647 | #data = (data - np.mean(data, axis=0, keepdims=True)).T 648 | 649 | data = torch.tensor(data, dtype=torch.float) 650 | feats = torch.tensor(feats, dtype=torch.float) 651 | 652 | #import ipdb ; ipdb.set_trace() 653 | 654 | data_train = data[:, :train_len] 655 | feats_train = feats[:, :train_len] 656 | 657 | data_dev, data_test = [], [] 658 | feats_dev, feats_test = [], [] 659 | dev_tsid_map, test_tsid_map = {}, {} 660 | for i in range(data.shape[0]): 661 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 662 | if j <= n: 663 | data_dev.append(data[i, :j]) 664 | feats_dev.append(feats[i, :j]) 665 | dev_tsid_map[len(data_dev)-1] = i 666 | for i in range(data.shape[0]): 667 | for j in range(train_len+dev_len+N_output, n+1, N_output): 668 | if j <= n: 669 | data_test.append(data[i, :j]) 670 | feats_test.append(feats[i, :j]) 671 | test_tsid_map[len(data_test)-1] = i 672 | 673 | 674 | data_train = get_list_of_dict_format(data_train) 675 | data_dev = get_list_of_dict_format(data_dev) 676 | data_test = get_list_of_dict_format(data_test) 677 | 678 | 679 | decompose_type = 'STL' 680 | period=96 681 | for i in range(len(data_train)): 682 | data_train[i]['feats'] = feats_train[i] 683 | for i in range(len(data_dev)): 684 | data_dev[i]['feats'] = feats_dev[i] 685 | for i in range(len(data_test)): 686 | data_test[i]['feats'] = feats_test[i] 687 | 688 | feats_info = {0:(24*4, 64), 1:(0, 1), 2:(0, 1), 3:(0, 1), 4:(0, 1), 5:(0, 1), 6:(0, 1)} 689 | #feats_info = { 690 | # 0:(0, 1), 1:(0, 1), 2:(0, 1), 3:(0, 1), 4:(0, 1), 5:(0, 1), 691 | #} 692 | #feats_info = {0:(24*4, 32), 1:(12, 8), 2:(0, 1), 3:(0, 1), 4:(0, 1), 5:(0, 1), 6:(0, 1), 7:(0, 1)} 693 | i = len(feats_info) 694 | for j in range(i, 
data_train[0]['feats'].shape[-1]): 695 | feats_info[j] = (-1, -1) 696 | 697 | seq_len = 2*N_input+N_output 698 | data_dev = prune_dev_test_sequence(data_dev, seq_len) 699 | data_test = prune_dev_test_sequence(data_test, seq_len) 700 | 701 | return ( 702 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 703 | feats_info 704 | ) 705 | 706 | 707 | def parse_sin_noisy(dataset_name, N_input, N_output): 708 | 709 | noise_len = 25 710 | 711 | X = np.arange(10000) 712 | 713 | y = np.sin(X * 2*np.pi/50.) 714 | noise_std = np.linspace(0, 1, noise_len) 715 | 716 | #for i in range(0, len(y), noise_len): 717 | # y[i:i+noise_len] += np.random.normal(loc=np.zeros_like(noise_std), scale=noise_std) 718 | 719 | #data = torch.tensor(np.expand_dims(np.expand_dims(y, axis=-1), axis=0), dtype=torch.float) 720 | data = torch.tensor(np.expand_dims(y, axis=0), dtype=torch.float) 721 | n = data.shape[1] 722 | train_len = int(0.6*n) 723 | dev_len = int(0.2*n) 724 | test_len = n - train_len - dev_len 725 | 726 | #feats_cont = np.expand_dims(df[['HUFL','HULL','MUFL','MULL','LUFL','LULL']].to_numpy(), axis=0) 727 | #feats = ((feats - np.mean(feats, axis=0, keepdims=True)) / np.std(feats, axis=0, keepdims=True)) 728 | #feats = np.ones((data.shape[0], 1)) * np.expand_dims(np.arange(n), axis=0) % 60 729 | feats_discrete = np.abs((np.ones((data.shape[0], 1)) * np.expand_dims(np.arange(n), axis=0) % 50)) 730 | feats_discrete = np.expand_dims(feats_discrete, axis=-1) 731 | 732 | #feats = np.concatenate([feats_discrete, feats_cont], axis=-1) 733 | feats = feats_discrete 734 | 735 | feats = torch.tensor(feats, dtype=torch.float) 736 | 737 | #data = (data - np.mean(data, axis=0, keepdims=True)).T 738 | 739 | #data = torch.tensor(np.expand_dims(data, axis=-1), dtype=torch.float) 740 | #feats = torch.tensor(np.expand_dims(feats, axis=0), dtype=torch.float) 741 | 742 | data_train = data[:, :train_len] 743 | feats_train = feats[:, :train_len] 744 | 745 | data_dev, data_test = [], [] 746 | feats_dev, feats_test = [], [] 747 | dev_tsid_map, test_tsid_map = {}, {} 748 | for i in range(data.shape[0]): 749 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 750 | if j <= n: 751 | data_dev.append(data[i, :j]) 752 | feats_dev.append(feats[i, :j]) 753 | dev_tsid_map[len(data_dev)-1] = i 754 | for i in range(data.shape[0]): 755 | for j in range(train_len+dev_len+N_output, n+1, N_output): 756 | if j <= n: 757 | data_test.append(data[i, :j]) 758 | feats_test.append(feats[i, :j]) 759 | test_tsid_map[len(data_test)-1] = i 760 | 761 | 762 | data_train = get_list_of_dict_format(data_train) 763 | data_dev = get_list_of_dict_format(data_dev) 764 | data_test = get_list_of_dict_format(data_test) 765 | 766 | for i in range(len(data_train)): 767 | data_train[i]['feats'] = feats_train[i] 768 | for i in range(len(data_dev)): 769 | data_dev[i]['feats'] = feats_dev[i] 770 | for i in range(len(data_test)): 771 | data_test[i]['feats'] = feats_test[i] 772 | 773 | decompose_type = 'seasonal' 774 | period = 50 775 | for i in range(len(data_train)): 776 | data_train[i]['feats'] = feats_train[i] 777 | seq = data_train[i]['target'] 778 | data_train[i]['coeffs'] = decompose_seq(seq, decompose_type, period, N_output, True) 779 | print('train:', i, len(data_train)) 780 | for i in range(len(data_dev)): 781 | data_dev[i]['feats'] = feats_dev[i] 782 | #import ipdb ; ipdb.set_trace() 783 | seq = data_dev[i]['target'] 784 | data_dev[i]['coeffs'] = decompose_seq(seq, decompose_type, period, N_output, False) 785 | print('dev:', i, 
len(data_dev)) 786 | for i in range(len(data_test)): 787 | data_test[i]['feats'] = feats_test[i] 788 | seq = data_test[i]['target'] 789 | data_test[i]['coeffs'] = decompose_seq(seq, decompose_type, period, N_output, False) 790 | print('test:', i, len(data_test)) 791 | 792 | 793 | feats_info = {0:(50, 64)} 794 | coeffs_info = {0:(0, 1), 1:(0, 1), 2:(0, 1)} 795 | 796 | 797 | return ( 798 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 799 | feats_info, coeffs_info 800 | ) 801 | 802 | 803 | def parse_Solar(dataset_name, N_input, N_output, t2v_type=None): 804 | 805 | data, feats = [], [] 806 | with open(os.path.join(DATA_DIRS, 'data', 'solar_nips', 'train', 'train.json')) as f: 807 | for line in f: 808 | line_dict = json.loads(line) 809 | x = line_dict['target'] 810 | data.append(x) 811 | n = len(x) 812 | x_f = np.expand_dims((np.arange(len(x)) % 24), axis=-1) 813 | cal_date = pd.date_range( 814 | start=line_dict['start'], periods=len(line_dict['target']), freq='H' 815 | ).to_series() 816 | if t2v_type is None: 817 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 818 | x_f = np.concatenate([x_f, feats_date], axis=-1) 819 | feats.append(x_f) 820 | 821 | data_test, feats_test = [], [] 822 | with open(os.path.join(DATA_DIRS, 'data', 'solar_nips', 'test', 'test.json')) as f: 823 | for line in f: 824 | line_dict = json.loads(line) 825 | x = line_dict['target'] 826 | x = np.array(x) 827 | #x = np.expand_dims(x, axis=-1) 828 | data_test.append(torch.tensor(x, dtype=torch.float)) 829 | x_f = np.expand_dims((np.arange(len(x)) % 24), axis=-1) 830 | n = len(x) 831 | cal_date = pd.date_range( 832 | start=line_dict['start'], periods=len(line_dict['target']), freq='H' 833 | ).to_series() 834 | if t2v_type is None: 835 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 
836 | x_f = np.concatenate([x_f, feats_date], axis=-1) 837 | feats_test.append(torch.tensor(x_f, dtype=torch.float)) 838 | 839 | # Select only last rolling window from test data 840 | m = len(data) 841 | data_test, feats_test = data_test[-m:], feats_test[-m:] 842 | 843 | data = np.array(data) 844 | data = torch.tensor(data, dtype=torch.float) 845 | 846 | # Features 847 | feats = torch.tensor(feats, dtype=torch.float)#.unsqueeze(dim=-1) 848 | 849 | n = data.shape[1] 850 | #train_len = int(0.9*n) 851 | #dev_len = int(0.1*n) 852 | dev_len = 24*7 853 | train_len = n - dev_len 854 | #test_len = data_test.shape[1] 855 | 856 | data_train = data[:, :train_len] 857 | feats_train = feats[:, :train_len] 858 | 859 | data_dev = [] 860 | feats_dev = [] 861 | dev_tsid_map= {} 862 | for i in range(data.shape[0]): 863 | for j in range(train_len+N_output, n+1, N_output): 864 | if j <= n: 865 | data_dev.append(data[i, :j]) 866 | feats_dev.append(feats[i, :j]) 867 | dev_tsid_map[len(data_dev)-1] = i 868 | #for i in range(len(data_test)): 869 | # for j in range(n+N_output, n+1, N_output): 870 | # if j <= len(data_test[i]): 871 | # data_test.append(data_test[i, :j]) 872 | # feats_test.append(feats_test[i, :j]) 873 | # test_tsid_map[len(data_test)-1] = i % len(data) 874 | test_tsid_map = {} 875 | for i in range(len(data_test)): 876 | test_tsid_map[i] = i % len(data) 877 | 878 | data_train = get_list_of_dict_format(data_train) 879 | data_dev = get_list_of_dict_format(data_dev) 880 | data_test = get_list_of_dict_format(data_test) 881 | 882 | for i in range(len(data_train)): 883 | data_train[i]['feats'] = feats_train[i] 884 | for i in range(len(data_dev)): 885 | data_dev[i]['feats'] = feats_dev[i] 886 | for i in range(len(data_test)): 887 | data_test[i]['feats'] = feats_test[i] 888 | 889 | feats_info = {0:(24, 16)} 890 | i = len(feats_info) 891 | for j in range(i, data_train[0]['feats'].shape[-1]): 892 | feats_info[j] = (-1, -1) 893 | 894 | #import ipdb;ipdb.set_trace() 895 | # Only consider last (N_input+N_output)-length chunk from dev and test data 896 | seq_len = 2*N_input+N_output 897 | data_dev = prune_dev_test_sequence(data_dev, seq_len) 898 | data_test = prune_dev_test_sequence(data_test, seq_len) 899 | #import ipdb;ipdb.set_trace() 900 | 901 | return ( 902 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 903 | feats_info 904 | ) 905 | 906 | def parse_etthourly(dataset_name, N_input, N_output, t2v_type=None): 907 | 908 | # train_len = 52*168 909 | # dev_len = 17*168 910 | # test_len = 17*168 911 | # n = train_len + dev_len + test_len 912 | # df = pd.read_csv('../Informer2020/data/ETT/ETTh1.csv').iloc[:n] 913 | 914 | df = pd.read_csv(os.path.join(DATA_DIRS, 'data', 'ETT', 'ETTh1.csv')) 915 | # Remove incomplete data from last day 916 | df = df[:-20] 917 | 918 | data = df[['OT']].to_numpy().T 919 | #data = np.expand_dims(data, axis=-1) 920 | 921 | n = data.shape[1] 922 | units = n//N_output 923 | dev_len = int(0.2*units) * N_output 924 | test_len = int(0.2*units) * N_output 925 | train_len = n - dev_len - test_len 926 | 927 | #train_len = int(0.6*n) 928 | #dev_len = int(0.2*n) 929 | #test_len = n - train_len - dev_len 930 | 931 | feats_cont = np.expand_dims(df[['HUFL','HULL','MUFL','MULL','LUFL','LULL']].to_numpy(), axis=0) 932 | 933 | cal_date = pd.to_datetime(df['date']) 934 | if t2v_type is None: 935 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 
936 | elif 'mdh' in t2v_type: 937 | feats_date = np.stack( 938 | [ 939 | cal_date.dt.month, 940 | cal_date.dt.day, 941 | cal_date.dt.hour, 942 | ], axis=1 943 | ) 944 | elif 'idx' in t2v_type or 'local' in t2v_type: 945 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 946 | feats_date = np.expand_dims(feats_date, axis=0) 947 | 948 | feats_hod = np.expand_dims(np.expand_dims(cal_date.dt.hour.values, axis=-1), axis=0) 949 | 950 | #import ipdb ; ipdb.set_trace() 951 | 952 | #feats = np.concatenate([feats_discrete, feats_cont], axis=-1) 953 | #feats = feats_discrete 954 | feats = np.concatenate([feats_hod, feats_cont, feats_date], axis=-1) 955 | 956 | #data = (data - np.mean(data, axis=0, keepdims=True)).T 957 | 958 | data = torch.tensor(data, dtype=torch.float) 959 | feats = torch.tensor(feats, dtype=torch.float) 960 | 961 | data_train = data[:, :train_len] 962 | feats_train = feats[:, :train_len] 963 | 964 | data_dev, data_test = [], [] 965 | feats_dev, feats_test = [], [] 966 | dev_tsid_map, test_tsid_map = [], [] 967 | for i in range(data.shape[0]): 968 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 969 | if j <= n: 970 | data_dev.append(data[i, :j]) 971 | feats_dev.append(feats[i, :j]) 972 | dev_tsid_map.append(i) 973 | for i in range(data.shape[0]): 974 | for j in range(train_len+dev_len+N_output, n+1, N_output): 975 | if j <= n: 976 | print(i,j,n) 977 | data_test.append(data[i, :j]) 978 | feats_test.append(feats[i, :j]) 979 | test_tsid_map.append(i) 980 | 981 | 982 | data_train = get_list_of_dict_format(data_train) 983 | data_dev = get_list_of_dict_format(data_dev) 984 | data_test = get_list_of_dict_format(data_test) 985 | 986 | 987 | decompose_type = 'STL' 988 | for i in range(len(data_train)): 989 | data_train[i]['feats'] = feats_train[i] 990 | for i in range(len(data_dev)): 991 | data_dev[i]['feats'] = feats_dev[i] 992 | for i in range(len(data_test)): 993 | data_test[i]['feats'] = feats_test[i] 994 | 995 | feats_info = {0:(24, 16), 1:(0, 1), 2:(0, 1), 3:(0, 1), 4:(0, 1), 5:(0, 1), 6:(0, 1)} 996 | #feats_info = {0:(24, 1)} 997 | #feats_info = {0:(0, 1)} 998 | i = len(feats_info) 999 | for j in range(i, data_train[0]['feats'].shape[-1]): 1000 | feats_info[j] = (-1, -1) 1001 | 1002 | seq_len = 2*N_input+N_output 1003 | data_dev = prune_dev_test_sequence(data_dev, seq_len) 1004 | data_test = prune_dev_test_sequence(data_test, seq_len) 1005 | #import ipdb ; ipdb.set_trace() 1006 | 1007 | return ( 1008 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 1009 | feats_info 1010 | ) 1011 | 1012 | 1013 | def parse_m4hourly(dataset_name, N_input, N_output): 1014 | hourly_train = pd.read_csv( 1015 | os.path.join(DATA_DIRS, '..', 'M4-methods', 'Dataset', 'Train', 'Hourly-train.csv')) 1016 | hourly_test = pd.read_csv( 1017 | os.path.join(DATA_DIRS, '..', 'M4-methods', 'Dataset', 'Test', 'Hourly-test.csv')) 1018 | m4_info = pd.read_csv(os.path.join(DATA_DIRS, '..', 'M4-methods', 'Dataset', 'M4-info.csv')) 1019 | 1020 | lens = [] 1021 | ht_np = hourly_train.values[:, 1:] 1022 | M, N = ht_np.shape 1023 | for i in range(M): 1024 | series = ht_np[i] 1025 | l = N - np.isnan(series.astype(float)).sum() 1026 | lens.append(l) 1027 | 1028 | hourly = [] 1029 | for (l, i, j) in zip(lens, hourly_train.values[:, 1:].astype(float), hourly_test.values[:, 1:].astype(float)): 1030 | hourly.append(np.concatenate([i[:l], j])) 1031 | 1032 | hourly_train_merged = pd.merge(hourly_train, m4_info, left_on='V1', right_on='M4id', how='left') 1033 | starting_dates = 
hourly_train_merged['StartingDate'] 1034 | starting_hours = pd.to_datetime(starting_dates).dt.hour.values 1035 | hod = [] 1036 | for i, series in enumerate(hourly): 1037 | hod_s = np.expand_dims((starting_hours[i] + np.arange(len(series))) % 24, axis=-1) 1038 | hod.append(hod_s) 1039 | #hod = (np.expand_dims(starting_hours, axis=1) + np.cumsum(np.ones(hourly_train.shape), axis=1) - 1.) % 24 1040 | 1041 | data = hourly 1042 | feats = hod 1043 | 1044 | data_train, data_dev, data_test = [], [], [] 1045 | feats_train, feats_dev, feats_test = [], [], [] 1046 | dev_tsid_map, test_tsid_map = [], [] 1047 | 1048 | dev_len = N_output 1049 | test_len = N_output 1050 | 1051 | for i in range(len(data)): 1052 | n = len(data[i]) 1053 | train_len = n - dev_len - test_len 1054 | data_train.append(data[i][:train_len]) 1055 | feats_train.append(feats[i][:train_len]) 1056 | 1057 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 1058 | if j <= n: 1059 | data_dev.append(data[i][:j]) 1060 | feats_dev.append(feats[i][:j]) 1061 | dev_tsid_map.append(i) 1062 | 1063 | for j in range(train_len+dev_len+N_output, n+1, N_output): 1064 | if j <= n: 1065 | data_test.append(data[i][:j]) 1066 | feats_test.append(feats[i][:j]) 1067 | test_tsid_map.append(i) 1068 | 1069 | 1070 | data_train = get_list_of_dict_format(data_train) 1071 | data_dev = get_list_of_dict_format(data_dev) 1072 | data_test = get_list_of_dict_format(data_test) 1073 | 1074 | 1075 | for i in range(len(data_train)): 1076 | data_train[i]['target'] = torch.tensor(data_train[i]['target']) 1077 | data_train[i]['feats'] = torch.tensor(feats_train[i]) 1078 | seq = data_train[i]['target'] 1079 | components = seasonal_decompose( 1080 | seq, model='additive', period=24, extrapolate_trend=True 1081 | ) 1082 | coeffs = torch.tensor( 1083 | [components.trend, components.seasonal, components.resid] 1084 | ).transpose(0,1) 1085 | coeffs = (coeffs - coeffs.mean(dim=-1, keepdims=True)) / coeffs.std(dim=-1, keepdims=True) 1086 | data_train[i]['coeffs'] = coeffs 1087 | for i in range(len(data_dev)): 1088 | data_dev[i]['target'] = torch.tensor(data_dev[i]['target']) 1089 | data_dev[i]['feats'] = torch.tensor(feats_dev[i]) 1090 | seq = data_dev[i]['target'] 1091 | components = seasonal_decompose( 1092 | seq, model='additive', period=24, extrapolate_trend=True 1093 | ) 1094 | coeffs = torch.tensor( 1095 | [components.trend, components.seasonal, components.resid] 1096 | ).transpose(0,1) 1097 | coeffs = (coeffs - coeffs.mean(dim=-1, keepdims=True)) / coeffs.std(dim=-1, keepdims=True) 1098 | data_dev[i]['coeffs'] = coeffs 1099 | for i in range(len(data_test)): 1100 | data_test[i]['target'] = torch.tensor(data_test[i]['target']) 1101 | data_test[i]['feats'] = torch.tensor(feats_test[i]) 1102 | seq = data_test[i]['target'] 1103 | components = seasonal_decompose( 1104 | seq, model='additive', period=24, extrapolate_trend=True 1105 | ) 1106 | coeffs = torch.tensor( 1107 | [components.trend, components.seasonal, components.resid] 1108 | ).transpose(0,1) 1109 | coeffs = (coeffs - coeffs.mean(dim=-1, keepdims=True)) / coeffs.std(dim=-1, keepdims=True) 1110 | data_test[i]['coeffs'] = coeffs 1111 | 1112 | feats_info = {0:(24, 16)}#, 1:(0, 1), 2:(0, 1), 3:(0, 1), 4:(0, 1), 5:(0, 1), 6:(0, 1)} 1113 | coeffs_info = {0:(0, 1), 1:(0, 1), 2:(0, 1)} 1114 | 1115 | return ( 1116 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 1117 | feats_info, coeffs_info 1118 | ) 1119 | 1120 | 1121 | def parse_m4daily(dataset_name, N_input, N_output): 1122 | daily_train = 
pd.read_csv( 1123 | os.path.join(DATA_DIRS, '..', 'M4-methods', 'Dataset', 'Train', 'Daily-train.csv')) 1124 | daily_test = pd.read_csv( 1125 | os.path.join(DATA_DIRS, '..', 'M4-methods', 'Dataset', 'Test', 'Daily-test.csv')) 1126 | m4_info = pd.read_csv(os.path.join(DATA_DIRS, '..', 'M4-methods', 'Dataset', 'M4-info.csv')) 1127 | 1128 | daily_train_merged = pd.merge(daily_train, m4_info, left_on='V1', right_on='M4id', how='left') 1129 | categories = daily_train_merged['category'] 1130 | indices = categories == 'Industry' 1131 | #indices = categories == 'Macro' 1132 | #indices = categories == 'Finance' 1133 | daily_train = daily_train.loc[indices] 1134 | daily_test = daily_test.loc[indices] 1135 | 1136 | lens = [] 1137 | dt_np = daily_train.values[:, 1:] 1138 | M, N = dt_np.shape 1139 | for i in range(M): 1140 | series = dt_np[i] 1141 | l = N - np.isnan(series.astype(float)).sum() 1142 | lens.append(l) 1143 | 1144 | daily = [] 1145 | for (l, i, j) in zip(lens, daily_train.values[:, 1:].astype(float), daily_test.values[:, 1:].astype(float)): 1146 | daily.append(np.concatenate([i[:l], j])) 1147 | 1148 | starting_dates = daily_train_merged['StartingDate'] 1149 | starting_doy = pd.to_datetime(starting_dates).dt.dayofweek 1150 | doy = [] 1151 | for i, series in enumerate(daily): 1152 | doy_s = np.expand_dims((starting_doy[i] + np.arange(len(series))) % 7, axis=-1) 1153 | doy.append(doy_s) 1154 | 1155 | data = daily #[401:402] 1156 | feats = doy #[401:402] 1157 | 1158 | data_train, data_dev, data_test = [], [], [] 1159 | feats_train, feats_dev, feats_test = [], [], [] 1160 | dev_tsid_map, test_tsid_map = [], [] 1161 | 1162 | dev_len = N_output*3 1163 | test_len = N_output 1164 | 1165 | for i in range(len(data)): 1166 | n = len(data[i]) 1167 | train_len = n - dev_len - test_len 1168 | data_train.append(data[i][:train_len]) 1169 | feats_train.append(feats[i][:train_len]) 1170 | 1171 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 1172 | if j <= n: 1173 | data_dev.append(data[i][:j]) 1174 | feats_dev.append(feats[i][:j]) 1175 | dev_tsid_map.append(i) 1176 | 1177 | for j in range(train_len+dev_len+N_output, n+1, N_output): 1178 | if j <= n: 1179 | data_test.append(data[i][:j]) 1180 | feats_test.append(feats[i][:j]) 1181 | test_tsid_map.append(i) 1182 | 1183 | 1184 | data_train = get_list_of_dict_format(data_train) 1185 | data_dev = get_list_of_dict_format(data_dev) 1186 | data_test = get_list_of_dict_format(data_test) 1187 | 1188 | 1189 | decompose_type = 'STL' 1190 | period = 90 1191 | for i in range(len(data_train)): 1192 | data_train[i]['target'] = torch.tensor(data_train[i]['target']) 1193 | data_train[i]['feats'] = feats_train[i] 1194 | seq = data_train[i]['target'] 1195 | data_train[i]['coeffs'] = decompose_seq(seq, decompose_type, period, N_output, True) 1196 | print('train:', i, len(data_train)) 1197 | for i in range(len(data_dev)): 1198 | data_dev[i]['target'] = torch.tensor(data_dev[i]['target']) 1199 | data_dev[i]['feats'] = feats_dev[i] 1200 | #import ipdb ; ipdb.set_trace() 1201 | seq = data_dev[i]['target'] 1202 | data_dev[i]['coeffs'] = decompose_seq(seq, decompose_type, period, len(data_train[dev_tsid_map[i]]), False) 1203 | print('dev:', i, len(data_dev)) 1204 | for i in range(len(data_test)): 1205 | data_test[i]['target'] = torch.tensor(data_test[i]['target']) 1206 | data_test[i]['feats'] = feats_test[i] 1207 | seq = data_test[i]['target'] 1208 | data_test[i]['coeffs'] = decompose_seq(seq, decompose_type, period, len(data_train[test_tsid_map[i]]), False) 1209 | 
print('test:', i, len(data_test)) 1210 | 1211 | #import ipdb 1212 | #ipdb.set_trace() 1213 | 1214 | feats_info = {0:(7, 16)}#, 1:(0, 1), 2:(0, 1), 3:(0, 1), 4:(0, 1), 5:(0, 1), 6:(0, 1)} 1215 | coeffs_info = {0:(0, 1), 1:(0, 1), 2:(0, 1)} 1216 | 1217 | return ( 1218 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 1219 | feats_info, coeffs_info 1220 | ) 1221 | 1222 | 1223 | def parse_taxi30min(dataset_name, N_input, N_output, t2v_type=None): 1224 | 1225 | num_rolling_windows = 1 1226 | num_val_rolling_windows = 2 1227 | dataset_dir = 'taxi_30min' 1228 | 1229 | data, feats = [], [] 1230 | with open(os.path.join(DATA_DIRS, 'data', dataset_dir, 'train', 'train.json')) as f: 1231 | for line in f: 1232 | line_dict = json.loads(line) 1233 | x = line_dict['target'] 1234 | data.append(x) 1235 | n = len(x) 1236 | x_f = np.expand_dims(np.arange(len(x)) % 48, axis=-1) 1237 | cal_date = pd.date_range( 1238 | start=line_dict['start'], periods=len(line_dict['target']), freq='H' 1239 | ).to_series() 1240 | if t2v_type is None: 1241 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 1242 | elif 'mdh' in t2v_type: 1243 | feats_date = np.stack( 1244 | [ 1245 | cal_date.dt.month, 1246 | cal_date.dt.day, 1247 | cal_date.dt.hour 1248 | ], axis=1 1249 | ) 1250 | elif 'idx' in t2v_type or 'local' in t2v_type: 1251 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 1252 | x_f = np.concatenate([x_f, feats_date], axis=-1) 1253 | feats.append(x_f) 1254 | 1255 | data_test, feats_test = [], [] 1256 | with open(os.path.join(DATA_DIRS, 'data', dataset_dir, 'test', 'test.json')) as f: 1257 | for line in f: 1258 | line_dict = json.loads(line) 1259 | x = line_dict['target'] 1260 | x = np.array(x) 1261 | data_test.append(torch.tensor(x, dtype=torch.float)) 1262 | x_f = np.expand_dims((np.cumsum(np.ones_like(x)) % 48), axis=-1) 1263 | n = len(x) 1264 | cal_date = pd.date_range( 1265 | start=line_dict['start'], periods=len(line_dict['target']), freq='H' 1266 | ).to_series() 1267 | if t2v_type is None: 1268 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 1269 | elif 'mdh' in t2v_type: 1270 | feats_date = np.stack( 1271 | [ 1272 | cal_date.dt.month, 1273 | cal_date.dt.day, 1274 | cal_date.dt.hour 1275 | ], axis=1 1276 | ) 1277 | elif 'idx' in t2v_type or 'local' in t2v_type: 1278 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 
1279 | x_f = np.concatenate([x_f, feats_date], axis=-1) 1280 | feats_test.append(torch.tensor(x_f, dtype=torch.float)) 1281 | 1282 | #num_ts = 1214 * num_rolling_windows 1283 | num_ts = 1214 1284 | #import ipdb ; ipdb.set_trace() 1285 | data_test = data_test[ -num_ts : ] 1286 | feats_test = feats_test[ -num_ts : ] 1287 | data_test_rw, feats_test_rw = [], [] 1288 | j = num_rolling_windows - 1 1289 | exclude = j * N_output 1290 | while exclude>=0: 1291 | #print(j, exclude) 1292 | for i in range(len(data_test)): 1293 | if exclude>0: 1294 | data_test_rw.append(data_test[i][:-exclude]) 1295 | feats_test_rw.append(feats_test[i][:-exclude]) 1296 | else: 1297 | data_test_rw.append(data_test[i]) 1298 | feats_test_rw.append(feats_test[i]) 1299 | j-=1 1300 | exclude = j * N_output 1301 | data_test, feats_test = data_test_rw, feats_test_rw 1302 | 1303 | #for i in range(len(data_test)): 1304 | # assert data[i % len(data)]['lat'] == data_test[i]['lat'] 1305 | # assert data[i % len(data)]['lng'] == data_test[i]['lng'] 1306 | 1307 | metadata = json.load(open(os.path.join(DATA_DIRS, 'data', dataset_dir, 'metadata', 'metadata.json'))) 1308 | 1309 | data = np.array(data) 1310 | data = torch.tensor(data, dtype=torch.float) 1311 | feats = torch.tensor(feats, dtype=torch.float) 1312 | 1313 | n = data.shape[1] 1314 | dev_len = N_output*num_val_rolling_windows 1315 | train_len = n - dev_len 1316 | data_train = data[:, :train_len] 1317 | feats_train = feats[:, :train_len] 1318 | 1319 | data_dev, feats_dev = [], [] 1320 | dev_tsid_map = {} 1321 | for i in range(data.shape[0]): 1322 | for j in range(train_len+N_output, n+1, N_output): 1323 | if j <= n: 1324 | data_dev.append(data[i, :j]) 1325 | feats_dev.append(feats[i, :j]) 1326 | dev_tsid_map[len(data_dev)-1] = i 1327 | 1328 | test_tsid_map = {} 1329 | for i, entry in enumerate(data_test, 0): 1330 | test_tsid_map[i] = i%len(data) # Multiple test instances per train series. 
1331 | 1332 | data_train = get_list_of_dict_format(data_train) 1333 | data_dev = get_list_of_dict_format(data_dev) 1334 | data_test = get_list_of_dict_format(data_test) 1335 | 1336 | for i in range(len(data_train)): 1337 | data_train[i]['feats'] = feats_train[i] 1338 | for i in range(len(data_dev)): 1339 | data_dev[i]['feats'] = feats_dev[i] 1340 | for i in range(len(data_test)): 1341 | data_test[i]['feats'] = feats_test[i] 1342 | 1343 | feats_info = { 1344 | 0:(48, 16), 1345 | } 1346 | i = len(feats_info) 1347 | for j in range(i, i+data_train[0]['feats'].shape[-1]): 1348 | feats_info[j] = (-1, -1) 1349 | 1350 | #import ipdb ; ipdb.set_trace() 1351 | 1352 | return ( 1353 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, feats_info 1354 | ) 1355 | 1356 | 1357 | def parse_Traffic911(N_input, N_output): 1358 | call_df = pd.read_csv(os.path.join(DATA_DIRS, 'data', '911.csv')) 1359 | call_df = call_df[call_df['zip'].isnull()==False] # Ignore calls with NaN zip codes 1360 | # print('Types of Emergencies') 1361 | # print(call_df.title.apply(lambda x: x.split(':')[0]).value_counts()) 1362 | call_df['type'] = call_df.title.apply(lambda x: x.split(':')[0]) 1363 | # print('Subtypes') 1364 | # for each in call_df.type.unique(): 1365 | # subtype_count = call_df[call_df.title.apply(lambda x: x.split(':')[0]==each)].title.value_counts() 1366 | # print('For', each, 'type of Emergency, we have ', subtype_count.count(), 'subtypes') 1367 | # print(subtype_count[subtype_count>100]) 1368 | # print('Out of 3 types, considering only Traffic') 1369 | call_data = call_df[call_df['type']=='Traffic'] 1370 | call_data['timeStamp'] = pd.to_datetime(call_data['timeStamp'], errors='coerce') 1371 | print("We have timeline from", call_data['timeStamp'].min(), "to", call_data['timeStamp'].max()) 1372 | call_data = call_data.sort_values('timeStamp') 1373 | call_data['timeStamp'] = pd.DatetimeIndex(call_data['timeStamp']).astype(np.int64)/1000000000 1374 | 1375 | num_hrs = int( 1376 | np.ceil( 1377 | (call_data['timeStamp'].values[-1] - call_data['timeStamp'].values[0])/(3600.) 
1378 | ) 1379 | ) 1380 | timestamps = call_data['timeStamp'].values 1381 | timestamps = timestamps - timestamps[0] 1382 | counts = create_bins(timestamps, bin_size=3600., num_bins=num_hrs) 1383 | data = np.expand_dims(np.array(counts), axis=0) 1384 | #data = np.expand_dims(data, axis=2) 1385 | data = torch.tensor(data, dtype=torch.float) 1386 | 1387 | n = data.shape[1] 1388 | train_len = int(0.7*n) 1389 | dev_len = int(0.1*n) 1390 | test_len = n - train_len - dev_len 1391 | 1392 | feats = np.abs((np.ones((data.shape[0], 1)) * np.expand_dims(np.arange(n), axis=0) % 24)) 1393 | feats = np.expand_dims(feats, axis=-1) 1394 | 1395 | feats = torch.tensor(feats, dtype=torch.float) 1396 | 1397 | data_train = data[:, :train_len] 1398 | feats_train = feats[:, :train_len] 1399 | 1400 | data_dev, data_test = [], [] 1401 | feats_dev, feats_test = [], [] 1402 | dev_tsid_map, test_tsid_map = {}, {} 1403 | for i in range(data.shape[0]): 1404 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 1405 | if j <= n: 1406 | data_dev.append(data[i, :j]) 1407 | feats_dev.append(feats[i, :j]) 1408 | dev_tsid_map[len(data_dev)-1] = i 1409 | for i in range(data.shape[0]): 1410 | for j in range(train_len+dev_len+N_output, n+1, N_output): 1411 | if j <= n: 1412 | data_test.append(data[i, :j]) 1413 | feats_test.append(feats[i, :j]) 1414 | test_tsid_map[len(data_test)-1] = i 1415 | 1416 | 1417 | data_train = get_list_of_dict_format(data_train) 1418 | data_dev = get_list_of_dict_format(data_dev) 1419 | data_test = get_list_of_dict_format(data_test) 1420 | 1421 | 1422 | decompose_type = 'STL' 1423 | period=96 1424 | for i in range(len(data_train)): 1425 | data_train[i]['feats'] = feats_train[i] 1426 | seq = data_train[i]['target'] 1427 | #data_train[i]['coeffs'] = decompose_seq(seq, decompose_type, period, N_output, True) 1428 | data_train[i]['coeffs'] = torch.zeros((len(seq), 1), dtype=torch.float) 1429 | #print('train:', i, len(data_train)) 1430 | for i in range(len(data_dev)): 1431 | data_dev[i]['feats'] = feats_dev[i] 1432 | #import ipdb ; ipdb.set_trace() 1433 | seq = data_dev[i]['target'] 1434 | #data_dev[i]['coeffs'] = decompose_seq(seq, decompose_type, period, N_output, False) 1435 | data_dev[i]['coeffs'] = torch.zeros((len(seq), 1), dtype=torch.float) 1436 | #print('dev:', i, len(data_dev)) 1437 | for i in range(len(data_test)): 1438 | data_test[i]['feats'] = feats_test[i] 1439 | seq = data_test[i]['target'] 1440 | #data_test[i]['coeffs'] = decompose_seq(seq, decompose_type, period, N_output, False) 1441 | data_test[i]['coeffs'] = torch.zeros((len(seq), 1), dtype=torch.float) 1442 | #print('test:', i, len(data_test)) 1443 | 1444 | 1445 | feats_info = {0:(24, 16)} 1446 | coeffs_info = {0:(0, 1)} 1447 | 1448 | return ( 1449 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 1450 | feats_info, coeffs_info 1451 | ) 1452 | 1453 | def parse_aggtest(dataset_name, N_input, N_output, t2v_type=None): 1454 | n = 500 1455 | data = np.expand_dims(np.arange(n), axis=0) 1456 | units = n//N_output 1457 | dev_len = int(0.2*units) * N_output 1458 | test_len = int(0.2*units) * N_output 1459 | train_len = n - dev_len - test_len 1460 | 1461 | cal_date = pd.date_range( 1462 | start='2015-01-01 00:00:00', periods=n, freq='H' 1463 | ).to_series() 1464 | if t2v_type is None: 1465 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 
1466 | elif 'mdh' in t2v_type: 1467 | feats_date = np.stack( 1468 | [ 1469 | cal_date.dt.month, 1470 | cal_date.dt.day, 1471 | cal_date.dt.hour, 1472 | ], axis=1 1473 | ) 1474 | elif 'idx' in t2v_type or 'local' in t2v_type: 1475 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 1476 | feats_date = np.expand_dims(feats_date, axis=0) 1477 | 1478 | feats_hod = np.expand_dims(np.expand_dims(cal_date.dt.hour.values, axis=-1), axis=0) 1479 | feats = np.concatenate([feats_hod, feats_date], axis=-1) 1480 | 1481 | data = torch.tensor(data, dtype=torch.float) 1482 | feats = torch.tensor(feats, dtype=torch.float) 1483 | 1484 | data_train = data[:, :train_len] 1485 | feats_train = feats[:, :train_len] 1486 | 1487 | data_dev, data_test = [], [] 1488 | feats_dev, feats_test = [], [] 1489 | dev_tsid_map, test_tsid_map = [], [] 1490 | for i in range(data.shape[0]): 1491 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 1492 | if j <= n: 1493 | data_dev.append(data[i, :j]) 1494 | feats_dev.append(feats[i, :j]) 1495 | dev_tsid_map.append(i) 1496 | for i in range(data.shape[0]): 1497 | for j in range(train_len+dev_len+N_output, n+1, N_output): 1498 | if j <= n: 1499 | data_test.append(data[i, :j]) 1500 | feats_test.append(feats[i, :j]) 1501 | test_tsid_map.append(i) 1502 | 1503 | data_train = get_list_of_dict_format(data_train) 1504 | data_dev = get_list_of_dict_format(data_dev) 1505 | data_test = get_list_of_dict_format(data_test) 1506 | 1507 | for i in range(len(data_train)): 1508 | data_train[i]['feats'] = feats_train[i] 1509 | for i in range(len(data_dev)): 1510 | data_dev[i]['feats'] = feats_dev[i] 1511 | for i in range(len(data_test)): 1512 | data_test[i]['feats'] = feats_test[i] 1513 | 1514 | feats_info = {0:(24, 16)} 1515 | i = len(feats_info) 1516 | for j in range(i, data_train[0]['feats'].shape[-1]): 1517 | feats_info[j] = (-1, -1) 1518 | 1519 | return ( 1520 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, 1521 | feats_info 1522 | ) 1523 | 1524 | def parse_electricity(dataset_name, N_input, N_output, t2v_type=None): 1525 | #df = pd.read_csv('data/electricity_load_forecasting_panama/continuous_dataset.csv') 1526 | df = pd.read_csv( 1527 | os.path.join(DATA_DIRS, 'data', 'electricity_load_forecasting_panama', 'continuous_dataset.csv') 1528 | ) 1529 | data = df[['nat_demand']].to_numpy().T 1530 | 1531 | #n = data.shape[1] 1532 | n = (1903 + 1) * 24 # Select first n=1904*24 entries because of non-stationarity in the data after first n values 1533 | data = data[:, :n] 1534 | df = df.iloc[:n] 1535 | 1536 | 1537 | units = n//N_output 1538 | dev_len = int(0.2*units) * N_output 1539 | test_len = int(0.2*units) * N_output 1540 | train_len = n - dev_len - test_len 1541 | 1542 | #import ipdb ; ipdb.set_trace() 1543 | 1544 | cal_date = pd.to_datetime(df['datetime']) 1545 | if t2v_type is None: 1546 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 1547 | elif 'mdh' in t2v_type: 1548 | feats_date = np.stack( 1549 | [ 1550 | cal_date.dt.month, 1551 | cal_date.dt.day, 1552 | cal_date.dt.hour, 1553 | ], axis=1 1554 | ) 1555 | elif 'idx' in t2v_type or 'local' in t2v_type: 1556 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 
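# The date features chosen above are paired with an hour-of-day feature
# (feats_hod) below and concatenated into `feats`. In feats_info, the
# entry 0 -> (24, 16) appears to mean a categorical feature of
# cardinality 24 embedded into 16 dimensions, while (-1, -1) marks the
# remaining columns as continuous pass-through features.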
1557 | feats_date = np.expand_dims(feats_date, axis=0) 1558 | 1559 | feats_hod = np.expand_dims(np.expand_dims(cal_date.dt.hour.values, axis=-1), axis=0) 1560 | 1561 | #import ipdb ; ipdb.set_trace() 1562 | 1563 | feats = np.concatenate([feats_hod, feats_date], axis=-1) 1564 | 1565 | data = torch.tensor(data, dtype=torch.float) 1566 | feats = torch.tensor(feats, dtype=torch.float) 1567 | 1568 | data_train = data[:, :train_len] 1569 | feats_train = feats[:, :train_len] 1570 | 1571 | data_dev, data_test = [], [] 1572 | feats_dev, feats_test = [], [] 1573 | dev_tsid_map, test_tsid_map = [], [] 1574 | for i in range(data.shape[0]): 1575 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 1576 | if j <= n: 1577 | data_dev.append(data[i, :j]) 1578 | feats_dev.append(feats[i, :j]) 1579 | dev_tsid_map.append(i) 1580 | for i in range(data.shape[0]): 1581 | for j in range(train_len+dev_len+N_output, n+1, N_output): 1582 | if j <= n: 1583 | data_test.append(data[i, :j]) 1584 | feats_test.append(feats[i, :j]) 1585 | test_tsid_map.append(i) 1586 | 1587 | data_train = get_list_of_dict_format(data_train) 1588 | data_dev = get_list_of_dict_format(data_dev) 1589 | data_test = get_list_of_dict_format(data_test) 1590 | 1591 | for i in range(len(data_train)): 1592 | data_train[i]['feats'] = feats_train[i] 1593 | for i in range(len(data_dev)): 1594 | data_dev[i]['feats'] = feats_dev[i] 1595 | for i in range(len(data_test)): 1596 | data_test[i]['feats'] = feats_test[i] 1597 | 1598 | feats_info = {0:(24, 16)} 1599 | i = len(feats_info) 1600 | for j in range(i, data_train[0]['feats'].shape[-1]): 1601 | feats_info[j] = (-1, -1) 1602 | 1603 | seq_len = 2*N_input+N_output 1604 | data_dev = prune_dev_test_sequence(data_dev, seq_len) 1605 | data_test = prune_dev_test_sequence(data_test, seq_len) 1606 | 1607 | return ( 1608 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, feats_info 1609 | ) 1610 | 1611 | def parse_foodinflation(dataset_name, N_input, N_output, t2v_type=None): 1612 | #df = pd.read_csv('data/electricity_load_forecasting_panama/continuous_dataset.csv') 1613 | df = pd.read_csv( 1614 | os.path.join(DATA_DIRS, 'data', 'foodinflation', 'train_data.csv') 1615 | ) 1616 | #df['date'] = pd.to_datetime(df['date']) 1617 | data = df[df.columns[1:]].to_numpy().T 1618 | 1619 | m, n = data.shape[0], data.shape[1] 1620 | 1621 | #units = n//N_output 1622 | #dev_len = int(0.2*units) * N_output 1623 | #test_len = int(0.2*units) * N_output 1624 | test_len = 30*3 1625 | dev_len = 30*12 1626 | train_len = n - dev_len - test_len 1627 | 1628 | #import ipdb ; ipdb.set_trace() 1629 | 1630 | cal_date = pd.to_datetime(df['date']) 1631 | if t2v_type is None: 1632 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 1633 | elif 'mdh' in t2v_type: 1634 | feats_date = np.stack( 1635 | [ 1636 | cal_date.dt.month, 1637 | cal_date.dt.day, 1638 | cal_date.dt.hour, 1639 | ], axis=1 1640 | ) 1641 | elif 'idx' in t2v_type or 'local' in t2v_type: 1642 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 
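# foodinflation contains m price series sharing one calendar, so the
# date features are tiled across all m series below. Day-of-month,
# day-of-week, month and series-id features are all computed, but only
# day-of-month, day-of-week and the date feature are concatenated; the
# other combinations are left commented out.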
1643 | feats_date = np.tile(np.expand_dims(feats_date, axis=0), (m, 1, 1)) 1644 | 1645 | feats_day = np.expand_dims(np.expand_dims(cal_date.dt.day.values-1, axis=-1), axis=0) 1646 | feats_day = np.tile(feats_day, (m, 1, 1)) 1647 | feats_month = np.expand_dims(np.expand_dims(cal_date.dt.month.values-1, axis=-1), axis=0) 1648 | feats_month = np.tile(feats_month, (m, 1, 1)) 1649 | feats_dow = np.expand_dims(np.expand_dims(cal_date.dt.dayofweek.values, axis=-1), axis=0) 1650 | feats_dow = np.tile(feats_dow, (m, 1, 1)) 1651 | 1652 | feats_tsid = np.expand_dims(np.expand_dims(np.arange(m), axis=1), axis=2) 1653 | feats_tsid = np.tile(feats_tsid, (1, n, 1)) 1654 | 1655 | #import ipdb ; ipdb.set_trace() 1656 | 1657 | #feats = np.concatenate([feats_day, feats_month, feats_dow, feats_date], axis=-1) 1658 | #feats = np.concatenate([feats_day, feats_dow, feats_tsid, feats_date], axis=-1) 1659 | feats = np.concatenate([feats_day, feats_dow, feats_date], axis=-1) 1660 | #feats = np.concatenate([feats_day, feats_dow, feats_tsid, feats_date], axis=-1) 1661 | 1662 | 1663 | data = torch.tensor(data, dtype=torch.float) 1664 | feats = torch.tensor(feats, dtype=torch.float) 1665 | 1666 | data_train = data[:, :train_len] 1667 | feats_train = feats[:, :train_len] 1668 | 1669 | #import ipdb ; ipdb.set_trace() 1670 | 1671 | data_dev, data_test = [], [] 1672 | feats_dev, feats_test = [], [] 1673 | dev_tsid_map, test_tsid_map = [], [] 1674 | for i in range(data.shape[0]): 1675 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 1676 | if j <= n: 1677 | data_dev.append(data[i, :j]) 1678 | feats_dev.append(feats[i, :j]) 1679 | dev_tsid_map.append(i) 1680 | for i in range(data.shape[0]): 1681 | for j in range(train_len+dev_len+N_output, n+1, N_output): 1682 | if j <= n: 1683 | data_test.append(data[i, :j]) 1684 | feats_test.append(feats[i, :j]) 1685 | test_tsid_map.append(i) 1686 | 1687 | data_train = get_list_of_dict_format(data_train) 1688 | data_dev = get_list_of_dict_format(data_dev) 1689 | data_test = get_list_of_dict_format(data_test) 1690 | 1691 | for i in range(len(data_train)): 1692 | data_train[i]['feats'] = feats_train[i] 1693 | for i in range(len(data_dev)): 1694 | data_dev[i]['feats'] = feats_dev[i] 1695 | for i in range(len(data_test)): 1696 | data_test[i]['feats'] = feats_test[i] 1697 | 1698 | feats_info = {0:(31, 16), 1:(12, 6)} 1699 | feats_info = {0:(31, 16), 1:(7, 6)} 1700 | #feats_info = {0:(31, 16), 1:(7, 6), 2:(m, -2)} 1701 | i = len(feats_info) 1702 | for j in range(i, data_train[0]['feats'].shape[-1]): 1703 | feats_info[j] = (-1, -1) 1704 | 1705 | seq_len = 2*N_input+N_output 1706 | data_dev = prune_dev_test_sequence(data_dev, seq_len) 1707 | data_test = prune_dev_test_sequence(data_test, seq_len) 1708 | 1709 | #import ipdb ; ipdb.set_trace() 1710 | 1711 | return ( 1712 | data_train, data_dev, data_test, dev_tsid_map, test_tsid_map, feats_info 1713 | ) 1714 | 1715 | def parse_foodinflationmonthly(dataset_name, N_input, N_output, t2v_type=None): 1716 | #df = pd.read_csv('data/electricity_load_forecasting_panama/continuous_dataset.csv') 1717 | df = pd.read_csv( 1718 | os.path.join(DATA_DIRS, 'data', 'foodinflation', 'train_data.csv') 1719 | ) 1720 | 1721 | df['date'] = pd.to_datetime(df['date']) 1722 | df_monthly = df.set_index(df['date']) 1723 | del df_monthly['date'] 1724 | agg_dict = {} 1725 | for food in df.columns[1:]: 1726 | agg_dict[food] = 'mean' 1727 | df_monthly = df_monthly.groupby(pd.Grouper(freq='M')).agg(agg_dict) 1728 | df_monthly.insert(0, 'date', df_monthly.index) 
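# The daily prices are averaged into calendar months above via
# pd.Grouper(freq='M'); the frame is then re-indexed below so the rest
# of the parser can treat the monthly data exactly like the daily data.
# A minimal stand-alone sketch of the same aggregation (the column name
# 'price' is hypothetical):
#   df.set_index('date').groupby(pd.Grouper(freq='M'))['price'].mean()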
1729 | df_monthly = df_monthly.set_index(np.arange(df_monthly.shape[0])) 1730 | #print(df_monthly) 1731 | df = df_monthly 1732 | 1733 | #df['date'] = pd.to_datetime(df['date']) 1734 | data = df[df.columns[1:]].to_numpy().T 1735 | 1736 | m, n = data.shape[0], data.shape[1] 1737 | 1738 | #units = n//N_output 1739 | #dev_len = int(0.2*units) * N_output 1740 | #test_len = int(0.2*units) * N_output 1741 | test_len = 3 1742 | dev_len = 6 1743 | train_len = n - dev_len - test_len 1744 | 1745 | #import ipdb ; ipdb.set_trace() 1746 | 1747 | cal_date = pd.to_datetime(df['date']) 1748 | feats_date = np.expand_dims(np.arange(0,n), axis=-1) / n * 10. 1749 | feats_date = np.tile(np.expand_dims(feats_date, axis=0), (m, 1, 1)) 1750 | 1751 | feats_day = np.expand_dims(np.expand_dims(cal_date.dt.day.values-1, axis=-1), axis=0) 1752 | feats_day = np.tile(feats_day, (m, 1, 1)) 1753 | feats_month = np.expand_dims(np.expand_dims(cal_date.dt.month.values-1, axis=-1), axis=0) 1754 | feats_month = np.tile(feats_month, (m, 1, 1)) 1755 | feats_dow = np.expand_dims(np.expand_dims(cal_date.dt.dayofweek.values, axis=-1), axis=0) 1756 | feats_dow = np.tile(feats_dow, (m, 1, 1)) 1757 | 1758 | feats_tsid = np.expand_dims(np.expand_dims(np.arange(m), axis=1), axis=2) 1759 | feats_tsid = np.tile(feats_tsid, (1, n, 1)) 1760 | 1761 | #import ipdb ; ipdb.set_trace() 1762 | 1763 | #feats = np.concatenate([feats_day, feats_month, feats_dow, feats_date], axis=-1) 1764 | #feats = np.concatenate([feats_day, feats_dow, feats_tsid, feats_date], axis=-1) 1765 | feats = np.concatenate([feats_month, feats_date], axis=-1) 1766 | #feats = np.concatenate([feats_month, feats_tsid, feats_date], axis=-1) 1767 | 1768 | 1769 | data = torch.tensor(data, dtype=torch.float) 1770 | feats = torch.tensor(feats, dtype=torch.float) 1771 | 1772 | data_train = data[:, :train_len] 1773 | feats_train = feats[:, :train_len] 1774 | 1775 | #import ipdb ; ipdb.set_trace() 1776 | 1777 | data_dev, data_test = [], [] 1778 | feats_dev, feats_test = [], [] 1779 | dev_tsid_map, test_tsid_map = [], [] 1780 | for i in range(data.shape[0]): 1781 | for j in range(train_len+N_output, train_len+dev_len+1, N_output): 1782 | if j <= n: 1783 | data_dev.append(data[i, :j]) 1784 | feats_dev.append(feats[i, :j]) 1785 | dev_tsid_map.append(i) 1786 | for i in range(data.shape[0]): 1787 | for j in range(train_len+dev_len+N_output, n+1, N_output): 1788 | if j <= n: 1789 | data_test.append(data[i, :j]) 1790 | feats_test.append(feats[i, :j]) 1791 | test_tsid_map.append(i) 1792 | 1793 | data_train = get_list_of_dict_format(data_train) 1794 | data_dev = get_list_of_dict_format(data_dev) 1795 | data_test = get_list_of_dict_format(data_test) 1796 | 1797 | for i in range(len(data_train)): 1798 | data_train[i]['feats'] = feats_train[i] 1799 | for i in range(len(data_dev)): 1800 | data_dev[i]['feats'] = feats_dev[i] 1801 | for i in range(len(data_test)): 1802 | data_test[i]['feats'] = feats_test[i] 1803 | 1804 | #feats_info = {0:(31, 16), 1:(12, 6)} 1805 | #feats_info = {0:(31, 16), 1:(7, 6), 2:(m, 16)} 1806 | feats_info = {0:(31, 16)} 1807 | #feats_info = {0:(31, 16), 1:(m, -2)} 1808 | i = len(feats_info) 1809 | for j in range(i, data_train[0]['feats'].shape[-1]): 1810 | feats_info[j] = (-1, -1) 1811 | 1812 | seq_len = 2*N_input+N_output 1813 | data_dev = prune_dev_test_sequence(data_dev, seq_len) 1814 | data_test = prune_dev_test_sequence(data_test, seq_len) 1815 | 1816 | #import ipdb ; ipdb.set_trace() 1817 | 1818 | return ( 1819 | data_train, data_dev, data_test, dev_tsid_map, 
test_tsid_map, feats_info 1820 | ) 1821 | -------------------------------------------------------------------------------- /data/synthetic_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import random 4 | from torch.utils.data import Dataset, DataLoader 5 | from copy import deepcopy 6 | 7 | def create_sin_dataset(N, N_input, N_output, sigma): 8 | # N: number of samples in each split (train, test) 9 | # N_input: import of time steps in input series 10 | # N_output: import of time steps in output series 11 | # sigma: standard deviation of additional noise 12 | #N = 2 13 | N_dev = int(0.2 * N) 14 | N_test = int(0.2 * N) 15 | num_rolling_windows = 1 16 | 17 | X = [] 18 | init_offset = np.linspace(-np.pi, np.pi, N) 19 | for k in range(N): 20 | inp = init_offset[k] + np.linspace(0, 30, N_input*10+4*N_output) 21 | #serie = np.sin(inp)*5 + np.random.normal(0, 0.1, size=(inp.shape)) + 5 22 | serie = np.sin(inp) + np.random.normal(0, 0.1, size=(inp.shape)) 23 | X.append(serie) 24 | X = np.expand_dims(np.stack(X), axis=-1) 25 | 26 | data_train = [] 27 | data_dev = [] 28 | data_test = [] 29 | dev_tsid_map, test_tsid_map = {}, {} 30 | 31 | for i, ts in enumerate(X): 32 | entry_train = {} 33 | train_len = len(ts) - 2 * num_rolling_windows * N_output 34 | seq_trn = ts[ : train_len ] # leaving two blocks for dev and test 35 | entry_train['target'] = seq_trn 36 | data_train.append(entry_train) 37 | 38 | for j in range(1, num_rolling_windows+1): 39 | entry_dev = dict() 40 | 41 | dev_len = train_len + j*N_output 42 | seq_dev = ts[ : dev_len ] 43 | 44 | entry_dev['target'] = seq_dev 45 | data_dev.append(entry_dev) 46 | dev_tsid_map[len(data_dev)-1] = i 47 | 48 | for j in range(1, num_rolling_windows+1): 49 | entry_test = dict() 50 | 51 | test_len = train_len + num_rolling_windows*N_output + j*N_output 52 | seq_test = deepcopy(ts[ : test_len]) 53 | 54 | entry_test['target'] = seq_test 55 | data_test.append(entry_test) 56 | test_tsid_map[len(data_dev)-1] = i 57 | 58 | 59 | return ( 60 | data_train, data_dev, data_test, 61 | dev_tsid_map, test_tsid_map 62 | ) 63 | 64 | 65 | def create_synthetic_dataset(N, N_input,N_output,sigma): 66 | # N: number of samples in each split (train, test) 67 | # N_input: import of time steps in input series 68 | # N_output: import of time steps in output series 69 | # sigma: standard deviation of additional noise 70 | N_dev = int(0.2 * N) 71 | N_test = int(0.2 * N) 72 | 73 | X = [] 74 | breakpoints = [] 75 | for k in range(2*N): 76 | serie = np.array([ sigma*random.random() for i in range(N_input+N_output)]) 77 | i1 = random.randint(1,10) 78 | i2 = random.randint(10,18) 79 | j1 = random.random() 80 | j2 = random.random() 81 | interval = abs(i2-i1) + random.randint(-3,3) 82 | serie[i1:i1+1] += j1 83 | serie[i2:i2+1] += j2 84 | serie[i2+interval:] += (j2-j1) 85 | X.append(serie) 86 | breakpoints.append(i2+interval) 87 | X = np.expand_dims(np.stack(X), axis=-1) 88 | breakpoints = np.array(breakpoints) 89 | return ( 90 | X[0:N, 0:N_input], X[0:N, N_input:N_input+N_output], 91 | X[N:N+N_dev, 0:N_input], X[N:N+N_dev, N_input:N_input+N_output], 92 | X[N+N_dev:N+N_dev+N_test, 0:N_input], X[N+N_dev:N+N_dev+N_test, N_input:N_input+N_output], 93 | breakpoints[0:N], breakpoints[N:N+N_dev], breakpoints[N+N_dev:N+N_dev+N_test] 94 | ) 95 | 96 | class SyntheticDataset(torch.utils.data.Dataset): 97 | def __init__(self, X_input, X_target, breakpoints): 98 | super(SyntheticDataset, self).__init__() 99 | 
self.X_input = X_input 100 | self.X_target = X_target 101 | self.breakpoints = breakpoints 102 | 103 | def __len__(self): 104 | return (self.X_input).shape[0] 105 | 106 | def __getitem__(self, idx): 107 | return (self.X_input[idx], self.X_target[idx], self.breakpoints[idx]) 108 | 109 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from tslearn.metrics import dtw, dtw_path 4 | from utils import unnormalize, normalize 5 | import utils 6 | from loss.dilate_loss import dilate_loss 7 | import properscoring as ps 8 | import time 9 | 10 | def eval_base_model(args, model_name, net, loader, norm, gamma, verbose=1, unnorm=False): 11 | 12 | inputs, target, pred_mu, pred_std, pred_d, pred_v = [], [], [], [], [], [] 13 | 14 | criterion = torch.nn.MSELoss() 15 | criterion_mae = torch.nn.L1Loss() 16 | losses_dilate = [] 17 | losses_mse = [] 18 | losses_mae = [] 19 | losses_dtw = [] 20 | losses_tdi = [] 21 | losses_crps = [] 22 | losses_nll = [] 23 | losses_ql = [] 24 | 25 | for i, data in enumerate(loader, 0): 26 | loss_mse, loss_dtw, loss_tdi, loss_mae, losses_nll, losses_ql = torch.tensor(0), torch.tensor(0), torch.tensor(0), torch.tensor(0), torch.tensor(0), torch.tensor(0) 27 | # get the inputs 28 | batch_inputs, batch_target, feats_in, feats_tgt, ids, _, = data 29 | batch_size, N_output = batch_inputs.shape[0:2] 30 | # DO NOT PASS TARGET during forward pass 31 | #import ipdb ; ipdb.set_trace() 32 | with torch.no_grad(): 33 | out = net( 34 | feats_in.to(args.device), batch_inputs.to(args.device), feats_tgt.to(args.device) 35 | ) 36 | if net.is_signature: 37 | if net.estimate_type in ['point']: 38 | batch_pred_mu, _, _ = out 39 | elif net.estimate_type in ['variance']: 40 | batch_pred_mu, batch_pred_d, _, _ = out 41 | elif net.estimate_type in ['covariance']: 42 | batch_pred_mu, batch_pred_d, batch_pred_v, _, _ = out 43 | elif net.estimate_type in ['bivariate']: 44 | batch_pred_mu, batch_pred_d, _, _, _ = out 45 | else: 46 | if net.estimate_type in ['point']: 47 | batch_pred_mu = out 48 | elif net.estimate_type in ['variance']: 49 | batch_pred_mu, batch_pred_d = out 50 | elif net.estimate_type in ['covariance']: 51 | batch_pred_mu, batch_pred_d, batch_pred_v = out 52 | elif net.estimate_type in ['bivariate']: 53 | batch_pred_mu, batch_pred_d, _ = out 54 | batch_pred_mu = batch_pred_mu.cpu() 55 | if net.estimate_type == 'covariance': 56 | batch_pred_d = batch_pred_d.cpu() 57 | batch_pred_v = batch_pred_v.cpu() 58 | 59 | #import ipdb; ipdb.set_trace() 60 | dist = torch.distributions.lowrank_multivariate_normal.LowRankMultivariateNormal( 61 | torch.squeeze(batch_pred_mu, dim=-1), 62 | batch_pred_v, 63 | torch.squeeze(batch_pred_d, dim=-1) 64 | ) 65 | batch_pred_std = torch.diagonal( 66 | dist.covariance_matrix, dim1=-2, dim2=-1).unsqueeze(dim=-1) 67 | if unnorm: 68 | batch_pred_std = norm.unnormalize(batch_pred_std[..., 0], ids=ids, is_var=True).unsqueeze(-1) 69 | elif net.estimate_type in ['variance', 'bivariate']: 70 | batch_pred_std = batch_pred_d.cpu() 71 | batch_pred_v = torch.ones_like(batch_pred_mu) * 1e-9 72 | if unnorm: 73 | batch_pred_std = norm.unnormalize(batch_pred_std[..., 0]**2, ids=ids, is_var=True).sqrt().unsqueeze(-1) 74 | else: 75 | batch_pred_d = torch.ones_like(batch_pred_mu) * 1e-9 76 | batch_pred_v = torch.ones_like(batch_pred_mu) * 1e-9 77 | batch_pred_std = torch.ones_like(batch_pred_mu) * 1e-9 78 | 79 | 
#batch_target, _ = normalize(batch_target, norm, is_var=False) 80 | 81 | # Unnormalize the data 82 | if unnorm: 83 | batch_pred_mu = norm.unnormalize(batch_pred_mu[..., 0], ids, is_var=False).unsqueeze(-1) 84 | #if net.estimate_type == 'covariance': 85 | # #batch_pred_std = unnormalize(batch_pred_std, norm, is_var=True) 86 | # pass 87 | #elif net.estimate_type == 'variance': 88 | # batch_pred_v = torch.zeros_like(batch_pred_mu) 89 | #else: 90 | # batch_pred_std = torch.ones_like(batch_pred_mu) #* 1e-9 91 | # batch_pred_d = torch.zeros_like(batch_pred_mu) #* 1e-9 92 | # batch_pred_v = torch.zeros_like(batch_pred_mu) #* 1e-9 93 | 94 | if unnorm: 95 | batch_inputs = norm.unnormalize(batch_inputs[..., 0], ids, is_var=False).unsqueeze(-1) 96 | 97 | inputs.append(batch_inputs) 98 | target.append(batch_target) 99 | pred_mu.append(batch_pred_mu) 100 | pred_std.append(batch_pred_std) 101 | pred_d.append(batch_pred_d) 102 | pred_v.append(batch_pred_v) 103 | 104 | del batch_inputs 105 | del batch_target 106 | del batch_pred_mu 107 | del batch_pred_std 108 | del batch_pred_d 109 | del batch_pred_v 110 | #torch.cuda.empty_cache() 111 | #print(i) 112 | 113 | inputs = torch.cat(inputs, dim=0) 114 | target = torch.cat(target, dim=0) 115 | pred_mu = torch.cat(pred_mu, dim=0) 116 | pred_std = torch.cat(pred_std, dim=0) 117 | pred_d = torch.cat(pred_d, dim=0) 118 | pred_v = torch.cat(pred_v, dim=0) 119 | 120 | # MSE 121 | #import ipdb ; ipdb.set_trace() 122 | print('in eval ', target.shape, pred_mu.shape) 123 | loss_mse = criterion(target, pred_mu).item() 124 | loss_mae = criterion_mae(target, pred_mu).item() 125 | 126 | # DILATE loss 127 | if model_name in ['seq2seqdilate']: 128 | loss_dilate, loss_shape, loss_temporal = dilate_loss(target, pred_mu, args.alpha, args.gamma, args.device) 129 | else: 130 | loss_dilate = torch.zeros([]) 131 | loss_dilate = loss_dilate.item() 132 | 133 | # DTW and TDI 134 | loss_dtw, loss_tdi = 0,0 135 | M = target.shape[0] 136 | #for k in range(M): 137 | # print(k) 138 | # target_k_cpu = target[k,:,0:1].view(-1).detach().cpu().numpy() 139 | # output_k_cpu = pred_mu[k,:,0:1].view(-1).detach().cpu().numpy() 140 | 141 | # loss_dtw += dtw(target_k_cpu,output_k_cpu) 142 | # path, sim = dtw_path(target_k_cpu, output_k_cpu) 143 | 144 | # Dist = 0 145 | # for i,j in path: 146 | # Dist += (i-j)*(i-j) 147 | # loss_tdi += Dist / (N_output*N_output) 148 | 149 | loss_dtw = loss_dtw / M 150 | loss_tdi = loss_tdi / M 151 | 152 | # CRPS 153 | loss_crps = ps.crps_gaussian( 154 | target, mu=pred_mu.detach().numpy(), sig=pred_std.detach().numpy() 155 | ).mean() 156 | 157 | # CRPS in parts of horizon 158 | loss_crps_part = [] 159 | N = target.shape[1] 160 | p = max(int(N/4), 1) 161 | for i in range(0, N, p): 162 | if i+p<=N: 163 | loss_crps_part.append( 164 | ps.crps_gaussian( 165 | target[:, i:i+p], 166 | mu=pred_mu[:, i:i+p].detach().numpy(), 167 | sig=pred_std[:, i:i+p].detach().numpy() 168 | ).mean() 169 | ) 170 | loss_crps_part = np.array(loss_crps_part) 171 | 172 | # NLL 173 | if net.estimate_type == 'covariance': 174 | dist = torch.distributions.lowrank_multivariate_normal.LowRankMultivariateNormal( 175 | pred_mu.squeeze(dim=-1), pred_v, pred_d.squeeze(dim=-1) 176 | ) 177 | #dist = torch.distributions.normal.Normal(pred_mu, pred_std) 178 | loss_nll = -torch.mean(dist.log_prob(target.squeeze(dim=-1))).item() 179 | #loss_nll = -torch.mean(dist.log_prob(target)).item() 180 | elif net.estimate_type in ['variance', 'point', 'bivariate']: 181 | dist = torch.distributions.normal.Normal(pred_mu, 
pred_std) 182 | loss_nll = -torch.mean(dist.log_prob(target)).item() 183 | 184 | metric_dilate = loss_dilate 185 | metric_mse = loss_mse 186 | metric_mae = loss_mae 187 | metric_dtw = loss_dtw 188 | metric_tdi = loss_tdi 189 | metric_crps = loss_crps 190 | metric_crps_part = loss_crps_part 191 | metric_nll = loss_nll 192 | 193 | print('Eval dilateloss= ', metric_dilate, \ 194 | 'mse= ', metric_mse, ' dtw= ', metric_dtw, ' tdi= ', metric_tdi, 195 | 'crps=', metric_crps, 'crps_parts=', metric_crps_part, 196 | 'nll=', metric_nll) 197 | 198 | return ( 199 | inputs, target, pred_mu, pred_std, 200 | metric_dilate, metric_mse, metric_dtw, metric_tdi, 201 | metric_crps, metric_mae, metric_crps_part, metric_nll 202 | ) 203 | 204 | def eval_index_model(args, model_name, net, loader, norm, gamma, N_input, N_output, verbose=1): 205 | 206 | inputs_idx, inputs, target_gaps, target = [], [], [], [] 207 | pred_mu_gaps, pred_mu, pred_std_gaps, pred_std = [], [], [], [] 208 | 209 | criterion = torch.nn.MSELoss() 210 | criterion_mae = torch.nn.L1Loss() 211 | losses_mse_idx, losses_mse = [], [] 212 | losses_mae_idx, losses_mae = [], [] 213 | losses_crps_idx, losses_crps = [], [] 214 | 215 | for i, data in enumerate(loader, 0): 216 | # get the inputs 217 | ( 218 | batch_inputs, batch_target, 219 | feats_in, feats_tgt, norm, _, 220 | batch_indices_in, batch_indices_tgt, 221 | batch_gaps_in, batch_gaps_tgt 222 | ) = data 223 | batch_size, _ = batch_target.shape[0:2] 224 | 225 | # TODO: temporarily using indices_in as the sequence for 226 | # one-step-ahead prediction task 227 | batch_indices = torch.cat((batch_indices_in, batch_indices_tgt), dim=1) 228 | batch_gaps = torch.cat((batch_gaps_in, batch_gaps_tgt), dim=1) 229 | batch_seq = torch.cat((batch_inputs, batch_target), dim=1) 230 | batch_indices_in = batch_indices[:, :-1] 231 | batch_indices_tgt = batch_indices[:, 1:] 232 | batch_gaps_in = batch_gaps[:, :-1] 233 | batch_gaps_tgt = batch_gaps[:, 1:] 234 | batch_inputs = batch_seq[:, :-1] 235 | batch_target = batch_seq[:, 1:] 236 | 237 | end_idx = np.ones((batch_size, 1, 1)) * (N_input+N_output) 238 | hidden = net.init_hidden(batch_inputs.shape[0], args.device) 239 | ( 240 | batch_pred_mu_gaps, batch_pred_std_gaps, 241 | batch_pred_mu, batch_pred_std, _ 242 | ) = net(batch_gaps_in, batch_inputs, hidden) 243 | #) = net.simulate(batch_gaps_in, batch_inputs, hidden, end_idx) 244 | 245 | # Unnormalize the data 246 | #batch_pred_mu_gaps = unnormalize(batch_pred_mu_gaps, norm) 247 | #batch_pred_mu = unnormalize(batch_pred_mu, norm) 248 | if batch_pred_std is not None: 249 | #batch_pred_std_gaps = unnormalize(batch_pred_std_gaps, norm) 250 | #batch_pred_std = unnormalize(batch_pred_std, norm) 251 | pass 252 | else: 253 | batch_pred_std_gaps = torch.ones_like(batch_pred_mu_gaps) * 1e-9 254 | batch_pred_std = torch.ones_like(batch_pred_mu) * 1e-9 255 | 256 | inputs_idx.append(batch_indices_in) 257 | inputs.append(batch_inputs) 258 | target_gaps.append(batch_gaps_tgt) 259 | target.append(batch_target) 260 | pred_mu_gaps.append(batch_pred_mu_gaps) 261 | pred_mu.append(batch_pred_mu) 262 | pred_std_gaps.append(batch_pred_std_gaps) 263 | pred_std.append(batch_pred_std) 264 | 265 | #import ipdb 266 | #ipdb.set_trace() 267 | inputs_idx = torch.cat(inputs_idx, dim=0) 268 | inputs = torch.cat(inputs, dim=0) 269 | target_gaps = torch.cat(target_gaps, dim=0) 270 | target = torch.cat(target, dim=0) 271 | pred_mu_gaps = torch.cat(pred_mu_gaps, dim=0) 272 | pred_mu = torch.cat(pred_mu, dim=0) 273 | pred_std_gaps = 
torch.cat(pred_std_gaps, dim=0) 274 | pred_std = torch.cat(pred_std, dim=0) 275 | 276 | # MSE 277 | print(target.shape, pred_mu.shape) 278 | metric_mse_idx = criterion(target_gaps, pred_mu_gaps).item() 279 | metric_mse = criterion(target, pred_mu).item() 280 | metric_mae_idx = criterion_mae(target_gaps, pred_mu_gaps).item() 281 | metric_mae = criterion_mae(target, pred_mu).item() 282 | 283 | # # DILATE loss 284 | # if model_name in ['seq2seqdilate']: 285 | # loss_dilate, loss_shape, loss_temporal = dilate_loss(target, pred_mu, args.alpha, args.gamma, args.device) 286 | # else: 287 | # loss_dilate = torch.zeros([]) 288 | # loss_dilate = loss_dilate.item() 289 | 290 | # # DTW and TDI 291 | # loss_dtw, loss_tdi = 0,0 292 | # M = target.shape[0] 293 | # for k in range(M): 294 | # target_k_cpu = target[k,:,0:1].view(-1).detach().cpu().numpy() 295 | # output_k_cpu = pred_mu[k,:,0:1].view(-1).detach().cpu().numpy() 296 | # 297 | # loss_dtw += dtw(target_k_cpu,output_k_cpu) 298 | # path, sim = dtw_path(target_k_cpu, output_k_cpu) 299 | # 300 | # Dist = 0 301 | # for i,j in path: 302 | # Dist += (i-j)*(i-j) 303 | # loss_tdi += Dist / (N_output*N_output) 304 | # 305 | # loss_dtw = loss_dtw / M 306 | # loss_tdi = loss_tdi / M 307 | 308 | # CRPS 309 | metric_crps_idx = ps.crps_gaussian( 310 | target_gaps, mu=pred_mu_gaps.detach().numpy(), sig=pred_std_gaps.detach().numpy() 311 | ).mean() 312 | metric_crps = ps.crps_gaussian( 313 | target, mu=pred_mu.detach().numpy(), sig=pred_std.detach().numpy() 314 | ).mean() 315 | 316 | # # CRPS in parts of horizon 317 | # loss_crps_part = [] 318 | # N = target.shape[1] 319 | # p = max(int(N/4), 1) 320 | # for i in range(0, N, p): 321 | # if i+p<=N: 322 | # loss_crps_part.append( 323 | # ps.crps_gaussian( 324 | # target[:, i:i+p], 325 | # mu=pred_mu[:, i:i+p].detach().numpy(), 326 | # sig=pred_std[:, i:i+p].detach().numpy() 327 | # ).mean() 328 | # ) 329 | # loss_crps_part = np.array(loss_crps_part) 330 | 331 | 332 | print('mse_idx= ', metric_mse_idx, 'mse= ', metric_mse, 333 | 'mae_idx= ', metric_mae_idx, 'mae= ', metric_mae, 334 | 'crps_idx=', metric_crps_idx, 'crps=', metric_crps) 335 | 336 | return ( 337 | inputs_idx, inputs, target_gaps, target, 338 | pred_mu_gaps, pred_std_gaps, pred_mu, pred_std, 339 | metric_mse_idx, metric_mse, 340 | metric_mae_idx, metric_mae, 341 | metric_crps_idx, metric_crps 342 | ) 343 | 344 | def eval_inf_index_model( 345 | args, net, inf_test_inputs_dict, inf_test_norm_dict, target, norm, 346 | inf_test_feats_in_dict, inf_test_feats_tgt_dict, inf_test_inputs_gaps_dict, 347 | gamma, N_input, N_output, inf_test_targets_dict=None, verbose=1 348 | ): 349 | criterion = torch.nn.MSELoss() 350 | criterion_mae = torch.nn.L1Loss() 351 | losses_mse = [] 352 | losses_mae = [] 353 | losses_dtw = [] 354 | losses_tdi = [] 355 | losses_crps = [] 356 | 357 | batch_size, N_output = target.shape[0:2] 358 | end_idx = np.ones((batch_size, 1, 1)) * (N_input+N_output) 359 | pred_mu, pred_std = net( 360 | inf_test_feats_in_dict, inf_test_inputs_dict, 361 | inf_test_feats_tgt_dict, inf_test_norm_dict, 362 | inf_test_inputs_gaps_dict, N_input, N_output, 363 | targets_dict=inf_test_targets_dict, 364 | ) 365 | 366 | # Unnormalize 367 | pred_mu = unnormalize(pred_mu, norm, is_var=False) 368 | if pred_std is not None: 369 | pred_std = unnormalize(pred_std, norm, is_var=True) 370 | else: 371 | pred_std = torch.ones_like(pred_mu) * 1e-9 372 | 373 | # MSE 374 | loss_mse = criterion(target, pred_mu) 375 | loss_mae = criterion_mae(target, pred_mu) 376 | loss_dtw, 
loss_tdi = 0,0 377 | # DTW and TDI 378 | for k in range(batch_size): 379 | target_k_cpu = target[k,:,0:1].view(-1).detach().cpu().numpy() 380 | output_k_cpu = pred_mu[k,:,0:1].view(-1).detach().cpu().numpy() 381 | 382 | loss_dtw += dtw(target_k_cpu,output_k_cpu) 383 | path, sim = dtw_path(target_k_cpu, output_k_cpu) 384 | 385 | Dist = 0 386 | for i,j in path: 387 | Dist += (i-j)*(i-j) 388 | loss_tdi += Dist / (N_output*N_output) 389 | 390 | loss_dtw = loss_dtw /batch_size 391 | loss_tdi = loss_tdi / batch_size 392 | 393 | # CRPS 394 | loss_crps = ps.crps_gaussian( 395 | target, mu=pred_mu.detach().numpy(), sig=pred_std.detach().numpy() 396 | ) 397 | 398 | # print statistics 399 | losses_crps.append( loss_crps ) 400 | losses_mse.append( loss_mse.item() ) 401 | losses_mae.append( loss_mae.item() ) 402 | losses_dtw.append( loss_dtw ) 403 | losses_tdi.append( loss_tdi ) 404 | 405 | metric_mse = np.array(losses_mse).mean() 406 | metric_mae = np.array(losses_mae).mean() 407 | metric_dtw = np.array(losses_dtw).mean() 408 | metric_tdi = np.array(losses_tdi).mean() 409 | metric_crps = np.array(losses_crps).mean() 410 | 411 | #print('Eval mse= ', metric_mse, ' dtw= ', metric_dtw, ' tdi= ', metric_tdi) 412 | 413 | return ( 414 | pred_mu, pred_std, 415 | metric_mse, metric_dtw, metric_tdi, 416 | metric_crps, metric_mae 417 | ) 418 | 419 | 420 | def eval_inf_model_bak( 421 | args, net, inf_test_inputs_dict, inf_test_norm_dict, target, norm, ids, 422 | inf_test_feats_in_dict, inf_test_feats_tgt_dict, 423 | inf_test_coeffs_in_dict, 424 | gamma, inf_test_targets_dict=None, verbose=1): 425 | criterion = torch.nn.MSELoss() 426 | criterion_mae = torch.nn.L1Loss() 427 | losses_mse = [] 428 | losses_smape = [] 429 | losses_mae = [] 430 | losses_dtw = [] 431 | losses_tdi = [] 432 | losses_crps = [] 433 | 434 | batch_size, N_output = target.shape[0:2] 435 | pred_mu, pred_d, pred_v = net( 436 | inf_test_feats_in_dict, inf_test_inputs_dict, 437 | inf_test_coeffs_in_dict, 438 | inf_test_feats_tgt_dict, 439 | inf_test_norm_dict, 440 | targets_dict=inf_test_targets_dict, 441 | ) 442 | pred_mu = pred_mu.cpu() 443 | if pred_d is not None: 444 | pred_d = pred_d.cpu() 445 | pred_v = pred_v.cpu() 446 | 447 | # Compute pred_std = diag(pred_v.T * pred_v) + pred_d 448 | dist = torch.distributions.lowrank_multivariate_normal.LowRankMultivariateNormal( 449 | torch.squeeze(pred_mu, dim=-1), pred_v, torch.squeeze(pred_d, dim=-1) 450 | ) 451 | pred_std = torch.diagonal(dist.covariance_matrix, dim1=-2, dim2=-1).unsqueeze(dim=-1) 452 | else: 453 | pred_std = None 454 | 455 | # Unnormalize 456 | #import ipdb;ipdb.set_trace() 457 | pred_mu = norm.unnormalize(pred_mu[..., 0], ids).unsqueeze(-1) 458 | if pred_std is not None: 459 | pred_std = norm.unnormalize(pred_std[..., 0], ids, is_var=True).unsqueeze(-1) 460 | else: 461 | pred_std = torch.ones_like(pred_mu) * 1e-9 462 | pred_d = torch.ones_like(pred_mu) * 1e-9 463 | pred_v = torch.ones_like(pred_mu) * 1e-9 464 | 465 | # MSE 466 | loss_mse = criterion(target, pred_mu) 467 | loss_mae = criterion_mae(target, pred_mu) 468 | loss_smape = 200. 
* ((torch.abs(target-pred_mu)) / (torch.abs(target) + torch.abs(pred_mu))).mean() 469 | loss_dtw, loss_tdi = 0,0 470 | # DTW and TDI 471 | for k in range(batch_size): 472 | target_k_cpu = target[k,:,0:1].view(-1).detach().cpu().numpy() 473 | output_k_cpu = pred_mu[k,:,0:1].view(-1).detach().cpu().numpy() 474 | 475 | loss_dtw += dtw(target_k_cpu,output_k_cpu) 476 | path, sim = dtw_path(target_k_cpu, output_k_cpu) 477 | 478 | Dist = 0 479 | for i,j in path: 480 | Dist += (i-j)*(i-j) 481 | loss_tdi += Dist / (N_output*N_output) 482 | 483 | loss_dtw = loss_dtw /batch_size 484 | loss_tdi = loss_tdi / batch_size 485 | 486 | # CRPS 487 | loss_crps = ps.crps_gaussian( 488 | target, mu=pred_mu.detach().numpy(), sig=pred_std.detach().numpy() 489 | ) 490 | 491 | # print statistics 492 | losses_crps.append( loss_crps ) 493 | losses_mse.append( loss_mse.item() ) 494 | losses_mae.append( loss_mae.item() ) 495 | losses_dtw.append( loss_dtw ) 496 | losses_tdi.append( loss_tdi ) 497 | losses_smape.append( loss_smape.item() ) 498 | 499 | metric_mse = np.array(losses_mse).mean() 500 | metric_mae = np.array(losses_mae).mean() 501 | metric_dtw = np.array(losses_dtw).mean() 502 | metric_tdi = np.array(losses_tdi).mean() 503 | metric_crps = np.array(losses_crps).mean() 504 | metric_smape = np.array(losses_smape).mean() 505 | 506 | #print('Eval mse= ', metric_mse, ' dtw= ', metric_dtw, ' tdi= ', metric_tdi) 507 | 508 | return ( 509 | pred_mu, pred_std, pred_d, pred_v, 510 | metric_mse, metric_dtw, metric_tdi, 511 | metric_crps, metric_mae, metric_smape 512 | ) 513 | 514 | 515 | def eval_inf_model(args, net, dataset, which_split, gamma, verbose=1): 516 | ''' 517 | which_split: str (train, dev, test) 518 | ''' 519 | if which_split in ['train']: 520 | raise NotImplementedError 521 | elif which_split in ['dev']: 522 | loader_str = 'devloader' 523 | norm_str = 'dev_norm' 524 | elif which_split in ['test']: 525 | loader_str = 'testloader' 526 | norm_str = 'test_norm' 527 | 528 | criterion = torch.nn.MSELoss() 529 | criterion_mae = torch.nn.L1Loss() 530 | 531 | num_batches = 0 532 | for _ in dataset['sum'][1][loader_str]: 533 | num_batches += 1 534 | 535 | iters = {} 536 | for agg_method in net.aggregates: 537 | iters[agg_method] = {} 538 | for K in net.K_list: 539 | iters[agg_method][K] = iter(dataset[agg_method][K][loader_str]) 540 | 541 | norms = {} 542 | for agg_method in net.aggregates: 543 | norms[agg_method] = {} 544 | for K in net.K_list: 545 | norms[agg_method][K] = dataset[agg_method][K][norm_str] 546 | 547 | inputs, mapped_ids, target, pred_mu, pred_d, pred_v, pred_std = [], [], [], [], [], [], [] 548 | start_time = time.time() 549 | for i in range(num_batches): 550 | dataset_batch = {} 551 | for agg_method in net.aggregates: 552 | dataset_batch[agg_method] = {} 553 | for K in net.K_list: 554 | dataset_batch[agg_method][K] = iters[agg_method][K].next() 555 | 556 | #import ipdb ; ipdb.set_trace() 557 | print('Batch id:', i, num_batches) 558 | batch_pred_mu, batch_pred_d, batch_pred_v, batch_pred_std = net( 559 | dataset_batch, norms, which_split 560 | ) 561 | 562 | batch_target = dataset_batch['sum'][1][1] 563 | 564 | pred_mu.append(batch_pred_mu.cpu()) 565 | pred_d.append(batch_pred_d.cpu()) 566 | pred_v.append(batch_pred_v.cpu()) 567 | pred_std.append(batch_pred_std.cpu()) 568 | target.append(batch_target.cpu()) 569 | inputs.append(dataset_batch['sum'][1][0]) 570 | mapped_ids.append(dataset_batch['sum'][1][4]) 571 | 572 | end_time = time.time() 573 | 574 | pred_mu = torch.cat(pred_mu, dim=0) 575 | pred_d = 
torch.cat(pred_d, dim=0) 576 | pred_v = torch.cat(pred_v, dim=0) 577 | pred_std = torch.cat(pred_std, dim=0) 578 | target = torch.cat(target, dim=0) 579 | 580 | inputs = torch.cat(inputs, dim=0) 581 | mapped_ids = torch.cat(mapped_ids, dim=0) 582 | inputs = dataset['sum'][1][norm_str].unnormalize( 583 | inputs[..., 0], ids=mapped_ids 584 | ) 585 | #import ipdb ; ipdb.set_trace() 586 | #if which_split in ['dev']: 587 | # target = dataset['sum'][1][norm_str].unnormalize( 588 | # target[..., 0], ids=mapped_ids 589 | # ).unsqueeze(-1) 590 | 591 | # MSE 592 | loss_mse = criterion(target, pred_mu) 593 | loss_mae = criterion_mae(target, pred_mu) 594 | loss_smape = 200. * ((torch.abs(target-pred_mu)) / (torch.abs(target) + torch.abs(pred_mu))).mean() 595 | loss_dtw, loss_tdi = 0,0 596 | # DTW and TDI 597 | batch_size, N_output = target.shape[0:2] 598 | for k in range(batch_size): 599 | target_k_cpu = target[k,:,0:1].view(-1).detach().cpu().numpy() 600 | output_k_cpu = pred_mu[k,:,0:1].view(-1).detach().cpu().numpy() 601 | 602 | loss_dtw += dtw(target_k_cpu,output_k_cpu) 603 | path, sim = dtw_path(target_k_cpu, output_k_cpu) 604 | 605 | Dist = 0 606 | for i,j in path: 607 | Dist += (i-j)*(i-j) 608 | loss_tdi += Dist / (N_output*N_output) 609 | 610 | loss_dtw = loss_dtw /batch_size 611 | loss_tdi = loss_tdi / batch_size 612 | 613 | # CRPS 614 | loss_crps = ps.crps_gaussian( 615 | target, mu=pred_mu.detach().numpy(), sig=pred_std.detach().numpy() 616 | ).mean() 617 | 618 | #import ipdb ; ipdb.set_trace() 619 | metric_mse = loss_mse.mean() 620 | metric_mae = loss_mae.mean() 621 | metric_dtw = loss_dtw 622 | metric_tdi = loss_tdi 623 | metric_crps = loss_crps 624 | metric_smape = loss_smape.mean() 625 | total_time = end_time - start_time 626 | 627 | #print('Eval mse= ', metric_mse, ' dtw= ', metric_dtw, ' tdi= ', metric_tdi) 628 | #import ipdb ; ipdb.set_trace() 629 | 630 | return ( 631 | inputs, target, pred_mu, pred_std, pred_d, pred_v, 632 | metric_mse, metric_dtw, metric_tdi, 633 | metric_crps, metric_mae, metric_smape, total_time 634 | ) 635 | 636 | 637 | def eval_aggregates(inputs, target, mu, std, d, v=None, K_list=None): 638 | N = target.shape[1] 639 | 640 | criterion = torch.nn.MSELoss() 641 | criterion_mae = torch.nn.L1Loss() 642 | 643 | if K_list is None: 644 | K_candidates = [1, 2, 3, 4, 6, 12, 24, 30] 645 | else: 646 | K_candidates = K_list 647 | K_list = [K for K in K_candidates if N%K==0] 648 | 649 | agg2metrics = {} 650 | for agg in ['sum', 'slope', 'diff']: 651 | agg2metrics[agg] = {} 652 | for K in K_list: 653 | agg2metrics[agg][K] = {} 654 | target_agg = utils.aggregate_data(target[..., 0], agg, K, False).unsqueeze(-1) 655 | mu_agg = utils.aggregate_data(mu[..., 0], agg, K, False).unsqueeze(-1) 656 | var_agg = utils.aggregate_data(d[..., 0], agg, K, True, v=v).unsqueeze(-1) 657 | std_agg = torch.sqrt(var_agg) 658 | 659 | mse = criterion(target_agg, mu_agg).item() 660 | mae = criterion_mae(target_agg, mu_agg).item() 661 | 662 | crps = ps.crps_gaussian( 663 | target_agg.detach().numpy(), mu_agg.detach().numpy(), 664 | std_agg.detach().numpy() 665 | ).mean() 666 | 667 | agg2metrics[agg][K]['mse'] = mse 668 | agg2metrics[agg][K]['mae'] = mae 669 | agg2metrics[agg][K]['crps'] = crps 670 | 671 | 672 | return agg2metrics 673 | -------------------------------------------------------------------------------- /loss/dilate_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import soft_dtw 3 | from . 
import path_soft_dtw 4 | 5 | def dilate_loss(outputs, targets, alpha, gamma, device): 6 | # outputs, targets: shape (batch_size, N_output, 1) 7 | batch_size, N_output = outputs.shape[0:2] 8 | loss_shape = 0 9 | softdtw_batch = soft_dtw.SoftDTWBatch.apply 10 | D = torch.zeros((batch_size, N_output,N_output )).to(device) 11 | for k in range(batch_size): 12 | Dk = soft_dtw.pairwise_distances(targets[k,:,:].view(-1,1),outputs[k,:,:].view(-1,1)) 13 | D[k:k+1,:,:] = Dk 14 | loss_shape = softdtw_batch(D,gamma) 15 | 16 | path_dtw = path_soft_dtw.PathDTWBatch.apply 17 | path = path_dtw(D,gamma) 18 | Omega = soft_dtw.pairwise_distances(torch.range(1,N_output).view(N_output,1)).to(device) 19 | loss_temporal = torch.sum( path*Omega ) / (N_output*N_output) 20 | loss = alpha*loss_shape+ (1-alpha)*loss_temporal 21 | return loss, loss_shape, loss_temporal -------------------------------------------------------------------------------- /loss/path_soft_dtw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Function 4 | from numba import jit 5 | 6 | 7 | @jit(nopython = True) 8 | def my_max(x, gamma): 9 | # use the log-sum-exp trick 10 | max_x = np.max(x) 11 | exp_x = np.exp((x - max_x) / gamma) 12 | Z = np.sum(exp_x) 13 | return gamma * np.log(Z) + max_x, exp_x / Z 14 | 15 | @jit(nopython = True) 16 | def my_min(x,gamma) : 17 | min_x, argmax_x = my_max(-x, gamma) 18 | return - min_x, argmax_x 19 | 20 | @jit(nopython = True) 21 | def my_max_hessian_product(p, z, gamma): 22 | return ( p * z - p * np.sum(p * z) ) /gamma 23 | 24 | @jit(nopython = True) 25 | def my_min_hessian_product(p, z, gamma): 26 | return - my_max_hessian_product(p, z, gamma) 27 | 28 | 29 | @jit(nopython = True) 30 | def dtw_grad(theta, gamma): 31 | m = theta.shape[0] 32 | n = theta.shape[1] 33 | V = np.zeros((m + 1, n + 1)) 34 | V[:, 0] = 1e10 35 | V[0, :] = 1e10 36 | V[0, 0] = 0 37 | 38 | Q = np.zeros((m + 2, n + 2, 3)) 39 | 40 | for i in range(1, m + 1): 41 | for j in range(1, n + 1): 42 | # theta is indexed starting from 0. 43 | v, Q[i, j] = my_min(np.array([V[i, j - 1], 44 | V[i - 1, j - 1], 45 | V[i - 1, j]]) , gamma) 46 | V[i, j] = theta[i - 1, j - 1] + v 47 | 48 | E = np.zeros((m + 2, n + 2)) 49 | E[m + 1, :] = 0 50 | E[:, n + 1] = 0 51 | E[m + 1, n + 1] = 1 52 | Q[m + 1, n + 1] = 1 53 | 54 | for i in range(m,0,-1): 55 | for j in range(n,0,-1): 56 | E[i, j] = Q[i, j + 1, 0] * E[i, j + 1] + \ 57 | Q[i + 1, j + 1, 1] * E[i + 1, j + 1] + \ 58 | Q[i + 1, j, 2] * E[i + 1, j] 59 | 60 | return V[m, n], E[1:m + 1, 1:n + 1], Q, E 61 | 62 | 63 | @jit(nopython = True) 64 | def dtw_hessian_prod(theta, Z, Q, E, gamma): 65 | m = Z.shape[0] 66 | n = Z.shape[1] 67 | 68 | V_dot = np.zeros((m + 1, n + 1)) 69 | V_dot[0, 0] = 0 70 | 71 | Q_dot = np.zeros((m + 2, n + 2, 3)) 72 | for i in range(1, m + 1): 73 | for j in range(1, n + 1): 74 | # theta is indexed starting from 0. 
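# Forward sweep of the directional derivative: V_dot accumulates the
# derivative of the soft-min DP values in the direction Z, reusing the
# soft-min weights Q computed in dtw_grad; Q_dot then applies the
# Hessian of the soft-min (my_min_hessian_product) to the same
# direction. The backward sweep over E_dot below mirrors dtw_grad's
# backward pass, producing the Hessian-vector product.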
75 | V_dot[i, j] = Z[i - 1, j - 1] + \ 76 | Q[i, j, 0] * V_dot[i, j - 1] + \ 77 | Q[i, j, 1] * V_dot[i - 1, j - 1] + \ 78 | Q[i, j, 2] * V_dot[i - 1, j] 79 | 80 | v = np.array([V_dot[i, j - 1], V_dot[i - 1, j - 1], V_dot[i - 1, j]]) 81 | Q_dot[i, j] = my_min_hessian_product(Q[i, j], v, gamma) 82 | E_dot = np.zeros((m + 2, n + 2)) 83 | 84 | for j in range(n,0,-1): 85 | for i in range(m,0,-1): 86 | E_dot[i, j] = Q_dot[i, j + 1, 0] * E[i, j + 1] + \ 87 | Q[i, j + 1, 0] * E_dot[i, j + 1] + \ 88 | Q_dot[i + 1, j + 1, 1] * E[i + 1, j + 1] + \ 89 | Q[i + 1, j + 1, 1] * E_dot[i + 1, j + 1] + \ 90 | Q_dot[i + 1, j, 2] * E[i + 1, j] + \ 91 | Q[i + 1, j, 2] * E_dot[i + 1, j] 92 | 93 | return V_dot[m, n], E_dot[1:m + 1, 1:n + 1] 94 | 95 | 96 | class PathDTWBatch(Function): 97 | @staticmethod 98 | def forward(ctx, D, gamma): # D.shape: [batch_size, N , N] 99 | batch_size,N,N = D.shape 100 | device = D.device 101 | D_cpu = D.detach().cpu().numpy() 102 | gamma_gpu = torch.FloatTensor([gamma]).to(device) 103 | 104 | grad_gpu = torch.zeros((batch_size, N ,N)).to(device) 105 | Q_gpu = torch.zeros((batch_size, N+2 ,N+2,3)).to(device) 106 | E_gpu = torch.zeros((batch_size, N+2 ,N+2)).to(device) 107 | 108 | for k in range(0,batch_size): # loop over all D in the batch 109 | _, grad_cpu_k, Q_cpu_k, E_cpu_k = dtw_grad(D_cpu[k,:,:], gamma) 110 | grad_gpu[k,:,:] = torch.FloatTensor(grad_cpu_k).to(device) 111 | Q_gpu[k,:,:,:] = torch.FloatTensor(Q_cpu_k).to(device) 112 | E_gpu[k,:,:] = torch.FloatTensor(E_cpu_k).to(device) 113 | ctx.save_for_backward(grad_gpu,D, Q_gpu ,E_gpu, gamma_gpu) 114 | return torch.mean(grad_gpu, dim=0) 115 | 116 | @staticmethod 117 | def backward(ctx, grad_output): 118 | device = grad_output.device 119 | grad_gpu, D_gpu, Q_gpu, E_gpu, gamma = ctx.saved_tensors 120 | D_cpu = D_gpu.detach().cpu().numpy() 121 | Q_cpu = Q_gpu.detach().cpu().numpy() 122 | E_cpu = E_gpu.detach().cpu().numpy() 123 | gamma = gamma.detach().cpu().numpy()[0] 124 | Z = grad_output.detach().cpu().numpy() 125 | 126 | batch_size,N,N = D_cpu.shape 127 | Hessian = torch.zeros((batch_size, N ,N)).to(device) 128 | for k in range(0,batch_size): 129 | _, hess_k = dtw_hessian_prod(D_cpu[k,:,:], Z, Q_cpu[k,:,:,:], E_cpu[k,:,:], gamma) 130 | Hessian[k:k+1,:,:] = torch.FloatTensor(hess_k).to(device) 131 | 132 | return Hessian, None -------------------------------------------------------------------------------- /loss/soft_dtw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from numba import jit 4 | from torch.autograd import Function 5 | 6 | def pairwise_distances(x, y=None): 7 | ''' 8 | Input: x is a Nxd matrix 9 | y is an optional Mxd matirx 10 | Output: dist is a NxM matrix where dist[i,j] is the square norm between x[i,:] and y[j,:] 11 | if y is not given then use 'y=x'. 12 | i.e. 
dist[i,j] = ||x[i,:]-y[j,:]||^2 13 | ''' 14 | x_norm = (x**2).sum(1).view(-1, 1) 15 | if y is not None: 16 | y_t = torch.transpose(y, 0, 1) 17 | y_norm = (y**2).sum(1).view(1, -1) 18 | else: 19 | y_t = torch.transpose(x, 0, 1) 20 | y_norm = x_norm.view(1, -1) 21 | 22 | dist = x_norm + y_norm - 2.0 * torch.mm(x, y_t) 23 | return torch.clamp(dist, 0.0, float('inf')) 24 | 25 | @jit(nopython = True) 26 | def compute_softdtw(D, gamma): 27 | N = D.shape[0] 28 | M = D.shape[1] 29 | R = np.zeros((N + 2, M + 2)) + 1e8 30 | R[0, 0] = 0 31 | for j in range(1, M + 1): 32 | for i in range(1, N + 1): 33 | r0 = -R[i - 1, j - 1] / gamma 34 | r1 = -R[i - 1, j] / gamma 35 | r2 = -R[i, j - 1] / gamma 36 | rmax = max(max(r0, r1), r2) 37 | rsum = np.exp(r0 - rmax) + np.exp(r1 - rmax) + np.exp(r2 - rmax) 38 | softmin = - gamma * (np.log(rsum) + rmax) 39 | R[i, j] = D[i - 1, j - 1] + softmin 40 | return R 41 | 42 | @jit(nopython = True) 43 | def compute_softdtw_backward(D_, R, gamma): 44 | N = D_.shape[0] 45 | M = D_.shape[1] 46 | D = np.zeros((N + 2, M + 2)) 47 | E = np.zeros((N + 2, M + 2)) 48 | D[1:N + 1, 1:M + 1] = D_ 49 | E[-1, -1] = 1 50 | R[:, -1] = -1e8 51 | R[-1, :] = -1e8 52 | R[-1, -1] = R[-2, -2] 53 | for j in range(M, 0, -1): 54 | for i in range(N, 0, -1): 55 | a0 = (R[i + 1, j] - R[i, j] - D[i + 1, j]) / gamma 56 | b0 = (R[i, j + 1] - R[i, j] - D[i, j + 1]) / gamma 57 | c0 = (R[i + 1, j + 1] - R[i, j] - D[i + 1, j + 1]) / gamma 58 | a = np.exp(a0) 59 | b = np.exp(b0) 60 | c = np.exp(c0) 61 | E[i, j] = E[i + 1, j] * a + E[i, j + 1] * b + E[i + 1, j + 1] * c 62 | return E[1:N + 1, 1:M + 1] 63 | 64 | 65 | class SoftDTWBatch(Function): 66 | @staticmethod 67 | def forward(ctx, D, gamma = 1.0): # D.shape: [batch_size, N , N] 68 | dev = D.device 69 | batch_size,N,N = D.shape 70 | gamma = torch.FloatTensor([gamma]).to(dev) 71 | D_ = D.detach().cpu().numpy() 72 | g_ = gamma.item() 73 | 74 | total_loss = 0 75 | R = torch.zeros((batch_size, N+2 ,N+2)).to(dev) 76 | for k in range(0, batch_size): # loop over all D in the batch 77 | Rk = torch.FloatTensor(compute_softdtw(D_[k,:,:], g_)).to(dev) 78 | R[k:k+1,:,:] = Rk 79 | total_loss = total_loss + Rk[-2,-2] 80 | ctx.save_for_backward(D, R, gamma) 81 | return total_loss / batch_size 82 | 83 | @staticmethod 84 | def backward(ctx, grad_output): 85 | dev = grad_output.device 86 | D, R, gamma = ctx.saved_tensors 87 | batch_size,N,N = D.shape 88 | D_ = D.detach().cpu().numpy() 89 | R_ = R.detach().cpu().numpy() 90 | g_ = gamma.item() 91 | 92 | E = torch.zeros((batch_size, N ,N)).to(dev) 93 | for k in range(batch_size): 94 | Ek = torch.FloatTensor(compute_softdtw_backward(D_[k,:,:], R_[k,:,:], g_)).to(dev) 95 | E[k:k+1,:,:] = Ek 96 | 97 | return grad_output * E, None 98 | 99 | 100 | -------------------------------------------------------------------------------- /models/index_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import random 5 | import math 6 | import numpy as np 7 | from torch.distributions.normal import Normal 8 | 9 | 10 | 11 | class IndexModel(nn.Module): 12 | """docstring for IndexModel""" 13 | def __init__( 14 | self,input_size, output_size, hidden_size, num_grulstm_layers, fc_units, 15 | point_estimates 16 | ): 17 | super(IndexModel, self).__init__() 18 | self.input_size = input_size 19 | self.output_size = output_size 20 | self.hidden_size = hidden_size 21 | self.num_grulstm_layers = num_grulstm_layers 22 | 23 | 
self.point_estimates = point_estimates 24 | 25 | self.gru_gaps = nn.GRU( 26 | input_size=1, 27 | hidden_size=hidden_size, 28 | num_layers=num_grulstm_layers, 29 | batch_first=True 30 | ) 31 | self.gru_values = nn.GRU( 32 | input_size=1, 33 | hidden_size=hidden_size, 34 | num_layers=num_grulstm_layers, 35 | batch_first=True 36 | ) 37 | self.fc_gaps = nn.Linear(hidden_size, fc_units) 38 | self.fc_values = nn.Linear(hidden_size, fc_units) 39 | self.out_mean_gaps = nn.Linear(fc_units, 1) 40 | self.out_std_gaps = nn.Linear(fc_units, 1) 41 | self.out_mean_values = nn.Linear(fc_units, 1) 42 | self.out_std_values = nn.Linear(fc_units, 1) 43 | 44 | def init_hidden(self, batch_size, device): 45 | #[num_layers*num_directions,batch,hidden_size] 46 | return ( 47 | torch.zeros(self.num_grulstm_layers, batch_size, self.hidden_size, device=device), 48 | torch.zeros(self.num_grulstm_layers, batch_size, self.hidden_size, device=device) 49 | ) 50 | 51 | 52 | def forward(self, gaps, values, hidden, verbose=False): 53 | 54 | inputs = torch.cat((gaps, values), dim=-1) 55 | hidden_gaps, hidden_values = hidden 56 | 57 | output_gaps, hidden_gaps = self.gru_gaps(gaps, hidden_gaps) 58 | output_values, hidden_values = self.gru_values(values, hidden_values) 59 | output_gaps = self.fc_gaps(output_gaps) 60 | output_values = self.fc_values(output_values) 61 | 62 | means_gaps = F.softplus(self.out_mean_gaps(output_gaps)) + 1e-3 # gaps must be positive 63 | means_values = self.out_mean_values(output_values) 64 | stds_gaps = F.softplus(self.out_std_gaps(output_gaps)) + 1e-3 65 | stds_values = F.softplus(self.out_std_values(output_values)) + 1e-3 66 | #stds = F.softplus(stds) + 1e-3 67 | #means_gaps, means_values = means[:, :, 0:1], means[:, :, 1:] 68 | #means_gaps = 2. + F.softplus(means_gaps) + 1e-3 # gaps must be positive 69 | #stds_gaps, stds_values = stds[:, :, 0:1], stds[:, :, 1:] 70 | #import ipdb 71 | #ipdb.set_trace() 72 | hidden = (hidden_gaps, hidden_values) 73 | if self.point_estimates: 74 | stds_gaps, stds_values = None, None 75 | return means_gaps, stds_gaps, means_values, stds_values, hidden 76 | 77 | def simulate(self, gaps, values, hidden, end_idx): 78 | 79 | means_gaps, means_values, stds_gaps, stds_values = [], [], [], [] 80 | if self.point_estimates: 81 | stds_gaps, stds_values = None, None 82 | means_gaps_t, stds_gaps_t, means_values_t, stds_values_t, hidden_t = self.forward(gaps, values, hidden) 83 | 84 | means_gaps_t = means_gaps_t[:, -1:] 85 | means_values_t = means_values_t[:, -1:] 86 | means_gaps.append(means_gaps_t) 87 | means_values.append(means_values_t) 88 | if not self.point_estimates: 89 | stds_gaps_t = stds_gaps_t[:, -1:] 90 | stds_values_t = stds_values_t[:, -1:] 91 | stds_gaps.append(stds_gaps_t) 92 | stds_values.append(stds_values_t) 93 | pred_idx = torch.sum(gaps, dim=1, keepdim=True).detach().numpy() + means_gaps_t.detach().numpy() 94 | while any(pred_idx < end_idx): 95 | #print(means_gaps_t) 96 | ( 97 | means_gaps_t, stds_gaps_t, means_values_t, stds_values_t, hidden_t 98 | ) = self.forward(means_gaps_t, means_values_t, hidden_t) 99 | 100 | means_gaps.append(means_gaps_t) 101 | means_values.append(means_values_t) 102 | if not self.point_estimates: 103 | stds_gaps.append(stds_gaps_t) 104 | stds_values.append(stds_values_t) 105 | pred_idx += means_gaps_t.detach().numpy() 106 | 107 | means_gaps = torch.cat(means_gaps, dim=1) 108 | means_values = torch.cat(means_values, dim=1) 109 | if not self.point_estimates: 110 | stds_gaps = torch.cat(stds_gaps, dim=1) 111 | stds_values = 
torch.cat(stds_values, dim=1) 112 | 113 | return means_gaps, stds_gaps, means_values, stds_values, hidden_t 114 | 115 | 116 | def get_index_model( 117 | args, config, level, 118 | N_input, N_output, input_size, output_size, 119 | point_estimates 120 | ): 121 | idx_model = IndexModel( 122 | 2, #input_size=2, 123 | 2, #output_size=2, 124 | args.hidden_size, 125 | args.num_grulstm_layers, args.fc_units, 126 | point_estimates 127 | ) 128 | 129 | return idx_model -------------------------------------------------------------------------------- /models/inf_index_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | import cvxpy as cp 7 | from bisect import bisect_left 8 | #import pywt 9 | 10 | from utils import normalize, unnormalize, sqz, expand 11 | 12 | 13 | 14 | class MSE(torch.nn.Module): 15 | """docstring for MSE""" 16 | def __init__(self, base_models_dict): 17 | super(MSE, self).__init__() 18 | self.base_models_dict = base_models_dict 19 | 20 | def forward(self, feats_in_dict, inputs_dict, feats_tgt_dict, 21 | norm_dict, inputs_gaps_dict, N_input, N_output, targets_dict=None 22 | ): 23 | return self.base_models_dict[1](feats_in_dict[1], inputs_dict[1], feats_tgt_dict[1]) 24 | 25 | class NLL(torch.nn.Module): 26 | """docstring for NLL""" 27 | def __init__(self, base_models_dict): 28 | super(NLL, self).__init__() 29 | self.base_models_dict = base_models_dict 30 | 31 | def forward( 32 | self, feats_in_dict, inputs_dict, feats_tgt_dict, 33 | norm_dict, inputs_gaps_dict, N_input, N_output, targets_dict=None 34 | ): 35 | return self.base_models_dict[1](feats_in_dict[1], inputs_dict[1], feats_tgt_dict[1]) 36 | 37 | 38 | class OPT_st(torch.nn.Module): 39 | """docstring for OPT_st""" 40 | def __init__(self, K_list, base_models_dict, device, disable_sum=False, intercept_type='intercept'): 41 | ''' 42 | K_list: list 43 | list of K-values used for aggregation 44 | base_models_dict: dict of dicts 45 | key: aggregation method 46 | value: dict 47 | key: level in the hierarchy 48 | value: base model at the level 'key' 49 | ''' 50 | super(OPT_st, self).__init__() 51 | self.K_list = K_list 52 | self.base_models_dict = base_models_dict 53 | self.intercept_type = intercept_type 54 | self.disable_sum = disable_sum 55 | self.device = device 56 | 57 | 58 | def aggregate_seq_(self, seq, indices): 59 | #assert seq.shape[0]%K == 0 60 | #agg_seq = np.array([[1./K * cp.sum(seq[i:i+K])] for i in range(0, seq.shape[0], K)]) 61 | agg_seq = [] 62 | prev = 0 63 | for i in range(len(indices[0])): 64 | curr = int(indices[0][i].item()) 65 | #s = 1./(len(seq[prev:curr])) * cp.sum(seq[prev:curr]) 66 | s = 1./(curr-prev) * cp.sum(seq[prev:curr]) 67 | agg_seq.append([s]) 68 | prev = curr 69 | agg_seq = np.array(agg_seq) 70 | return agg_seq 71 | 72 | def fit_slope_with_indices(self, seq, indices): 73 | W = [] 74 | #import ipdb 75 | #ipdb.set_trace() 76 | prev = 0 77 | #print(indices[0][:, 0]) 78 | for i in range(len(indices[0])): 79 | curr = int(indices[0][i].item()) 80 | #print(prev, curr, len(indices[0])) 81 | x = np.cumsum(np.ones(seq[prev:curr].shape)) - 1. 
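# For each aggregation segment [prev, curr), x is a local 0..len-1 time
# index and the lines below fit an ordinary least-squares line
# y ~ w*x + b in closed form (w = s_xy / s_xx); only the slope w is kept
# as the segment's aggregate value. For plain numpy arrays this would be
#   w, b = np.polyfit(x, y, 1)
# but cvxpy expressions are used here so that w remains a function of
# the decision variable ex_preds inside the optimization problem.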
82 | y = seq[prev:curr] 83 | m_x = cp.sum(x)/x.shape[0] 84 | m_y = cp.sum(y)/y.shape[0] 85 | s_xy = cp.sum((x-m_x)*(y-m_y)) 86 | s_xx = cp.sum((x-m_x)**2) 87 | w = s_xy/s_xx 88 | if self.intercept_type in ['intercept']: 89 | b = m_y - w*m_x 90 | elif self.intercept_type in ['sum']: 91 | b = cp.sum(y) 92 | W.append(w) 93 | prev = curr 94 | W = np.expand_dims(np.array(W), axis=1) 95 | return W 96 | 97 | def log_prob(self, ex_preds, means, std): 98 | #import ipdb 99 | #ipdb.set_trace() 100 | return -cp.sum(np.sum(np.log(1/(((2*np.pi)**0.5)*std)) - (((ex_preds - means)**2) / (2*(std)**2)))) 101 | 102 | def optimize(self, params_dict, params_idx_dict, norm_dict): 103 | 104 | ex_preds = cp.Variable(params_dict['sum'][1][0].shape) 105 | for lvl, params in params_dict['slope'].items(): 106 | if lvl==1: 107 | lvl_ex_preds = ex_preds 108 | else: 109 | lvl_ex_preds, _ = normalize( 110 | self.fit_slope_with_indices( 111 | unnormalize(ex_preds, norm_dict['slope'][1]), 112 | params_idx_dict['slope'][lvl] 113 | ), 114 | norm_dict['slope'][lvl] 115 | ) 116 | lvl_loss = self.log_prob( 117 | lvl_ex_preds, 118 | params_dict['slope'][lvl][0].detach().numpy(), 119 | params_dict['slope'][lvl][1].detach().numpy() 120 | ) 121 | if lvl==1: 122 | opt_loss = lvl_loss 123 | else: 124 | opt_loss += lvl_loss 125 | 126 | if not self.disable_sum: 127 | for lvl, params in params_dict['sum'].items(): 128 | if lvl==1: 129 | lvl_ex_preds = ex_preds 130 | else: 131 | lvl_ex_preds, _ = normalize( 132 | self.aggregate_seq_( 133 | unnormalize(ex_preds, norm_dict['sum'][1]), 134 | params_idx_dict['sum'][lvl] 135 | ), 136 | norm_dict['sum'][lvl] 137 | ) 138 | lvl_loss = self.log_prob( 139 | lvl_ex_preds, 140 | params_dict['sum'][lvl][0].detach().numpy(), 141 | params_dict['sum'][lvl][1].detach().numpy() 142 | ) 143 | opt_loss += lvl_loss 144 | 145 | objective = cp.Minimize(opt_loss) 146 | 147 | #constraints = [ex_preds>=0] 148 | 149 | prob = cp.Problem(objective)#, constraints) 150 | 151 | try: 152 | opt_loss = prob.solve() 153 | except cp.error.SolverError: 154 | opt_loss = prob.solve(solver='SCS') 155 | 156 | #if ex_preds.value is None: 157 | 158 | #import ipdb 159 | #ipdb.set_trace() 160 | 161 | return ex_preds.value 162 | 163 | 164 | def forward( 165 | self, feats_in_dict, inputs_dict, feats_tgt_dict, norm_dict, 166 | inputs_gaps_dict, N_input, N_output, targets_dict=None 167 | ): 168 | ''' 169 | inputs_dict: [aggregation method][level] 170 | norm_dict: [aggregation method][level] 171 | ''' 172 | 173 | norm_dict_np = dict() 174 | for agg_method in norm_dict.keys(): 175 | norm_dict_np[agg_method] = dict() 176 | for lvl in norm_dict[agg_method].keys(): 177 | norm_dict_np[agg_method][lvl] = norm_dict[agg_method][lvl].detach().numpy() 178 | 179 | params_dict = dict() 180 | params_idx_dict = dict() 181 | for agg_method in self.base_models_dict.keys(): 182 | params_dict[agg_method] = dict() 183 | params_idx_dict[agg_method] = dict() 184 | if agg_method in ['slope', 'sum']: 185 | for level in self.K_list: 186 | print(agg_method, level) 187 | model = self.base_models_dict[agg_method][level] 188 | inputs = inputs_dict[agg_method][level] 189 | feats_in, feats_tgt = feats_in_dict[agg_method][level], feats_tgt_dict[agg_method][level] 190 | inputs_gaps = inputs_gaps_dict[agg_method][level] 191 | if level == 1: 192 | means, stds = model(feats_in, inputs, feats_tgt) 193 | else: 194 | hidden = model.init_hidden(inputs_gaps.shape[0], self.device) 195 | end_idx = np.ones((inputs.shape[0], 1, 1)) * (N_input + N_output) 196 | means_gaps, 
stds_gaps, means, stds, _ = model.simulate( 197 | inputs_gaps, inputs, hidden, end_idx 198 | ) 199 | #means_gaps = torch.ones_like(means_gaps)*2. #TODO: Remove this line 200 | means_idx = torch.round(torch.cumsum(means_gaps, dim=1)) 201 | stds_idx = stds_gaps 202 | #import ipdb 203 | #ipdb.set_trace() 204 | 205 | if targets_dict is not None and level != 1: 206 | means = targets_dict[agg_method][level] 207 | 208 | if model.point_estimates: 209 | stds = torch.ones_like(means) 210 | if level != 1: 211 | stds_idx = torch.ones_like(means_idx) 212 | params = [means, stds] 213 | params_dict[agg_method][level] = params 214 | 215 | if level != 1: 216 | params_idx = [means_idx, stds_idx] 217 | params_idx_dict[agg_method][level] = params_idx 218 | 219 | all_preds_mu = [] 220 | all_preds_std = [] 221 | for i in range(params_dict['sum'][1][0].size()[0]): 222 | #print(i) 223 | ex_params_dict = dict() 224 | ex_params_idx_dict = dict() 225 | ex_norm_dict = dict() 226 | for agg_method in params_dict.keys(): 227 | ex_params_dict[agg_method] = dict() 228 | ex_params_idx_dict[agg_method] = dict() 229 | ex_norm_dict[agg_method] = dict() 230 | for lvl in params_dict[agg_method].keys(): 231 | if lvl != 1: 232 | # Discard all-but-first indices greater than end_idx 233 | stp = bisect_left( 234 | params_idx_dict[agg_method][lvl][0][i][:, 0].detach().numpy(), 235 | N_output 236 | ) + 1 237 | 238 | #print(stp) 239 | ex_params_idx_dict[agg_method][lvl] = [ 240 | params_idx_dict[agg_method][lvl][0][i][:stp], 241 | params_idx_dict[agg_method][lvl][1][i][:stp] 242 | ] 243 | else: 244 | stp = len(params_dict[agg_method][lvl][0][i]) 245 | 246 | ex_params_dict[agg_method][lvl] = [ 247 | params_dict[agg_method][lvl][0][i][:stp], 248 | params_dict[agg_method][lvl][1][i][:stp] 249 | ] 250 | ex_norm_dict[agg_method][lvl] = norm_dict_np[agg_method][lvl][i] 251 | 252 | #import ipdb 253 | #ipdb.set_trace() 254 | ex_preds_opt = self.optimize(ex_params_dict, ex_params_idx_dict, ex_norm_dict) 255 | all_preds_mu.append(ex_preds_opt) 256 | all_preds_std.append(params_dict['sum'][1][1][i]) 257 | 258 | all_preds_mu = torch.FloatTensor(all_preds_mu) 259 | all_preds_std = torch.stack(all_preds_std) 260 | 261 | #all_preds, _ = normalize(all_preds, norm_dict[0]) 262 | 263 | return all_preds_mu, all_preds_std -------------------------------------------------------------------------------- /models/informer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | #from utils.masking import TriangularCausalMask, ProbMask 6 | #from models.encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack 7 | #from models.decoder import Decoder, DecoderLayer 8 | #from models.attn import FullAttention, ProbAttention, AttentionLayer 9 | #from models.embed import DataEmbedding 10 | 11 | import numpy as np 12 | from math import sqrt 13 | import math 14 | 15 | class PositionalEmbedding(nn.Module): 16 | def __init__(self, d_model, max_len=5000): 17 | super(PositionalEmbedding, self).__init__() 18 | # Compute the positional encodings once in log space. 
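A toy, self-contained sketch of the kind of convex program `OPT_st.optimize` above solves per example (all numbers are made up for illustration; the real routine sums Gaussian negative log-likelihoods over every aggregation method and level, with predicted indices defining the windows). With fixed standard deviations the Gaussian NLL reduces to a weighted least-squares objective:

```python
import cvxpy as cp
import numpy as np

mu_base = np.array([1.0, 2.0, 3.0, 4.0])   # base-level forecast means
sd_base = np.array([1.0, 1.0, 1.0, 1.0])
mu_agg  = np.array([2.0, 3.0])             # forecast means of 2-step averages
sd_agg  = np.array([0.1, 0.1])             # aggregate model is more confident

z = cp.Variable(4)
z_agg = cp.hstack([cp.sum(z[0:2]) / 2, cp.sum(z[2:4]) / 2])

objective = cp.Minimize(
    cp.sum_squares((z - mu_base) / sd_base) +
    cp.sum_squares((z_agg - mu_agg) / sd_agg)
)
prob = cp.Problem(objective)
prob.solve()
print(z.value)                              # base forecasts nudged towards coherence
print(z.value.reshape(2, 2).mean(axis=1))   # close to mu_agg = [2., 3.]
```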
19 | pe = torch.zeros(max_len, d_model).float() 20 | pe.require_grad = False 21 | 22 | position = torch.arange(0, max_len).float().unsqueeze(1) 23 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() 24 | 25 | pe[:, 0::2] = torch.sin(position * div_term) 26 | pe[:, 1::2] = torch.cos(position * div_term) 27 | 28 | pe = pe.unsqueeze(0) 29 | self.register_buffer('pe', pe) 30 | 31 | def forward(self, x): 32 | return self.pe[:, :x.size(1)] 33 | 34 | class TokenEmbedding(nn.Module): 35 | def __init__(self, c_in, d_model): 36 | super(TokenEmbedding, self).__init__() 37 | padding = 1 if torch.__version__>='1.5.0' else 2 38 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 39 | kernel_size=3, padding=padding, padding_mode='circular') 40 | for m in self.modules(): 41 | if isinstance(m, nn.Conv1d): 42 | nn.init.kaiming_normal_(m.weight,mode='fan_in',nonlinearity='leaky_relu') 43 | 44 | def forward(self, x): 45 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2) 46 | return x 47 | 48 | class FixedEmbedding(nn.Module): 49 | def __init__(self, c_in, d_model): 50 | super(FixedEmbedding, self).__init__() 51 | 52 | w = torch.zeros(c_in, d_model).float() 53 | w.require_grad = False 54 | 55 | position = torch.arange(0, c_in).float().unsqueeze(1) 56 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() 57 | 58 | w[:, 0::2] = torch.sin(position * div_term) 59 | w[:, 1::2] = torch.cos(position * div_term) 60 | 61 | self.emb = nn.Embedding(c_in, d_model) 62 | self.emb.weight = nn.Parameter(w, requires_grad=False) 63 | 64 | def forward(self, x): 65 | return self.emb(x).detach() 66 | 67 | # class TemporalEmbedding(nn.Module): 68 | # def __init__(self, d_model, embed_type='fixed', freq='h'): 69 | # super(TemporalEmbedding, self).__init__() 70 | 71 | # minute_size = 4; hour_size = 24 72 | # weekday_size = 7; day_size = 32; month_size = 13 73 | 74 | # Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding 75 | # if freq=='t': 76 | # self.minute_embed = Embed(minute_size, d_model) 77 | # self.hour_embed = Embed(hour_size, d_model) 78 | # self.weekday_embed = Embed(weekday_size, d_model) 79 | # self.day_embed = Embed(day_size, d_model) 80 | # self.month_embed = Embed(month_size, d_model) 81 | 82 | # def forward(self, x): 83 | # x = x.long() 84 | 85 | # minute_x = self.minute_embed(x[:,:,4]) if hasattr(self, 'minute_embed') else 0. 
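The sinusoidal table built by `PositionalEmbedding` and `FixedEmbedding` above can be written out directly; a minimal NumPy sketch for a tiny `d_model` (illustration only):

```python
import numpy as np

def sinusoidal_table(max_len, d_model):
    position = np.arange(max_len, dtype=float)[:, None]
    div_term = np.exp(np.arange(0, d_model, 2, dtype=float) * -(np.log(10000.0) / d_model))
    pe = np.zeros((max_len, d_model))
    pe[:, 0::2] = np.sin(position * div_term)   # even dimensions: sine
    pe[:, 1::2] = np.cos(position * div_term)   # odd dimensions: cosine
    return pe

pe = sinusoidal_table(max_len=8, d_model=4)
print(pe.shape)   # (8, 4); PositionalEmbedding returns the first seq_len rows
print(pe[0])      # position 0 -> [0., 1., 0., 1.]
```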
86 | # hour_x = self.hour_embed(x[:,:,3]) 87 | # weekday_x = self.weekday_embed(x[:,:,2]) 88 | # day_x = self.day_embed(x[:,:,1]) 89 | # month_x = self.month_embed(x[:,:,0]) 90 | 91 | # return hour_x + weekday_x + day_x + month_x + minute_x 92 | 93 | class TemporalEmbedding(nn.Module): 94 | def __init__(self, d_model, feats_info, embed_type='fixed', freq='h'): 95 | super(TemporalEmbedding, self).__init__() 96 | 97 | self.feats_info = feats_info 98 | self.embed_feat_layers = {} 99 | for idx, (card, emb_size) in self.feats_info.items(): 100 | if card != -1 and card != 0 and emb_size > 0: 101 | #import ipdb ; ipdb.set_trace() 102 | self.embed_feat_layers[str(idx)] = nn.Embedding(card, emb_size) 103 | self.embed_feat_layers = nn.ModuleDict(self.embed_feat_layers) 104 | feats_dim = sum([s for (_, s) in self.feats_info.values() if s>-1]) 105 | self.linear_map = nn.Linear(feats_dim, d_model) 106 | 107 | minute_size = 96; hour_size = 24 108 | weekday_size = 7; day_size = 32; month_size = 13 109 | 110 | # Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding 111 | # if freq=='15min': 112 | # self.embed = Embed(minute_size, d_model) 113 | # if freq=='h': 114 | # self.embed = Embed(hour_size, d_model) 115 | 116 | def forward(self, x): 117 | x = x.long() 118 | x_merged = [] 119 | for idx, efl in self.embed_feat_layers.items(): 120 | x_i = x[..., int(idx)].long() 121 | x_merged.append(efl(x_i)) 122 | for idx, (card, emb_size) in self.feats_info.items(): 123 | if card == 0: 124 | x_merged.append(x[..., idx:idx+1]) 125 | x_merged = self.linear_map(torch.cat(x_merged, dim=2)) 126 | # embed_x = self.embed(x[:,:,0]) 127 | 128 | return x_merged 129 | 130 | class TimeFeatureEmbedding(nn.Module): 131 | def __init__(self, d_model, embed_type='timeF', freq='h'): 132 | super(TimeFeatureEmbedding, self).__init__() 133 | 134 | freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3} 135 | d_inp = freq_map[freq] 136 | self.embed = nn.Linear(d_inp, d_model) 137 | 138 | def forward(self, x): 139 | return self.embed(x) 140 | 141 | class DataEmbedding(nn.Module): 142 | def __init__(self, c_in, d_model, feats_info, embed_type='fixed', freq='h', dropout=0.1): 143 | super(DataEmbedding, self).__init__() 144 | 145 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 146 | self.position_embedding = PositionalEmbedding(d_model=d_model) 147 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, feats_info=feats_info, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) 148 | 149 | self.dropout = nn.Dropout(p=dropout) 150 | 151 | def forward(self, x, x_mark): 152 | #print(self.value_embedding(x).shape) 153 | #print(self.position_embedding(x).shape) 154 | #import ipdb ; ipdb.set_trace() 155 | #print(self.temporal_embedding(x_mark).shape) 156 | x = self.value_embedding(x) + self.position_embedding(x) + self.temporal_embedding(x_mark) 157 | 158 | return self.dropout(x) 159 | 160 | class TriangularCausalMask(): 161 | def __init__(self, B, L, device="cpu"): 162 | mask_shape = [B, 1, L, L] 163 | with torch.no_grad(): 164 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 165 | 166 | @property 167 | def mask(self): 168 | return self._mask 169 | 170 | class ProbMask(): 171 | def __init__(self, B, H, L, index, scores, device="cpu"): 172 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) 173 | _mask_ex = _mask[None, None, :].expand(B, H, L, 
scores.shape[-1]) 174 | indicator = _mask_ex[torch.arange(B)[:, None, None], 175 | torch.arange(H)[None, :, None], 176 | index, :].to(device) 177 | self._mask = indicator.view(scores.shape).to(device) 178 | 179 | @property 180 | def mask(self): 181 | return self._mask 182 | 183 | class FullAttention(nn.Module): 184 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 185 | super(FullAttention, self).__init__() 186 | self.scale = scale 187 | self.mask_flag = mask_flag 188 | self.output_attention = output_attention 189 | self.dropout = nn.Dropout(attention_dropout) 190 | 191 | def forward(self, queries, keys, values, attn_mask): 192 | B, L, H, E = queries.shape 193 | _, S, _, D = values.shape 194 | scale = self.scale or 1./sqrt(E) 195 | 196 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 197 | if self.mask_flag: 198 | if attn_mask is None: 199 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 200 | 201 | scores.masked_fill_(attn_mask.mask, -np.inf) 202 | 203 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 204 | V = torch.einsum("bhls,bshd->blhd", A, values) 205 | 206 | if self.output_attention: 207 | return (V.contiguous(), A) 208 | else: 209 | return (V.contiguous(), None) 210 | 211 | class ProbAttention(nn.Module): 212 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 213 | super(ProbAttention, self).__init__() 214 | self.factor = factor 215 | self.scale = scale 216 | self.mask_flag = mask_flag 217 | self.output_attention = output_attention 218 | self.dropout = nn.Dropout(attention_dropout) 219 | 220 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) 221 | # Q [B, H, L, D] 222 | B, H, L_K, E = K.shape 223 | _, _, L_Q, _ = Q.shape 224 | 225 | # calculate the sampled Q_K 226 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) 227 | index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q 228 | K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :] 229 | Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() 230 | 231 | # find the Top_k query with sparisty measurement 232 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) 233 | M_top = M.topk(n_top, sorted=False)[1] 234 | 235 | # use the reduced Q to calculate Q_K 236 | Q_reduce = Q[torch.arange(B)[:, None, None], 237 | torch.arange(H)[None, :, None], 238 | M_top, :] # factor*ln(L_q) 239 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k 240 | 241 | return Q_K, M_top 242 | 243 | def _get_initial_context(self, V, L_Q): 244 | B, H, L_V, D = V.shape 245 | if not self.mask_flag: 246 | # V_sum = V.sum(dim=-2) 247 | V_sum = V.mean(dim=-2) 248 | contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone() 249 | else: # use mask 250 | assert(L_Q == L_V) # requires that L_Q == L_V, i.e. 
for self-attention only 251 | contex = V.cumsum(dim=-2) 252 | return contex 253 | 254 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): 255 | B, H, L_V, D = V.shape 256 | 257 | if self.mask_flag: 258 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) 259 | scores.masked_fill_(attn_mask.mask, -np.inf) 260 | 261 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) 262 | 263 | context_in[torch.arange(B)[:, None, None], 264 | torch.arange(H)[None, :, None], 265 | index, :] = torch.matmul(attn, V).type_as(context_in) 266 | if self.output_attention: 267 | attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device) 268 | attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn 269 | return (context_in, attns) 270 | else: 271 | return (context_in, None) 272 | 273 | def forward(self, queries, keys, values, attn_mask): 274 | B, L_Q, H, D = queries.shape 275 | _, L_K, _, _ = keys.shape 276 | 277 | queries = queries.transpose(2,1) 278 | keys = keys.transpose(2,1) 279 | values = values.transpose(2,1) 280 | 281 | U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) 282 | u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 283 | 284 | U_part = U_part if U_part='1.5.0' else 2 344 | self.downConv = nn.Conv1d(in_channels=c_in, 345 | out_channels=c_in, 346 | kernel_size=3, 347 | padding=padding, 348 | padding_mode='circular') 349 | self.norm = nn.BatchNorm1d(c_in) 350 | self.activation = nn.ELU() 351 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 352 | 353 | def forward(self, x): 354 | x = self.downConv(x.permute(0, 2, 1)) 355 | x = self.norm(x) 356 | x = self.activation(x) 357 | x = self.maxPool(x) 358 | x = x.transpose(1,2) 359 | return x 360 | 361 | class EncoderLayer(nn.Module): 362 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 363 | super(EncoderLayer, self).__init__() 364 | d_ff = d_ff or 4*d_model 365 | self.attention = attention 366 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 367 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 368 | self.norm1 = nn.LayerNorm(d_model) 369 | self.norm2 = nn.LayerNorm(d_model) 370 | self.dropout = nn.Dropout(dropout) 371 | self.activation = F.relu if activation == "relu" else F.gelu 372 | 373 | def forward(self, x, attn_mask=None): 374 | # x [B, L, D] 375 | # x = x + self.dropout(self.attention( 376 | # x, x, x, 377 | # attn_mask = attn_mask 378 | # )) 379 | #import ipdb ; ipdb.set_trace() 380 | new_x, attn = self.attention( 381 | x, x, x, 382 | attn_mask = attn_mask 383 | ) 384 | x = x + self.dropout(new_x) 385 | 386 | y = x = self.norm1(x) 387 | y = self.dropout(self.activation(self.conv1(y.transpose(-1,1)))) 388 | y = self.dropout(self.conv2(y).transpose(-1,1)) 389 | 390 | return self.norm2(x+y), attn 391 | 392 | class Encoder(nn.Module): 393 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 394 | super(Encoder, self).__init__() 395 | self.attn_layers = nn.ModuleList(attn_layers) 396 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 397 | self.norm = norm_layer 398 | 399 | def forward(self, x, attn_mask=None): 400 | # x [B, L, D] 401 | attns = [] 402 | if self.conv_layers is not None: 403 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): 404 | #import ipdb ; ipdb.set_trace() 405 | x, attn = attn_layer(x, attn_mask=attn_mask) 
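A small sketch (illustration only) of the query "sparsity" score that `ProbAttention._prob_QK` above uses to pick the top-u queries, computed here on the full dense score matrix for clarity; the real implementation evaluates it only on `sample_k` randomly sampled keys per query:

```python
import torch

torch.manual_seed(0)
B, H, L, E = 1, 1, 6, 4
Q = torch.randn(B, H, L, E)
K = torch.randn(B, H, L, E)

scores = torch.einsum("bhle,bhse->bhls", Q, K)        # dense QK^T, [B, H, L, L]
M = scores.max(dim=-1).values - scores.mean(dim=-1)   # max - mean per query
u = 3
top_queries = M.topk(u, dim=-1).indices               # queries kept by ProbAttention
print(M)
print(top_queries)
```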
406 | x = conv_layer(x) 407 | attns.append(attn) 408 | x, attn = self.attn_layers[-1](x, attn_mask=attn_mask) 409 | attns.append(attn) 410 | else: 411 | for attn_layer in self.attn_layers: 412 | x, attn = attn_layer(x, attn_mask=attn_mask) 413 | attns.append(attn) 414 | 415 | if self.norm is not None: 416 | x = self.norm(x) 417 | 418 | return x, attns 419 | 420 | class EncoderStack(nn.Module): 421 | def __init__(self, encoders, inp_lens): 422 | super(EncoderStack, self).__init__() 423 | self.encoders = nn.ModuleList(encoders) 424 | self.inp_lens = inp_lens 425 | 426 | def forward(self, x, attn_mask=None): 427 | # x [B, L, D] 428 | x_stack = []; attns = [] 429 | for i_len, encoder in zip(self.inp_lens, self.encoders): 430 | inp_len = x.shape[1]//(2**i_len) 431 | x_s, attn = encoder(x[:, -inp_len:, :]) 432 | x_stack.append(x_s); attns.append(attn) 433 | x_stack = torch.cat(x_stack, -2) 434 | 435 | return x_stack, attns 436 | 437 | class DecoderLayer(nn.Module): 438 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 439 | dropout=0.1, activation="relu"): 440 | super(DecoderLayer, self).__init__() 441 | d_ff = d_ff or 4*d_model 442 | self.self_attention = self_attention 443 | self.cross_attention = cross_attention 444 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 445 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 446 | self.norm1 = nn.LayerNorm(d_model) 447 | self.norm2 = nn.LayerNorm(d_model) 448 | self.norm3 = nn.LayerNorm(d_model) 449 | self.dropout = nn.Dropout(dropout) 450 | self.activation = F.relu if activation == "relu" else F.gelu 451 | 452 | def forward(self, x, cross, x_mask=None, cross_mask=None): 453 | x = x + self.dropout(self.self_attention( 454 | x, x, x, 455 | attn_mask=x_mask 456 | )[0]) 457 | x = self.norm1(x) 458 | 459 | x = x + self.dropout(self.cross_attention( 460 | x, cross, cross, 461 | attn_mask=cross_mask 462 | )[0]) 463 | 464 | y = x = self.norm2(x) 465 | y = self.dropout(self.activation(self.conv1(y.transpose(-1,1)))) 466 | y = self.dropout(self.conv2(y).transpose(-1,1)) 467 | 468 | return self.norm3(x+y) 469 | 470 | class Decoder(nn.Module): 471 | def __init__(self, layers, norm_layer=None): 472 | super(Decoder, self).__init__() 473 | self.layers = nn.ModuleList(layers) 474 | self.norm = norm_layer 475 | 476 | def forward(self, x, cross, x_mask=None, cross_mask=None): 477 | for layer in self.layers: 478 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) 479 | 480 | if self.norm is not None: 481 | x = self.norm(x) 482 | 483 | return x 484 | 485 | class Informer(nn.Module): 486 | def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len, 487 | factor=5, d_model=512, n_heads=8, e_layers=3, d_layers=2, d_ff=512, 488 | dropout=0.0, attn='prob', embed='fixed', freq='h', activation='gelu', 489 | output_attention = False, distil=True, mix=True, 490 | feats_info=None, 491 | device=torch.device('cuda:0')): 492 | super(Informer, self).__init__() 493 | self.pred_len = out_len 494 | self.attn = attn 495 | self.output_attention = output_attention 496 | self.feats_info = feats_info 497 | self.estimate_type = 'point' 498 | self.is_signature = False 499 | 500 | # Encoding 501 | self.enc_embedding = DataEmbedding(enc_in, d_model, feats_info, embed, freq, dropout) 502 | self.dec_embedding = DataEmbedding(dec_in, d_model, feats_info, embed, freq, dropout) 503 | # Attention 504 | Attn = ProbAttention if attn=='prob' else FullAttention 505 | # Encoder 506 | self.encoder = 
Encoder( 507 | [ 508 | EncoderLayer( 509 | AttentionLayer(Attn(False, factor, attention_dropout=dropout, output_attention=output_attention), 510 | d_model, n_heads, mix=False), 511 | d_model, 512 | d_ff, 513 | dropout=dropout, 514 | activation=activation 515 | ) for l in range(e_layers) 516 | ], 517 | [ 518 | ConvLayer( 519 | d_model 520 | ) for l in range(e_layers-1) 521 | ] if distil else None, 522 | norm_layer=torch.nn.LayerNorm(d_model) 523 | ) 524 | # Decoder 525 | self.decoder = Decoder( 526 | [ 527 | DecoderLayer( 528 | AttentionLayer(Attn(True, factor, attention_dropout=dropout, output_attention=False), 529 | d_model, n_heads, mix=mix), 530 | AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=False), 531 | d_model, n_heads, mix=False), 532 | d_model, 533 | d_ff, 534 | dropout=dropout, 535 | activation=activation, 536 | ) 537 | for l in range(d_layers) 538 | ], 539 | norm_layer=torch.nn.LayerNorm(d_model) 540 | ) 541 | # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True) 542 | # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True) 543 | self.projection = nn.Linear(d_model, c_out, bias=True) 544 | 545 | def forward(self, x_mark_enc, x_enc, x_mark_dec, x_dec=None, 546 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None, teacher_force=None): 547 | enc_out = self.enc_embedding(x_enc, x_mark_enc) 548 | #import ipdb ; ipdb.set_trace() 549 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) 550 | if x_dec is None: 551 | x_dec_ph = torch.zeros((x_enc.shape[0], self.pred_len, x_enc.shape[2]), dtype=torch.float, device=x_enc.device) 552 | x_dec = torch.cat([x_enc[..., -self.pred_len:, :], x_dec_ph], dim=1) 553 | #import ipdb ; ipdb.set_trace() 554 | x_mark_dec = torch.cat([x_mark_enc[..., -self.pred_len:, :], x_mark_dec], dim=1) 555 | #import ipdb ; ipdb.set_trace() 556 | 557 | dec_out = self.dec_embedding(x_dec, x_mark_dec) 558 | dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) 559 | dec_out = self.projection(dec_out) 560 | 561 | # dec_out = self.end_conv1(dec_out) 562 | # dec_out = self.end_conv2(dec_out.transpose(2,1)).transpose(1,2) 563 | if self.output_attention: 564 | return dec_out[:,-self.pred_len:,:], attns 565 | else: 566 | return dec_out[:,-self.pred_len:,:] # [B, L, D] 567 | 568 | 569 | class InformerStack(nn.Module): 570 | def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len, 571 | factor=5, d_model=512, n_heads=8, e_layers=[3,2,1], d_layers=2, d_ff=512, 572 | dropout=0.0, attn='prob', embed='fixed', freq='h', activation='gelu', 573 | output_attention = False, distil=True, mix=True, 574 | device=torch.device('cuda:0')): 575 | super(InformerStack, self).__init__() 576 | self.pred_len = out_len 577 | self.attn = attn 578 | self.output_attention = output_attention 579 | 580 | # Encoding 581 | self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq, dropout) 582 | self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq, dropout) 583 | # Attention 584 | Attn = ProbAttention if attn=='prob' else FullAttention 585 | # Encoder 586 | 587 | inp_lens = list(range(len(e_layers))) # [0,1,2,...] 
you can customize here 588 | encoders = [ 589 | Encoder( 590 | [ 591 | EncoderLayer( 592 | AttentionLayer(Attn(False, factor, attention_dropout=dropout, output_attention=output_attention), 593 | d_model, n_heads, mix=False), 594 | d_model, 595 | d_ff, 596 | dropout=dropout, 597 | activation=activation 598 | ) for l in range(el) 599 | ], 600 | [ 601 | ConvLayer( 602 | d_model 603 | ) for l in range(el-1) 604 | ] if distil else None, 605 | norm_layer=torch.nn.LayerNorm(d_model) 606 | ) for el in e_layers] 607 | self.encoder = EncoderStack(encoders, inp_lens) 608 | # Decoder 609 | self.decoder = Decoder( 610 | [ 611 | DecoderLayer( 612 | AttentionLayer(Attn(True, factor, attention_dropout=dropout, output_attention=False), 613 | d_model, n_heads, mix=mix), 614 | AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=False), 615 | d_model, n_heads, mix=False), 616 | d_model, 617 | d_ff, 618 | dropout=dropout, 619 | activation=activation, 620 | ) 621 | for l in range(d_layers) 622 | ], 623 | norm_layer=torch.nn.LayerNorm(d_model) 624 | ) 625 | # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True) 626 | # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True) 627 | self.projection = nn.Linear(d_model, c_out, bias=True) 628 | 629 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, 630 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): 631 | enc_out = self.enc_embedding(x_enc, x_mark_enc) 632 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) 633 | 634 | dec_out = self.dec_embedding(x_dec, x_mark_dec) 635 | dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) 636 | dec_out = self.projection(dec_out) 637 | 638 | # dec_out = self.end_conv1(dec_out) 639 | # dec_out = self.end_conv2(dec_out.transpose(2,1)).transpose(1,2) 640 | if self.output_attention: 641 | return dec_out[:,-self.pred_len:,:], attns 642 | else: 643 | return dec_out[:,-self.pred_len:,:] # [B, L, D] 644 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cvxpy==1.1.11 2 | ipdb==0.13.2 3 | matplotlib==3.4.3 4 | numba==0.52.0 5 | numpy==1.21.2 6 | pandas==1.3.3 7 | properscoring==0.1 8 | PyWavelets==1.1.1 9 | scipy==1.5.4 10 | statsmodels==0.12.2 11 | torch==1.7.1+cu101 12 | tslearn==0.4.1 13 | tsmoothie==1.0.1 14 | -------------------------------------------------------------------------------- /script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #python main.py taxi30min \ 4 | # --N_input 336 --N_output 168 \ 5 | # --saved_models_dir aaai_saved_models_taxi30min_e336_d168 \ 6 | # --output_dir aaai_Outputs_taxi30min_e336_d168 \ 7 | # --device cuda:0 8 | 9 | #for K in 4 6 12; do 10 | #echo aaai_Outputs_ett_e384_d192_gpt_normmin_$K 11 | # --K_list 1 12 \ 12 | #python main.py ett \ 13 | # --N_input 384 --N_output 192 \ 14 | # --saved_models_dir aaai_saved_models_ett_e384_d192_gpt_normmin_sharqall \ 15 | # --output_dir aaai_Outputs_ett_e384_d192_gpt_normmin_sharqall \ 16 | # --K_list 1 2 3 4 6 8 12 24 \ 17 | # --cv_inf 0 \ 18 | # --device cuda:0 19 | #done 20 | 21 | python main.py ett \ 22 | --N_input 384 --N_output 192 \ 23 | --saved_models_dir ijcai_saved_models_ett \ 24 | --output_dir ijcai_Outputs_ett \ 25 | --cv_inf 0 \ 26 | --device cuda:1 27 | 28 | 
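The input-length schedule used by `EncoderStack` above is easy to see in isolation. A minimal sketch (illustration only) with identity stand-ins for the encoders; the real encoders also shorten their outputs through the `ConvLayer` distilling path, so the concatenated length differs in practice:

```python
import torch

L, d_model = 96, 8
x = torch.randn(2, L, d_model)
inp_lens = [0, 1, 2]                        # as in InformerStack: range(len(e_layers))

# encoder i only sees the last L // 2**i steps
pieces = [x[:, -(L // 2 ** i):, :] for i in inp_lens]
print([p.shape[1] for p in pieces])         # [96, 48, 24]

x_stack = torch.cat(pieces, dim=-2)         # outputs concatenated along time
print(x_stack.shape)                        # torch.Size([2, 168, 8])
```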
#for K in 2 6 12; do 29 | #echo aaai_Outputs_Solar_e336_d168_2_rp_$K 30 | # --K_list 1 6 \ 31 | #python main.py Solar \ 32 | # --N_input 336 --N_output 168 \ 33 | # --saved_models_dir aaai_saved_models_Solar_e336_d168_2_rp_sharqall \ 34 | # --output_dir aaai_Outputs_Solar_e336_d168_2_rp_sharqall \ 35 | # --K_list 1 2 3 4 6 8 12 24 \ 36 | # --cv_inf 0 \ 37 | # --device cuda:1 38 | #done 39 | 40 | python main.py Solar \ 41 | --N_input 336 --N_output 168 \ 42 | --saved_models_dir ijcai_saved_models_Solar \ 43 | --output_dir ijcai_Outputs_Solar \ 44 | --cv_inf 0 \ 45 | --device cuda:1 46 | 47 | #for K in 2 6; do 48 | #echo aaai_Outputs_etthourly_e168_d168_gpt_normmin_$K 49 | # --K_list 1 6 \ 50 | #python main.py etthourly \ 51 | # --N_input 168 --N_output 168 \ 52 | # --saved_models_dir aaai_saved_models_etthourly_e168_d168_gpt_normmin_sharqall \ 53 | # --output_dir aaai_Outputs_etthourly_e168_d168_gpt_normmin_sharqall \ 54 | # --K_list 1 2 3 4 6 8 12 24 \ 55 | # --cv_inf 0 \ 56 | # --device cuda:2 57 | #done 58 | 59 | python main.py etthourly \ 60 | --N_input 168 --N_output 168 \ 61 | --saved_models_dir ijcai_saved_models_etthourly \ 62 | --output_dir ijcai_Outputs_etthourly \ 63 | --cv_inf 0 \ 64 | --device cuda:1 65 | 66 | #for K in 2 6 12; do 67 | # #echo aaai_Outputs_electricity_e336_d168_testprune_2_rp_$K 68 | # --K_list 1 6 12 \ 69 | #python main.py electricity \ 70 | # --N_input 336 --N_output 168 \ 71 | # --saved_models_dir aaai_saved_models_electricity_e336_d168_testprune_2_rp_sharqall \ 72 | # --output_dir aaai_Outputs_electricity_e336_d168_testprune_2_sharqall \ 73 | # --K_list 1 2 3 4 6 8 12 24 \ 74 | # --cv_inf 0 \ 75 | # --device cuda:1 76 | #done 77 | 78 | python main.py electricity \ 79 | --N_input 336 --N_output 168 \ 80 | --saved_models_dir ijcai_saved_models_electricity \ 81 | --output_dir ijcai_Outputs_electricity \ 82 | --cv_inf 0 \ 83 | --device cuda:1 84 | 85 | #python main.py foodinflation \ 86 | # --N_input 90 --N_output 30 \ 87 | # --saved_models_dir saved_models_foodinflation_gpt_small_normzs_shiftmin \ 88 | # --output_dir Outputs_foodinflation_gpt_small_normzs_shiftmin \ 89 | # --K_list 1 \ 90 | # --cv_inf 0 \ 91 | # --device cuda:0 92 | 93 | #python main.py foodinflationmonthly \ 94 | # --N_input 12 --N_output 3 \ 95 | # --saved_models_dir saved_models_foodinflationmonthly_gpt_small_normzs_shiftmin \ 96 | # --output_dir Outputs_foodinflationmonthly_gpt_small_normzs_shiftmin \ 97 | # --K_list 1 \ 98 | # --cv_inf 0 \ 99 | # --device cuda:0 100 | 101 | # This dataset is used for testing/debugging 102 | #python main.py aggtest \ 103 | # --N_input 20 --N_output 20 \ 104 | # --saved_models_dir aaai_saved_models_aggtest_e20_d20_gpt_conv_feats_nar \ 105 | # --output_dir aaai_Outputs_aggtest_e20_d20_gpt_conv_feats_nar \ 106 | # --K_list 1 \ 107 | # --cv_inf 0 \ 108 | # --device cuda:0 109 | 110 | # Commands for Oracle and SimRetrieval 111 | #python main.py ett \ 112 | # --N_input 3840 --N_output 192 \ 113 | # --saved_models_dir aaai_saved_models_ett_oracle \ 114 | # --output_dir aaai_Outputs_ett_oracle \ 115 | # --normalize same \ 116 | # --device cuda:0 117 | 118 | #python main.py Solar \ 119 | # --N_input 1680 --N_output 168 \ 120 | # --saved_models_dir aaai_saved_models_Solar_oracle \ 121 | # --output_dir aaai_Outputs_Solar_oracle \ 122 | # --normalize same \ 123 | # --device cuda:0 124 | 125 | #python main.py etthourly \ 126 | # --N_input 840 --N_output 168 \ 127 | # --saved_models_dir aaai_saved_models_etthourly_oracle \ 128 | # --output_dir 
aaai_Outputs_etthourly_oracle \ 129 | # --normalize same \ 130 | # --device cuda:0 131 | # 132 | #python main.py electricity \ 133 | # --N_input 1680 --N_output 168 \ 134 | # --saved_models_dir aaai_saved_models_electricity_oracle \ 135 | # --output_dir aaai_Outputs_electricity_oracle \ 136 | # --normalize same \ 137 | # --device cuda:0 138 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from torch.optim.lr_scheduler import ReduceLROnPlateau 5 | from loss.dilate_loss import dilate_loss 6 | from eval import eval_base_model, eval_index_model 7 | import time 8 | from models.base_models import get_base_model 9 | from utils import DataProcessor, get_a, aggregate_data 10 | import random 11 | from torch.distributions.normal import Normal 12 | from copy import deepcopy 13 | 14 | 15 | def get_optimizer(args, lr, net): 16 | optimizer = torch.optim.Adam(net.parameters(),lr=lr) 17 | scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.95, patience=5, verbose=True) 18 | return optimizer, scheduler 19 | 20 | 21 | def train_model( 22 | args, model_name, net, data_dict, saved_models_path, writer, agg_method, level, 23 | verbose=1, bottom_net=None, bottom_data_dict=None, sharq_step=0, 24 | ): 25 | 26 | if bottom_net is None and bottom_data_dict is None: 27 | bottom_net = deepcopy(net) 28 | bottom_data_dict = deepcopy(data_dict) 29 | 30 | lr = args.learning_rate 31 | epochs = args.epochs 32 | 33 | trainloader = data_dict['trainloader'] 34 | devloader = data_dict['devloader'] 35 | testloader = data_dict['testloader'] 36 | norm = data_dict['dev_norm'] 37 | N_input = data_dict['N_input'] 38 | N_output = data_dict['N_output'] 39 | input_size = data_dict['input_size'] 40 | output_size = data_dict['output_size'] 41 | bottomloader = bottom_data_dict['trainloader'] 42 | a = get_a('sum', level).to(args.device) 43 | Lambda=1 44 | 45 | optimizer, scheduler = get_optimizer(args, lr, net) 46 | 47 | criterion = torch.nn.MSELoss() 48 | cos_sim = torch.nn.CosineSimilarity(dim=2) 49 | 50 | if (not args.ignore_ckpt) and os.path.isfile(saved_models_path): 51 | print('Loading from saved model') 52 | checkpoint = torch.load(saved_models_path, map_location=args.device) 53 | net.load_state_dict(checkpoint['model_state_dict']) 54 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 55 | best_epoch = checkpoint['epoch'] 56 | best_metric = checkpoint['metric'] 57 | epochs = 0 58 | else: 59 | if args.ignore_ckpt: 60 | print('Ignoring saved checkpoint') 61 | else: 62 | print('No saved model found') 63 | best_epoch = -1 64 | best_metric = np.inf 65 | net.train() 66 | 67 | if net.estimate_type in ['point']: 68 | mse_loss = torch.nn.MSELoss() 69 | 70 | curr_patience = args.patience 71 | curr_step = 0 72 | for curr_epoch in range(best_epoch+1, best_epoch+1+epochs): 73 | bottomiterator = iter(bottomloader) 74 | epoch_loss, epoch_time = 0., 0. 
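For reference, the checkpoint layout that `train_model` reads and writes ('model_state_dict', 'optimizer_state_dict', 'epoch', 'metric'), shown on a throwaway model (illustration only; the file name is arbitrary):

```python
import torch

net = torch.nn.Linear(4, 1)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

state_dict = {
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'epoch': 7,
    'metric': 0.123,
}
torch.save(state_dict, 'checkpoint_example.pt')

ckpt = torch.load('checkpoint_example.pt')
net.load_state_dict(ckpt['model_state_dict'])
optimizer.load_state_dict(ckpt['optimizer_state_dict'])
print(ckpt['epoch'], ckpt['metric'])
```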
75 | for i, data in enumerate(trainloader, 0): 76 | st = time.time() 77 | inputs, target, feats_in, feats_tgt, _, _ = data 78 | target = target.to(args.device) 79 | batch_size, N_output = target.shape[0:2] 80 | 81 | # forward + backward + optimize 82 | teacher_forcing_ratio = args.teacher_forcing_ratio 83 | #teacher_force = True if random.random() <= teacher_forcing_ratio else False 84 | if model_name in [ 85 | 'trans-nll-atr', 'rnn-mse-ar', 'rnn-nll-ar', 86 | 'gpt-nll-ar', 'gpt-mse-ar' 87 | ]: 88 | teacher_force = True 89 | else: 90 | teacher_force = False 91 | out = net( 92 | feats_in.to(args.device), inputs.to(args.device), 93 | feats_tgt.to(args.device), target.to(args.device), 94 | teacher_force=teacher_force 95 | ) 96 | if net.is_signature: 97 | if net.estimate_type in ['point']: 98 | means, dec_state, sig_state = out 99 | elif net.estimate_type in ['variance']: 100 | means, stds, dec_state, sig_state = out 101 | elif net.estimate_type in ['covariance']: 102 | means, stds, vs, dec_state, sig_state = out 103 | elif net.estimate_type in ['bivariate']: 104 | means, stds, rho, dec_state, sig_state = out 105 | else: 106 | if net.estimate_type in ['point']: 107 | means = out 108 | elif net.estimate_type in ['variance']: 109 | means, stds = out 110 | elif net.estimate_type in ['covariance']: 111 | means, stds, vs = out 112 | elif net.estimate_type in ['bivariate']: 113 | means, stds, rho = out 114 | 115 | loss_mse,loss_shape,loss_temporal = torch.tensor(0),torch.tensor(0),torch.tensor(0) 116 | 117 | 118 | if model_name in ['seq2seqdilate']: 119 | raise NotImplementedError 120 | loss, loss_shape, loss_temporal = dilate_loss( 121 | target, means, args.alpha, args.gamma, args.device 122 | ) 123 | if net.estimate_type == 'covariance': 124 | order = torch.randperm(target.shape[1]) 125 | means_shuffled = torch.cat( 126 | torch.split(means[..., order, :], args.b, dim=1), dim=0 127 | ).squeeze(dim=-1) 128 | stds_shuffled = torch.cat( 129 | torch.split(stds[..., order, :], args.b, dim=1), dim=0 130 | ).squeeze(dim=-1) 131 | vs_shuffled = torch.cat( 132 | torch.split(vs[..., order, :], args.b, dim=1), dim=0 133 | ) 134 | target_shuffled = torch.cat( 135 | torch.split(target[..., order, :], args.b, dim=1), dim=0 136 | ).squeeze(dim=-1) 137 | dist = torch.distributions.lowrank_multivariate_normal.LowRankMultivariateNormal( 138 | means_shuffled, vs_shuffled, stds_shuffled 139 | ) 140 | loss = -torch.mean(dist.log_prob(target_shuffled)) 141 | #import ipdb ; ipdb.set_trace() 142 | elif net.estimate_type == 'variance': 143 | #dist = torch.distributions.normal.Normal(means, stds) 144 | #loss = torch.mean(-dist.log_prob(target)) 145 | loss = -torch.mean(-0.5*(target-means)**2/stds**2 - torch.log(stds)) 146 | elif net.estimate_type in ['point']: 147 | loss = mse_loss(target, means) 148 | elif net.estimate_type in ['bivariate']: 149 | means_avg = 0.5 * (means[..., :-1, :] + means[..., 1:, :]) 150 | var_a, var_b = stds[..., :-1, :]**2, stds[..., 1:, :]**2 151 | var_avg = var_a/4. + var_b/4. + rho * var_a * var_b / 2. 152 | stds_avg = var_avg**0.5 153 | target_avg = 0.5 * (target[..., :-1, :] + target[..., 1:, :]) 154 | 155 | dist = torch.distributions.normal.Normal(means, stds) 156 | dist_avg = torch.distributions.normal.Normal(means_avg, stds_avg) 157 | 158 | loss = torch.mean(-dist.log_prob(target)) 159 | loss += torch.mean(-dist_avg.log_prob(target_avg)) 160 | #import ipdb ; ipdb.set_trace() 161 | if net.is_signature: 162 | sig_loss = torch.mean(1. 
- cos_sim(dec_state, sig_state)) 163 | loss += sig_loss 164 | 165 | if 'sharq' in model_name: 166 | try: 167 | bt_data = next(bottomiterator) 168 | except StopIteration: 169 | bottomiterator = iter(bottomloader) 170 | bt_data = next(bottomiterator) 171 | bt_inputs, bt_target, bt_feats_in, bt_feats_tgt, _, _ = bt_data 172 | 173 | bt_out = bottom_net( 174 | bt_feats_in.to(args.device), bt_inputs.to(args.device), 175 | bt_feats_tgt.to(args.device), bt_target.to(args.device), 176 | teacher_force=teacher_force 177 | ) 178 | 179 | bt_means, bt_stds = bt_out 180 | bt_means = bt_means.detach() 181 | bt_stds = bt_stds.detach() 182 | 183 | bt_means_agg = aggregate_data( 184 | bt_means.squeeze(-1), agg_method, level, False, a 185 | ).unsqueeze(-1) 186 | 187 | #if sharq_step==0: 188 | if True: 189 | if bt_means_agg.shape[0] == means.shape[0]: 190 | loss += args.sharq_reg*torch.mean(torch.square(bt_means_agg-means)) 191 | #elif sharq_step==1: 192 | if True: 193 | bt_stds_agg = aggregate_data( 194 | bt_stds.squeeze(-1)**2, agg_method, level, True, a 195 | ).unsqueeze(-1).sqrt() 196 | quantiles = torch.arange(0.1, 0.91, 0.1) 197 | if bt_means_agg.shape[0] == means.shape[0]: 198 | for q in quantiles: 199 | bt_quantile = Normal(bt_means, bt_stds).icdf(q) 200 | bt_qdiff = torch.square(bt_quantile-bt_means) 201 | bt_qdiff_agg = aggregate_data( 202 | bt_qdiff.squeeze(-1), agg_method, level, True, a 203 | ).unsqueeze(-1) 204 | quantile = Normal(means, stds).icdf(q) 205 | qdiff = torch.square(quantile-means) 206 | loss += torch.square(qdiff-bt_qdiff_agg).mean() 207 | #loss = loss.mean() 208 | 209 | #import ipdb; ipdb.set_trace() 210 | 211 | #raise NotImplementedError 212 | 213 | 214 | epoch_loss += loss.item() 215 | 216 | optimizer.zero_grad() 217 | loss.backward() 218 | optimizer.step() 219 | et = time.time() 220 | epoch_time += (et-st) 221 | print('Time required for batch ', i, ':', \ 222 | et-st, 'loss:', loss.item(), \ 223 | teacher_forcing_ratio, teacher_force, curr_patience) 224 | #if i>=100: 225 | # break 226 | if (curr_step % args.print_every == 0): 227 | ( 228 | _, _, pred_mu, pred_std, 229 | metric_dilate, metric_mse, metric_dtw, metric_tdi, 230 | metric_crps, metric_mae, metric_crps_part, metric_nll 231 | )= eval_base_model( 232 | args, model_name, net, devloader, norm, args.gamma, verbose=1 233 | ) 234 | 235 | if model_name in ['seq2seqdilate']: 236 | raise NotImplementedError 237 | metric = metric_dilate 238 | 239 | if net.estimate_type in ['point']: 240 | metric = metric_mse 241 | elif net.estimate_type in ['variance', 'covariance', 'bivariate']: 242 | metric = metric_nll 243 | #metric = metric_crps 244 | 245 | if metric < best_metric: 246 | curr_patience = args.patience 247 | best_metric = metric 248 | best_epoch = curr_epoch 249 | state_dict = { 250 | 'model_state_dict': net.state_dict(), 251 | 'optimizer_state_dict': optimizer.state_dict(), 252 | 'epoch': best_epoch, 253 | 'metric': best_metric, 254 | } 255 | torch.save(state_dict, saved_models_path) 256 | print('Model saved at epoch', curr_epoch, 'step', curr_step) 257 | else: 258 | curr_patience -= 1 259 | 260 | scheduler.step(metric) 261 | 262 | # ...log the metrics 263 | if model_name in ['seq2seqdilate']: 264 | raise NotImplementedError 265 | writer.add_scalar('dev_metrics/dilate', metric_dilate, curr_step) 266 | writer.add_scalar('dev_metrics/crps', metric_crps, curr_step) 267 | writer.add_scalar('dev_metrics/mae', metric_mae, curr_step) 268 | writer.add_scalar('dev_metrics/mse', metric_mse, curr_step) 269 | 
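A quick check (illustration only) that the hand-written Gaussian NLL used above for the 'variance' estimate type matches `torch.distributions.Normal.log_prob` up to the additive constant 0.5*log(2*pi), so both give identical gradients:

```python
import math
import torch
from torch.distributions.normal import Normal

torch.manual_seed(0)
target = torch.randn(3, 5)
means = torch.randn(3, 5)
stds = torch.rand(3, 5) + 0.1

loss_manual = -torch.mean(-0.5 * (target - means) ** 2 / stds ** 2 - torch.log(stds))
loss_dist = -torch.mean(Normal(means, stds).log_prob(target))
print(torch.allclose(loss_manual + 0.5 * math.log(2 * math.pi), loss_dist, atol=1e-5))  # True
```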
writer.add_scalar('dev_metrics/nll', metric_nll, curr_step) 270 | 271 | curr_step += 1 # Increment the step 272 | if curr_patience == 0: 273 | break 274 | 275 | # ...log the epoch_loss 276 | if model_name in ['seq2seqdilate']: 277 | raise NotImplementedError 278 | writer.add_scalar('training_loss/DILATE', epoch_loss, curr_epoch) 279 | writer.add_scalar('training_time/epoch_time', epoch_time, curr_epoch) 280 | writer.add_scalar('training_time/nll_train', epoch_loss, curr_epoch) 281 | 282 | 283 | if(verbose): 284 | if (curr_step % args.print_every == 0): 285 | print('curr_epoch ', curr_epoch, \ 286 | ' epoch_loss ', epoch_loss, \ 287 | ' loss shape ',loss_shape.item(), \ 288 | ' loss temporal ',loss_temporal.item(), \ 289 | 'learning_rate:', optimizer.param_groups[0]['lr']) 290 | 291 | if curr_patience == 0: 292 | break 293 | 294 | print('Best model found at epoch', best_epoch) 295 | #net.load_state_dict(torch.load(saved_models_path)) 296 | checkpoint = torch.load(saved_models_path, map_location=args.device) 297 | net.load_state_dict(checkpoint['model_state_dict']) 298 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 299 | net.eval() 300 | ( 301 | _, _, pred_mu, pred_std, 302 | metric_dilate, metric_mse, metric_dtw, metric_tdi, 303 | metric_crps, metric_mae, metric_crps_part, metric_nll 304 | ) = eval_base_model( 305 | args, model_name, net, devloader, norm, args.gamma, verbose=1 306 | ) 307 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | import torch 3 | from torch.distributions.normal import Normal 4 | import numpy as np 5 | import os 6 | from collections import OrderedDict 7 | import pywt 8 | import pandas as pd 9 | import re 10 | import time 11 | import shutil 12 | from tsmoothie.smoother import SpectralSmoother, ExponentialSmoother 13 | from statsmodels.tsa.seasonal import seasonal_decompose 14 | import time 15 | 16 | from data.synthetic_dataset import create_synthetic_dataset, create_sin_dataset, SyntheticDataset 17 | from data.real_dataset import parse_ECG5000, parse_Traffic, parse_Taxi, parse_Traffic911, parse_gc_datasets, parse_weather, parse_bafu, parse_meteo, parse_azure, parse_ett, parse_sin_noisy, parse_Solar, parse_etthourly, parse_m4hourly, parse_m4daily, parse_taxi30min, parse_aggtest, parse_electricity, parse_foodinflation, parse_foodinflationmonthly 18 | 19 | 20 | to_float_tensor = lambda x: torch.FloatTensor(x.copy()) 21 | to_long_tensor = lambda x: torch.FloatTensor(x.copy()) 22 | 23 | def copy_and_overwrite(from_path, to_path): 24 | if os.path.exists(to_path): 25 | shutil.rmtree(to_path) 26 | shutil.copytree(from_path, to_path) 27 | 28 | def clean_trial_checkpoints(result): 29 | for trl in result.trials: 30 | trl_paths = result.get_trial_checkpoints_paths(trl,'metric') 31 | for path, _ in trl_paths: 32 | shutil.rmtree(path) 33 | 34 | def add_metrics_to_dict( 35 | metrics_dict, model_name, metric_mse, metric_dtw, metric_tdi, metric_crps, metric_mae, 36 | metric_smape 37 | ): 38 | #if model_name not in metrics_dict: 39 | # metrics_dict[model_name] = dict() 40 | 41 | metrics_dict['mse'] = metric_mse 42 | metrics_dict['dtw'] = metric_dtw 43 | metrics_dict['tdi'] = metric_tdi 44 | metrics_dict['crps'] = metric_crps 45 | metrics_dict['mae'] = metric_mae 46 | metrics_dict['smape'] = metric_smape 47 | 48 | return metrics_dict 49 | 50 | def add_base_metrics_to_dict( 51 | metrics_dict, 
agg_method, K, model_name, metric_mse, metric_dtw, metric_tdi, metric_crps, metric_mae, 52 | ): 53 | if agg_method not in metrics_dict: 54 | metrics_dict[agg_method] = {} 55 | if K not in metrics_dict[agg_method]: 56 | metrics_dict[agg_method][K] = {} 57 | if model_name not in metrics_dict[agg_method][K]: 58 | metrics_dict[agg_method][K][model_name] = {} 59 | 60 | metrics_dict[agg_method][K][model_name]['mse'] = metric_mse 61 | metrics_dict[agg_method][K][model_name]['dtw'] = metric_dtw 62 | metrics_dict[agg_method][K][model_name]['tdi'] = metric_tdi 63 | metrics_dict[agg_method][K][model_name]['crps'] = metric_crps 64 | metrics_dict[agg_method][K][model_name]['mae'] = metric_mae 65 | #metrics_dict[model_name]['smape'] = metric_smape 66 | 67 | return metrics_dict 68 | 69 | 70 | def write_arr_to_file( 71 | output_dir, inf_model_name, inputs, targets, pred_mu, pred_std, pred_d, pred_v 72 | ): 73 | 74 | # Files are saved in .npy format 75 | np.save(os.path.join(output_dir, inf_model_name + '_' + 'pred_mu'), pred_mu) 76 | np.save(os.path.join(output_dir, inf_model_name + '_' + 'pred_std'), pred_std) 77 | np.save(os.path.join(output_dir, inf_model_name + '_' + 'pred_d'), pred_d) 78 | np.save(os.path.join(output_dir, inf_model_name + '_' + 'pred_v'), pred_v) 79 | 80 | for fname in os.listdir(output_dir): 81 | if fname.endswith('targets.npy'): 82 | break 83 | else: 84 | np.save(os.path.join(output_dir, 'inputs'), inputs) 85 | np.save(os.path.join(output_dir, 'targets'), targets) 86 | 87 | def write_aggregate_preds_to_file( 88 | output_dir, base_model_name, agg_method, level, inputs, targets, pred_mu, pred_std 89 | ): 90 | 91 | # Files are saved in .npy format 92 | sep = '__' 93 | model_str = base_model_name + sep + agg_method + sep + str(level) 94 | agg_str = agg_method + sep + str(level) 95 | 96 | np.save(os.path.join(output_dir, model_str + sep + 'pred_mu'), pred_mu.detach().numpy()) 97 | np.save(os.path.join(output_dir, model_str + sep + 'pred_std'), pred_std.detach().numpy()) 98 | 99 | suffix = agg_str + sep + 'targets.npy' 100 | for fname in os.listdir(output_dir): 101 | if fname.endswith(suffix): 102 | break 103 | else: 104 | np.save(os.path.join(output_dir, agg_str + sep + 'inputs'), inputs.detach().numpy()) 105 | np.save(os.path.join(output_dir, agg_str + sep + 'targets'), targets.detach().numpy()) 106 | 107 | 108 | class Normalizer(object): 109 | def __init__(self, data, norm_type): 110 | super(Normalizer, self).__init__() 111 | self.norm_type = norm_type 112 | self.N = len(data) 113 | if norm_type in ['same']: 114 | pass 115 | elif norm_type in ['zscore_per_series']: 116 | self.mean = map(lambda x: x.mean(0, keepdims=True), data) #data.mean(1, keepdims=True) 117 | self.std = map(lambda x: x.std(0, keepdims=True), data) #data.std(1, keepdims=True) 118 | #import ipdb ; ipdb.set_trace() 119 | self.mean = torch.stack(list(self.mean), dim=0) 120 | self.std = torch.stack(list(self.std), dim=0) 121 | self.std = self.std.clamp(min=1., max=None) 122 | elif norm_type in ['zeroshift_per_series']: 123 | self.first = map(lambda x: x[0:1], data) #data.mean(1, keepdims=True) 124 | self.std = map(lambda x: x.std(0, keepdims=True), data) 125 | #import ipdb ; ipdb.set_trace() 126 | self.first = torch.stack(list(self.first), dim=0) 127 | self.std = torch.stack(list(self.std), dim=0) 128 | self.std = self.std.clamp(min=1., max=None) 129 | elif norm_type in ['min_per_series']: 130 | self.first = map(lambda x: x.min(0, keepdims=True)[0], data) 131 | self.std = map(lambda x: x.std(0, keepdims=True), data) 132 
| #import ipdb ; ipdb.set_trace() 133 | self.first = torch.stack(list(self.first), dim=0) 134 | self.std = torch.stack(list(self.std), dim=0) 135 | self.std = self.std.clamp(min=1., max=None) 136 | elif norm_type in ['log']: 137 | pass 138 | elif norm_type in ['gaussian_copula']: 139 | ns = data.shape[1] * 1. 140 | #self.delta = 1. / (4*np.power(ns, 0.25) * np.power(np.pi*np.log(ns), 0.5)) 141 | self.delta = 1e-5 142 | data_sorted, indices = data.sort(1) 143 | data_sorted_uq = torch.unique(data_sorted, sorted=True, dim=-1) 144 | counts = torch.cat( 145 | [(data_sorted == data_sorted_uq[:, i:i+1]).sum(dim=1, keepdims=True) for i in range(data_sorted_uq.shape[1])], 146 | dim=1 147 | ) 148 | #import ipdb; ipdb.set_trace() 149 | self.x = data_sorted_uq 150 | self.x = torch.cat([self.x, 1.1*data_sorted[..., -1:]], dim=1) 151 | self.y = torch.cumsum(counts, 1)*1./data.shape[1] 152 | self.y = self.y.clamp(self.delta, 1.0-self.delta) 153 | self.y = torch.cat([self.y, torch.ones((data.shape[0], 1))*self.delta], dim=1) 154 | self.m = (self.y[..., 1:] - self.y[..., :-1]) / (self.x[..., 1:] - self.x[..., :-1]) 155 | self.m = torch.maximum(self.m, torch.ones_like(self.m)*1e-4) 156 | self.c = self.y[..., :-1] 157 | #import ipdb; ipdb.set_trace() 158 | 159 | 160 | def normalize(self, data, ids=None, is_var=False): 161 | if ids is None: 162 | ids = torch.arange(self.N) 163 | 164 | if self.norm_type in ['same']: 165 | data_norm = data 166 | elif self.norm_type in ['zscore_per_series']: 167 | if not is_var: 168 | data_norm = (data - self.mean[ids]) / self.std[ids] 169 | else: 170 | data_norm = data / self.std[ids] 171 | elif self.norm_type in ['zeroshift_per_series', 'min_per_series']: 172 | if not is_var: 173 | data_norm = (data - self.first[ids]) / self.std[ids] 174 | else: 175 | data_norm = data / self.std[ids] 176 | elif self.norm_type in ['log']: 177 | data_norm = torch.log(data) 178 | elif self.norm_type in ['gaussian_copula']: 179 | # Piecewise linear fit of CDF 180 | indices = torch.searchsorted(self.x[ids], data).clamp(0, self.x.shape[-1]) 181 | m = torch.gather(self.m[ids], -1, indices) 182 | c = torch.gather(self.c[ids], -1, indices) 183 | x_prev = torch.gather(self.x[ids], -1, indices) 184 | data_norm = (data - x_prev) * m + c 185 | data_norm = data_norm.clamp(self.delta, 1.0-self.delta) 186 | #import ipdb; ipdb.set_trace() 187 | 188 | # ICDF in standard normal 189 | dist = Normal(0., 1.) 190 | data_norm = dist.icdf(data_norm) 191 | #import ipdb; ipdb.set_trace() 192 | 193 | return data_norm.unsqueeze(-1) 194 | 195 | def unnormalize(self, data, ids=None, is_var=False): 196 | #return data # TODO Watch this 197 | if ids is None: 198 | ids = torch.arange(self.N) 199 | if self.norm_type in ['same']: 200 | data_unnorm = data 201 | elif self.norm_type in ['log']: 202 | data_unnorm = torch.exp(data) 203 | elif self.norm_type in ['zscore_per_series']: 204 | if not is_var: 205 | data_unnorm = data * self.std[ids] + self.mean[ids] 206 | else: 207 | data_unnorm = data * self.std[ids] 208 | elif self.norm_type in ['zeroshift_per_series', 'min_per_series']: 209 | if not is_var: 210 | data_unnorm = data * self.std[ids] + self.first[ids] 211 | else: 212 | data_unnorm = data * self.std[ids] 213 | elif self.norm_type in ['gaussian_copula']: 214 | # CDF in standard normal 215 | dist = Normal(0., 1.) 
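The idea behind the 'gaussian_copula' option of `Normalizer` above, in a minimal sketch (illustration only): push each value through an empirical CDF and then through the standard normal inverse CDF. The class itself fits a clamped piecewise-linear CDF so the transform can be inverted in `unnormalize`:

```python
import torch
from torch.distributions.normal import Normal

x = torch.tensor([0.1, 0.5, 0.5, 2.0, 9.0])
ranks = torch.argsort(torch.argsort(x)).float()
u = (ranks + 1) / (len(x) + 1)     # rough empirical CDF values in (0, 1)
z = Normal(0., 1.).icdf(u)         # approximately standard normal scores
print(u)
print(z)
```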
216 | data = dist.cdf(data) 217 | 218 | # Inverse piecewise linear fit of CDF 219 | indices = torch.searchsorted(self.y[ids], data).clamp(0, self.x.shape[-1]) 220 | m = torch.gather(self.m[ids], -1, indices) 221 | c = torch.gather(self.c[ids], -1, indices) 222 | x_prev = torch.gather(self.x[ids], -1, indices) 223 | data_unnorm = (data - c) / m + x_prev 224 | 225 | return data_unnorm 226 | 227 | def normalize(data, norm=None, norm_type=None, is_var=False): 228 | if norm is None: 229 | assert norm_type is not None 230 | 231 | if norm_type in ['same']: # No normalization 232 | scale = np.ones_like(np.mean(data, axis=(1), keepdims=True)) 233 | shift = np.zeros_like(scale) 234 | norm = np.concatenate([shift, scale], axis=-1) 235 | if norm_type in ['avg']: # mean of entire data 236 | norm = np.mean(data, axis=(0, 1)) 237 | scale = np.ones_like(np.mean(data, axis=(1), keepdims=True)) * norm 238 | shift = np.zeros_like(scale) 239 | norm = np.concatenate([shift, scale], axis=-1) 240 | elif norm_type in ['avg_per_series']: # per-series mean 241 | scale = np.mean(data, axis=(1), keepdims=True) 242 | shift = np.zeros_like(scale) 243 | norm = np.concatenate([shift, scale], axis=-1) 244 | elif norm_type in ['quantile90']: # 0.9 quantile of entire data 245 | scale = np.quantile(data, 0.90, axis=(0, 1)) 246 | shift = np.zeros_like(scale) 247 | norm = np.concatenate([shift, scale], axis=-1) 248 | elif norm_type in ['std']: # std of entire data 249 | scale = np.std(data, axis=(0,1)) 250 | shift = np.zeros_like(scale) 251 | norm = np.concatenate([shift, scale], axis=-1) 252 | elif norm_type in ['zscore_per_series']: # z-score at each series 253 | mean = np.mean(data, axis=(1), keepdims=True) # per-series mean 254 | std = np.std(data, axis=(1), keepdims=True) # per-series std 255 | norm = np.concatenate([mean, std], axis=-1) 256 | 257 | if is_var: 258 | data_norm = data * 1.0 / norm[ ... , :, 1:2 ] 259 | else: 260 | data_norm = (data - norm[...,:,0:1])* 1.0/norm[...,:,1:2] 261 | #data_norm = data * 10.0/norm 262 | #import ipdb 263 | #ipdb.set_trace() 264 | return data_norm, norm 265 | 266 | def unnormalize(data, norm, is_var): 267 | if is_var: 268 | data_unnorm = data * norm[ ... , : , 1:2 ] 269 | else: 270 | data_unnorm = data * norm[ ... , : , 1:2 ] + norm[ ... , : , 0:1 ] 271 | 272 | return data_unnorm 273 | 274 | sqz = lambda x: np.squeeze(x, axis=-1) 275 | expand = lambda x: np.expand_dims(x, axis=-1) 276 | 277 | def shift_timestamp(ts, offset): 278 | result = ts + offset * ts.freq 279 | return pd.Timestamp(result, freq=ts.freq) 280 | 281 | def get_date_range(start, seq_len): 282 | end = shift_timestamp(start, seq_len) 283 | full_date_range = pd.date_range(start, end, freq=start.freq) 284 | return full_date_range 285 | 286 | def get_granularity(freq_str: str): 287 | """ 288 | Splits a frequency string such as "7D" into the multiple 7 and the base 289 | granularity "D". 290 | 291 | Parameters 292 | ---------- 293 | 294 | freq_str 295 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 296 | """ 297 | freq_regex = r'\s*((\d+)?)\s*([^\d]\w*)' 298 | m = re.match(freq_regex, freq_str) 299 | assert m is not None, "Cannot parse frequency string: %s" % freq_str 300 | groups = m.groups() 301 | multiple = int(groups[1]) if groups[1] is not None else 1 302 | granularity = groups[2] 303 | return multiple, granularity 304 | 305 | class TimeFeature: 306 | """ 307 | Base class for features that only depend on time. 
308 | """ 309 | 310 | def __init__(self, normalized: bool = True): 311 | self.normalized = normalized 312 | 313 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 314 | pass 315 | 316 | def __repr__(self): 317 | return self.__class__.__name__ + '()' 318 | 319 | class FourrierDateFeatures(TimeFeature): 320 | def __init__(self, freq: str) -> None: 321 | # reocurring freq 322 | freqs = [ 323 | 'month', 324 | 'day', 325 | 'hour', 326 | 'minute', 327 | 'weekofyear', 328 | 'weekday', 329 | 'dayofweek', 330 | 'dayofyear', 331 | 'daysinmonth', 332 | ] 333 | 334 | assert freq in freqs 335 | self.freq = freq 336 | 337 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 338 | values = getattr(index, self.freq) 339 | num_values = max(values) + 1 340 | steps = [x * 2.0 * np.pi / num_values for x in values] 341 | #return np.vstack([np.cos(steps), np.sin(steps)]) 342 | return np.stack([np.cos(steps), np.sin(steps)], axis=-1) 343 | 344 | def time_features_from_frequency_str(freq_str): 345 | multiple, granularity = get_granularity(freq_str) 346 | 347 | features = { 348 | 'M': ['weekofyear'], 349 | 'W': ['daysinmonth', 'weekofyear'], 350 | 'D': ['dayofweek'], 351 | 'B': ['dayofweek', 'dayofyear'], 352 | #'H': ['hour', 'dayofweek'], 353 | 'H': ['hour'], 354 | #'min': ['minute', 'hour', 'dayofweek'], 355 | 'min': ['minute', 'hour'], 356 | 'T': ['minute', 'hour', 'dayofweek'], 357 | } 358 | 359 | assert granularity in features, f"freq {granularity} not supported" 360 | 361 | feature_classes= [ 362 | FourrierDateFeatures(freq=freq) for freq in features[granularity] 363 | ] 364 | return feature_classes 365 | 366 | def fit_slope_with_indices(seq, K, is_var): 367 | x = np.reshape(np.ones_like(seq), (-1, K)) 368 | x = np.cumsum(x, axis=1) - 1 369 | y = np.reshape(seq, (-1, K)) 370 | m_x = np.mean(x, axis=1, keepdims=True) 371 | m_y = np.mean(y, axis=1, keepdims=True) 372 | s_xy = np.sum((x-m_x)*(y-m_y), axis=1, keepdims=True) 373 | s_xx = np.sum((x-m_x)**2, axis=1, keepdims=True) 374 | #w = s_xy/s_xx 375 | a = (x - m_x) / s_xx 376 | #import ipdb 377 | #ipdb.set_trace() 378 | if is_var: 379 | w = np.sum(a**2 * y, axis=1, keepdims=True) 380 | else: 381 | w = np.sum(a * y, axis=1, keepdims=True) 382 | return w 383 | 384 | def aggregate_seqs_sum(seqs, K, is_var): 385 | agg_seqs = [] 386 | for i, seq in enumerate(seqs): 387 | #print(i, len(seqs)) 388 | assert len(seq)%K == 0 389 | if is_var: 390 | agg_seq = [(1./(K*K)) * np.sum(seq[i:i+K], axis=0) for i in range(0, len(seq), K)] 391 | else: 392 | agg_seq = [np.sum(seq[i:i+K], axis=0) for i in range(0, len(seq), K)] 393 | agg_seqs.append(agg_seq) 394 | return np.array(agg_seqs) 395 | 396 | def aggregate_seqs_slope(seqs, K, is_var=False): 397 | agg_seqs = [] 398 | for seq in seqs: 399 | assert len(seq)%K == 0 400 | agg_seq = fit_slope_with_indices(seq, K, is_var) 401 | agg_seqs.append(agg_seq) 402 | return np.array(agg_seqs) 403 | 404 | def aggregate_data_wavelet( 405 | wavelet_levels, train_input, train_target, dev_input, dev_target, 406 | test_input, test_target 407 | ): 408 | 409 | agg_train_input = pywt.wavedec(sqz(train_input), 'haar', level=wavelet_levels, mode='periodic') 410 | agg_train_target = pywt.wavedec(sqz(train_target), 'haar', level=wavelet_levels, mode='periodic') 411 | agg_dev_input = pywt.wavedec(sqz(dev_input), 'haar', level=wavelet_levels, mode='periodic') 412 | agg_dev_target = pywt.wavedec(sqz(dev_target), 'haar', level=wavelet_levels, mode='periodic') 413 | agg_test_input = pywt.wavedec(sqz(test_input), 'haar', 
level=wavelet_levels, mode='periodic') 414 | agg_test_target = pywt.wavedec(sqz(test_target), 'haar', level=wavelet_levels, mode='periodic') 415 | 416 | agg_train_input = [expand(x) for x in agg_train_input] 417 | agg_train_target = [expand(x) for x in agg_train_target] 418 | agg_dev_input = [expand(x) for x in agg_dev_input] 419 | agg_dev_target = [expand(x) for x in agg_dev_target] 420 | agg_test_input = [expand(x) for x in agg_test_input] 421 | agg_test_target = [expand(x) for x in agg_test_target] 422 | 423 | #import ipdb 424 | #ipdb.set_trace() 425 | 426 | return ( 427 | agg_train_input, agg_train_target, agg_dev_input, agg_dev_target, 428 | agg_test_input, agg_test_target 429 | ) 430 | 431 | def get_a(agg_type, K): 432 | 433 | if K == 1: 434 | return torch.ones(1, dtype=torch.float) 435 | 436 | if agg_type in ['sum']: 437 | a = 1./K * torch.ones(K) 438 | elif agg_type in ['slope']: 439 | x = torch.arange(K, dtype=torch.float) 440 | m_x = x.mean() 441 | s_xx = ((x-m_x)**2).sum() 442 | a = (x - m_x) / s_xx 443 | elif agg_type in ['diff']: 444 | l = K // 2 445 | a_ = torch.ones(K) 446 | a = 1./K * torch.cat([-1.*a_[:l], a_[l:]], dim=0) 447 | return a 448 | 449 | def aggregate_window(y, a, is_var, v=None): 450 | if is_var == False: 451 | y_a = (a*y).sum(dim=1, keepdims=True) 452 | else: 453 | w_d = (a**2*y).sum(dim=1, keepdims=True) 454 | if v is not None: 455 | #w_v = (((a.unsqueeze(-1)*v).sum(-1)**2)).sum(dim=1, keepdims=True) 456 | #av = a.unsqueeze(-1)*v 457 | #av = torch.matmul(av, av.transpose(-2,-1)) 458 | #w_v = (((av).sum(-1)**2)).sum(dim=1, keepdims=True) 459 | w_v = (((a.unsqueeze(-1)*v)**2).sum(-1)).sum(dim=1, keepdims=True) 460 | y_a = w_d + w_v 461 | else: 462 | y_a = w_d 463 | 464 | return y_a 465 | 466 | def aggregate_data(y, agg_type, K, is_var, a=None, v=None): 467 | # y shape: batch_size x N 468 | # if a need not be recomputed in every call, pass a vector directly 469 | # if v is not None, it is used as a V vector of low-rank multivariate gaussian 470 | # v shape: batch_size x N x args.v_dim 471 | bs, N = y.shape[0], y.shape[1] 472 | if a is None: 473 | a = get_a(agg_type, K) 474 | a = a.unsqueeze(0).repeat(bs, 1) 475 | y_agg = [] 476 | for i in range(0, N, K): 477 | y_w = y[..., i:i+K] 478 | if v is not None: 479 | v_w = v[..., i:i+K, :] 480 | y_a = aggregate_window(y_w, a, is_var, v=v_w) 481 | else: 482 | y_a = aggregate_window(y_w, a, is_var) 483 | y_agg.append(y_a) 484 | y_agg = torch.cat(y_agg, dim=1)#.unsqueeze(-1) 485 | return y_agg 486 | 487 | 488 | class TimeSeriesDatasetOfflineAggregate(torch.utils.data.Dataset): 489 | """docstring for TimeSeriesDatasetOfflineAggregate""" 490 | def __init__( 491 | self, data, enc_len, dec_len, aggregation_type, K, 492 | feats_info, which_split, tsid_map=None, input_norm=None, target_norm=None, 493 | norm_type=None, feats_norms=None, train_obj=None 494 | ): 495 | super(TimeSeriesDatasetOfflineAggregate, self).__init__() 496 | 497 | assert enc_len%K == 0 498 | assert dec_len%K == 0 499 | 500 | print('Creating dataset:', aggregation_type, K) 501 | self._base_enc_len = enc_len 502 | self._base_dec_len = dec_len 503 | #self.num_values = len(data[0]['target'][0]) 504 | self.which_split = which_split 505 | self.aggregation_type = aggregation_type 506 | self.K = K 507 | self.input_norm = input_norm 508 | self.target_norm = target_norm 509 | self.norm_type = norm_type 510 | self.feats_info = feats_info 511 | self.tsid_map = tsid_map 512 | self.feats_norms = feats_norms 513 | #self.train_obj = train_obj 514 | #self.generate_a() 515 | 
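        # Precompute the length-K aggregation weight vector reused for every window below:
        # 'sum'   -> uniform 1/K weights (a scaled sum, i.e. the window mean),
        #            e.g. get_a('sum', 4)   = [0.25, 0.25, 0.25, 0.25]
        # 'slope' -> least-squares slope weights,
        #            e.g. get_a('slope', 4) = [-0.3, -0.1, 0.1, 0.3]
        # 'diff'  -> signed uniform weights contrasting the two halves of the window,
        #            e.g. get_a('diff', 4)  = [-0.25, -0.25, 0.25, 0.25]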
self.a = get_a(self.aggregation_type, self.K) 516 | self.S = 1 517 | 518 | # Perform aggregation if level != 1 519 | st = time.time() 520 | data_agg = [] 521 | for i in range(0, len(data)): 522 | #print(i, len(data)) 523 | ex = data[i]['target'] 524 | ex_f = data[i]['feats'] 525 | ex_len = len(ex) 526 | ex = ex[ ex_len%self.K: ] 527 | ex_f = ex_f[ ex_len%self.K: ] 528 | 529 | #bp = np.arange(1,len(ex), 1) 530 | if which_split in ['train']: 531 | bp = [(i, self.K) for i in np.arange(0, len(ex)-self.K+1, self.S)] 532 | elif which_split in ['dev', 'test']: 533 | bp = [(i, self.K) for i in np.arange(0, len(ex), self.K)] 534 | 535 | if self.K != 1: 536 | ex_agg, ex_f_agg = [], [] 537 | for b in range(len(bp)): 538 | s, e = bp[b][0], bp[b][0]+bp[b][1] 539 | ex_agg.append( 540 | aggregate_window( 541 | ex[s:e].unsqueeze(0), self.a, False, 542 | )[0] 543 | ) 544 | #if self.aggregation_type in ['sum']: 545 | # for b in range(len(bp)): 546 | # s, e = bp[b][0], bp[b][0]+bp[b][1] 547 | # import ipdb ; ipdb.set_trace() 548 | # ex_agg.append(self.aggregate_data(ex[s:e])) 549 | 550 | #elif self.aggregation_type in ['slope']: 551 | # for b in range(len(bp)): 552 | # s, e = bp[b][0], bp[b][0]+bp[b][1] 553 | # ex_agg.append(self.aggregate_data_slope(ex[s:e])) 554 | 555 | #elif self.aggregation_type in ['haar']: 556 | # for b in range(len(bp)): 557 | # s, e = bp[b][0], bp[b][0]+bp[b][1] 558 | # ex_agg.append(self.aggregate_data_haar(ex[s:e])) 559 | 560 | # Aggregating features 561 | for b in range(len(bp)): 562 | s, e = bp[b][0], bp[b][0]+bp[b][1] 563 | ex_f_agg.append(self.aggregate_feats(ex_f[s:e])) 564 | 565 | #if which_split in ['dev']: 566 | # import ipdb ; ipdb.set_trace() 567 | 568 | data_agg.append( 569 | { 570 | 'target':torch.cat(ex_agg, dim=0), 571 | 'feats':torch.stack(ex_f_agg, dim=0), 572 | } 573 | ) 574 | 575 | else: 576 | ex_agg = ex 577 | ex_f_agg = ex_f 578 | 579 | data_agg.append( 580 | { 581 | 'target':ex_agg, 582 | 'feats':ex_f_agg, 583 | } 584 | ) 585 | et = time.time() 586 | print(which_split, self.aggregation_type, self.K, 'total time:', et-st) 587 | 588 | #if self.K>1 and which_split in ['dev']: 589 | # import ipdb ; ipdb.set_trace() 590 | 591 | if self.input_norm is None: 592 | assert norm_type is not None 593 | data_for_norm = [] 594 | for i in range(0, len(data)): 595 | ex = data_agg[i]['target'] 596 | data_for_norm.append(torch.FloatTensor(ex)) 597 | #data_for_norm = to_float_tensor(data_for_norm).squeeze(-1) 598 | 599 | self.input_norm = Normalizer(data_for_norm, norm_type=self.norm_type) 600 | self.target_norm = self.input_norm 601 | del data_for_norm 602 | 603 | self.feats_norms = {} 604 | for j in range(len(self.feats_info)): 605 | card = self.feats_info[j][0] 606 | if card == 0: 607 | feat_for_norm = [] 608 | for i in range(0, len(data)): 609 | ex = data_agg[i]['feats'][:, j] 610 | feat_for_norm.append(torch.FloatTensor(ex)) 611 | f_norm = Normalizer(feat_for_norm, norm_type='zscore_per_series') 612 | self.feats_norms[j] = f_norm 613 | 614 | self.data = data_agg 615 | self.indices = [] 616 | for i in range(0, len(self.data)): 617 | if which_split in ['train']: 618 | j = 0 619 | while j < len(self.data[i]['target']): 620 | if j+self.mult*self.base_enc_len+self.base_dec_len <= len(self.data[i]['target']): 621 | self.indices.append((i, j)) 622 | j += 1 623 | #if self.K>1: 624 | # import ipdb ; ipdb.set_trace() 625 | elif which_split == 'dev': 626 | j = len(self.data[i]['target']) - self.enc_len - self.dec_len 627 | self.indices.append((i, j)) 628 | #if self.K>1: 629 | # import 
ipdb ; ipdb.set_trace() 630 | elif which_split == 'test': 631 | j = len(self.data[i]['target']) - self.enc_len - self.dec_len 632 | self.indices.append((i, j)) 633 | 634 | @property 635 | def base_enc_len(self): 636 | return self._base_enc_len 637 | 638 | @property 639 | def base_dec_len(self): 640 | return self._base_dec_len 641 | 642 | @property 643 | def enc_len(self): 644 | if self.K > 1: 645 | el = (self._base_enc_len // self.K) * self.mult 646 | else: 647 | el = self._base_enc_len 648 | #el = self._base_enc_len 649 | return el 650 | 651 | @property 652 | def dec_len(self): 653 | if self.K > 1: 654 | dl = self._base_dec_len // self.K 655 | else: 656 | dl = self._base_dec_len 657 | return dl 658 | 659 | @property 660 | def mult(self): 661 | if self.K > 1: mult = 2 662 | else: mult = 1 663 | return mult 664 | 665 | @property 666 | def input_size(self): 667 | #input_size = len(self.data[0]['target'][0]) 668 | input_size = 1 669 | #if self.use_feats: 670 | # # Multiplied by 2 because of sin and cos 671 | # input_size += len(self.data[0]['feats'][0]) 672 | for idx, (card, emb) in self.feats_info.items(): 673 | if card != -1: 674 | input_size += emb 675 | return input_size 676 | 677 | @property 678 | def output_size(self): 679 | #output_size = len(self.data[0]['target'][0]) 680 | output_size = 1 681 | return output_size 682 | 683 | def __len__(self): 684 | return len(self.indices) 685 | 686 | def __getitem__(self, idx): 687 | #print(self.indices) 688 | ts_id = self.indices[idx][0] 689 | pos_id = self.indices[idx][1] 690 | 691 | if self.which_split in ['train']: 692 | stride, mult = self.K//self.S, self.mult 693 | el = mult * self.base_enc_len // self.S 694 | dl = self.base_dec_len // self.S 695 | elif self.which_split in ['dev', 'test']: 696 | stride, mult = 1, 1 697 | el = self.enc_len 698 | dl = self.dec_len 699 | 700 | ex_input = self.data[ts_id]['target'][ pos_id : pos_id+el : stride ] 701 | ex_target = self.data[ts_id]['target'][ pos_id+el : pos_id+el+dl : stride ] 702 | #print('after', ex_input.shape, ex_target.shape, ts_id, pos_id) 703 | if self.tsid_map is None: 704 | mapped_id = ts_id 705 | else: 706 | mapped_id = self.tsid_map[ts_id] 707 | ex_input = self.input_norm.normalize(ex_input, mapped_id)#.unsqueeze(-1) 708 | ex_target = self.target_norm.normalize(ex_target, mapped_id)#.unsqueeze(-1) 709 | 710 | ex_input_feats = self.data[ts_id]['feats'][ pos_id : pos_id+el : stride ] 711 | ex_target_feats = self.data[ts_id]['feats'][ pos_id+el : pos_id+el+dl : stride ] 712 | ex_input_feats_norm = [] 713 | ex_target_feats_norm = [] 714 | for i in range(len(self.feats_info)): 715 | if self.feats_norms.get(i, -1) != -1: 716 | ex_input_feats_norm.append(self.feats_norms[i].normalize( 717 | ex_input_feats[:, i], mapped_id) 718 | ) 719 | ex_target_feats_norm.append(self.feats_norms[i].normalize( 720 | ex_target_feats[:, i], mapped_id) 721 | ) 722 | else: 723 | ex_input_feats_norm.append(ex_input_feats[:, i:i+1]) 724 | ex_target_feats_norm.append(ex_target_feats[:, i:i+1]) 725 | ex_input_feats = torch.cat(ex_input_feats_norm, dim=-1) 726 | ex_target_feats = torch.cat(ex_target_feats_norm, dim=-1) 727 | 728 | #i_res = self.enc_len - len(ex_input) 729 | #ex_input = torch.cat( 730 | # [torch.zeros([i_res] + list(ex_input.shape[1:])), ex_input], 731 | # dim=0 732 | #) 733 | #ex_input_feats = torch.cat( 734 | # [torch.zeros([i_res] +list(ex_input_feats.shape[1:])), ex_input_feats], 735 | # dim=0 736 | #) 737 | 738 | #print(ex_input.shape, ex_target.shape, ex_input_feats.shape, ex_target_feats.shape) 
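        # Each item: normalized encoder window, normalized decoder window, the matching
        # (normalized) feature windows, the series id whose normalization statistics were
        # used, and (ts_id, pos_id) identifying the source chunk.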
739 | 740 | return ( 741 | ex_input, ex_target, 742 | ex_input_feats, ex_target_feats, 743 | mapped_id, 744 | torch.FloatTensor([ts_id, pos_id]) 745 | ) 746 | 747 | def collate_fn(self, batch): 748 | num_items = len(batch[0]) 749 | batched = [[] for _ in range(len(batch[0]))] 750 | for i in range(len(batch)): 751 | for j in range(len(batch[i])): 752 | batched[j].append(torch.tensor(batch[i][j])) 753 | 754 | batched_t = [] 755 | for i, b in enumerate(batched): 756 | batched_t.append(torch.stack(b, dim=0)) 757 | #print(i) 758 | #batched = [torch.stack(b, dim=0) for b in batched] 759 | 760 | return batched_t 761 | 762 | 763 | def aggregate_data(self, values): 764 | return values.mean(dim=0) 765 | 766 | def generate_a(self): 767 | x = torch.arange(self.K, dtype=torch.float) 768 | m_x = x.mean() 769 | s_xx = ((x-m_x)**2).sum() 770 | self.a = (x - m_x) / s_xx 771 | 772 | def aggregate_data_slope(self, y): 773 | return (self.a * y).sum() 774 | #def aggregate_data_slope(self, y, compute_b=False): 775 | # x = torch.arange(y.shape[0], dtype=torch.float) 776 | # m_x = x.mean() 777 | # s_xx = ((x-m_x)**2).sum() 778 | 779 | # #m_y = np.mean(y, axis=0) 780 | # #s_xy = np.sum((x-m_x)*(y-m_y), axis=0) 781 | # #w = s_xy/s_xx 782 | 783 | # a = (x - m_x) / s_xx 784 | # w = (a*y).sum() 785 | 786 | # if compute_b: 787 | # b = m_y - w*m_x 788 | # return w, b 789 | # else: 790 | # return w 791 | 792 | def aggregate_feats(self, feats): 793 | feats_agg = [] 794 | for j in range(len(self.feats_info)): 795 | card = self.feats_info[j][0] 796 | if card != 0: 797 | feats_agg.append(feats[0,j]) 798 | else: 799 | feats_agg.append(feats[:, j].mean()) 800 | feats_agg = torch.stack(feats_agg, dim=0) 801 | return feats_agg 802 | 803 | def aggregate_data_haar(self, values): 804 | i = values.shape[0]//2 805 | return values[i:].mean()-values[:i].mean() 806 | 807 | def aggregate_data_wavelet(self, values, K): 808 | coeffs = pywt.wavedec(sqz(values), 'haar', level=self.wavelet_levels, mode='periodic') 809 | coeffs = [expand(x) for x in coeffs] 810 | coeffs = coeffs[-(K-1)] 811 | return coeffs 812 | 813 | def get_avg_date(self, date_range): 814 | return date_range.mean(axis=0) 815 | 816 | def get_avg_feats(self, time_feats): 817 | return np.mean(time_feats, axis=0) 818 | 819 | def calculate_error(self, segment): 820 | w, b = self.aggregate_data_slope(segment, compute_b=True) 821 | x = np.expand_dims(np.arange(len(segment)), axis=1) 822 | segment_pred = w*x+b 823 | 824 | return np.max(np.abs(segment - segment_pred)) # Using max error 825 | 826 | def smooth(self, series): 827 | #smoother = SpectralSmoother(smooth_fraction=0.4, pad_len=10) 828 | smoother = ExponentialSmoother(window_len=10, alpha=0.15) 829 | series = np.concatenate((np.zeros((10, 1)), series), axis=0) 830 | series_smooth = np.expand_dims(smoother.smooth(series[:, 0]).smooth_data[0], axis=-1) 831 | return series_smooth 832 | 833 | 834 | class DataProcessor(object): 835 | """docstring for DataProcessor""" 836 | def __init__(self, args): 837 | super(DataProcessor, self).__init__() 838 | self.args = args 839 | 840 | if args.dataset_name in ['synth']: 841 | # parameters 842 | N = 500 843 | sigma = 0.01 844 | 845 | # Load synthetic dataset 846 | ( 847 | X_train_input, X_train_target, 848 | X_dev_input, X_dev_target, 849 | X_test_input, X_test_target, 850 | train_bkp, dev_bkp, test_bkp, 851 | ) = create_synthetic_dataset(N, args.N_input, args.N_output, sigma) 852 | 853 | elif args.dataset_name in ['sin']: 854 | N = 100 855 | sigma = 0.01 856 | 857 | ( 858 | data_train, 
data_dev, data_test, 859 | dev_tsid_map, test_tsid_map 860 | ) = create_sin_dataset(N, args.N_input, args.N_output, sigma) 861 | 862 | elif args.dataset_name in ['ECG5000']: 863 | ( 864 | X_train_input, X_train_target, 865 | X_dev_input, X_dev_target, 866 | X_test_input, X_test_target, 867 | train_bkp, dev_bkp, test_bkp, 868 | data_train, data_dev, data_test 869 | ) = parse_ECG5000(args.N_input, args.N_output) 870 | 871 | elif args.dataset_name in ['Traffic']: 872 | ( 873 | data_train, data_dev, data_test, 874 | dev_tsid_map, test_tsid_map 875 | ) = parse_Traffic(args.N_input, args.N_output) 876 | 877 | elif args.dataset_name in ['Taxi']: 878 | ( 879 | X_train_input, X_train_target, 880 | X_dev_input, X_dev_target, 881 | X_test_input, X_test_target, 882 | train_bkp, dev_bkp, test_bkp, 883 | data_train, data_dev, data_test 884 | ) = parse_Taxi(args.N_input, args.N_output) 885 | 886 | elif args.dataset_name in ['Traffic911']: 887 | ( 888 | data_train, data_dev, data_test, 889 | dev_tsid_map, test_tsid_map, 890 | feats_info, coeffs_info 891 | ) = parse_Traffic911(args.N_input, args.N_output) 892 | elif args.dataset_name in ['Exchange', 'Wiki']: 893 | ( 894 | data_train, data_dev, data_test, 895 | dev_tsid_map, test_tsid_map 896 | ) = parse_gc_datasets(args.dataset_name, args.N_input, args.N_output) 897 | 898 | elif args.dataset_name in ['weather']: 899 | ( 900 | data_train, data_dev, data_test, 901 | dev_tsid_map, test_tsid_map 902 | ) = parse_weather(args.dataset_name, args.N_input, args.N_output) 903 | elif args.dataset_name in ['bafu']: 904 | ( 905 | data_train, data_dev, data_test, 906 | dev_tsid_map, test_tsid_map 907 | ) = parse_bafu(args.dataset_name, args.N_input, args.N_output) 908 | elif args.dataset_name in ['meteo']: 909 | ( 910 | data_train, data_dev, data_test, 911 | dev_tsid_map, test_tsid_map 912 | ) = parse_meteo(args.dataset_name, args.N_input, args.N_output) 913 | elif args.dataset_name in ['azure']: 914 | ( 915 | data_train, data_dev, data_test, 916 | dev_tsid_map, test_tsid_map, 917 | feats_info 918 | ) = parse_azure(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 919 | elif args.dataset_name in ['ett']: 920 | ( 921 | data_train, data_dev, data_test, 922 | dev_tsid_map, test_tsid_map, 923 | feats_info 924 | ) = parse_ett(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 925 | elif args.dataset_name in ['sin_noisy']: 926 | ( 927 | data_train, data_dev, data_test, 928 | dev_tsid_map, test_tsid_map, 929 | feats_info, coeffs_info 930 | ) = parse_sin_noisy(args.dataset_name, args.N_input, args.N_output) 931 | elif args.dataset_name in ['Solar']: 932 | ( 933 | data_train, data_dev, data_test, 934 | dev_tsid_map, test_tsid_map, 935 | feats_info 936 | ) = parse_Solar(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 937 | elif args.dataset_name in ['etthourly']: 938 | ( 939 | data_train, data_dev, data_test, 940 | dev_tsid_map, test_tsid_map, 941 | feats_info 942 | ) = parse_etthourly(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 943 | elif args.dataset_name in ['m4hourly']: 944 | ( 945 | data_train, data_dev, data_test, 946 | dev_tsid_map, test_tsid_map, 947 | feats_info, coeffs_info 948 | ) = parse_m4hourly(args.dataset_name, args.N_input, args.N_output) 949 | elif args.dataset_name in ['m4daily']: 950 | ( 951 | data_train, data_dev, data_test, 952 | dev_tsid_map, test_tsid_map, 953 | feats_info, coeffs_info 954 | ) = parse_m4daily(args.dataset_name, args.N_input, args.N_output) 955 | 
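        # dev_tsid_map / test_tsid_map map each dev/test series index to the corresponding
        # training series, so that its normalization statistics (fit on the aggregated
        # training data) can be reused when normalizing dev/test windows.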
elif args.dataset_name in ['taxi30min']: 956 | ( 957 | data_train, data_dev, data_test, 958 | dev_tsid_map, test_tsid_map, 959 | feats_info 960 | ) = parse_taxi30min(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 961 | elif args.dataset_name in ['aggtest']: 962 | ( 963 | data_train, data_dev, data_test, 964 | dev_tsid_map, test_tsid_map, 965 | feats_info 966 | ) = parse_aggtest(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 967 | elif args.dataset_name in ['electricity']: 968 | ( 969 | data_train, data_dev, data_test, 970 | dev_tsid_map, test_tsid_map, 971 | feats_info 972 | ) = parse_electricity(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 973 | elif args.dataset_name in ['foodinflation']: 974 | ( 975 | data_train, data_dev, data_test, 976 | dev_tsid_map, test_tsid_map, 977 | feats_info 978 | ) = parse_foodinflation(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 979 | elif args.dataset_name in ['foodinflationmonthly']: 980 | ( 981 | data_train, data_dev, data_test, 982 | dev_tsid_map, test_tsid_map, 983 | feats_info 984 | ) = parse_foodinflationmonthly(args.dataset_name, args.N_input, args.N_output, t2v_type=args.t2v_type) 985 | 986 | 987 | if args.use_feats: 988 | assert 'feats' in data_train[0].keys() 989 | 990 | self.data_train = data_train 991 | self.data_dev = data_dev 992 | self.data_test = data_test 993 | self.dev_tsid_map = dev_tsid_map 994 | self.test_tsid_map = test_tsid_map 995 | self.feats_info = feats_info 996 | 997 | 998 | def get_processed_data(self, args, agg_method, K): 999 | 1000 | if agg_method in ['wavelet']: 1001 | wavelet_levels = args.wavelet_levels 1002 | K_list = range(1, args.wavelet_levels+1+1+1) 1003 | # +1 : wavedec returns args.wavelet_levels+1 coefficients 1004 | # +1 : Extra slot for base values 1005 | # +1 : Because starting index is 1. 
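            # e.g. with args.wavelet_levels = 2, K_list = range(1, 5), i.e. levels 1..4.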
1006 | else: 1007 | wavelet_levels = None 1008 | K_list = args.K_list 1009 | 1010 | 1011 | #import ipdb ; ipdb.set_trace() 1012 | lazy_dataset_train = TimeSeriesDatasetOfflineAggregate( 1013 | self.data_train, args.N_input, args.N_output, 1014 | agg_method, K, which_split='train', 1015 | norm_type=args.normalize, 1016 | feats_info=self.feats_info, 1017 | ) 1018 | print('Number of chunks in train data:', len(lazy_dataset_train)) 1019 | norm = lazy_dataset_train.input_norm 1020 | dev_norm, test_norm = norm, norm 1021 | feats_norms = lazy_dataset_train.feats_norms 1022 | #for i in range(len(self.data_dev)): 1023 | # dev_norm.append(norm[self.dev_tsid_map[i]]) 1024 | #for i in range(len(self.data_test)): 1025 | # test_norm.append(norm[self.test_tsid_map[i]]) 1026 | #dev_norm, test_norm = np.stack(dev_norm), np.stack(test_norm) 1027 | #import ipdb 1028 | #ipdb.set_trace() 1029 | lazy_dataset_dev = TimeSeriesDatasetOfflineAggregate( 1030 | self.data_dev, args.N_input, args.N_output, 1031 | agg_method, K, 1032 | input_norm=dev_norm, which_split='dev', 1033 | #target_norm=Normalizer(self.data_dev, 'same'), 1034 | target_norm=dev_norm, 1035 | feats_info=self.feats_info, 1036 | tsid_map=self.dev_tsid_map, 1037 | feats_norms=feats_norms, 1038 | train_obj=lazy_dataset_train 1039 | ) 1040 | print('Number of chunks in dev data:', len(lazy_dataset_dev)) 1041 | lazy_dataset_test = TimeSeriesDatasetOfflineAggregate( 1042 | self.data_test, args.N_input, args.N_output, 1043 | agg_method, K, which_split='test', 1044 | input_norm=test_norm, 1045 | #target_norm=test_norm, 1046 | target_norm=Normalizer(self.data_test, 'same'), 1047 | feats_info=self.feats_info, 1048 | tsid_map=self.test_tsid_map, 1049 | feats_norms=feats_norms, 1050 | train_obj=lazy_dataset_train 1051 | ) 1052 | print('Number of chunks in test data:', len(lazy_dataset_test)) 1053 | if len(lazy_dataset_train) >= args.batch_size: 1054 | batch_size = args.batch_size 1055 | else: 1056 | batch_size = args.batch_size 1057 | while len(lazy_dataset_train) // batch_size < 10: 1058 | batch_size = batch_size // 2 1059 | #import ipdb ; ipdb.set_trace() 1060 | if self.args.dataset_name in ['aggtest']: 1061 | train_shuffle = False 1062 | else: 1063 | train_shuffle = True 1064 | trainloader = DataLoader( 1065 | lazy_dataset_train, batch_size=batch_size, shuffle=True, 1066 | drop_last=False, num_workers=12, pin_memory=True, 1067 | #collate_fn=lazy_dataset_train.collate_fn 1068 | ) 1069 | devloader = DataLoader( 1070 | lazy_dataset_dev, batch_size=batch_size, shuffle=False, 1071 | drop_last=False, num_workers=12, pin_memory=True, 1072 | #collate_fn=lazy_dataset_dev.collate_fn 1073 | ) 1074 | testloader = DataLoader( 1075 | lazy_dataset_test, batch_size=batch_size, shuffle=False, 1076 | drop_last=False, num_workers=12, pin_memory=True, 1077 | #collate_fn=lazy_dataset_test.collate_fn 1078 | ) 1079 | #import ipdb 1080 | #ipdb.set_trace() 1081 | 1082 | return { 1083 | 'trainloader': trainloader, 1084 | 'devloader': devloader, 1085 | 'testloader': testloader, 1086 | 'N_input': lazy_dataset_test.enc_len, 1087 | 'N_output': lazy_dataset_test.dec_len, 1088 | 'input_size': lazy_dataset_test.input_size, 1089 | 'output_size': lazy_dataset_test.output_size, 1090 | 'train_norm': norm, 1091 | 'dev_norm': dev_norm, 1092 | 'test_norm': test_norm, 1093 | 'feats_info': self.feats_info, 1094 | 'dev_tsid_map': lazy_dataset_dev.tsid_map, 1095 | 'test_tsid_map': lazy_dataset_test.tsid_map 1096 | } 1097 | 1098 | 
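# ----------------------------------------------------------------------------
# Minimal usage sketch (illustrative only): the real entry point is main.py,
# which builds `args` from the command-line options documented in the README.
# The field values below are placeholders, not the configured defaults.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
    import argparse

    args = argparse.Namespace(
        dataset_name='ett', N_input=192, N_output=192, t2v_type=None,
        use_feats=True, normalize='zscore_per_series',
        K_list=[1, 6], wavelet_levels=None, batch_size=64,
    )
    dp = DataProcessor(args)
    for K in args.K_list:
        # Aggregate the base series into windows of size K ('sum' weights average
        # each window) and wrap the train/dev/test splits in DataLoaders.
        dataset = dp.get_processed_data(args, 'sum', K)
        print(K, dataset['N_input'], dataset['N_output'], dataset['input_size'])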
--------------------------------------------------------------------------------