├── NOTICE
├── CODE_OF_CONDUCT.md
├── model
│   ├── linear_regressor.py
│   ├── searcher.py
│   ├── extra_trees.py
│   ├── random_forest.py
│   ├── light_gbm.py
│   ├── neural_network.py
│   └── neural_aggregator.py
├── README.md
├── CONTRIBUTING.md
├── data
│   ├── process_data.py
│   ├── download_openml.py
│   └── data_loader.py
├── merge_nested_data.py
├── util
│   ├── misc.py
│   └── metric.py
├── LICENSE
├── nested_aggr_quantile_models.py
└── nested_base_quantile_models.py
/NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /model/linear_regressor.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | from model.searcher import QuantileSearcher 3 | from sklearn.base import BaseEstimator, RegressorMixin 4 | 5 | 6 | class SMQRmodel(BaseEstimator, RegressorMixin): 7 | def __init__(self, 8 | quantile, 9 | fit_intercept=True): 10 | self.quantile = quantile 11 | self.fit_intercept = fit_intercept 12 | 13 | def fit(self, X, y): 14 | if self.fit_intercept: 15 | X = sm.add_constant(X) 16 | self.model_ = sm.QuantReg(y, X) 17 | self.results_ = self.model_.fit(q=self.quantile, max_iter=10000000) 18 | 19 | def predict(self, X): 20 | if self.fit_intercept: 21 | X = sm.add_constant(X, has_constant='add') 22 | return self.results_.predict(X) 23 | 24 | 25 | class QuantileRegressor(QuantileSearcher): 26 | def __init__(self, 27 | quantile=0.5, 28 | **kwargs): 29 | self.searcher = SMQRmodel(quantile=quantile) 30 | self.quantile = quantile 31 | 32 | def fit(self, x_train, y_train): 33 | self.searcher.fit(x_train, y_train.reshape(-1)) 34 | 35 | def predict(self, x_data, quantile=None): 36 | return self.searcher.predict(x_data).reshape(-1, 1) 37 | 38 | def get_init_model(self): 39 | return SMQRmodel(quantile=self.quantile) 40 | 41 | -------------------------------------------------------------------------------- /model/searcher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class QuantileSearcher: 5 | quantile = 0.5 6 | searcher = None 7 | base_model = None 8 | 9 | def fit(self, x_train, y_train): 10 | self.searcher.fit(x_train, y_train.reshape(-1)) 11 | 12 | def predict(self, x_data, quantile=None): 13 | if quantile is None: 14 | quantile = self.quantile 15 | return self.searcher.best_estimator_.predict(x_data, int(quantile * 100)).reshape(-1, 1) 16 | 17 | def eval_loss(self, x_data, y_data, quantile=None): 18 | if quantile is None: 19 | quantile = self.quantile 20 | error_data = y_data - self.predict(x_data, quantile) 21 | loss_data = np.maximum(quantile * error_data, (quantile - 1) * error_data) 22 | return loss_data.mean() 23 | 24 | def get_init_model(self): 25 | return self.base_model(**self.searcher.best_params_) 26 | 27 | 28 | class MeanSearcher: 29 | searcher = None 30 | base_model = None 31 | 32 | def fit(self,
x_train, y_train): 33 | self.searcher.fit(x_train, y_train.reshape(-1)) 34 | 35 | def predict(self, x_data): 36 | return self.searcher.best_estimator_.predict(x_data).reshape(-1, 1) 37 | 38 | def eval_loss(self, x_data, y_data): 39 | error_data = y_data - self.predict(x_data) 40 | loss_data = error_data * error_data 41 | loss_data = np.sqrt(loss_data) 42 | return loss_data.mean() 43 | 44 | def get_init_model(self): 45 | return self.base_model(**self.searcher.best_params_) 46 | 47 | -------------------------------------------------------------------------------- /model/extra_trees.py: -------------------------------------------------------------------------------- 1 | from model.searcher import QuantileSearcher, MeanSearcher 2 | from model.forests import ExtraTreesQuantileRegressor 3 | from sklearn.ensemble import ExtraTreesRegressor 4 | from sklearn.model_selection import RandomizedSearchCV, GridSearchCV 5 | 6 | TREE_PARAM_GRID = {'n_estimators': [50], 7 | 'min_samples_split': [8, 16, 64], 8 | 'min_samples_leaf': [8, 16, 64]} 9 | 10 | TREE_NUM_GRIDS = 1 11 | for param in TREE_PARAM_GRID.values(): 12 | TREE_NUM_GRIDS *= len(param) 13 | 14 | 15 | class QuantileExtraTrees(QuantileSearcher): 16 | def __init__(self, 17 | num_iters, 18 | num_folds, 19 | quantile=0.5, 20 | num_jobs=-1, 21 | rand_seed=111): 22 | self.base_model = ExtraTreesQuantileRegressor 23 | if TREE_NUM_GRIDS > num_iters: 24 | self.searcher = RandomizedSearchCV(estimator=self.base_model(n_jobs=-1), 25 | param_distributions=TREE_PARAM_GRID, 26 | n_iter=num_iters, 27 | cv=num_folds, 28 | random_state=rand_seed, 29 | n_jobs=num_jobs) 30 | else: 31 | self.searcher = GridSearchCV(estimator=self.base_model(n_jobs=-1), 32 | param_grid=TREE_PARAM_GRID, 33 | cv=num_folds, 34 | n_jobs=num_jobs) 35 | self.quantile = quantile 36 | 37 | def full_predict(self, x_data, quantile_list): 38 | return self.searcher.best_estimator_.predict(x_data, quantile_list) 39 | 40 | def get_init_model(self): 41 | return self.base_model(**self.searcher.best_params_, n_jobs=-1) 42 | 43 | -------------------------------------------------------------------------------- /model/random_forest.py: -------------------------------------------------------------------------------- 1 | from model.searcher import QuantileSearcher, MeanSearcher 2 | from model.forests import RandomForestQuantileRegressor 3 | from sklearn.ensemble import RandomForestRegressor 4 | from sklearn.model_selection import RandomizedSearchCV, GridSearchCV 5 | 6 | TREE_PARAM_GRID = {'n_estimators': [50], 7 | 'min_samples_split': [8, 16, 64], 8 | 'min_samples_leaf': [8, 16, 64]} 9 | 10 | TREE_NUM_GRIDS = 1 11 | for param in TREE_PARAM_GRID.values(): 12 | TREE_NUM_GRIDS *= len(param) 13 | 14 | 15 | class QuantileRandomForest(QuantileSearcher): 16 | def __init__(self, 17 | num_iters, 18 | num_folds, 19 | quantile=0.5, 20 | num_jobs=-1, 21 | rand_seed=111): 22 | self.base_model = RandomForestQuantileRegressor 23 | if TREE_NUM_GRIDS > num_iters: 24 | self.searcher = RandomizedSearchCV(estimator=self.base_model(n_jobs=-1), 25 | param_distributions=TREE_PARAM_GRID, 26 | n_iter=num_iters, 27 | cv=num_folds, 28 | random_state=rand_seed, 29 | n_jobs=num_jobs) 30 | else: 31 | self.searcher = GridSearchCV(estimator=self.base_model(n_jobs=-1), 32 | param_grid=TREE_PARAM_GRID, 33 | cv=num_folds, 34 | n_jobs=num_jobs) 35 | self.quantile = quantile 36 | 37 | def full_predict(self, x_data, quantile_list): 38 | return self.searcher.best_estimator_.predict(x_data, quantile_list) 39 | 40 | def get_init_model(self): 41 | 
return self.base_model(**self.searcher.best_params_, n_jobs=-1) 42 | 43 | -------------------------------------------------------------------------------- /model/light_gbm.py: -------------------------------------------------------------------------------- 1 | from model.searcher import QuantileSearcher, MeanSearcher 2 | from lightgbm import LGBMRegressor 3 | from sklearn.model_selection import RandomizedSearchCV, GridSearchCV 4 | 5 | GBM_PARAM_GRID = {'n_estimators': [50], 6 | 'num_leaves': [10, 50, 100], 7 | 'min_child_samples': [3, 9, 15], 8 | 'min_child_weight': [1e-2, 1e-1, 1], 9 | 'subsample': [0.4, 0.6, 0.8], 10 | 'colsample_bytree': [0.4, 0.6], 11 | 'reg_alpha': [1e-1, 1, 5], 12 | 'reg_lambda': [1e-1, 1, 5]} 13 | GBM_NUM_GRIDS = 1 14 | for param in GBM_PARAM_GRID.values(): 15 | GBM_NUM_GRIDS *= len(param) 16 | 17 | 18 | class QuantileLightGBM(QuantileSearcher): 19 | def __init__(self, 20 | num_iters, 21 | num_folds, 22 | quantile=0.5, 23 | num_jobs=-1, 24 | rand_seed=111): 25 | self.base_model = LGBMRegressor 26 | if GBM_NUM_GRIDS > num_iters: 27 | self.searcher = RandomizedSearchCV(estimator=self.base_model(objective='quantile', metric='quantile', 28 | alpha=quantile, n_jobs=-1), 29 | param_distributions=GBM_PARAM_GRID, 30 | n_iter=num_iters, 31 | cv=num_folds, 32 | random_state=rand_seed, 33 | n_jobs=num_jobs) 34 | else: 35 | self.searcher = GridSearchCV(estimator=self.base_model(objective='quantile', metric='quantile', 36 | alpha=quantile, n_jobs=-1), 37 | param_grid=GBM_PARAM_GRID, 38 | cv=num_folds, 39 | n_jobs=num_jobs) 40 | self.quantile = quantile 41 | 42 | def predict(self, x_data, quantile=None): 43 | return self.searcher.predict(x_data).reshape(-1, 1) 44 | 45 | def get_init_model(self): 46 | return self.base_model(max_depth=-1, objective='quantile', metric='quantile', 47 | alpha=self.quantile, n_jobs=-1, **self.searcher.best_params_) 48 | 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Flexible Model Aggregation for Quantile Regression 2 | ============================================= 3 | Quantile regression is a fundamental problem in statistical learning motivated by the need to quantify uncertainty in predictions, or to model a diverse population without being overly reductive. For instance, epidemiological forecasts, cost estimates, and revenue predictions all benefit from being able to quantify the range of possible values accurately. As such, many models have been developed for this problem over many years of research in econometrics, statistics, and machine learning. 4 | 5 | Rather than proposing yet another (new) algorithm for quantile regression, we adopt a meta viewpoint: we investigate methods for aggregating any number of conditional quantile models, in order to improve accuracy and robustness. We consider weighted ensembles where weights may vary not only over individual 6 | models, but also over quantile levels and feature values. All of the models we consider in this paper can be fit using modern deep learning toolkits, and hence are widely accessible (from an implementation point of view) and scalable. 7 | 8 | To improve the accuracy of the predicted quantiles (or equivalently, prediction intervals), we develop tools for ensuring that quantiles remain monotonically ordered, and apply conformal calibration methods. These can be used without any modification of the original library of base models.
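As a rough illustration of the quantile (pinball) loss these models optimize and of what "monotonically ordered" means in practice, here is a minimal NumPy sketch; it is not the repository's exact code (the real implementations are `pinball_loss_np` in `util/metric.py` and the `fix_crossing`/`isotonic`/soft-sorting utilities in `util/misc.py`), and the names below are chosen only for this example:

```
import numpy as np

def pinball_loss(pred, target, quantiles):
    # pred: (n, q) predicted quantiles, target: (n,) or (n, 1), quantiles: (q,) levels in (0, 1)
    err = target.reshape(-1, 1) - pred
    return np.maximum(quantiles * err, (quantiles - 1) * err).mean()

def sort_quantiles(pred):
    # simple post-hoc non-crossing fix: re-sort each row so the predicted quantiles
    # are monotonically non-decreasing in the quantile level
    return np.sort(pred, axis=-1)
```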
We also review some basic theory surrounding quantile aggregation and related scoring rules, and contribute a few new results to this literature (for example, the fact that post sorting or post isotonic regression can only improve the weighted interval score). Finally, we provide an extensive suite of empirical comparisons across 34 data sets from two different benchmark repositories. 9 | 10 | This repository provides the implementation of [Flexible Model Aggregation for Quantile Regression](https://arxiv.org/abs/2103.00083). If you use this code, please cite the paper using the following BibTeX: 11 | 12 | ``` 13 | @article{fakoor2022quantile, 14 | title={Flexible Model Aggregation for Quantile Regression}, 15 | author={Rasool Fakoor and Taesup Kim and Jonas Mueller and Alexander J. Smola and Ryan J. Tibshirani}, 16 | journal={arXiv preprint arXiv:2103.00083}, 17 | year={2021}, 18 | } 19 | 20 | 21 | ``` 22 | ## Getting Started 23 | ``` 24 | Run the following commands in the specified order: 25 | 26 | 1) python -u nested_base_quantile_models.py --DATA_PATH ~/mydata/ --data_loc ~/rawdata/ --task-id yacht --seed 1 27 | 28 | 2) python -u merge_nested_data.py --DATA_PATH ~/mydata/ --task-id yacht --seed 1 29 | 30 | 3) python -u nested_aggr_quantile_models.py --DATA_PATH ~/mydata/ --task-id yacht --seed 1 --RESULT_PATH ~/myresult/ 31 | 32 | ``` 33 | The code works on both GPU and CPU machines. 34 | 35 | In order to run this code, you will need to install pytorch, lightgbm, numpy, openml, scikit_learn, scipy, autogluon, statsmodels, etc. 36 | 37 | ## License 38 | This project is licensed under the Apache-2.0 License. 39 | 40 | ## Contact 41 | 42 | Please open an issue on the [issues tracker](https://github.com/amazon-research/quantile-aggregation) to report problems or ask questions, or send an email to [Rasool Fakoor](https://github.com/rasoolfa). 43 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3.
You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /data/process_data.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import pandas as pd 3 | import numpy as np 4 | from autogluon import TabularPrediction as task 5 | 6 | 7 | def processData(data, label_column=None, ag_predictor=None, 8 | problem_type=None, eval_metric=None): 9 | """ Converts a pandas DataFrame to a matrix of entirely numerical values (stored in a DataFrame). 10 | Performs the same data preprocessing as used for AutoGluon's tabular neural network model, 11 | to deal with issues such as: missing value imputation, one-hot encoding of categoricals, 12 | handling of high-cardinality categoricals, handling unknown categorical feature-levels at test-time, etc. 13 | 14 | If ag_predictor is not None, uses the existing autogluon predictor object to process the data (it must have tabularNN as its first model). 15 | To process training data, ag_predictor should be None; for test data, it should not be None. 16 | Returns: 17 | Tuple (X, y, ag_predictor) 18 | where y may be None if labels are not present in test data. 19 | """ 20 | 21 | # fit dummy neural network model just to preprocess data. Here we ensure no embedding layers are used.
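# The 0-epoch fit below ('num_epochs': 0) never actually trains the network; the resulting predictor is
# kept only so that AutoGluon's feature-preprocessing pipeline can be reused on the test data later.
# 'proc.embed_min_categories': np.inf forces one-hot encoding for every categorical feature, so no
# embedding layers are created and the processed features stay a plain numeric matrix.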
22 | if ag_predictor is None: 23 | if label_column is None: 24 | raise ValueError("when processing training data, label_column cannot be None") 25 | elif not label_column in data.columns: 26 | raise ValueError("label_column cannot be missing from training data") 27 | ag_predictor = task.fit(train_data=task.Dataset(data), tuning_data=task.Dataset(data), label=label_column, 28 | hyperparameter_tune=False, problem_type=problem_type, eval_metric=eval_metric, 29 | hyperparameters={'NN': {'num_epochs': 0, 'proc.embed_min_categories': np.inf}}, 30 | num_bagging_folds=0, stack_ensemble_levels=0, label_count_threshold=1, verbosity=2) 31 | 32 | model = ag_predictor._trainer.load_model(ag_predictor._trainer.get_model_names_all()[ 33 | 0]) # This must be the neural net model which contains data processor 34 | if 'NeuralNet' not in model.name: 35 | raise ValueError("Data preprocessing error. This model should be the NeuralNet, not the: %s" % model.name) 36 | bad_inds = [] # row-indices to remove from dataset 37 | if label_column is not None and label_column in data.columns: 38 | label_cleaner = ag_predictor._learner.label_cleaner 39 | y = data[label_column].values 40 | data = data.drop([label_column], axis=1, inplace=False) 41 | y = label_cleaner.transform(y) 42 | if np.sum(y.isna()) > 0: 43 | bad_inds = y.index[ 44 | y.apply(np.isnan)].tolist() # remove these inds as label is NaN (due to very rare classes) 45 | warnings.warn("Dropped these rows from data in preprocessing, due to missing labels: " + str(bad_inds)) 46 | else: 47 | y = None 48 | data_initial_processed = ag_predictor._learner.transform_features(data) # general autogluon data processing. 49 | tabNN_data = model.process_test_data(data_initial_processed, batch_size=64, num_dataloading_workers=4) 50 | # neural net-specific autogluon data processing required to turn tabular data into numerical matrix. 
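# The processed features are recovered from the neural-net model's test-data loader: exactly one
# mxnet NDArray is expected, converted to a 2D numpy array, and wrapped in a DataFrame with generic
# 'feature0', 'feature1', ... column names; rows whose labels were dropped above are removed here too.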
51 | numeric_data = tabNN_data.dataset._data # list of mxnet.NDArrays 52 | if len(numeric_data) != 1: 53 | raise ValueError("Data Preprocessing failed.") 54 | numpy_data = numeric_data[0].asnumpy() # 2D Numpy array 55 | X = pd.DataFrame(numpy_data) 56 | X.columns = ['feature' + str(i) for i in range(X.shape[1])] 57 | if len(bad_inds) > 0: 58 | y.drop(index=bad_inds, inplace=True) 59 | X.drop(index=bad_inds, axis=0, inplace=True) 60 | return X, y, ag_predictor 61 | -------------------------------------------------------------------------------- /data/download_openml.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pkl 3 | import openml 4 | import collections 5 | import pandas as pd 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | from argparse import Namespace 9 | from process_data import processData 10 | import os.path 11 | import argparse 12 | 13 | OPENML_DICT = \ 14 | { 15 | '359934': 'tecator', 16 | '359931': 'sensory', 17 | '359947': 'MIP-2016-regression', 18 | '359932': 'socmob', 19 | '167210': 'Moneyball', 20 | '359951': 'house_prices_nominal', 21 | '359945': 'us_crime', 22 | '359930': 'quake', 23 | '359933': 'space_ga', 24 | '359944': 'abalone', 25 | '233215': 'Mercedes_Benz_Greener_Manufacturing', 26 | '359948': 'SAT11-HAND-runtime-regression', 27 | '233214': 'Santander_transaction_value', 28 | '13854': 'QSAR-TID-11', 29 | '14097': 'QSAR-TID-10980', 30 | '359935': 'wine_quality', 31 | '359942': 'colleges', 32 | '359939': 'topo_2_1', 33 | '359940': 'yprop_4_1', 34 | '317612': 'Brazilian_houses', 35 | '359946': 'pol', 36 | '359936': 'elevators', 37 | '359949': 'house_sales', 38 | '359952': 'house_16H', 39 | '359941': 'OnlineNewsPopularity', 40 | '233211': 'diamonds', 41 | } 42 | 43 | def load_openml(task_id, random_seed=1): 44 | data_path = "./dataset/openml_{}_seed{}.pkl".format(task_id, random_seed) 45 | if os.path.isfile(data_path): 46 | with open(data_path, 'rb') as f: 47 | return pkl.load(f) 48 | 49 | # otherwise load 50 | task = openml.tasks.get_task(task_id) 51 | 52 | # get label 53 | label = task.target_name 54 | 55 | # get full pd_frame 56 | full_data = task.get_dataset().get_data()[0] 57 | full_size = full_data.shape[0] 58 | 59 | # split data 60 | train_data, test_data = train_test_split(full_data, test_size=0.1, random_state=random_seed) 61 | if hasattr(train_data, 'sparse'): 62 | train_data = train_data.sparse.to_dense() 63 | if hasattr(test_data, 'sparse'): 64 | test_data = test_data.sparse.to_dense() 65 | 66 | # preprocess x_train 67 | x_train, y_train, x_transformer = processData(data=train_data, label_column=label, problem_type='regression') 68 | x_test, y_test, _ = processData(data=test_data, label_column=label, ag_predictor=x_transformer) 69 | 70 | # convert to numpy 71 | x_train = x_train.values 72 | y_train = y_train.values.reshape(-1, 1) 73 | x_test = x_test.values 74 | y_test = y_test.values.reshape(-1, 1) 75 | 76 | # y normalizer based on train data 77 | y_transformer = StandardScaler().fit(y_train) 78 | 79 | # transform data 80 | y_train = y_transformer.transform(y_train) 81 | y_test = y_transformer.transform(y_test) 82 | 83 | # dataset 84 | dataset = Namespace(size=full_size, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test) 85 | 86 | info = {'task_id': task_id, 87 | 'x_train.shape':x_train.shape, 88 | 'y_train.shape': y_train.shape, 89 | 'x_test.shape':x_test.shape, 90 | 'y_test.shape':y_test.shape, 91 
| 'seed':random_seed 92 | } 93 | print('-------------------------') 94 | print('task_id', task_id) 95 | print('train-->', x_train.shape, y_train.shape) 96 | print('test-->', x_test.shape, y_test.shape) 97 | print('-------------------------') 98 | 99 | # pickle 100 | with open(data_path, 'wb') as f: 101 | pkl.dump(dataset, f) 102 | 103 | # return dataset 104 | return dataset, info 105 | 106 | parser = argparse.ArgumentParser() 107 | parser.add_argument('--seed', type=int, default=1) 108 | 109 | if __name__ == "__main__": 110 | 111 | args = parser.parse_args() 112 | print('------------') 113 | print(args.__dict__) 114 | print('------------') 115 | 116 | all_info = [] 117 | for i in OPENML_DICT.keys(): 118 | print('task', i) 119 | _, info = load_openml(i, random_seed=args.seed) 120 | all_info.append(info) 121 | print('*******') 122 | print('Done') 123 | print('---------------------------------------') 124 | print('all_info', all_info) 125 | -------------------------------------------------------------------------------- /data/data_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pkl 3 | import openml 4 | import collections 5 | import pandas as pd 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | from argparse import Namespace 9 | from data.process_data import processData 10 | import os.path 11 | 12 | NUM_RI_DATASETS = 13 13 | 14 | UCI_LIST = \ 15 | ['yacht', 16 | 'boston', 17 | 'energy', 18 | 'concrete', 19 | 'kin8nm', 20 | 'power', 21 | 'naval', 22 | 'protein'] 23 | 24 | OPENML_DICT = \ 25 | {'359949': 'house_sales', 26 | '359945': 'us_crime', 27 | '359943': 'nyc-taxi-green-dec-2016', 28 | '359942': 'colleges', 29 | '359944': 'abalone', 30 | '359941': 'OnlineNewsPopularity', 31 | '359926': 'Airlines_DepDelay_1M', 32 | '317614': 'Yolanda', 33 | '317612': 'Brazilian_houses', 34 | '233214': 'Santander_transaction_value', 35 | '233212': 'Allstate_Claims_Severity', 36 | '233215': 'Mercedes_Benz_Greener_Manufacturing', 37 | '359951': 'house_prices_nominal', 38 | '233211': 'diamonds', 39 | '359948': 'SAT11-HAND-runtime-regression', 40 | '359947': 'MIP-2016-regression', 41 | '168891': 'black_friday', 42 | '167210': 'Moneyball', 43 | '233213': 'Buzzinsocialmedia_Twitter', 44 | '14097': 'QSAR-TID-10980', 45 | '13854': 'QSAR-TID-11', 46 | '359952': 'house_16H', 47 | '359930': 'quake', 48 | '359931': 'sensory', 49 | '359932': 'socmob', 50 | '4857': 'boston', 51 | '359933': 'space_ga', 52 | '359934': 'tecator', 53 | '359939': 'topo_2_1', 54 | '359940': 'yprop_4_1', 55 | '359935': 'wine_quality', 56 | '359936': 'elevators', 57 | '359946': 'pol'} 58 | 59 | 60 | def load_uci(dataset_name, random_seed=111, data_loc='./data/dataset/'): 61 | if dataset_name not in UCI_LIST: 62 | raise NotImplementedError('not available dataset') 63 | # load data 64 | data = np.loadtxt(os.path.join(data_loc ,"{}.txt".format(dataset_name))) 65 | x_full = data[:, :-1] 66 | y_full = data[:, -1].reshape(-1, 1) 67 | 68 | # split into train / test 69 | x_train, x_test, y_train, y_test = train_test_split(x_full, y_full, test_size=0.1, random_state=random_seed) 70 | 71 | # normalizer based on train data 72 | x_transformer = StandardScaler().fit(x_train) 73 | y_transformer = StandardScaler().fit(y_train) 74 | 75 | # transform data 76 | x_train = x_transformer.transform(x_train) 77 | y_train = y_transformer.transform(y_train) 78 | x_test = x_transformer.transform(x_test) 79 | y_test = 
y_transformer.transform(y_test) 80 | 81 | return Namespace(size=x_full.shape[0], x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test) 82 | 83 | 84 | def load_openml(task_id, random_seed=1, data_loc='./data/dataset/'): 85 | data_path = os.path.join(data_loc ,"openml_{}_seed{}.pkl".format(task_id, random_seed)) 86 | if os.path.isfile(data_path): 87 | with open(data_path, 'rb') as f: 88 | return pkl.load(f) 89 | 90 | # otherwise load 91 | task = openml.tasks.get_task(task_id) 92 | 93 | # get label 94 | label = task.target_name 95 | 96 | # get full pd_frame 97 | full_data = task.get_dataset().get_data()[0] 98 | full_size = full_data.shape[0] 99 | 100 | # split data 101 | train_data, test_data = train_test_split(full_data, test_size=0.1, random_state=random_seed) 102 | if hasattr(train_data, 'sparse'): 103 | train_data = train_data.sparse.to_dense() 104 | if hasattr(test_data, 'sparse'): 105 | test_data = test_data.sparse.to_dense() 106 | 107 | # preprocess x_train 108 | x_train, y_train, x_transformer = processData(data=train_data, label_column=label, problem_type='regression') 109 | x_test, y_test, _ = processData(data=test_data, label_column=label, ag_predictor=x_transformer) 110 | 111 | # convert to numpy 112 | x_train = x_train.values 113 | y_train = y_train.values.reshape(-1, 1) 114 | x_test = x_test.values 115 | y_test = y_test.values.reshape(-1, 1) 116 | 117 | # y normalizer based on train data 118 | y_transformer = StandardScaler().fit(y_train) 119 | 120 | # transform data 121 | y_train = y_transformer.transform(y_train) 122 | y_test = y_transformer.transform(y_test) 123 | 124 | # dataset 125 | dataset = Namespace(size=full_size, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test) 126 | 127 | # pickle 128 | with open(data_path, 'wb') as f: 129 | pkl.dump(dataset, f) 130 | 131 | # return dataset 132 | return dataset 133 | 134 | -------------------------------------------------------------------------------- /merge_nested_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | import pickle as pkl 5 | MODEL_LIST = \ 6 | ['QuantileConditionalGaussianNetwork', 7 | 'QuantileSingleNeuralNetwork', 8 | 'QuantileJointNeuralNetwork', 9 | 'QuantileRandomForest', 10 | 'QuantileExtraTrees', 11 | 'QuantileLightGBM', 12 | ] 13 | 14 | def merge_results(dataset, seed, eparams): 15 | merge_z_test = [] 16 | merge_z_val = [] 17 | 18 | merge_oof_x_train = {} 19 | merge_oof_y_train = {} 20 | merge_oof_z_train = {} 21 | for model in MODEL_LIST: 22 | file_path = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_{}_z_test_{}_cv5_iter20_seed{}.npy'.format(model, dataset, seed) 23 | file_path_val = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_{}_z_val_{}_cv5_iter20_seed{}.npy'.format(model, dataset, seed) 24 | 25 | if os.path.exists(file_path): 26 | merge_z_test.append(np.load(file_path)) 27 | 28 | if os.path.exists(file_path_val): 29 | merge_z_val.append(np.load(file_path_val)) 30 | 31 | 32 | file_path = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_{}_oof_x_train_{}_cv5_iter20_seed{}.pkl'.format(model, dataset, seed) 33 | if os.path.exists(file_path) and len(merge_oof_x_train) == 0: 34 | with open(file_path, 'rb') as handle: 35 | merge_oof_x_train = pkl.load(handle) 36 | 37 | file_path = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_{}_oof_y_train_{}_cv5_iter20_seed{}.pkl'.format(model, dataset, seed) 38 | if os.path.exists(file_path) and len(merge_oof_y_train) == 
0: 39 | with open(file_path, 'rb') as handle: 40 | merge_oof_y_train = pkl.load(handle) 41 | 42 | file_path = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_{}_oof_z_train_{}_cv5_iter20_seed{}.pkl'.format(model, dataset, seed) 43 | if os.path.exists(file_path): 44 | with open(file_path, 'rb') as handle: 45 | oof_z_train = pkl.load(handle) 46 | 47 | if len(merge_oof_z_train) == 0: 48 | merge_oof_z_train = oof_z_train 49 | for pair_key in merge_oof_z_train.keys(): 50 | if '-' in pair_key: 51 | tmp0, tmp1 = merge_oof_z_train[pair_key] 52 | merge_oof_z_train[pair_key] = [[tmp0], [tmp1]] 53 | else: 54 | tmp0 = merge_oof_z_train[pair_key] 55 | merge_oof_z_train[pair_key] = [tmp0] 56 | else: 57 | for pair_key in merge_oof_z_train.keys(): 58 | if '-' in pair_key: 59 | tmp0, tmp1 = oof_z_train[pair_key] 60 | merge_oof_z_train[pair_key][0].append(tmp0) 61 | merge_oof_z_train[pair_key][1].append(tmp1) 62 | else: 63 | tmp0 = oof_z_train[pair_key] 64 | merge_oof_z_train[pair_key].append(tmp0) 65 | 66 | merge_z_test = np.stack(merge_z_test, 1) 67 | merge_z_val = np.stack(merge_z_val, 1) 68 | 69 | for pair_key in merge_oof_z_train.keys(): 70 | if '-' in pair_key: 71 | tmp0, tmp1 = merge_oof_z_train[pair_key] 72 | merge_oof_z_train[pair_key] = [np.stack(tmp0, 1), np.stack(tmp1, 1)] 73 | else: 74 | tmp0 = merge_oof_z_train[pair_key] 75 | merge_oof_z_train[pair_key] = np.stack(tmp0, 1) 76 | 77 | np.save(eparams.DATA_PATH + eparams.log_id + 'quantile_nested_base_z_test_{}_cv5_iter20_seed{}.npy'.format(dataset, seed), merge_z_test) 78 | np.save(eparams.DATA_PATH + eparams.log_id + 'quantile_nested_base_z_val_{}_cv5_iter20_seed{}.npy'.format(dataset, seed), merge_z_val) 79 | 80 | file_path = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_base_oof_x_train_{}_cv5_iter20_seed{}.pkl'.format(dataset, seed) 81 | with open(file_path, 'wb') as handle: 82 | pkl.dump(merge_oof_x_train, handle, protocol=pkl.HIGHEST_PROTOCOL) 83 | 84 | file_path = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_base_oof_y_train_{}_cv5_iter20_seed{}.pkl'.format(dataset, seed) 85 | with open(file_path, 'wb') as handle: 86 | pkl.dump(merge_oof_y_train, handle, protocol=pkl.HIGHEST_PROTOCOL) 87 | 88 | file_path = eparams.DATA_PATH + eparams.log_id + 'quantile_nested_base_oof_z_train_{}_cv5_iter20_seed{}.pkl'.format(dataset, seed) 89 | with open(file_path, 'wb') as handle: 90 | pkl.dump(merge_oof_z_train, handle, protocol=pkl.HIGHEST_PROTOCOL) 91 | 92 | return 93 | 94 | if __name__ == "__main__": 95 | # arguments 96 | parser = argparse.ArgumentParser() 97 | 98 | # parser 99 | parser.add_argument('--task-id', type=str, help='task id') 100 | parser.add_argument('--seed', type=int, default=1, help='random seed') 101 | parser.add_argument('--DATA_PATH', default='./output/data/') 102 | parser.add_argument('--log_id', default='mylogid') 103 | 104 | args = parser.parse_args() 105 | print('------------') 106 | print(args.__dict__) 107 | print('------------') 108 | 109 | merge_results(args.task_id, args.seed, args) 110 | print('Done') 111 | -------------------------------------------------------------------------------- /util/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from sklearn.isotonic import IsotonicRegression 5 | import multiprocessing as mp 6 | from fast_soft_sort.pytorch_ops import soft_sort 7 | 8 | 9 | def set2mask(set_data_list, input_size): 10 | # init mask 11 | mask_data = np.zeros((input_size, input_size)) 12 | 13 | 
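# set2mask turns a list of index sets (the PAVA partitions) into an averaging matrix: every column whose
# index belongs to a set receives weight 1/|set| on exactly the rows of that set, so right-multiplying a
# row of predictions by this mask replaces each entry with the mean of its partition (see pava_forward).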
# for each set 14 | for i, set_data in enumerate(set_data_list): 15 | num_elements = len(set_data) 16 | tmp_mask_data = np.zeros((input_size, 1)) 17 | tmp_mask_data[set_data] = 1.0 18 | tmp_mask_data /= float(num_elements) 19 | mask_data[:, set_data] = tmp_mask_data 20 | return mask_data 21 | 22 | 23 | def single_pava(z): 24 | # init parition info 25 | p_val = [] 26 | p_cnt = [] 27 | p_set = [] 28 | p_idx = -1 29 | 30 | # for each value 31 | for i, val in enumerate(z): 32 | # if first, or current value is larger than others 33 | if i == 0 or val > p_val[p_idx]: 34 | # add value as new partition 35 | p_set.append([i]) 36 | p_val.append(val) 37 | p_cnt.append(1) 38 | p_idx += 1 39 | continue 40 | # if the value is same as the latest one, just insert 41 | elif val == p_val[p_idx]: 42 | # only count up 43 | p_set[p_idx].append(i) 44 | p_cnt[p_idx] += 1 45 | continue 46 | 47 | # if current value is smaller than the current value 48 | assert val < p_val[p_idx] 49 | # update partition info 50 | p_set[p_idx].append(i) 51 | p_val[p_idx] = (p_val[p_idx] * p_cnt[p_idx] + val) / float(p_cnt[p_idx] + 1) 52 | p_cnt[p_idx] += 1 53 | 54 | # clean up 55 | while p_idx > 0: 56 | # if current parition is equal or smaller than the previous partition 57 | if p_val[p_idx] <= p_val[p_idx - 1]: 58 | # merge 59 | p_set[p_idx - 1] += p_set[p_idx] 60 | p_val[p_idx - 1] = (p_val[p_idx] * p_cnt[p_idx] + p_val[p_idx - 1] * p_cnt[p_idx - 1]) / float( 61 | p_cnt[p_idx] + p_cnt[p_idx - 1]) 62 | p_cnt[p_idx - 1] = p_cnt[p_idx] + p_cnt[p_idx - 1] 63 | p_set.pop(p_idx) 64 | p_val.pop(p_idx) 65 | p_cnt.pop(p_idx) 66 | p_idx -= 1 67 | else: 68 | break 69 | return set2mask(p_set, len(z)) 70 | 71 | 72 | def multi_pava(z_array): 73 | pool = mp.Pool(processes=mp.cpu_count()) 74 | output_mask = np.stack(pool.map(single_pava, z_array), 0) 75 | pool.close() 76 | return output_mask 77 | 78 | 79 | # forward pava (differentiable) 80 | def pava_forward(input_data): 81 | # data size 82 | batch_size, num_quantiles = input_data.size() 83 | 84 | # for each data, make mask 85 | input_mask = [single_pava(input_data[i].cpu().data.numpy()) for i in range(batch_size)] 86 | #input_mask = multi_pava(input_data.cpu().data.numpy()) 87 | input_mask = np.stack(input_mask, 0) 88 | input_mask = torch.Tensor(input_mask).to(input_data.device) 89 | 90 | # based on mask, compute pava output 91 | output_data = torch.bmm(input_data.unsqueeze(1), input_mask.detach()).squeeze(1) 92 | return output_data 93 | 94 | 95 | def isotonic(input_data, quantile_list): 96 | quantile_list = np.array(quantile_list).reshape(-1) 97 | batch_size = input_data.shape[0] 98 | new_output_data = [] 99 | for i in range(batch_size): 100 | new_output_data.append(IsotonicRegression().fit_transform(quantile_list, input_data[i])) 101 | return np.stack(new_output_data, 0) 102 | 103 | 104 | def fix_crossing(predict_data, fix_type=0): 105 | is_torch = True 106 | if type(predict_data) is not torch.Tensor: 107 | is_torch = False 108 | predict_data = torch.Tensor(predict_data) 109 | 110 | # number of quantiles 111 | num_quantiles = predict_data.size()[-1] 112 | 113 | # above 50% and below 50% 114 | if fix_type == 0: 115 | # split into below 50% and above 50% 116 | idx_50 = num_quantiles // 2 117 | 118 | # below 50% 119 | below_50 = predict_data[:, :(idx_50 + 1)].contiguous() 120 | below_50 = torch.flip(torch.cummin(torch.flip(below_50, [-1]), -1)[0], [-1]) 121 | 122 | # above 50% 123 | above_50 = predict_data[:, idx_50:].contiguous() 124 | above_50 = torch.cummax(above_50, -1)[0] 125 | 126 | # 
refined output 127 | ordered_data = torch.cat([below_50[:, :-1], above_50], -1) 128 | # from 0% to 100% 129 | elif fix_type == 1: 130 | ordered_data = torch.cummax(predict_data, -1)[0] 131 | # from 0% to 100% and from 100% to 0% 132 | elif fix_type == 2: 133 | min_ordered_data = torch.flip(torch.cummin(torch.flip(predict_data, [-1]), -1)[0], [-1]) 134 | max_ordered_data = torch.cummax(predict_data, -1)[0] 135 | ordered_data = 0.5 * (min_ordered_data + max_ordered_data) 136 | else: 137 | ordered_data = predict_data 138 | 139 | if is_torch: 140 | return ordered_data 141 | else: 142 | return ordered_data.data.cpu().numpy() 143 | 144 | 145 | # forward sorting (differentiable) 146 | def sort_forward(input_data, regularization_strength): 147 | return soft_sort(input_data, regularization_strength=regularization_strength) 148 | 149 | # make directory 150 | def make_dir(dir_path): 151 | if not os.path.exists(dir_path): 152 | try: 153 | os.makedirs(dir_path) 154 | except OSError as e: 155 | raise ValueError(e) 156 | -------------------------------------------------------------------------------- /util/metric.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | # numpy pinball loss 6 | def pinball_loss_np(predict_data, target_data, quantiles, mean=False): 7 | error_data = target_data.reshape(-1, 1) - predict_data 8 | loss_data = np.maximum(quantiles * error_data, (quantiles - 1) * error_data) 9 | if mean: 10 | return loss_data.mean() 11 | else: 12 | return loss_data.mean(0) 13 | 14 | 15 | # pinball loss 16 | def pinball_loss(predict_data, target_data, quantiles): 17 | error_data = target_data.reshape(-1, 1) - predict_data 18 | loss_data = torch.max(quantiles * error_data, (quantiles - 1) * error_data) 19 | return loss_data.mean() 20 | 21 | 22 | # huber-pinball loss 23 | def huber_loss(predict_data, target_data, quantiles, alpha=0.01): 24 | if alpha == 0.0: 25 | return pinball_loss(predict_data, target_data, quantiles) 26 | 27 | error_data = target_data.reshape(-1, 1) - predict_data 28 | loss_data = torch.where(torch.abs(error_data) < alpha, 29 | 0.5 * error_data * error_data, 30 | alpha * (torch.abs(error_data) - 0.5 * alpha)) 31 | loss_data /= alpha 32 | 33 | scale = torch.where(error_data >= 0, 34 | torch.ones_like(error_data) * quantiles, 35 | torch.ones_like(error_data) * (1 - quantiles)) 36 | loss_data *= scale 37 | return loss_data.mean() 38 | 39 | 40 | # margin loss (between neighbored prediction) 41 | def margin_loss(predict_data, margin_data): 42 | # number of samples 43 | batch_size, num_quantiles = predict_data.size() 44 | 45 | # compute margin loss (batch_size x output_size(above) x output_size(below)) 46 | error_data = predict_data.unsqueeze(1) - predict_data.unsqueeze(2) 47 | 48 | # len(np.shape(margin_data)) ==0, means a scalar 49 | if len(np.shape(margin_data)) == 1: 50 | # margin data (num_quantiles) ===> (num_quantiles x num_quantiles) 51 | if type(margin_data) is not torch.Tensor: 52 | margin_data = torch.tensor(margin_data, device=predict_data.device) 53 | 54 | margin_data = margin_data.reshape(1, -1) 55 | margin_data = margin_data.permute(1, 0) - margin_data 56 | margin_data = torch.tril(margin_data, -1).relu() 57 | 58 | 59 | loss_data = torch.tril(error_data + margin_data, diagonal=-1) 60 | loss_data = loss_data.relu() 61 | loss_data = loss_data.sum() / np.float32(batch_size * (num_quantiles * num_quantiles - num_quantiles) * 0.5) 62 | 63 | # compute accumulated margin 64 | #if only_neighbored: 
65 | # loss_data = torch.tril(torch.triu(error_data + margin_data, diagonal=-1), diagonal=-1) 66 | # loss_data = loss_data.relu() 67 | # loss_data = loss_data.sum() / np.float32(batch_size * (num_quantiles - 1)) 68 | 69 | return loss_data 70 | 71 | 72 | # PICP, percentage of captured points (ratio of true observations falling inside the estimated prediction) 73 | def prediction_interval_coverage_rate(y_target, y_lower, y_upper): 74 | return np.mean((y_target >= y_lower) & (y_target <= y_upper)) 75 | 76 | 77 | def mean_prediction_interval_coverage_rate(y_target, y_quantile, quantile_list): 78 | picp_list = [] 79 | error_list = [] 80 | 81 | # for each quantile level (from 1% to 99%) 82 | num_samples = y_quantile.shape[0] 83 | num_quantiles = len(quantile_list) 84 | assert num_quantiles == y_quantile.shape[1] 85 | y_target = y_target.reshape([num_samples, 1]) 86 | 87 | # for each interval 88 | for i in range(num_quantiles // 2): 89 | # lower and upper index 90 | lower_idx = i 91 | upper_idx = -(i + 1) 92 | 93 | # lower and upper quantile 94 | lower_quantile = quantile_list[lower_idx] 95 | upper_quantile = quantile_list[upper_idx] 96 | 97 | # get predicted lower and upper values 98 | y_lower = y_quantile[:, lower_idx].reshape([num_samples, 1]) 99 | y_upper = y_quantile[:, upper_idx].reshape([num_samples, 1]) 100 | 101 | # compute picp 102 | picp = prediction_interval_coverage_rate(y_target=y_target, y_lower=y_lower, y_upper=y_upper) 103 | interval_size = upper_quantile - lower_quantile 104 | 105 | picp_list.append(picp) 106 | error_list.append(np.abs(picp - interval_size)) 107 | 108 | # mean over all intervals 109 | return picp_list, np.array(error_list).mean() 110 | 111 | 112 | # MeanPredictionIntervalWidth (MPIW) 113 | def mean_prediction_interval_width(y_full, y_lower, y_higher): 114 | # width of intervals 115 | y_range = y_full.max() - y_full.min() 116 | return np.abs(y_higher - y_lower).mean() / y_range 117 | 118 | 119 | def mean_abs_calibration_error(y_target, y_quantile, quantile_list): 120 | # y_target (batch_size x 1) 121 | y_target = y_target.reshape(-1, 1) 122 | num_samples = y_target.shape[0] 123 | 124 | # y_quantile (batch_size x num_quantiles) 125 | y_quantile = y_quantile.reshape(num_samples, -1) 126 | num_quantiles = y_quantile.shape[1] 127 | assert num_quantiles == len(quantile_list) 128 | 129 | # compute coverage (num_quantiles) 130 | mean_calibration = (y_target <= y_quantile).mean(0) 131 | 132 | # compute error (mean over quantile-levels) 133 | return mean_calibration.tolist(), np.abs(mean_calibration - quantile_list).mean() 134 | 135 | 136 | def root_mean_squared_calibration_error(y_target, y_quantile, quantile_list): 137 | # y_target (batch_size x 1) 138 | y_target = y_target.reshape(-1, 1) 139 | num_samples = y_target.shape[0] 140 | 141 | # y_quantile (batch_size x num_quantiles) 142 | y_quantile = y_quantile.reshape(num_samples, -1) 143 | num_quantiles = y_quantile.shape[1] 144 | assert num_quantiles == len(quantile_list) 145 | 146 | # compute coverage (num_quantiles) 147 | mean_calibration = (y_target <= y_quantile).mean(0) 148 | 149 | # compute error (mean over quantile-levels) 150 | return np.sqrt(np.mean(np.square(mean_calibration - quantile_list))) 151 | 152 | 153 | def mean_interval_score(y_target, y_quantile, quantile_list): 154 | # assume quantile list is symmetry centered in 50% 155 | # for each quantile level (from 1% to 99%) 156 | interval_score_list = [] 157 | num_quantiles = len(quantile_list) 158 | for i in range(num_quantiles // 2): 159 | # lower and 
upper quantile 160 | lower_quantile = quantile_list[i] 161 | upper_quantile = quantile_list[-(i + 1)] 162 | 163 | # get predicted lower and upper values 164 | y_lower = y_quantile[:, quantile_list == lower_quantile] 165 | y_upper = y_quantile[:, quantile_list == upper_quantile] 166 | 167 | # get mask below lower, above upper 168 | below_lower = (y_lower > y_target).astype('float') 169 | above_upper = (y_upper < y_target).astype('float') 170 | 171 | # compute score 172 | interval_score = (y_upper - y_lower) 173 | interval_score += (1.0 / lower_quantile) * (y_lower - y_target) * below_lower 174 | interval_score += (1.0 / lower_quantile) * (y_target - y_upper) * above_upper 175 | 176 | # mean over samples 177 | interval_score = interval_score.mean() 178 | interval_score_list.append(interval_score) 179 | 180 | # mean over all intervals 181 | return interval_score_list 182 | 183 | 184 | def compute_quantile_results(prediction, target, quantile_list): 185 | check = pinball_loss_np(predict_data=prediction, target_data=target, quantiles=quantile_list) 186 | interval = mean_interval_score(y_target=target, y_quantile=prediction, quantile_list=quantile_list) 187 | results_value = check.tolist() + [np.mean(check)] + interval + [np.mean(interval)] 188 | return results_value 189 | 190 | 191 | def compute_calibration_results(prediction, target, quantile_list): 192 | picp_list, mean_error = mean_prediction_interval_coverage_rate(y_target=target, y_quantile=prediction, quantile_list=quantile_list) 193 | calib_list, mace = mean_abs_calibration_error(y_target=target, y_quantile=prediction, quantile_list=quantile_list) 194 | results_value = picp_list + [mean_error] + calib_list + [mace] 195 | return results_value 196 | 197 | 198 | def compute_mean_results(prediction, target): 199 | error_data = target - prediction 200 | results_value = [np.sqrt(np.mean(error_data * error_data)), 201 | np.mean(error_data * error_data), 202 | np.mean(np.abs(error_data))] 203 | return results_value 204 | 205 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | -------------------------------------------------------------------------------- /nested_aggr_quantile_models.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import argparse 4 | import numpy as np 5 | import pickle as pkl 6 | from model.neural_aggregator import QuantileLocalAggregatorTrainer, QuantileGlobalAggregatorTrainer 7 | from util.misc import make_dir 8 | from data.data_loader import UCI_LIST, OPENML_DICT 9 | import warnings 10 | from util.others import dump_to_json 11 | import os 12 | 13 | warnings.filterwarnings('ignore') 14 | 15 | # quantile list (from 1% to 99%) 16 | QUANTILE_LIST = np.arange(1, 100, 1) / 100.0 17 | NUM_QUANTILES = len(QUANTILE_LIST) 18 | #CV_RATIO = 0.8 19 | 20 | def model_prediction(model, x_data, z_data): 21 | batch_size = 512 22 | data_size = x_data.shape[0] 23 | num_batches = int(np.ceil(float(data_size) / float(batch_size))) 24 | 25 | e_data = [] 26 | for i in range(num_batches): 27 | e_data.append(model.predict(x_data[i * batch_size:(i + 1) * batch_size], z_data[i * batch_size:(i + 1) * batch_size])) 28 | 29 | return np.concatenate(e_data, 0) 30 | 31 | def run_exp(task_id, use_local=True, 32 | share_weight=False, cross_weight=True, 33 | normalize=True, margin_type=None, 34 | trans_type=None, use_grad=False, 35 | num_searches=20, num_folds=5, rand_seed=1, device=-1, 36 | eparams=None): 37 | 38 | print('--------------------') 39 | print('use_local: ', use_local) 40 | print('share_weight: ', share_weight) 41 | print('cross_weight: ', cross_weight) 42 | print('margin_type: ', margin_type) 43 | print('trans_type: ', trans_type) 44 | print('use_grad: ', use_grad) 45 | print('num_searches: ', num_searches) 46 | print('num_folds: ', num_folds) 47 | print('rand_seed: ', rand_seed) 48 | print('eparams.use_mean_pt: ', eparams.use_mean_pt) 49 | print('device: ', device) 50 | print('regularization_strength:', eparams.regularization_strength) 51 | print('--------------------') 52 | 53 | # set seed 54 | random.seed(rand_seed) 55 | np.random.seed(rand_seed) 56 | torch.manual_seed(rand_seed) 57 | torch.cuda.manual_seed_all(rand_seed) 58 | 59 | if torch.cuda.is_available() and device > -1: 60 | torch.cuda.manual_seed(rand_seed) 61 | torch.backends.cudnn.deterministic = True 62 | torch.backends.cudnn.benchmark = False 63 | 64 | # exp setting 65 | task_name = task_id 66 | if task_id not in UCI_LIST: 67 | task_name = OPENML_DICT[task_id] 68 | 69 | exp_name = [task_id, num_folds, num_searches, rand_seed] 70 | 71 | # load dataset 72 | with open(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_oof_x_train_{}_cv{}_iter{}_seed{}.pkl'.format(*exp_name), 'rb') as handle: 73 | oof_x_train = pkl.load(handle) 74 | 75 | with open(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_oof_y_train_{}_cv{}_iter{}_seed{}.pkl'.format(*exp_name), 'rb') as handle: 76 | oof_y_train = pkl.load(handle) 77 | 78 | with open(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_oof_z_train_{}_cv{}_iter{}_seed{}.pkl'.format(*exp_name), 'rb') as handle: 79 | oof_z_train = pkl.load(handle) 80 | 81 | x_train, y_train, z_train = [], [], [] 82 | for k in range(num_folds): 83 | x_train.append(oof_x_train['{}'.format(k)]) 84 | y_train.append(oof_y_train['{}'.format(k)]) 85 | z_train.append(oof_z_train['{}'.format(k)]) 86 | 87 | x_train = np.concatenate(x_train, 0) 88 | y_train = np.concatenate(y_train, 0) 89 | z_train = np.concatenate(z_train, 0) 90 | 91 | x_test = np.load(eparams.DATA_PATH + 
eparams.log_id_base + 'quantile_nested_base_x_test_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 92 | y_test = np.load(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_y_test_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 93 | z_test = np.load(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_z_test_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 94 | 95 | x_val = np.load(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_x_val_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 96 | y_val = np.load(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_y_val_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 97 | z_val = np.load(eparams.DATA_PATH + eparams.log_id_base + 'quantile_nested_base_z_val_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 98 | 99 | # get empirical quantile-margin 100 | if margin_type == 'non': 101 | margin_list = None 102 | 103 | elif margin_type == 'single': 104 | margin_list = None 105 | 106 | elif margin_type == 'adapt': 107 | # compute error (from estimated median) 108 | if eparams.use_mean_pt == True: 109 | print('using mean for adapt..') 110 | e_train = y_train.reshape(-1) - np.mean(z_train[..., NUM_QUANTILES // 2], 1).reshape(-1).astype(np.float32) 111 | 112 | else: 113 | print('using median..') 114 | e_train = y_train.reshape(-1) - np.median(z_train[..., NUM_QUANTILES // 2], 1).reshape(-1).astype(np.float32) 115 | 116 | margin_list = np.quantile(e_train.reshape(-1), QUANTILE_LIST, 0).astype(np.float32) 117 | 118 | elif margin_type == 'vec': 119 | margin_list = (QUANTILE_LIST.reshape(-1) * 1e-2).astype(np.float32) 120 | 121 | else: 122 | raise ValueError('%s margin not supported' % margin_type) 123 | 124 | # data size 125 | feature_size = x_train.shape[1] 126 | print('Data: {} (seed {}, train size {}, feature size {}, val size {}, test size {})'.format( 127 | task_name, rand_seed, x_train.shape[0], feature_size, x_val.shape[0], x_test.shape[0])) 128 | 129 | #train_size = x_train.shape[0] 130 | train_idx_list = np.load(eparams.DATA_PATH + eparams.log_id_base +'quantile_all_train_idx_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 131 | cv_split = np.load(eparams.DATA_PATH + eparams.log_id_base +'quantile_train_val_idx_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), allow_pickle=True) 132 | 133 | # num of base models and quantiles 134 | num_models, num_quantiles = z_train.shape[1], z_train.shape[2] 135 | assert num_quantiles == NUM_QUANTILES 136 | 137 | # model name 138 | if use_local: 139 | model_name = 'Local' 140 | 141 | else: 142 | model_name = 'Global' 143 | 144 | if share_weight: 145 | model_name += '-Coarse' 146 | 147 | elif cross_weight: 148 | model_name += '-Fine' 149 | 150 | else: 151 | model_name += '-Medium' 152 | 153 | print('Train start: ', model_name) 154 | 155 | # full experiment name 156 | if trans_type is not None: 157 | output_name = 'DQA_{}_norm{}_grad{}_{}_{}_margin_results_{}_cv{}_iter{}_seed{}'.format(model_name, 158 | int(normalize), 159 | int(use_grad), 160 | trans_type, 161 | margin_type, 162 | *exp_name) 163 | else: 164 | output_name = 'DQA_{}_norm{}_{}_margin_results_{}_cv{}_iter{}_seed{}'.format(model_name, 165 | int(normalize), 166 | margin_type, 167 | *exp_name) 168 | 169 | print(eparams.run_id + output_name) 170 | 171 | # set model learner 172 | batch_size = int(2 ** (3 + np.floor(np.log10(y_train.shape[0] + y_test.shape[0])))) 173 | model_trainer = QuantileLocalAggregatorTrainer if use_local else QuantileGlobalAggregatorTrainer 174 | model = model_trainer(quantile_list=QUANTILE_LIST, 
num_searches=num_searches, cv_split=cv_split, 175 | share_weight=share_weight, cross_weight=cross_weight, 176 | normalize=normalize, margin_list=margin_list, 177 | trans_type=trans_type, use_grad=use_grad, 178 | batch_size=batch_size, rand_seed=rand_seed, device=device, 179 | margin_type=margin_type, 180 | regularization_strength=eparams.regularization_strength) 181 | 182 | # fit model 183 | if use_local: 184 | model.fit(c_train=x_train, x_train=z_train, y_train=y_train, 185 | c_val=x_val, x_val=z_val, y_val=y_val, 186 | ac_train=None, ax_train=None) 187 | 188 | else: 189 | #model.fit(z_train, y_train, None) 190 | model.fit(x_train=z_train, y_train=y_train, 191 | x_val=z_val, y_val=y_val, 192 | ax_train=None) 193 | 194 | 195 | # compute test prediction 196 | if use_local: 197 | org_e_test = model_prediction(model, x_test, z_test) 198 | 199 | else: 200 | org_e_test = model.predict(z_test) 201 | 202 | # save results 203 | np.save(eparams.RESULT_PATH + eparams.run_id + eparams.log_id_out + output_name + '_org_e_test.npy', org_e_test) 204 | np.save(eparams.RESULT_PATH + eparams.run_id + output_name + '_org_e_test.npy', org_e_test) 205 | 206 | print('Done.') 207 | 208 | if __name__ == "__main__": 209 | # arguments 210 | parser = argparse.ArgumentParser() 211 | 212 | # parser 213 | parser.add_argument('--gpu', type=int, default=0, help='gpu id') 214 | parser.add_argument('--fold', type=int, default=5, help='folds for out of fold predictions') 215 | parser.add_argument('--iter', type=int, default=20, help='number of iterations for grid search') 216 | parser.add_argument('--task-id', type=str, default='boston', help='task id') 217 | parser.add_argument('--seed', type=int, default=1, help='random seed') 218 | 219 | parser.add_argument('--local', type=int, default=1) 220 | parser.add_argument('--share', type=int, default=0, help='share combination') 221 | parser.add_argument('--cross', type=int, default=1, help='full combination') 222 | parser.add_argument('--grad', type=int, default=0, help='use grad') 223 | parser.add_argument('--trans', type=str, default='sort', help='non-crossing type') 224 | parser.add_argument('--margin', type=str, default='single', help='margin type') 225 | 226 | parser.add_argument('--norm', type=int, default=1, help='normalize weight') 227 | 228 | parser.add_argument('--DATA_PATH', default='./output/data/') 229 | parser.add_argument('--RESULT_PATH', default='./output/result/') 230 | parser.add_argument('--log_id_base', default='mylogid') 231 | parser.add_argument('--log_id_out', default='exNone') 232 | parser.add_argument('--run_id', default='p1_') 233 | parser.add_argument('--use_mean_pt', default=False, action='store_true') 234 | parser.add_argument('--regularization_strength', type=float, default=0.1) 235 | 236 | args = parser.parse_args() 237 | print('------------') 238 | print(args.__dict__) 239 | print('------------') 240 | 241 | make_dir(args.DATA_PATH) 242 | make_dir(args.RESULT_PATH) 243 | 244 | fname_json = os.path.join(args.RESULT_PATH, args.log_id_out + '_' + 245 | args.log_id_base +'_' + args.run_id + args.task_id +'_args_aggr.json' ) 246 | print(fname_json) 247 | dump_to_json(fname_json, {'args': args.__dict__}) 248 | 249 | # run 250 | run_exp(task_id=args.task_id,use_local=bool(args.local), 251 | share_weight=args.share, cross_weight=args.cross, 252 | normalize=bool(args.norm), margin_type=args.margin, 253 | trans_type=args.trans, use_grad=bool(args.grad), 254 | num_searches=args.iter, num_folds=args.fold, 255 | rand_seed=args.seed, device=args.gpu, 256 | 
eparams=args, 257 | ) 258 | -------------------------------------------------------------------------------- /nested_base_quantile_models.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import argparse 4 | import numpy as np 5 | import pickle as pkl 6 | from model.linear_regressor import QuantileRegressor 7 | from model.random_forest import QuantileRandomForest 8 | from model.extra_trees import QuantileExtraTrees 9 | from model.light_gbm import QuantileLightGBM 10 | from model.neural_network import QuantileJointNeuralNetwork 11 | from model.neural_network import QuantileSingleNeuralNetwork 12 | from model.neural_network import QuantileConditionalGaussianNetwork 13 | from util.misc import make_dir 14 | from sklearn.model_selection import KFold 15 | from data.data_loader import load_uci, load_openml, UCI_LIST, OPENML_DICT 16 | from util.others import dump_to_json 17 | import os 18 | import copy 19 | 20 | # quantile list (from 1% to 99%) 21 | QUANTILE_LIST = np.arange(1, 100, 1) / 100.0 22 | CV_RATIO = 0.8 23 | 24 | # run neural networks 25 | def run_neural(model, 26 | x_train, y_train, 27 | x_test, y_test, 28 | cv_split, kfolder, 29 | num_iters, rand_seed=1, device=0, 30 | eparams=None): 31 | # model name 32 | model_name = model.__name__ 33 | print('Train start: ', model_name) 34 | 35 | # set model learner 36 | batch_size = int(2 ** (3 + np.floor(np.log10(x_train.shape[0] + x_test.shape[0])))) 37 | model_learner = model(quantile_list=QUANTILE_LIST, 38 | num_iters=num_iters, 39 | cv_split=cv_split, 40 | batch_size=batch_size, 41 | rand_seed=rand_seed, 42 | device=device, 43 | use_grad=eparams.use_grad, 44 | trans_type=eparams.trans_type, 45 | use_margin=eparams.use_margin, 46 | margin_type=eparams.margin_type 47 | ) 48 | ######### 49 | # train/val split happens inside model_learner based on cv_split 50 | ######### 51 | # fit model where x_train and y_train are split into val and train according to cv_plsit 52 | model_learner.fit(x_train, y_train, None) 53 | 54 | # compute test prediction 55 | z_test = model_learner.predict(x_test) 56 | print('z_test', z_test.shape) 57 | 58 | # model prediction on validation 59 | z_val = model_learner.predict(x_train[cv_split[0][1]]) 60 | print('z_val', z_val.shape) 61 | 62 | ######################################### 63 | #### remove validation from train set ### 64 | ######################################### 65 | print('Full training size', x_train.shape, y_train.shape) 66 | x_train = x_train[cv_split[0][0]].copy() 67 | y_train = y_train[cv_split[0][0]].copy() 68 | print('Training size after removing validation', x_train.shape, y_train.shape) 69 | 70 | # get nested out-of-fold predictions 71 | full_index = np.arange(x_train.shape[0]) 72 | # fold_list[5,2] where [:,0] is training and [:,1] validation 73 | fold_list = list(kfolder.split(x_train)) 74 | num_folds = len(fold_list) 75 | 76 | # for oof 77 | oof_x_train = {} 78 | oof_y_train = {} 79 | oof_z_train = {} 80 | for k0 in range(num_folds): 81 | 82 | # compute oof for k0 and train 83 | oof_index0 = fold_list[k0][1] 84 | #train_index = np.setdiff1d(full_index, oof_index0) 85 | train_index = np.setdiff1d(fold_list[k0][0], oof_index0) 86 | 87 | # split train / valid 88 | cv_x_train, cv_y_train = x_train[train_index], y_train[train_index] 89 | cv_x_valid0, cv_y_valid0 = x_train[oof_index0], y_train[oof_index0] 90 | 91 | # fit on cv 92 | model_learner.refit_model(cv_x_train, cv_y_train, None) 93 | 94 | # obtain prediction over quantiles 
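# Shape note (illustrative, assuming the default 99-level QUANTILE_LIST): for a held-out
# fold of n_k rows, cv_z_valid0 below has shape (n_k, 99); oof_x_train / oof_y_train /
# oof_z_train each keep one array per fold under the string key str(k0).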
95 | cv_z_valid0 = model_learner.predict(cv_x_valid0) 96 | 97 | oof_x_train['{}'.format(k0)] = cv_x_valid0 98 | oof_y_train['{}'.format(k0)] = cv_y_valid0 99 | oof_z_train['{}'.format(k0)] = cv_z_valid0 100 | 101 | return oof_x_train, oof_y_train, oof_z_train, z_test, z_val 102 | 103 | # run tree models 104 | def run_tree(model, 105 | x_train, y_train, 106 | x_test, y_test, 107 | cv_split, kfolder, 108 | num_iters, rand_seed=1, 109 | eparams=None, 110 | **kwargs): 111 | # model name 112 | model_name = model.__name__ 113 | print('Train start: ', model_name) 114 | 115 | # set model learner 116 | model_learner = model(num_iters=num_iters, num_folds=cv_split, rand_seed=rand_seed) 117 | model_learner.fit(x_train, y_train) 118 | 119 | # compute test prediction on best hyper-params 120 | z_test = model_learner.full_predict(x_test, list(QUANTILE_LIST)) 121 | print('z_test', z_test.shape) 122 | 123 | # model prediction on validation 124 | z_val = model_learner.full_predict(x_train[cv_split[0][1]], list(QUANTILE_LIST)) 125 | print('z_val', z_val.shape) 126 | 127 | ######################################### 128 | #### remove validation from train set ### 129 | ######################################### 130 | print('Full training sizes:', x_train.shape, y_train.shape) 131 | x_train = x_train[cv_split[0][0]].copy() 132 | y_train = y_train[cv_split[0][0]].copy() 133 | print('Training size after removing validation', x_train.shape, y_train.shape) 134 | 135 | # get nested out-of-fold predictions 136 | full_index = np.arange(x_train.shape[0]) 137 | fold_list = list(kfolder.split(x_train)) 138 | num_folds = len(fold_list) 139 | 140 | # for oof 141 | oof_x_train = {} 142 | oof_y_train = {} 143 | oof_z_train = {} 144 | for k0 in range(num_folds): 145 | 146 | # compute oof for k0 and train 147 | oof_index0 = fold_list[k0][1] 148 | #train_index = np.setdiff1d(full_index, oof_index0) 149 | train_index = np.setdiff1d(fold_list[k0][0], oof_index0) 150 | 151 | # split train / valid 152 | cv_x_train, cv_y_train = x_train[train_index], y_train[train_index] 153 | cv_x_valid0, cv_y_valid0 = x_train[oof_index0], y_train[oof_index0] 154 | 155 | # fit on cv 156 | cv_model = model_learner.get_init_model() 157 | cv_model.fit(cv_x_train, cv_y_train.reshape(-1)) 158 | 159 | # obtain prediction over quantiles 160 | cv_z_valid0 = cv_model.predict(cv_x_valid0, list(QUANTILE_LIST)) 161 | 162 | oof_x_train['{}'.format(k0)] = cv_x_valid0 163 | oof_y_train['{}'.format(k0)] = cv_y_valid0 164 | oof_z_train['{}'.format(k0)] = cv_z_valid0 165 | 166 | return oof_x_train, oof_y_train, oof_z_train, z_test, z_val 167 | 168 | # run other models 169 | def run_others(model, 170 | x_train, y_train, 171 | x_test, y_test, 172 | cv_split, kfolder, 173 | num_iters, rand_seed=1, 174 | eparams=None, 175 | **kwargs): 176 | # model name 177 | model_name = model.__name__ 178 | print('Train start: ', model_name) 179 | 180 | ######################################### 181 | #### remove validation from train set ### 182 | ######################################### 183 | x_train_org = x_train.copy() 184 | y_train_org = y_train.copy() 185 | 186 | x_val = x_train[cv_split[0][1]].copy() 187 | print('x_val', x_val.shape) 188 | 189 | print('Full training sizes:', x_train.shape, y_train.shape) 190 | x_train = x_train[cv_split[0][0]].copy() 191 | y_train = y_train[cv_split[0][0]].copy() 192 | print('Training size after removing validation', x_train.shape, y_train.shape) 193 | 194 | # get nested out-of-fold predictions 195 | full_index = np.arange(x_train.shape[0]) 196 
| fold_list = list(kfolder.split(x_train)) 197 | num_folds = len(fold_list) 198 | 199 | # for each quantile 200 | oof_x_train = {} 201 | oof_y_train = {} 202 | oof_z_train = {} 203 | z_test = [] 204 | z_val = [] 205 | 206 | for quantile in QUANTILE_LIST: 207 | # find best model 208 | model_learner = model(quantile=quantile, num_iters=num_iters, num_folds=cv_split, rand_seed=rand_seed) 209 | model_learner.fit(x_train_org, y_train_org) 210 | 211 | # compute test prediction 212 | z_test.append(model_learner.predict(x_test).reshape(-1, 1)) 213 | 214 | # val prediction 215 | z_val.append(model_learner.predict(x_val).reshape(-1, 1)) 216 | 217 | # get out-of-fold predictions 218 | for k0 in range(num_folds): 219 | 220 | # and compute oof for k0 and k1 221 | oof_index0 = fold_list[k0][1] 222 | #train_index = np.setdiff1d(full_index, oof_index0) 223 | train_index = np.setdiff1d(fold_list[k0][0], oof_index0) 224 | 225 | # split train / valid 226 | cv_x_train, cv_y_train = x_train[train_index], y_train[train_index] 227 | cv_x_valid0, cv_y_valid0 = x_train[oof_index0], y_train[oof_index0] 228 | 229 | # fit on cv 230 | cv_model = model_learner.get_init_model() 231 | cv_model.fit(cv_x_train, cv_y_train.reshape(-1)) 232 | 233 | # get oof 234 | cv_z_valid0 = cv_model.predict(cv_x_valid0).reshape(-1, 1) 235 | if '{}'.format(k0) not in oof_x_train: 236 | oof_x_train['{}'.format(k0)] = cv_x_valid0 237 | if '{}'.format(k0) not in oof_y_train: 238 | oof_y_train['{}'.format(k0)] = cv_y_valid0 239 | if '{}'.format(k0) not in oof_z_train: 240 | oof_z_train['{}'.format(k0)] = cv_z_valid0 241 | else: 242 | cv_z_valid0 = np.concatenate([oof_z_train['{}'.format(k0)], cv_z_valid0], -1) 243 | oof_z_train['{}'.format(k0)] = cv_z_valid0 244 | 245 | z_test = np.concatenate(z_test, 1) 246 | z_val = np.concatenate(z_val, 1) 247 | 248 | return oof_x_train, oof_y_train, oof_z_train, z_test, z_val 249 | 250 | def prepare_data(task_id, 251 | num_iters=20, 252 | num_folds=5, 253 | rand_seed=1, 254 | eparams=None): 255 | 256 | print('Data preparation') 257 | # set seed 258 | random.seed(rand_seed) 259 | np.random.seed(rand_seed) 260 | torch.manual_seed(rand_seed) 261 | torch.cuda.manual_seed_all(rand_seed) 262 | 263 | # load dataset 264 | if task_id in UCI_LIST: 265 | task_name = task_id 266 | dataset = load_uci(task_id, random_seed=rand_seed, data_loc=eparams.data_loc) 267 | 268 | else: 269 | assert task_id in OPENML_DICT 270 | task_name = OPENML_DICT[task_id] 271 | dataset = load_openml(task_id, random_seed=rand_seed, data_loc=eparams.data_loc) 272 | 273 | dataset_size = dataset.size 274 | x_train, y_train = dataset.x_train, dataset.y_train 275 | x_test, y_test = dataset.x_test, dataset.y_test 276 | feature_size = x_train.shape[1] 277 | print('Data: {} (seed {}, dataset size {}, feature size {})'.format(task_name, rand_seed, dataset_size, feature_size)) 278 | 279 | # save data split 280 | exp_name = [task_id, num_folds, num_iters, rand_seed] 281 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_base_x_test_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), x_test) 282 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_base_y_test_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), y_test) 283 | 284 | # set cv splits 285 | train_size = x_train.shape[0] 286 | train_idx_list = np.arange(train_size) 287 | np.random.shuffle(train_idx_list) 288 | cv_split = [[train_idx_list[:int(train_size * CV_RATIO)], train_idx_list[int(train_size * CV_RATIO):]]] 289 | ######## 290 | # save the train and val split 291 | ######## 292 | 
print(eparams.log_id +'quantile_all_train_idx_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 293 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_all_train_idx_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), train_idx_list) 294 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_train_val_idx_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), cv_split) 295 | print(eparams.log_id +'quantile_train_val_idx_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name)) 296 | 297 | ######################################### 298 | #### validation is removed from training set 299 | ######################################### 300 | print('Train data:', x_train[cv_split[0][0]].shape, y_train[cv_split[0][0]].shape) 301 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_base_x_train_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), x_train[cv_split[0][0]]) 302 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_base_y_train_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), y_train[cv_split[0][0]]) 303 | 304 | print('Val data:', x_train[cv_split[0][1]].shape, y_train[cv_split[0][1]].shape) 305 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_base_x_val_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), x_train[cv_split[0][1]]) 306 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_base_y_val_{}_cv{}_iter{}_seed{}.npy'.format(*exp_name), y_train[cv_split[0][1]]) 307 | 308 | print('Test data:', x_test.shape, y_test.shape) 309 | 310 | output = {'x_train':x_train, 311 | 'y_train':y_train, 312 | 'x_test' :x_test, 313 | 'y_test' :y_test, 314 | 'cv_split':cv_split, 315 | } 316 | return output 317 | 318 | def run_exp(task_id, 319 | model_name, 320 | num_iters=20, 321 | num_folds=5, 322 | rand_seed=1, 323 | device=-1, 324 | cleaned_data=None, 325 | eparams=None): 326 | 327 | # set seed 328 | random.seed(rand_seed) 329 | np.random.seed(rand_seed) 330 | torch.manual_seed(rand_seed) 331 | torch.cuda.manual_seed_all(rand_seed) 332 | if torch.cuda.is_available() and device > -1: 333 | torch.backends.cudnn.deterministic = True 334 | torch.backends.cudnn.benchmark = False 335 | 336 | exp_name = [task_id, num_folds, num_iters, rand_seed] 337 | 338 | # set k-Folder for out of fold prediction (no need shuffle) 339 | kfolder = KFold(n_splits=num_folds) 340 | 341 | model, exp_fn = None, None 342 | if model_name == 'cgn': 343 | model = QuantileConditionalGaussianNetwork 344 | exp_fn = run_neural 345 | 346 | elif model_name == 'sqr': 347 | model = QuantileSingleNeuralNetwork 348 | exp_fn = run_neural 349 | 350 | elif model_name == 'mqr': 351 | model = QuantileJointNeuralNetwork 352 | exp_fn = run_neural 353 | 354 | elif model_name == 'rf': 355 | model = QuantileRandomForest 356 | exp_fn = run_tree 357 | 358 | elif model_name == 'xt': 359 | model = QuantileExtraTrees 360 | exp_fn = run_tree 361 | 362 | elif model_name == 'lgbm': 363 | model = QuantileLightGBM 364 | exp_fn = run_others 365 | 366 | elif model_name == 'qr': 367 | model = QuantileRegressor 368 | exp_fn = run_others 369 | 370 | # run exp 371 | oof_x_train, oof_y_train, oof_z_train, z_test, z_val = exp_fn(model=model, 372 | x_train=cleaned_data['x_train'].copy(), 373 | y_train=cleaned_data['y_train'].copy(), 374 | x_test=cleaned_data['x_test'].copy(), 375 | y_test=cleaned_data['y_test'].copy(), 376 | cv_split=copy.deepcopy(cleaned_data['cv_split']), 377 | kfolder=kfolder, 378 | num_iters=num_iters, 379 | rand_seed=rand_seed, 380 | device=device, 381 | eparams=eparams) 382 | 383 | # save 384 | print('Saving') 385 | print('z_test', z_test.shape) 
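# Filename sketch (illustrative values, not defaults enforced here): with model
# QuantileRandomForest, --task-id boston, --cv 5, --iter 20, --seed 1 and the default
# --log_id 'mylogid', the save below writes
#   ./output/data/mylogidquantile_nested_QuantileRandomForest_z_test_boston_cv5_iter20_seed1.npy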
386 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_{}_z_test_{}_cv{}_iter{}_seed{}.npy'.format(model.__name__, *exp_name), z_test) 387 | print(eparams.log_id +'quantile_nested_{}_z_test_{}_cv{}_iter{}_seed{}.npy'.format(model.__name__, *exp_name)) 388 | 389 | print('z_val', z_val.shape) 390 | np.save(eparams.DATA_PATH + eparams.log_id +'quantile_nested_{}_z_val_{}_cv{}_iter{}_seed{}.npy'.format(model.__name__, *exp_name), z_val) 391 | print(eparams.log_id +'quantile_nested_{}_z_val_{}_cv{}_iter{}_seed{}.npy'.format(model.__name__, *exp_name)) 392 | 393 | with open(eparams.DATA_PATH + eparams.log_id +'quantile_nested_{}_oof_x_train_{}_cv{}_iter{}_seed{}.pkl'.format(model.__name__, *exp_name), 'wb') as handle: 394 | pkl.dump(oof_x_train, handle, protocol=pkl.HIGHEST_PROTOCOL) 395 | print(eparams.log_id +'quantile_nested_{}_oof_x_train_{}_cv{}_iter{}_seed{}.pkl'.format(model.__name__, *exp_name)) 396 | 397 | with open(eparams.DATA_PATH + eparams.log_id +'quantile_nested_{}_oof_y_train_{}_cv{}_iter{}_seed{}.pkl'.format(model.__name__, *exp_name), 'wb') as handle: 398 | pkl.dump(oof_y_train, handle, protocol=pkl.HIGHEST_PROTOCOL) 399 | print(eparams.log_id +'quantile_nested_{}_oof_y_train_{}_cv{}_iter{}_seed{}.pkl'.format(model.__name__, *exp_name)) 400 | 401 | with open(eparams.DATA_PATH + eparams.log_id +'quantile_nested_{}_oof_z_train_{}_cv{}_iter{}_seed{}.pkl'.format(model.__name__, *exp_name), 'wb') as handle: 402 | pkl.dump(oof_z_train, handle, protocol=pkl.HIGHEST_PROTOCOL) 403 | print(eparams.log_id +'quantile_nested_{}_oof_z_train_{}_cv{}_iter{}_seed{}.pkl'.format(model.__name__, *exp_name)) 404 | 405 | if __name__ == "__main__": 406 | # arguments 407 | parser = argparse.ArgumentParser() 408 | 409 | # parser 410 | parser.add_argument('--task-id', type=str, help='task id') 411 | parser.add_argument('--seed', type=int, default=1, help='random seed') 412 | parser.add_argument('--gpu', type=int, default=0, help='gpu') 413 | parser.add_argument('--cv', type=int, default=5, help='folds for out of fold predictions') 414 | parser.add_argument('--iter', type=int, default=20, help='number of iterations for grid search') 415 | parser.add_argument('--trans_type', default='pava') 416 | parser.add_argument('--use_grad', default=False, action='store_true') 417 | parser.add_argument('--use_margin', default=False, action='store_true') 418 | parser.add_argument('--DATA_PATH', default='./output/data/') 419 | parser.add_argument('--log_id', default='mylogid') 420 | parser.add_argument('--data_loc', default='./data/dataset/') 421 | parser.add_argument('--margin_type', type=str, default='single', help='margin type') 422 | 423 | 424 | args = parser.parse_args() 425 | print('------------') 426 | print(args.__dict__) 427 | print('------------') 428 | 429 | make_dir(args.DATA_PATH) 430 | fname_json = os.path.join(args.DATA_PATH, args.log_id + '-t' + \ 431 | str(args.task_id) + '_cv' + str(args.cv) + \ 432 | '_i' + str(args.iter) + \ 433 | '_S' + str(args.seed) + '.json' ) 434 | dump_to_json(fname_json, {'args': args.__dict__}) 435 | print(fname_json) 436 | 437 | # first prepare data 438 | cleaned_dt = prepare_data(task_id=args.task_id, 439 | num_iters=args.iter, 440 | num_folds=args.cv, 441 | rand_seed=args.seed, 442 | eparams=args) 443 | print('---------------------------------------------------') 444 | print() 445 | # run 446 | 447 | model_list = ['cgn', 'sqr', 'mqr', 'rf', 'xt', 'lgbm'] 448 | 449 | for model in model_list: 450 | print(model , '...') 451 | run_exp(task_id=args.task_id, 452 | 
model_name=model, 453 | num_iters=args.iter, 454 | num_folds=args.cv, 455 | rand_seed=args.seed, 456 | device=args.gpu, 457 | cleaned_data=cleaned_dt, 458 | eparams=args, 459 | ) 460 | print('---------------------------------------------------') 461 | 462 | print('Done.') 463 | 464 | -------------------------------------------------------------------------------- /model/neural_network.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import random 3 | import torch 4 | import numpy as np 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from scipy.stats import norm 8 | from util.metric import pinball_loss, huber_loss, margin_loss, pinball_loss_np 9 | from util.misc import fix_crossing, pava_forward, sort_forward 10 | from torch.utils.data import DataLoader, TensorDataset 11 | from sklearn.model_selection import ParameterSampler 12 | EVAL_STEPS = 10 13 | STOP_STEPS = EVAL_STEPS * 50 14 | QUANTILE_LOSS_PARAM_GRID = {'alpha': [0.0, 0.01], 15 | 'lr': [1e-3, 3e-4], 16 | 'wd': [1e-5, 1e-7], 17 | 'weight': [0.0, 1.0]} 18 | QUANTILE_MARGIN_PARAM_GRID = {'margin': [0.0, 0.5, 1.0, 5.0], 19 | 'scale': [1e-3, 1e-4]} 20 | 21 | QUANTILE_MARGIN_PARAM_GRID_FIX = {'margin': [0.0, 0.5, 1.0, 5.0], 22 | 'margin_delta': [0.0001, 0.001, 0.005, 0.0075, 0.0099]} 23 | 24 | MEAN_LOSS_PARAM_GRID = {'lr': [1e-3, 1e-4], 25 | 'wd': [1e-5, 1e-7]} 26 | NETWORK_PARAM_GRID = {'hidden_size': [64, 128], 27 | 'num_layers': [2, 3], 28 | 'dropout': [0.0, 0.05, 0.1]} 29 | 30 | 31 | class NeuralJointQuantileRegressor(nn.Module): 32 | def __init__(self, 33 | quantile_list, 34 | input_size, 35 | hidden_size=64, 36 | num_layers=3, 37 | dropout=0.0, 38 | activation='elu', 39 | use_grad=True, 40 | trans_type='mono', 41 | use_margin=True, 42 | margin_type=''): 43 | super(NeuralJointQuantileRegressor, self).__init__() 44 | # quantile list to handle 45 | self.num_quantiles = len(quantile_list) 46 | self.register_buffer('quantile_list', torch.Tensor(quantile_list).float()) 47 | 48 | # activation 49 | act_fn = nn.ELU() 50 | if activation == 'elu': 51 | act_fn = nn.ELU() 52 | elif activation == 'relu': 53 | act_fn = nn.ReLU() 54 | elif activation == 'tanh': 55 | act_fn = nn.Tanh() 56 | 57 | # network with predicting quantiles 58 | layers = [nn.Linear(input_size, hidden_size), act_fn] 59 | for _ in range(num_layers - 1): 60 | layers.append(nn.Dropout(dropout)) 61 | layers.append(nn.Linear(hidden_size, hidden_size)) 62 | layers.append(act_fn) 63 | layers.append(nn.Linear(hidden_size, self.num_quantiles)) 64 | self.network = nn.Sequential(*layers) 65 | 66 | # post-process 67 | self.use_grad = use_grad 68 | if trans_type is None: 69 | self.use_grad = False 70 | self.trans_type = trans_type 71 | self.use_margin = use_margin 72 | self.margin_type = margin_type 73 | 74 | #print('----------------') 75 | #print('NeuralJointQuantileRegressor') 76 | #print('use_grad: ', self.use_grad) 77 | #print('trans_type: ', self.trans_type) 78 | #print('use_margin: ', self.use_margin) 79 | #print('margin_type: ', self.margin_type) 80 | #print('----------------') 81 | 82 | def forward(self, input_data): 83 | # get output values 84 | output_data = self.network(input_data) 85 | return output_data 86 | 87 | def compute_loss(self, 88 | input_data, 89 | target_data, 90 | aux_data=None, 91 | margin=0.0, 92 | scale=0.0, 93 | alpha=0.0, 94 | weight=0.0, 95 | margin_delta=0.0): 96 | # train mode 97 | self.train() 98 | 99 | # margin loss 100 | if margin > 0.0 and self.use_margin: 101 | # include aux-data 
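# When auxiliary inputs (aux_data) are supplied, they are concatenated with the batch so
# the margin (non-crossing) penalty sees both; the quantile/huber loss further down is
# then computed only on the first batch_size rows, which are sliced back out below.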
102 | if aux_data is not None: 103 | batch_size = input_data.size()[0] 104 | full_input_data = torch.cat([input_data, aux_data], 0) 105 | predict_data = self(full_input_data) 106 | m_loss = margin_loss(predict_data, self.quantile_list * scale) 107 | predict_data = predict_data[:batch_size].contiguous() 108 | else: 109 | predict_data = self(input_data) 110 | if self.margin_type == 'single': 111 | m_loss = margin_loss(predict_data, margin_delta) 112 | 113 | else: 114 | m_loss = margin_loss(predict_data, self.quantile_list * scale) 115 | else: 116 | predict_data = self(input_data) 117 | m_loss = 0.0 118 | 119 | # fix crossing 120 | if self.use_grad: 121 | h_loss = weight * huber_loss(predict_data, target_data, self.quantile_list, alpha=alpha) 122 | if self.trans_type == 'mono': 123 | predict_data = fix_crossing(predict_data) 124 | 125 | elif self.trans_type == 'pava': 126 | predict_data = pava_forward(predict_data) 127 | 128 | elif self.trans_type == 'sort': 129 | predict_data = sort_forward(predict_data) 130 | 131 | else: 132 | NotImplementedError() 133 | h_loss += (1 - weight) * huber_loss(predict_data, target_data, self.quantile_list, alpha=alpha) 134 | 135 | else: 136 | h_loss = huber_loss(predict_data, target_data, self.quantile_list, alpha=alpha) 137 | 138 | # combine 139 | return h_loss + margin * m_loss 140 | 141 | def eval_loss(self, input_data, target_data): 142 | self.eval() 143 | with torch.no_grad(): 144 | predict_data = self(input_data) 145 | 146 | if self.trans_type == 'mono': 147 | predict_data = fix_crossing(predict_data) 148 | 149 | elif self.trans_type == 'pava': 150 | predict_data = pava_forward(predict_data) 151 | 152 | elif self.trans_type == 'sort': 153 | predict_data = sort_forward(predict_data)#torch.sort(predict_data, -1)[0] 154 | 155 | return pinball_loss(predict_data, target_data, self.quantile_list).item() 156 | 157 | def predict(self, input_data): 158 | self.eval() 159 | with torch.no_grad(): 160 | predict_data = self(input_data) 161 | if self.trans_type == 'mono': 162 | predict_data = fix_crossing(predict_data) 163 | 164 | elif self.trans_type == 'pava': 165 | predict_data = pava_forward(predict_data) 166 | 167 | elif self.trans_type == 'sort': 168 | predict_data = sort_forward(predict_data)#torch.sort(predict_data, -1)[0] 169 | 170 | return predict_data.data.cpu().numpy() 171 | 172 | class NeuralSingleQuantileRegressor(nn.Module): 173 | def __init__(self, 174 | input_size, 175 | hidden_size=64, 176 | num_layers=3, 177 | dropout=0.0, 178 | activation='elu'): 179 | super(NeuralSingleQuantileRegressor, self).__init__() 180 | # activation 181 | act_fn = nn.ELU() 182 | if activation == 'elu': 183 | act_fn = nn.ELU() 184 | elif activation == 'relu': 185 | act_fn = nn.ReLU() 186 | elif activation == 'tanh': 187 | act_fn = nn.Tanh() 188 | 189 | # network with predicting quantiles 190 | layers = [nn.Linear(input_size + 1, hidden_size), act_fn] 191 | for _ in range(num_layers - 1): 192 | layers.append(nn.Dropout(dropout)) 193 | layers.append(nn.Linear(hidden_size, hidden_size)) 194 | layers.append(act_fn) 195 | 196 | layers.append(nn.Linear(hidden_size, 1)) 197 | self.network = nn.Sequential(*layers) 198 | 199 | def forward(self, input_data, quantile_data): 200 | quantile_data = quantile_data.reshape(-1, 1) 201 | assert quantile_data.size()[0] == input_data.size()[0] 202 | 203 | # get output values 204 | output_data = self.network(torch.cat([input_data, quantile_data - 0.5], 1)) 205 | return output_data 206 | 207 | def compute_loss(self, input_data, target_data, 
quantile_data, alpha=0.01): 208 | # train mode 209 | self.train() 210 | 211 | # prediction 212 | predict_data = self(input_data, quantile_data) 213 | 214 | # compute huber loss 215 | return huber_loss(predict_data, target_data, quantile_data, alpha) 216 | 217 | def eval_loss(self, input_data, target_data, quantile_data): 218 | self.eval() 219 | with torch.no_grad(): 220 | predict_data = self(input_data, quantile_data) 221 | return pinball_loss(predict_data, target_data, quantile_data).item() 222 | 223 | def predict(self, input_data, quantile_data): 224 | self.eval() 225 | with torch.no_grad(): 226 | predict_data = self(input_data, quantile_data) 227 | return predict_data.data.cpu().numpy() 228 | 229 | 230 | class NeuralCondtionalGaussian(nn.Module): 231 | def __init__(self, 232 | input_size, 233 | hidden_size=64, 234 | num_layers=3, 235 | dropout=0.0, 236 | activation='elu'): 237 | 238 | super(NeuralCondtionalGaussian, self).__init__() 239 | # activation 240 | act_fn = nn.ELU() 241 | if activation == 'elu': 242 | act_fn = nn.ELU() 243 | elif activation == 'relu': 244 | act_fn = nn.ReLU() 245 | elif activation == 'tanh': 246 | act_fn = nn.Tanh() 247 | 248 | # network with predicting quantiles 249 | layers = [nn.Linear(input_size, hidden_size), act_fn] 250 | for _ in range(num_layers - 1): 251 | layers.append(nn.Dropout(dropout)) 252 | layers.append(nn.Linear(hidden_size, hidden_size)) 253 | layers.append(act_fn) 254 | 255 | layers.append(nn.Linear(hidden_size, 2)) 256 | self.network = nn.Sequential(*layers) 257 | 258 | def forward(self, input_data): 259 | output_data = self.network(input_data) 260 | mean_data = output_data[:, 0].reshape(-1, 1) 261 | var_data = F.softplus(output_data[:, 1].reshape(-1, 1)) + 1e-6 262 | return mean_data, var_data 263 | 264 | def compute_loss(self, input_data, target_data): 265 | # train mode 266 | self.train() 267 | 268 | # prediction 269 | mean_data, var_data = self(input_data) 270 | 271 | # compute negative log-likelihood 272 | nll_loss = torch.pow(target_data - mean_data, 2).div(2 * var_data) 273 | nll_loss += var_data.log().div(2) 274 | return nll_loss.mean() 275 | 276 | def eval_loss(self, input_data, target_data, quantile_data=None): 277 | if quantile_data is None: 278 | self.eval() 279 | with torch.no_grad(): 280 | mean_data, var_data = self(input_data) 281 | error_data = target_data - mean_data 282 | return torch.sqrt(torch.mean(error_data * error_data)).item() 283 | else: 284 | predict_data = self.predict(input_data, quantile_data) 285 | target_data = target_data.data.cpu().numpy() 286 | return pinball_loss_np(predict_data, target_data, quantile_data, True) 287 | 288 | def predict(self, input_data, quantile_data=None): 289 | self.eval() 290 | with torch.no_grad(): 291 | mean_data, var_data = self(input_data) 292 | if quantile_data is None: 293 | return mean_data.data.cpu().numpy() 294 | else: 295 | std_data = torch.sqrt(var_data).data.cpu().numpy() 296 | ppf_data = norm.ppf(quantile_data).reshape(1, -1) 297 | predict_data = mean_data.data.cpu().numpy() + std_data * ppf_data 298 | return predict_data 299 | 300 | 301 | class QuantileJointNeuralNetwork: 302 | def __init__(self, 303 | quantile_list, 304 | num_iters, 305 | cv_split, 306 | batch_size=64, 307 | use_grad=True, 308 | trans_type='mono', 309 | use_margin=True, 310 | rand_seed=1, 311 | device=-1, 312 | margin_type='', 313 | **kwargs): 314 | 315 | self.num_iters = num_iters 316 | self.cv_split = cv_split 317 | self.quantile_list = quantile_list 318 | self.use_grad = use_grad 319 | self.trans_type 
= trans_type 320 | self.use_margin = use_margin 321 | self.rand_seed = rand_seed 322 | 323 | self.input_size = None 324 | self.batch_size = batch_size 325 | 326 | self.best_model = None 327 | self.best_params = None 328 | 329 | self.margin_type = margin_type 330 | 331 | print('----------------') 332 | print('QuantileJointNeuralNetwork') 333 | print('use_grad: ', self.use_grad) 334 | print('trans_type: ', self.trans_type) 335 | print('use_margin: ', self.use_margin) 336 | print('margin_type: ', self.margin_type) 337 | print('num_iters: ', self.num_iters) 338 | print('----------------') 339 | 340 | if torch.cuda.is_available() and device > -1: 341 | self.device = torch.device("cuda:{}".format(device)) 342 | else: 343 | self.device = torch.device("cpu") 344 | 345 | def fit(self, x_train, y_train, ax_train=None): 346 | # train models 347 | x_train = torch.FloatTensor(x_train).to(self.device) 348 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 349 | 350 | if ax_train is not None: 351 | ax_train = torch.FloatTensor(ax_train).to(self.device) 352 | 353 | # get input size 354 | self.input_size = x_train.size()[1] 355 | 356 | # build params list 357 | full_param_grid = {**NETWORK_PARAM_GRID, 358 | **QUANTILE_LOSS_PARAM_GRID} 359 | 360 | if self.use_margin == True and self.margin_type == 'single': 361 | full_param_grid = {**full_param_grid, 362 | **QUANTILE_MARGIN_PARAM_GRID_FIX} 363 | 364 | elif self.use_margin: 365 | full_param_grid = {**full_param_grid, 366 | **QUANTILE_MARGIN_PARAM_GRID} 367 | 368 | if not self.use_grad or self.trans_type in ['pava', 'sort']: 369 | full_param_grid['weight'] = [0.0] 370 | 371 | params_list = list(ParameterSampler(param_distributions=full_param_grid, 372 | n_iter=self.num_iters, 373 | random_state=self.rand_seed)) 374 | 375 | # set data loader 376 | train_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][0]], 377 | y_train[self.cv_split[0][0]]), 378 | shuffle=True, 379 | batch_size=self.batch_size, 380 | drop_last=False, 381 | worker_init_fn=np.random.seed(self.rand_seed)) 382 | 383 | valid_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][1]], 384 | y_train[self.cv_split[0][1]]), 385 | shuffle=False, 386 | batch_size=1024, 387 | drop_last=False) 388 | 389 | if ax_train is not None: 390 | aux_loader = DataLoader(dataset=TensorDataset(torch.cat([ax_train, x_train[self.cv_split[0][1]]], 0)), 391 | shuffle=True, 392 | batch_size=self.batch_size, 393 | drop_last=False, 394 | worker_init_fn=np.random.seed(self.rand_seed)) 395 | else: 396 | aux_loader = None 397 | 398 | # for each param 399 | best_eval_loss = np.inf 400 | best_eval_step = 0 401 | for p, params in enumerate(params_list): 402 | print('iter:',p, ' ', params) 403 | # fit model with given data and params 404 | eval_loss, eval_step = self.fit_model(train_loader, valid_loader, aux_loader, **params) 405 | if eval_loss < best_eval_loss: 406 | best_eval_loss = eval_loss 407 | best_eval_step = eval_step 408 | self.best_params = params 409 | 410 | print('eval_loss : %.4f, best_eval_sofar: %.4f, eval_step: %d' %(eval_loss, best_eval_loss, eval_step)) 411 | print() 412 | 413 | self.best_params['num_steps'] = best_eval_step 414 | 415 | # retrain model with train split with best hyper-params 416 | train_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][0]], 417 | y_train[self.cv_split[0][0]]), 418 | shuffle=True, 419 | batch_size=self.batch_size, 420 | drop_last=False, 421 | worker_init_fn=np.random.seed(self.rand_seed)) 422 | 423 | if ax_train is not None: 424 | 
aux_loader = DataLoader(TensorDataset(ax_train), 425 | shuffle=True, 426 | batch_size=self.batch_size, 427 | drop_last=False, 428 | worker_init_fn=np.random.seed(self.rand_seed)) 429 | else: 430 | aux_loader = None 431 | 432 | print('best_params:', self.best_params) 433 | self.best_model = self.fit_model(train_loader, None, aux_loader, **self.best_params) 434 | 435 | def fit_model(self, 436 | train_loader, 437 | valid_loader=None, 438 | aux_loader=None, 439 | hidden_size=64, 440 | num_layers=3, 441 | dropout=0.5, 442 | activation='elu', 443 | lr=1e-3, 444 | wd=1e-5, 445 | num_steps=None, 446 | margin=0.0, 447 | scale=0.0, 448 | alpha=0.0, 449 | weight=0.0, 450 | margin_delta=0.0): 451 | 452 | # init model 453 | random.seed(self.rand_seed) 454 | if self.device == torch.device("cpu"): 455 | torch.manual_seed(self.rand_seed) 456 | 457 | else: 458 | torch.cuda.manual_seed_all(self.rand_seed) 459 | 460 | model = NeuralJointQuantileRegressor(quantile_list=self.quantile_list, 461 | input_size=self.input_size, 462 | hidden_size=hidden_size, 463 | num_layers=num_layers, 464 | dropout=dropout, 465 | activation=activation, 466 | use_grad=self.use_grad, 467 | trans_type=self.trans_type, 468 | use_margin=self.use_margin, 469 | margin_type=self.margin_type) 470 | model = model.to(self.device) 471 | 472 | # init optimizer 473 | optimizer = torch.optim.Adam(params=model.parameters(), 474 | lr=lr, weight_decay=wd, amsgrad=True) 475 | 476 | # init aux_loader 477 | if aux_loader is None: 478 | aux_loader_iterator = None 479 | else: 480 | aux_loader_iterator = iter(aux_loader) 481 | 482 | # for each update 483 | steps = 0 484 | best_valid_loss = np.inf 485 | best_step = 0 486 | while True: 487 | # for each batch (update) 488 | for x_batch, y_batch in train_loader: 489 | # aux data 490 | if aux_loader_iterator is None: 491 | aux_batch = None 492 | else: 493 | try: 494 | aux_batch = next(aux_loader_iterator)[0] 495 | except StopIteration: 496 | aux_loader_iterator = iter(aux_loader) 497 | aux_batch = next(aux_loader_iterator)[0] 498 | 499 | # compute loss 500 | weight = weight * (np.cos(min((steps / float(STOP_STEPS)), 1.0) * np.pi) + 1) * 0.5 501 | batch_loss = model.compute_loss(input_data=x_batch, 502 | target_data=y_batch, 503 | aux_data=aux_batch, 504 | margin=margin, 505 | scale=scale, 506 | alpha=alpha, 507 | weight=weight, 508 | margin_delta=margin_delta) 509 | 510 | # backprop and update 511 | optimizer.zero_grad() 512 | batch_loss.backward() 513 | optimizer.step() 514 | 515 | # step up 516 | steps += 1 517 | 518 | # validate 519 | if steps % 100 == 0 and valid_loader is not None: 520 | valid_loss = 0.0 521 | valid_size = 0.0 522 | 523 | for x_batch, y_batch in valid_loader: 524 | batch_size = x_batch.size()[0] 525 | batch_loss = model.eval_loss(input_data=x_batch, target_data=y_batch) 526 | valid_loss += batch_loss * batch_size 527 | valid_size += batch_size 528 | valid_loss /= valid_size 529 | 530 | if best_valid_loss > valid_loss: 531 | best_valid_loss = valid_loss 532 | best_step = steps 533 | elif steps - best_step >= STOP_STEPS: 534 | return best_valid_loss, best_step 535 | elif num_steps is not None and steps >= num_steps: 536 | assert valid_loader is None 537 | return copy.deepcopy(model) 538 | 539 | def predict(self, x_data): 540 | x_data = torch.FloatTensor(x_data).to(self.device) 541 | y_pred = self.best_model.predict(x_data) 542 | return y_pred 543 | 544 | def refit_model(self, x_train, y_train, ax_train=None): 545 | x_train = torch.FloatTensor(x_train).to(self.device) 546 | y_train = 
torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 547 | 548 | if ax_train is not None: 549 | ax_train = torch.FloatTensor(ax_train).to(self.device) 550 | 551 | train_loader = DataLoader(TensorDataset(x_train, y_train), 552 | shuffle=True, 553 | batch_size=self.batch_size, 554 | drop_last=False) 555 | if ax_train is not None: 556 | aux_loader = DataLoader(TensorDataset(ax_train), 557 | shuffle=True, 558 | batch_size=self.batch_size, 559 | drop_last=False) 560 | else: 561 | aux_loader = None 562 | self.best_model = self.fit_model(train_loader, None, aux_loader, **self.best_params) 563 | 564 | 565 | class QuantileSingleNeuralNetwork: 566 | def __init__(self, 567 | num_iters, 568 | cv_split, 569 | quantile_list, 570 | batch_size=64, 571 | rand_seed=111, 572 | device=-1, 573 | **kwargs): 574 | 575 | self.num_iters = num_iters 576 | self.cv_split = cv_split 577 | self.quantile_list = quantile_list 578 | self.rand_seed = rand_seed 579 | 580 | self.input_size = None 581 | self.batch_size = batch_size 582 | 583 | self.best_model = None 584 | self.best_params = None 585 | 586 | if torch.cuda.is_available() and device > -1: 587 | self.device = torch.device("cuda:{}".format(device)) 588 | else: 589 | self.device = torch.device("cpu") 590 | 591 | def fit(self, x_train, y_train, ax_train=None): 592 | # train models 593 | x_train = torch.FloatTensor(x_train).float().to(self.device) 594 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).float().to(self.device) 595 | 596 | # get input size 597 | self.input_size = x_train.size()[1] 598 | 599 | # build params list 600 | full_param_grid = {**NETWORK_PARAM_GRID, 601 | **QUANTILE_LOSS_PARAM_GRID} 602 | del full_param_grid['weight'] 603 | params_list = list(ParameterSampler(param_distributions=full_param_grid, 604 | n_iter=self.num_iters, 605 | random_state=self.rand_seed)) 606 | 607 | # set data loader 608 | train_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][0]], 609 | y_train[self.cv_split[0][0]]), 610 | shuffle=True, 611 | batch_size=self.batch_size, 612 | drop_last=False) 613 | 614 | valid_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][1]], 615 | y_train[self.cv_split[0][1]]), 616 | shuffle=False, 617 | batch_size=1024, 618 | drop_last=False) 619 | 620 | # for each param 621 | best_eval_loss = np.inf 622 | best_eval_step = 0 623 | for p, params in enumerate(params_list): 624 | # fit model with given data and params 625 | eval_loss, eval_step = self.fit_model(train_loader, valid_loader, **params) 626 | if eval_loss < best_eval_loss: 627 | best_eval_loss = eval_loss 628 | best_eval_step = eval_step 629 | self.best_params = params 630 | self.best_params['num_steps'] = best_eval_step 631 | 632 | # retrain model train split not full data 633 | #train_loader = DataLoader(TensorDataset(x_train, y_train), 634 | # shuffle=True, batch_size=self.batch_size, drop_last=True) 635 | 636 | train_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][0]], 637 | y_train[self.cv_split[0][0]]), 638 | shuffle=True, 639 | batch_size=self.batch_size, 640 | drop_last=False) 641 | 642 | self.best_model = self.fit_model(train_loader, None, **self.best_params) 643 | 644 | def fit_model(self, train_loader, valid_loader=None, 645 | hidden_size=64, num_layers=3, dropout=0.5, activation='elu', 646 | lr=1e-3, wd=1e-5, num_steps=None, alpha=0.01): 647 | # init model 648 | random.seed(self.rand_seed) 649 | if self.device == torch.device("cpu"): 650 | torch.manual_seed(self.rand_seed) 651 | else: 652 | torch.cuda.manual_seed_all(self.rand_seed) 653 | 
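# NeuralSingleQuantileRegressor takes the target quantile level as an extra input feature
# (hence input_size + 1 in its first layer); training below samples a random level per
# example, and predict() later sweeps the fixed quantile_list.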
654 | model = NeuralSingleQuantileRegressor(input_size=self.input_size, 655 | hidden_size=hidden_size, 656 | num_layers=num_layers, 657 | dropout=dropout, 658 | activation=activation) 659 | model = model.to(self.device) 660 | 661 | # init optimizer 662 | optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, weight_decay=wd, amsgrad=True) 663 | 664 | # for each update 665 | steps = 0 666 | best_valid_loss = np.inf 667 | best_step = 0 668 | while True: 669 | # for each batch (update) 670 | for x_batch, y_batch in train_loader: 671 | # sample quantile 672 | batch_size = x_batch.size()[0] 673 | q_batch = torch.rand(batch_size, 1).to(self.device) 674 | q_batch = torch.clamp(q_batch, 0.001, 0.999) 675 | 676 | # compute loss 677 | batch_loss = model.compute_loss(input_data=x_batch, target_data=y_batch, 678 | quantile_data=q_batch, alpha=alpha) 679 | 680 | # backprop and update 681 | optimizer.zero_grad() 682 | batch_loss.backward() 683 | optimizer.step() 684 | 685 | # step up 686 | steps += 1 687 | 688 | # validate 689 | if steps % 100 == 0 and valid_loader is not None: 690 | valid_loss = 0.0 691 | valid_size = 0.0 692 | 693 | for x_batch, y_batch in valid_loader: 694 | batch_size = x_batch.size()[0] 695 | for q in self.quantile_list: 696 | q_batch = q * torch.ones(batch_size, 1).to(self.device) 697 | batch_loss = model.eval_loss(input_data=x_batch, target_data=y_batch, quantile_data=q_batch) 698 | valid_loss += batch_loss * batch_size 699 | valid_size += batch_size 700 | valid_loss /= valid_size 701 | 702 | if best_valid_loss > valid_loss: 703 | best_valid_loss = valid_loss 704 | best_step = steps 705 | elif steps - best_step >= STOP_STEPS: 706 | return best_valid_loss, best_step 707 | elif num_steps is not None and steps >= num_steps: 708 | assert valid_loader is None 709 | return copy.deepcopy(model) 710 | 711 | def predict(self, x_data): 712 | x_data = torch.FloatTensor(x_data).to(self.device) 713 | batch_size = x_data.size()[0] 714 | y_pred_list = [] 715 | for q in self.quantile_list: 716 | q_data = q * torch.ones(batch_size, 1).to(self.device) 717 | y_pred = self.best_model.predict(x_data, q_data) 718 | y_pred_list.append(y_pred) 719 | return np.concatenate(y_pred_list, 1) 720 | 721 | def refit_model(self, x_train, y_train, ax_train=None): 722 | x_train = torch.FloatTensor(x_train).to(self.device) 723 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 724 | 725 | train_loader = DataLoader(TensorDataset(x_train, y_train), 726 | shuffle=True, 727 | batch_size=self.batch_size, 728 | drop_last=False) 729 | 730 | self.best_model = self.fit_model(train_loader, None, **self.best_params) 731 | 732 | 733 | class QuantileConditionalGaussianNetwork: 734 | def __init__(self, 735 | num_iters, 736 | cv_split, 737 | quantile_list, 738 | batch_size=64, 739 | rand_seed=111, 740 | device=-1, 741 | **kwargs): 742 | self.num_iters = num_iters 743 | self.cv_split = cv_split 744 | self.quantile_list = quantile_list 745 | self.rand_seed = rand_seed 746 | 747 | self.input_size = None 748 | self.batch_size = batch_size 749 | 750 | self.best_model = None 751 | self.best_params = None 752 | 753 | if torch.cuda.is_available() and device > -1: 754 | self.device = torch.device("cuda:{}".format(device)) 755 | else: 756 | self.device = torch.device("cpu") 757 | 758 | def fit(self, x_train, y_train, ax_train=None): 759 | # train models 760 | x_train = torch.FloatTensor(x_train).to(self.device) 761 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 762 | 763 | # get input size 
764 | self.input_size = x_train.size()[1] 765 | 766 | # build params list 767 | full_param_grid = NETWORK_PARAM_GRID 768 | params_list = list(ParameterSampler(param_distributions=full_param_grid, 769 | n_iter=self.num_iters, 770 | random_state=self.rand_seed)) 771 | 772 | # set data loader 773 | train_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][0]], 774 | y_train[self.cv_split[0][0]]), 775 | shuffle=True, 776 | batch_size=self.batch_size, 777 | drop_last=False) 778 | 779 | valid_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][1]], 780 | y_train[self.cv_split[0][1]]), 781 | shuffle=False, 782 | batch_size=1024, 783 | drop_last=False) 784 | 785 | # for each param 786 | best_eval_loss = np.inf 787 | best_eval_step = 0 788 | for p, params in enumerate(params_list): 789 | # fit model with given data and params 790 | eval_loss, eval_step = self.fit_model(train_loader, valid_loader, **params) 791 | if eval_loss < best_eval_loss: 792 | best_eval_loss = eval_loss 793 | best_eval_step = eval_step 794 | self.best_params = params 795 | self.best_params['num_steps'] = best_eval_step 796 | 797 | # retrain model with only train split not full data 798 | # train_loader = DataLoader(TensorDataset(x_train, y_train), 799 | # shuffle=True, batch_size=self.batch_size, drop_last=True) 800 | train_loader = DataLoader(TensorDataset(x_train[self.cv_split[0][0]], 801 | y_train[self.cv_split[0][0]]), 802 | shuffle=True, 803 | batch_size=self.batch_size, 804 | drop_last=False) 805 | 806 | print('best_params:', self.best_params) 807 | self.best_model = self.fit_model(train_loader, None, **self.best_params) 808 | 809 | def fit_model(self, 810 | train_loader, 811 | valid_loader=None, 812 | hidden_size=64, 813 | num_layers=3, 814 | dropout=0.5, 815 | activation='elu', 816 | lr=1e-3, 817 | wd=1e-5, 818 | num_steps=None): 819 | 820 | # init model 821 | random.seed(self.rand_seed) 822 | if self.device == torch.device("cpu"): 823 | torch.manual_seed(self.rand_seed) 824 | else: 825 | torch.cuda.manual_seed_all(self.rand_seed) 826 | 827 | model = NeuralCondtionalGaussian(input_size=self.input_size, 828 | hidden_size=hidden_size, 829 | num_layers=num_layers, 830 | dropout=dropout, 831 | activation=activation) 832 | model = model.to(self.device) 833 | 834 | # init optimizer 835 | optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, weight_decay=wd, amsgrad=True) 836 | 837 | # for each update 838 | steps = 0 839 | best_valid_loss = np.inf 840 | best_step = 0 841 | while True: 842 | # for each batch (update) 843 | for x_batch, y_batch in train_loader: 844 | # compute loss 845 | batch_loss = model.compute_loss(input_data=x_batch, target_data=y_batch) 846 | 847 | # backprop and update 848 | optimizer.zero_grad() 849 | batch_loss.backward() 850 | optimizer.step() 851 | 852 | # step up 853 | steps += 1 854 | 855 | # validate 856 | if steps % 100 == 0 and valid_loader is not None: 857 | valid_loss = 0.0 858 | valid_size = 0.0 859 | for x_batch, y_batch in valid_loader: 860 | batch_size = x_batch.size()[0] 861 | batch_loss = model.eval_loss(input_data=x_batch, target_data=y_batch, 862 | quantile_data=self.quantile_list) 863 | valid_loss += batch_loss * batch_size 864 | valid_size += batch_size 865 | valid_loss /= valid_size 866 | 867 | if best_valid_loss > valid_loss: 868 | best_valid_loss = valid_loss 869 | best_step = steps 870 | elif steps - best_step >= STOP_STEPS: 871 | return best_valid_loss, best_step 872 | elif num_steps is not None and steps >= num_steps: 873 | assert valid_loader is None 874 
| return copy.deepcopy(model) 875 | 876 | def predict(self, x_data): 877 | x_data = torch.FloatTensor(x_data).to(self.device) 878 | return self.best_model.predict(x_data, self.quantile_list) 879 | 880 | def refit_model(self, x_train, y_train, ax_train=None): 881 | x_train = torch.FloatTensor(x_train).to(self.device) 882 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 883 | train_loader = DataLoader(TensorDataset(x_train, y_train), 884 | shuffle=True, 885 | batch_size=self.batch_size, 886 | drop_last=False) 887 | 888 | self.best_model = self.fit_model(train_loader, None, **self.best_params) 889 | 890 | -------------------------------------------------------------------------------- /model/neural_aggregator.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | import numpy as np 4 | import torch.nn as nn 5 | from util.metric import pinball_loss, huber_loss, margin_loss 6 | from util.misc import fix_crossing, pava_forward, sort_forward 7 | from torch.utils.data import DataLoader, TensorDataset 8 | from sklearn.model_selection import ParameterSampler 9 | EVAL_STEPS = 10 10 | STOP_STEPS = EVAL_STEPS * 50 11 | QUANTILE_LOSS_PARAM_GRID = {'alpha': [0.0], 12 | 'lr': [1e-3, 5e-4], 13 | 'wd': [1e-7], 14 | 'margin_weight': [0.5, 1.0, 2.0, 5.0, 10.0], 15 | 'margin_scale': [1e-1, 5e-2, 1e-2, 1e-3, 1e-4]} 16 | 17 | NETWORK_PARAM_GRID = {'hidden_size': [64, 128], 18 | 'num_layers': [2, 3], 19 | 'dropout': [0.0, 0.05, 0.1] 20 | } 21 | 22 | QUANTILE_LOSS_PARAM_GRID_FIX = {'alpha': [0.0], 23 | 'lr': [1e-3], 24 | 'wd': [1e-7], 25 | 'margin_weight': [0.5, 1.0, 2.0, 5.0, 10.0], 26 | 'margin_delta': [0.0001, 0.001, 0.005, 0.0075, 0.0099] 27 | } 28 | 29 | class QuantileGlobalAggregator(nn.Module): 30 | def __init__(self, 31 | num_models, # number of base models 32 | quantile_list, # list of quantile levels 33 | normalize=True, # normalize weights 34 | margin_list=None, # using margin 35 | trans_type=None, # apply non-crossing 36 | use_grad=True, # using non-crossing training 37 | share=False, # share between base models 38 | cross=False, # cross between quantile levels 39 | margin_type='', 40 | regularization_strength=1.0, 41 | ): 42 | super(QuantileGlobalAggregator, self).__init__() 43 | # model size 44 | self.num_models = num_models 45 | 46 | # quantile list to handle 47 | self.num_quantiles = len(quantile_list) 48 | self.register_buffer('quantiles', torch.FloatTensor(quantile_list)) 49 | 50 | # normalize weights 51 | self.normalize = normalize 52 | 53 | # post-process (monotnoizer) 54 | self.trans_type = trans_type 55 | self.use_grad = use_grad 56 | self.margin_list = margin_list 57 | self.margin_type = margin_type 58 | self.regularization_strength = regularization_strength 59 | 60 | # set weight 61 | self.share = share 62 | self.cross = cross 63 | self.model_type = None 64 | if self.share: 65 | self.weights = nn.Parameter(torch.zeros([1, self.num_models, 1])) 66 | self.model_type = 'Coarse' 67 | else: 68 | if self.cross: 69 | self.weights = nn.Parameter(torch.zeros([1, self.num_models * self.num_quantiles, self.num_quantiles])) 70 | self.model_type = 'Fine' 71 | else: 72 | self.weights = nn.Parameter(torch.zeros([1, self.num_models, self.num_quantiles])) 73 | self.model_type = 'Medium' 74 | 75 | # aggregate estimates 76 | def forward(self, input_data): 77 | # get convex weight (normalize over weights) 78 | if self.normalize: 79 | convex_weights = self.weights.softmax(1) 80 | else: 81 | convex_weights = self.weights 82 | 
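# Broadcasting sketch for the weighted sum below (illustrative names: batch size B,
# M base models, Q quantile levels): 'Coarse' applies weights of shape (1, M, 1) to
# inputs (B, M, Q); 'Medium' applies (1, M, Q); 'Fine' reshapes inputs to (B, M*Q, 1)
# against weights (1, M*Q, Q). Summing over dim 1 yields the aggregated (B, Q) quantiles.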
83 | # weight sum 84 | if self.share: 85 | output_data = input_data * convex_weights 86 | else: 87 | if self.cross: 88 | output_data = input_data.reshape(-1, self.num_models * self.num_quantiles, 1) * convex_weights 89 | else: 90 | output_data = input_data * convex_weights 91 | 92 | # aggregate 93 | output_data = torch.sum(output_data, 1) 94 | return output_data 95 | 96 | def compute_loss(self, 97 | input_data, 98 | target_data, 99 | aux_data=None, 100 | margin_weight=0.0, 101 | margin_scale=0.0, 102 | alpha=0.0, 103 | margin_delta=0): 104 | # train mode 105 | self.train() 106 | # get prediction and margin loss 107 | if margin_weight > 0.0 and (self.margin_list is not None or self.margin_type == 'single'): 108 | if aux_data is not None: 109 | batch_size = input_data.size()[0] 110 | full_input_data = torch.cat([input_data, aux_data], 0) 111 | predict_data = self(full_input_data) 112 | m_loss = margin_weight * margin_loss(predict_data, self.margin_list * margin_scale) 113 | predict_data = predict_data[:batch_size].contiguous() 114 | 115 | else: 116 | predict_data = self(input_data) 117 | if self.margin_type == 'single': 118 | m_loss = margin_weight * margin_loss(predict_data, margin_delta) 119 | 120 | else: 121 | m_loss = margin_weight * margin_loss(predict_data, self.margin_list * margin_scale) 122 | else: 123 | predict_data = self(input_data) 124 | m_loss = 0 125 | 126 | # back-prop through non-crossing 127 | if self.use_grad: 128 | if self.trans_type == 'pava': 129 | predict_data = pava_forward(predict_data) 130 | 131 | elif self.trans_type == 'mono': 132 | predict_data = fix_crossing(predict_data) 133 | 134 | elif self.trans_type == 'sort': 135 | predict_data = sort_forward(predict_data, self.regularization_strength) 136 | 137 | # pinball loss 138 | h_loss = huber_loss(predict_data, target_data, self.quantiles, alpha) 139 | return h_loss + m_loss 140 | 141 | def eval_loss(self, input_data, target_data): 142 | # evaluation mode 143 | self.eval() 144 | 145 | with torch.no_grad(): 146 | # get aggregated prediction 147 | predict_data = self(input_data) 148 | 149 | # monotonize 150 | if self.trans_type == 'pava': 151 | predict_data = pava_forward(predict_data) 152 | 153 | elif self.trans_type == 'mono': 154 | predict_data = fix_crossing(predict_data) 155 | 156 | elif self.trans_type == 'sort': 157 | predict_data = sort_forward(predict_data, self.regularization_strength) 158 | 159 | # compute pinball loss 160 | return pinball_loss(predict_data, target_data, self.quantiles).item() 161 | 162 | def predict(self, input_data): 163 | # evaluation mode 164 | self.eval() 165 | 166 | with torch.no_grad(): 167 | # get aggregated prediction 168 | predict_data = self(input_data) 169 | 170 | # monotonize 171 | if self.trans_type == 'pava': 172 | predict_data = pava_forward(predict_data) 173 | 174 | elif self.trans_type == 'mono': 175 | predict_data = fix_crossing(predict_data) 176 | 177 | elif self.trans_type == 'sort': 178 | predict_data = sort_forward(predict_data, self.regularization_strength) 179 | 180 | return predict_data.data.cpu().numpy() 181 | 182 | 183 | class QuantileLocalAggregator(nn.Module): 184 | def __init__(self, 185 | num_models, # number of base models 186 | quantile_list, # list of quantile levels 187 | input_size, # input feature data size 188 | hidden_size=64, # hidden size 189 | num_layers=3, # number of layers 190 | dropout=0.0, # drop out ratio 191 | activation='elu', # activation 192 | normalize=True, # normalize weights 193 | margin_list=None, # using margin 194 | 
trans_type=None, # apply non-crossing 195 | use_grad=True, # using non-crossing training 196 | share=False, # share between base models 197 | cross=False, # cross between quantile levels 198 | margin_type='', 199 | regularization_strength=1, 200 | ): 201 | 202 | super(QuantileLocalAggregator, self).__init__() 203 | # model size 204 | self.num_models = num_models 205 | 206 | # quantile list to handle 207 | self.num_quantiles = len(quantile_list) 208 | self.register_buffer('quantiles', torch.FloatTensor(quantile_list)) 209 | 210 | # normalize weights 211 | self.normalize = normalize 212 | 213 | # post-process (monotnoizer) 214 | self.trans_type = trans_type 215 | self.use_grad = use_grad 216 | self.margin_list = margin_list 217 | self.margin_type = margin_type 218 | self.regularization_strength = regularization_strength 219 | 220 | # set output size 221 | self.share = share 222 | self.cross = cross 223 | self.model_type = None 224 | if self.share: 225 | num_outputs = self.num_models 226 | self.model_type = 'Coarse' 227 | else: 228 | if self.cross: 229 | num_outputs = self.num_models * self.num_quantiles * self.num_quantiles 230 | self.model_type = 'Fine' 231 | else: 232 | num_outputs = self.num_models * self.num_quantiles 233 | self.model_type = 'Medium' 234 | 235 | # activation 236 | act_fn = nn.ELU() 237 | if activation == 'elu': 238 | act_fn = nn.ELU() 239 | elif activation == 'relu': 240 | act_fn = nn.ReLU() 241 | elif activation == 'tanh': 242 | act_fn = nn.Tanh() 243 | 244 | # network with predicting quantiles 245 | layers = [nn.Linear(input_size, hidden_size), act_fn] 246 | for _ in range(num_layers - 1): 247 | layers.append(nn.Dropout(dropout)) 248 | layers.append(nn.Linear(hidden_size, hidden_size)) 249 | layers.append(act_fn) 250 | 251 | layers.append(nn.Linear(hidden_size, num_outputs)) 252 | self.network = nn.Sequential(*layers) 253 | ''' 254 | if num_layers == 2: 255 | self.network = nn.Sequential( 256 | nn.Linear(input_size, hidden_size), 257 | act_fn, 258 | nn.BatchNorm1d(hidden_size, affine=False), 259 | nn.Linear(hidden_size, hidden_size), 260 | act_fn, 261 | nn.BatchNorm1d(hidden_size, affine=False), 262 | nn.Linear(hidden_size, num_outputs), 263 | ) 264 | 265 | elif num_layers == 3: 266 | self.network = nn.Sequential( 267 | nn.Linear(input_size, hidden_size), 268 | act_fn, 269 | nn.BatchNorm1d(hidden_size, affine=False), 270 | nn.Linear(hidden_size, hidden_size), 271 | act_fn, 272 | nn.BatchNorm1d(hidden_size, affine=False), 273 | nn.Linear(hidden_size, hidden_size), 274 | act_fn, 275 | nn.BatchNorm1d(hidden_size, affine=False), 276 | nn.Linear(hidden_size, num_outputs), 277 | ) 278 | else: 279 | self.network = nn.Sequential( 280 | nn.Linear(input_size, hidden_size), 281 | act_fn, 282 | nn.BatchNorm1d(hidden_size, affine=False), 283 | nn.Linear(hidden_size, hidden_size), 284 | act_fn, 285 | nn.BatchNorm1d(hidden_size, affine=False), 286 | nn.Linear(hidden_size, hidden_size), 287 | act_fn, 288 | nn.BatchNorm1d(hidden_size, affine=False), 289 | nn.Linear(hidden_size, hidden_size), 290 | act_fn, 291 | nn.BatchNorm1d(hidden_size, affine=False), 292 | nn.Linear(hidden_size, num_outputs), 293 | ) 294 | ''' 295 | #print(self.network) 296 | # aggregate estimates 297 | def forward(self, cond_data, input_data): 298 | # combination weight 299 | convex_weights = self.network(cond_data) 300 | 301 | # reshape weights 302 | if self.share: 303 | convex_weights = convex_weights.reshape(-1, self.num_models, 1) 304 | input_data = input_data.reshape(-1, self.num_models, self.num_quantiles) 
305 | else: 306 | if self.cross: 307 | convex_weights = convex_weights.reshape(-1, self.num_models * self.num_quantiles, self.num_quantiles) 308 | input_data = input_data.reshape(-1, self.num_models * self.num_quantiles, 1) 309 | else: 310 | convex_weights = convex_weights.reshape(-1, self.num_models, self.num_quantiles) 311 | input_data = input_data.reshape(-1, self.num_models, self.num_quantiles) 312 | 313 | # normalize (sum to 1) 314 | if self.normalize: 315 | convex_weights = convex_weights.softmax(1) 316 | 317 | # aggregate 318 | output_data = torch.sum(input_data * convex_weights, 1) 319 | return output_data 320 | 321 | def compute_loss(self, 322 | cond_data, 323 | input_data, 324 | target_data, 325 | aux_cond_data=None, 326 | aux_input_data=None, 327 | margin_weight=0.0, 328 | margin_scale=0.0, 329 | alpha=0.0, 330 | margin_delta=0.0): 331 | 332 | # train mode 333 | self.train() 334 | 335 | if margin_weight > 0.0 and (self.margin_list is not None or self.margin_type == 'single'): 336 | if aux_cond_data is not None and aux_input_data is not None: 337 | batch_size = input_data.size()[0] 338 | predict_data = self(torch.cat([cond_data, aux_cond_data], 0), 339 | torch.cat([input_data, aux_input_data], 0)) 340 | m_loss = margin_weight * margin_loss(predict_data, self.margin_list * margin_scale) 341 | predict_data = predict_data[:batch_size].contiguous() 342 | 343 | else: 344 | predict_data = self(cond_data, input_data) 345 | if self.margin_type == 'single': 346 | m_loss = margin_weight * margin_loss(predict_data, margin_delta) 347 | 348 | else: 349 | m_loss = margin_weight * margin_loss(predict_data, self.margin_list * margin_scale) 350 | 351 | else: 352 | predict_data = self(cond_data, input_data) 353 | m_loss = 0 354 | 355 | # back-prop through non-crossing 356 | if self.use_grad: 357 | if self.trans_type == 'pava': 358 | predict_data = pava_forward(predict_data) 359 | 360 | elif self.trans_type == 'mono': 361 | predict_data = fix_crossing(predict_data) 362 | 363 | elif self.trans_type == 'sort': 364 | predict_data = sort_forward(predict_data, self.regularization_strength) 365 | 366 | # pinball loss 367 | h_loss = huber_loss(predict_data, target_data, self.quantiles, alpha) 368 | return h_loss + m_loss 369 | 370 | def eval_loss(self, cond_data, input_data, target_data): 371 | # evaluation mode 372 | self.eval() 373 | 374 | with torch.no_grad(): 375 | # get aggregated prediction 376 | predict_data = self(cond_data, input_data) 377 | 378 | # monotonize 379 | if self.trans_type == 'pava': 380 | predict_data = pava_forward(predict_data) 381 | 382 | elif self.trans_type == 'mono': 383 | predict_data = fix_crossing(predict_data) 384 | 385 | elif self.trans_type == 'sort': 386 | predict_data = sort_forward(predict_data, self.regularization_strength) 387 | 388 | # compute pinball loss 389 | return pinball_loss(predict_data, target_data, self.quantiles).item() 390 | 391 | def predict(self, cond_data, input_data): 392 | # evaluation mode 393 | self.eval() 394 | 395 | with torch.no_grad(): 396 | # get aggregated prediction 397 | predict_data = self(cond_data, input_data) 398 | 399 | # monotonize 400 | if self.trans_type == 'pava': 401 | predict_data = pava_forward(predict_data) 402 | 403 | elif self.trans_type == 'mono': 404 | predict_data = fix_crossing(predict_data) 405 | 406 | elif self.trans_type == 'sort': 407 | predict_data = sort_forward(predict_data, self.regularization_strength) 408 | 409 | return predict_data.data.cpu().numpy() 410 | 411 | class QuantileGlobalAggregatorTrainer: 412 | def 
__init__(self, 413 | num_searches, # number of searching 414 | cv_split, # cross-validation splitting 415 | quantile_list, # list of quantile levels 416 | batch_size=64, # mini batch size 417 | normalize=True, # normalize weights 418 | margin_list=None, # using margin 419 | trans_type=None, # apply non-crossing 420 | use_grad=True, # using non-crossing training 421 | share_weight=False, # share weight over models 422 | cross_weight=False, # cross quantiles 423 | rand_seed=111, # random seed 424 | device=-1, # device id, 425 | margin_type='', 426 | regularization_strength=1, 427 | ): 428 | # training setting 429 | self.num_searches = num_searches 430 | self.cv_split = cv_split 431 | 432 | # model setting 433 | self.quantile_list = quantile_list 434 | self.normalize = normalize 435 | self.margin_list = margin_list 436 | self.trans_type = trans_type 437 | self.use_grad = use_grad 438 | self.share_weight = share_weight 439 | self.cross_weight = cross_weight 440 | self.rand_seed = rand_seed 441 | self.num_models = None 442 | self.num_quantiles = len(quantile_list) 443 | self.batch_size = batch_size 444 | self.margin_type = margin_type 445 | self.regularization_strength = regularization_strength 446 | 447 | # best model after training 448 | self.best_model = None 449 | self.best_params = None 450 | 451 | # set device 452 | if torch.cuda.is_available() and device > -1: 453 | self.device = torch.device("cuda:{}".format(device)) 454 | else: 455 | self.device = torch.device("cpu") 456 | 457 | # fit model by model selection 458 | def fit(self, 459 | x_train, 460 | y_train, 461 | x_val, 462 | y_val, 463 | ax_train=None): 464 | # convert data 465 | x_train = torch.FloatTensor(x_train).to(self.device) 466 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 467 | 468 | x_val = torch.FloatTensor(x_val).to(self.device) 469 | y_val = torch.FloatTensor(y_val.reshape(-1, 1)).to(self.device) 470 | 471 | if ax_train is not None: 472 | ax_train = torch.FloatTensor(ax_train).to(self.device) 473 | 474 | # get number of base models 475 | self.num_models = x_train.size()[1] 476 | assert self.num_quantiles == x_train.size()[2] 477 | 478 | # build params list 479 | if self.margin_type == 'single': 480 | full_param_grid = {**QUANTILE_LOSS_PARAM_GRID_FIX} 481 | 482 | else: 483 | full_param_grid = {**QUANTILE_LOSS_PARAM_GRID} 484 | if self.margin_list is None: 485 | del full_param_grid['margin_weight'] 486 | del full_param_grid['margin_scale'] 487 | 488 | 489 | params_list = list(ParameterSampler(param_distributions=full_param_grid, 490 | n_iter=self.num_searches, 491 | random_state=self.rand_seed)) 492 | 493 | # set data loader 494 | train_loader = DataLoader(dataset=TensorDataset(x_train, 495 | y_train), 496 | shuffle=True, 497 | batch_size=self.batch_size, 498 | drop_last=False, 499 | worker_init_fn=np.random.seed(self.rand_seed)) 500 | 501 | valid_loader = DataLoader(dataset=TensorDataset(x_val, 502 | y_val), 503 | shuffle=False, 504 | batch_size=1024, 505 | drop_last=False) 506 | 507 | if ax_train is not None: 508 | aux_loader = DataLoader(dataset=TensorDataset(torch.cat([ax_train, x_train], 0)), 509 | shuffle=True, 510 | batch_size=self.batch_size, 511 | drop_last=False, 512 | worker_init_fn=np.random.seed(self.rand_seed)) 513 | 514 | else: 515 | aux_loader = None 516 | 517 | # starting model selection 518 | best_eval_loss = np.inf 519 | best_eval_step = 0 520 | 521 | # for each param 522 | for p, params in enumerate(params_list): 523 | # fit model with given data and params 524 | print('iter:',p, 
' ', params) 525 | eval_loss, eval_step = self.fit_model(train_loader=train_loader, 526 | valid_loader=valid_loader, 527 | aux_loader=aux_loader, 528 | **params) 529 | 530 | # if best in terms of validation 531 | if eval_loss < best_eval_loss: 532 | best_eval_loss = eval_loss 533 | best_eval_step = eval_step 534 | self.best_params = params 535 | 536 | print('eval_loss : %.4f, best_eval_sofar: %.4f, eval_step: %d' %(eval_loss, best_eval_loss, eval_step)) 537 | print() 538 | self.best_params['num_steps'] = best_eval_step 539 | 540 | # retrain model with full data 541 | train_loader = DataLoader(dataset=TensorDataset(x_train, y_train), 542 | shuffle=True, 543 | batch_size=self.batch_size, 544 | drop_last=False, 545 | worker_init_fn=np.random.seed(self.rand_seed)) 546 | 547 | if ax_train is not None: 548 | aux_loader = DataLoader(dataset=TensorDataset(ax_train), 549 | shuffle=True, 550 | batch_size=self.batch_size, 551 | drop_last=False, 552 | worker_init_fn=np.random.seed(self.rand_seed)) 553 | 554 | else: 555 | aux_loader = None 556 | 557 | # retrain model and set as best model 558 | print('best_params:', self.best_params) 559 | print('best_eval_loss: %.4f' % best_eval_loss) 560 | self.best_model = self.fit_model(train_loader, None, aux_loader, **self.best_params) 561 | 562 | # fit single model based on given params 563 | def fit_model(self, 564 | train_loader, 565 | valid_loader=None, 566 | aux_loader=None, 567 | num_steps=None, 568 | lr=1e-3, 569 | wd=1e-7, 570 | margin_weight=0.0, 571 | margin_scale=0.0, 572 | alpha=0.0, 573 | margin_delta=0.0): 574 | # init model 575 | model = QuantileGlobalAggregator(num_models=self.num_models, 576 | quantile_list=self.quantile_list, 577 | normalize=self.normalize, 578 | margin_list=self.margin_list, 579 | trans_type=self.trans_type, 580 | use_grad=self.use_grad, 581 | share=self.share_weight, 582 | cross=self.cross_weight, 583 | margin_type=self.margin_type, 584 | regularization_strength=self.regularization_strength) 585 | model = model.to(self.device) 586 | 587 | # init optimizer 588 | optimizer = torch.optim.Adam(params=model.parameters(), 589 | lr=lr, weight_decay=wd, amsgrad=True) 590 | 591 | # init results 592 | steps = 0 593 | best_valid_loss = np.inf 594 | best_step = 0 595 | 596 | # init aux_loader 597 | if aux_loader is None: 598 | aux_loader_iterator = None 599 | else: 600 | aux_loader_iterator = iter(aux_loader) 601 | 602 | # for each epoch 603 | while True: 604 | # for each batch (update) 605 | for x_batch, y_batch in train_loader: 606 | # aux data 607 | if aux_loader_iterator is None: 608 | aux_batch = None 609 | else: 610 | try: 611 | aux_batch = next(aux_loader_iterator)[0] 612 | except StopIteration: 613 | aux_loader_iterator = iter(aux_loader) 614 | aux_batch = next(aux_loader_iterator)[0] 615 | 616 | # compute loss 617 | batch_loss = model.compute_loss(input_data=x_batch, 618 | target_data=y_batch, 619 | aux_data=aux_batch, 620 | margin_weight=margin_weight, 621 | margin_scale=margin_scale, 622 | alpha=alpha, 623 | margin_delta=margin_delta) 624 | 625 | # backprop and update 626 | optimizer.zero_grad() 627 | batch_loss.backward() 628 | optimizer.step() 629 | 630 | # step up 631 | steps += 1 632 | 633 | # evaluation over validation set 634 | if steps % 100 == 0 and valid_loader is not None: 635 | valid_loss = 0.0 636 | valid_size = 0.0 637 | 638 | # compute validation loss 639 | for x_batch, y_batch in valid_loader: 640 | batch_size = x_batch.size()[0] 641 | batch_loss = model.eval_loss(input_data=x_batch, target_data=y_batch) 642 | 
valid_loss += batch_loss * batch_size 643 | valid_size += batch_size 644 | valid_loss /= valid_size 645 | 646 | # update best validation loss 647 | if best_valid_loss > valid_loss: 648 | best_valid_loss = valid_loss 649 | best_step = steps 650 | # if no improvement seen 651 | elif steps - best_step >= STOP_STEPS: 652 | return best_valid_loss, best_step 653 | # if number of steps is reached 654 | elif num_steps is not None and steps >= num_steps: 655 | assert valid_loader is None 656 | return copy.deepcopy(model) 657 | 658 | # prediction 659 | def predict(self, x_data): 660 | x_data = torch.FloatTensor(x_data).to(self.device) 661 | y_pred = self.best_model.predict(x_data) 662 | return y_pred 663 | 664 | def refit_model(self, 665 | x_train, y_train, 666 | ax_train=None): 667 | self.best_model = None 668 | 669 | # convert data 670 | x_train = torch.FloatTensor(x_train).to(self.device) 671 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 672 | if ax_train is not None: 673 | ax_train = torch.FloatTensor(ax_train).to(self.device) 674 | 675 | # retrain model with full data 676 | train_loader = DataLoader(dataset=TensorDataset(x_train, y_train), 677 | shuffle=True, 678 | batch_size=self.batch_size, 679 | drop_last=False, 680 | worker_init_fn=np.random.seed(self.rand_seed)) 681 | 682 | if ax_train is not None: 683 | aux_loader = DataLoader(dataset=TensorDataset(ax_train), 684 | shuffle=True, 685 | batch_size=self.batch_size, 686 | drop_last=False, 687 | worker_init_fn=np.random.seed(self.rand_seed)) 688 | 689 | else: 690 | aux_loader = None 691 | 692 | print('best_params:', self.best_params) 693 | # retrain model and set as best model 694 | self.best_model = self.fit_model(train_loader, None, aux_loader, **self.best_params) 695 | 696 | 697 | class QuantileLocalAggregatorTrainer: 698 | def __init__(self, 699 | num_searches, # number of searching 700 | cv_split, # cross-validation splitting 701 | quantile_list, # list of quantile levels 702 | batch_size=64, # mini batch size 703 | normalize=True, # normalize weights 704 | margin_list=None, # using margin 705 | trans_type=None, # apply non-crossing 706 | use_grad=True, # using non-crossing training 707 | share_weight=False, # share weight over models 708 | cross_weight=False, # cross quantiles 709 | rand_seed=111, # random seed 710 | device=-1, # device id, 711 | margin_type='', 712 | regularization_strength=1.0, 713 | ): 714 | # training setting 715 | self.num_searches = num_searches 716 | self.cv_split = cv_split 717 | 718 | # model setting 719 | self.quantile_list = quantile_list 720 | self.normalize = normalize 721 | self.margin_list = margin_list 722 | self.trans_type = trans_type 723 | self.use_grad = use_grad 724 | self.share_weight = share_weight 725 | self.cross_weight = cross_weight 726 | self.rand_seed = rand_seed 727 | self.input_size = None 728 | self.num_models = None 729 | self.num_quantiles = len(quantile_list) 730 | self.batch_size = batch_size 731 | self.margin_type = margin_type 732 | self.regularization_strength = regularization_strength 733 | 734 | # best model after training 735 | self.best_model = None 736 | self.best_params = None 737 | 738 | # set device 739 | if torch.cuda.is_available() and device > -1: 740 | self.device = torch.device("cuda:{}".format(device)) 741 | else: 742 | self.device = torch.device("cpu") 743 | 744 | # fit model by model selection 745 | def fit(self, 746 | c_train, 747 | x_train, 748 | y_train, 749 | c_val, 750 | x_val, 751 | y_val, 752 | ac_train=None, 753 | ax_train=None): 754 | 
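# Model selection: sample `num_searches` hyperparameter candidates with ParameterSampler,
# train each with early stopping against (c_val, x_val, y_val), keep the best candidate and
# its best step count, then refit on the full training data for exactly that many steps.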
755 | # convert data 756 | c_train = torch.FloatTensor(c_train).to(self.device) 757 | x_train = torch.FloatTensor(x_train).to(self.device) 758 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 759 | 760 | c_val = torch.FloatTensor(c_val).to(self.device) 761 | x_val = torch.FloatTensor(x_val).to(self.device) 762 | y_val = torch.FloatTensor(y_val.reshape(-1, 1)).to(self.device) 763 | 764 | if ac_train is not None and ax_train is not None: 765 | ac_train = torch.FloatTensor(ac_train).to(self.device) 766 | ax_train = torch.FloatTensor(ax_train).to(self.device) 767 | 768 | # get input size and number of base models 769 | self.input_size = c_train.size()[1] 770 | self.num_models = x_train.size()[1] 771 | assert self.num_quantiles == x_train.size()[2] 772 | 773 | # build params list 774 | if self.margin_type == 'single': 775 | full_param_grid = {**NETWORK_PARAM_GRID, 776 | **QUANTILE_LOSS_PARAM_GRID_FIX} 777 | 778 | else: 779 | full_param_grid = {**NETWORK_PARAM_GRID, 780 | **QUANTILE_LOSS_PARAM_GRID} 781 | 782 | if self.margin_list is None: 783 | del full_param_grid['margin_weight'] 784 | del full_param_grid['margin_scale'] 785 | 786 | params_list = list(ParameterSampler(param_distributions=full_param_grid, 787 | n_iter=self.num_searches, 788 | random_state=self.rand_seed)) 789 | 790 | # set data loader 791 | train_loader = DataLoader(dataset=TensorDataset(c_train, 792 | x_train, 793 | y_train), 794 | shuffle=True, 795 | batch_size=self.batch_size, 796 | drop_last=False, 797 | worker_init_fn=np.random.seed(self.rand_seed)) 798 | 799 | valid_loader = DataLoader(dataset=TensorDataset(c_val, 800 | x_val, 801 | y_val), 802 | shuffle=False, 803 | batch_size=1024, 804 | drop_last=False) 805 | 806 | if ac_train is not None and ax_train is not None: 807 | aux_loader = DataLoader(dataset=TensorDataset(torch.cat([ac_train, c_val], 0), 808 | torch.cat([ax_train, x_val], 0)), 809 | shuffle=True, 810 | batch_size=self.batch_size, 811 | drop_last=False, 812 | worker_init_fn=np.random.seed(self.rand_seed)) 813 | 814 | else: 815 | aux_loader = None 816 | 817 | 818 | # starting model selection 819 | best_eval_loss = np.inf 820 | best_eval_step = 0 821 | 822 | # for each param 823 | for p, params in enumerate(params_list): 824 | # fit model with given data and params 825 | print('iter:',p, ' ', params) 826 | eval_loss, eval_step = self.fit_model(train_loader=train_loader, 827 | valid_loader=valid_loader, 828 | aux_loader=aux_loader, 829 | **params) 830 | 831 | # if best in terms of validation 832 | if eval_loss < best_eval_loss: 833 | best_eval_loss = eval_loss 834 | best_eval_step = eval_step 835 | self.best_params = params 836 | 837 | print('eval_loss : %.4f, best_eval_sofar: %.4f, eval_step: %d' %(eval_loss, best_eval_loss, eval_step)) 838 | print() 839 | self.best_params['num_steps'] = best_eval_step 840 | 841 | # retrain model with full data 842 | #train_loader = DataLoader(dataset=TensorDataset(c_train, x_train, y_train), 843 | # shuffle=True, batch_size=self.batch_size, drop_last=True, 844 | # worker_init_fn=np.random.seed(self.rand_seed)) 845 | train_loader = DataLoader(dataset=TensorDataset(c_train, 846 | x_train, 847 | y_train), 848 | shuffle=True, 849 | batch_size=self.batch_size, 850 | drop_last=False, 851 | worker_init_fn=np.random.seed(self.rand_seed)) 852 | 853 | 854 | if ac_train is not None and ax_train is not None: 855 | aux_loader = DataLoader(dataset=TensorDataset(ac_train, ax_train), 856 | shuffle=True, 857 | batch_size=self.batch_size, 858 | drop_last=False, 859 | 
worker_init_fn=np.random.seed(self.rand_seed)) 860 | 861 | else: 862 | aux_loader = None 863 | 864 | print('best_params:', self.best_params) 865 | print('best_eval_loss: %.4f' % best_eval_loss) 866 | # retrain model and set as best model 867 | self.best_model = self.fit_model(train_loader, None, aux_loader, **self.best_params) 868 | 869 | # fit single model based on given params 870 | def fit_model(self, 871 | train_loader, 872 | valid_loader=None, 873 | aux_loader=None, 874 | num_steps=None, 875 | hidden_size=64, 876 | num_layers=3, 877 | dropout=0.1, 878 | activation='elu', 879 | lr=1e-3, 880 | wd=1e-7, 881 | margin_weight=0.0, 882 | margin_scale=0.0, 883 | alpha=0.0, 884 | margin_delta=0.0): 885 | 886 | # init model 887 | model = QuantileLocalAggregator(num_models=self.num_models, 888 | quantile_list=self.quantile_list, 889 | input_size=self.input_size, 890 | hidden_size=hidden_size, 891 | num_layers=num_layers, 892 | dropout=dropout, 893 | activation=activation, 894 | normalize=self.normalize, 895 | margin_list=self.margin_list, 896 | trans_type=self.trans_type, 897 | use_grad=self.use_grad, 898 | share=self.share_weight, 899 | cross=self.cross_weight, 900 | margin_type=self.margin_type, 901 | regularization_strength=self.regularization_strength) 902 | 903 | model = model.to(self.device) 904 | 905 | # init optimizer 906 | optimizer = torch.optim.Adam(params=model.parameters(), 907 | lr=lr, weight_decay=wd, amsgrad=True) 908 | 909 | # init results 910 | steps = 0 911 | best_valid_loss = np.inf 912 | best_step = 0 913 | 914 | # init aux_loader 915 | if aux_loader is None: 916 | aux_loader_iterator = None 917 | else: 918 | aux_loader_iterator = iter(aux_loader) 919 | 920 | # for each epoch 921 | while True: 922 | # for each batch (update) 923 | for c_batch, x_batch, y_batch in train_loader: 924 | # aux data 925 | if aux_loader_iterator is None: 926 | ac_batch, ax_batch = None, None 927 | else: 928 | try: 929 | ac_batch, ax_batch = next(aux_loader_iterator) 930 | except StopIteration: 931 | aux_loader_iterator = iter(aux_loader) 932 | ac_batch, ax_batch = next(aux_loader_iterator) 933 | 934 | # compute loss 935 | batch_loss = model.compute_loss(cond_data=c_batch, 936 | input_data=x_batch, 937 | target_data=y_batch, 938 | aux_cond_data=ac_batch, 939 | aux_input_data=ax_batch, 940 | margin_weight=margin_weight, 941 | margin_scale=margin_scale, 942 | alpha=alpha, 943 | margin_delta=margin_delta) 944 | 945 | # backprop and update 946 | optimizer.zero_grad() 947 | batch_loss.backward() 948 | optimizer.step() 949 | 950 | # step up 951 | steps += 1 952 | 953 | # evaluation over validation set 954 | if steps % 100 == 0 and valid_loader is not None: 955 | valid_loss = 0.0 956 | valid_size = 0.0 957 | 958 | # compute validation loss 959 | for c_batch, x_batch, y_batch in valid_loader: 960 | batch_size = x_batch.size()[0] 961 | batch_loss = model.eval_loss(cond_data=c_batch, input_data=x_batch, target_data=y_batch) 962 | valid_loss += batch_loss * batch_size 963 | valid_size += batch_size 964 | valid_loss /= valid_size 965 | 966 | # update best validation loss 967 | if best_valid_loss > valid_loss: 968 | best_valid_loss = valid_loss 969 | best_step = steps 970 | # if no improvement seen 971 | elif steps - best_step >= STOP_STEPS: 972 | return best_valid_loss, best_step 973 | # if number of steps is reached 974 | elif num_steps is not None and steps >= num_steps: 975 | assert valid_loader is None 976 | return copy.deepcopy(model) 977 | 978 | # prediction 979 | def predict(self, c_data, x_data): 980 | 
c_data = torch.FloatTensor(c_data).to(self.device) 981 | x_data = torch.FloatTensor(x_data).to(self.device) 982 | y_pred = self.best_model.predict(c_data, x_data) 983 | return y_pred 984 | 985 | def refit_model(self, 986 | c_train, 987 | x_train, 988 | y_train, 989 | ac_train=None, 990 | ax_train=None): 991 | 992 | self.best_model = None 993 | 994 | # convert data 995 | c_train = torch.FloatTensor(c_train).to(self.device) 996 | x_train = torch.FloatTensor(x_train).to(self.device) 997 | y_train = torch.FloatTensor(y_train.reshape(-1, 1)).to(self.device) 998 | 999 | if ac_train is not None and ax_train is not None: 1000 | ac_train = torch.FloatTensor(ac_train).to(self.device) 1001 | ax_train = torch.FloatTensor(ax_train).to(self.device) 1002 | 1003 | # retrain model with full data 1004 | train_loader = DataLoader(dataset=TensorDataset(c_train, x_train, y_train), 1005 | shuffle=True, 1006 | batch_size=self.batch_size, 1007 | drop_last=False, 1008 | worker_init_fn=np.random.seed(self.rand_seed)) 1009 | 1010 | if ac_train is not None and ax_train is not None: 1011 | aux_loader = DataLoader(dataset=TensorDataset(ac_train, ax_train), 1012 | shuffle=True, 1013 | batch_size=self.batch_size, 1014 | drop_last=False, 1015 | worker_init_fn=np.random.seed(self.rand_seed)) 1016 | 1017 | else: 1018 | aux_loader = None 1019 | 1020 | print('best_params:', self.best_params) 1021 | # retrain model and set as best model 1022 | self.best_model = self.fit_model(train_loader, None, aux_loader, **self.best_params) 1023 | --------------------------------------------------------------------------------
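A minimal usage sketch for the two trainers defined in model/neural_aggregator.py, assuming the base quantile models have already been fit and their predictions stacked into arrays of shape (num_samples, num_models, num_quantiles). The array and feature names (base_preds_train, base_preds_val, c_train, c_val, y_train, y_val), the random placeholder data, and the hyperparameter values are illustrative only; the class names, constructor arguments, and fit/predict signatures come from the file above.

import numpy as np
from model.neural_aggregator import QuantileGlobalAggregatorTrainer, QuantileLocalAggregatorTrainer

quantile_list = [0.1, 0.5, 0.9]

# Placeholder data: stacked base-model quantile predictions, targets, and
# conditioning features (replace with real base-model outputs).
rng = np.random.RandomState(0)
num_train, num_val, num_models, num_features = 256, 128, 3, 8
base_preds_train = rng.randn(num_train, num_models, len(quantile_list))
base_preds_val = rng.randn(num_val, num_models, len(quantile_list))
y_train, y_val = rng.randn(num_train), rng.randn(num_val)
c_train, c_val = rng.randn(num_train, num_features), rng.randn(num_val, num_features)

# Global aggregator: a single learned convex weighting over base models.
global_trainer = QuantileGlobalAggregatorTrainer(num_searches=5,
                                                 cv_split=None,
                                                 quantile_list=quantile_list,
                                                 trans_type='pava',   # monotonize with PAVA
                                                 device=-1)           # -1 -> CPU
global_trainer.fit(base_preds_train, y_train, base_preds_val, y_val)
global_quantiles = global_trainer.predict(base_preds_val)   # (num_val, num_quantiles)

# Local aggregator: combination weights predicted per example from features c.
local_trainer = QuantileLocalAggregatorTrainer(num_searches=5,
                                               cv_split=None,
                                               quantile_list=quantile_list,
                                               trans_type='pava',
                                               device=-1)
local_trainer.fit(c_train, base_preds_train, y_train, c_val, base_preds_val, y_val)
local_quantiles = local_trainer.predict(c_val, base_preds_val)   # (num_val, num_quantiles)

In both trainers, share_weight/cross_weight select the 'Coarse', 'Medium', or 'Fine' weighting schemes set up in the aggregator constructors, trans_type in {'pava', 'mono', 'sort'} chooses the monotonization applied to the aggregated quantiles, and passing a margin_list (or margin_type='single') enables the margin_loss regularizer from util.metric during hyperparameter search.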