├── tnp.png
├── .gitignore
├── regression
│   ├── configs
│   │   ├── gp
│   │   │   ├── bnp.yaml
│   │   │   ├── cnp.yaml
│   │   │   ├── np.yaml
│   │   │   ├── tnpd.yaml
│   │   │   ├── banp.yaml
│   │   │   ├── canp.yaml
│   │   │   ├── tnpa.yaml
│   │   │   ├── anp.yaml
│   │   │   └── tnpnd.yaml
│   │   ├── celeba
│   │   │   ├── bnp.yaml
│   │   │   ├── cnp.yaml
│   │   │   ├── np.yaml
│   │   │   ├── tnpd.yaml
│   │   │   ├── banp.yaml
│   │   │   ├── canp.yaml
│   │   │   ├── tnpa.yaml
│   │   │   ├── anp.yaml
│   │   │   └── tnpnd.yaml
│   │   └── emnist
│   │       ├── bnp.yaml
│   │       ├── cnp.yaml
│   │       ├── np.yaml
│   │       ├── banp.yaml
│   │       ├── canp.yaml
│   │       ├── tnpd.yaml
│   │       ├── anp.yaml
│   │       ├── tnpa.yaml
│   │       └── tnpnd.yaml
│   ├── data
│   │   ├── __pycache__
│   │   │   ├── gp.cpython-38.pyc
│   │   │   ├── celeba.cpython-38.pyc
│   │   │   ├── emnist.cpython-38.pyc
│   │   │   └── image.cpython-38.pyc
│   │   ├── emnist.py
│   │   ├── celeba.py
│   │   ├── image.py
│   │   └── gp.py
│   ├── models
│   │   ├── __pycache__
│   │   │   ├── tnp.cpython-38.pyc
│   │   │   ├── tnpa.cpython-38.pyc
│   │   │   ├── tnpd.cpython-38.pyc
│   │   │   ├── tnpnd.cpython-38.pyc
│   │   │   ├── attention.cpython-38.pyc
│   │   │   └── modules.cpython-38.pyc
│   │   ├── cnp.py
│   │   ├── attention.py
│   │   ├── canp.py
│   │   ├── tnpd.py
│   │   ├── tnp.py
│   │   ├── bnp.py
│   │   ├── banp.py
│   │   ├── np.py
│   │   ├── anp.py
│   │   ├── tnpnd.py
│   │   └── tnpa.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── log.cpython-38.pyc
│   │   │   ├── misc.cpython-38.pyc
│   │   │   └── paths.cpython-38.pyc
│   │   ├── paths.py
│   │   ├── misc.py
│   │   ├── sampling.py
│   │   └── log.py
│   └── README.md
├── bayesian_optimization
│   ├── configs
│   │   └── gp
│   │       ├── bnp.yaml
│   │       ├── cnp.yaml
│   │       ├── np.yaml
│   │       ├── tnpd.yaml
│   │       ├── tnpa.yaml
│   │       ├── banp.yaml
│   │       ├── canp.yaml
│   │       ├── anp.yaml
│   │       └── tnpnd.yaml
│   ├── data
│   │   ├── __pycache__
│   │   │   ├── gp.cpython-38.pyc
│   │   │   ├── gp.cpython-39.pyc
│   │   │   ├── highdim_gp.cpython-38.pyc
│   │   │   └── highdim_gp.cpython-39.pyc
│   │   ├── highdim_gp.py
│   │   └── gp.py
│   ├── models
│   │   ├── __pycache__
│   │   │   ├── tnp.cpython-38.pyc
│   │   │   ├── tnp.cpython-39.pyc
│   │   │   ├── tnpa.cpython-38.pyc
│   │   │   ├── tnpa.cpython-39.pyc
│   │   │   ├── tnpd.cpython-38.pyc
│   │   │   ├── tnpd.cpython-39.pyc
│   │   │   ├── tnpnd.cpython-38.pyc
│   │   │   ├── tnpnd.cpython-39.pyc
│   │   │   ├── modules.cpython-38.pyc
│   │   │   ├── modules.cpython-39.pyc
│   │   │   ├── attention.cpython-38.pyc
│   │   │   └── attention.cpython-39.pyc
│   │   ├── tnpd.py
│   │   ├── attention.py
│   │   ├── tnpa.py
│   │   ├── canp.py
│   │   ├── cnp.py
│   │   ├── banp.py
│   │   ├── bnp.py
│   │   ├── tnp.py
│   │   ├── np.py
│   │   ├── anp.py
│   │   └── tnpnd.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── log.cpython-38.pyc
│   │   │   ├── log.cpython-39.pyc
│   │   │   ├── misc.cpython-38.pyc
│   │   │   ├── misc.cpython-39.pyc
│   │   │   ├── paths.cpython-38.pyc
│   │   │   ├── paths.cpython-39.pyc
│   │   │   └── acquisition.cpython-38.pyc
│   │   ├── paths.py
│   │   ├── misc.py
│   │   ├── sampling.py
│   │   ├── acquisition.py
│   │   └── log.py
│   ├── bayeso_benchmarks
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── benchmark_base.cpython-38.pyc
│   │   │   ├── inf_dim_ackley.cpython-38.pyc
│   │   │   ├── inf_dim_cosines.cpython-38.pyc
│   │   │   ├── inf_dim_rastrigin.cpython-38.pyc
│   │   │   ├── two_dim_dropwave.cpython-38.pyc
│   │   │   ├── two_dim_michalewicz.cpython-38.pyc
│   │   │   ├── three_dim_hartmann3d.cpython-38.pyc
│   │   │   └── two_dim_goldsteinprice.cpython-38.pyc
│   │   ├── two_dim_dropwave.py
│   │   ├── inf_dim_cosines.py
│   │   ├── two_dim_goldsteinprice.py
│   │   ├── one_dim_linear.py
│   │   ├── three_dim_hartmann3d.py
│   │   ├── inf_dim_rastrigin.py
│   │   ├── two_dim_michalewicz.py
│   │   ├── one_dim_step.py
│   │   ├── inf_dim_ackley.py
│   │   └── plot_benchmarks.py
│   └── README.md
├── contextual_bandits
│   ├── configs
│   │   └── wheel
│   │       ├── bnp.yaml
│   │       ├── cnp.yaml
│   │       ├── np.yaml
│   │       ├── models_anp.yaml
│   │       ├── models_bnp.yaml
│   │       ├── models_cnp.yaml
│   │       ├── models_np.yaml
│   │       ├── banp.yaml
│   │       ├── canp.yaml
│   │       ├── models_banp.yaml
│   │       ├── models_canp.yaml
│   │       ├── models_tnpa.yaml
│   │       ├── models_tnpd.yaml
│   │       ├── models_tnpnd.yaml
│   │       ├── tnpd.yaml
│   │       ├── tnpa.yaml
│   │       ├── anp.yaml
│   │       └── tnpnd.yaml
│   ├── paths.yaml
│   ├── data
│   │   └── __pycache__
│   │       └── wheel.cpython-38.pyc
│   ├── models
│   │   ├── __pycache__
│   │   │   ├── tnp.cpython-38.pyc
│   │   │   ├── tnpa.cpython-38.pyc
│   │   │   ├── tnpd.cpython-38.pyc
│   │   │   ├── tnpnd.cpython-38.pyc
│   │   │   ├── modules.cpython-38.pyc
│   │   │   └── attention.cpython-38.pyc
│   │   ├── attention.py
│   │   ├── tnpd.py
│   │   ├── canp.py
│   │   ├── cnp.py
│   │   ├── tnpa.py
│   │   ├── banp.py
│   │   ├── bnp.py
│   │   ├── tnp.py
│   │   ├── np.py
│   │   ├── anp.py
│   │   └── tnpnd.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── log.cpython-38.pyc
│   │   │   └── misc.cpython-38.pyc
│   │   ├── misc.py
│   │   ├── sampling.py
│   │   ├── metrics.py
│   │   └── log.py
│   ├── runner
│   │   ├── __pycache__
│   │   │   ├── args.cpython-38.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── cmab_runner.cpython-38.pyc
│   │   ├── __init__.py
│   │   └── args.py
│   ├── main.py
│   └── README.md
├── LICENSE.md
├── README.md
└── env.yml

/.gitignore:
--------------------------------------------------------------------------------
*.vscode*
*__pycache__*
*results*
*evalsets*
*datasets*
--------------------------------------------------------------------------------
/regression/configs/gp/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/gp/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/celeba/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/celeba/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/emnist/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
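The `*np.yaml` files above are flat hyperparameter dictionaries: input/output dimensionality plus encoder/decoder depths. A minimal sketch of how such a file can be consumed, assuming PyYAML and a model constructor that accepts the keys as keyword arguments — the training scripts that actually do this are not part of this listing:

```python
import yaml

# Load one of the flat config files shown above.
with open('regression/configs/gp/cnp.yaml') as f:
    config = yaml.safe_load(f)
# config == {'dim_x': 1, 'dim_y': 1, 'dim_hid': 128,
#            'enc_pre_depth': 4, 'enc_post_depth': 2, 'dec_depth': 3}

# Hypothetical constructor call; the real CNP class lives in
# regression/models/cnp.py, which is not reproduced in this listing.
# model = CNP(**config)
```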
/bayesian_optimization/configs/gp/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/celeba/np.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
dim_lat: 128
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/np.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/gp/np.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/paths.yaml:
--------------------------------------------------------------------------------
# paths.yaml

datasets_path:
  "datasets"
evalsets_path:
  "evalsets"
results_path:
  "results"
--------------------------------------------------------------------------------
/regression/configs/gp/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/np.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/np.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
dim_lat: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/celeba/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
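The `tnpd.yaml` configs name standard transformer-encoder hyperparameters (`d_model`, `nhead`, `dim_feedforward`, `dropout`, `num_layers`; `emb_depth` presumably sizes the input embedding MLP). A sketch of the encoder stack these values describe, using stock PyTorch modules — an illustration, not the repo's actual `models/tnpd.py`:

```python
import yaml
import torch.nn as nn

with open('regression/configs/gp/tnpd.yaml') as f:
    cfg = yaml.safe_load(f)

# One standard encoder layer per the config: 64-dim tokens, 4 heads,
# a 128-dim feedforward block, and no dropout.
layer = nn.TransformerEncoderLayer(
    d_model=cfg['d_model'],
    nhead=cfg['nhead'],
    dim_feedforward=cfg['dim_feedforward'],
    dropout=cfg['dropout'],
    batch_first=True,
)
encoder = nn.TransformerEncoder(layer, num_layers=cfg['num_layers'])  # 6 layers
```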
/bayesian_optimization/configs/gp/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 8
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
/regression/configs/gp/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/gp/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 8
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
/regression/configs/celeba/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_v_depth: 6
enc_qk_depth: 3
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/celeba/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_v_depth: 6
enc_qk_depth: 3
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_v_depth: 5
enc_qk_depth: 3
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/emnist/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_v_depth: 5
enc_qk_depth: 3
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/bayesian_optimization/bayeso_benchmarks/__init__.py:
--------------------------------------------------------------------------------
#
# author: Jungtaek Kim (jtkim@postech.ac.kr)
# last updated: November 5, 2020
#

__version__ = '0.1.4'
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_anp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "anp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_bnp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "bnp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_cnp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "cnp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_np.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "np"
--------------------------------------------------------------------------------
/contextual_bandits/main.py:
--------------------------------------------------------------------------------
from runner import args
from runner.cmab_runner import cmab

def main():
    cmab(args)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/regression/configs/celeba/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
permute: True
--------------------------------------------------------------------------------
/regression/configs/emnist/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
bound_std: True
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_banp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "banp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_canp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "canp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_tnpa.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "tnpa"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_tnpd.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "tnpd"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_tnpnd.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "tnpnd"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
d_model: 16
emb_depth: 3
dim_feedforward: 64
nhead: 1
dropout: 0.0
num_layers: 4
drop_y: 0.5
--------------------------------------------------------------------------------
/regression/configs/gp/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
permute: True
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
d_model: 16
emb_depth: 3
dim_feedforward: 64
nhead: 1
dropout: 0.0
num_layers: 4
drop_y: 0.5
--------------------------------------------------------------------------------
/regression/configs/celeba/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
dim_lat: 128
enc_v_depth: 6
enc_qk_depth: 3
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_v_depth: 5
enc_qk_depth: 3
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/gp/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
dim_lat: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/emnist/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
permute: True
bound_std: True
--------------------------------------------------------------------------------
/regression/utils/paths.py:
--------------------------------------------------------------------------------
import os

ROOT = ''

evalsets_path = os.path.join(ROOT, 'evalsets')
datasets_path = os.path.join(ROOT, 'datasets')
results_path = os.path.join(ROOT, 'results')
--------------------------------------------------------------------------------
/bayesian_optimization/utils/paths.py:
--------------------------------------------------------------------------------
import os

ROOT = ''

evalsets_path = os.path.join(ROOT, 'evalsets')
datasets_path = os.path.join(ROOT, 'datasets')
results_path = os.path.join(ROOT, 'results')
--------------------------------------------------------------------------------
/contextual_bandits/runner/__init__.py:
--------------------------------------------------------------------------------
import yaml

from runner.args import get_args

args = get_args()

with open("paths.yaml") as f:
    paths = yaml.safe_load(f)
    datasets_path = paths["datasets_path"]
    evalsets_path = paths["evalsets_path"]
    results_path = paths["results_path"]
--------------------------------------------------------------------------------
/regression/configs/celeba/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
num_std_layers: 2
cov_approx: 'cholesky' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/regression/configs/gp/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
num_std_layers: 2
cov_approx: 'cholesky' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 8
dropout: 0.0
num_layers: 6
num_std_layers: 2
cov_approx: 'lowrank' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
d_model: 16
emb_depth: 3
dim_feedforward: 64
nhead: 1
dropout: 0.0
num_layers: 4
num_std_layers: 2
cov_approx: 'lowrank' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
drop_y: 0.5
--------------------------------------------------------------------------------
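The `cov_approx` field in the `tnpnd.yaml` configs (above and below) selects between a Cholesky and a low-rank-plus-diagonal parameterization of the predictive covariance. A generic sketch of the two textbook constructions, with hypothetical shapes — not necessarily the exact code in `models/tnpnd.py`, which is not included in this listing:

```python
import torch

N, prj_dim = 8, 20  # N target points; prj_dim matches the config's prj_dim

# Cholesky option: predict a lower-triangular factor L and form cov = L @ L.T.
L = torch.tril(torch.randn(N, N))
cov_cholesky = L @ L.T

# Low-rank option: predict a projection P (N x prj_dim) and a positive
# diagonal d, and form cov = P @ P.T + diag(d) -- O(N * prj_dim) parameters
# instead of O(N^2) for the full triangular factor.
P = torch.randn(N, prj_dim)
d = torch.rand(N) + 1e-3  # keep the diagonal positive for positive-definiteness
cov_lowrank = P @ P.T + torch.diag(d)
```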
/regression/configs/emnist/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
num_std_layers: 2
bound_std: True
cov_approx: 'cholesky' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/contextual_bandits/README.md:
--------------------------------------------------------------------------------
### Training
First, we have to train TNPs on randomly sampled wheel data. Training is similar to meta regression.
```
python main.py --cmab_mode=train --model=tnpa --expid=default
```
If training for the first time, wheel data will be generated and saved in `datasets`. Model weights and logs will be saved in `results/train-all-R`.

### Evaluation
After training, we can run the contextual bandit experiment to evaluate the trained model.
```
python main.py --cmab_mode=eval --model=tnpa --expid=default
```
Model weights corresponding to `{expid}` will be loaded and evaluated. If running the contextual bandit for the first time, evaluation data will be generated and saved in `evalsets`. The results will be saved in `results/eval-all-R`.
--------------------------------------------------------------------------------
/regression/data/emnist.py:
--------------------------------------------------------------------------------
import torch
import torchvision.datasets as tvds

from utils.paths import datasets_path

class EMNIST(tvds.EMNIST):
    def __init__(self, train=True, class_range=[0, 47], device='cpu', download=True):
        super().__init__(datasets_path, train=train, split='balanced', download=download)

        self.data = self.data.unsqueeze(1).float().div(255).transpose(-1, -2).to(device)
        self.targets = self.targets.to(device)

        idxs = []
        for c in range(class_range[0], class_range[1]):
            idxs.append(torch.where(self.targets==c)[0])
        idxs = torch.cat(idxs)

        self.data = self.data[idxs]
        self.targets = self.targets[idxs]

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]
--------------------------------------------------------------------------------
/bayesian_optimization/bayeso_benchmarks/two_dim_dropwave.py:
--------------------------------------------------------------------------------
#
# author: Jungtaek Kim (jtkim@postech.ac.kr)
# last updated: February 9, 2021
#

import numpy as np

from bayeso_benchmarks.benchmark_base import Function


def fun_target(bx, dim_bx):
    assert len(bx.shape) == 1
    assert bx.shape[0] == dim_bx

    y = -1.0 * (1 + np.cos(12.0 * np.sqrt(bx[0]**2 + bx[1]**2))) / (0.5 * (bx[0]**2 + bx[1]**2) + 2.0)
    return y


class DropWave(Function):
    def __init__(self):
        dim_bx = 2
        bounds = np.array([
            [-5.12, 5.12],
            [-5.12, 5.12],
        ])
        global_minimizers = np.array([
            [0.0, 0.0],
        ])
        global_minimum = -1.0
        function = lambda bx: fun_target(bx, dim_bx)

        Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function)
--------------------------------------------------------------------------------
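A quick sanity check (not part of the repo) that `fun_target` above attains the stated `global_minimum` of -1.0 at the stated minimizer (0, 0):

```python
import numpy as np
from bayeso_benchmarks.two_dim_dropwave import fun_target

# -(1 + cos(12*sqrt(0))) / (0.5*0 + 2.0) = -2.0 / 2.0 = -1.0
print(fun_target(np.array([0.0, 0.0]), 2))  # -1.0
```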
/bayesian_optimization/bayeso_benchmarks/inf_dim_cosines.py:
--------------------------------------------------------------------------------
#
# author: Jungtaek Kim (jtkim@postech.ac.kr)
# last updated: February 8, 2021
#

import numpy as np

from bayeso_benchmarks.benchmark_base import Function


def fun_target(bx, dim_bx):
    assert len(bx.shape) == 1
    assert bx.shape[0] == dim_bx

    y = np.sum(np.cos(bx) * (np.abs(bx) * (0.1 / (2.0 * np.pi)) - 1.0))
    return y


class Cosines(Function):
    def __init__(self, dim_problem):
        assert isinstance(dim_problem, int)

        dim_bx = np.inf
        bounds = np.array([
            [-2.0 * np.pi, 2.0 * np.pi],
        ])
        global_minimizers = np.array([
            [0.0],
        ])
        global_minimum = -1.0 * dim_problem

        function = lambda bx: fun_target(bx, dim_problem)

        Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function, dim_problem=dim_problem)
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 Tung Nguyen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/regression/utils/misc.py:
--------------------------------------------------------------------------------
import os
from importlib.machinery import SourceFileLoader
import math
import torch

def gen_load_func(parser, func):
    def load(args, cmdline):
        sub_args, cmdline = parser.parse_known_args(cmdline)
        for k, v in sub_args.__dict__.items():
            args.__dict__[k] = v
        return func(**sub_args.__dict__), cmdline
    return load


def load_module(filename):
    module_name = os.path.splitext(os.path.basename(filename))[0]
    return SourceFileLoader(module_name, filename).load_module()
#
#
# ex.
#


def logmeanexp(x, dim=0):
    return x.logsumexp(dim) - math.log(x.shape[dim])


def stack(x, num_samples=None, dim=0):
    return x if num_samples is None \
        else torch.stack([x]*num_samples, dim=dim)


def hrminsec(duration):
    hours, left = duration // 3600, duration % 3600
    mins, secs = left // 60, left % 60
    return f"{hours}hrs {mins}mins {secs}secs"
--------------------------------------------------------------------------------
/bayesian_optimization/utils/misc.py:
--------------------------------------------------------------------------------
import os
from importlib.machinery import SourceFileLoader
import math
import torch


def gen_load_func(parser, func):
    def load(args, cmdline):
        sub_args, cmdline = parser.parse_known_args(cmdline)
        for k, v in sub_args.__dict__.items():
            args.__dict__[k] = v
        return func(**sub_args.__dict__), cmdline
    return load


def load_module(filename):
    module_name = os.path.splitext(os.path.basename(filename))[0]
    return SourceFileLoader(module_name, filename).load_module(module_name)
#
#
# ex.
#


def logmeanexp(x, dim=0):
    return x.logsumexp(dim) - math.log(x.shape[dim])


def stack(x, num_samples=None, dim=0):
    return x if num_samples is None \
        else torch.stack([x]*num_samples, dim=dim)


def hrminsec(duration):
    hours, left = duration // 3600, duration % 3600
    mins, secs = left // 60, left % 60
    return f"{hours}hrs {mins}mins {secs}secs"
--------------------------------------------------------------------------------
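The blank `# ex.` comment in both copies of `misc.py` above suggests a usage example was intended for `load_module`; a hypothetical one (the filename and class name are illustrative, not taken from the repo's docs):

```python
# Load a model definition file by path; the module is named after the file.
module = load_module('models/cnp.py')   # imports models/cnp.py as module 'cnp'
# model_cls = getattr(module, 'CNP')    # then look up a class defined inside it
```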
11 | ``` 12 | python 1d_bo.py --bo_mode models --bo_kernel rbf --model tnpa --expid=default 13 | ``` 14 | 15 | ## Multi-dimensional BO 16 | --- 17 | ### Training 18 | First, generate the training dataset, and then train. Choose `dimension` (2 or 3), which correspond to 2-D and 3-D problems, respectively. It is recommended that `min_num_points` and `max_num_points` are 30 and 128 for 2-D problems, and 64 and 256 for 3-D problems. 19 | ``` 20 | python highdim_gp.py --mode=generate --model=tnpa --dimension=2 --min_num_points=30 --max_num_points=128 21 | ``` 22 | ``` 23 | python highdim_gp.py --mode=train --model=tnpa --dimension=2 --min_num_points=30 --max_num_points=128 24 | ``` 25 | 26 | ### Evaluation 27 | 28 | Run `highdim_bo.py`. 29 | Please choose objective function to evaluate. The following functions are supported: `ackley`, `cosine`, `rastrigin`, `dropwave`, `goldsteinprice`, `michalewicz`, `hartmann`. 30 | 31 | ``` 32 | python highdim_bo.py --objective=ackley --dimension=2 --model=tnpa --train_min_num_points=30 --train_max_num_points=128 33 | ``` 34 | -------------------------------------------------------------------------------- /contextual_bandits/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | from importlib.machinery import SourceFileLoader 3 | import math 4 | import torch 5 | 6 | def gen_load_func(parser, func): 7 | def load(args, cmdline): 8 | sub_args, cmdline = parser.parse_known_args(cmdline) 9 | for k, v in sub_args.__dict__.items(): 10 | args.__dict__[k] = v 11 | return func(**sub_args.__dict__), cmdline 12 | return load 13 | 14 | 15 | def load_module(filename): 16 | module_name = os.path.splitext(os.path.basename(filename))[0] 17 | return SourceFileLoader(module_name, filename).load_module() 18 | # 19 | # 20 | # ex. 
21 | # 22 | 23 | 24 | def logmeanexp(x, dim=0): 25 | return x.logsumexp(dim) - math.log(x.shape[dim]) 26 | 27 | 28 | def stack(x, num_samples=None, dim=0): 29 | return x if num_samples is None \ 30 | else torch.stack([x]*num_samples, dim=dim) 31 | 32 | 33 | def hrminsec(duration): 34 | hours, left = duration // 3600, duration % 3600 35 | mins, secs = left // 60, left % 60 36 | return f"{hours}hrs {mins}mins {secs}secs" 37 | 38 | 39 | def one_hot(x, num): # [B,N] -> [B,N,num] 40 | B, N = x.shape 41 | _x = torch.zeros([B, N, num], dtype=torch.float32) 42 | for b in range(B): 43 | for n in range(N): 44 | i = x[b, n] 45 | _x[b, n, i] = 1.0 46 | return _x -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/one_dim_linear.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx, slope): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | assert isinstance(slope, float) 15 | 16 | y = slope * bx[0] 17 | return y 18 | 19 | 20 | class Linear(Function): 21 | def __init__(self, 22 | bounds=np.array([ 23 | [-10, 10], 24 | ]), 25 | slope=1.0 26 | ): 27 | assert isinstance(slope, float) 28 | assert isinstance(bounds, np.ndarray) 29 | assert len(bounds.shape) == 2 30 | assert bounds.shape[0] == 1 31 | assert bounds.shape[1] == 2 32 | assert bounds[0, 0] < bounds[0, 1] 33 | 34 | dim_bx = bounds.shape[0] 35 | 36 | if slope > 0.0: 37 | global_minimizers = np.array([ 38 | [bounds[0, 0]], 39 | ]) 40 | global_minimum = slope * bounds[0, 0] 41 | else: 42 | global_minimizers = np.array([ 43 | [bounds[0, 1]], 44 | ]) 45 | global_minimum = slope * bounds[0, 1] 46 | function = lambda bx: fun_target(bx, dim_bx, slope) 47 | 48 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 49 | -------------------------------------------------------------------------------- /regression/utils/sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def gather(items, idxs): 4 | K = idxs.shape[0] 5 | idxs = idxs.to(items[0].device) 6 | gathered = [] 7 | for item in items: 8 | gathered.append(torch.gather( 9 | torch.stack([item]*K), -2, 10 | torch.stack([idxs]*item.shape[-1], -1)).squeeze(0)) 11 | return gathered[0] if len(gathered) == 1 else gathered 12 | 13 | def sample_subset(*items, r_N=None, num_samples=None): 14 | r_N = r_N or torch.rand(1).item() 15 | K = num_samples or 1 16 | N = items[0].shape[-2] 17 | Ns = min(max(1, int(r_N * N)), N-1) 18 | batch_shape = items[0].shape[:-2] 19 | idxs = torch.rand((K,)+batch_shape+(N,)).argsort(-1) 20 | return gather(items, idxs[...,:Ns]), gather(items, idxs[...,Ns:]) 21 | 22 | def sample_with_replacement(*items, num_samples=None, r_N=1.0, N_s=None): 23 | K = num_samples or 1 24 | N = items[0].shape[-2] 25 | N_s = N_s or max(1, int(r_N * N)) 26 | batch_shape = items[0].shape[:-2] 27 | idxs = torch.randint(N, size=(K,)+batch_shape+(N_s,)) 28 | return gather(items, idxs) 29 | 30 | def sample_mask(B, N, num_samples=None, min_num=3, prob=0.5): 31 | min_num = min(min_num, N) 32 | K = num_samples or 1 33 | fixed = torch.ones(K, B, min_num) 34 | if N - min_num > 0: 35 | rand = torch.bernoulli(prob*torch.ones(K, B, N-min_num)) 36 | mask = torch.cat([fixed, rand], -1) 
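        # mask: [K,B,N]; the first min_num entries of each row are always 1,
        # the remaining N-min_num entries are Bernoulli(prob) draws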
37 | return mask.squeeze(0) 38 | else: 39 | return fixed.squeeze(0) -------------------------------------------------------------------------------- /bayesian_optimization/utils/sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def gather(items, idxs): 5 | K = idxs.shape[0] 6 | idxs = idxs.to(items[0].device) 7 | gathered = [] 8 | for item in items: 9 | gathered.append(torch.gather( 10 | torch.stack([item] * K), -2, 11 | torch.stack([idxs] * item.shape[-1], -1)).squeeze(0)) 12 | return gathered[0] if len(gathered) == 1 else gathered 13 | 14 | 15 | def sample_subset(*items, r_N=None, num_samples=None): 16 | r_N = r_N or torch.rand(1).item() 17 | K = num_samples or 1 18 | N = items[0].shape[-2] 19 | Ns = min(max(1, int(r_N * N)), N - 1) 20 | batch_shape = items[0].shape[:-2] 21 | idxs = torch.rand((K,) + batch_shape + (N,)).argsort(-1) 22 | return gather(items, idxs[..., :Ns]), gather(items, idxs[..., Ns:]) 23 | 24 | 25 | def sample_with_replacement(*items, num_samples=None, r_N=1.0, N_s=None): 26 | K = num_samples or 1 27 | N = items[0].shape[-2] 28 | N_s = N_s or max(1, int(r_N * N)) 29 | batch_shape = items[0].shape[:-2] 30 | idxs = torch.randint(N, size=(K,) + batch_shape + (N_s,)) 31 | return gather(items, idxs) 32 | 33 | 34 | def sample_mask(B, N, num_samples=None, min_num=3, prob=0.5): 35 | min_num = min(min_num, N) 36 | K = num_samples or 1 37 | fixed = torch.ones(K, B, min_num) 38 | if N - min_num > 0: 39 | rand = torch.bernoulli(prob * torch.ones(K, B, N - min_num)) 40 | mask = torch.cat([fixed, rand], -1) 41 | return mask.squeeze(0) 42 | else: 43 | return fixed.squeeze(0) 44 | -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/three_dim_hartmann3d.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | 15 | alpha = np.array([1.0, 1.2, 3.0, 3.2]) 16 | A = np.array([ 17 | [3.0, 10.0, 30.0], 18 | [0.1, 10.0, 35.0], 19 | [3.0, 10.0, 30.0], 20 | [0.1, 10.0, 35.0], 21 | ]) 22 | P = 1e-4 * np.array([ 23 | [3689, 1170, 2673], 24 | [4699, 4387, 7470], 25 | [1091, 8732, 5547], 26 | [381, 5743, 8828], 27 | ]) 28 | 29 | outer = 0.0 30 | for i_ in range(0, 4): 31 | inner = 0.0 32 | for j_ in range(0, 3): 33 | inner += A[i_, j_] * (bx[j_] - P[i_, j_])**2 34 | outer += alpha[i_] * np.exp(-1.0 * inner) 35 | 36 | y = -1.0 * outer 37 | return y 38 | 39 | 40 | class Hartmann3D(Function): 41 | def __init__(self, 42 | bounds=np.array([ 43 | [0.0, 1.0], 44 | [0.0, 1.0], 45 | [0.0, 1.0], 46 | ]) 47 | ): 48 | assert isinstance(bounds, np.ndarray) 49 | assert len(bounds.shape) == 2 50 | assert bounds.shape[1] == 2 51 | 52 | dim_bx = 3 53 | assert bounds.shape[0] == dim_bx 54 | 55 | global_minimizers = np.array([ 56 | [0.114614, 0.555649, 0.852547], 57 | ]) 58 | global_minimum = -3.86278 59 | function = lambda bx: fun_target(bx, dim_bx) 60 | 61 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 62 | -------------------------------------------------------------------------------- /contextual_bandits/utils/sampling.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | 3 | def gather(items, idxs, reduce=True): 4 | K = idxs.shape[0] # Ns 5 | idxs = idxs.to(items[0].device) # [Ns,B,N] 6 | gathered = [] # [Ns,B,N,D] 7 | for item in items: # [B,N,D] 8 | _gathered = torch.gather( 9 | torch.stack([item] * K), -2, # [Ns,B,N,D] 10 | torch.stack([idxs] * item.shape[-1], -1)) 11 | gathered.append(_gathered.squeeze(0) if reduce else _gathered) # [Ns,B,N,D] 12 | return gathered[0] if len(gathered) == 1 else gathered 13 | 14 | def sample_subset(*items, r_N=None, num_samples=None): 15 | r_N = r_N or torch.rand(1).item() 16 | K = num_samples or 1 17 | N = items[0].shape[-2] 18 | Ns = min(max(1, int(r_N * N)), N-1) 19 | batch_shape = items[0].shape[:-2] 20 | idxs = torch.rand((K,)+batch_shape+(N,)).argsort(-1) 21 | return gather(items, idxs[...,:Ns]), gather(items, idxs[...,Ns:]) 22 | 23 | def sample_with_replacement(*items, num_samples=None, r_N=1.0, N_s=None, reduce=True): 24 | K = num_samples or 1 # Ns 25 | N = items[0].shape[-2] # N 26 | N_s = N_s or max(1, int(r_N * N)) # N 27 | batch_shape = items[0].shape[:-2] # B 28 | idxs = torch.randint(N, size=(K,)+batch_shape+(N_s,)) # [Ns,B,N] 29 | return gather(items, idxs, reduce) # items: [B,N,D], idxs: [Ns,B,N] 30 | 31 | def sample_mask(B, N, num_samples=None, min_num=3, prob=0.5): 32 | min_num = min(min_num, N) 33 | K = num_samples or 1 34 | fixed = torch.ones(K, B, min_num) 35 | if N - min_num > 0: 36 | rand = torch.bernoulli(prob*torch.ones(K, B, N-min_num)) 37 | mask = torch.cat([fixed, rand], -1) 38 | return mask.squeeze(0) 39 | else: 40 | return fixed.squeeze(0) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Transformer Neural Processes: Uncertainty-Aware Meta Learning Via Sequence Modeling 2 | 3 | This is the official implementation of the paper [Transformer Neural Processes: Uncertainty-Aware Meta Learning Via Sequence Modeling](https://arxiv.org/abs/2207.04179) in PyTorch. We propose Transformer Neural Processes (TNPs), a new member of the Neural Processes family that casts uncertainty-aware meta learning as a sequence modeling problem. We learn TNPs via an autoregressive likelihood-based objective and instantiate them with a novel transformer-based architecture. TNPs achieve state-of-the-art performance on various benchmark problems, outperforming all previous NP variants on meta regression, image completion, contextual multi-armed bandits, and Bayesian optimization. 4 | 5 | 6 | 7 | ## Install 8 | 9 | First, clone the repository: 10 | 11 | ``` 12 | git clone https://github.com/tung-nd/TNP-pytorch.git 13 | ``` 14 | 15 | Then install the dependencies as listed in `env.yml` and activate the environment: 16 | 17 | ``` 18 | conda env create -f env.yml 19 | conda activate tnp 20 | ``` 21 | 22 | ## Usage 23 | 24 | Please check the directory of each task for specific usage. 25 | 26 | ## Citation 27 | 28 | If you find this repo useful in your research, please consider citing our paper: 29 | ``` 30 | @article{nguyen2022transformer, 31 | title={Transformer neural processes: Uncertainty-aware meta learning via sequence modeling}, 32 | author={Nguyen, Tung and Grover, Aditya}, 33 | journal={arXiv preprint arXiv:2207.04179}, 34 | year={2022} 35 | } 36 | ``` 37 | 38 | ## Acknowledgement 39 | 40 | The implementation of the baselines is borrowed from the official code base of [Bootstrapping Neural Processes](https://github.com/juho-lee/bnp).
-------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/inf_dim_rastrigin.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target( 12 | bx, 13 | dim_bx, 14 | A=10.0 15 | ): 16 | assert len(bx.shape) == 1 17 | assert bx.shape[0] == dim_bx 18 | assert isinstance(A, float) 19 | 20 | y = A * dim_bx + np.sum((bx / (2.0 / 5.12)) ** 2 - A * np.cos(2 * np.pi * bx / (2.0 / 5.12)), axis=-1) 21 | return y 22 | 23 | 24 | class Rastrigin(Function): 25 | def __init__(self, dim_problem): 26 | assert isinstance(dim_problem, int) 27 | 28 | dim_bx = np.inf 29 | bounds = np.array([ 30 | [-5.12 * (2.0 / 5.12), 5.12 * (2.0 / 5.12)], 31 | ]) 32 | global_minimizers = np.array([ 33 | [0.0], 34 | ]) 35 | global_minimum = 0.0 36 | dim_problem = dim_problem 37 | 38 | function = lambda bx: fun_target(bx, dim_problem) 39 | 40 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function, dim_problem=dim_problem) 41 | 42 | 43 | if __name__ == '__main__': 44 | import matplotlib.pyplot as plt 45 | from mpl_toolkits import mplot3d 46 | 47 | func = Rastrigin(dim_problem=2) 48 | lb, ub = func.get_bounds().transpose() 49 | # lb, ub = np.where(lb < -2, -2, lb), np.where(ub > 2, 2, ub) 50 | 51 | x1 = np.linspace(lb[0], ub[0], 50) 52 | x2 = np.linspace(lb[1], ub[1], 50) 53 | x1, x2 = np.meshgrid(x1, x2) 54 | pts = np.column_stack((x1.ravel(), x2.ravel())) 55 | func_val = func.output(pts) 56 | 57 | fig = plt.figure(figsize=(25, 25)) 58 | 59 | ax = fig.add_subplot(1, 1, 1, projection='3d') 60 | ax.plot_surface(x1, x2, func_val.reshape(x1.shape)) 61 | print(func.output(np.zeros((2, 2)))) 62 | plt.show() 63 | -------------------------------------------------------------------------------- /regression/README.md: -------------------------------------------------------------------------------- 1 | ## 1D Regression 2 | 3 | --- 4 | ### Training 5 | ``` 6 | python gp.py --mode=train --expid=default-tnpa --model=tnpa 7 | ``` 8 | The hyperparameter config of each model is saved in `configs/gp`. If training for the first time, evaluation data will be generated and saved in `evalsets/gp`. Model weights and logs are saved in `results/gp/{model}/{expid}`. 9 | 10 | ### Evaluation 11 | ``` 12 | python gp.py --mode=evaluate_all_metrics --expid=default-tnpa --model=tnpa 13 | ``` 14 | Note that you have to specify `{expid}` correctly. The model will load weights from `results/gp/{model}/{expid}` to evaluate. 15 | 16 | ## CelebA Image Completion 17 | --- 18 | 19 | ### Prepare data 20 | Download [img_align_celeba.zip](https://drive.google.com/drive/folders/0B7EVK8r0v71pTUZsaXdaSnZBZzg) and unzip. Download [list_eval_partition.txt](https://drive.google.com/drive/folders/0B7EVK8r0v71pdjI3dmwtNm5jRkE) and [identity_CelebA.txt](https://drive.google.com/drive/folders/0B7EVK8r0v71pOC0wOVZlQnFfaGs). Place the downloaded files in the `datasets/celeba` folder. Run `python data/celeba.py` to preprocess the data.
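For reference, a sketch of the layout `data/celeba.py` expects under `datasets/celeba` before preprocessing (inferred from the paths used in that script):
```
datasets/celeba/
├── img_align_celeba/        # unzipped images
├── list_eval_partition.txt
└── identity_CelebA.txt
```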
21 | 22 | ### Training 23 | ``` 24 | python celeba.py --mode=train --expid=default-tnpa --model=tnpa 25 | ``` 26 | 27 | ### Evaluation 28 | ``` 29 | python celeba.py --mode=evaluate_all_metrics --expid=default-tnpa --model=tnpa 30 | ``` 31 | If evaluating for the first time, evaluation data will be generated and saved in `evalsets/celeba`. 32 | 33 | ## EMNIST Image Completion 34 | --- 35 | 36 | ### Training 37 | ``` 38 | python emnist.py --mode=train --expid=default-tnpa --model=tnpa 39 | ``` 40 | If training for the first time, EMNIST training data will be automatically downloaded and saved in `datasets/emnist`. 41 | 42 | ### Evaluation 43 | ``` 44 | python emnist.py --mode=evaluate_all_metrics --expid=default-tnpa --model=tnpa 45 | ``` 46 | If evaluating for the first time, evaluation data will be generated and saved in `evalsets/emnist`. -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/two_dim_michalewicz.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | 15 | y = 0.0 16 | 17 | for ind in range(0, dim_bx): 18 | y += np.sin(bx[ind]) * np.sin(((ind + 1.0) * bx[ind]**2) / np.pi)**(2.0 * 10.0) 19 | y *= -1.0 20 | 21 | return y 22 | 23 | 24 | class Michalewicz(Function): 25 | def __init__(self): 26 | dim_bx = 2 27 | bounds = np.array([ 28 | [0.0, np.pi], 29 | [0.0, np.pi], 30 | ]) 31 | global_minimizers = np.array([ 32 | [2.20279089, 1.57063923], 33 | ]) 34 | global_minimum = -1.801302197 35 | function = lambda bx: fun_target(bx, dim_bx) 36 | 37 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 38 | 39 | 40 | def translated_fun_target(bx, dim_bx): 41 | assert len(bx.shape) == 1 42 | assert bx.shape[0] == dim_bx 43 | 44 | y = 0.0 45 | 46 | for ind in range(0, dim_bx): 47 | y += np.sin(bx[ind] + 1.5) * np.sin(((ind + 1.0) * (bx[ind] + 1.5)**2) / np.pi)**(2.0 * 10.0) 48 | y *= -1.0 49 | 50 | return y 51 | 52 | 53 | class TranslatedMichalewicz(Function): 54 | def __init__(self): 55 | dim_bx = 2 56 | bounds = np.array([ 57 | [0.0 - 1.5, np.pi - 1.5], 58 | [0.0 - 1.5, np.pi - 1.5], 59 | ]) 60 | global_minimizers = np.array([ 61 | [2.20279089 - 1.5, 1.57063923 - 1.5], 62 | ]) 63 | global_minimum = -1.801302197 64 | function = lambda bx: translated_fun_target(bx, dim_bx) 65 | 66 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 67 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnpd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.tnp import TNP 7 | 8 | 9 | class TNPD(TNP): 10 | def __init__( 11 | self, 12 | dim_x, 13 | dim_y, 14 | d_model, 15 | emb_depth, 16 | dim_feedforward, 17 | nhead, 18 | dropout, 19 | num_layers, 20 | ): 21 | super(TNPD, self).__init__( 22 | dim_x, 23 | dim_y, 24 | d_model, 25 | emb_depth, 26 | dim_feedforward, 27 | nhead, 28 | dropout, 29 | num_layers, 30 | ) 31 | 32 | self.predictor = nn.Sequential( 33 | nn.Linear(d_model, dim_feedforward), 34 | nn.ReLU(), 35 |
nn.Linear(dim_feedforward, dim_y*2) 36 | ) 37 | 38 | def forward(self, batch, reduce_ll=True): 39 | out_encoder = self.encode(batch, autoreg=False) 40 | out = self.predictor(out_encoder) 41 | mean, std = torch.chunk(out, 2, dim=-1) 42 | 43 | std = torch.exp(std) 44 | pred_dist = Normal(mean, std) 45 | loss = - pred_dist.log_prob(batch.yt).sum(-1).mean() 46 | 47 | outs = AttrDict() 48 | outs.loss = loss 49 | return outs 50 | 51 | def predict(self, xc, yc, xt, num_samples=None): 52 | if xc.shape[-3] != xt.shape[-3]: 53 | xt = xt.transpose(-3, -2) 54 | 55 | batch = AttrDict() 56 | batch.xc = xc 57 | batch.yc = yc 58 | batch.xt = xt 59 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 60 | 61 | out_encoder = self.encode(batch, autoreg=False) 62 | out = self.predictor(out_encoder) 63 | mean, std = torch.chunk(out, 2, dim=-1) 64 | std = torch.exp(std) 65 | 66 | return Normal(mean, std) -------------------------------------------------------------------------------- /regression/models/cnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import PoolingEncoder, Decoder 6 | 7 | class CNP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_pre_depth=4, 13 | enc_post_depth=2, 14 | dec_depth=3): 15 | 16 | super().__init__() 17 | 18 | self.enc1 = PoolingEncoder( 19 | dim_x=dim_x, 20 | dim_y=dim_y, 21 | dim_hid=dim_hid, 22 | pre_depth=enc_pre_depth, 23 | post_depth=enc_post_depth) 24 | 25 | self.enc2 = PoolingEncoder( 26 | dim_x=dim_x, 27 | dim_y=dim_y, 28 | dim_hid=dim_hid, 29 | pre_depth=enc_pre_depth, 30 | post_depth=enc_post_depth) 31 | 32 | self.dec = Decoder( 33 | dim_x=dim_x, 34 | dim_y=dim_y, 35 | dim_enc=2*dim_hid, 36 | dim_hid=dim_hid, 37 | depth=dec_depth) 38 | 39 | def predict(self, xc, yc, xt, num_samples=None): 40 | encoded = torch.cat([self.enc1(xc, yc), self.enc2(xc, yc)], -1) 41 | encoded = torch.stack([encoded]*xt.shape[-2], -2) 42 | return self.dec(encoded, xt) 43 | 44 | def forward(self, batch, num_samples=None, reduce_ll=True): 45 | outs = AttrDict() 46 | py = self.predict(batch.xc, batch.yc, batch.x) 47 | ll = py.log_prob(batch.y).sum(-1) 48 | 49 | if self.training: 50 | outs.loss = -ll.mean() 51 | else: 52 | num_ctx = batch.xc.shape[-2] 53 | if reduce_ll: 54 | outs.ctx_ll = ll[...,:num_ctx].mean() 55 | outs.tar_ll = ll[...,num_ctx:].mean() 56 | else: 57 | outs.ctx_ll = ll[...,:num_ctx] 58 | outs.tar_ll = ll[...,num_ctx:] 59 | 60 | return outs 61 | -------------------------------------------------------------------------------- /regression/models/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class MultiHeadAttn(nn.Module): 7 | def __init__(self, dim_q, dim_k, dim_v, dim_out, num_heads=8): 8 | super().__init__() 9 | self.num_heads = num_heads 10 | self.dim_out = dim_out 11 | self.fc_q = nn.Linear(dim_q, dim_out, bias=False) 12 | self.fc_k = nn.Linear(dim_k, dim_out, bias=False) 13 | self.fc_v = nn.Linear(dim_v, dim_out, bias=False) 14 | self.fc_out = nn.Linear(dim_out, dim_out) 15 | self.ln1 = nn.LayerNorm(dim_out) 16 | self.ln2 = nn.LayerNorm(dim_out) 17 | 18 | def scatter(self, x): 19 | return torch.cat(x.chunk(self.num_heads, -1), -3) 20 | 21 | def gather(self, x): 22 | return torch.cat(x.chunk(self.num_heads, -3), -1) 23 | 24 
| def attend(self, q, k, v, mask=None): 25 | q_, k_, v_ = [self.scatter(x) for x in [q, k, v]] 26 | A_logits = q_ @ k_.transpose(-2, -1) / math.sqrt(self.dim_out) 27 | if mask is not None: 28 | mask = mask.bool().to(q.device) 29 | mask = torch.stack([mask]*q.shape[-2], -2) 30 | mask = torch.cat([mask]*self.num_heads, -3) 31 | A = torch.softmax(A_logits.masked_fill(mask, -float('inf')), -1) 32 | A = A.masked_fill(torch.isnan(A), 0.0) 33 | else: 34 | A = torch.softmax(A_logits, -1) 35 | return self.gather(A @ v_) 36 | 37 | def forward(self, q, k, v, mask=None): 38 | q, k, v = self.fc_q(q), self.fc_k(k), self.fc_v(v) 39 | out = self.ln1(q + self.attend(q, k, v, mask=mask)) 40 | out = self.ln2(out + F.relu(self.fc_out(out))) 41 | return out 42 | 43 | class SelfAttn(MultiHeadAttn): 44 | def __init__(self, dim_in, dim_out, num_heads=8): 45 | super().__init__(dim_in, dim_in, dim_in, dim_out, num_heads) 46 | 47 | def forward(self, x, mask=None): 48 | return super().forward(x, x, x, mask=mask) 49 | -------------------------------------------------------------------------------- /contextual_bandits/models/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class MultiHeadAttn(nn.Module): 7 | def __init__(self, dim_q, dim_k, dim_v, dim_out, num_heads=8): 8 | super().__init__() 9 | self.num_heads = num_heads 10 | self.dim_out = dim_out 11 | self.fc_q = nn.Linear(dim_q, dim_out, bias=False) 12 | self.fc_k = nn.Linear(dim_k, dim_out, bias=False) 13 | self.fc_v = nn.Linear(dim_v, dim_out, bias=False) 14 | self.fc_out = nn.Linear(dim_out, dim_out) 15 | self.ln1 = nn.LayerNorm(dim_out) 16 | self.ln2 = nn.LayerNorm(dim_out) 17 | 18 | def scatter(self, x): 19 | return torch.cat(x.chunk(self.num_heads, -1), -3) 20 | 21 | def gather(self, x): 22 | return torch.cat(x.chunk(self.num_heads, -3), -1) 23 | 24 | def attend(self, q, k, v, mask=None): 25 | q_, k_, v_ = [self.scatter(x) for x in [q, k, v]] 26 | A_logits = q_ @ k_.transpose(-2, -1) / math.sqrt(self.dim_out) 27 | if mask is not None: 28 | mask = mask.bool().to(q.device) 29 | mask = torch.stack([mask]*q.shape[-2], -2) 30 | mask = torch.cat([mask]*self.num_heads, -3) 31 | A = torch.softmax(A_logits.masked_fill(mask, -float('inf')), -1) 32 | A = A.masked_fill(torch.isnan(A), 0.0) 33 | else: 34 | A = torch.softmax(A_logits, -1) 35 | return self.gather(A @ v_) 36 | 37 | def forward(self, q, k, v, mask=None): 38 | q, k, v = self.fc_q(q), self.fc_k(k), self.fc_v(v) 39 | out = self.ln1(q + self.attend(q, k, v, mask=mask)) 40 | out = self.ln2(out + F.relu(self.fc_out(out))) 41 | return out 42 | 43 | class SelfAttn(MultiHeadAttn): 44 | def __init__(self, dim_in, dim_out, num_heads=8): 45 | super().__init__(dim_in, dim_in, dim_in, dim_out, num_heads) 46 | 47 | def forward(self, x, mask=None): 48 | return super().forward(x, x, x, mask=mask) 49 | -------------------------------------------------------------------------------- /contextual_bandits/runner/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def get_args(): 5 | parser = argparse.ArgumentParser() 6 | 7 | # Experiment 8 | parser.add_argument('--expid', type=str, default=None) 9 | parser.add_argument('--resume', type=str, default=None) 10 | parser.add_argument('--device', type=str, default='cuda') # 'cpu' to use cpu 11 | 12 | # wheel 13 | parser.add_argument("--cmab_data", choices=["wheel"], 
default="wheel") 14 | parser.add_argument("--cmab_wheel_delta", type=float, default=0.5) 15 | parser.add_argument("--cmab_mode", choices=["train", "eval", "plot", "evalplot"], default="train") 16 | parser.add_argument('--cmab_num_bs', type=int, default=10) 17 | parser.add_argument("--cmab_train_update_freq", type=int, default=1) 18 | parser.add_argument("--cmab_train_num_batches", type=int, default=1) 19 | parser.add_argument("--cmab_train_batch_size", type=int, default=8) 20 | parser.add_argument("--cmab_train_seed", type=int, default=0) 21 | parser.add_argument("--cmab_train_reward", type=str, default="all") 22 | parser.add_argument("--cmab_eval_method", type=str, default="ucb") 23 | parser.add_argument("--cmab_eval_num_contexts", type=int, default=2000) 24 | parser.add_argument("--cmab_eval_seed_start", type=int, default=0) 25 | parser.add_argument("--cmab_eval_seed_end", type=int, default=49) 26 | parser.add_argument("--cmab_plot_seed_start", type=int, default=0) 27 | parser.add_argument("--cmab_plot_seed_end", type=int, default=49) 28 | 29 | # Model 30 | parser.add_argument('--model', type=str, default="tnpa") 31 | 32 | # Training 33 | parser.add_argument('--lr', type=float, default=5e-4) 34 | parser.add_argument('--num_epochs', type=int, default=100000) 35 | parser.add_argument('--print_freq', type=int, default=200) 36 | parser.add_argument('--eval_freq', type=int, default=5000) 37 | parser.add_argument('--save_freq', type=int, default=1000) 38 | 39 | args = parser.parse_args() 40 | 41 | return args -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/one_dim_step.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx, steps, step_values): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | assert isinstance(steps, list) 15 | assert isinstance(step_values, list) 16 | assert len(steps) == len(step_values) + 1 17 | assert isinstance(steps[0], float) 18 | assert isinstance(step_values[0], float) 19 | 20 | y = None 21 | for ind_step in range(0, len(steps) - 1): 22 | if ind_step < (len(steps) - 2) and steps[ind_step] <= bx[0] and bx[0] < steps[ind_step+1]: 23 | y = step_values[ind_step] 24 | break 25 | elif ind_step == (len(steps) - 2) and steps[ind_step] <= bx[0] and bx[0] <= steps[ind_step+1]: 26 | y = step_values[ind_step] 27 | break 28 | 29 | if y is None: 30 | raise ValueError('Conditions for steps') 31 | return y 32 | 33 | 34 | class Step(Function): 35 | def __init__(self, 36 | steps=[-10., -5., 0., 5., 10.], 37 | step_values=[-2., 0., 1., -1.], 38 | ): 39 | assert isinstance(steps, list) 40 | assert isinstance(step_values, list) 41 | assert len(steps) == len(step_values) + 1 42 | assert isinstance(steps[0], float) 43 | assert isinstance(step_values[0], float) 44 | assert np.all(np.sort(steps) == np.asarray(steps)) 45 | 46 | dim_bx = 1 47 | bounds = np.array([ 48 | [np.min(steps), np.max(steps)], 49 | ]) 50 | global_minimizers = np.array([ 51 | [steps[np.argmin(step_values)]], 52 | ]) 53 | global_minimum = np.min(step_values) 54 | function = lambda bx: fun_target(bx, dim_bx, steps, step_values) 55 | 56 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 57 | 
-------------------------------------------------------------------------------- /bayesian_optimization/models/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class MultiHeadAttn(nn.Module): 7 | def __init__(self, dim_q, dim_k, dim_v, dim_out, num_heads=8): 8 | super().__init__() 9 | self.num_heads = num_heads 10 | self.dim_out = dim_out 11 | self.fc_q = nn.Linear(dim_q, dim_out, bias=False) 12 | self.fc_k = nn.Linear(dim_k, dim_out, bias=False) 13 | self.fc_v = nn.Linear(dim_v, dim_out, bias=False) 14 | self.fc_out = nn.Linear(dim_out, dim_out) 15 | self.ln1 = nn.LayerNorm(dim_out) 16 | self.ln2 = nn.LayerNorm(dim_out) 17 | 18 | def scatter(self, x): 19 | return torch.cat(x.chunk(self.num_heads, -1), -3) 20 | 21 | def gather(self, x): 22 | return torch.cat(x.chunk(self.num_heads, -3), -1) 23 | 24 | def attend(self, q, k, v, mask=None): 25 | q, k, v = [self.scatter(x) for x in [q, k, v]] 26 | A_logits = q @ k.transpose(-2, -1) / math.sqrt(self.dim_out) 27 | if mask is not None: 28 | mask = mask.bool().to(q.device) 29 | mask = torch.stack([mask] * q.shape[-2], -2) 30 | mask = torch.cat([mask] * self.num_heads, -3) 31 | A = torch.softmax(A_logits.masked_fill(mask, -float('inf')), -1) 32 | A = A.masked_fill(torch.isnan(A), 0.0) 33 | else: 34 | A = torch.softmax(A_logits, -1) 35 | return self.gather(A @ v) 36 | 37 | def forward(self, q, k, v, mask=None): 38 | q, k, v = self.fc_q(q), self.fc_k(k), self.fc_v(v) 39 | out = self.ln1(q + self.attend(q, k, v, mask=mask)) 40 | out = self.ln2(out + F.relu(self.fc_out(out))) 41 | return out 42 | 43 | 44 | class SelfAttn(MultiHeadAttn): 45 | def __init__(self, dim_in, dim_out, num_heads=8): 46 | super().__init__(dim_in, dim_in, dim_in, dim_out, num_heads) 47 | 48 | def forward(self, x, mask=None, **kwargs): 49 | return super().forward(x, x, x, mask=mask) 50 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnpa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPA(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | ): 22 | super(TNPA, self).__init__( 23 | dim_x, 24 | dim_y, 25 | d_model, 26 | emb_depth, 27 | dim_feedforward, 28 | nhead, 29 | dropout, 30 | num_layers, 31 | ) 32 | 33 | self.predictor = nn.Sequential( 34 | nn.Linear(d_model, dim_feedforward), 35 | nn.ReLU(), 36 | nn.Linear(dim_feedforward, dim_y*2) 37 | ) 38 | 39 | 40 | def forward(self, batch, reduce_ll=True): 41 | out_encoder = self.encode(batch, autoreg=True) 42 | out = self.predictor(out_encoder) 43 | mean, std = torch.chunk(out, 2, dim=-1) 44 | std = torch.exp(std) 45 | 46 | pred_dist = Normal(mean, std) 47 | loss = - pred_dist.log_prob(batch.yt).sum(-1).mean() 48 | 49 | outs = AttrDict() 50 | outs.loss = loss 51 | return outs 52 | 53 | def predict(self, xc, yc, xt, num_samples=None): 54 | if xc.shape[-3] != xt.shape[-3]: 55 | xt = xt.transpose(-3, -2) 56 | 57 | batch = AttrDict() 58 | batch.xc = xc 59 | batch.yc = yc 60 | batch.xt = xt 61 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 62 | 
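        # yt is only a shape placeholder here; its values are not used for prediction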
63 | # in evaluation tnpa = tnpd because we only have 1 target point to predict 64 | out_encoder = self.encode(batch, autoreg=False) 65 | out = self.predictor(out_encoder) 66 | mean, std = torch.chunk(out, 2, dim=-1) 67 | std = torch.exp(std) 68 | 69 | return Normal(mean, std) -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/inf_dim_ackley.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target( 12 | bx, 13 | dim_bx, 14 | a=20.0, 15 | b=0.2, 16 | c=2.0 * np.pi 17 | ): 18 | assert len(bx.shape) == 1 19 | assert bx.shape[0] == dim_bx 20 | assert isinstance(a, float) 21 | assert isinstance(b, float) 22 | assert isinstance(c, float) 23 | 24 | y = -a * np.exp(-b * np.linalg.norm(bx, ord=2, axis=0) * np.sqrt(1.0 / dim_bx)) - np.exp( 25 | 1.0 / dim_bx * np.sum(np.cos(c * bx), axis=0)) + a + np.exp(1.0) 26 | return y 27 | 28 | 29 | class Ackley(Function): 30 | def __init__(self, dim_problem): 31 | assert isinstance(dim_problem, int) 32 | 33 | dim_bx = np.inf 34 | bounds = np.array([ 35 | [-32.768, 32.768], 36 | ]) 37 | global_minimizers = np.array([ 38 | [0.0], 39 | ]) 40 | global_minimum = 0.0 41 | dim_problem = dim_problem 42 | 43 | function = lambda bx: fun_target(bx, dim_problem) 44 | 45 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function, dim_problem=dim_problem) 46 | 47 | 48 | if __name__ == '__main__': 49 | import matplotlib.pyplot as plt 50 | 51 | func = Ackley(dim_problem=2) 52 | lb, ub = func.get_bounds().transpose() 53 | lb, ub = np.where(lb < -2, -2, lb), np.where(ub > 2, 2, ub) 54 | 55 | x1 = np.linspace(lb[0], ub[0], 100) 56 | x2 = np.linspace(lb[1], ub[1], 100) 57 | x1, x2 = np.meshgrid(x1, x2) 58 | pts = np.column_stack((x1.ravel(), x2.ravel())) 59 | y = func.output(pts) 60 | 61 | contour = plt.contourf(x1, x2, y.reshape(x1.shape), 50, cmap='RdGy') 62 | # contour = plt.contourf(x1, x2, y.reshape(x1.shape), 50) 63 | plt.imshow(y.reshape(x1.shape), extent=[lb[0], ub[0], lb[1], ub[1]], origin='lower', 64 | cmap='RdGy') 65 | plt.colorbar(contour) 66 | plt.show() 67 | -------------------------------------------------------------------------------- /regression/models/canp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import CrossAttnEncoder, Decoder, PoolingEncoder 6 | 7 | class CANP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_v_depth=4, 13 | enc_qk_depth=2, 14 | enc_pre_depth=4, 15 | enc_post_depth=2, 16 | dec_depth=3): 17 | 18 | super().__init__() 19 | 20 | self.enc1 = CrossAttnEncoder( 21 | dim_x=dim_x, 22 | dim_y=dim_y, 23 | dim_hid=dim_hid, 24 | v_depth=enc_v_depth, 25 | qk_depth=enc_qk_depth) 26 | 27 | self.enc2 = PoolingEncoder( 28 | dim_x=dim_x, 29 | dim_y=dim_y, 30 | dim_hid=dim_hid, 31 | self_attn=True, 32 | pre_depth=enc_pre_depth, 33 | post_depth=enc_post_depth) 34 | 35 | self.dec = Decoder( 36 | dim_x=dim_x, 37 | dim_y=dim_y, 38 | dim_enc=2*dim_hid, 39 | dim_hid=dim_hid, 40 | depth=dec_depth) 41 | 42 | def predict(self, xc, yc, xt, num_samples=None): 43 | theta1 = self.enc1(xc, yc, xt) 44 | theta2 = self.enc2(xc, yc) 45 | encoded = 
torch.cat([theta1, 46 | torch.stack([theta2]*xt.shape[-2], -2)], -1) 47 | return self.dec(encoded, xt) 48 | 49 | def forward(self, batch, num_samples=None, reduce_ll=True): 50 | outs = AttrDict() 51 | py = self.predict(batch.xc, batch.yc, batch.x) 52 | ll = py.log_prob(batch.y).sum(-1) 53 | 54 | if self.training: 55 | outs.loss = -ll.mean() 56 | else: 57 | num_ctx = batch.xc.shape[-2] 58 | if reduce_ll: 59 | outs.ctx_ll = ll[...,:num_ctx].mean() 60 | outs.tar_ll = ll[...,num_ctx:].mean() 61 | else: 62 | outs.ctx_ll = ll[...,:num_ctx] 63 | outs.tar_ll = ll[...,num_ctx:] 64 | 65 | return outs 66 | -------------------------------------------------------------------------------- /contextual_bandits/models/tnpd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.tnp import TNP 7 | 8 | 9 | class TNPD(TNP): 10 | def __init__( 11 | self, 12 | dim_x, 13 | dim_y, 14 | d_model, 15 | emb_depth, 16 | dim_feedforward, 17 | nhead, 18 | dropout, 19 | num_layers, 20 | drop_y=0.5 21 | ): 22 | super(TNPD, self).__init__( 23 | dim_x, 24 | dim_y, 25 | d_model, 26 | emb_depth, 27 | dim_feedforward, 28 | nhead, 29 | dropout, 30 | num_layers, 31 | drop_y 32 | ) 33 | 34 | self.predictor = nn.Sequential( 35 | nn.Linear(d_model, dim_feedforward), 36 | nn.ReLU(), 37 | nn.Linear(dim_feedforward, dim_y*2) 38 | ) 39 | 40 | def forward(self, batch, reduce_ll=True): 41 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=True) 42 | out = self.predictor(out_encoder) 43 | mean, std = torch.chunk(out, 2, dim=-1) 44 | 45 | std = torch.exp(std) 46 | pred_dist = Normal(mean, std) 47 | loss = - pred_dist.log_prob(batch.y).sum(-1).mean() 48 | 49 | outs = AttrDict() 50 | outs.loss = loss 51 | return outs 52 | 53 | def predict(self, xc, yc, xt): 54 | batch = AttrDict() 55 | batch.xc = xc 56 | batch.yc = yc 57 | batch.xt = xt 58 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 59 | 60 | num_context = xc.shape[1] 61 | 62 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=False) 63 | out = self.predictor(out_encoder) 64 | mean, std = torch.chunk(out, 2, dim=-1) 65 | std = torch.exp(std) 66 | mean, std = mean[:, num_context:, :], std[:, num_context:, :] 67 | 68 | outs = AttrDict() 69 | outs.loc = mean.unsqueeze(0) 70 | outs.scale = std.unsqueeze(0) 71 | outs.ys = Normal(outs.loc, outs.scale) 72 | 73 | return outs -------------------------------------------------------------------------------- /contextual_bandits/models/canp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import CrossAttnEncoder, Decoder, PoolingEncoder 6 | 7 | class CANP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_v_depth=4, 13 | enc_qk_depth=2, 14 | enc_pre_depth=4, 15 | enc_post_depth=2, 16 | dec_depth=3): 17 | 18 | super().__init__() 19 | 20 | self.enc1 = CrossAttnEncoder( 21 | dim_x=dim_x, 22 | dim_y=dim_y, 23 | dim_hid=dim_hid, 24 | v_depth=enc_v_depth, 25 | qk_depth=enc_qk_depth) 26 | 27 | self.enc2 = PoolingEncoder( 28 | dim_x=dim_x, 29 | dim_y=dim_y, 30 | dim_hid=dim_hid, 31 | self_attn=True, 32 | pre_depth=enc_pre_depth, 33 | post_depth=enc_post_depth) 34 | 35 | self.dec = Decoder( 36 | dim_x=dim_x, 37 | dim_y=dim_y, 38 | dim_enc=2*dim_hid, 39 | 
dim_hid=dim_hid, 40 | depth=dec_depth) 41 | 42 | def predict(self, xc, yc, xt, num_samples=None): 43 | theta1 = self.enc1(xc, yc, xt) # [B,Nt,Eh] 44 | theta2 = self.enc2(xc, yc) # [B,Eh] 45 | encoded = torch.cat([theta1, 46 | torch.stack([theta2]*xt.shape[-2], -2)], -1) # [B,Nt,2Eh] 47 | return self.dec(encoded, xt) 48 | 49 | def forward(self, batch, num_samples=None, reduce_ll=True): 50 | outs = AttrDict() 51 | py = self.predict(batch.xc, batch.yc, batch.x) 52 | ll = py.log_prob(batch.y).sum(-1) 53 | 54 | if self.training: 55 | outs.loss = - ll.mean() 56 | else: 57 | num_ctx = batch.xc.shape[-2] 58 | if reduce_ll: 59 | outs.ctx_loss = ll[...,:num_ctx].mean() 60 | outs.tar_loss = ll[...,num_ctx:].mean() 61 | else: 62 | outs.ctx_loss = ll[...,:num_ctx] 63 | outs.tar_loss = ll[...,num_ctx:] 64 | 65 | return outs 66 | -------------------------------------------------------------------------------- /regression/models/tnpd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.distributions.normal import Normal 5 | from attrdict import AttrDict 6 | 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPD(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | bound_std=False 22 | ): 23 | super(TNPD, self).__init__( 24 | dim_x, 25 | dim_y, 26 | d_model, 27 | emb_depth, 28 | dim_feedforward, 29 | nhead, 30 | dropout, 31 | num_layers, 32 | bound_std 33 | ) 34 | 35 | self.predictor = nn.Sequential( 36 | nn.Linear(d_model, dim_feedforward), 37 | nn.ReLU(), 38 | nn.Linear(dim_feedforward, dim_y*2) 39 | ) 40 | 41 | def forward(self, batch, reduce_ll=True): 42 | z_target = self.encode(batch, autoreg=False) 43 | out = self.predictor(z_target) 44 | mean, std = torch.chunk(out, 2, dim=-1) 45 | if self.bound_std: 46 | std = 0.05 + 0.95 * F.softplus(std) 47 | else: 48 | std = torch.exp(std) 49 | 50 | pred_tar = Normal(mean, std) 51 | 52 | outs = AttrDict() 53 | if reduce_ll: 54 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1).mean() 55 | else: 56 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1) 57 | outs.loss = - (outs.tar_ll) 58 | 59 | return outs 60 | 61 | def predict(self, xc, yc, xt): 62 | batch = AttrDict() 63 | batch.xc = xc 64 | batch.yc = yc 65 | batch.xt = xt 66 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 67 | 68 | z_target = self.encode(batch, autoreg=False) 69 | out = self.predictor(z_target) 70 | mean, std = torch.chunk(out, 2, dim=-1) 71 | if self.bound_std: 72 | std = 0.05 + 0.95 * F.softplus(std) 73 | else: 74 | std = torch.exp(std) 75 | 76 | return Normal(mean, std) -------------------------------------------------------------------------------- /contextual_bandits/models/cnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from attrdict import AttrDict 5 | from models.modules import PoolingEncoder, Decoder 6 | 7 | 8 | class CNP(nn.Module): 9 | def __init__(self, 10 | dim_x=1, 11 | dim_y=1, 12 | dim_hid=128, 13 | enc_pre_depth=4, 14 | enc_post_depth=2, 15 | dec_depth=3): 16 | 17 | super().__init__() 18 | 19 | self.enc1 = PoolingEncoder( 20 | dim_x=dim_x, 21 | dim_y=dim_y, 22 | dim_hid=dim_hid, 23 | pre_depth=enc_pre_depth, 24 | post_depth=enc_post_depth) 25 | 26 | self.enc2 = PoolingEncoder( 27 | dim_x=dim_x, 28 | dim_y=dim_y, 29 | 
dim_hid=dim_hid, 30 | pre_depth=enc_pre_depth, 31 | post_depth=enc_post_depth) 32 | 33 | self.dec = Decoder( 34 | dim_x=dim_x, 35 | dim_y=dim_y, 36 | dim_enc=2*dim_hid, 37 | dim_hid=dim_hid, 38 | depth=dec_depth) 39 | 40 | def predict(self, xc, yc, xt, num_samples=None): 41 | encoded = torch.cat([self.enc1(xc, yc), self.enc2(xc, yc)], -1) # [B,2Eh] 42 | encoded = torch.stack([encoded]*xt.shape[-2], -2) # [B,N,2Eh] 43 | return self.dec(encoded, xt) # Normal([B,N,1]) 44 | 45 | def forward(self, batch, num_samples=None, reduce_ll=True): 46 | outs = AttrDict() 47 | py = self.predict(batch.xc, batch.yc, batch.x) # Normal([B,N,1]) 48 | ll = py.log_prob(batch.y).sum(-1) # [B,N] 49 | 50 | if self.training: 51 | outs.loss = -ll.mean() 52 | else: 53 | num_ctx = batch.xc.shape[-2] # Nc 54 | if reduce_ll: 55 | outs.ctx_loss = ll[...,:num_ctx].mean() # [1,] 56 | outs.tar_loss = ll[...,num_ctx:].mean() # [1,] 57 | else: 58 | outs.ctx_loss = ll[...,:num_ctx] # [B,Nc] 59 | outs.tar_loss = ll[...,num_ctx:] # [B,Nt] 60 | 61 | return outs 62 | # {"loss": [1,]} while training 63 | # {"ctx_ll": [1,], "tar_ll": [1,]} while evaluating (if reduce_ll = True) -------------------------------------------------------------------------------- /bayesian_optimization/models/canp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import CrossAttnEncoder, Decoder, PoolingEncoder 6 | 7 | class CANP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_v_depth=4, 13 | enc_qk_depth=2, 14 | enc_pre_depth=4, 15 | enc_post_depth=2, 16 | dec_depth=3): 17 | 18 | super().__init__() 19 | 20 | self.enc1 = CrossAttnEncoder( 21 | dim_x=dim_x, 22 | dim_y=dim_y, 23 | dim_hid=dim_hid, 24 | v_depth=enc_v_depth, 25 | qk_depth=enc_qk_depth) 26 | 27 | self.enc2 = PoolingEncoder( 28 | dim_x=dim_x, 29 | dim_y=dim_y, 30 | dim_hid=dim_hid, 31 | self_attn=True, 32 | pre_depth=enc_pre_depth, 33 | post_depth=enc_post_depth) 34 | 35 | self.dec = Decoder( 36 | dim_x=dim_x, 37 | dim_y=dim_y, 38 | dim_enc=2*dim_hid, 39 | dim_hid=dim_hid, 40 | depth=dec_depth) 41 | 42 | def predict(self, xc, yc, xt, num_samples=None): 43 | if xc.shape[-3] != xt.shape[-3]: 44 | xt = xt.transpose(-3, -2) 45 | theta1 = self.enc1(xc, yc, xt) # [B,Nt,Eh] 46 | theta2 = self.enc2(xc, yc) # [B,Eh] 47 | encoded = torch.cat([theta1, 48 | torch.stack([theta2]*xt.shape[-2], -2)], -1) # [B,Nt,2Eh] 49 | return self.dec(encoded, xt) 50 | 51 | def forward(self, batch, num_samples=None, reduce_ll=True): 52 | outs = AttrDict() 53 | py = self.predict(batch.xc, batch.yc, batch.x) 54 | ll = py.log_prob(batch.y).sum(-1) 55 | 56 | if self.training: 57 | outs.loss = - ll.mean() 58 | else: 59 | num_ctx = batch.xc.shape[-2] 60 | if reduce_ll: 61 | outs.ctx_loss = ll[...,:num_ctx].mean() 62 | outs.tar_loss = ll[...,num_ctx:].mean() 63 | else: 64 | outs.ctx_loss = ll[...,:num_ctx] 65 | outs.tar_loss = ll[...,num_ctx:] 66 | 67 | return outs 68 | -------------------------------------------------------------------------------- /bayesian_optimization/models/cnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from attrdict import AttrDict 5 | from models.modules import PoolingEncoder, Decoder 6 | 7 | 8 | class CNP(nn.Module): 9 | def __init__(self, 10 | dim_x=1, 11 | dim_y=1, 12 | dim_hid=128, 13 | enc_pre_depth=4, 14 | enc_post_depth=2, 15 | 
dec_depth=3): 16 | 17 | super().__init__() 18 | 19 | self.enc1 = PoolingEncoder( 20 | dim_x=dim_x, 21 | dim_y=dim_y, 22 | dim_hid=dim_hid, 23 | pre_depth=enc_pre_depth, 24 | post_depth=enc_post_depth) 25 | 26 | self.enc2 = PoolingEncoder( 27 | dim_x=dim_x, 28 | dim_y=dim_y, 29 | dim_hid=dim_hid, 30 | pre_depth=enc_pre_depth, 31 | post_depth=enc_post_depth) 32 | 33 | self.dec = Decoder( 34 | dim_x=dim_x, 35 | dim_y=dim_y, 36 | dim_enc=2*dim_hid, 37 | dim_hid=dim_hid, 38 | depth=dec_depth) 39 | 40 | def predict(self, xc, yc, xt, num_samples=None): 41 | if xc.shape[-3] != xt.shape[-3]: 42 | xt = xt.transpose(-3, -2) 43 | encoded = torch.cat([self.enc1(xc, yc), self.enc2(xc, yc)], -1) # [B,2Eh] 44 | encoded = torch.stack([encoded]*xt.shape[-2], -2) # [B,N,2Eh] 45 | return self.dec(encoded, xt) # Normal([B,N,1]) 46 | 47 | def forward(self, batch, num_samples=None, reduce_ll=True): 48 | outs = AttrDict() 49 | py = self.predict(batch.xc, batch.yc, batch.x) # Normal([B,N,1]) 50 | ll = py.log_prob(batch.y).sum(-1) # [B,N] 51 | 52 | if self.training: 53 | outs.loss = -ll.mean() 54 | else: 55 | num_ctx = batch.xc.shape[-2] # Nc 56 | if reduce_ll: 57 | outs.ctx_loss = ll[...,:num_ctx].mean() # [1,] 58 | outs.tar_loss = ll[...,num_ctx:].mean() # [1,] 59 | else: 60 | outs.ctx_loss = ll[...,:num_ctx] # [B,Nc] 61 | outs.tar_loss = ll[...,num_ctx:] # [B,Nt] 62 | 63 | return outs 64 | # {"loss": [1,]} while training 65 | # {"ctx_ll": [1,], "tar_ll": [1,]} while evaluating (if reduce_ll = True) -------------------------------------------------------------------------------- /contextual_bandits/models/tnpa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.tnp import TNP 7 | 8 | 9 | class TNPA(TNP): 10 | def __init__( 11 | self, 12 | dim_x, 13 | dim_y, 14 | d_model, 15 | emb_depth, 16 | dim_feedforward, 17 | nhead, 18 | dropout, 19 | num_layers, 20 | drop_y=0.5, 21 | ): 22 | super(TNPA, self).__init__( 23 | dim_x, 24 | dim_y, 25 | d_model, 26 | emb_depth, 27 | dim_feedforward, 28 | nhead, 29 | dropout, 30 | num_layers, 31 | drop_y, 32 | ) 33 | 34 | self.predictor = nn.Sequential( 35 | nn.Linear(d_model, dim_feedforward), 36 | nn.ReLU(), 37 | nn.Linear(dim_feedforward, dim_y*2) 38 | ) 39 | 40 | 41 | def forward(self, batch, reduce_ll=True): 42 | num_ctx, num_all = batch.xc.shape[1], batch.x.shape[1] 43 | 44 | out_encoder = self.encode(batch, autoreg=True, drop_ctx=True) 45 | out_encoder = torch.cat((out_encoder[:, :num_ctx], out_encoder[:, num_all:]), dim=1) 46 | out = self.predictor(out_encoder) 47 | mean, std = torch.chunk(out, 2, dim=-1) 48 | std = torch.exp(std) 49 | 50 | pred_dist = Normal(mean, std) 51 | loss = - pred_dist.log_prob(batch.y).sum(-1).mean() 52 | 53 | outs = AttrDict() 54 | outs.loss = loss 55 | return outs 56 | 57 | def predict(self, xc, yc, xt): 58 | batch = AttrDict() 59 | batch.xc = xc 60 | batch.yc = yc 61 | batch.xt = xt 62 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 63 | 64 | num_context = xc.shape[1] 65 | 66 | # in evaluation tnpa = tnpd because we only have 1 target point to predict 67 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=False) 68 | out = self.predictor(out_encoder) 69 | mean, std = torch.chunk(out, 2, dim=-1) 70 | std = torch.exp(std) 71 | mean, std = mean[:, num_context:, :], std[:, num_context:, :] 72 | 73 | outs = AttrDict() 74 | outs.loc = 
mean.unsqueeze(0) 75 | outs.scale = std.unsqueeze(0) 76 | outs.ys = Normal(outs.loc, outs.scale) 77 | 78 | return outs -------------------------------------------------------------------------------- /regression/data/celeba.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os.path as osp 3 | 4 | from utils.paths import datasets_path 5 | 6 | class CelebA(object): 7 | def __init__(self, train=True): 8 | self.data, self.targets = torch.load( 9 | osp.join(datasets_path, 'celeba', 10 | 'train.pt' if train else 'eval.pt')) 11 | self.data = self.data.float() / 255.0 12 | 13 | if train: 14 | self.data, self.targets = self.data, self.targets 15 | else: 16 | self.data, self.targets = self.data, self.targets 17 | 18 | def __len__(self): 19 | return len(self.data) 20 | 21 | def __getitem__(self, index): 22 | return self.data[index], self.targets[index] 23 | 24 | if __name__ == '__main__': 25 | import os 26 | import os.path as osp 27 | from PIL import Image 28 | from tqdm import tqdm 29 | import numpy as np 30 | import torch 31 | 32 | # load train/val/test split 33 | splitdict = {} 34 | with open(osp.join(datasets_path, 'celeba', 'list_eval_partition.txt'), 'r') as f: 35 | for line in f: 36 | fn, split = line.split() 37 | splitdict[fn] = int(split) 38 | 39 | # load identities 40 | iddict = {} 41 | with open(osp.join(datasets_path, 'celeba', 'identity_CelebA.txt'), 'r') as f: 42 | for line in f: 43 | fn, label = line.split() 44 | iddict[fn] = int(label) 45 | 46 | train_imgs = [] 47 | train_labels = [] 48 | eval_imgs = [] 49 | eval_labels = [] 50 | path = osp.join(datasets_path, 'celeba', 'img_align_celeba') 51 | imgfilenames = os.listdir(path) 52 | for fn in tqdm(imgfilenames): 53 | 54 | img = Image.open(osp.join(path, fn)).resize((32, 32)) 55 | if splitdict[fn] == 2: 56 | eval_imgs.append(torch.LongTensor(np.array(img).transpose(2, 0, 1))) 57 | eval_labels.append(iddict[fn]) 58 | else: 59 | train_imgs.append(torch.LongTensor(np.array(img).transpose(2, 0, 1))) 60 | train_labels.append(iddict[fn]) 61 | 62 | print(f'{len(train_imgs)} train, {len(eval_imgs)} eval') 63 | 64 | train_imgs = torch.stack(train_imgs) 65 | train_labels = torch.LongTensor(train_labels) 66 | torch.save([train_imgs, train_labels], osp.join(datasets_path, 'celeba', 'train.pt')) 67 | 68 | eval_imgs = torch.stack(eval_imgs) 69 | eval_labels = torch.LongTensor(eval_labels) 70 | torch.save([eval_imgs, eval_labels], osp.join(datasets_path, 'celeba', 'eval.pt')) 71 | -------------------------------------------------------------------------------- /regression/models/tnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.modules import build_mlp 5 | 6 | 7 | class TNP(nn.Module): 8 | def __init__( 9 | self, 10 | dim_x, 11 | dim_y, 12 | d_model, 13 | emb_depth, 14 | dim_feedforward, 15 | nhead, 16 | dropout, 17 | num_layers, 18 | bound_std 19 | ): 20 | super(TNP, self).__init__() 21 | 22 | self.embedder = build_mlp(dim_x + dim_y, d_model, d_model, emb_depth) 23 | 24 | encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 25 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers) 26 | 27 | self.bound_std = bound_std 28 | 29 | def construct_input(self, batch, autoreg=False): 30 | x_y_ctx = torch.cat((batch.xc, batch.yc), dim=-1) 31 | x_0_tar = torch.cat((batch.xt, torch.zeros_like(batch.yt)), dim=-1) 32 | if not autoreg: 33 | inp 
= torch.cat((x_y_ctx, x_0_tar), dim=1) 34 | else: 35 | if self.training and self.bound_std: 36 | yt_noise = batch.yt + 0.05 * torch.randn_like(batch.yt) # add noise to the past to smooth the model 37 | x_y_tar = torch.cat((batch.xt, yt_noise), dim=-1) 38 | else: 39 | x_y_tar = torch.cat((batch.xt, batch.yt), dim=-1) 40 | inp = torch.cat((x_y_ctx, x_y_tar, x_0_tar), dim=1) 41 | return inp 42 | 43 | def create_mask(self, batch, autoreg=False): 44 | num_ctx = batch.xc.shape[1] 45 | num_tar = batch.xt.shape[1] 46 | num_all = num_ctx + num_tar 47 | if not autoreg: 48 | mask = torch.zeros(num_all, num_all, device='cuda').fill_(float('-inf')) 49 | mask[:, :num_ctx] = 0.0 50 | else: 51 | mask = torch.zeros((num_all+num_tar, num_all+num_tar), device='cuda').fill_(float('-inf')) 52 | mask[:, :num_ctx] = 0.0 # all points attend to context points 53 | mask[num_ctx:num_all, num_ctx:num_all].triu_(diagonal=1) # each real target point attends to itself and preceding real target points 54 | mask[num_all:, num_ctx:num_all].triu_(diagonal=0) # each fake target point attends to preceding real target points 55 | 56 | return mask, num_tar 57 | 58 | def encode(self, batch, autoreg=False): 59 | inp = self.construct_input(batch, autoreg) 60 | mask, num_tar = self.create_mask(batch, autoreg) 61 | embeddings = self.embedder(inp) 62 | out = self.encoder(embeddings, mask=mask) 63 | return out[:, -num_tar:] -------------------------------------------------------------------------------- /contextual_bandits/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch.distributions import Normal 4 | from utils.misc import stack 5 | 6 | 7 | def compute_nll(mu, sigma, y, ws=None, eps=1e-3, mask=None): 8 | if mask is None: 9 | mask = torch.ones(y.shape, dtype=torch.float32).to(mu.device) 10 | Ns = mu.size(0) 11 | sigma = sigma + eps 12 | py = Normal(mu, sigma) # [Ns,B,N,Dy] 13 | if y.dim() < 4: 14 | y = torch.stack([y] * Ns, 0) # [Ns,B,N,Dy] 15 | ll = (py.log_prob(y) * mask).sum(-1) # [Ns,B,N] 16 | 17 | if ws is not None: 18 | Nbs = ws.size(2) 19 | ll = torch.stack([ll] * Nbs, 2) # [Ns,B,Nbs,N] 20 | ll = (ll * ws).mean(2) # [Ns,B,N] 21 | 22 | return - ll # [Ns,B,N] 23 | 24 | 25 | def compute_beta_nll(mu, sigma, y, ws=None, beta=0.5, eps=1e-3, mask=None): # mu,sigma : [Ns,B,N,Dy], y: [B,N,Dy] ws: [Ns,B,Nbs,N] 26 | Ns = mu.size(0) 27 | sigma = sigma + eps 28 | y = torch.stack([y] * Ns, dim=0) # [Ns,B,N,Dy] 29 | 30 | if mask is None: 31 | mask = torch.ones(y.shape, dtype=torch.float32).to(y.device) 32 | ll_mu = - ((((y - mu) ** 2) / (2 * sigma ** 2)) * mask).sum(-1) # [Ns,B,N] 33 | ll_sigma = - (torch.log(sigma) * mask).sum(-1) # [Ns,B,N] 34 | 35 | if ws is not None: # [Ns,B,Nbs,N] 36 | Nbs = ws.size(2) 37 | _ll_mu = torch.stack([ll_mu] * Nbs, 2) # [Ns,B,Nbs,N] 38 | _ll_sigma = torch.stack([ll_sigma] * Nbs, 2) # [Ns,B,Nbs,N] 39 | _ll_mu = (_ll_mu * ws).mean(2) # [Ns,B,N] 40 | _ll_sigma = (_ll_sigma * ws).mean(2) # [Ns,B,N] 41 | ll = 2 * beta * _ll_mu + (2 - 2 * beta) * _ll_sigma # [Ns,B,N] 42 | else: 43 | ll = 2 * beta * ll_mu + (2 - 2 * beta) * ll_sigma # [Ns,B,N] 44 | 45 | return - ll, - ll_mu, - ll_sigma # [Ns,B,N] all 46 | 47 | 48 | def compute_l2(y_hat, y, ws=None, mask=None): # pred: [Ns,B,Nbs,N,Dy], y: [B,N,Dy] 49 | Ns = y_hat.size(0) 50 | Nbs = y_hat.size(2) 51 | y = torch.stack([torch.stack([y] * Ns, dim=0)] * Nbs, dim=2) # [Ns,B,Nbs,N,Dy] 52 | 53 | if mask is None: 54 | mask = torch.ones(y.shape, dtype=torch.float32).to(y.device) 55 | else:
56 | mask = stack(mask, Nbs, 2) 57 | l2 = (((y_hat - y) ** 2) * mask).sum(-1).mean(2) # [Ns,B,N] 58 | return l2 # [Ns,B,N] 59 | 60 | 61 | def compute_rmse(mean, y, mask=None): # mean: [Ns,B,N,Dy], y: [B,N,Dy] 62 | if mean.dim() == 4: 63 | Ns = mean.size(0) 64 | y = torch.stack([y] * Ns, dim=0) # [Ns,B,N,Dy] 65 | if mask is None: 66 | mask = torch.ones(y.shape, dtype=torch.float32).to(mean.device) 67 | rmse = ((((mean - y) ** 2) * mask).sum(-1).mean(-1) ** 0.5).mean() 68 | elif mean.dim() == 3: # CNP, CANP 69 | if mask is None: 70 | mask = torch.ones(y.shape, dtype=torch.float32).to(mean.device) 71 | rmse = ((((mean - y) ** 2) * mask).sum(-1).mean(-1) ** 0.5).mean() 72 | return rmse 73 | -------------------------------------------------------------------------------- /regression/data/image.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from attrdict import AttrDict 3 | from torch.distributions import StudentT 4 | 5 | def img_to_task(img, num_ctx=None, 6 | max_num_points=None, target_all=False, t_noise=None): 7 | 8 | B, C, H, W = img.shape 9 | num_pixels = H*W 10 | img = img.view(B, C, -1) 11 | 12 | if t_noise is not None: 13 | if t_noise == -1: 14 | t_noise = 0.09 * torch.rand(img.shape) 15 | img += t_noise * StudentT(2.1).rsample(img.shape) 16 | 17 | batch = AttrDict() 18 | max_num_points = max_num_points or num_pixels 19 | num_ctx = num_ctx or \ 20 | torch.randint(low=3, high=max_num_points-3, size=[1]).item() 21 | num_tar = max_num_points - num_ctx if target_all else \ 22 | torch.randint(low=3, high=max_num_points-num_ctx, size=[1]).item() 23 | num_points = num_ctx + num_tar 24 | idxs = torch.cuda.FloatTensor(B, num_pixels).uniform_().argsort(-1)[...,:num_points].to(img.device) 25 | x1, x2 = idxs//W, idxs%W 26 | batch.x = torch.stack([ 27 | 2*x1.float()/(H-1) - 1, 28 | 2*x2.float()/(W-1) - 1], -1).to(img.device) 29 | batch.y = (torch.gather(img, -1, idxs.unsqueeze(-2).repeat(1, C, 1))\ 30 | .transpose(-2, -1) - 0.5).to(img.device) 31 | 32 | batch.xc = batch.x[:,:num_ctx] 33 | batch.xt = batch.x[:,num_ctx:] 34 | batch.yc = batch.y[:,:num_ctx] 35 | batch.yt = batch.y[:,num_ctx:] 36 | 37 | return batch 38 | 39 | def coord_to_img(x, y, shape): 40 | x = x.cpu() 41 | y = y.cpu() 42 | B = x.shape[0] 43 | C, H, W = shape 44 | 45 | I = torch.zeros(B, 3, H, W) 46 | I[:,0,:,:] = 0.61 47 | I[:,1,:,:] = 0.55 48 | I[:,2,:,:] = 0.71 49 | 50 | x1, x2 = x[...,0], x[...,1] 51 | x1 = ((x1+1)*(H-1)/2).round().long() 52 | x2 = ((x2+1)*(W-1)/2).round().long() 53 | for b in range(B): 54 | for c in range(3): 55 | I[b,c,x1[b],x2[b]] = y[b,:,min(c,C-1)] 56 | 57 | return I 58 | 59 | def task_to_img(xc, yc, xt, yt, shape): 60 | xc = xc.cpu() 61 | yc = yc.cpu() 62 | xt = xt.cpu() 63 | yt = yt.cpu() 64 | 65 | B = xc.shape[0] 66 | C, H, W = shape 67 | 68 | xc1, xc2 = xc[...,0], xc[...,1] 69 | xc1 = ((xc1+1)*(H-1)/2).round().long() 70 | xc2 = ((xc2+1)*(W-1)/2).round().long() 71 | 72 | xt1, xt2 = xt[...,0], xt[...,1] 73 | xt1 = ((xt1+1)*(H-1)/2).round().long() 74 | xt2 = ((xt2+1)*(W-1)/2).round().long() 75 | 76 | task_img = torch.zeros(B, 3, H, W).to(xc.device) 77 | task_img[:,2,:,:] = 1.0 78 | task_img[:,1,:,:] = 0.4 79 | for b in range(B): 80 | for c in range(3): 81 | task_img[b,c,xc1[b],xc2[b]] = yc[b,:,min(c,C-1)] + 0.5 82 | task_img = task_img.clamp(0, 1) 83 | 84 | completed_img = task_img.clone() 85 | for b in range(B): 86 | for c in range(3): 87 | completed_img[b,c,xt1[b],xt2[b]] = yt[b,:,min(c,C-1)] + 0.5 88 | completed_img = completed_img.clamp(0, 1) 89 
| 90 | return task_img, completed_img 91 | -------------------------------------------------------------------------------- /regression/models/bnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.cnp import CNP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | class BNP(CNP): 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | self.dec.add_ctx(2*kwargs['dim_hid']) 13 | 14 | def encode(self, xc, yc, xt, mask=None): 15 | encoded = torch.cat([ 16 | self.enc1(xc, yc, mask=mask), 17 | self.enc2(xc, yc, mask=mask)], -1) 18 | return stack(encoded, xt.shape[-2], -2) 19 | 20 | def predict(self, xc, yc, xt, num_samples=None, return_base=False): 21 | with torch.no_grad(): 22 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 23 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 24 | 25 | encoded = self.encode(bxc, byc, sxc) 26 | py_res = self.dec(encoded, sxc) 27 | 28 | mu, sigma = py_res.mean, py_res.scale 29 | res = SWR((syc - mu)/sigma).detach() 30 | res = (res - res.mean(-2, keepdim=True)) 31 | 32 | bxc = sxc 33 | byc = mu + sigma * res 34 | 35 | encoded_base = self.encode(xc, yc, xt) 36 | 37 | sxt = stack(xt, num_samples) 38 | encoded_bs = self.encode(bxc, byc, sxt) 39 | 40 | py = self.dec(stack(encoded_base, num_samples), 41 | sxt, ctx=encoded_bs) 42 | 43 | if self.training or return_base: 44 | py_base = self.dec(encoded_base, xt) 45 | return py_base, py 46 | else: 47 | return py 48 | 49 | def sample(self, xc, yc, xt, num_samples=None): 50 | pred_dist = self.predict(xc, yc, xt, num_samples=num_samples, return_base=False) 51 | return pred_dist.loc 52 | 53 | def forward(self, batch, num_samples=None, reduce_ll=True): 54 | outs = AttrDict() 55 | 56 | def compute_ll(py, y): 57 | ll = py.log_prob(y).sum(-1) 58 | if ll.dim() == 3 and reduce_ll: 59 | ll = logmeanexp(ll) 60 | return ll 61 | 62 | if self.training: 63 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 64 | num_samples=num_samples) 65 | 66 | outs.ll_base = compute_ll(py_base, batch.y).mean() 67 | outs.ll = compute_ll(py, batch.y).mean() 68 | outs.loss = -outs.ll_base - outs.ll 69 | else: 70 | py = self.predict(batch.xc, batch.yc, batch.x, 71 | num_samples=num_samples) 72 | ll = compute_ll(py, batch.y) 73 | num_ctx = batch.xc.shape[-2] 74 | if reduce_ll: 75 | outs.ctx_ll = ll[...,:num_ctx].mean() 76 | outs.tar_ll = ll[...,num_ctx:].mean() 77 | else: 78 | outs.ctx_ll = ll[...,:num_ctx] 79 | outs.tar_ll = ll[...,num_ctx:] 80 | 81 | return outs 82 | -------------------------------------------------------------------------------- /regression/models/banp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.canp import CANP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | class BANP(CANP): 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | self.dec.add_ctx(2*kwargs['dim_hid']) 13 | 14 | def encode(self, xc, yc, xt, mask=None): 15 | theta1 = self.enc1(xc, yc, xt) 16 | theta2 = self.enc2(xc, yc) 17 | encoded = torch.cat([theta1, 18 | torch.stack([theta2]*xt.shape[-2], -2)], -1) 19 | return encoded 20 | 21 | def predict(self, xc, yc, xt, num_samples=None, 
return_base=False): 22 | with torch.no_grad(): 23 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 24 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 25 | 26 | encoded = self.encode(bxc, byc, sxc) 27 | py_res = self.dec(encoded, sxc) 28 | 29 | mu, sigma = py_res.mean, py_res.scale 30 | res = SWR((syc - mu)/sigma).detach() 31 | res = (res - res.mean(-2, keepdim=True)) 32 | 33 | bxc = sxc 34 | byc = mu + sigma * res 35 | 36 | encoded_base = self.encode(xc, yc, xt) 37 | 38 | sxt = stack(xt, num_samples) 39 | encoded_bs = self.encode(bxc, byc, sxt) 40 | 41 | py = self.dec(stack(encoded_base, num_samples), 42 | sxt, ctx=encoded_bs) 43 | 44 | if self.training or return_base: 45 | py_base = self.dec(encoded_base, xt) 46 | return py_base, py 47 | else: 48 | return py 49 | 50 | def sample(self, xc, yc, xt, num_samples=None): 51 | pred_dist = self.predict(xc, yc, xt, num_samples=num_samples, return_base=False) 52 | return pred_dist.loc 53 | 54 | def forward(self, batch, num_samples=None, reduce_ll=True): 55 | outs = AttrDict() 56 | 57 | def compute_ll(py, y): 58 | ll = py.log_prob(y).sum(-1) 59 | if ll.dim() == 3 and reduce_ll: 60 | ll = logmeanexp(ll) 61 | return ll 62 | 63 | if self.training: 64 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 65 | num_samples=num_samples) 66 | 67 | outs.ll_base = compute_ll(py_base, batch.y).mean() 68 | outs.ll = compute_ll(py, batch.y).mean() 69 | outs.loss = -outs.ll_base - outs.ll 70 | else: 71 | py = self.predict(batch.xc, batch.yc, batch.x, 72 | num_samples=num_samples) 73 | ll = compute_ll(py, batch.y) 74 | num_ctx = batch.xc.shape[-2] 75 | if reduce_ll: 76 | outs.ctx_ll = ll[...,:num_ctx].mean() 77 | outs.tar_ll = ll[...,num_ctx:].mean() 78 | else: 79 | outs.ctx_ll = ll[...,:num_ctx] 80 | outs.tar_ll = ll[...,num_ctx:] 81 | 82 | return outs 83 | -------------------------------------------------------------------------------- /contextual_bandits/models/banp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.canp import CANP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BANP(CANP): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | theta1 = self.enc1(xc, yc, xt) 17 | theta2 = self.enc2(xc, yc) 18 | encoded = torch.cat([theta1, 19 | torch.stack([theta2] * xt.shape[-2], -2)], -1) 20 | return encoded 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu) / sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | del sxc, mu, sigma, res 42 | 43 | encoded_base = self.encode(xc, yc, xt) 44 | del xc, yc 45 | 46 | sxt = stack(xt, num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | del bxc, byc 49 | 50 | py = self.dec(stack(encoded_base, num_samples), 51 | sxt, ctx=encoded_bs) 52 | del sxt, encoded_bs 53 
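# The del statements above release stacked intermediates as soon as they are consumed: predict() keeps num_samples bootstrap copies of the context alive at once, so freeing them early reduces peak memory.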
| 54 | if self.training or return_base: 55 | py_base = self.dec(encoded_base, xt) 56 | return py_base, py 57 | else: 58 | return py 59 | 60 | def forward(self, batch, num_samples=None, reduce_ll=True): 61 | outs = AttrDict() 62 | 63 | def compute_ll(py, y): 64 | ll = py.log_prob(y).sum(-1) 65 | if ll.dim() == 3 and reduce_ll: 66 | ll = logmeanexp(ll) 67 | return ll 68 | 69 | if self.training: 70 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 71 | num_samples=num_samples) 72 | 73 | outs.ll_base = compute_ll(py_base, batch.y).mean() 74 | outs.ll = compute_ll(py, batch.y).mean() 75 | outs.loss = - outs.ll_base - outs.ll 76 | else: 77 | py = self.predict(batch.xc, batch.yc, batch.x, 78 | num_samples=num_samples) 79 | ll = compute_ll(py, batch.y) 80 | num_ctx = batch.xc.shape[-2] 81 | if reduce_ll: 82 | outs.ctx_loss = ll[..., :num_ctx].mean() 83 | outs.tar_loss = ll[..., num_ctx:].mean() 84 | else: 85 | outs.ctx_loss = ll[..., :num_ctx] 86 | outs.tar_loss = ll[..., num_ctx:] 87 | 88 | return outs 89 | -------------------------------------------------------------------------------- /bayesian_optimization/models/banp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.canp import CANP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BANP(CANP): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | theta1 = self.enc1(xc, yc, xt) 17 | theta2 = self.enc2(xc, yc) 18 | encoded = torch.cat([theta1, 19 | torch.stack([theta2] * xt.shape[-2], -2)], -1) 20 | return encoded 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu) / sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | del sxc, mu, sigma, res 42 | 43 | encoded_base = self.encode(xc, yc, xt) 44 | del xc, yc 45 | 46 | sxt = stack(xt, num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | del bxc, byc 49 | 50 | py = self.dec(stack(encoded_base, num_samples), 51 | sxt, ctx=encoded_bs) 52 | del sxt, encoded_bs 53 | 54 | if self.training or return_base: 55 | py_base = self.dec(encoded_base, xt) 56 | return py_base, py 57 | else: 58 | return py 59 | 60 | def forward(self, batch, num_samples=None, reduce_ll=True): 61 | outs = AttrDict() 62 | 63 | def compute_ll(py, y): 64 | ll = py.log_prob(y).sum(-1) 65 | if ll.dim() == 3 and reduce_ll: 66 | ll = logmeanexp(ll) 67 | return ll 68 | 69 | if self.training: 70 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 71 | num_samples=num_samples) 72 | 73 | outs.ll_base = compute_ll(py_base, batch.y).mean() 74 | outs.ll = compute_ll(py, batch.y).mean() 75 | outs.loss = - outs.ll_base - outs.ll 76 | else: 77 | py = self.predict(batch.xc, batch.yc, batch.x, 78 | num_samples=num_samples) 79 | ll = compute_ll(py, batch.y) 80 | num_ctx = batch.xc.shape[-2] 
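# batch.x is the concatenation [xc; xt] along the point axis, so the first num_ctx positions hold context points and the remainder are targets.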
81 | if reduce_ll: 82 | outs.ctx_loss = ll[..., :num_ctx].mean() 83 | outs.tar_loss = ll[..., num_ctx:].mean() 84 | else: 85 | outs.ctx_loss = ll[..., :num_ctx] 86 | outs.tar_loss = ll[..., num_ctx:] 87 | 88 | return outs 89 | -------------------------------------------------------------------------------- /bayesian_optimization/models/bnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.cnp import CNP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BNP(CNP): 11 | def __init__(self, *args, **kwargs): 12 | super(BNP, self).__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | encoded = torch.cat([ 17 | self.enc1(xc, yc, mask=mask), 18 | self.enc2(xc, yc, mask=mask)], -1) 19 | 20 | return stack(encoded, num_samples=xt.shape[-2], dim=-2) 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False, get_bootstrap=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples=num_samples), stack(yc, num_samples=num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu)/sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | if get_bootstrap: 42 | return bxc, byc 43 | 44 | encoded_base = self.encode(xc, yc, xt) 45 | 46 | sxt = stack(xt, num_samples=num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | 49 | py = self.dec(stack(encoded_base, num_samples), 50 | sxt, ctx=encoded_bs) 51 | 52 | if self.training or return_base: 53 | py_base = self.dec(encoded_base, xt) 54 | return py_base, py 55 | else: 56 | return py 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | 61 | def compute_ll(dist, y): 62 | loglikelihood = dist.log_prob(y).sum(-1) 63 | if loglikelihood.dim() == 3 and reduce_ll: 64 | loglikelihood = logmeanexp(loglikelihood) 65 | return loglikelihood 66 | 67 | if self.training: 68 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 69 | num_samples=num_samples) 70 | 71 | outs.ll_base = compute_ll(py_base, batch.y).mean() 72 | outs.ll = compute_ll(py, batch.y).mean() 73 | outs.loss = - outs.ll_base - outs.ll 74 | else: 75 | py = self.predict(batch.xc, batch.yc, batch.x, 76 | num_samples=num_samples) 77 | ll = compute_ll(py, batch.y) 78 | num_ctx = batch.xc.shape[-2] 79 | if reduce_ll: 80 | outs.ctx_loss = ll[...,:num_ctx].mean() 81 | outs.tar_loss = ll[...,num_ctx:].mean() 82 | else: 83 | outs.ctx_loss = ll[...,:num_ctx] 84 | outs.tar_loss = ll[...,num_ctx:] 85 | 86 | return outs 87 | -------------------------------------------------------------------------------- /contextual_bandits/models/bnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.cnp import CNP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BNP(CNP): 11 | def __init__(self, *args, **kwargs): 12 
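# BNP reuses the CNP encoders/decoder; the decoder context is widened by 2*dim_hid so it can additionally condition on the bootstrap encoding produced in predict().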
| super(BNP, self).__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | encoded = torch.cat([ 17 | self.enc1(xc, yc, mask=mask), 18 | self.enc2(xc, yc, mask=mask)], -1) 19 | 20 | return stack(encoded, num_samples=xt.shape[-2], dim=-2) 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False, get_bootstrap=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples=num_samples), stack(yc, num_samples=num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu)/sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | if get_bootstrap: 42 | return bxc, byc 43 | 44 | encoded_base = self.encode(xc, yc, xt) 45 | 46 | sxt = stack(xt, num_samples=num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | 49 | py = self.dec(stack(encoded_base, num_samples), 50 | sxt, ctx=encoded_bs) 51 | 52 | if self.training or return_base: 53 | py_base = self.dec(encoded_base, xt) 54 | return py_base, py 55 | else: 56 | return py 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | 61 | def compute_ll(dist, y): 62 | loglikelihood = dist.log_prob(y).sum(-1) 63 | if loglikelihood.dim() == 3 and reduce_ll: 64 | loglikelihood = logmeanexp(loglikelihood) 65 | return loglikelihood 66 | 67 | if self.training: 68 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 69 | num_samples=num_samples) 70 | 71 | outs.ll_base = compute_ll(py_base, batch.y).mean() 72 | outs.ll = compute_ll(py, batch.y).mean() 73 | outs.loss = - outs.ll_base - outs.ll 74 | else: 75 | py = self.predict(batch.xc, batch.yc, batch.x, 76 | num_samples=num_samples) 77 | ll = compute_ll(py, batch.y) 78 | num_ctx = batch.xc.shape[-2] 79 | if reduce_ll: 80 | outs.ctx_loss = ll[...,:num_ctx].mean() 81 | outs.tar_loss = ll[...,num_ctx:].mean() 82 | else: 83 | outs.ctx_loss = ll[...,:num_ctx] 84 | outs.tar_loss = ll[...,num_ctx:] 85 | 86 | return outs 87 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from torch.distributions.normal import Normal 6 | from attrdict import AttrDict 7 | 8 | from models.modules import build_mlp 9 | 10 | 11 | class TNP(nn.Module): 12 | def __init__( 13 | self, 14 | dim_x, 15 | dim_y, 16 | d_model, 17 | emb_depth, 18 | dim_feedforward, 19 | nhead, 20 | dropout, 21 | num_layers, 22 | ): 23 | super(TNP, self).__init__() 24 | 25 | self.embedder = build_mlp(dim_x + dim_y, d_model, d_model, emb_depth) 26 | 27 | encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 28 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers) 29 | 30 | def construct_input(self, batch, autoreg=False): 31 | x_y_ctx = torch.cat((batch.xc, batch.yc), dim=-1) 32 | x_0_tar = torch.cat((batch.xt, torch.zeros_like(batch.yt)), dim=-1) 33 | if not autoreg: 34 | inp = torch.cat((x_y_ctx, x_0_tar), dim=1) 35 | else: 36 | x_y_tar = torch.cat((batch.xt, batch.yt), dim=-1)
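# autoregressive layout: [context (x, y); real targets (x, y); fake targets (x, 0)]; predictions are read off at the fake-target positions.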
37 | inp = torch.cat((x_y_ctx, x_y_tar, x_0_tar), dim=1) 38 | return inp 39 | 40 | def create_mask(self, batch, autoreg=False): 41 | num_ctx = batch.xc.shape[1] 42 | num_tar = batch.xt.shape[1] 43 | num_all = num_ctx + num_tar 44 | if not autoreg: 45 | mask = torch.zeros(num_all, num_all, device='cuda') 46 | mask[:, num_ctx:] = float('-inf') 47 | else: 48 | mask = torch.zeros((num_all+num_tar, num_all+num_tar), device='cuda').fill_(float('-inf')) 49 | mask[:, :num_ctx] = 0.0 # all points attend to context points 50 | mask[num_ctx:num_all, num_ctx:num_all].triu_(diagonal=1) # each real target point attends to itself and preceding real target points 51 | mask[num_all:, num_ctx:num_all].triu_(diagonal=0) # each fake target point attends to preceding real target points 52 | 53 | return mask, num_tar 54 | 55 | def construct_input_pretrain(self, batch): 56 | x_y = torch.cat((batch.x, batch.y), dim=-1) 57 | x_0 = torch.cat((batch.x, torch.zeros_like(batch.y)), dim=-1)[:, 1:] 58 | inp = torch.cat((x_y, x_0), dim=1) 59 | return inp 60 | 61 | def create_mask_pretrain(self, batch): 62 | num_points = batch.x.shape[1] 63 | 64 | mask = torch.zeros((2*num_points-1, 2*num_points-1), device='cuda').fill_(float('-inf')) 65 | mask[:num_points, :num_points].triu_(diagonal=1) 66 | mask[num_points:, 1:num_points].triu_(diagonal=0) 67 | mask[num_points:, 0] = 0.0 68 | 69 | return mask, num_points-1 70 | 71 | def encode(self, batch, autoreg=False, pretrain=False): 72 | if not pretrain: 73 | inp = self.construct_input(batch, autoreg) 74 | mask, num_tar = self.create_mask(batch, autoreg) 75 | else: 76 | inp = self.construct_input_pretrain(batch) 77 | mask, num_tar = self.create_mask_pretrain(batch) 78 | embeddings = self.embedder(inp) 79 | out = self.encoder(embeddings, mask=mask) 80 | return out[:, -num_tar:] -------------------------------------------------------------------------------- /contextual_bandits/models/tnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.modules import build_mlp 5 | 6 | 7 | class TNP(nn.Module): 8 | def __init__( 9 | self, 10 | dim_x, 11 | dim_y, 12 | d_model, 13 | emb_depth, 14 | dim_feedforward, 15 | nhead, 16 | dropout, 17 | num_layers, 18 | drop_y=0.5 19 | ): 20 | super(TNP, self).__init__() 21 | 22 | self.drop_y = drop_y 23 | self.embedder = build_mlp(dim_x + dim_y, d_model, d_model, emb_depth) 24 | 25 | encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 26 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers) 27 | 28 | def drop(self, y): 29 | y_dropped = torch.randn_like(y) 30 | not_drop_ids = torch.rand_like(y) > self.drop_y 31 | y_dropped[not_drop_ids] = y[not_drop_ids] 32 | return y_dropped 33 | 34 | def construct_input(self, batch, autoreg=False, drop_ctx=False): 35 | if drop_ctx: 36 | yc_dropped = self.drop(batch.yc) 37 | x_y_ctx = torch.cat((batch.xc, yc_dropped), dim=-1) 38 | else: 39 | x_y_ctx = torch.cat((batch.xc, batch.yc), dim=-1) 40 | x_0_tar = torch.cat((batch.xt, torch.zeros_like(batch.yt)), dim=-1) 41 | if not autoreg: 42 | inp = torch.cat((x_y_ctx, x_0_tar), dim=1) 43 | else: 44 | x_y_tar = torch.cat((batch.xt, batch.yt), dim=-1) 45 | inp = torch.cat((x_y_ctx, x_y_tar, x_0_tar), dim=1) 46 | return inp 47 | 48 | def create_mask(self, batch, autoreg=False): 49 | num_ctx = batch.xc.shape[1] 50 | num_tar = batch.xt.shape[1] 51 | num_all = num_ctx + num_tar 52 | if not autoreg: 53 | mask = 
torch.zeros(num_all, num_all, device='cuda') 54 | mask[:, num_ctx:] = float('-inf') 55 | else: 56 | mask = torch.zeros((num_all+num_tar, num_all+num_tar), device='cuda').fill_(float('-inf')) 57 | mask[:, :num_ctx] = 0.0 # all points attend to context points 58 | mask[num_ctx:num_all, num_ctx:num_all].triu_(diagonal=1) # each real target point attends to itself and preceding real target points 59 | mask[num_all:, num_ctx:num_all].triu_(diagonal=0) # each fake target point attends to preceding real target points 60 | 61 | return mask 62 | 63 | def construct_input_pretrain(self, batch, drop_y): 64 | if drop_y: 65 | y = self.drop(batch.y) 66 | else: 67 | y = batch.y 68 | x_y = torch.cat((batch.x, y), dim=-1) 69 | x_0 = torch.cat((batch.x, torch.zeros_like(batch.y)), dim=-1)[:, 1:] 70 | inp = torch.cat((x_y, x_0), dim=1) 71 | return inp 72 | 73 | def create_mask_pretrain(self, batch): 74 | num_points = batch.x.shape[1] 75 | 76 | mask = torch.zeros((2*num_points-1, 2*num_points-1), device='cuda').fill_(float('-inf')) 77 | mask[:num_points, :num_points].triu_(diagonal=1) 78 | mask[num_points:, 1:num_points].triu_(diagonal=0) 79 | mask[num_points:, 0] = 0.0 80 | 81 | return mask 82 | 83 | def encode(self, batch, autoreg=False, drop_ctx=False, pretrain=False): 84 | if not pretrain: 85 | inp = self.construct_input(batch, autoreg, drop_ctx) 86 | mask = self.create_mask(batch, autoreg) 87 | else: 88 | inp = self.construct_input_pretrain(batch, drop_ctx) 89 | mask = self.create_mask_pretrain(batch) 90 | embeddings = self.embedder(inp) 91 | out = self.encoder(embeddings, mask=mask) 92 | return out -------------------------------------------------------------------------------- /bayesian_optimization/models/np.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | from models.modules import PoolingEncoder, Decoder 9 | 10 | class NP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_pre_depth=4, 17 | enc_post_depth=2, 18 | dec_depth=3): 19 | 20 | super().__init__() 21 | 22 | self.denc = PoolingEncoder( 23 | dim_x=dim_x, 24 | dim_y=dim_y, 25 | dim_hid=dim_hid, 26 | pre_depth=enc_pre_depth, 27 | post_depth=enc_post_depth) 28 | 29 | self.lenc = PoolingEncoder( 30 | dim_x=dim_x, 31 | dim_y=dim_y, 32 | dim_hid=dim_hid, 33 | dim_lat=dim_lat, 34 | pre_depth=enc_pre_depth, 35 | post_depth=enc_post_depth) 36 | 37 | self.dec = Decoder( 38 | dim_x=dim_x, 39 | dim_y=dim_y, 40 | dim_enc=dim_hid+dim_lat, 41 | dim_hid=dim_hid, 42 | depth=dec_depth) 43 | 44 | def predict(self, xc, yc, xt, z=None, num_samples=None): 45 | # statement added to make the model usable with botorch 46 | if xc.shape[-3] != xt.shape[-3]: 47 | xt = xt.transpose(-3, -2) 48 | 49 | theta = stack(self.denc(xc, yc), num_samples) 50 | if z is None: 51 | pz = self.lenc(xc, yc) 52 | z = pz.rsample() if num_samples is None \ 53 | else pz.rsample([num_samples]) 54 | encoded = torch.cat([theta, z], -1) 55 | encoded = stack(encoded, xt.shape[-2], -2) 56 | return self.dec(encoded, stack(xt, num_samples)) 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | if self.training: 61 | pz = self.lenc(batch.xc, batch.yc) 62 | qz = self.lenc(batch.x, batch.y) 63 | z = qz.rsample() if num_samples is None else \ 64 | 
qz.rsample([num_samples]) 65 | py = self.predict(batch.xc, batch.yc, batch.x, 66 | z=z, num_samples=num_samples) 67 | 68 | if num_samples > 1: 69 | # K * B * N 70 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 71 | # K * B 72 | log_qz = qz.log_prob(z).sum(-1) 73 | log_pz = pz.log_prob(z).sum(-1) 74 | 75 | # K * B 76 | log_w = recon.sum(-1) + log_pz - log_qz 77 | 78 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 79 | else: 80 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 81 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 82 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 83 | 84 | else: 85 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 86 | if num_samples is None: 87 | ll = py.log_prob(batch.y).sum(-1) 88 | else: 89 | y = torch.stack([batch.y]*num_samples) 90 | if reduce_ll: 91 | ll = logmeanexp(py.log_prob(y).sum(-1)) 92 | else: 93 | ll = py.log_prob(y).sum(-1) 94 | num_ctx = batch.xc.shape[-2] 95 | if reduce_ll: 96 | outs.ctx_loss = ll[...,:num_ctx].mean() 97 | outs.tar_loss = ll[...,num_ctx:].mean() 98 | else: 99 | outs.ctx_loss = ll[...,:num_ctx] 100 | outs.tar_loss = ll[...,num_ctx:] 101 | return outs 102 | -------------------------------------------------------------------------------- /contextual_bandits/models/np.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | from models.modules import PoolingEncoder, Decoder 9 | 10 | class NP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_pre_depth=4, 17 | enc_post_depth=2, 18 | dec_depth=3): 19 | 20 | super().__init__() 21 | 22 | self.denc = PoolingEncoder( 23 | dim_x=dim_x, 24 | dim_y=dim_y, 25 | dim_hid=dim_hid, 26 | pre_depth=enc_pre_depth, 27 | post_depth=enc_post_depth) 28 | 29 | self.lenc = PoolingEncoder( 30 | dim_x=dim_x, 31 | dim_y=dim_y, 32 | dim_hid=dim_hid, 33 | dim_lat=dim_lat, 34 | pre_depth=enc_pre_depth, 35 | post_depth=enc_post_depth) 36 | 37 | self.dec = Decoder( 38 | dim_x=dim_x, 39 | dim_y=dim_y, 40 | dim_enc=dim_hid+dim_lat, 41 | dim_hid=dim_hid, 42 | depth=dec_depth) 43 | 44 | def predict(self, xc, yc, xt, z=None, num_samples=None): 45 | # statement added to make the model usable with botorch 46 | if xc.shape[-3] != xt.shape[-3]: 47 | xt = xt.transpose(-3, -2) 48 | 49 | theta = stack(self.denc(xc, yc), num_samples) 50 | if z is None: 51 | pz = self.lenc(xc, yc) 52 | z = pz.rsample() if num_samples is None \ 53 | else pz.rsample([num_samples]) 54 | encoded = torch.cat([theta, z], -1) 55 | encoded = stack(encoded, xt.shape[-2], -2) 56 | return self.dec(encoded, stack(xt, num_samples)) 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | if self.training: 61 | pz = self.lenc(batch.xc, batch.yc) 62 | qz = self.lenc(batch.x, batch.y) 63 | z = qz.rsample() if num_samples is None else \ 64 | qz.rsample([num_samples]) 65 | py = self.predict(batch.xc, batch.yc, batch.x, 66 | z=z, num_samples=num_samples) 67 | 68 | if num_samples > 1: 69 | # K * B * N 70 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 71 | # K * B 72 | log_qz = qz.log_prob(z).sum(-1) 73 | log_pz = pz.log_prob(z).sum(-1) 74 | 75 | # K * B 76 | log_w = recon.sum(-1) + log_pz - log_qz 77 | 78 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 79 
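# With K = num_samples > 1 latent samples this is an importance-weighted (IWAE-style) bound: loss = -logmeanexp_k[log p(y|z_k) + log p(z_k) - log q(z_k)] / N. The single-sample else-branch below falls back to the standard NP ELBO, reconstruction minus a KL term.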
| else: 80 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 81 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 82 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 83 | 84 | else: 85 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 86 | if num_samples is None: 87 | ll = py.log_prob(batch.y).sum(-1) 88 | else: 89 | y = torch.stack([batch.y]*num_samples) 90 | if reduce_ll: 91 | ll = logmeanexp(py.log_prob(y).sum(-1)) 92 | else: 93 | ll = py.log_prob(y).sum(-1) 94 | num_ctx = batch.xc.shape[-2] 95 | if reduce_ll: 96 | outs.ctx_loss = ll[...,:num_ctx].mean() 97 | outs.tar_loss = ll[...,num_ctx:].mean() 98 | else: 99 | outs.ctx_loss = ll[...,:num_ctx] 100 | outs.tar_loss = ll[...,num_ctx:] 101 | return outs 102 | -------------------------------------------------------------------------------- /regression/models/np.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | from models.modules import PoolingEncoder, Decoder 9 | 10 | class NP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_pre_depth=4, 17 | enc_post_depth=2, 18 | dec_depth=3): 19 | 20 | super().__init__() 21 | 22 | self.denc = PoolingEncoder( 23 | dim_x=dim_x, 24 | dim_y=dim_y, 25 | dim_hid=dim_hid, 26 | pre_depth=enc_pre_depth, 27 | post_depth=enc_post_depth) 28 | 29 | self.lenc = PoolingEncoder( 30 | dim_x=dim_x, 31 | dim_y=dim_y, 32 | dim_hid=dim_hid, 33 | dim_lat=dim_lat, 34 | pre_depth=enc_pre_depth, 35 | post_depth=enc_post_depth) 36 | 37 | self.dec = Decoder( 38 | dim_x=dim_x, 39 | dim_y=dim_y, 40 | dim_enc=dim_hid+dim_lat, 41 | dim_hid=dim_hid, 42 | depth=dec_depth) 43 | 44 | def predict(self, xc, yc, xt, z=None, num_samples=None): 45 | theta = stack(self.denc(xc, yc), num_samples) 46 | if z is None: 47 | pz = self.lenc(xc, yc) 48 | z = pz.rsample() if num_samples is None \ 49 | else pz.rsample([num_samples]) 50 | encoded = torch.cat([theta, z], -1) 51 | encoded = stack(encoded, xt.shape[-2], -2) 52 | return self.dec(encoded, stack(xt, num_samples)) 53 | 54 | def sample(self, xc, yc, xt, z=None, num_samples=None): 55 | pred_dist = self.predict(xc, yc, xt, z, num_samples) 56 | return pred_dist.loc 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | if self.training: 61 | pz = self.lenc(batch.xc, batch.yc) 62 | qz = self.lenc(batch.x, batch.y) 63 | z = qz.rsample() if num_samples is None else \ 64 | qz.rsample([num_samples]) 65 | py = self.predict(batch.xc, batch.yc, batch.x, 66 | z=z, num_samples=num_samples) 67 | 68 | if num_samples > 1: 69 | # K * B * N 70 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 71 | # K * B 72 | log_qz = qz.log_prob(z).sum(-1) 73 | log_pz = pz.log_prob(z).sum(-1) 74 | 75 | # K * B 76 | log_w = recon.sum(-1) + log_pz - log_qz 77 | 78 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 79 | else: 80 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 81 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 82 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 83 | 84 | else: 85 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 86 | if num_samples is None: 87 | ll = py.log_prob(batch.y).sum(-1) 88 | else: 89 | y = torch.stack([batch.y]*num_samples) 90 | if reduce_ll: 91 
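# logmeanexp marginalizes over the K stacked samples in log space: log (1/K) sum_k p(y | z_k).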
| ll = logmeanexp(py.log_prob(y).sum(-1)) 92 | else: 93 | ll = py.log_prob(y).sum(-1) 94 | num_ctx = batch.xc.shape[-2] 95 | if reduce_ll: 96 | outs.ctx_ll = ll[...,:num_ctx].mean() 97 | outs.tar_ll = ll[...,num_ctx:].mean() 98 | else: 99 | outs.ctx_ll = ll[...,:num_ctx] 100 | outs.tar_ll = ll[...,num_ctx:] 101 | return outs 102 | -------------------------------------------------------------------------------- /bayesian_optimization/models/anp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from models.modules import CrossAttnEncoder, PoolingEncoder, Decoder 8 | 9 | 10 | class ANP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_v_depth=4, 17 | enc_qk_depth=2, 18 | enc_pre_depth=4, 19 | enc_post_depth=2, 20 | dec_depth=3): 21 | super(ANP, self).__init__() 22 | 23 | self.denc = CrossAttnEncoder( 24 | dim_x=dim_x, 25 | dim_y=dim_y, 26 | dim_hid=dim_hid, 27 | v_depth=enc_v_depth, 28 | qk_depth=enc_qk_depth) 29 | 30 | self.lenc = PoolingEncoder( 31 | dim_x=dim_x, 32 | dim_y=dim_y, 33 | dim_hid=dim_hid, 34 | dim_lat=dim_lat, 35 | self_attn=True, 36 | pre_depth=enc_pre_depth, 37 | post_depth=enc_post_depth) 38 | 39 | self.dec = Decoder( 40 | dim_x=dim_x, 41 | dim_y=dim_y, 42 | dim_enc=dim_hid + dim_lat, 43 | dim_hid=dim_hid, 44 | depth=dec_depth) 45 | 46 | def predict(self, xc, yc, xt, z=None, num_samples=None): 47 | # statement added to make the model usable with botorch 48 | if xc.shape[-3] != xt.shape[-3]: 49 | xt = xt.transpose(-3, -2) 50 | 51 | theta = stack(self.denc(xc, yc, xt), num_samples) 52 | if z is None: 53 | pz = self.lenc(xc, yc) 54 | z = pz.rsample() if num_samples is None \ 55 | else pz.rsample([num_samples]) 56 | z = stack(z, xt.shape[-2], dim=-2) 57 | encoded = torch.cat([theta, z], -1) 58 | return self.dec(encoded, stack(xt, num_samples)) 59 | 60 | def forward(self, batch, num_samples=None, reduce_ll=True): 61 | outs = AttrDict() 62 | 63 | if self.training: 64 | pz = self.lenc(batch.xc, batch.yc) 65 | qz = self.lenc(batch.x, batch.y) 66 | z = qz.rsample() if num_samples is None else \ 67 | qz.rsample([num_samples]) 68 | py = self.predict(batch.xc, batch.yc, batch.x, 69 | z=z, num_samples=num_samples) 70 | 71 | if num_samples > 1: 72 | # K * B * N 73 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 74 | # K * B 75 | log_qz = qz.log_prob(z).sum(-1) 76 | log_pz = pz.log_prob(z).sum(-1) 77 | 78 | # K * B 79 | log_w = recon.sum(-1) + log_pz - log_qz 80 | 81 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 82 | else: 83 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 84 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 85 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 86 | 87 | else: 88 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 89 | 90 | if num_samples is None: 91 | ll = py.log_prob(batch.y).sum(-1) 92 | else: 93 | y = torch.stack([batch.y] * num_samples) 94 | if reduce_ll: 95 | ll = logmeanexp(py.log_prob(y).sum(-1)) 96 | else: 97 | ll = py.log_prob(y).sum(-1) 98 | 99 | num_ctx = batch.xc.shape[-2] 100 | 101 | if reduce_ll: 102 | outs.ctx_ll = ll[..., :num_ctx].mean() 103 | outs.tar_ll = ll[..., num_ctx:].mean() 104 | else: 105 | outs.ctx_ll = ll[..., :num_ctx] 106 | outs.tar_ll = ll[..., num_ctx:] 107 | 108 | return outs 109 | 
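All of the NP-family models above expose the same evaluation interface. A minimal usage sketch follows; the tensor shapes and the trained `model` variable are illustrative assumptions, not part of the repository:

import torch
from attrdict import AttrDict

# Build an evaluation batch; the models slice batch.x/batch.y at num_ctx = batch.xc.shape[-2].
batch = AttrDict()
batch.xc = torch.randn(16, 30, 1)   # [B, Nc, dim_x] context inputs
batch.yc = torch.randn(16, 30, 1)   # [B, Nc, dim_y] context outputs
batch.xt = torch.randn(16, 50, 1)   # [B, Nt, dim_x] target inputs
batch.yt = torch.randn(16, 50, 1)   # [B, Nt, dim_y] target outputs
batch.x = torch.cat([batch.xc, batch.xt], dim=1)
batch.y = torch.cat([batch.yc, batch.yt], dim=1)

model.eval()                        # take the non-training branch of forward()
with torch.no_grad():
    outs = model(batch, num_samples=50)   # K samples, reduced via logmeanexp
    py = model.predict(batch.xc, batch.yc, batch.xt, num_samples=50)

# Note the naming difference: the regression models report outs.ctx_ll / outs.tar_ll,
# while the BO and bandit copies store the same log-likelihoods as outs.ctx_loss / outs.tar_loss.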
-------------------------------------------------------------------------------- /contextual_bandits/models/anp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from models.modules import CrossAttnEncoder, PoolingEncoder, Decoder 8 | 9 | 10 | class ANP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_v_depth=4, 17 | enc_qk_depth=2, 18 | enc_pre_depth=4, 19 | enc_post_depth=2, 20 | dec_depth=3): 21 | super(ANP, self).__init__() 22 | 23 | self.denc = CrossAttnEncoder( 24 | dim_x=dim_x, 25 | dim_y=dim_y, 26 | dim_hid=dim_hid, 27 | self_attn=True, 28 | v_depth=enc_v_depth, 29 | qk_depth=enc_qk_depth) 30 | 31 | self.lenc = PoolingEncoder( 32 | dim_x=dim_x, 33 | dim_y=dim_y, 34 | dim_hid=dim_hid, 35 | dim_lat=dim_lat, 36 | self_attn=True, 37 | pre_depth=enc_pre_depth, 38 | post_depth=enc_post_depth) 39 | 40 | self.dec = Decoder( 41 | dim_x=dim_x, 42 | dim_y=dim_y, 43 | dim_enc=dim_hid + dim_lat, 44 | dim_hid=dim_hid, 45 | depth=dec_depth) 46 | 47 | def predict(self, xc, yc, xt, z=None, num_samples=None): 48 | # statement added to make the model usable with botorch 49 | if xc.shape[-3] != xt.shape[-3]: 50 | xt = xt.transpose(-3, -2) 51 | 52 | theta = stack(self.denc(xc, yc, xt), num_samples) 53 | if z is None: 54 | pz = self.lenc(xc, yc) 55 | z = pz.rsample() if num_samples is None \ 56 | else pz.rsample([num_samples]) 57 | z = stack(z, xt.shape[-2], dim=-2) 58 | encoded = torch.cat([theta, z], -1) 59 | return self.dec(encoded, stack(xt, num_samples)) 60 | 61 | def forward(self, batch, num_samples=None, reduce_ll=True): 62 | outs = AttrDict() 63 | 64 | if self.training: 65 | pz = self.lenc(batch.xc, batch.yc) 66 | qz = self.lenc(batch.x, batch.y) 67 | z = qz.rsample() if num_samples is None else \ 68 | qz.rsample([num_samples]) 69 | py = self.predict(batch.xc, batch.yc, batch.x, 70 | z=z, num_samples=num_samples) 71 | 72 | if num_samples > 1: 73 | # K * B * N 74 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 75 | # K * B 76 | log_qz = qz.log_prob(z).sum(-1) 77 | log_pz = pz.log_prob(z).sum(-1) 78 | 79 | # K * B 80 | log_w = recon.sum(-1) + log_pz - log_qz 81 | 82 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 83 | else: 84 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 85 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 86 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 87 | 88 | else: 89 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 90 | 91 | if num_samples is None: 92 | ll = py.log_prob(batch.y).sum(-1) 93 | else: 94 | y = torch.stack([batch.y] * num_samples) 95 | if reduce_ll: 96 | ll = logmeanexp(py.log_prob(y).sum(-1)) 97 | else: 98 | ll = py.log_prob(y).sum(-1) 99 | 100 | num_ctx = batch.xc.shape[-2] 101 | 102 | if reduce_ll: 103 | outs.ctx_ll = ll[..., :num_ctx].mean() 104 | outs.tar_ll = ll[..., num_ctx:].mean() 105 | else: 106 | outs.ctx_ll = ll[..., :num_ctx] 107 | outs.tar_ll = ll[..., num_ctx:] 108 | 109 | return outs 110 | -------------------------------------------------------------------------------- /regression/utils/log.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import logging 4 | from collections import OrderedDict 5 | import re 6 | import matplotlib 7 | from matplotlib import pyplot as 
plt 8 | from os.path import split, splitext 9 | 10 | def get_logger(filename, mode='a'): 11 | logging.basicConfig(level=logging.INFO, format='%(message)s') 12 | logger = logging.getLogger() 13 | # repeated runs would accumulate duplicate handlers on the root logger, so remove any existing ones first 14 | for hdlr in logger.handlers: 15 | logger.removeHandler(hdlr) 16 | logger.addHandler(logging.FileHandler(filename, mode=mode)) 17 | logger.addHandler(logging.StreamHandler()) 18 | return logger 19 | 20 | class RunningAverage(object): 21 | def __init__(self, *keys): 22 | self.sum = OrderedDict() 23 | self.cnt = OrderedDict() 24 | self.clock = time.time() 25 | for key in keys: 26 | self.sum[key] = 0 27 | self.cnt[key] = 0 28 | 29 | def update(self, key, val): 30 | if isinstance(val, torch.Tensor): 31 | val = val.item() 32 | if self.sum.get(key, None) is None: 33 | self.sum[key] = val 34 | self.cnt[key] = 1 35 | else: 36 | self.sum[key] = self.sum[key] + val 37 | self.cnt[key] += 1 38 | 39 | def reset(self): 40 | for key in self.sum.keys(): 41 | self.sum[key] = 0 42 | self.cnt[key] = 0 43 | self.clock = time.time() 44 | 45 | def clear(self): 46 | self.sum = OrderedDict() 47 | self.cnt = OrderedDict() 48 | self.clock = time.time() 49 | 50 | def keys(self): 51 | return self.sum.keys() 52 | 53 | def get(self, key): 54 | assert(self.sum.get(key, None) is not None) 55 | return self.sum[key] / self.cnt[key] 56 | 57 | def info(self, show_et=True): 58 | line = '' 59 | for key in self.sum.keys(): 60 | val = self.sum[key] / self.cnt[key] 61 | if type(val) == float: 62 | line += f'{key} {val:.4f} ' 63 | else: 64 | line += f'{key} {val} ' 65 | if show_et: 66 | line += f'({time.time()-self.clock:.3f} secs)' 67 | return line 68 | 69 | def get_log(fileroot): 70 | step = [] 71 | loss = [] 72 | train_time = [] 73 | eval_time = [] 74 | ctxll = [] 75 | tarll = [] 76 | with open(fileroot, "r") as file: 77 | lines = file.readlines() 78 | for line in lines: 79 | # training step 80 | if "step" in line: 81 | linesplit = line.split(" ") 82 | step += [int(linesplit[3])] 83 | _loss = linesplit[-3] 84 | loss += [100 if _loss=="nan" else float(_loss)] 85 | train_time += [float(linesplit[-2][1:])] 86 | # evaluation step 87 | elif "ctx_ll" in line: 88 | linesplit = line.split(" ") 89 | ctxll += [float(linesplit[-5])] 90 | tarll += [float(linesplit[-3])] 91 | eval_time += [float(linesplit[-2][1:])] 92 | 93 | return step, loss, None, ctxll, tarll 94 | 95 | 96 | def plot_log(fileroot, x_begin=None, x_end=None): 97 | step, loss, stepll, ctxll, tarll = get_log(fileroot) 98 | step = list(map(int, step)) 99 | loss = list(map(float, loss)) 100 | ctxll = list(map(float, ctxll)) 101 | tarll = list(map(float, tarll)) 102 | stepll = list(map(int, stepll)) if stepll else None 103 | 104 | if x_begin is None: 105 | x_begin = 0 106 | if x_end is None: 107 | x_end = step[-1] 108 | 109 | print_freq = 1 if len(step)==1 else step[1] - step[0] 110 | 111 | plt.clf() 112 | plt.plot(step[x_begin//print_freq:x_end//print_freq], 113 | loss[x_begin//print_freq:x_end//print_freq]) 114 | plt.xlabel('step') 115 | plt.ylabel('loss') 116 | 117 | directory, file = split(fileroot) 118 | filename = splitext(file)[0] 119 | plt.savefig(directory + "/" + filename + f"-{x_begin}-{x_end}.png") 120 | plt.clf() # clear current figure -------------------------------------------------------------------------------- /regression/models/anp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | 
from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | 9 | from models.modules import CrossAttnEncoder, PoolingEncoder, Decoder 10 | 11 | class ANP(nn.Module): 12 | def __init__(self, 13 | dim_x=1, 14 | dim_y=1, 15 | dim_hid=128, 16 | dim_lat=128, 17 | enc_v_depth=4, 18 | enc_qk_depth=2, 19 | enc_pre_depth=4, 20 | enc_post_depth=2, 21 | dec_depth=3): 22 | 23 | super().__init__() 24 | 25 | self.denc = CrossAttnEncoder( 26 | dim_x=dim_x, 27 | dim_y=dim_y, 28 | dim_hid=dim_hid, 29 | v_depth=enc_v_depth, 30 | qk_depth=enc_qk_depth) 31 | 32 | self.lenc = PoolingEncoder( 33 | dim_x=dim_x, 34 | dim_y=dim_y, 35 | dim_hid=dim_hid, 36 | dim_lat=dim_lat, 37 | self_attn=True, 38 | pre_depth=enc_pre_depth, 39 | post_depth=enc_post_depth) 40 | 41 | self.dec = Decoder( 42 | dim_x=dim_x, 43 | dim_y=dim_y, 44 | dim_enc=dim_hid+dim_lat, 45 | dim_hid=dim_hid, 46 | depth=dec_depth) 47 | 48 | def predict(self, xc, yc, xt, z=None, num_samples=None): 49 | theta = stack(self.denc(xc, yc, xt), num_samples) 50 | if z is None: 51 | pz = self.lenc(xc, yc) 52 | z = pz.rsample() if num_samples is None \ 53 | else pz.rsample([num_samples]) 54 | z = stack(z, xt.shape[-2], -2) 55 | encoded = torch.cat([theta, z], -1) 56 | return self.dec(encoded, stack(xt, num_samples)) 57 | 58 | def sample(self, xc, yc, xt, z=None, num_samples=None): 59 | pred_dist = self.predict(xc, yc, xt, z, num_samples) 60 | return pred_dist.loc 61 | 62 | def forward(self, batch, num_samples=None, reduce_ll=True): 63 | outs = AttrDict() 64 | if self.training: 65 | pz = self.lenc(batch.xc, batch.yc) 66 | qz = self.lenc(batch.x, batch.y) 67 | z = qz.rsample() if num_samples is None else \ 68 | qz.rsample([num_samples]) 69 | py = self.predict(batch.xc, batch.yc, batch.x, 70 | z=z, num_samples=num_samples) 71 | 72 | if num_samples > 1: 73 | # K * B * N 74 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 75 | # K * B 76 | log_qz = qz.log_prob(z).sum(-1) 77 | log_pz = pz.log_prob(z).sum(-1) 78 | 79 | # K * B 80 | log_w = recon.sum(-1) + log_pz - log_qz 81 | 82 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 83 | else: 84 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 85 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 86 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 87 | 88 | else: 89 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 90 | if num_samples is None: 91 | ll = py.log_prob(batch.y).sum(-1) 92 | else: 93 | y = torch.stack([batch.y]*num_samples) 94 | if reduce_ll: 95 | ll = logmeanexp(py.log_prob(y).sum(-1)) 96 | else: 97 | ll = py.log_prob(y).sum(-1) 98 | num_ctx = batch.xc.shape[-2] 99 | 100 | if reduce_ll: 101 | outs.ctx_ll = ll[...,:num_ctx].mean() 102 | outs.tar_ll = ll[...,num_ctx:].mean() 103 | else: 104 | outs.ctx_ll = ll[...,:num_ctx] 105 | outs.tar_ll = ll[...,num_ctx:] 106 | 107 | return outs 108 | -------------------------------------------------------------------------------- /bayesian_optimization/utils/acquisition.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from attrdict import AttrDict 5 | from botorch.acquisition import AnalyticAcquisitionFunction 6 | from botorch.utils.transforms import t_batch_mode_transform 7 | from torch import Tensor 8 | from torch.distributions import Normal 9 | from torch.nn import Module 10 | from typing import Union 11 | 12 | 13 | class EI(AnalyticAcquisitionFunction): 14 | def 
__init__( 15 | self, 16 | model: Module, 17 | observations: AttrDict, 18 | best_f: Union[float, Tensor], 19 | num_bs: int = 200, 20 | maximize: bool = True 21 | ): 22 | model.num_outputs = 1 23 | super(EI, self).__init__(model=model) 24 | 25 | self.obs = observations 26 | if not torch.is_tensor(best_f): 27 | best_f = torch.tensor(best_f) 28 | self.register_buffer("best_f", best_f) 29 | self.num_bs = num_bs 30 | self.maximize = maximize 31 | 32 | @t_batch_mode_transform(expected_q=1, assert_output_shape=False) 33 | def forward(self, X: Tensor) -> Tensor: 34 | self.best_f = self.best_f.to(X) 35 | 36 | posterior = self.model.predict(xc=self.obs.xc, 37 | yc=self.obs.yc, 38 | xt=X, 39 | num_samples=self.num_bs) 40 | mean, std = posterior.mean.squeeze(0), posterior.scale.squeeze(0) 41 | 42 | # shape: (num_samples, 1, num_points, 1) 43 | if mean.dim() == 4: 44 | var = std.pow(2).mean(dim=0) + mean.pow(2).mean(dim=0) - mean.mean(dim=0).pow(2) 45 | std = var.sqrt().squeeze(0) 46 | mean = mean.mean(dim=0).squeeze(0) 47 | 48 | batch_shape = mean.shape[:-2] if mean.dim() >= X.dim() else X.shape[:-2] 49 | mean = mean.view(batch_shape) 50 | std = std.clamp_min(np.sqrt(1e-9)).view(batch_shape) 51 | u = (mean - self.best_f.expand_as(mean)) / std 52 | if not self.maximize: 53 | u = -u 54 | normal = Normal(torch.zeros_like(u), torch.ones_like(u)) 55 | ucdf = normal.cdf(u) 56 | updf = torch.exp(normal.log_prob(u)) 57 | ei = std * (updf + u * ucdf) 58 | return ei 59 | 60 | 61 | class UCB(AnalyticAcquisitionFunction): 62 | def __init__( 63 | self, 64 | model: Module, 65 | observations: AttrDict, 66 | beta: Union[float, Tensor], 67 | num_bs: int = 200, 68 | maximize: bool = True 69 | ): 70 | model.num_outputs = 1 71 | super(UCB, self).__init__(model=model) 72 | 73 | self.obs = observations 74 | if not torch.is_tensor(beta): 75 | beta = torch.tensor(beta) 76 | self.register_buffer("beta", beta) 77 | self.num_bs = num_bs 78 | self.maximize = maximize 79 | 80 | @t_batch_mode_transform(expected_q=1) 81 | def forward(self, X: Tensor, return_mean=False) -> Tensor: 82 | self.beta = self.beta.to(X) 83 | 84 | posterior = self.model.predict(xc=self.obs.xc, 85 | yc=self.obs.yc, 86 | xt=X, 87 | num_samples=self.num_bs) 88 | mean, std = posterior.mean.squeeze(0), posterior.scale.squeeze(0) 89 | 90 | # shape: (num_samples, 1, num_points, 1) 91 | if mean.dim() == 4: 92 | var = std.pow(2).mean(dim=0) + mean.pow(2).mean(dim=0) - mean.mean(dim=0).pow(2) 93 | std = var.sqrt().squeeze(0) 94 | mean = mean.mean(dim=0).squeeze(0) 95 | 96 | batch_shape = X.shape[:-2] 97 | mean = mean.view(batch_shape) 98 | std = std.view(batch_shape) 99 | delta = self.beta.expand_as(mean).sqrt() * std 100 | if return_mean: 101 | return mean 102 | else: 103 | if self.maximize: 104 | return mean + delta 105 | else: 106 | return -mean + delta 107 | -------------------------------------------------------------------------------- /bayesian_optimization/utils/log.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import logging 4 | from collections import OrderedDict 5 | import re 6 | import matplotlib 7 | from matplotlib import pyplot as plt 8 | from os.path import split, splitext 9 | 10 | 11 | def get_logger(filename, mode='a'): 12 | logging.basicConfig(level=logging.INFO, format='%(message)s') 13 | logger = logging.getLogger() 14 | # repeated runs would accumulate duplicate handlers on the root logger, so remove any existing ones first 15 | for hdlr in logger.handlers: 16 | logger.removeHandler(hdlr) 17 | 
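# attach a fresh pair of handlers: one writing to the run's log file, one echoing to stdout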
logger.addHandler(logging.FileHandler(filename, mode=mode)) 18 | logger.addHandler(logging.StreamHandler()) 19 | return logger 20 | 21 | 22 | class RunningAverage(object): 23 | def __init__(self, *keys): 24 | self.sum = OrderedDict() 25 | self.cnt = OrderedDict() 26 | self.clock = time.time() 27 | for key in keys: 28 | self.sum[key] = 0 29 | self.cnt[key] = 0 30 | 31 | def update(self, key, val): 32 | if isinstance(val, torch.Tensor): 33 | val = val.item() 34 | if self.sum.get(key, None) is None: 35 | self.sum[key] = val 36 | self.cnt[key] = 1 37 | else: 38 | self.sum[key] = self.sum[key] + val 39 | self.cnt[key] += 1 40 | 41 | def reset(self): 42 | for key in self.sum.keys(): 43 | self.sum[key] = 0 44 | self.cnt[key] = 0 45 | self.clock = time.time() 46 | 47 | def clear(self): 48 | self.sum = OrderedDict() 49 | self.cnt = OrderedDict() 50 | self.clock = time.time() 51 | 52 | def keys(self): 53 | return self.sum.keys() 54 | 55 | def get(self, key): 56 | assert(self.sum.get(key, None) is not None) 57 | return self.sum[key] / self.cnt[key] 58 | 59 | def info(self, show_et=True): 60 | line = '' 61 | for key in self.sum.keys(): 62 | val = self.sum[key] / self.cnt[key] 63 | if type(val) == float: 64 | line += f'{key} {val:.4f} ' 65 | else: 66 | line += f'{key} {val} ' 67 | if show_et: 68 | line += f'({time.time()-self.clock:.3f} secs)' 69 | return line 70 | 71 | 72 | def get_log(fileroot): 73 | step = [] 74 | loss = [] 75 | train_time = [] 76 | eval_time = [] 77 | ctxll = [] 78 | tarll = [] 79 | with open(fileroot, "r") as file: 80 | lines = file.readlines() 81 | for line in lines: 82 | # training step 83 | if "step" in line: 84 | linesplit = line.split(" ") 85 | step += [int(linesplit[3])] 86 | _loss = linesplit[-3] 87 | loss += [100 if _loss=="nan" else float(_loss)] 88 | train_time += [float(linesplit[-2][1:])] 89 | # evaluation step 90 | elif "ctx_ll" in line: 91 | linesplit = line.split(" ") 92 | ctxll += [float(linesplit[-5])] 93 | tarll += [float(linesplit[-3])] 94 | eval_time += [float(linesplit[-2][1:])] 95 | 96 | return step, loss, None, ctxll, tarll 97 | 98 | 99 | def plot_log(fileroot, x_begin=None, x_end=None): 100 | step, loss, stepll, ctxll, tarll = get_log(fileroot) 101 | step = list(map(int, step)) 102 | loss = list(map(float, loss)) 103 | ctxll = list(map(float, ctxll)) 104 | tarll = list(map(float, tarll)) 105 | stepll = list(map(int, stepll)) if stepll else None 106 | 107 | if x_begin is None: 108 | x_begin = 0 109 | if x_end is None: 110 | x_end = step[-1] 111 | 112 | print_freq = 1 if len(step) == 1 else step[1] - step[0] 113 | 114 | plt.clf() 115 | plt.plot(step[x_begin//print_freq:x_end//print_freq], 116 | loss[x_begin//print_freq:x_end//print_freq]) 117 | plt.xlabel('step') 118 | plt.ylabel('loss') 119 | 120 | directory, file = split(fileroot) 121 | filename = splitext(file)[0] 122 | plt.savefig(directory + "/" + filename + f"-{x_begin}-{x_end}.png") 123 | plt.clf() # clear current figure 124 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnpnd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.modules import build_mlp 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPND(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 
num_layers, 21 | num_std_layers, 22 | cov_approx='cholesky', 23 | prj_dim=5, 24 | prj_depth=4, 25 | diag_depth=4 26 | ): 27 | super(TNPND, self).__init__( 28 | dim_x, 29 | dim_y, 30 | d_model, 31 | emb_depth, 32 | dim_feedforward, 33 | nhead, 34 | dropout, 35 | num_layers, 36 | ) 37 | 38 | assert cov_approx in ['cholesky', 'lowrank'] 39 | self.cov_approx = cov_approx 40 | 41 | self.mean_net = nn.Sequential( 42 | nn.Linear(d_model, dim_feedforward), 43 | nn.ReLU(), 44 | nn.Linear(dim_feedforward, dim_y) 45 | ) 46 | 47 | std_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 48 | self.std_encoder = nn.TransformerEncoder(std_encoder_layer, num_std_layers) 49 | 50 | self.projector = build_mlp(d_model, dim_feedforward, prj_dim*dim_y, prj_depth) 51 | 52 | if cov_approx == 'lowrank': 53 | self.diag_net = build_mlp(d_model, dim_feedforward, dim_y, diag_depth) 54 | 55 | def decode(self, out_encoder, batch_size, dim_y, num_target): 56 | mean_target = self.mean_net(out_encoder).view(batch_size, -1) 57 | 58 | out_std_encoder = self.std_encoder(out_encoder) 59 | std_prj = self.projector(out_std_encoder) 60 | std_prj = std_prj.view((batch_size, num_target*dim_y, -1)) 61 | if self.cov_approx == 'cholesky': 62 | std_tril = torch.bmm(std_prj, std_prj.transpose(1,2)) 63 | std_tril = std_tril.tril() 64 | if getattr(self, 'emnist', False): # guard: unlike the regression TNPND (bound_std), this constructor never sets this flag 65 | diag_ids = torch.arange(num_target*dim_y, device='cuda') 66 | std_tril[:, diag_ids, diag_ids] = 0.05 + 0.95*torch.tanh(std_tril[:, diag_ids, diag_ids]) 67 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, scale_tril=std_tril) 68 | else: 69 | diagonal = torch.exp(self.diag_net(out_encoder)).view((batch_size, -1, 1)) 70 | std = torch.bmm(std_prj, std_prj.transpose(1,2)) + torch.diag_embed(diagonal.squeeze(-1)) 71 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, covariance_matrix=std) 72 | 73 | return pred_tar 74 | 75 | def forward(self, batch, reduce_ll=True): 76 | batch_size = batch.x.shape[0] 77 | dim_y = batch.y.shape[-1] 78 | num_target = batch.xt.shape[1] 79 | 80 | out_encoder = self.encode(batch, autoreg=False) 81 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 82 | 83 | outs = AttrDict() 84 | yt = batch.yt.reshape(batch.yt.shape[0], -1) 85 | outs.loss = - (pred_tar.log_prob(yt).mean() / num_target) 86 | return outs 87 | 88 | 89 | def predict(self, xc, yc, xt, num_samples=None): 90 | if xc.shape[-3] != xt.shape[-3]: 91 | xt = xt.transpose(-3, -2) 92 | 93 | batch = AttrDict() 94 | batch.xc = xc 95 | batch.yc = yc 96 | batch.xt = xt 97 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 98 | 99 | batch_size = xc.shape[0] 100 | dim_y = yc.shape[-1] 101 | num_target = batch.xt.shape[1] 102 | 103 | out_encoder = self.encode(batch, autoreg=False) 104 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 105 | 106 | return Normal( 107 | pred_tar.mean.view(batch_size, num_target, -1), 108 | torch.diagonal(pred_tar.covariance_matrix, dim1=-2, dim2=-1).sqrt().reshape(batch_size, num_target, -1) # Normal expects a standard deviation, i.e. the square root of the covariance diagonal 109 | ) -------------------------------------------------------------------------------- /bayesian_optimization/data/highdim_gp.py: -------------------------------------------------------------------------------- 1 | import gpytorch 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | import torch 5 | import warnings 6 | sns.set() 7 | warnings.filterwarnings('ignore') 8 | 9 | from attrdict import AttrDict 10 
from gpytorch.kernels import ScaleKernel, RBFKernel 11 | from gpytorch.likelihoods import GaussianLikelihood 12 | from gpytorch.means import ConstantMean 13 | from gpytorch.models import ExactGP 14 | from gpytorch.priors import UniformPrior 15 | from typing import Union, List, Tuple 16 | 17 | 18 | class GaussianProcess(ExactGP): 19 | def __init__(self, x, y, likelihood, device): 20 | super(GaussianProcess, self).__init__(x, y, likelihood) 21 | self.mean_module = ConstantMean() 22 | 23 | self.length_prior = UniformPrior(0.1, 1.0) 24 | self.scale_prior = UniformPrior(0.1, 1.0) 25 | 26 | self.covar_module = ScaleKernel( 27 | RBFKernel(lengthscale_prior=self.length_prior), 28 | outputscale_prior=self.scale_prior 29 | ) 30 | self.device = device 31 | 32 | def forward(self, x, verbose=False, random_parameter=True): 33 | # Sample lengthscale and outputscale randomly 34 | if random_parameter: 35 | self.covar_module.base_kernel.lengthscale = self.length_prior.rsample().to(self.device) 36 | self.covar_module.outputscale = self.scale_prior.rsample().to(self.device) 37 | 38 | if verbose: 39 | print(f'Actual length scale: {self.covar_module.base_kernel.lengthscale}') 40 | print(f'Actual output scale: {self.covar_module.outputscale}') 41 | print('=' * 70) 42 | 43 | mean_x = self.mean_module(x) 44 | covar_x = self.covar_module(x) 45 | return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) 46 | 47 | 48 | class GPSampler: 49 | def __init__( 50 | self, 51 | dimension: int = 5, 52 | device: torch.device = torch.device('cpu'), 53 | seed: int = None 54 | ): 55 | # initialize likelihood and gp 56 | likelihood = GaussianLikelihood().to(device) 57 | self.gp = GaussianProcess(None, None, likelihood=likelihood, device=device).to(device) 58 | self.gp.eval() 59 | 60 | self.dim = dimension 61 | self.device = device 62 | self.seed = seed 63 | if seed is not None: 64 | torch.manual_seed(seed) 65 | torch.cuda.manual_seed(seed) 66 | 67 | def __call__( 68 | self, 69 | batch_size: int = 16, 70 | num_ctx: int = None, 71 | num_tar: int = None, 72 | max_num_points: int = 512, 73 | min_num_points: int = 128, 74 | x_range: Union[List, Tuple] = (-2, 2), 75 | random_parameter: bool = True 76 | ): 77 | lb, ub = x_range 78 | 79 | batch = AttrDict() 80 | 81 | num_ctx = num_ctx or torch.randint(min_num_points, max_num_points - min_num_points, size=[1]).item() 82 | num_tar = num_tar or torch.randint(min_num_points, max_num_points - num_ctx, size=[1]).item() 83 | 84 | num_points = num_ctx + num_tar 85 | batch.x = lb + (ub - lb) * torch.rand([batch_size, num_points, self.dim], device=self.device) 86 | batch.xc = batch.x[:, :num_ctx] 87 | batch.xt = batch.x[:, num_ctx:] 88 | 89 | with gpytorch.settings.prior_mode(True): 90 | batch.y = self.gp(batch.x, 91 | verbose=False, 92 | random_parameter=random_parameter).rsample().unsqueeze(-1) 93 | batch.yc = batch.y[:, :num_ctx] 94 | batch.yt = batch.y[:, num_ctx:] 95 | 96 | return batch 97 | 98 | 99 | if __name__ == '__main__': 100 | sampler = GPSampler(dimension=2) 101 | 102 | fig = plt.figure(figsize=(35, 35)) 103 | 104 | for i, p in enumerate([25, 500], 1): 105 | pts = sampler(num_ctx=p, num_tar=p, random_parameter=False) 106 | 107 | ax = fig.add_subplot(1, 2, i, projection='3d') 108 | ax.scatter(pts.x[0, :, 0].detach().numpy(), 109 | pts.x[0, :, 1].detach().numpy(), 110 | pts.y[0].detach().numpy()) 111 | plt.show() -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/plot_benchmarks.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def plot_1d(obj_fun, 7 | str_fun, 8 | str_x_axis=r'$x$', 9 | str_y_axis=r'$f(x)$', 10 | str_figures='../figures', 11 | ): 12 | print(str_fun) 13 | bounds = obj_fun.get_bounds() 14 | print(bounds) 15 | assert bounds.shape[0] == 1 16 | 17 | X = np.linspace(bounds[0, 0], bounds[0, 1], 1000) 18 | Y = obj_fun.output(X[..., np.newaxis]).flatten() 19 | 20 | assert len(X.shape) == 1 21 | assert len(Y.shape) == 1 22 | assert X.shape[0] == Y.shape[0] 23 | 24 | plt.rc('text', usetex=True) 25 | 26 | _ = plt.figure(figsize=(10, 6)) 27 | ax = plt.gca() 28 | 29 | ax.plot(X, Y, 30 | linewidth=4, 31 | marker='None') 32 | 33 | ax.set_xlabel(str_x_axis, fontsize=36) 34 | ax.set_ylabel(str_y_axis, fontsize=36) 35 | ax.tick_params(labelsize=24) 36 | 37 | ax.set_xlim([np.min(X), np.max(X)]) 38 | ax.grid() 39 | 40 | plt.tight_layout() 41 | plt.savefig(os.path.join(str_figures, str_fun + '.pdf'), 42 | format='pdf', 43 | transparent=True, 44 | bbox_inches='tight') 45 | 46 | plt.show() 47 | 48 | def plot_2d(obj_fun, 49 | str_fun, 50 | str_x1_axis=r'$x_1$', 51 | str_x2_axis=r'$x_2$', 52 | str_y_axis=r'$f(\mathbf{x})$', 53 | str_figures='../figures', 54 | ): 55 | print(str_fun) 56 | bounds = obj_fun.get_bounds() 57 | print(bounds) 58 | assert bounds.shape[0] == 2 59 | 60 | X1 = np.linspace(bounds[0, 0], bounds[0, 1], 200) 61 | X2 = np.linspace(bounds[1, 0], bounds[1, 1], 200) 62 | X1, X2 = np.meshgrid(X1, X2) 63 | X = np.concatenate((X1[..., np.newaxis], X2[..., np.newaxis]), axis=2) 64 | X = np.reshape(X, (X.shape[0] * X.shape[1], X.shape[2])) 65 | 66 | Y = obj_fun.output(X).flatten() 67 | 68 | assert len(X.shape) == 2 69 | assert len(Y.shape) == 1 70 | assert X.shape[0] == Y.shape[0] 71 | 72 | Y = np.reshape(Y, (X1.shape[0], X2.shape[0])) 73 | 74 | plt.rc('text', usetex=True) 75 | 76 | _ = plt.figure(figsize=(8, 6)) 77 | ax = plt.axes(projection='3d') 78 | 79 | surf = ax.plot_surface(X1, X2, Y, 80 | cmap='coolwarm', 81 | linewidth=0) 82 | 83 | ax.set_xlabel(str_x1_axis, fontsize=24, labelpad=10) 84 | ax.set_ylabel(str_x2_axis, fontsize=24, labelpad=10) 85 | ax.set_zlabel(str_y_axis, fontsize=24, labelpad=10) 86 | ax.tick_params(labelsize=16) 87 | 88 | ax.set_xlim([np.min(X1), np.max(X1)]) 89 | ax.set_ylim([np.min(X2), np.max(X2)]) 90 | ax.grid() 91 | 92 | cbar = plt.colorbar(surf, 93 | shrink=0.6, 94 | aspect=12, 95 | pad=0.15, 96 | ) 97 | cbar.ax.tick_params(labelsize=16) 98 | 99 | if np.max(Y) > 1000: 100 | plt.ticklabel_format(axis='z', style='sci', scilimits=(0, 0), useMathText=True) 101 | ax.zaxis.get_offset_text().set_fontsize(14) 102 | 103 | plt.tight_layout() 104 | plt.savefig(os.path.join(str_figures, str_fun + '.pdf'), 105 | format='pdf', 106 | transparent=True, 107 | bbox_inches='tight') 108 | 109 | plt.show() 110 | 111 | 112 | if __name__ == '__main__': 113 | # one dim. 114 | 115 | from inf_dim_ackley import Ackley as target_class 116 | obj_fun = target_class(1) 117 | plot_1d(obj_fun, 'ackley_1d') 118 | 119 | from inf_dim_cosines import Cosines as target_class 120 | obj_fun = target_class(1) 121 | plot_1d(obj_fun, 'cosines_1d') 122 | 123 | 124 | # two dim. 
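# Editor's note (hedged sketch, not part of the original script): plot_2d
# below evaluates each benchmark on a 200x200 meshgrid and saves a
# transparent PDF surface plot. The remaining benchmark modules in this
# package follow the same import-and-plot pattern; assuming the
# dimension-generic modules expose classes named like the files (e.g.
# inf_dim_rastrigin providing Rastrigin), an additional panel would be:
#
# from inf_dim_rastrigin import Rastrigin as target_class
# obj_fun = target_class(2)
# plot_2d(obj_fun, 'rastrigin_2d')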
125 | from two_dim_dropwave import DropWave as target_class 126 | obj_fun = target_class() 127 | plot_2d(obj_fun, 'dropwave_2d') 128 | 129 | from two_dim_goldsteinprice import GoldsteinPrice as target_class 130 | obj_fun = target_class() 131 | plot_2d(obj_fun, 'goldsteinprice_2d') 132 | 133 | from two_dim_michalewicz import Michalewicz as target_class 134 | obj_fun = target_class() 135 | plot_2d(obj_fun, 'michalewicz_2d') 136 | 137 | from inf_dim_ackley import Ackley as target_class 138 | obj_fun = target_class(2) 139 | plot_2d(obj_fun, 'ackley_2d') 140 | 141 | from inf_dim_cosines import Cosines as target_class 142 | obj_fun = target_class(2) 143 | plot_2d(obj_fun, 'cosines_2d') 144 | 145 | -------------------------------------------------------------------------------- /regression/models/tnpnd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.modules import build_mlp 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPND(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | num_std_layers, 22 | bound_std=False, 23 | cov_approx='cholesky', 24 | prj_dim=5, 25 | prj_depth=4, 26 | diag_depth=4 27 | ): 28 | super(TNPND, self).__init__( 29 | dim_x, 30 | dim_y, 31 | d_model, 32 | emb_depth, 33 | dim_feedforward, 34 | nhead, 35 | dropout, 36 | num_layers, 37 | bound_std 38 | ) 39 | 40 | assert cov_approx in ['cholesky', 'lowrank'] 41 | self.cov_approx = cov_approx 42 | 43 | self.mean_net = nn.Sequential( 44 | nn.Linear(d_model, dim_feedforward), 45 | nn.ReLU(), 46 | nn.Linear(dim_feedforward, dim_y) 47 | ) 48 | 49 | std_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 50 | self.std_encoder = nn.TransformerEncoder(std_encoder_layer, num_std_layers) 51 | 52 | self.projector = build_mlp(d_model, dim_feedforward, prj_dim*dim_y, prj_depth) 53 | 54 | if cov_approx == 'lowrank': 55 | self.diag_net = build_mlp(d_model, dim_feedforward, dim_y, diag_depth) 56 | 57 | def decode(self, out_encoder, batch_size, dim_y, num_target): 58 | mean = self.mean_net(out_encoder).view(batch_size, -1) 59 | 60 | out_std_encoder = self.std_encoder(out_encoder) 61 | std_prj = self.projector(out_std_encoder) 62 | std_prj = std_prj.view((batch_size, num_target*dim_y, -1)) 63 | if self.cov_approx == 'cholesky': 64 | std_tril = torch.bmm(std_prj, std_prj.transpose(1,2)) 65 | std_tril = std_tril.tril() 66 | if self.bound_std: 67 | diag_ids = torch.arange(num_target*dim_y, device='cuda') 68 | std_tril[:, diag_ids, diag_ids] = 0.05 + 0.95*torch.tanh(std_tril[:, diag_ids, diag_ids]) 69 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean, scale_tril=std_tril) 70 | else: 71 | diagonal = torch.exp(self.diag_net(out_encoder)).view((batch_size, -1, 1)) 72 | std = torch.bmm(std_prj, std_prj.transpose(1,2)) + torch.diag_embed(diagonal.squeeze(-1)) 73 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean, covariance_matrix=std) 74 | 75 | return pred_tar 76 | 77 | def forward(self, batch, reduce_ll=True): 78 | batch_size = batch.x.shape[0] 79 | dim_y = batch.y.shape[-1] 80 | num_target = batch.xt.shape[1] 81 | 82 | out_encoder = self.encode(batch, autoreg=False) 83 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 84 | 85 | outs = 
AttrDict() 86 | outs.tar_ll = pred_tar.log_prob(batch.yt.reshape(batch_size, -1)) 87 | 88 | if not self.training: 89 | outs.tar_ll /= num_target 90 | 91 | if reduce_ll: 92 | outs.tar_ll = outs.tar_ll.mean() 93 | outs.loss = - (outs.tar_ll) 94 | outs.mean_std = torch.mean(pred_tar.covariance_matrix) 95 | else: 96 | outs.tar_ll = outs.tar_ll.unsqueeze(-1) 97 | 98 | return outs 99 | 100 | 101 | def predict(self, xc, yc, xt, num_samples=50, return_samples=False): 102 | batch_size = xc.shape[0] 103 | dim_y = yc.shape[-1] 104 | num_target = xt.shape[1] 105 | 106 | batch = AttrDict() 107 | batch.xc = xc 108 | batch.yc = yc 109 | batch.xt = xt 110 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 111 | 112 | out_encoder = self.encode(batch, autoreg=False) 113 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 114 | 115 | yt_samples = pred_tar.rsample([num_samples]).view(num_samples, batch_size, num_target, -1) 116 | if return_samples: 117 | return yt_samples 118 | 119 | std = yt_samples.std(dim=0) 120 | return Normal(pred_tar.mean.view(batch_size, num_target, -1), std) 121 | 122 | 123 | def sample(self, xc, yc, xt, num_samples=50): 124 | return self.predict(xc, yc, xt, num_samples, return_samples=True) -------------------------------------------------------------------------------- /regression/models/tnpa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.distributions.normal import Normal 5 | from attrdict import AttrDict 6 | 7 | from utils.misc import stack 8 | from models.tnp import TNP 9 | 10 | 11 | class TNPA(TNP): 12 | def __init__( 13 | self, 14 | dim_x, 15 | dim_y, 16 | d_model, 17 | emb_depth, 18 | dim_feedforward, 19 | nhead, 20 | dropout, 21 | num_layers, 22 | bound_std=False, 23 | permute=False, 24 | ): 25 | super(TNPA, self).__init__( 26 | dim_x, 27 | dim_y, 28 | d_model, 29 | emb_depth, 30 | dim_feedforward, 31 | nhead, 32 | dropout, 33 | num_layers, 34 | bound_std 35 | ) 36 | 37 | self.predictor = nn.Sequential( 38 | nn.Linear(d_model, dim_feedforward), 39 | nn.ReLU(), 40 | nn.Linear(dim_feedforward, dim_y*2) 41 | ) 42 | 43 | self.permute = permute 44 | 45 | def forward(self, batch, reduce_ll=True): 46 | z_target = self.encode(batch, autoreg=True) 47 | out = self.predictor(z_target) 48 | mean, std = torch.chunk(out, 2, dim=-1) 49 | if self.bound_std: 50 | std = 0.05 + 0.95 * F.softplus(std) 51 | else: 52 | std = torch.exp(std) 53 | 54 | pred_tar = Normal(mean, std) 55 | 56 | outs = AttrDict() 57 | if reduce_ll: 58 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1).mean() 59 | else: 60 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1) 61 | outs.loss = - (outs.tar_ll) 62 | 63 | return outs 64 | 65 | def permute_sample_batch(self, xt, yt, num_samples, batch_size, num_target): 66 | # data in each batch is permuted identically 67 | perm_ids = torch.rand(num_samples, num_target, device='cuda').unsqueeze(1).repeat((1, batch_size, 1)) 68 | perm_ids = torch.argsort(perm_ids, dim=-1) 69 | deperm_ids = torch.argsort(perm_ids, dim=-1) 70 | dim_sample = torch.arange(num_samples, device='cuda').unsqueeze(-1).unsqueeze(-1).repeat((1,batch_size,num_target)) 71 | dim_batch = torch.arange(batch_size, device='cuda').unsqueeze(0).unsqueeze(-1).repeat((num_samples,1,num_target)) 72 | return xt[dim_sample, dim_batch, perm_ids], yt[dim_sample, dim_batch, perm_ids], dim_sample, dim_batch, deperm_ids 73 | 74 | def predict(self, 
xc, yc, xt, num_samples=50, return_samples=False): 75 | batch_size = xc.shape[0] 76 | num_target = xt.shape[1] 77 | 78 | def squeeze(x): 79 | return x.view(-1, x.shape[-2], x.shape[-1]) 80 | def unsqueeze(x): 81 | return x.view(num_samples, batch_size, x.shape[-2], x.shape[-1]) 82 | 83 | xc_stacked = stack(xc, num_samples) 84 | yc_stacked = stack(yc, num_samples) 85 | xt_stacked = stack(xt, num_samples) 86 | yt_pred = torch.zeros((batch_size, num_target, yc.shape[2]), device='cuda') 87 | yt_stacked = stack(yt_pred, num_samples) 88 | if self.permute: 89 | xt_stacked, yt_stacked, dim_sample, dim_batch, deperm_ids = self.permute_sample_batch(xt_stacked, yt_stacked, num_samples, batch_size, num_target) 90 | 91 | batch_stacked = AttrDict() 92 | batch_stacked.xc = squeeze(xc_stacked) 93 | batch_stacked.yc = squeeze(yc_stacked) 94 | batch_stacked.xt = squeeze(xt_stacked) 95 | batch_stacked.yt = squeeze(yt_stacked) 96 | 97 | for step in range(xt.shape[1]): 98 | z_target_stacked = self.encode(batch_stacked, autoreg=True) 99 | out = self.predictor(z_target_stacked) 100 | mean, std = torch.chunk(out, 2, dim=-1) 101 | if self.bound_std: 102 | std = 0.05 + 0.95 * F.softplus(std) 103 | else: 104 | std = torch.exp(std) 105 | mean, std = unsqueeze(mean), unsqueeze(std) 106 | batch_stacked.yt = unsqueeze(batch_stacked.yt) 107 | batch_stacked.yt[:, :, step] = Normal(mean[:, :, step], std[:, :, step]).sample() 108 | batch_stacked.yt = squeeze(batch_stacked.yt) 109 | 110 | if self.permute: 111 | mean, std = mean[dim_sample, dim_batch, deperm_ids], std[dim_sample, dim_batch, deperm_ids] 112 | 113 | if return_samples: 114 | return unsqueeze(batch_stacked.yt) 115 | 116 | return Normal(mean, std) 117 | 118 | def sample(self, xc, yc, xt, num_samples=50): 119 | return self.predict(xc, yc, xt, num_samples, return_samples=True) -------------------------------------------------------------------------------- /contextual_bandits/models/tnpnd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.modules import build_mlp 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPND(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | num_std_layers, 22 | drop_y=0.5, 23 | cov_approx='cholesky', 24 | prj_dim=5, 25 | prj_depth=4, 26 | diag_depth=4 27 | ): 28 | super(TNPND, self).__init__( 29 | dim_x, 30 | dim_y, 31 | d_model, 32 | emb_depth, 33 | dim_feedforward, 34 | nhead, 35 | dropout, 36 | num_layers, 37 | drop_y 38 | ) 39 | 40 | assert cov_approx in ['cholesky', 'lowrank'] 41 | self.cov_approx = cov_approx 42 | 43 | self.mean_net = nn.Sequential( 44 | nn.Linear(d_model, dim_feedforward), 45 | nn.ReLU(), 46 | nn.Linear(dim_feedforward, dim_y) 47 | ) 48 | 49 | std_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 50 | self.std_encoder = nn.TransformerEncoder(std_encoder_layer, num_std_layers) 51 | 52 | self.projector = build_mlp(d_model, dim_feedforward, prj_dim*dim_y, prj_depth) 53 | 54 | if cov_approx == 'lowrank': 55 | self.diag_net = build_mlp(d_model, dim_feedforward, dim_y, diag_depth) 56 | 57 | def decode(self, out_encoder, mean_target, batch_size, dim_y, num_target): 58 | mean_target = mean_target.view(batch_size, -1) 59 | 60 | out_std_encoder = self.std_encoder(out_encoder) 61 | std_prj 
= self.projector(out_std_encoder) 62 | std_prj = std_prj.view((batch_size, num_target*dim_y, -1)) 63 | if self.cov_approx == 'cholesky': 64 | std_tril = torch.bmm(std_prj, std_prj.transpose(1,2)) 65 | std_tril = std_tril.tril() 66 | if getattr(self, 'bound_std', False): # fix: the original tested self.emnist, which is never set in this variant 67 | diag_ids = torch.arange(num_target*dim_y, device='cuda') 68 | std_tril[:, diag_ids, diag_ids] = 0.05 + 0.95*torch.tanh(std_tril[:, diag_ids, diag_ids]) 69 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, scale_tril=std_tril) 70 | else: 71 | diagonal = torch.exp(self.diag_net(out_encoder)).view((batch_size, -1, 1)) 72 | std = torch.bmm(std_prj, std_prj.transpose(1,2)) + torch.diag_embed(diagonal.squeeze(-1)) 73 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, covariance_matrix=std) 74 | 75 | return pred_tar 76 | 77 | def forward(self, batch, reduce_ll=True): 78 | batch_size = batch.x.shape[0] 79 | dim_y = batch.y.shape[-1] 80 | num_context = batch.xc.shape[1] 81 | num_target = batch.xt.shape[1] 82 | 83 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=True) 84 | mean = self.mean_net(out_encoder) 85 | mean_ctx = mean[:, :num_context] 86 | mean_target = mean[:, num_context:].reshape(batch_size, -1) 87 | pred_tar = self.decode(out_encoder[:, num_context:], mean_target, batch_size, dim_y, num_target) 88 | 89 | outs = AttrDict() 90 | yt = batch.yt.reshape(batch.yt.shape[0], -1) 91 | outs.loss_target = - (pred_tar.log_prob(yt).mean() / num_target) 92 | outs.loss_ctx = torch.sum((batch.yc - mean_ctx)**2, dim=-1).mean() 93 | outs.loss = outs.loss_ctx + outs.loss_target 94 | outs.mean_std = torch.mean(pred_tar.covariance_matrix) 95 | outs.rmse = torch.sqrt(torch.mean((yt - mean_target)**2)) # sqrt added so the logged value is actually an RMSE, as the name claims 96 | return outs 97 | 98 | 99 | def predict(self, xc, yc, xt, num_samples=100): 100 | batch = AttrDict() 101 | batch.xc = xc 102 | batch.yc = yc 103 | batch.xt = xt 104 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 105 | 106 | batch_size = xc.shape[0] 107 | dim_y = yc.shape[-1] 108 | num_context = batch.xc.shape[1] 109 | num_target = batch.xt.shape[1] 110 | 111 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=False)[:, num_context:] 112 | mean_target = self.mean_net(out_encoder) 113 | pred_tar = self.decode(out_encoder, mean_target, batch_size, dim_y, num_target) 114 | 115 | yt_samples = pred_tar.rsample([num_samples]).reshape(num_samples, batch_size, num_target, -1) 116 | std = yt_samples.std(dim=0) 117 | outs = AttrDict() 118 | outs.loc = mean_target.unsqueeze(0) 119 | outs.scale = std.unsqueeze(0) 120 | outs.ys = Normal(outs.loc, outs.scale) 121 | return outs -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | name: tnp 2 | channels: 3 | - anaconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=conda_forge 8 | - _openmp_mutex=4.5=1_llvm 9 | - alsa-lib=1.2.3=h516909a_0 10 | - blas=1.0=mkl 11 | - bottleneck=1.3.2=py39hdd57654_1 12 | - brotli=1.0.9=h7f98852_6 13 | - brotli-bin=1.0.9=h7f98852_6 14 | - bzip2=1.0.8=h7f98852_4 15 | - ca-certificates=2020.10.14=0 16 | - certifi=2021.10.8=py39h06a4308_2 17 | - cffi=1.15.0=py39hd667e15_1 18 | - colorama=0.4.4=pyh9f0ad1d_0 19 | - cudatoolkit=11.5.1=hcf5317a_9 20 | - cudnn=8.2.1.32=h86fa8c9_0 21 | - cycler=0.11.0=pyhd8ed1ab_0 22 | - dbus=1.13.18=hb2f20db_0 23 | - expat=2.4.2=h9c3ff4c_0 24 | - fontconfig=2.13.1=hba837de_1005 25 | - fonttools=4.28.5=py39h3811e60_0
26 | - freetype=2.10.4=h0708190_1 27 | - future=0.18.2=py39hf3d152e_4 28 | - glib=2.69.1=h4ff587b_1 29 | - gst-plugins-base=1.14.0=hbbd80ab_1 30 | - gstreamer=1.14.0=h28cd5cc_2 31 | - icu=58.2=he6710b0_3 32 | - jbig=2.1=h7f98852_2003 33 | - jpeg=9d=h36c2ea0_0 34 | - kiwisolver=1.3.2=py39h1a9c180_1 35 | - krb5=1.19.2=hcc1bbae_3 36 | - lcms2=2.12=hddcbb42_0 37 | - ld_impl_linux-64=2.36.1=hea4e1c9_2 38 | - lerc=3.0=h9c3ff4c_0 39 | - libblas=3.9.0=12_linux64_mkl 40 | - libbrotlicommon=1.0.9=h7f98852_6 41 | - libbrotlidec=1.0.9=h7f98852_6 42 | - libbrotlienc=1.0.9=h7f98852_6 43 | - libcblas=3.9.0=12_linux64_mkl 44 | - libclang=11.1.0=default_ha53f305_1 45 | - libdeflate=1.8=h7f98852_0 46 | - libedit=3.1.20191231=he28a2e2_2 47 | - libevent=2.1.10=h9b69904_4 48 | - libffi=3.3=he6710b0_2 49 | - libgcc-ng=11.2.0=h1d223b6_11 50 | - libgfortran-ng=7.5.0=ha8ba4b0_17 51 | - libgfortran4=7.5.0=ha8ba4b0_17 52 | - libiconv=1.16=h516909a_0 53 | - liblapack=3.9.0=12_linux64_mkl 54 | - libllvm11=11.1.0=hf817b99_2 55 | - libnsl=2.0.0=h7f98852_0 56 | - libogg=1.3.4=h7f98852_1 57 | - libopus=1.3.1=h7f98852_1 58 | - libpng=1.6.37=h21135ba_2 59 | - libpq=13.5=hd57d9b9_1 60 | - libprotobuf=3.16.0=h780b84a_0 61 | - libstdcxx-ng=11.2.0=he4da1e4_11 62 | - libtiff=4.3.0=h6f004c6_2 63 | - libuuid=2.32.1=h7f98852_1000 64 | - libvorbis=1.3.7=h9c3ff4c_0 65 | - libwebp-base=1.2.1=h7f98852_0 66 | - libxcb=1.13=h7f98852_1004 67 | - libxkbcommon=1.0.3=he3ba5ed_0 68 | - libxml2=2.9.12=h03d6c58_0 69 | - libzlib=1.2.11=h36c2ea0_1013 70 | - llvm-openmp=12.0.1=h4bd325d_1 71 | - lz4-c=1.9.3=h9c3ff4c_1 72 | - magma=2.5.4=h6103c52_2 73 | - matplotlib=3.4.3=py39h06a4308_0 74 | - matplotlib-base=3.4.3=py39hbbc1b5f_0 75 | - mkl=2021.4.0=h8d4b97c_729 76 | - mkl-service=2.4.0=py39h7f8727e_0 77 | - munkres=1.1.4=pyh9f0ad1d_0 78 | - mysql-common=8.0.27=ha770c72_3 79 | - mysql-libs=8.0.27=hfa10184_3 80 | - nccl=2.11.4.1=hdc17891_0 81 | - ncurses=6.2=h58526e2_4 82 | - ninja=1.10.2=h4bd325d_1 83 | - nspr=4.32=h9c3ff4c_1 84 | - nss=3.74=hb5efdd6_0 85 | - numexpr=2.8.1=py39h6abb31d_0 86 | - numpy=1.22.2=py39h91f2184_0 87 | - olefile=0.46=pyh9f0ad1d_1 88 | - openjpeg=2.4.0=hb52868f_1 89 | - openssl=1.1.1m=h7f8727e_0 90 | - packaging=21.3=pyhd8ed1ab_0 91 | - pandas=1.3.5=py39h8c16a72_0 92 | - pcre=8.45=h9c3ff4c_0 93 | - pillow=8.4.0=py39ha612740_0 94 | - pip=21.2.4=py39h06a4308_0 95 | - pthread-stubs=0.4=h36c2ea0_1001 96 | - pycparser=2.21=pyhd8ed1ab_0 97 | - pyparsing=3.0.6=pyhd8ed1ab_0 98 | - pyqt=5.9.2=py39h2531618_6 99 | - pyqt5-sip=4.19.18=py39he80948d_8 100 | - python=3.9.7=h12debd9_1 101 | - python-dateutil=2.8.2=pyhd8ed1ab_0 102 | - python_abi=3.9=2_cp39 103 | - pytorch=1.9.0=cuda112py39hbeb36f3_1 104 | - pytorch-gpu=1.9.0=cuda112py39h0bbbad9_1 105 | - pytz=2020.1=py_0 106 | - qt=5.9.7=h5867ecd_1 107 | - readline=8.1=h46c0cb4_0 108 | - scipy=1.7.3=py39hc147768_0 109 | - seaborn=0.11.0=py_0 110 | - setuptools=58.0.4=py39h06a4308_0 111 | - sip=4.19.13=py39h295c915_0 112 | - six=1.16.0=pyh6c4a22f_0 113 | - sleef=3.5.1=h9b69904_2 114 | - sqlite=3.37.0=h9cd32fc_0 115 | - tbb=2021.5.0=h4bd325d_0 116 | - tk=8.6.11=h27826a3_1 117 | - tornado=6.1=py39h27cfd23_0 118 | - tqdm=4.62.3=pyhd8ed1ab_0 119 | - typing_extensions=4.0.1=pyha770c72_0 120 | - tzdata=2021e=he74cb21_0 121 | - wheel=0.37.1=pyhd8ed1ab_0 122 | - xorg-libxau=1.0.9=h7f98852_0 123 | - xorg-libxdmcp=1.1.3=h7f98852_0 124 | - xz=5.2.5=h516909a_1 125 | - zlib=1.2.11=h36c2ea0_1013 126 | - zstd=1.5.1=ha95c52a_0 127 | - pip: 128 | - attrdict==2.0.1 129 | - attrs==21.4.0 130 | - bayeso==0.5.2 
131 | - black==21.12b0 132 | - click==8.0.3 133 | - cma==3.1.0 134 | - gpytorch==1.6.0 135 | - iniconfig==1.1.1 136 | - joblib==1.1.0 137 | - mypy-extensions==0.4.3 138 | - pathspec==0.9.0 139 | - platformdirs==2.4.1 140 | - pluggy==1.0.0 141 | - py==1.11.0 142 | - pytest==6.2.5 143 | - pyyaml==6.0 144 | - qmcpy==1.2 145 | - scikit-learn==1.0.2 146 | - shapely==1.8.0 147 | - threadpoolctl==3.0.0 148 | - toml==0.10.2 149 | - tomli==1.2.3 150 | - uncertainty-toolbox==0.1.0 -------------------------------------------------------------------------------- /contextual_bandits/utils/log.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import logging 4 | import yaml 5 | import os 6 | import os.path as osp 7 | from attrdict import AttrDict 8 | from collections import OrderedDict 9 | from matplotlib import pyplot as plt 10 | from os.path import split, splitext 11 | 12 | 13 | def get_logger(filename, mode='a'): 14 | logging.basicConfig(level=logging.INFO, format='%(message)s') 15 | logger = logging.getLogger() 16 | # handlers accumulate on the root logger across repeated runs, so remove any existing ones first 17 | for hdlr in logger.handlers: 18 | logger.removeHandler(hdlr) 19 | logger.addHandler(logging.FileHandler(filename, mode=mode)) 20 | logger.addHandler(logging.StreamHandler()) 21 | return logger 22 | 23 | 24 | class RunningAverage(object): 25 | def __init__(self, *keys): 26 | self.sum = OrderedDict() 27 | self.cnt = OrderedDict() 28 | self.clock = time.time() 29 | for key in keys: 30 | self.sum[key] = 0 31 | self.cnt[key] = 0 32 | 33 | def update(self, key, val): 34 | if isinstance(val, torch.Tensor): 35 | val = val.item() 36 | if self.sum.get(key, None) is None: 37 | self.sum[key] = val 38 | self.cnt[key] = 1 39 | else: 40 | self.sum[key] = self.sum[key] + val 41 | self.cnt[key] += 1 42 | 43 | def reset(self): 44 | for key in self.sum.keys(): 45 | self.sum[key] = 0 46 | self.cnt[key] = 0 47 | self.clock = time.time() 48 | 49 | def clear(self): 50 | self.sum = OrderedDict() 51 | self.cnt = OrderedDict() 52 | self.clock = time.time() 53 | 54 | def keys(self): 55 | return self.sum.keys() 56 | 57 | def get(self, key): 58 | assert(self.sum.get(key, None) is not None) 59 | return self.sum[key] / self.cnt[key] 60 | 61 | def info(self, show_et=True): 62 | line = '' 63 | for key in self.sum.keys(): 64 | val = self.sum[key] / self.cnt[key] 65 | if type(val) == float: 66 | line += f'{key} {val:.4f} ' 67 | else: 68 | line += f'{key} {val} ' 69 | if show_et: 70 | line += f'({time.time()-self.clock:.3f} secs)' 71 | return line 72 | 73 | 74 | def get_log(fileroot): 75 | step = [] 76 | loss = [] 77 | train_time = [] 78 | eval_time = [] 79 | ctxll = [] 80 | tarll = [] 81 | with open(fileroot, "r") as file: 82 | lines = file.readlines() 83 | for line in lines: 84 | # training step 85 | if "step" in line: 86 | linesplit = line.split(" ") 87 | step += [int(linesplit[3])] 88 | _loss = linesplit[-3] 89 | loss += [100 if _loss=="nan" else float(_loss)] 90 | train_time += [float(linesplit[-2][1:])] 91 | # evaluation step 92 | elif "ctx_ll" in line: 93 | linesplit = line.split(" ") 94 | ctxll += [float(linesplit[-5])] 95 | tarll += [float(linesplit[-3])] 96 | eval_time += [float(linesplit[-2][1:])] 97 | 98 | return step, loss, None, ctxll, tarll 99 | 100 | 101 | def plot_log(fileroot, x_begin=None, x_end=None): 102 | plt.clf() # clear current figure 103 | 104 | step, loss, stepll, ctxll, tarll = get_log(fileroot) 105 | step = list(map(int, step)) 106 | loss =
list(map(float, loss)) 107 | ctxll = list(map(float, ctxll)) 108 | tarll = list(map(float, tarll)) 109 | stepll = list(map(int, stepll)) if stepll else None 110 | 111 | if x_begin is None: 112 | x_begin = 0 113 | if x_end is None: 114 | x_end = step[-1] 115 | 116 | print_freq = 1 if len(step)==1 else step[1] - step[0] 117 | 118 | plt.plot(step[x_begin//print_freq:x_end//print_freq], 119 | loss[x_begin//print_freq:x_end//print_freq]) 120 | plt.xlabel('step') 121 | plt.ylabel('loss') 122 | 123 | dir, file = split(fileroot) 124 | filename = splitext(file)[0] 125 | plt.savefig(dir + "/" + filename + f"-{x_begin}-{x_end}.png") 126 | 127 | 128 | def plot_freq_cov(): 129 | with open(osp.join("model_paths.yaml")) as f: 130 | model_paths = yaml.safe_load(f) 131 | root = model_paths["root"] 132 | model_paths = model_paths["models"] 133 | x_base = torch.linspace(-2, 2, 500).unsqueeze(-1) 134 | 135 | for kernel in ["rbf", "periodic", "matern"]: 136 | plt.clf() 137 | for model, path in model_paths.items(): 138 | freq_cov = torch.load(osp.join(root, model, path, f"freq_cov_{kernel}.pt")) 139 | # plt.scatter(x_base.cpu(), freq_cov.cpu(), s=3, alpha=1.0, label=model+f"-{freq_cov.mean():0.2f}") 140 | plt.scatter(x_base.cpu(), freq_cov.cpu(), s=3, alpha=1.0, label=f"{model}-{freq_cov.mean():0.2f}") 141 | plt.ylim([0,1]) 142 | 143 | # models = "_".join(model_paths.keys()) 144 | models = "all" 145 | plt.legend() 146 | plt.title(f"Frequentist Coverage - {kernel}") 147 | if not osp.exists(osp.join(root, "plot", "freq_cov", models)): 148 | os.makedirs(osp.join(root, "plot", "freq_cov", models)) 149 | plt.savefig(osp.join(root, "plot", "freq_cov", models, f"freq_cov_{kernel}.jpg")) 150 | 151 | if __name__ == "__main__": 152 | plot_freq_cov() 153 | -------------------------------------------------------------------------------- /regression/data/gp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import MultivariateNormal, StudentT 3 | from attrdict import AttrDict 4 | import math 5 | 6 | 7 | __all__ = ["GPPriorSampler", 'GPSampler', 'RBFKernel', 'PeriodicKernel', 'Matern52Kernel'] 8 | 9 | 10 | class GPPriorSampler(object): 11 | """ 12 | Used for Bayesian optimization. 13 | """ 14 | def __init__(self, kernel, t_noise=None): 15 | self.kernel = kernel 16 | self.t_noise = t_noise 17 | 18 | # bx: 1 * num_points * 1 19 | def sample(self, x, device): 20 | # 1 * num_points * num_points 21 | cov = self.kernel(x) 22 | mean = torch.zeros(1, x.shape[1], device=device) 23 | 24 | y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) 25 | 26 | if self.t_noise is not None: 27 | y += self.t_noise * StudentT(2.1).rsample(y.shape).to(device) 28 | 29 | return y 30 | 31 | 32 | class GPSampler(object): 33 | def __init__(self, kernel, t_noise=None, seed=None): 34 | self.kernel = kernel 35 | self.t_noise = t_noise 36 | if seed is not None: 37 | torch.manual_seed(seed) 38 | torch.cuda.manual_seed(seed) 39 | self.seed = seed 40 | 41 | def sample(self, 42 | batch_size=16, 43 | num_ctx=None, 44 | num_tar=None, 45 | max_num_points=50, 46 | x_range=(-2, 2), 47 | device='cpu'): 48 | 49 | batch = AttrDict() 50 | num_ctx = num_ctx or torch.randint(low=3, high=max_num_points-3, size=[1]).item() # Nc 51 | num_tar = num_tar or torch.randint(low=3, high=max_num_points-num_ctx, size=[1]).item() # Nt 52 | 53 | num_points = num_ctx + num_tar # N = Nc + Nt 54 | batch.x = x_range[0] + (x_range[1] - x_range[0]) \ 55 | * torch.rand([batch_size, num_points, 1], device=device) # 
[B,N,Dx=1] 56 | batch.xc = batch.x[:,:num_ctx] # [B,Nc,1] 57 | batch.xt = batch.x[:,num_ctx:] # [B,Nt,1] 58 | 59 | # batch_size * num_points * num_points 60 | cov = self.kernel(batch.x) # [B,N,N] 61 | mean = torch.zeros(batch_size, num_points, device=device) # [B,N] 62 | batch.y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) # [B,N,Dy=1] 63 | batch.yc = batch.y[:,:num_ctx] # [B,Nc,1] 64 | batch.yt = batch.y[:,num_ctx:] # [B,Nt,1] 65 | 66 | if self.t_noise is not None: 67 | if self.t_noise == -1: 68 | t_noise = 0.15 * torch.rand(batch.y.shape).to(device) # [B,N,1] 69 | else: 70 | t_noise = self.t_noise 71 | batch.y += t_noise * StudentT(2.1).rsample(batch.y.shape).to(device) 72 | return batch 73 | # {"x": [B,N,1], "xc": [B,Nc,1], "xt": [B,Nt,1], 74 | # "y": [B,N,1], "yc": [B,Nc,1], "yt": [B,Nt,1]} 75 | 76 | class RBFKernel(object): 77 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 78 | self.sigma_eps = sigma_eps 79 | self.max_length = max_length 80 | self.max_scale = max_scale 81 | 82 | # x: batch_size * num_points * dim [B,N,Dx=1] 83 | def __call__(self, x): 84 | length = 0.1 + (self.max_length-0.1) \ 85 | * torch.rand([x.shape[0], 1, 1, 1], device=x.device) 86 | scale = 0.1 + (self.max_scale-0.1) \ 87 | * torch.rand([x.shape[0], 1, 1], device=x.device) 88 | 89 | # batch_size * num_points * num_points * dim [B,N,N,1] 90 | dist = (x.unsqueeze(-2) - x.unsqueeze(-3))/length 91 | 92 | # batch_size * num_points * num_points [B,N,N] 93 | cov = scale.pow(2) * torch.exp(-0.5 * dist.pow(2).sum(-1)) \ 94 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 95 | 96 | return cov # [B,N,N] 97 | 98 | class Matern52Kernel(object): 99 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 100 | self.sigma_eps = sigma_eps 101 | self.max_length = max_length 102 | self.max_scale = max_scale 103 | 104 | # x: batch_size * num_points * dim 105 | def __call__(self, x): 106 | length = 0.1 + (self.max_length-0.1) \ 107 | * torch.rand([x.shape[0], 1, 1, 1], device=x.device) 108 | scale = 0.1 + (self.max_scale-0.1) \ 109 | * torch.rand([x.shape[0], 1, 1], device=x.device) 110 | 111 | # batch_size * num_points * num_points 112 | dist = torch.norm((x.unsqueeze(-2) - x.unsqueeze(-3))/length, dim=-1) 113 | 114 | cov = scale.pow(2)*(1 + math.sqrt(5.0)*dist + 5.0*dist.pow(2)/3.0) \ 115 | * torch.exp(-math.sqrt(5.0) * dist) \ 116 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 117 | 118 | return cov 119 | 120 | class PeriodicKernel(object): 121 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 122 | # the period p is sampled per batch in __call__ below 123 | self.sigma_eps = sigma_eps 124 | self.max_length = max_length 125 | self.max_scale = max_scale 126 | 127 | # x: batch_size * num_points * dim 128 | def __call__(self, x): 129 | p = 0.1 + 0.4*torch.rand([x.shape[0], 1, 1], device=x.device) 130 | length = 0.1 + (self.max_length-0.1) \ 131 | * torch.rand([x.shape[0], 1, 1], device=x.device) 132 | scale = 0.1 + (self.max_scale-0.1) \ 133 | * torch.rand([x.shape[0], 1, 1], device=x.device) 134 | 135 | dist = x.unsqueeze(-2) - x.unsqueeze(-3) 136 | cov = scale.pow(2) * torch.exp( 137 | - 2*(torch.sin(math.pi*dist.abs().sum(-1)/p)/length).pow(2)) \ 138 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 139 | 140 | return cov -------------------------------------------------------------------------------- /bayesian_optimization/data/gp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import
MultivariateNormal, StudentT 3 | from attrdict import AttrDict 4 | import math 5 | 6 | 7 | __all__ = ["GPPriorSampler", 'GPSampler', 'RBFKernel', 'PeriodicKernel', 'Matern52Kernel'] 8 | 9 | 10 | class GPPriorSampler(object): 11 | """ 12 | Used for Bayesian optimization. 13 | """ 14 | def __init__(self, kernel, t_noise=None): 15 | self.kernel = kernel 16 | self.t_noise = t_noise 17 | 18 | # bx: 1 * num_points * 1 19 | def sample(self, x, device): 20 | # 1 * num_points * num_points 21 | cov = self.kernel(x) 22 | mean = torch.zeros(1, x.shape[1], device=device) 23 | 24 | y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) 25 | 26 | if self.t_noise is not None: 27 | y += self.t_noise * StudentT(2.1).rsample(y.shape).to(device) 28 | 29 | return y 30 | 31 | 32 | class GPSampler(object): 33 | def __init__(self, kernel, t_noise=None, seed=None): 34 | self.kernel = kernel 35 | self.t_noise = t_noise 36 | if seed is not None: 37 | torch.manual_seed(seed) 38 | torch.cuda.manual_seed(seed) 39 | self.seed = seed 40 | 41 | def sample(self, 42 | batch_size=16, 43 | num_ctx=None, 44 | num_tar=None, 45 | max_num_points=50, 46 | x_range=(-2, 2), 47 | device='cpu'): 48 | 49 | batch = AttrDict() 50 | num_ctx = num_ctx or torch.randint(low=3, high=max_num_points-3, size=[1]).item() # Nc 51 | num_tar = num_tar or torch.randint(low=3, high=max_num_points-num_ctx, size=[1]).item() # Nt 52 | 53 | num_points = num_ctx + num_tar # N = Nc + Nt 54 | batch.x = x_range[0] + (x_range[1] - x_range[0]) \ 55 | * torch.rand([batch_size, num_points, 1], device=device) # [B,N,Dx=1] 56 | batch.xc = batch.x[:,:num_ctx] # [B,Nc,1] 57 | batch.xt = batch.x[:,num_ctx:] # [B,Nt,1] 58 | 59 | # batch_size * num_points * num_points 60 | cov = self.kernel(batch.x) # [B,N,N] 61 | mean = torch.zeros(batch_size, num_points, device=device) # [B,N] 62 | batch.y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) # [B,N,Dy=1] 63 | batch.yc = batch.y[:,:num_ctx] # [B,Nc,1] 64 | batch.yt = batch.y[:,num_ctx:] # [B,Nt,1] 65 | 66 | if self.t_noise is not None: 67 | if self.t_noise == -1: 68 | t_noise = 0.15 * torch.rand(batch.y.shape).to(device) # [B,N,1] 69 | else: 70 | t_noise = self.t_noise 71 | batch.y += t_noise * StudentT(2.1).rsample(batch.y.shape).to(device) 72 | return batch 73 | # {"x": [B,N,1], "xc": [B,Nc,1], "xt": [B,Nt,1], 74 | # "y": [B,N,1], "yc": [B,Nc,1], "yt": [B,Nt,1]} 75 | 76 | 77 | class RBFKernel(object): 78 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 79 | self.sigma_eps = sigma_eps 80 | self.max_length = max_length 81 | self.max_scale = max_scale 82 | 83 | # x: batch_size * num_points * dim [B,N,Dx=1] 84 | def __call__(self, x): 85 | length = 0.1 + (self.max_length-0.1) \ 86 | * torch.rand([x.shape[0], 1, 1, 1], device=x.device) 87 | scale = 0.1 + (self.max_scale-0.1) \ 88 | * torch.rand([x.shape[0], 1, 1], device=x.device) 89 | 90 | # batch_size * num_points * num_points * dim [B,N,N,1] 91 | dist = (x.unsqueeze(-2) - x.unsqueeze(-3))/length 92 | 93 | # batch_size * num_points * num_points [B,N,N] 94 | cov = scale.pow(2) * torch.exp(-0.5 * dist.pow(2).sum(-1)) \ 95 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 96 | 97 | return cov # [B,N,N] 98 | 99 | 100 | class Matern52Kernel(object): 101 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 102 | self.sigma_eps = sigma_eps 103 | self.max_length = max_length 104 | self.max_scale = max_scale 105 | 106 | # x: batch_size * num_points * dim 107 | def __call__(self, x): 108 | length = 0.1 + (self.max_length-0.1) \ 109 |
* torch.rand([x.shape[0], 1, 1, 1], device=x.device) 110 | scale = 0.1 + (self.max_scale-0.1) \ 111 | * torch.rand([x.shape[0], 1, 1], device=x.device) 112 | 113 | # batch_size * num_points * num_points 114 | dist = torch.norm((x.unsqueeze(-2) - x.unsqueeze(-3))/length, dim=-1) 115 | 116 | cov = scale.pow(2)*(1 + math.sqrt(5.0)*dist + 5.0*dist.pow(2)/3.0) \ 117 | * torch.exp(-math.sqrt(5.0) * dist) \ 118 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 119 | 120 | return cov 121 | 122 | 123 | class PeriodicKernel(object): 124 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 125 | # the period p is sampled per batch in __call__ below 126 | self.sigma_eps = sigma_eps 127 | self.max_length = max_length 128 | self.max_scale = max_scale 129 | 130 | # x: batch_size * num_points * dim 131 | def __call__(self, x): 132 | p = 0.1 + 0.4*torch.rand([x.shape[0], 1, 1], device=x.device) 133 | length = 0.1 + (self.max_length-0.1) \ 134 | * torch.rand([x.shape[0], 1, 1], device=x.device) 135 | scale = 0.1 + (self.max_scale-0.1) \ 136 | * torch.rand([x.shape[0], 1, 1], device=x.device) 137 | 138 | dist = x.unsqueeze(-2) - x.unsqueeze(-3) 139 | cov = scale.pow(2) * torch.exp( 140 | - 2*(torch.sin(math.pi*dist.abs().sum(-1)/p)/length).pow(2)) \ 141 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 142 | 143 | return cov 144 | --------------------------------------------------------------------------------
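A minimal end-to-end sketch of the data pipeline above (editor's addition, not a file in the repository): it draws one meta-learning batch from GPSampler with a randomly rescaled RBF kernel and inspects the context/target split. It assumes the interpreter is started from the regression/ directory so that data.gp is importable, and it runs on CPU.

import torch
from data.gp import GPSampler, RBFKernel  # assumes CWD is regression/

sampler = GPSampler(RBFKernel(), seed=0)
batch = sampler.sample(batch_size=4, max_num_points=50, device='cpu')

# batch.x is [B, N, 1]; the first Nc points are the context set, the rest the targets
print(batch.xc.shape, batch.yc.shape)  # torch.Size([4, Nc, 1]) twice
print(batch.xt.shape, batch.yt.shape)  # torch.Size([4, Nt, 1]) twice
assert torch.allclose(batch.x[:, :batch.xc.shape[1]], batch.xc)

A model's predict method (e.g. TNPND.predict above) consumes exactly this xc/yc/xt triple, so the sampler output can be fed to any of the models in this repository once batch and model live on the same device.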