├── tnp.png
├── .gitignore
├── regression
│   ├── configs
│   │   ├── gp
│   │   │   ├── bnp.yaml
│   │   │   ├── cnp.yaml
│   │   │   ├── np.yaml
│   │   │   ├── tnpd.yaml
│   │   │   ├── banp.yaml
│   │   │   ├── canp.yaml
│   │   │   ├── tnpa.yaml
│   │   │   ├── anp.yaml
│   │   │   └── tnpnd.yaml
│   │   ├── celeba
│   │   │   ├── bnp.yaml
│   │   │   ├── cnp.yaml
│   │   │   ├── np.yaml
│   │   │   ├── tnpd.yaml
│   │   │   ├── banp.yaml
│   │   │   ├── canp.yaml
│   │   │   ├── tnpa.yaml
│   │   │   ├── anp.yaml
│   │   │   └── tnpnd.yaml
│   │   └── emnist
│   │       ├── bnp.yaml
│   │       ├── cnp.yaml
│   │       ├── np.yaml
│   │       ├── banp.yaml
│   │       ├── canp.yaml
│   │       ├── tnpd.yaml
│   │       ├── anp.yaml
│   │       ├── tnpa.yaml
│   │       └── tnpnd.yaml
│   ├── data
│   │   ├── __pycache__
│   │   │   ├── gp.cpython-38.pyc
│   │   │   ├── celeba.cpython-38.pyc
│   │   │   ├── emnist.cpython-38.pyc
│   │   │   └── image.cpython-38.pyc
│   │   ├── emnist.py
│   │   ├── celeba.py
│   │   ├── image.py
│   │   └── gp.py
│   ├── models
│   │   ├── __pycache__
│   │   │   ├── tnp.cpython-38.pyc
│   │   │   ├── tnpa.cpython-38.pyc
│   │   │   ├── tnpd.cpython-38.pyc
│   │   │   ├── tnpnd.cpython-38.pyc
│   │   │   ├── attention.cpython-38.pyc
│   │   │   └── modules.cpython-38.pyc
│   │   ├── cnp.py
│   │   ├── attention.py
│   │   ├── canp.py
│   │   ├── tnpd.py
│   │   ├── tnp.py
│   │   ├── bnp.py
│   │   ├── banp.py
│   │   ├── np.py
│   │   ├── anp.py
│   │   ├── tnpnd.py
│   │   └── tnpa.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── log.cpython-38.pyc
│   │   │   ├── misc.cpython-38.pyc
│   │   │   └── paths.cpython-38.pyc
│   │   ├── paths.py
│   │   ├── misc.py
│   │   ├── sampling.py
│   │   └── log.py
│   └── README.md
├── bayesian_optimization
│   ├── configs
│   │   └── gp
│   │       ├── bnp.yaml
│   │       ├── cnp.yaml
│   │       ├── np.yaml
│   │       ├── tnpd.yaml
│   │       ├── tnpa.yaml
│   │       ├── banp.yaml
│   │       ├── canp.yaml
│   │       ├── anp.yaml
│   │       └── tnpnd.yaml
│   ├── data
│   │   ├── __pycache__
│   │   │   ├── gp.cpython-38.pyc
│   │   │   ├── gp.cpython-39.pyc
│   │   │   ├── highdim_gp.cpython-38.pyc
│   │   │   └── highdim_gp.cpython-39.pyc
│   │   ├── highdim_gp.py
│   │   └── gp.py
│   ├── models
│   │   ├── __pycache__
│   │   │   ├── tnp.cpython-38.pyc
│   │   │   ├── tnp.cpython-39.pyc
│   │   │   ├── tnpa.cpython-38.pyc
│   │   │   ├── tnpa.cpython-39.pyc
│   │   │   ├── tnpd.cpython-38.pyc
│   │   │   ├── tnpd.cpython-39.pyc
│   │   │   ├── tnpnd.cpython-38.pyc
│   │   │   ├── tnpnd.cpython-39.pyc
│   │   │   ├── modules.cpython-38.pyc
│   │   │   ├── modules.cpython-39.pyc
│   │   │   ├── attention.cpython-38.pyc
│   │   │   └── attention.cpython-39.pyc
│   │   ├── tnpd.py
│   │   ├── attention.py
│   │   ├── tnpa.py
│   │   ├── canp.py
│   │   ├── cnp.py
│   │   ├── banp.py
│   │   ├── bnp.py
│   │   ├── tnp.py
│   │   ├── np.py
│   │   ├── anp.py
│   │   └── tnpnd.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── log.cpython-38.pyc
│   │   │   ├── log.cpython-39.pyc
│   │   │   ├── misc.cpython-38.pyc
│   │   │   ├── misc.cpython-39.pyc
│   │   │   ├── paths.cpython-38.pyc
│   │   │   ├── paths.cpython-39.pyc
│   │   │   └── acquisition.cpython-38.pyc
│   │   ├── paths.py
│   │   ├── misc.py
│   │   ├── sampling.py
│   │   ├── acquisition.py
│   │   └── log.py
│   ├── bayeso_benchmarks
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── benchmark_base.cpython-38.pyc
│   │   │   ├── inf_dim_ackley.cpython-38.pyc
│   │   │   ├── inf_dim_cosines.cpython-38.pyc
│   │   │   ├── inf_dim_rastrigin.cpython-38.pyc
│   │   │   ├── two_dim_dropwave.cpython-38.pyc
│   │   │   ├── two_dim_michalewicz.cpython-38.pyc
│   │   │   ├── three_dim_hartmann3d.cpython-38.pyc
│   │   │   └── two_dim_goldsteinprice.cpython-38.pyc
│   │   ├── two_dim_dropwave.py
│   │   ├── inf_dim_cosines.py
│   │   ├── two_dim_goldsteinprice.py
│   │   ├── one_dim_linear.py
│   │   ├── three_dim_hartmann3d.py
│   │   ├── inf_dim_rastrigin.py
│   │   ├── two_dim_michalewicz.py
│   │   ├── one_dim_step.py
│   │   ├── inf_dim_ackley.py
│   │   └── plot_benchmarks.py
│   └── README.md
├── contextual_bandits
│   ├── configs
│   │   └── wheel
│   │       ├── bnp.yaml
│   │       ├── cnp.yaml
│   │       ├── np.yaml
│   │       ├── models_anp.yaml
│   │       ├── models_bnp.yaml
│   │       ├── models_cnp.yaml
│   │       ├── models_np.yaml
│   │       ├── banp.yaml
│   │       ├── canp.yaml
│   │       ├── models_banp.yaml
│   │       ├── models_canp.yaml
│   │       ├── models_tnpa.yaml
│   │       ├── models_tnpd.yaml
│   │       ├── models_tnpnd.yaml
│   │       ├── tnpd.yaml
│   │       ├── tnpa.yaml
│   │       ├── anp.yaml
│   │       └── tnpnd.yaml
│   ├── paths.yaml
│   ├── data
│   │   └── __pycache__
│   │       └── wheel.cpython-38.pyc
│   ├── models
│   │   ├── __pycache__
│   │   │   ├── tnp.cpython-38.pyc
│   │   │   ├── tnpa.cpython-38.pyc
│   │   │   ├── tnpd.cpython-38.pyc
│   │   │   ├── tnpnd.cpython-38.pyc
│   │   │   ├── modules.cpython-38.pyc
│   │   │   └── attention.cpython-38.pyc
│   │   ├── attention.py
│   │   ├── tnpd.py
│   │   ├── canp.py
│   │   ├── cnp.py
│   │   ├── tnpa.py
│   │   ├── banp.py
│   │   ├── bnp.py
│   │   ├── tnp.py
│   │   ├── np.py
│   │   ├── anp.py
│   │   └── tnpnd.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── log.cpython-38.pyc
│   │   │   └── misc.cpython-38.pyc
│   │   ├── misc.py
│   │   ├── sampling.py
│   │   ├── metrics.py
│   │   └── log.py
│   ├── runner
│   │   ├── __pycache__
│   │   │   ├── args.cpython-38.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── cmab_runner.cpython-38.pyc
│   │   ├── __init__.py
│   │   └── args.py
│   ├── main.py
│   └── README.md
├── LICENSE.md
├── README.md
└── env.yml

/.gitignore:
--------------------------------------------------------------------------------
*.vscode*
*__pycache__*
*results*
*evalsets*
*datasets*
--------------------------------------------------------------------------------
/regression/configs/gp/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/gp/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/celeba/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/celeba/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/emnist/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
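The `*np.yaml` files above are flat hyperparameter dictionaries: input/output dimensionality plus encoder/decoder depths. A minimal sketch of how such a file can be consumed, assuming PyYAML and a model constructor that accepts the keys as keyword arguments — the training scripts that actually do this are not part of this listing:

```python
import yaml

# Load one of the flat config files shown above.
with open('regression/configs/gp/cnp.yaml') as f:
    config = yaml.safe_load(f)
# config == {'dim_x': 1, 'dim_y': 1, 'dim_hid': 128,
#            'enc_pre_depth': 4, 'enc_post_depth': 2, 'dec_depth': 3}

# Hypothetical constructor call; the real CNP class lives in
# regression/models/cnp.py, which is not reproduced in this listing.
# model = CNP(**config)
```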
/bayesian_optimization/configs/gp/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/bnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/cnp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/celeba/np.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
dim_lat: 128
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/np.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/gp/np.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/paths.yaml:
--------------------------------------------------------------------------------
# paths.yaml

datasets_path:
  "datasets"
evalsets_path:
  "evalsets"
results_path:
  "results"
--------------------------------------------------------------------------------
/regression/configs/gp/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/np.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/np.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
dim_lat: 128
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/celeba/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
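The `tnpd.yaml` configs name standard transformer-encoder hyperparameters (`d_model`, `nhead`, `dim_feedforward`, `dropout`, `num_layers`; `emb_depth` presumably sizes the input embedding MLP). A sketch of the encoder stack these values describe, using stock PyTorch modules — an illustration, not the repo's actual `models/tnpd.py`:

```python
import yaml
import torch.nn as nn

with open('regression/configs/gp/tnpd.yaml') as f:
    cfg = yaml.safe_load(f)

# One standard encoder layer per the config: 64-dim tokens, 4 heads,
# a 128-dim feedforward block, and no dropout.
layer = nn.TransformerEncoderLayer(
    d_model=cfg['d_model'],
    nhead=cfg['nhead'],
    dim_feedforward=cfg['dim_feedforward'],
    dropout=cfg['dropout'],
    batch_first=True,
)
encoder = nn.TransformerEncoder(layer, num_layers=cfg['num_layers'])  # 6 layers
```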
/bayesian_optimization/configs/gp/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 8
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
/regression/configs/gp/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/gp/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 8
dropout: 0.0
num_layers: 6
--------------------------------------------------------------------------------
/regression/configs/celeba/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_v_depth: 6
enc_qk_depth: 3
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/celeba/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
enc_v_depth: 6
enc_qk_depth: 3
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_v_depth: 5
enc_qk_depth: 3
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/emnist/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
enc_v_depth: 5
enc_qk_depth: 3
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/bayesian_optimization/bayeso_benchmarks/__init__.py:
--------------------------------------------------------------------------------
#
# author: Jungtaek Kim (jtkim@postech.ac.kr)
# last updated: November 5, 2020
#

__version__ = '0.1.4'
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_anp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "anp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_bnp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "bnp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_cnp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "cnp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_np.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "np"
--------------------------------------------------------------------------------
/contextual_bandits/main.py:
--------------------------------------------------------------------------------
from runner import args
from runner.cmab_runner import cmab

def main():
    cmab(args)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/regression/configs/celeba/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
permute: True
--------------------------------------------------------------------------------
/regression/configs/emnist/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
bound_std: True
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/banp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/canp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_banp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "banp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_canp.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "canp"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_tnpa.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "tnpa"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_tnpd.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "tnpd"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/models_tnpnd.yaml:
--------------------------------------------------------------------------------
# cmab_models.py

- "uniform" # first model is a baseline (for regret normalization in comparison of performance)
- "tnpnd"
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/tnpd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
d_model: 16
emb_depth: 3
dim_feedforward: 64
nhead: 1
dropout: 0.0
num_layers: 4
drop_y: 0.5
--------------------------------------------------------------------------------
/regression/configs/gp/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
permute: True
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
d_model: 16
emb_depth: 3
dim_feedforward: 64
nhead: 1
dropout: 0.0
num_layers: 4
drop_y: 0.5
--------------------------------------------------------------------------------
/regression/configs/celeba/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
dim_hid: 128
dim_lat: 128
enc_v_depth: 6
enc_qk_depth: 3
enc_pre_depth: 6
enc_post_depth: 3
dec_depth: 5
--------------------------------------------------------------------------------
/regression/configs/emnist/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_v_depth: 5
enc_qk_depth: 3
enc_pre_depth: 5
enc_post_depth: 3
dec_depth: 4
--------------------------------------------------------------------------------
/regression/configs/gp/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
dim_hid: 128
dim_lat: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/anp.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
dim_hid: 128
dim_lat: 128
enc_v_depth: 4
enc_qk_depth: 2
enc_pre_depth: 4
enc_post_depth: 2
dec_depth: 3
--------------------------------------------------------------------------------
/regression/configs/emnist/tnpa.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
permute: True
bound_std: True
--------------------------------------------------------------------------------
/regression/utils/paths.py:
--------------------------------------------------------------------------------
import os

ROOT = ''

evalsets_path = os.path.join(ROOT, 'evalsets')
datasets_path = os.path.join(ROOT, 'datasets')
results_path = os.path.join(ROOT, 'results')
--------------------------------------------------------------------------------
/bayesian_optimization/utils/paths.py:
--------------------------------------------------------------------------------
import os

ROOT = ''

evalsets_path = os.path.join(ROOT, 'evalsets')
datasets_path = os.path.join(ROOT, 'datasets')
results_path = os.path.join(ROOT, 'results')
--------------------------------------------------------------------------------
/contextual_bandits/runner/__init__.py:
--------------------------------------------------------------------------------
import yaml

from runner.args import get_args

args = get_args()

with open("paths.yaml") as f:
    paths = yaml.safe_load(f)
    datasets_path = paths["datasets_path"]
    evalsets_path = paths["evalsets_path"]
    results_path = paths["results_path"]
--------------------------------------------------------------------------------
/regression/configs/celeba/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 3
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
num_std_layers: 2
cov_approx: 'cholesky' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/regression/configs/gp/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
num_std_layers: 2
cov_approx: 'cholesky' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/bayesian_optimization/configs/gp/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 1
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 8
dropout: 0.0
num_layers: 6
num_std_layers: 2
cov_approx: 'lowrank' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/contextual_bandits/configs/wheel/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 5
d_model: 16
emb_depth: 3
dim_feedforward: 64
nhead: 1
dropout: 0.0
num_layers: 4
num_std_layers: 2
cov_approx: 'lowrank' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
drop_y: 0.5
--------------------------------------------------------------------------------
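The `cov_approx` field in the `tnpnd.yaml` configs (above and below) selects between a Cholesky and a low-rank-plus-diagonal parameterization of the predictive covariance. A generic sketch of the two textbook constructions, with hypothetical shapes — not necessarily the exact code in `models/tnpnd.py`, which is not included in this listing:

```python
import torch

N, prj_dim = 8, 20  # N target points; prj_dim matches the config's prj_dim

# Cholesky option: predict a lower-triangular factor L and form cov = L @ L.T.
L = torch.tril(torch.randn(N, N))
cov_cholesky = L @ L.T

# Low-rank option: predict a projection P (N x prj_dim) and a positive
# diagonal d, and form cov = P @ P.T + diag(d) -- O(N * prj_dim) parameters
# instead of O(N^2) for the full triangular factor.
P = torch.randn(N, prj_dim)
d = torch.rand(N) + 1e-3  # keep the diagonal positive for positive-definiteness
cov_lowrank = P @ P.T + torch.diag(d)
```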
/regression/configs/emnist/tnpnd.yaml:
--------------------------------------------------------------------------------
dim_x: 2
dim_y: 1
d_model: 64
emb_depth: 4
dim_feedforward: 128
nhead: 4
dropout: 0.0
num_layers: 6
num_std_layers: 2
bound_std: True
cov_approx: 'cholesky' # cholesky or lowrank parameterization
prj_dim: 20
prj_depth: 4
diag_depth: 4 # only for lowrank parameterization option
--------------------------------------------------------------------------------
/contextual_bandits/README.md:
--------------------------------------------------------------------------------
### Training
First, we have to train TNPs on randomly sampled wheel data. Training is similar to meta regression.
```
python main.py --cmab_mode=train --model=tnpa --expid=default
```
If training for the first time, wheel data will be generated and saved in `datasets`. Model weights and logs will be saved in `results/train-all-R`.

### Evaluation
After training, we can run the contextual bandit experiment to evaluate the trained model.
```
python main.py --cmab_mode=eval --model=tnpa --expid=default
```
Model weights corresponding to `{expid}` will be loaded and evaluated. If running the contextual bandit for the first time, evaluation data will be generated and saved in `evalsets`. The results will be saved in `results/eval-all-R`.
--------------------------------------------------------------------------------
/regression/data/emnist.py:
--------------------------------------------------------------------------------
import torch
import torchvision.datasets as tvds

from utils.paths import datasets_path

class EMNIST(tvds.EMNIST):
    def __init__(self, train=True, class_range=[0, 47], device='cpu', download=True):
        super().__init__(datasets_path, train=train, split='balanced', download=download)

        self.data = self.data.unsqueeze(1).float().div(255).transpose(-1, -2).to(device)
        self.targets = self.targets.to(device)

        idxs = []
        for c in range(class_range[0], class_range[1]):
            idxs.append(torch.where(self.targets==c)[0])
        idxs = torch.cat(idxs)

        self.data = self.data[idxs]
        self.targets = self.targets[idxs]

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]
--------------------------------------------------------------------------------
/bayesian_optimization/bayeso_benchmarks/two_dim_dropwave.py:
--------------------------------------------------------------------------------
#
# author: Jungtaek Kim (jtkim@postech.ac.kr)
# last updated: February 9, 2021
#

import numpy as np

from bayeso_benchmarks.benchmark_base import Function


def fun_target(bx, dim_bx):
    assert len(bx.shape) == 1
    assert bx.shape[0] == dim_bx

    y = -1.0 * (1 + np.cos(12.0 * np.sqrt(bx[0]**2 + bx[1]**2))) / (0.5 * (bx[0]**2 + bx[1]**2) + 2.0)
    return y


class DropWave(Function):
    def __init__(self):
        dim_bx = 2
        bounds = np.array([
            [-5.12, 5.12],
            [-5.12, 5.12],
        ])
        global_minimizers = np.array([
            [0.0, 0.0],
        ])
        global_minimum = -1.0
        function = lambda bx: fun_target(bx, dim_bx)

        Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function)
--------------------------------------------------------------------------------
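A quick sanity check (not part of the repo) that `fun_target` above attains the stated `global_minimum` of -1.0 at the stated minimizer (0, 0):

```python
import numpy as np
from bayeso_benchmarks.two_dim_dropwave import fun_target

# -(1 + cos(12*sqrt(0))) / (0.5*0 + 2.0) = -2.0 / 2.0 = -1.0
print(fun_target(np.array([0.0, 0.0]), 2))  # -1.0
```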
/bayesian_optimization/bayeso_benchmarks/inf_dim_cosines.py:
--------------------------------------------------------------------------------
#
# author: Jungtaek Kim (jtkim@postech.ac.kr)
# last updated: February 8, 2021
#

import numpy as np

from bayeso_benchmarks.benchmark_base import Function


def fun_target(bx, dim_bx):
    assert len(bx.shape) == 1
    assert bx.shape[0] == dim_bx

    y = np.sum(np.cos(bx) * (np.abs(bx) * (0.1 / (2.0 * np.pi)) - 1.0))
    return y


class Cosines(Function):
    def __init__(self, dim_problem):
        assert isinstance(dim_problem, int)

        dim_bx = np.inf
        bounds = np.array([
            [-2.0 * np.pi, 2.0 * np.pi],
        ])
        global_minimizers = np.array([
            [0.0],
        ])
        global_minimum = -1.0 * dim_problem

        function = lambda bx: fun_target(bx, dim_problem)

        Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function, dim_problem=dim_problem)
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 Tung Nguyen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/regression/utils/misc.py:
--------------------------------------------------------------------------------
import os
from importlib.machinery import SourceFileLoader
import math
import torch

def gen_load_func(parser, func):
    def load(args, cmdline):
        sub_args, cmdline = parser.parse_known_args(cmdline)
        for k, v in sub_args.__dict__.items():
            args.__dict__[k] = v
        return func(**sub_args.__dict__), cmdline
    return load


def load_module(filename):
    module_name = os.path.splitext(os.path.basename(filename))[0]
    return SourceFileLoader(module_name, filename).load_module()
#
#
# ex.
#


def logmeanexp(x, dim=0):
    return x.logsumexp(dim) - math.log(x.shape[dim])


def stack(x, num_samples=None, dim=0):
    return x if num_samples is None \
        else torch.stack([x]*num_samples, dim=dim)


def hrminsec(duration):
    hours, left = duration // 3600, duration % 3600
    mins, secs = left // 60, left % 60
    return f"{hours}hrs {mins}mins {secs}secs"
--------------------------------------------------------------------------------
/bayesian_optimization/utils/misc.py:
--------------------------------------------------------------------------------
import os
from importlib.machinery import SourceFileLoader
import math
import torch


def gen_load_func(parser, func):
    def load(args, cmdline):
        sub_args, cmdline = parser.parse_known_args(cmdline)
        for k, v in sub_args.__dict__.items():
            args.__dict__[k] = v
        return func(**sub_args.__dict__), cmdline
    return load


def load_module(filename):
    module_name = os.path.splitext(os.path.basename(filename))[0]
    return SourceFileLoader(module_name, filename).load_module(module_name)
#
#
# ex.
#


def logmeanexp(x, dim=0):
    return x.logsumexp(dim) - math.log(x.shape[dim])


def stack(x, num_samples=None, dim=0):
    return x if num_samples is None \
        else torch.stack([x]*num_samples, dim=dim)


def hrminsec(duration):
    hours, left = duration // 3600, duration % 3600
    mins, secs = left // 60, left % 60
    return f"{hours}hrs {mins}mins {secs}secs"
--------------------------------------------------------------------------------
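The blank `# ex.` comment in both copies of `misc.py` above suggests a usage example was intended for `load_module`; a hypothetical one (the filename and class name are illustrative, not taken from the repo's docs):

```python
# Load a model definition file by path; the module is named after the file.
module = load_module('models/cnp.py')   # imports models/cnp.py as module 'cnp'
# model_cls = getattr(module, 'CNP')    # then look up a class defined inside it
```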
11 | ``` 12 | python 1d_bo.py --bo_mode models --bo_kernel rbf --model tnpa --expid=default 13 | ``` 14 | 15 | ## Multi-dimensional BO 16 | --- 17 | ### Training 18 | First, generate the training dataset, and then train. Choose `dimension` (2 or 3), which correspond to 2-D and 3-D problems, respectively. It is recommended that `min_num_points` and `max_num_points` are 30 and 128 for 2-D problems, and 64 and 256 for 3-D problems. 19 | ``` 20 | python highdim_gp.py --mode=generate --model=tnpa --dimension=2 --min_num_points=30 --max_num_points=128 21 | ``` 22 | ``` 23 | python highdim_gp.py --mode=train --model=tnpa --dimension=2 --min_num_points=30 --max_num_points=128 24 | ``` 25 | 26 | ### Evaluation 27 | 28 | Run `highdim_bo.py`. 29 | Please choose objective function to evaluate. The following functions are supported: `ackley`, `cosine`, `rastrigin`, `dropwave`, `goldsteinprice`, `michalewicz`, `hartmann`. 30 | 31 | ``` 32 | python highdim_bo.py --objective=ackley --dimension=2 --model=tnpa --train_min_num_points=30 --train_max_num_points=128 33 | ``` 34 | -------------------------------------------------------------------------------- /contextual_bandits/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | from importlib.machinery import SourceFileLoader 3 | import math 4 | import torch 5 | 6 | def gen_load_func(parser, func): 7 | def load(args, cmdline): 8 | sub_args, cmdline = parser.parse_known_args(cmdline) 9 | for k, v in sub_args.__dict__.items(): 10 | args.__dict__[k] = v 11 | return func(**sub_args.__dict__), cmdline 12 | return load 13 | 14 | 15 | def load_module(filename): 16 | module_name = os.path.splitext(os.path.basename(filename))[0] 17 | return SourceFileLoader(module_name, filename).load_module() 18 | # 19 | # 20 | # ex. 
21 | # 22 | 23 | 24 | def logmeanexp(x, dim=0): 25 | return x.logsumexp(dim) - math.log(x.shape[dim]) 26 | 27 | 28 | def stack(x, num_samples=None, dim=0): 29 | return x if num_samples is None \ 30 | else torch.stack([x]*num_samples, dim=dim) 31 | 32 | 33 | def hrminsec(duration): 34 | hours, left = duration // 3600, duration % 3600 35 | mins, secs = left // 60, left % 60 36 | return f"{hours}hrs {mins}mins {secs}secs" 37 | 38 | 39 | def one_hot(x, num): # [B,N] -> [B,N,num] 40 | B, N = x.shape 41 | _x = torch.zeros([B, N, num], dtype=torch.float32) 42 | for b in range(B): 43 | for n in range(N): 44 | i = x[b, n] 45 | _x[b, n, i] = 1.0 46 | return _x -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/one_dim_linear.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx, slope): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | assert isinstance(slope, float) 15 | 16 | y = slope * bx[0] 17 | return y 18 | 19 | 20 | class Linear(Function): 21 | def __init__(self, 22 | bounds=np.array([ 23 | [-10, 10], 24 | ]), 25 | slope=1.0 26 | ): 27 | assert isinstance(slope, float) 28 | assert isinstance(bounds, np.ndarray) 29 | assert len(bounds.shape) == 2 30 | assert bounds.shape[0] == 1 31 | assert bounds.shape[1] == 2 32 | assert bounds[0, 0] < bounds[0, 1] 33 | 34 | dim_bx = bounds.shape[0] 35 | 36 | if slope > 0.0: 37 | global_minimizers = np.array([ 38 | [bounds[0, 0]], 39 | ]) 40 | global_minimum = slope * bounds[0, 0] 41 | else: 42 | global_minimizers = np.array([ 43 | [bounds[0, 1]], 44 | ]) 45 | global_minimum = slope * bounds[0, 1] 46 | function = lambda bx: fun_target(bx, dim_bx, slope) 47 | 48 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 49 | -------------------------------------------------------------------------------- /regression/utils/sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def gather(items, idxs): 4 | K = idxs.shape[0] 5 | idxs = idxs.to(items[0].device) 6 | gathered = [] 7 | for item in items: 8 | gathered.append(torch.gather( 9 | torch.stack([item]*K), -2, 10 | torch.stack([idxs]*item.shape[-1], -1)).squeeze(0)) 11 | return gathered[0] if len(gathered) == 1 else gathered 12 | 13 | def sample_subset(*items, r_N=None, num_samples=None): 14 | r_N = r_N or torch.rand(1).item() 15 | K = num_samples or 1 16 | N = items[0].shape[-2] 17 | Ns = min(max(1, int(r_N * N)), N-1) 18 | batch_shape = items[0].shape[:-2] 19 | idxs = torch.rand((K,)+batch_shape+(N,)).argsort(-1) 20 | return gather(items, idxs[...,:Ns]), gather(items, idxs[...,Ns:]) 21 | 22 | def sample_with_replacement(*items, num_samples=None, r_N=1.0, N_s=None): 23 | K = num_samples or 1 24 | N = items[0].shape[-2] 25 | N_s = N_s or max(1, int(r_N * N)) 26 | batch_shape = items[0].shape[:-2] 27 | idxs = torch.randint(N, size=(K,)+batch_shape+(N_s,)) 28 | return gather(items, idxs) 29 | 30 | def sample_mask(B, N, num_samples=None, min_num=3, prob=0.5): 31 | min_num = min(min_num, N) 32 | K = num_samples or 1 33 | fixed = torch.ones(K, B, min_num) 34 | if N - min_num > 0: 35 | rand = torch.bernoulli(prob*torch.ones(K, B, N-min_num)) 36 | mask = torch.cat([fixed, rand], -1) 
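        # mask: [K,B,N]; the first min_num entries of each row are always 1,
        # the remaining N-min_num entries are Bernoulli(prob) draws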
37 | return mask.squeeze(0) 38 | else: 39 | return fixed.squeeze(0) -------------------------------------------------------------------------------- /bayesian_optimization/utils/sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def gather(items, idxs): 5 | K = idxs.shape[0] 6 | idxs = idxs.to(items[0].device) 7 | gathered = [] 8 | for item in items: 9 | gathered.append(torch.gather( 10 | torch.stack([item] * K), -2, 11 | torch.stack([idxs] * item.shape[-1], -1)).squeeze(0)) 12 | return gathered[0] if len(gathered) == 1 else gathered 13 | 14 | 15 | def sample_subset(*items, r_N=None, num_samples=None): 16 | r_N = r_N or torch.rand(1).item() 17 | K = num_samples or 1 18 | N = items[0].shape[-2] 19 | Ns = min(max(1, int(r_N * N)), N - 1) 20 | batch_shape = items[0].shape[:-2] 21 | idxs = torch.rand((K,) + batch_shape + (N,)).argsort(-1) 22 | return gather(items, idxs[..., :Ns]), gather(items, idxs[..., Ns:]) 23 | 24 | 25 | def sample_with_replacement(*items, num_samples=None, r_N=1.0, N_s=None): 26 | K = num_samples or 1 27 | N = items[0].shape[-2] 28 | N_s = N_s or max(1, int(r_N * N)) 29 | batch_shape = items[0].shape[:-2] 30 | idxs = torch.randint(N, size=(K,) + batch_shape + (N_s,)) 31 | return gather(items, idxs) 32 | 33 | 34 | def sample_mask(B, N, num_samples=None, min_num=3, prob=0.5): 35 | min_num = min(min_num, N) 36 | K = num_samples or 1 37 | fixed = torch.ones(K, B, min_num) 38 | if N - min_num > 0: 39 | rand = torch.bernoulli(prob * torch.ones(K, B, N - min_num)) 40 | mask = torch.cat([fixed, rand], -1) 41 | return mask.squeeze(0) 42 | else: 43 | return fixed.squeeze(0) 44 | -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/three_dim_hartmann3d.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | 15 | alpha = np.array([1.0, 1.2, 3.0, 3.2]) 16 | A = np.array([ 17 | [3.0, 10.0, 30.0], 18 | [0.1, 10.0, 35.0], 19 | [3.0, 10.0, 30.0], 20 | [0.1, 10.0, 35.0], 21 | ]) 22 | P = 1e-4 * np.array([ 23 | [3689, 1170, 2673], 24 | [4699, 4387, 7470], 25 | [1091, 8732, 5547], 26 | [381, 5743, 8828], 27 | ]) 28 | 29 | outer = 0.0 30 | for i_ in range(0, 4): 31 | inner = 0.0 32 | for j_ in range(0, 3): 33 | inner += A[i_, j_] * (bx[j_] - P[i_, j_])**2 34 | outer += alpha[i_] * np.exp(-1.0 * inner) 35 | 36 | y = -1.0 * outer 37 | return y 38 | 39 | 40 | class Hartmann3D(Function): 41 | def __init__(self, 42 | bounds=np.array([ 43 | [0.0, 1.0], 44 | [0.0, 1.0], 45 | [0.0, 1.0], 46 | ]) 47 | ): 48 | assert isinstance(bounds, np.ndarray) 49 | assert len(bounds.shape) == 2 50 | assert bounds.shape[1] == 2 51 | 52 | dim_bx = 3 53 | assert bounds.shape[0] == dim_bx 54 | 55 | global_minimizers = np.array([ 56 | [0.114614, 0.555649, 0.852547], 57 | ]) 58 | global_minimum = -3.86278 59 | function = lambda bx: fun_target(bx, dim_bx) 60 | 61 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 62 | -------------------------------------------------------------------------------- /contextual_bandits/utils/sampling.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | 3 | def gather(items, idxs, reduce=True): 4 | K = idxs.shape[0] # Ns 5 | idxs = idxs.to(items[0].device) # [Ns,B,N] 6 | gathered = [] # [Ns,B,N,D] 7 | for item in items: # [B,N,D] 8 | _gathered = torch.gather( 9 | torch.stack([item] * K), -2, # [Ns,B,N,D] 10 | torch.stack([idxs] * item.shape[-1], -1)) 11 | gathered.append(_gathered.squeeze(0) if reduce else _gathered) # [Ns,B,N,D] 12 | return gathered[0] if len(gathered) == 1 else gathered 13 | 14 | def sample_subset(*items, r_N=None, num_samples=None): 15 | r_N = r_N or torch.rand(1).item() 16 | K = num_samples or 1 17 | N = items[0].shape[-2] 18 | Ns = min(max(1, int(r_N * N)), N-1) 19 | batch_shape = items[0].shape[:-2] 20 | idxs = torch.rand((K,)+batch_shape+(N,)).argsort(-1) 21 | return gather(items, idxs[...,:Ns]), gather(items, idxs[...,Ns:]) 22 | 23 | def sample_with_replacement(*items, num_samples=None, r_N=1.0, N_s=None, reduce=True): 24 | K = num_samples or 1 # Ns 25 | N = items[0].shape[-2] # N 26 | N_s = N_s or max(1, int(r_N * N)) # N 27 | batch_shape = items[0].shape[:-2] # B 28 | idxs = torch.randint(N, size=(K,)+batch_shape+(N_s,)) # [Ns,B,N] 29 | return gather(items, idxs, reduce) # items: [B,N,D], idxs: [Ns,B,N] 30 | 31 | def sample_mask(B, N, num_samples=None, min_num=3, prob=0.5): 32 | min_num = min(min_num, N) 33 | K = num_samples or 1 34 | fixed = torch.ones(K, B, min_num) 35 | if N - min_num > 0: 36 | rand = torch.bernoulli(prob*torch.ones(K, B, N-min_num)) 37 | mask = torch.cat([fixed, rand], -1) 38 | return mask.squeeze(0) 39 | else: 40 | return fixed.squeeze(0) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Transformer Neural Processes: Uncertainty-Aware Meta Learning Via Sequence Modeling 2 | 3 | This is the official implementation of the paper [Transformer Neural Processes: Uncertainty-Aware Meta Learning Via Sequence Modeling](https://arxiv.org/abs/2207.04179) in PyTorch. We propose Transformer Neural Processes (TNPs), a new member of the Neural Processes family that casts uncertainty-aware meta learning as a sequence modeling problem. We learn TNPs via an autoregressive likelihood-based objective and instantiate them with a novel transformer-based architecture. TNPs achieve state-of-the-art performance on various benchmark problems, outperforming all previous NP variants on meta regression, image completion, contextual multi-armed bandits, and Bayesian optimization. 4 | 5 | 6 | 7 | ## Install 8 | 9 | First, clone the repository: 10 | 11 | ``` 12 | git clone https://github.com/tung-nd/TNP-pytorch.git 13 | ``` 14 | 15 | Then install the dependencies as listed in `env.yml` and activate the environment: 16 | 17 | ``` 18 | conda env create -f env.yml 19 | conda activate tnp 20 | ``` 21 | 22 | ## Usage 23 | 24 | Please check the directory of each task for specific usage. 25 | 26 | ## Citation 27 | 28 | If you find this repo useful in your research, please consider citing our paper: 29 | ``` 30 | @article{nguyen2022transformer, 31 | title={Transformer neural processes: Uncertainty-aware meta learning via sequence modeling}, 32 | author={Nguyen, Tung and Grover, Aditya}, 33 | journal={arXiv preprint arXiv:2207.04179}, 34 | year={2022} 35 | } 36 | ``` 37 | 38 | ## Acknowledgement 39 | 40 | The implementation of the baselines is borrowed from the official code base of [Bootstrapping Neural Processes](https://github.com/juho-lee/bnp).
-------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/inf_dim_rastrigin.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target( 12 | bx, 13 | dim_bx, 14 | A=10.0 15 | ): 16 | assert len(bx.shape) == 1 17 | assert bx.shape[0] == dim_bx 18 | assert isinstance(A, float) 19 | 20 | y = A * dim_bx + np.sum((bx / (2.0 / 5.12)) ** 2 - A * np.cos(2 * np.pi * bx / (2.0 / 5.12)), axis=-1) 21 | return y 22 | 23 | 24 | class Rastrigin(Function): 25 | def __init__(self, dim_problem): 26 | assert isinstance(dim_problem, int) 27 | 28 | dim_bx = np.inf 29 | bounds = np.array([ 30 | [-5.12 * (2.0 / 5.12), 5.12 * (2.0 / 5.12)], 31 | ]) 32 | global_minimizers = np.array([ 33 | [0.0], 34 | ]) 35 | global_minimum = 0.0 36 | dim_problem = dim_problem 37 | 38 | function = lambda bx: fun_target(bx, dim_problem) 39 | 40 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function, dim_problem=dim_problem) 41 | 42 | 43 | if __name__ == '__main__': 44 | import matplotlib.pyplot as plt 45 | from mpl_toolkits import mplot3d 46 | 47 | func = Rastrigin(dim_problem=2) 48 | lb, ub = func.get_bounds().transpose() 49 | # lb, ub = np.where(lb < -2, -2, lb), np.where(ub > 2, 2, ub) 50 | 51 | x1 = np.linspace(lb[0], ub[0], 50) 52 | x2 = np.linspace(lb[1], ub[1], 50) 53 | x1, x2 = np.meshgrid(x1, x2) 54 | pts = np.column_stack((x1.ravel(), x2.ravel())) 55 | func_val = func.output(pts) 56 | 57 | fig = plt.figure(figsize=(25, 25)) 58 | 59 | ax = fig.add_subplot(1, 1, 1, projection='3d') 60 | ax.plot_surface(x1, x2, func_val.reshape(x1.shape)) 61 | print(func.output(np.zeros((2, 2)))) 62 | plt.show() 63 | -------------------------------------------------------------------------------- /regression/README.md: -------------------------------------------------------------------------------- 1 | ## 1D Regression 2 | 3 | --- 4 | ### Training 5 | ``` 6 | python gp.py --mode=train --expid=default-tnpa --model=tnpa 7 | ``` 8 | The hyperparameter config of each model is saved in `configs/gp`. If training for the first time, evaluation data will be generated and saved in `evalsets/gp`. Model weights and logs are saved in `results/gp/{model}/{expid}`. 9 | 10 | ### Evaluation 11 | ``` 12 | python gp.py --mode=evaluate_all_metrics --expid=default-tnpa --model=tnpa 13 | ``` 14 | Note that you have to specify `{expid}` correctly. The model will load weights from `results/gp/{model}/{expid}` to evaluate. 15 | 16 | ## CelebA Image Completion 17 | --- 18 | 19 | ### Prepare data 20 | Download [img_align_celeba.zip](https://drive.google.com/drive/folders/0B7EVK8r0v71pTUZsaXdaSnZBZzg) and unzip. Download [list_eval_partition.txt](https://drive.google.com/drive/folders/0B7EVK8r0v71pdjI3dmwtNm5jRkE) and [identity_CelebA.txt](https://drive.google.com/drive/folders/0B7EVK8r0v71pOC0wOVZlQnFfaGs). Place the downloaded files in the `datasets/celeba` folder. Run `python data/celeba.py` to preprocess the data.
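For reference, a sketch of the layout `data/celeba.py` expects under `datasets/celeba` before preprocessing (inferred from the paths used in that script):
```
datasets/celeba/
├── img_align_celeba/        # unzipped images
├── list_eval_partition.txt
└── identity_CelebA.txt
```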
21 | 22 | ### Training 23 | ``` 24 | python celeba.py --mode=train --expid=default-tnpa --model=tnpa 25 | ``` 26 | 27 | ### Evaluation 28 | ``` 29 | python celeba.py --mode=evaluate_all_metrics --expid=default-tnpa --model=tnpa 30 | ``` 31 | If evaluating for the first time, evaluation data will be generated and saved in `evalsets/celeba`. 32 | 33 | ## EMNIST Image Completion 34 | --- 35 | 36 | ### Training 37 | ``` 38 | python emnist.py --mode=train --expid=default-tnpa --model=tnpa 39 | ``` 40 | If training for the first time, EMNIST training data will be automatically downloaded and saved in `datasets/emnist`. 41 | 42 | ### Evaluation 43 | ``` 44 | python emnist.py --mode=evaluate_all_metrics --expid=default-tnpa --model=tnpa 45 | ``` 46 | If evaluating for the first time, evaluation data will be generated and saved in `evalsets/emnist`. -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/two_dim_michalewicz.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | 15 | y = 0.0 16 | 17 | for ind in range(0, dim_bx): 18 | y += np.sin(bx[ind]) * np.sin(((ind + 1.0) * bx[ind]**2) / np.pi)**(2.0 * 10.0) 19 | y *= -1.0 20 | 21 | return y 22 | 23 | 24 | class Michalewicz(Function): 25 | def __init__(self): 26 | dim_bx = 2 27 | bounds = np.array([ 28 | [0.0, np.pi], 29 | [0.0, np.pi], 30 | ]) 31 | global_minimizers = np.array([ 32 | [2.20279089, 1.57063923], 33 | ]) 34 | global_minimum = -1.801302197 35 | function = lambda bx: fun_target(bx, dim_bx) 36 | 37 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 38 | 39 | 40 | def translated_fun_target(bx, dim_bx): 41 | assert len(bx.shape) == 1 42 | assert bx.shape[0] == dim_bx 43 | 44 | y = 0.0 45 | 46 | for ind in range(0, dim_bx): 47 | y += np.sin(bx[ind] + 1.5) * np.sin(((ind + 1.0) * (bx[ind] + 1.5)**2) / np.pi)**(2.0 * 10.0) 48 | y *= -1.0 49 | 50 | return y 51 | 52 | 53 | class TranslatedMichalewicz(Function): 54 | def __init__(self): 55 | dim_bx = 2 56 | bounds = np.array([ 57 | [0.0 - 1.5, np.pi - 1.5], 58 | [0.0 - 1.5, np.pi - 1.5], 59 | ]) 60 | global_minimizers = np.array([ 61 | [2.20279089 - 1.5, 1.57063923 - 1.5], 62 | ]) 63 | global_minimum = -1.801302197 64 | function = lambda bx: translated_fun_target(bx, dim_bx) 65 | 66 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 67 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnpd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.tnp import TNP 7 | 8 | 9 | class TNPD(TNP): 10 | def __init__( 11 | self, 12 | dim_x, 13 | dim_y, 14 | d_model, 15 | emb_depth, 16 | dim_feedforward, 17 | nhead, 18 | dropout, 19 | num_layers, 20 | ): 21 | super(TNPD, self).__init__( 22 | dim_x, 23 | dim_y, 24 | d_model, 25 | emb_depth, 26 | dim_feedforward, 27 | nhead, 28 | dropout, 29 | num_layers, 30 | ) 31 | 32 | self.predictor = nn.Sequential( 33 | nn.Linear(d_model, dim_feedforward), 34 | nn.ReLU(), 35 |
nn.Linear(dim_feedforward, dim_y*2) 36 | ) 37 | 38 | def forward(self, batch, reduce_ll=True): 39 | out_encoder = self.encode(batch, autoreg=False) 40 | out = self.predictor(out_encoder) 41 | mean, std = torch.chunk(out, 2, dim=-1) 42 | 43 | std = torch.exp(std) 44 | pred_dist = Normal(mean, std) 45 | loss = - pred_dist.log_prob(batch.yt).sum(-1).mean() 46 | 47 | outs = AttrDict() 48 | outs.loss = loss 49 | return outs 50 | 51 | def predict(self, xc, yc, xt, num_samples=None): 52 | if xc.shape[-3] != xt.shape[-3]: 53 | xt = xt.transpose(-3, -2) 54 | 55 | batch = AttrDict() 56 | batch.xc = xc 57 | batch.yc = yc 58 | batch.xt = xt 59 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 60 | 61 | out_encoder = self.encode(batch, autoreg=False) 62 | out = self.predictor(out_encoder) 63 | mean, std = torch.chunk(out, 2, dim=-1) 64 | std = torch.exp(std) 65 | 66 | return Normal(mean, std) -------------------------------------------------------------------------------- /regression/models/cnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import PoolingEncoder, Decoder 6 | 7 | class CNP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_pre_depth=4, 13 | enc_post_depth=2, 14 | dec_depth=3): 15 | 16 | super().__init__() 17 | 18 | self.enc1 = PoolingEncoder( 19 | dim_x=dim_x, 20 | dim_y=dim_y, 21 | dim_hid=dim_hid, 22 | pre_depth=enc_pre_depth, 23 | post_depth=enc_post_depth) 24 | 25 | self.enc2 = PoolingEncoder( 26 | dim_x=dim_x, 27 | dim_y=dim_y, 28 | dim_hid=dim_hid, 29 | pre_depth=enc_pre_depth, 30 | post_depth=enc_post_depth) 31 | 32 | self.dec = Decoder( 33 | dim_x=dim_x, 34 | dim_y=dim_y, 35 | dim_enc=2*dim_hid, 36 | dim_hid=dim_hid, 37 | depth=dec_depth) 38 | 39 | def predict(self, xc, yc, xt, num_samples=None): 40 | encoded = torch.cat([self.enc1(xc, yc), self.enc2(xc, yc)], -1) 41 | encoded = torch.stack([encoded]*xt.shape[-2], -2) 42 | return self.dec(encoded, xt) 43 | 44 | def forward(self, batch, num_samples=None, reduce_ll=True): 45 | outs = AttrDict() 46 | py = self.predict(batch.xc, batch.yc, batch.x) 47 | ll = py.log_prob(batch.y).sum(-1) 48 | 49 | if self.training: 50 | outs.loss = -ll.mean() 51 | else: 52 | num_ctx = batch.xc.shape[-2] 53 | if reduce_ll: 54 | outs.ctx_ll = ll[...,:num_ctx].mean() 55 | outs.tar_ll = ll[...,num_ctx:].mean() 56 | else: 57 | outs.ctx_ll = ll[...,:num_ctx] 58 | outs.tar_ll = ll[...,num_ctx:] 59 | 60 | return outs 61 | -------------------------------------------------------------------------------- /regression/models/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class MultiHeadAttn(nn.Module): 7 | def __init__(self, dim_q, dim_k, dim_v, dim_out, num_heads=8): 8 | super().__init__() 9 | self.num_heads = num_heads 10 | self.dim_out = dim_out 11 | self.fc_q = nn.Linear(dim_q, dim_out, bias=False) 12 | self.fc_k = nn.Linear(dim_k, dim_out, bias=False) 13 | self.fc_v = nn.Linear(dim_v, dim_out, bias=False) 14 | self.fc_out = nn.Linear(dim_out, dim_out) 15 | self.ln1 = nn.LayerNorm(dim_out) 16 | self.ln2 = nn.LayerNorm(dim_out) 17 | 18 | def scatter(self, x): 19 | return torch.cat(x.chunk(self.num_heads, -1), -3) 20 | 21 | def gather(self, x): 22 | return torch.cat(x.chunk(self.num_heads, -3), -1) 23 | 24 
| def attend(self, q, k, v, mask=None): 25 | q_, k_, v_ = [self.scatter(x) for x in [q, k, v]] 26 | A_logits = q_ @ k_.transpose(-2, -1) / math.sqrt(self.dim_out) 27 | if mask is not None: 28 | mask = mask.bool().to(q.device) 29 | mask = torch.stack([mask]*q.shape[-2], -2) 30 | mask = torch.cat([mask]*self.num_heads, -3) 31 | A = torch.softmax(A_logits.masked_fill(mask, -float('inf')), -1) 32 | A = A.masked_fill(torch.isnan(A), 0.0) 33 | else: 34 | A = torch.softmax(A_logits, -1) 35 | return self.gather(A @ v_) 36 | 37 | def forward(self, q, k, v, mask=None): 38 | q, k, v = self.fc_q(q), self.fc_k(k), self.fc_v(v) 39 | out = self.ln1(q + self.attend(q, k, v, mask=mask)) 40 | out = self.ln2(out + F.relu(self.fc_out(out))) 41 | return out 42 | 43 | class SelfAttn(MultiHeadAttn): 44 | def __init__(self, dim_in, dim_out, num_heads=8): 45 | super().__init__(dim_in, dim_in, dim_in, dim_out, num_heads) 46 | 47 | def forward(self, x, mask=None): 48 | return super().forward(x, x, x, mask=mask) 49 | -------------------------------------------------------------------------------- /contextual_bandits/models/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class MultiHeadAttn(nn.Module): 7 | def __init__(self, dim_q, dim_k, dim_v, dim_out, num_heads=8): 8 | super().__init__() 9 | self.num_heads = num_heads 10 | self.dim_out = dim_out 11 | self.fc_q = nn.Linear(dim_q, dim_out, bias=False) 12 | self.fc_k = nn.Linear(dim_k, dim_out, bias=False) 13 | self.fc_v = nn.Linear(dim_v, dim_out, bias=False) 14 | self.fc_out = nn.Linear(dim_out, dim_out) 15 | self.ln1 = nn.LayerNorm(dim_out) 16 | self.ln2 = nn.LayerNorm(dim_out) 17 | 18 | def scatter(self, x): 19 | return torch.cat(x.chunk(self.num_heads, -1), -3) 20 | 21 | def gather(self, x): 22 | return torch.cat(x.chunk(self.num_heads, -3), -1) 23 | 24 | def attend(self, q, k, v, mask=None): 25 | q_, k_, v_ = [self.scatter(x) for x in [q, k, v]] 26 | A_logits = q_ @ k_.transpose(-2, -1) / math.sqrt(self.dim_out) 27 | if mask is not None: 28 | mask = mask.bool().to(q.device) 29 | mask = torch.stack([mask]*q.shape[-2], -2) 30 | mask = torch.cat([mask]*self.num_heads, -3) 31 | A = torch.softmax(A_logits.masked_fill(mask, -float('inf')), -1) 32 | A = A.masked_fill(torch.isnan(A), 0.0) 33 | else: 34 | A = torch.softmax(A_logits, -1) 35 | return self.gather(A @ v_) 36 | 37 | def forward(self, q, k, v, mask=None): 38 | q, k, v = self.fc_q(q), self.fc_k(k), self.fc_v(v) 39 | out = self.ln1(q + self.attend(q, k, v, mask=mask)) 40 | out = self.ln2(out + F.relu(self.fc_out(out))) 41 | return out 42 | 43 | class SelfAttn(MultiHeadAttn): 44 | def __init__(self, dim_in, dim_out, num_heads=8): 45 | super().__init__(dim_in, dim_in, dim_in, dim_out, num_heads) 46 | 47 | def forward(self, x, mask=None): 48 | return super().forward(x, x, x, mask=mask) 49 | -------------------------------------------------------------------------------- /contextual_bandits/runner/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def get_args(): 5 | parser = argparse.ArgumentParser() 6 | 7 | # Experiment 8 | parser.add_argument('--expid', type=str, default=None) 9 | parser.add_argument('--resume', type=str, default=None) 10 | parser.add_argument('--device', type=str, default='cuda') # 'cpu' to use cpu 11 | 12 | # wheel 13 | parser.add_argument("--cmab_data", choices=["wheel"], 
default="wheel") 14 | parser.add_argument("--cmab_wheel_delta", type=float, default=0.5) 15 | parser.add_argument("--cmab_mode", choices=["train", "eval", "plot", "evalplot"], default="train") 16 | parser.add_argument('--cmab_num_bs', type=int, default=10) 17 | parser.add_argument("--cmab_train_update_freq", type=int, default=1) 18 | parser.add_argument("--cmab_train_num_batches", type=int, default=1) 19 | parser.add_argument("--cmab_train_batch_size", type=int, default=8) 20 | parser.add_argument("--cmab_train_seed", type=int, default=0) 21 | parser.add_argument("--cmab_train_reward", type=str, default="all") 22 | parser.add_argument("--cmab_eval_method", type=str, default="ucb") 23 | parser.add_argument("--cmab_eval_num_contexts", type=int, default=2000) 24 | parser.add_argument("--cmab_eval_seed_start", type=int, default=0) 25 | parser.add_argument("--cmab_eval_seed_end", type=int, default=49) 26 | parser.add_argument("--cmab_plot_seed_start", type=int, default=0) 27 | parser.add_argument("--cmab_plot_seed_end", type=int, default=49) 28 | 29 | # Model 30 | parser.add_argument('--model', type=str, default="tnpa") 31 | 32 | # Training 33 | parser.add_argument('--lr', type=float, default=5e-4) 34 | parser.add_argument('--num_epochs', type=int, default=100000) 35 | parser.add_argument('--print_freq', type=int, default=200) 36 | parser.add_argument('--eval_freq', type=int, default=5000) 37 | parser.add_argument('--save_freq', type=int, default=1000) 38 | 39 | args = parser.parse_args() 40 | 41 | return args -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/one_dim_step.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target(bx, dim_bx, steps, step_values): 12 | assert len(bx.shape) == 1 13 | assert bx.shape[0] == dim_bx 14 | assert isinstance(steps, list) 15 | assert isinstance(step_values, list) 16 | assert len(steps) == len(step_values) + 1 17 | assert isinstance(steps[0], float) 18 | assert isinstance(step_values[0], float) 19 | 20 | y = None 21 | for ind_step in range(0, len(steps) - 1): 22 | if ind_step < (len(steps) - 2) and steps[ind_step] <= bx[0] and bx[0] < steps[ind_step+1]: 23 | y = step_values[ind_step] 24 | break 25 | elif ind_step == (len(steps) - 2) and steps[ind_step] <= bx[0] and bx[0] <= steps[ind_step+1]: 26 | y = step_values[ind_step] 27 | break 28 | 29 | if y is None: 30 | raise ValueError('Conditions for steps') 31 | return y 32 | 33 | 34 | class Step(Function): 35 | def __init__(self, 36 | steps=[-10., -5., 0., 5., 10.], 37 | step_values=[-2., 0., 1., -1.], 38 | ): 39 | assert isinstance(steps, list) 40 | assert isinstance(step_values, list) 41 | assert len(steps) == len(step_values) + 1 42 | assert isinstance(steps[0], float) 43 | assert isinstance(step_values[0], float) 44 | assert np.all(np.sort(steps) == np.asarray(steps)) 45 | 46 | dim_bx = 1 47 | bounds = np.array([ 48 | [np.min(steps), np.max(steps)], 49 | ]) 50 | global_minimizers = np.array([ 51 | [steps[np.argmin(step_values)]], 52 | ]) 53 | global_minimum = np.min(step_values) 54 | function = lambda bx: fun_target(bx, dim_bx, steps, step_values) 55 | 56 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function) 57 | 
-------------------------------------------------------------------------------- /bayesian_optimization/models/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class MultiHeadAttn(nn.Module): 7 | def __init__(self, dim_q, dim_k, dim_v, dim_out, num_heads=8): 8 | super().__init__() 9 | self.num_heads = num_heads 10 | self.dim_out = dim_out 11 | self.fc_q = nn.Linear(dim_q, dim_out, bias=False) 12 | self.fc_k = nn.Linear(dim_k, dim_out, bias=False) 13 | self.fc_v = nn.Linear(dim_v, dim_out, bias=False) 14 | self.fc_out = nn.Linear(dim_out, dim_out) 15 | self.ln1 = nn.LayerNorm(dim_out) 16 | self.ln2 = nn.LayerNorm(dim_out) 17 | 18 | def scatter(self, x): 19 | return torch.cat(x.chunk(self.num_heads, -1), -3) 20 | 21 | def gather(self, x): 22 | return torch.cat(x.chunk(self.num_heads, -3), -1) 23 | 24 | def attend(self, q, k, v, mask=None): 25 | q, k, v = [self.scatter(x) for x in [q, k, v]] 26 | A_logits = q @ k.transpose(-2, -1) / math.sqrt(self.dim_out) 27 | if mask is not None: 28 | mask = mask.bool().to(q.device) 29 | mask = torch.stack([mask] * q.shape[-2], -2) 30 | mask = torch.cat([mask] * self.num_heads, -3) 31 | A = torch.softmax(A_logits.masked_fill(mask, -float('inf')), -1) 32 | A = A.masked_fill(torch.isnan(A), 0.0) 33 | else: 34 | A = torch.softmax(A_logits, -1) 35 | return self.gather(A @ v) 36 | 37 | def forward(self, q, k, v, mask=None): 38 | q, k, v = self.fc_q(q), self.fc_k(k), self.fc_v(v) 39 | out = self.ln1(q + self.attend(q, k, v, mask=mask)) 40 | out = self.ln2(out + F.relu(self.fc_out(out))) 41 | return out 42 | 43 | 44 | class SelfAttn(MultiHeadAttn): 45 | def __init__(self, dim_in, dim_out, num_heads=8): 46 | super().__init__(dim_in, dim_in, dim_in, dim_out, num_heads) 47 | 48 | def forward(self, x, mask=None, **kwargs): 49 | return super().forward(x, x, x, mask=mask) 50 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnpa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPA(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | ): 22 | super(TNPA, self).__init__( 23 | dim_x, 24 | dim_y, 25 | d_model, 26 | emb_depth, 27 | dim_feedforward, 28 | nhead, 29 | dropout, 30 | num_layers, 31 | ) 32 | 33 | self.predictor = nn.Sequential( 34 | nn.Linear(d_model, dim_feedforward), 35 | nn.ReLU(), 36 | nn.Linear(dim_feedforward, dim_y*2) 37 | ) 38 | 39 | 40 | def forward(self, batch, reduce_ll=True): 41 | out_encoder = self.encode(batch, autoreg=True) 42 | out = self.predictor(out_encoder) 43 | mean, std = torch.chunk(out, 2, dim=-1) 44 | std = torch.exp(std) 45 | 46 | pred_dist = Normal(mean, std) 47 | loss = - pred_dist.log_prob(batch.yt).sum(-1).mean() 48 | 49 | outs = AttrDict() 50 | outs.loss = loss 51 | return outs 52 | 53 | def predict(self, xc, yc, xt, num_samples=None): 54 | if xc.shape[-3] != xt.shape[-3]: 55 | xt = xt.transpose(-3, -2) 56 | 57 | batch = AttrDict() 58 | batch.xc = xc 59 | batch.yc = yc 60 | batch.xt = xt 61 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 62 | 
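        # yt is only a shape placeholder here; its values are not used for prediction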
63 | # in evaluation tnpa = tnpd because we only have 1 target point to predict 64 | out_encoder = self.encode(batch, autoreg=False) 65 | out = self.predictor(out_encoder) 66 | mean, std = torch.chunk(out, 2, dim=-1) 67 | std = torch.exp(std) 68 | 69 | return Normal(mean, std) -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/inf_dim_ackley.py: -------------------------------------------------------------------------------- 1 | # 2 | # author: Jungtaek Kim (jtkim@postech.ac.kr) 3 | # last updated: February 8, 2021 4 | # 5 | 6 | import numpy as np 7 | 8 | from bayeso_benchmarks.benchmark_base import Function 9 | 10 | 11 | def fun_target( 12 | bx, 13 | dim_bx, 14 | a=20.0, 15 | b=0.2, 16 | c=2.0 * np.pi 17 | ): 18 | assert len(bx.shape) == 1 19 | assert bx.shape[0] == dim_bx 20 | assert isinstance(a, float) 21 | assert isinstance(b, float) 22 | assert isinstance(c, float) 23 | 24 | y = -a * np.exp(-b * np.linalg.norm(bx, ord=2, axis=0) * np.sqrt(1.0 / dim_bx)) - np.exp( 25 | 1.0 / dim_bx * np.sum(np.cos(c * bx), axis=0)) + a + np.exp(1.0) 26 | return y 27 | 28 | 29 | class Ackley(Function): 30 | def __init__(self, dim_problem): 31 | assert isinstance(dim_problem, int) 32 | 33 | dim_bx = np.inf 34 | bounds = np.array([ 35 | [-32.768, 32.768], 36 | ]) 37 | global_minimizers = np.array([ 38 | [0.0], 39 | ]) 40 | global_minimum = 0.0 41 | dim_problem = dim_problem 42 | 43 | function = lambda bx: fun_target(bx, dim_problem) 44 | 45 | Function.__init__(self, dim_bx, bounds, global_minimizers, global_minimum, function, dim_problem=dim_problem) 46 | 47 | 48 | if __name__ == '__main__': 49 | import matplotlib.pyplot as plt 50 | 51 | func = Ackley(dim_problem=2) 52 | lb, ub = func.get_bounds().transpose() 53 | lb, ub = np.where(lb < -2, -2, lb), np.where(ub > 2, 2, ub) 54 | 55 | x1 = np.linspace(lb[0], ub[0], 100) 56 | x2 = np.linspace(lb[1], ub[1], 100) 57 | x1, x2 = np.meshgrid(x1, x2) 58 | pts = np.column_stack((x1.ravel(), x2.ravel())) 59 | y = func.output(pts) 60 | 61 | contour = plt.contourf(x1, x2, y.reshape(x1.shape), 50, cmap='RdGy') 62 | # contour = plt.contourf(x1, x2, y.reshape(x1.shape), 50) 63 | plt.imshow(y.reshape(x1.shape), extent=[lb[0], ub[0], lb[1], ub[1]], origin='lower', 64 | cmap='RdGy') 65 | plt.colorbar(contour) 66 | plt.show() 67 | -------------------------------------------------------------------------------- /regression/models/canp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import CrossAttnEncoder, Decoder, PoolingEncoder 6 | 7 | class CANP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_v_depth=4, 13 | enc_qk_depth=2, 14 | enc_pre_depth=4, 15 | enc_post_depth=2, 16 | dec_depth=3): 17 | 18 | super().__init__() 19 | 20 | self.enc1 = CrossAttnEncoder( 21 | dim_x=dim_x, 22 | dim_y=dim_y, 23 | dim_hid=dim_hid, 24 | v_depth=enc_v_depth, 25 | qk_depth=enc_qk_depth) 26 | 27 | self.enc2 = PoolingEncoder( 28 | dim_x=dim_x, 29 | dim_y=dim_y, 30 | dim_hid=dim_hid, 31 | self_attn=True, 32 | pre_depth=enc_pre_depth, 33 | post_depth=enc_post_depth) 34 | 35 | self.dec = Decoder( 36 | dim_x=dim_x, 37 | dim_y=dim_y, 38 | dim_enc=2*dim_hid, 39 | dim_hid=dim_hid, 40 | depth=dec_depth) 41 | 42 | def predict(self, xc, yc, xt, num_samples=None): 43 | theta1 = self.enc1(xc, yc, xt) 44 | theta2 = self.enc2(xc, yc) 45 | encoded = 
torch.cat([theta1, 46 | torch.stack([theta2]*xt.shape[-2], -2)], -1) 47 | return self.dec(encoded, xt) 48 | 49 | def forward(self, batch, num_samples=None, reduce_ll=True): 50 | outs = AttrDict() 51 | py = self.predict(batch.xc, batch.yc, batch.x) 52 | ll = py.log_prob(batch.y).sum(-1) 53 | 54 | if self.training: 55 | outs.loss = -ll.mean() 56 | else: 57 | num_ctx = batch.xc.shape[-2] 58 | if reduce_ll: 59 | outs.ctx_ll = ll[...,:num_ctx].mean() 60 | outs.tar_ll = ll[...,num_ctx:].mean() 61 | else: 62 | outs.ctx_ll = ll[...,:num_ctx] 63 | outs.tar_ll = ll[...,num_ctx:] 64 | 65 | return outs 66 | -------------------------------------------------------------------------------- /contextual_bandits/models/tnpd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.tnp import TNP 7 | 8 | 9 | class TNPD(TNP): 10 | def __init__( 11 | self, 12 | dim_x, 13 | dim_y, 14 | d_model, 15 | emb_depth, 16 | dim_feedforward, 17 | nhead, 18 | dropout, 19 | num_layers, 20 | drop_y=0.5 21 | ): 22 | super(TNPD, self).__init__( 23 | dim_x, 24 | dim_y, 25 | d_model, 26 | emb_depth, 27 | dim_feedforward, 28 | nhead, 29 | dropout, 30 | num_layers, 31 | drop_y 32 | ) 33 | 34 | self.predictor = nn.Sequential( 35 | nn.Linear(d_model, dim_feedforward), 36 | nn.ReLU(), 37 | nn.Linear(dim_feedforward, dim_y*2) 38 | ) 39 | 40 | def forward(self, batch, reduce_ll=True): 41 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=True) 42 | out = self.predictor(out_encoder) 43 | mean, std = torch.chunk(out, 2, dim=-1) 44 | 45 | std = torch.exp(std) 46 | pred_dist = Normal(mean, std) 47 | loss = - pred_dist.log_prob(batch.y).sum(-1).mean() 48 | 49 | outs = AttrDict() 50 | outs.loss = loss 51 | return outs 52 | 53 | def predict(self, xc, yc, xt): 54 | batch = AttrDict() 55 | batch.xc = xc 56 | batch.yc = yc 57 | batch.xt = xt 58 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 59 | 60 | num_context = xc.shape[1] 61 | 62 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=False) 63 | out = self.predictor(out_encoder) 64 | mean, std = torch.chunk(out, 2, dim=-1) 65 | std = torch.exp(std) 66 | mean, std = mean[:, num_context:, :], std[:, num_context:, :] 67 | 68 | outs = AttrDict() 69 | outs.loc = mean.unsqueeze(0) 70 | outs.scale = std.unsqueeze(0) 71 | outs.ys = Normal(outs.loc, outs.scale) 72 | 73 | return outs -------------------------------------------------------------------------------- /contextual_bandits/models/canp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import CrossAttnEncoder, Decoder, PoolingEncoder 6 | 7 | class CANP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_v_depth=4, 13 | enc_qk_depth=2, 14 | enc_pre_depth=4, 15 | enc_post_depth=2, 16 | dec_depth=3): 17 | 18 | super().__init__() 19 | 20 | self.enc1 = CrossAttnEncoder( 21 | dim_x=dim_x, 22 | dim_y=dim_y, 23 | dim_hid=dim_hid, 24 | v_depth=enc_v_depth, 25 | qk_depth=enc_qk_depth) 26 | 27 | self.enc2 = PoolingEncoder( 28 | dim_x=dim_x, 29 | dim_y=dim_y, 30 | dim_hid=dim_hid, 31 | self_attn=True, 32 | pre_depth=enc_pre_depth, 33 | post_depth=enc_post_depth) 34 | 35 | self.dec = Decoder( 36 | dim_x=dim_x, 37 | dim_y=dim_y, 38 | dim_enc=2*dim_hid, 39 | 
dim_hid=dim_hid, 40 | depth=dec_depth) 41 | 42 | def predict(self, xc, yc, xt, num_samples=None): 43 | theta1 = self.enc1(xc, yc, xt) # [B,Nt,Eh] 44 | theta2 = self.enc2(xc, yc) # [B,Eh] 45 | encoded = torch.cat([theta1, 46 | torch.stack([theta2]*xt.shape[-2], -2)], -1) # [B,Nt,2Eh] 47 | return self.dec(encoded, xt) 48 | 49 | def forward(self, batch, num_samples=None, reduce_ll=True): 50 | outs = AttrDict() 51 | py = self.predict(batch.xc, batch.yc, batch.x) 52 | ll = py.log_prob(batch.y).sum(-1) 53 | 54 | if self.training: 55 | outs.loss = - ll.mean() 56 | else: 57 | num_ctx = batch.xc.shape[-2] 58 | if reduce_ll: 59 | outs.ctx_loss = ll[...,:num_ctx].mean() 60 | outs.tar_loss = ll[...,num_ctx:].mean() 61 | else: 62 | outs.ctx_loss = ll[...,:num_ctx] 63 | outs.tar_loss = ll[...,num_ctx:] 64 | 65 | return outs 66 | -------------------------------------------------------------------------------- /regression/models/tnpd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.distributions.normal import Normal 5 | from attrdict import AttrDict 6 | 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPD(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | bound_std=False 22 | ): 23 | super(TNPD, self).__init__( 24 | dim_x, 25 | dim_y, 26 | d_model, 27 | emb_depth, 28 | dim_feedforward, 29 | nhead, 30 | dropout, 31 | num_layers, 32 | bound_std 33 | ) 34 | 35 | self.predictor = nn.Sequential( 36 | nn.Linear(d_model, dim_feedforward), 37 | nn.ReLU(), 38 | nn.Linear(dim_feedforward, dim_y*2) 39 | ) 40 | 41 | def forward(self, batch, reduce_ll=True): 42 | z_target = self.encode(batch, autoreg=False) 43 | out = self.predictor(z_target) 44 | mean, std = torch.chunk(out, 2, dim=-1) 45 | if self.bound_std: 46 | std = 0.05 + 0.95 * F.softplus(std) 47 | else: 48 | std = torch.exp(std) 49 | 50 | pred_tar = Normal(mean, std) 51 | 52 | outs = AttrDict() 53 | if reduce_ll: 54 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1).mean() 55 | else: 56 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1) 57 | outs.loss = - (outs.tar_ll) 58 | 59 | return outs 60 | 61 | def predict(self, xc, yc, xt): 62 | batch = AttrDict() 63 | batch.xc = xc 64 | batch.yc = yc 65 | batch.xt = xt 66 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 67 | 68 | z_target = self.encode(batch, autoreg=False) 69 | out = self.predictor(z_target) 70 | mean, std = torch.chunk(out, 2, dim=-1) 71 | if self.bound_std: 72 | std = 0.05 + 0.95 * F.softplus(std) 73 | else: 74 | std = torch.exp(std) 75 | 76 | return Normal(mean, std) -------------------------------------------------------------------------------- /contextual_bandits/models/cnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from attrdict import AttrDict 5 | from models.modules import PoolingEncoder, Decoder 6 | 7 | 8 | class CNP(nn.Module): 9 | def __init__(self, 10 | dim_x=1, 11 | dim_y=1, 12 | dim_hid=128, 13 | enc_pre_depth=4, 14 | enc_post_depth=2, 15 | dec_depth=3): 16 | 17 | super().__init__() 18 | 19 | self.enc1 = PoolingEncoder( 20 | dim_x=dim_x, 21 | dim_y=dim_y, 22 | dim_hid=dim_hid, 23 | pre_depth=enc_pre_depth, 24 | post_depth=enc_post_depth) 25 | 26 | self.enc2 = PoolingEncoder( 27 | dim_x=dim_x, 28 | dim_y=dim_y, 29 | 
dim_hid=dim_hid, 30 | pre_depth=enc_pre_depth, 31 | post_depth=enc_post_depth) 32 | 33 | self.dec = Decoder( 34 | dim_x=dim_x, 35 | dim_y=dim_y, 36 | dim_enc=2*dim_hid, 37 | dim_hid=dim_hid, 38 | depth=dec_depth) 39 | 40 | def predict(self, xc, yc, xt, num_samples=None): 41 | encoded = torch.cat([self.enc1(xc, yc), self.enc2(xc, yc)], -1) # [B,2Eh] 42 | encoded = torch.stack([encoded]*xt.shape[-2], -2) # [B,N,2Eh] 43 | return self.dec(encoded, xt) # Normal([B,N,1]) 44 | 45 | def forward(self, batch, num_samples=None, reduce_ll=True): 46 | outs = AttrDict() 47 | py = self.predict(batch.xc, batch.yc, batch.x) # Normal([B,N,1]) 48 | ll = py.log_prob(batch.y).sum(-1) # [B,N] 49 | 50 | if self.training: 51 | outs.loss = -ll.mean() 52 | else: 53 | num_ctx = batch.xc.shape[-2] # Nc 54 | if reduce_ll: 55 | outs.ctx_loss = ll[...,:num_ctx].mean() # [1,] 56 | outs.tar_loss = ll[...,num_ctx:].mean() # [1,] 57 | else: 58 | outs.ctx_loss = ll[...,:num_ctx] # [B,Nc] 59 | outs.tar_loss = ll[...,num_ctx:] # [B,Nt] 60 | 61 | return outs 62 | # {"loss": [1,]} while training 63 | # {"ctx_ll": [1,], "tar_ll": [1,]} while evaluating (if reduce_ll = True) -------------------------------------------------------------------------------- /bayesian_optimization/models/canp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.modules import CrossAttnEncoder, Decoder, PoolingEncoder 6 | 7 | class CANP(nn.Module): 8 | def __init__(self, 9 | dim_x=1, 10 | dim_y=1, 11 | dim_hid=128, 12 | enc_v_depth=4, 13 | enc_qk_depth=2, 14 | enc_pre_depth=4, 15 | enc_post_depth=2, 16 | dec_depth=3): 17 | 18 | super().__init__() 19 | 20 | self.enc1 = CrossAttnEncoder( 21 | dim_x=dim_x, 22 | dim_y=dim_y, 23 | dim_hid=dim_hid, 24 | v_depth=enc_v_depth, 25 | qk_depth=enc_qk_depth) 26 | 27 | self.enc2 = PoolingEncoder( 28 | dim_x=dim_x, 29 | dim_y=dim_y, 30 | dim_hid=dim_hid, 31 | self_attn=True, 32 | pre_depth=enc_pre_depth, 33 | post_depth=enc_post_depth) 34 | 35 | self.dec = Decoder( 36 | dim_x=dim_x, 37 | dim_y=dim_y, 38 | dim_enc=2*dim_hid, 39 | dim_hid=dim_hid, 40 | depth=dec_depth) 41 | 42 | def predict(self, xc, yc, xt, num_samples=None): 43 | if xc.shape[-3] != xt.shape[-3]: 44 | xt = xt.transpose(-3, -2) 45 | theta1 = self.enc1(xc, yc, xt) # [B,Nt,Eh] 46 | theta2 = self.enc2(xc, yc) # [B,Eh] 47 | encoded = torch.cat([theta1, 48 | torch.stack([theta2]*xt.shape[-2], -2)], -1) # [B,Nt,2Eh] 49 | return self.dec(encoded, xt) 50 | 51 | def forward(self, batch, num_samples=None, reduce_ll=True): 52 | outs = AttrDict() 53 | py = self.predict(batch.xc, batch.yc, batch.x) 54 | ll = py.log_prob(batch.y).sum(-1) 55 | 56 | if self.training: 57 | outs.loss = - ll.mean() 58 | else: 59 | num_ctx = batch.xc.shape[-2] 60 | if reduce_ll: 61 | outs.ctx_loss = ll[...,:num_ctx].mean() 62 | outs.tar_loss = ll[...,num_ctx:].mean() 63 | else: 64 | outs.ctx_loss = ll[...,:num_ctx] 65 | outs.tar_loss = ll[...,num_ctx:] 66 | 67 | return outs 68 | -------------------------------------------------------------------------------- /bayesian_optimization/models/cnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from attrdict import AttrDict 5 | from models.modules import PoolingEncoder, Decoder 6 | 7 | 8 | class CNP(nn.Module): 9 | def __init__(self, 10 | dim_x=1, 11 | dim_y=1, 12 | dim_hid=128, 13 | enc_pre_depth=4, 14 | enc_post_depth=2, 15 | 
dec_depth=3): 16 | 17 | super().__init__() 18 | 19 | self.enc1 = PoolingEncoder( 20 | dim_x=dim_x, 21 | dim_y=dim_y, 22 | dim_hid=dim_hid, 23 | pre_depth=enc_pre_depth, 24 | post_depth=enc_post_depth) 25 | 26 | self.enc2 = PoolingEncoder( 27 | dim_x=dim_x, 28 | dim_y=dim_y, 29 | dim_hid=dim_hid, 30 | pre_depth=enc_pre_depth, 31 | post_depth=enc_post_depth) 32 | 33 | self.dec = Decoder( 34 | dim_x=dim_x, 35 | dim_y=dim_y, 36 | dim_enc=2*dim_hid, 37 | dim_hid=dim_hid, 38 | depth=dec_depth) 39 | 40 | def predict(self, xc, yc, xt, num_samples=None): 41 | if xc.shape[-3] != xt.shape[-3]: 42 | xt = xt.transpose(-3, -2) 43 | encoded = torch.cat([self.enc1(xc, yc), self.enc2(xc, yc)], -1) # [B,2Eh] 44 | encoded = torch.stack([encoded]*xt.shape[-2], -2) # [B,N,2Eh] 45 | return self.dec(encoded, xt) # Normal([B,N,1]) 46 | 47 | def forward(self, batch, num_samples=None, reduce_ll=True): 48 | outs = AttrDict() 49 | py = self.predict(batch.xc, batch.yc, batch.x) # Normal([B,N,1]) 50 | ll = py.log_prob(batch.y).sum(-1) # [B,N] 51 | 52 | if self.training: 53 | outs.loss = -ll.mean() 54 | else: 55 | num_ctx = batch.xc.shape[-2] # Nc 56 | if reduce_ll: 57 | outs.ctx_loss = ll[...,:num_ctx].mean() # [1,] 58 | outs.tar_loss = ll[...,num_ctx:].mean() # [1,] 59 | else: 60 | outs.ctx_loss = ll[...,:num_ctx] # [B,Nc] 61 | outs.tar_loss = ll[...,num_ctx:] # [B,Nt] 62 | 63 | return outs 64 | # {"loss": [1,]} while training 65 | # {"ctx_ll": [1,], "tar_ll": [1,]} while evaluating (if reduce_ll = True) -------------------------------------------------------------------------------- /contextual_bandits/models/tnpa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.tnp import TNP 7 | 8 | 9 | class TNPA(TNP): 10 | def __init__( 11 | self, 12 | dim_x, 13 | dim_y, 14 | d_model, 15 | emb_depth, 16 | dim_feedforward, 17 | nhead, 18 | dropout, 19 | num_layers, 20 | drop_y=0.5, 21 | ): 22 | super(TNPA, self).__init__( 23 | dim_x, 24 | dim_y, 25 | d_model, 26 | emb_depth, 27 | dim_feedforward, 28 | nhead, 29 | dropout, 30 | num_layers, 31 | drop_y, 32 | ) 33 | 34 | self.predictor = nn.Sequential( 35 | nn.Linear(d_model, dim_feedforward), 36 | nn.ReLU(), 37 | nn.Linear(dim_feedforward, dim_y*2) 38 | ) 39 | 40 | 41 | def forward(self, batch, reduce_ll=True): 42 | num_ctx, num_all = batch.xc.shape[1], batch.x.shape[1] 43 | 44 | out_encoder = self.encode(batch, autoreg=True, drop_ctx=True) 45 | out_encoder = torch.cat((out_encoder[:, :num_ctx], out_encoder[:, num_all:]), dim=1) 46 | out = self.predictor(out_encoder) 47 | mean, std = torch.chunk(out, 2, dim=-1) 48 | std = torch.exp(std) 49 | 50 | pred_dist = Normal(mean, std) 51 | loss = - pred_dist.log_prob(batch.y).sum(-1).mean() 52 | 53 | outs = AttrDict() 54 | outs.loss = loss 55 | return outs 56 | 57 | def predict(self, xc, yc, xt): 58 | batch = AttrDict() 59 | batch.xc = xc 60 | batch.yc = yc 61 | batch.xt = xt 62 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 63 | 64 | num_context = xc.shape[1] 65 | 66 | # in evaluation tnpa = tnpd because we only have 1 target point to predict 67 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=False) 68 | out = self.predictor(out_encoder) 69 | mean, std = torch.chunk(out, 2, dim=-1) 70 | std = torch.exp(std) 71 | mean, std = mean[:, num_context:, :], std[:, num_context:, :] 72 | 73 | outs = AttrDict() 74 | outs.loc = 
mean.unsqueeze(0) 75 | outs.scale = std.unsqueeze(0) 76 | outs.ys = Normal(outs.loc, outs.scale) 77 | 78 | return outs -------------------------------------------------------------------------------- /regression/data/celeba.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os.path as osp 3 | 4 | from utils.paths import datasets_path 5 | 6 | class CelebA(object): 7 | def __init__(self, train=True): 8 | self.data, self.targets = torch.load( 9 | osp.join(datasets_path, 'celeba', 10 | 'train.pt' if train else 'eval.pt')) 11 | self.data = self.data.float() / 255.0 12 | 13 | if train: 14 | self.data, self.targets = self.data, self.targets 15 | else: 16 | self.data, self.targets = self.data, self.targets 17 | 18 | def __len__(self): 19 | return len(self.data) 20 | 21 | def __getitem__(self, index): 22 | return self.data[index], self.targets[index] 23 | 24 | if __name__ == '__main__': 25 | import os 26 | import os.path as osp 27 | from PIL import Image 28 | from tqdm import tqdm 29 | import numpy as np 30 | import torch 31 | 32 | # load train/val/test split 33 | splitdict = {} 34 | with open(osp.join(datasets_path, 'celeba', 'list_eval_partition.txt'), 'r') as f: 35 | for line in f: 36 | fn, split = line.split() 37 | splitdict[fn] = int(split) 38 | 39 | # load identities 40 | iddict = {} 41 | with open(osp.join(datasets_path, 'celeba', 'identity_CelebA.txt'), 'r') as f: 42 | for line in f: 43 | fn, label = line.split() 44 | iddict[fn] = int(label) 45 | 46 | train_imgs = [] 47 | train_labels = [] 48 | eval_imgs = [] 49 | eval_labels = [] 50 | path = osp.join(datasets_path, 'celeba', 'img_align_celeba') 51 | imgfilenames = os.listdir(path) 52 | for fn in tqdm(imgfilenames): 53 | 54 | img = Image.open(osp.join(path, fn)).resize((32, 32)) 55 | if splitdict[fn] == 2: 56 | eval_imgs.append(torch.LongTensor(np.array(img).transpose(2, 0, 1))) 57 | eval_labels.append(iddict[fn]) 58 | else: 59 | train_imgs.append(torch.LongTensor(np.array(img).transpose(2, 0, 1))) 60 | train_labels.append(iddict[fn]) 61 | 62 | print(f'{len(train_imgs)} train, {len(eval_imgs)} eval') 63 | 64 | train_imgs = torch.stack(train_imgs) 65 | train_labels = torch.LongTensor(train_labels) 66 | torch.save([train_imgs, train_labels], osp.join(datasets_path, 'celeba', 'train.pt')) 67 | 68 | eval_imgs = torch.stack(eval_imgs) 69 | eval_labels = torch.LongTensor(eval_labels) 70 | torch.save([eval_imgs, eval_labels], osp.join(datasets_path, 'celeba', 'eval.pt')) 71 | -------------------------------------------------------------------------------- /regression/models/tnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.modules import build_mlp 5 | 6 | 7 | class TNP(nn.Module): 8 | def __init__( 9 | self, 10 | dim_x, 11 | dim_y, 12 | d_model, 13 | emb_depth, 14 | dim_feedforward, 15 | nhead, 16 | dropout, 17 | num_layers, 18 | bound_std 19 | ): 20 | super(TNP, self).__init__() 21 | 22 | self.embedder = build_mlp(dim_x + dim_y, d_model, d_model, emb_depth) 23 | 24 | encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 25 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers) 26 | 27 | self.bound_std = bound_std 28 | 29 | def construct_input(self, batch, autoreg=False): 30 | x_y_ctx = torch.cat((batch.xc, batch.yc), dim=-1) 31 | x_0_tar = torch.cat((batch.xt, torch.zeros_like(batch.yt)), dim=-1) 32 | if not autoreg: 33 | inp 
= torch.cat((x_y_ctx, x_0_tar), dim=1) 34 | else: 35 | if self.training and self.bound_std: 36 | yt_noise = batch.yt + 0.05 * torch.randn_like(batch.yt) # add noise to the past to smooth the model 37 | x_y_tar = torch.cat((batch.xt, yt_noise), dim=-1) 38 | else: 39 | x_y_tar = torch.cat((batch.xt, batch.yt), dim=-1) 40 | inp = torch.cat((x_y_ctx, x_y_tar, x_0_tar), dim=1) 41 | return inp 42 | 43 | def create_mask(self, batch, autoreg=False): 44 | num_ctx = batch.xc.shape[1] 45 | num_tar = batch.xt.shape[1] 46 | num_all = num_ctx + num_tar 47 | if not autoreg: 48 | mask = torch.zeros(num_all, num_all, device='cuda').fill_(float('-inf')) 49 | mask[:, :num_ctx] = 0.0 50 | else: 51 | mask = torch.zeros((num_all+num_tar, num_all+num_tar), device='cuda').fill_(float('-inf')) 52 | mask[:, :num_ctx] = 0.0 # all points attend to context points 53 | mask[num_ctx:num_all, num_ctx:num_all].triu_(diagonal=1) # each real target point attends to itself and preceding real target points 54 | mask[num_all:, num_ctx:num_all].triu_(diagonal=0) # each fake target point attends to preceding real target points 55 | 56 | return mask, num_tar 57 | 58 | def encode(self, batch, autoreg=False): 59 | inp = self.construct_input(batch, autoreg) 60 | mask, num_tar = self.create_mask(batch, autoreg) 61 | embeddings = self.embedder(inp) 62 | out = self.encoder(embeddings, mask=mask) 63 | return out[:, -num_tar:] -------------------------------------------------------------------------------- /contextual_bandits/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch.distributions import Normal 4 | from utils.misc import stack 5 | 6 | 7 | def compute_nll(mu, sigma, y, ws=None, eps=1e-3, mask=None): 8 | if mask is None: 9 | mask = torch.ones(y.shape, dtype=torch.float32).to(mu.device) 10 | Ns = mu.size(0) 11 | sigma = sigma + eps 12 | py = Normal(mu, sigma) # [Ns,B,N,Dy] 13 | if y.dim() < 4: 14 | y = torch.stack([y] * Ns, 0) # [Ns,B,N,Dy] 15 | ll = (py.log_prob(y) * mask).sum(-1) # [Ns,B,N] 16 | 17 | if ws is not None: 18 | Nbs = ws.size(2) 19 | ll = torch.stack([ll] * Nbs, 2) # [Ns,B,Nbs,N] 20 | ll = (ll * ws).mean(2) # [Ns,B,N] 21 | 22 | return - ll # [Ns,B,N] 23 | 24 | 25 | def compute_beta_nll(mu, sigma, y, ws=None, beta=0.5, eps=1e-3, mask=None): # mu,sigma : [Ns,B,N,Dy], y: [B,N,Dy] ws: [Ns,B,Nbs,N] 26 | Ns = mu.size(0) 27 | sigma = sigma + eps 28 | y = torch.stack([y] * Ns, dim=0) # [Ns,B,N,Dy] 29 | 30 | if mask is None: 31 | mask = torch.ones(y.shape, dtype=torch.float32).to(y.device) 32 | ll_mu = - ((((y - mu) ** 2) / (2 * sigma ** 2)) * mask).sum(-1) # [Ns,B,N] 33 | ll_sigma = - (torch.log(sigma) * mask).sum(-1) # [Ns,B,N] 34 | 35 | if ws is not None: # [Ns,B,Nbs,N] 36 | Nbs = ws.size(2) 37 | _ll_mu = torch.stack([ll_mu] * Nbs, 2) # [Ns,B,Nbs,N] 38 | _ll_sigma = torch.stack([ll_sigma] * Nbs, 2) # [Ns,B,Nbs,N] 39 | _ll_mu = (_ll_mu * ws).mean(2) # [Ns,B,N] 40 | _ll_sigma = (_ll_sigma * ws).mean(2) # [Ns,B,N] 41 | ll = 2 * beta * _ll_mu + (2 - 2 * beta) * _ll_sigma # [Ns,B,N] 42 | else: 43 | ll = 2 * beta * ll_mu + (2 - 2 * beta) * ll_sigma # [Ns,B,N] 44 | 45 | return - ll, - ll_mu, - ll_sigma # [Ns,B,N] all 46 | 47 | 48 | def compute_l2(y_hat, y, ws=None, mask=None): # pred: [Ns,B,Nbs,N,Dy], y: [B,N,Dy] 49 | Ns = y_hat.size(0) 50 | Nbs = y_hat.size(2) 51 | y = torch.stack([torch.stack([y] * Ns, dim=0)] * Nbs, dim=2) # [Ns,B,Nbs,N,Dy] 52 | 53 | if mask is None: 54 | mask = torch.ones(y.shape, dtype=torch.float32).to(y.device) 55 | else:
56 | mask = stack(mask, Nbs, 2) 57 | l2 = (((y_hat - y) ** 2) * mask).sum(-1).mean(2) # [Ns,B,N] 58 | return l2 # [Ns,B,N] 59 | 60 | 61 | def compute_rmse(mean, y, mask=None): # mean: [Ns,B,N,Dy], y: [B,N,Dy] 62 | if mean.dim() == 4: 63 | Ns = mean.size(0) 64 | y = torch.stack([y] * Ns, dim=0) # [Ns,B,N,Dy] 65 | if mask is None: 66 | mask = torch.ones(y.shape, dtype=torch.float32).to(mean.device) 67 | rmse = ((((mean - y) ** 2) * mask).sum(-1).mean(-1) ** 0.5).mean() 68 | elif mean.dim() == 3: # CNP, CANP 69 | if mask is None: 70 | mask = torch.ones(y.shape, dtype=torch.float32).to(mean.device) 71 | rmse = ((((mean - y) ** 2) * mask).sum(-1).mean(-1) ** 0.5).mean() 72 | return rmse 73 | -------------------------------------------------------------------------------- /regression/data/image.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from attrdict import AttrDict 3 | from torch.distributions import StudentT 4 | 5 | def img_to_task(img, num_ctx=None, 6 | max_num_points=None, target_all=False, t_noise=None): 7 | 8 | B, C, H, W = img.shape 9 | num_pixels = H*W 10 | img = img.view(B, C, -1) 11 | 12 | if t_noise is not None: 13 | if t_noise == -1: 14 | t_noise = 0.09 * torch.rand(img.shape) 15 | img += t_noise * StudentT(2.1).rsample(img.shape) 16 | 17 | batch = AttrDict() 18 | max_num_points = max_num_points or num_pixels 19 | num_ctx = num_ctx or \ 20 | torch.randint(low=3, high=max_num_points-3, size=[1]).item() 21 | num_tar = max_num_points - num_ctx if target_all else \ 22 | torch.randint(low=3, high=max_num_points-num_ctx, size=[1]).item() 23 | num_points = num_ctx + num_tar 24 | idxs = torch.cuda.FloatTensor(B, num_pixels).uniform_().argsort(-1)[...,:num_points].to(img.device) 25 | x1, x2 = idxs//W, idxs%W 26 | batch.x = torch.stack([ 27 | 2*x1.float()/(H-1) - 1, 28 | 2*x2.float()/(W-1) - 1], -1).to(img.device) 29 | batch.y = (torch.gather(img, -1, idxs.unsqueeze(-2).repeat(1, C, 1))\ 30 | .transpose(-2, -1) - 0.5).to(img.device) 31 | 32 | batch.xc = batch.x[:,:num_ctx] 33 | batch.xt = batch.x[:,num_ctx:] 34 | batch.yc = batch.y[:,:num_ctx] 35 | batch.yt = batch.y[:,num_ctx:] 36 | 37 | return batch 38 | 39 | def coord_to_img(x, y, shape): 40 | x = x.cpu() 41 | y = y.cpu() 42 | B = x.shape[0] 43 | C, H, W = shape 44 | 45 | I = torch.zeros(B, 3, H, W) 46 | I[:,0,:,:] = 0.61 47 | I[:,1,:,:] = 0.55 48 | I[:,2,:,:] = 0.71 49 | 50 | x1, x2 = x[...,0], x[...,1] 51 | x1 = ((x1+1)*(H-1)/2).round().long() 52 | x2 = ((x2+1)*(W-1)/2).round().long() 53 | for b in range(B): 54 | for c in range(3): 55 | I[b,c,x1[b],x2[b]] = y[b,:,min(c,C-1)] 56 | 57 | return I 58 | 59 | def task_to_img(xc, yc, xt, yt, shape): 60 | xc = xc.cpu() 61 | yc = yc.cpu() 62 | xt = xt.cpu() 63 | yt = yt.cpu() 64 | 65 | B = xc.shape[0] 66 | C, H, W = shape 67 | 68 | xc1, xc2 = xc[...,0], xc[...,1] 69 | xc1 = ((xc1+1)*(H-1)/2).round().long() 70 | xc2 = ((xc2+1)*(W-1)/2).round().long() 71 | 72 | xt1, xt2 = xt[...,0], xt[...,1] 73 | xt1 = ((xt1+1)*(H-1)/2).round().long() 74 | xt2 = ((xt2+1)*(W-1)/2).round().long() 75 | 76 | task_img = torch.zeros(B, 3, H, W).to(xc.device) 77 | task_img[:,2,:,:] = 1.0 78 | task_img[:,1,:,:] = 0.4 79 | for b in range(B): 80 | for c in range(3): 81 | task_img[b,c,xc1[b],xc2[b]] = yc[b,:,min(c,C-1)] + 0.5 82 | task_img = task_img.clamp(0, 1) 83 | 84 | completed_img = task_img.clone() 85 | for b in range(B): 86 | for c in range(3): 87 | completed_img[b,c,xt1[b],xt2[b]] = yt[b,:,min(c,C-1)] + 0.5 88 | completed_img = completed_img.clamp(0, 1) 89 
| 90 | return task_img, completed_img 91 | -------------------------------------------------------------------------------- /regression/models/bnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.cnp import CNP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | class BNP(CNP): 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | self.dec.add_ctx(2*kwargs['dim_hid']) 13 | 14 | def encode(self, xc, yc, xt, mask=None): 15 | encoded = torch.cat([ 16 | self.enc1(xc, yc, mask=mask), 17 | self.enc2(xc, yc, mask=mask)], -1) 18 | return stack(encoded, xt.shape[-2], -2) 19 | 20 | def predict(self, xc, yc, xt, num_samples=None, return_base=False): 21 | with torch.no_grad(): 22 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 23 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 24 | 25 | encoded = self.encode(bxc, byc, sxc) 26 | py_res = self.dec(encoded, sxc) 27 | 28 | mu, sigma = py_res.mean, py_res.scale 29 | res = SWR((syc - mu)/sigma).detach() 30 | res = (res - res.mean(-2, keepdim=True)) 31 | 32 | bxc = sxc 33 | byc = mu + sigma * res 34 | 35 | encoded_base = self.encode(xc, yc, xt) 36 | 37 | sxt = stack(xt, num_samples) 38 | encoded_bs = self.encode(bxc, byc, sxt) 39 | 40 | py = self.dec(stack(encoded_base, num_samples), 41 | sxt, ctx=encoded_bs) 42 | 43 | if self.training or return_base: 44 | py_base = self.dec(encoded_base, xt) 45 | return py_base, py 46 | else: 47 | return py 48 | 49 | def sample(self, xc, yc, xt, num_samples=None): 50 | pred_dist = self.predict(xc, yc, xt, num_samples=num_samples, return_base=False) 51 | return pred_dist.loc 52 | 53 | def forward(self, batch, num_samples=None, reduce_ll=True): 54 | outs = AttrDict() 55 | 56 | def compute_ll(py, y): 57 | ll = py.log_prob(y).sum(-1) 58 | if ll.dim() == 3 and reduce_ll: 59 | ll = logmeanexp(ll) 60 | return ll 61 | 62 | if self.training: 63 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 64 | num_samples=num_samples) 65 | 66 | outs.ll_base = compute_ll(py_base, batch.y).mean() 67 | outs.ll = compute_ll(py, batch.y).mean() 68 | outs.loss = -outs.ll_base - outs.ll 69 | else: 70 | py = self.predict(batch.xc, batch.yc, batch.x, 71 | num_samples=num_samples) 72 | ll = compute_ll(py, batch.y) 73 | num_ctx = batch.xc.shape[-2] 74 | if reduce_ll: 75 | outs.ctx_ll = ll[...,:num_ctx].mean() 76 | outs.tar_ll = ll[...,num_ctx:].mean() 77 | else: 78 | outs.ctx_ll = ll[...,:num_ctx] 79 | outs.tar_ll = ll[...,num_ctx:] 80 | 81 | return outs 82 | -------------------------------------------------------------------------------- /regression/models/banp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.canp import CANP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | class BANP(CANP): 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | self.dec.add_ctx(2*kwargs['dim_hid']) 13 | 14 | def encode(self, xc, yc, xt, mask=None): 15 | theta1 = self.enc1(xc, yc, xt) 16 | theta2 = self.enc2(xc, yc) 17 | encoded = torch.cat([theta1, 18 | torch.stack([theta2]*xt.shape[-2], -2)], -1) 19 | return encoded 20 | 21 | def predict(self, xc, yc, xt, num_samples=None, 
return_base=False): 22 | with torch.no_grad(): 23 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 24 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 25 | 26 | encoded = self.encode(bxc, byc, sxc) 27 | py_res = self.dec(encoded, sxc) 28 | 29 | mu, sigma = py_res.mean, py_res.scale 30 | res = SWR((syc - mu)/sigma).detach() 31 | res = (res - res.mean(-2, keepdim=True)) 32 | 33 | bxc = sxc 34 | byc = mu + sigma * res 35 | 36 | encoded_base = self.encode(xc, yc, xt) 37 | 38 | sxt = stack(xt, num_samples) 39 | encoded_bs = self.encode(bxc, byc, sxt) 40 | 41 | py = self.dec(stack(encoded_base, num_samples), 42 | sxt, ctx=encoded_bs) 43 | 44 | if self.training or return_base: 45 | py_base = self.dec(encoded_base, xt) 46 | return py_base, py 47 | else: 48 | return py 49 | 50 | def sample(self, xc, yc, xt, num_samples=None): 51 | pred_dist = self.predict(xc, yc, xt, num_samples=num_samples, return_base=False) 52 | return pred_dist.loc 53 | 54 | def forward(self, batch, num_samples=None, reduce_ll=True): 55 | outs = AttrDict() 56 | 57 | def compute_ll(py, y): 58 | ll = py.log_prob(y).sum(-1) 59 | if ll.dim() == 3 and reduce_ll: 60 | ll = logmeanexp(ll) 61 | return ll 62 | 63 | if self.training: 64 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 65 | num_samples=num_samples) 66 | 67 | outs.ll_base = compute_ll(py_base, batch.y).mean() 68 | outs.ll = compute_ll(py, batch.y).mean() 69 | outs.loss = -outs.ll_base - outs.ll 70 | else: 71 | py = self.predict(batch.xc, batch.yc, batch.x, 72 | num_samples=num_samples) 73 | ll = compute_ll(py, batch.y) 74 | num_ctx = batch.xc.shape[-2] 75 | if reduce_ll: 76 | outs.ctx_ll = ll[...,:num_ctx].mean() 77 | outs.tar_ll = ll[...,num_ctx:].mean() 78 | else: 79 | outs.ctx_ll = ll[...,:num_ctx] 80 | outs.tar_ll = ll[...,num_ctx:] 81 | 82 | return outs 83 | -------------------------------------------------------------------------------- /contextual_bandits/models/banp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.canp import CANP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BANP(CANP): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | theta1 = self.enc1(xc, yc, xt) 17 | theta2 = self.enc2(xc, yc) 18 | encoded = torch.cat([theta1, 19 | torch.stack([theta2] * xt.shape[-2], -2)], -1) 20 | return encoded 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu) / sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | del sxc, mu, sigma, res 42 | 43 | encoded_base = self.encode(xc, yc, xt) 44 | del xc, yc 45 | 46 | sxt = stack(xt, num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | del bxc, byc 49 | 50 | py = self.dec(stack(encoded_base, num_samples), 51 | sxt, ctx=encoded_bs) 52 | del sxt, encoded_bs 53 
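# The del statements above release stacked intermediates as soon as they are consumed: predict() keeps num_samples bootstrap copies of the context alive at once, so freeing them early reduces peak memory.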
| 54 | if self.training or return_base: 55 | py_base = self.dec(encoded_base, xt) 56 | return py_base, py 57 | else: 58 | return py 59 | 60 | def forward(self, batch, num_samples=None, reduce_ll=True): 61 | outs = AttrDict() 62 | 63 | def compute_ll(py, y): 64 | ll = py.log_prob(y).sum(-1) 65 | if ll.dim() == 3 and reduce_ll: 66 | ll = logmeanexp(ll) 67 | return ll 68 | 69 | if self.training: 70 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 71 | num_samples=num_samples) 72 | 73 | outs.ll_base = compute_ll(py_base, batch.y).mean() 74 | outs.ll = compute_ll(py, batch.y).mean() 75 | outs.loss = - outs.ll_base - outs.ll 76 | else: 77 | py = self.predict(batch.xc, batch.yc, batch.x, 78 | num_samples=num_samples) 79 | ll = compute_ll(py, batch.y) 80 | num_ctx = batch.xc.shape[-2] 81 | if reduce_ll: 82 | outs.ctx_loss = ll[..., :num_ctx].mean() 83 | outs.tar_loss = ll[..., num_ctx:].mean() 84 | else: 85 | outs.ctx_loss = ll[..., :num_ctx] 86 | outs.tar_loss = ll[..., num_ctx:] 87 | 88 | return outs 89 | -------------------------------------------------------------------------------- /bayesian_optimization/models/banp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.canp import CANP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BANP(CANP): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | theta1 = self.enc1(xc, yc, xt) 17 | theta2 = self.enc2(xc, yc) 18 | encoded = torch.cat([theta1, 19 | torch.stack([theta2] * xt.shape[-2], -2)], -1) 20 | return encoded 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples), stack(yc, num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu) / sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | del sxc, mu, sigma, res 42 | 43 | encoded_base = self.encode(xc, yc, xt) 44 | del xc, yc 45 | 46 | sxt = stack(xt, num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | del bxc, byc 49 | 50 | py = self.dec(stack(encoded_base, num_samples), 51 | sxt, ctx=encoded_bs) 52 | del sxt, encoded_bs 53 | 54 | if self.training or return_base: 55 | py_base = self.dec(encoded_base, xt) 56 | return py_base, py 57 | else: 58 | return py 59 | 60 | def forward(self, batch, num_samples=None, reduce_ll=True): 61 | outs = AttrDict() 62 | 63 | def compute_ll(py, y): 64 | ll = py.log_prob(y).sum(-1) 65 | if ll.dim() == 3 and reduce_ll: 66 | ll = logmeanexp(ll) 67 | return ll 68 | 69 | if self.training: 70 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 71 | num_samples=num_samples) 72 | 73 | outs.ll_base = compute_ll(py_base, batch.y).mean() 74 | outs.ll = compute_ll(py, batch.y).mean() 75 | outs.loss = - outs.ll_base - outs.ll 76 | else: 77 | py = self.predict(batch.xc, batch.yc, batch.x, 78 | num_samples=num_samples) 79 | ll = compute_ll(py, batch.y) 80 | num_ctx = batch.xc.shape[-2] 
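# batch.x is the concatenation [xc; xt] along the point axis, so the first num_ctx positions hold context points and the remainder are targets.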
81 | if reduce_ll: 82 | outs.ctx_loss = ll[..., :num_ctx].mean() 83 | outs.tar_loss = ll[..., num_ctx:].mean() 84 | else: 85 | outs.ctx_loss = ll[..., :num_ctx] 86 | outs.tar_loss = ll[..., num_ctx:] 87 | 88 | return outs 89 | -------------------------------------------------------------------------------- /bayesian_optimization/models/bnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.cnp import CNP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BNP(CNP): 11 | def __init__(self, *args, **kwargs): 12 | super(BNP, self).__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | encoded = torch.cat([ 17 | self.enc1(xc, yc, mask=mask), 18 | self.enc2(xc, yc, mask=mask)], -1) 19 | 20 | return stack(encoded, num_samples=xt.shape[-2], dim=-2) 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False, get_bootstrap=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples=num_samples), stack(yc, num_samples=num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu)/sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | if get_bootstrap: 42 | return bxc, byc 43 | 44 | encoded_base = self.encode(xc, yc, xt) 45 | 46 | sxt = stack(xt, num_samples=num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | 49 | py = self.dec(stack(encoded_base, num_samples), 50 | sxt, ctx=encoded_bs) 51 | 52 | if self.training or return_base: 53 | py_base = self.dec(encoded_base, xt) 54 | return py_base, py 55 | else: 56 | return py 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | 61 | def compute_ll(dist, y): 62 | loglikelihood = dist.log_prob(y).sum(-1) 63 | if loglikelihood.dim() == 3 and reduce_ll: 64 | loglikelihood = logmeanexp(loglikelihood) 65 | return loglikelihood 66 | 67 | if self.training: 68 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 69 | num_samples=num_samples) 70 | 71 | outs.ll_base = compute_ll(py_base, batch.y).mean() 72 | outs.ll = compute_ll(py, batch.y).mean() 73 | outs.loss = - outs.ll_base - outs.ll 74 | else: 75 | py = self.predict(batch.xc, batch.yc, batch.x, 76 | num_samples=num_samples) 77 | ll = compute_ll(py, batch.y) 78 | num_ctx = batch.xc.shape[-2] 79 | if reduce_ll: 80 | outs.ctx_loss = ll[...,:num_ctx].mean() 81 | outs.tar_loss = ll[...,num_ctx:].mean() 82 | else: 83 | outs.ctx_loss = ll[...,:num_ctx] 84 | outs.tar_loss = ll[...,num_ctx:] 85 | 86 | return outs 87 | -------------------------------------------------------------------------------- /contextual_bandits/models/bnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from attrdict import AttrDict 4 | 5 | from models.cnp import CNP 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_with_replacement as SWR, sample_subset 8 | 9 | 10 | class BNP(CNP): 11 | def __init__(self, *args, **kwargs): 12 
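# BNP reuses the CNP encoders/decoder; the decoder context is widened by 2*dim_hid so it can additionally condition on the bootstrap encoding produced in predict().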
| super(BNP, self).__init__(*args, **kwargs) 13 | self.dec.add_ctx(2 * kwargs['dim_hid']) 14 | 15 | def encode(self, xc, yc, xt, mask=None): 16 | encoded = torch.cat([ 17 | self.enc1(xc, yc, mask=mask), 18 | self.enc2(xc, yc, mask=mask)], -1) 19 | 20 | return stack(encoded, num_samples=xt.shape[-2], dim=-2) 21 | 22 | def predict(self, xc, yc, xt, num_samples=None, return_base=False, get_bootstrap=False): 23 | # statement added to make the model usable with botorch 24 | if xc.shape[-3] != xt.shape[-3]: 25 | xt = xt.transpose(-3, -2) 26 | 27 | with torch.no_grad(): 28 | bxc, byc = SWR(xc, yc, num_samples=num_samples) 29 | sxc, syc = stack(xc, num_samples=num_samples), stack(yc, num_samples=num_samples) 30 | 31 | encoded = self.encode(bxc, byc, sxc) 32 | py_res = self.dec(encoded, sxc) 33 | 34 | mu, sigma = py_res.mean, py_res.scale 35 | res = SWR((syc - mu)/sigma).detach() 36 | res = (res - res.mean(-2, keepdim=True)) 37 | 38 | bxc = sxc 39 | byc = mu + sigma * res 40 | 41 | if get_bootstrap: 42 | return bxc, byc 43 | 44 | encoded_base = self.encode(xc, yc, xt) 45 | 46 | sxt = stack(xt, num_samples=num_samples) 47 | encoded_bs = self.encode(bxc, byc, sxt) 48 | 49 | py = self.dec(stack(encoded_base, num_samples), 50 | sxt, ctx=encoded_bs) 51 | 52 | if self.training or return_base: 53 | py_base = self.dec(encoded_base, xt) 54 | return py_base, py 55 | else: 56 | return py 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | 61 | def compute_ll(dist, y): 62 | loglikelihood = dist.log_prob(y).sum(-1) 63 | if loglikelihood.dim() == 3 and reduce_ll: 64 | loglikelihood = logmeanexp(loglikelihood) 65 | return loglikelihood 66 | 67 | if self.training: 68 | py_base, py = self.predict(batch.xc, batch.yc, batch.x, 69 | num_samples=num_samples) 70 | 71 | outs.ll_base = compute_ll(py_base, batch.y).mean() 72 | outs.ll = compute_ll(py, batch.y).mean() 73 | outs.loss = - outs.ll_base - outs.ll 74 | else: 75 | py = self.predict(batch.xc, batch.yc, batch.x, 76 | num_samples=num_samples) 77 | ll = compute_ll(py, batch.y) 78 | num_ctx = batch.xc.shape[-2] 79 | if reduce_ll: 80 | outs.ctx_loss = ll[...,:num_ctx].mean() 81 | outs.tar_loss = ll[...,num_ctx:].mean() 82 | else: 83 | outs.ctx_loss = ll[...,:num_ctx] 84 | outs.tar_loss = ll[...,num_ctx:] 85 | 86 | return outs 87 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from torch.distributions.normal import Normal 6 | from attrdict import AttrDict 7 | 8 | from models.modules import build_mlp 9 | 10 | 11 | class TNP(nn.Module): 12 | def __init__( 13 | self, 14 | dim_x, 15 | dim_y, 16 | d_model, 17 | emb_depth, 18 | dim_feedforward, 19 | nhead, 20 | dropout, 21 | num_layers, 22 | ): 23 | super(TNP, self).__init__() 24 | 25 | self.embedder = build_mlp(dim_x + dim_y, d_model, d_model, emb_depth) 26 | 27 | encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 28 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers) 29 | 30 | def construct_input(self, batch, autoreg=False): 31 | x_y_ctx = torch.cat((batch.xc, batch.yc), dim=-1) 32 | x_0_tar = torch.cat((batch.xt, torch.zeros_like(batch.yt)), dim=-1) 33 | if not autoreg: 34 | inp = torch.cat((x_y_ctx, x_0_tar), dim=1) 35 | else: 36 | x_y_tar = torch.cat((batch.xt, batch.yt), dim=-1)
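# autoregressive layout: [context (x, y); real targets (x, y); fake targets (x, 0)]; predictions are read off at the fake-target positions.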
37 | inp = torch.cat((x_y_ctx, x_y_tar, x_0_tar), dim=1) 38 | return inp 39 | 40 | def create_mask(self, batch, autoreg=False): 41 | num_ctx = batch.xc.shape[1] 42 | num_tar = batch.xt.shape[1] 43 | num_all = num_ctx + num_tar 44 | if not autoreg: 45 | mask = torch.zeros(num_all, num_all, device='cuda') 46 | mask[:, num_ctx:] = float('-inf') 47 | else: 48 | mask = torch.zeros((num_all+num_tar, num_all+num_tar), device='cuda').fill_(float('-inf')) 49 | mask[:, :num_ctx] = 0.0 # all points attend to context points 50 | mask[num_ctx:num_all, num_ctx:num_all].triu_(diagonal=1) # each real target point attends to itself and preceding real target points 51 | mask[num_all:, num_ctx:num_all].triu_(diagonal=0) # each fake target point attends to preceding real target points 52 | 53 | return mask, num_tar 54 | 55 | def construct_input_pretrain(self, batch): 56 | x_y = torch.cat((batch.x, batch.y), dim=-1) 57 | x_0 = torch.cat((batch.x, torch.zeros_like(batch.y)), dim=-1)[:, 1:] 58 | inp = torch.cat((x_y, x_0), dim=1) 59 | return inp 60 | 61 | def create_mask_pretrain(self, batch): 62 | num_points = batch.x.shape[1] 63 | 64 | mask = torch.zeros((2*num_points-1, 2*num_points-1), device='cuda').fill_(float('-inf')) 65 | mask[:num_points, :num_points].triu_(diagonal=1) 66 | mask[num_points:, 1:num_points].triu_(diagonal=0) 67 | mask[num_points:, 0] = 0.0 68 | 69 | return mask, num_points-1 70 | 71 | def encode(self, batch, autoreg=False, pretrain=False): 72 | if not pretrain: 73 | inp = self.construct_input(batch, autoreg) 74 | mask, num_tar = self.create_mask(batch, autoreg) 75 | else: 76 | inp = self.construct_input_pretrain(batch) 77 | mask, num_tar = self.create_mask_pretrain(batch) 78 | embeddings = self.embedder(inp) 79 | out = self.encoder(embeddings, mask=mask) 80 | return out[:, -num_tar:] -------------------------------------------------------------------------------- /contextual_bandits/models/tnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.modules import build_mlp 5 | 6 | 7 | class TNP(nn.Module): 8 | def __init__( 9 | self, 10 | dim_x, 11 | dim_y, 12 | d_model, 13 | emb_depth, 14 | dim_feedforward, 15 | nhead, 16 | dropout, 17 | num_layers, 18 | drop_y=0.5 19 | ): 20 | super(TNP, self).__init__() 21 | 22 | self.drop_y = drop_y 23 | self.embedder = build_mlp(dim_x + dim_y, d_model, d_model, emb_depth) 24 | 25 | encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 26 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers) 27 | 28 | def drop(self, y): 29 | y_dropped = torch.randn_like(y) 30 | not_drop_ids = torch.rand_like(y) > self.drop_y 31 | y_dropped[not_drop_ids] = y[not_drop_ids] 32 | return y_dropped 33 | 34 | def construct_input(self, batch, autoreg=False, drop_ctx=False): 35 | if drop_ctx: 36 | yc_dropped = self.drop(batch.yc) 37 | x_y_ctx = torch.cat((batch.xc, yc_dropped), dim=-1) 38 | else: 39 | x_y_ctx = torch.cat((batch.xc, batch.yc), dim=-1) 40 | x_0_tar = torch.cat((batch.xt, torch.zeros_like(batch.yt)), dim=-1) 41 | if not autoreg: 42 | inp = torch.cat((x_y_ctx, x_0_tar), dim=1) 43 | else: 44 | x_y_tar = torch.cat((batch.xt, batch.yt), dim=-1) 45 | inp = torch.cat((x_y_ctx, x_y_tar, x_0_tar), dim=1) 46 | return inp 47 | 48 | def create_mask(self, batch, autoreg=False): 49 | num_ctx = batch.xc.shape[1] 50 | num_tar = batch.xt.shape[1] 51 | num_all = num_ctx + num_tar 52 | if not autoreg: 53 | mask = 
torch.zeros(num_all, num_all, device='cuda') 54 | mask[:, num_ctx:] = float('-inf') 55 | else: 56 | mask = torch.zeros((num_all+num_tar, num_all+num_tar), device='cuda').fill_(float('-inf')) 57 | mask[:, :num_ctx] = 0.0 # all points attend to context points 58 | mask[num_ctx:num_all, num_ctx:num_all].triu_(diagonal=1) # each real target point attends to itself and preceding real target points 59 | mask[num_all:, num_ctx:num_all].triu_(diagonal=0) # each fake target point attends to preceding real target points 60 | 61 | return mask 62 | 63 | def construct_input_pretrain(self, batch, drop_y): 64 | if drop_y: 65 | y = self.drop(batch.y) 66 | else: 67 | y = batch.y 68 | x_y = torch.cat((batch.x, y), dim=-1) 69 | x_0 = torch.cat((batch.x, torch.zeros_like(batch.y)), dim=-1)[:, 1:] 70 | inp = torch.cat((x_y, x_0), dim=1) 71 | return inp 72 | 73 | def create_mask_pretrain(self, batch): 74 | num_points = batch.x.shape[1] 75 | 76 | mask = torch.zeros((2*num_points-1, 2*num_points-1), device='cuda').fill_(float('-inf')) 77 | mask[:num_points, :num_points].triu_(diagonal=1) 78 | mask[num_points:, 1:num_points].triu_(diagonal=0) 79 | mask[num_points:, 0] = 0.0 80 | 81 | return mask 82 | 83 | def encode(self, batch, autoreg=False, drop_ctx=False, pretrain=False): 84 | if not pretrain: 85 | inp = self.construct_input(batch, autoreg, drop_ctx) 86 | mask = self.create_mask(batch, autoreg) 87 | else: 88 | inp = self.construct_input_pretrain(batch, drop_ctx) 89 | mask = self.create_mask_pretrain(batch) 90 | embeddings = self.embedder(inp) 91 | out = self.encoder(embeddings, mask=mask) 92 | return out -------------------------------------------------------------------------------- /bayesian_optimization/models/np.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | from models.modules import PoolingEncoder, Decoder 9 | 10 | class NP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_pre_depth=4, 17 | enc_post_depth=2, 18 | dec_depth=3): 19 | 20 | super().__init__() 21 | 22 | self.denc = PoolingEncoder( 23 | dim_x=dim_x, 24 | dim_y=dim_y, 25 | dim_hid=dim_hid, 26 | pre_depth=enc_pre_depth, 27 | post_depth=enc_post_depth) 28 | 29 | self.lenc = PoolingEncoder( 30 | dim_x=dim_x, 31 | dim_y=dim_y, 32 | dim_hid=dim_hid, 33 | dim_lat=dim_lat, 34 | pre_depth=enc_pre_depth, 35 | post_depth=enc_post_depth) 36 | 37 | self.dec = Decoder( 38 | dim_x=dim_x, 39 | dim_y=dim_y, 40 | dim_enc=dim_hid+dim_lat, 41 | dim_hid=dim_hid, 42 | depth=dec_depth) 43 | 44 | def predict(self, xc, yc, xt, z=None, num_samples=None): 45 | # statement added to make the model usable with botorch 46 | if xc.shape[-3] != xt.shape[-3]: 47 | xt = xt.transpose(-3, -2) 48 | 49 | theta = stack(self.denc(xc, yc), num_samples) 50 | if z is None: 51 | pz = self.lenc(xc, yc) 52 | z = pz.rsample() if num_samples is None \ 53 | else pz.rsample([num_samples]) 54 | encoded = torch.cat([theta, z], -1) 55 | encoded = stack(encoded, xt.shape[-2], -2) 56 | return self.dec(encoded, stack(xt, num_samples)) 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | if self.training: 61 | pz = self.lenc(batch.xc, batch.yc) 62 | qz = self.lenc(batch.x, batch.y) 63 | z = qz.rsample() if num_samples is None else \ 64 | 
qz.rsample([num_samples]) 65 | py = self.predict(batch.xc, batch.yc, batch.x, 66 | z=z, num_samples=num_samples) 67 | 68 | if num_samples > 1: 69 | # K * B * N 70 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 71 | # K * B 72 | log_qz = qz.log_prob(z).sum(-1) 73 | log_pz = pz.log_prob(z).sum(-1) 74 | 75 | # K * B 76 | log_w = recon.sum(-1) + log_pz - log_qz 77 | 78 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 79 | else: 80 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 81 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 82 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 83 | 84 | else: 85 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 86 | if num_samples is None: 87 | ll = py.log_prob(batch.y).sum(-1) 88 | else: 89 | y = torch.stack([batch.y]*num_samples) 90 | if reduce_ll: 91 | ll = logmeanexp(py.log_prob(y).sum(-1)) 92 | else: 93 | ll = py.log_prob(y).sum(-1) 94 | num_ctx = batch.xc.shape[-2] 95 | if reduce_ll: 96 | outs.ctx_loss = ll[...,:num_ctx].mean() 97 | outs.tar_loss = ll[...,num_ctx:].mean() 98 | else: 99 | outs.ctx_loss = ll[...,:num_ctx] 100 | outs.tar_loss = ll[...,num_ctx:] 101 | return outs 102 | -------------------------------------------------------------------------------- /contextual_bandits/models/np.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | from models.modules import PoolingEncoder, Decoder 9 | 10 | class NP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_pre_depth=4, 17 | enc_post_depth=2, 18 | dec_depth=3): 19 | 20 | super().__init__() 21 | 22 | self.denc = PoolingEncoder( 23 | dim_x=dim_x, 24 | dim_y=dim_y, 25 | dim_hid=dim_hid, 26 | pre_depth=enc_pre_depth, 27 | post_depth=enc_post_depth) 28 | 29 | self.lenc = PoolingEncoder( 30 | dim_x=dim_x, 31 | dim_y=dim_y, 32 | dim_hid=dim_hid, 33 | dim_lat=dim_lat, 34 | pre_depth=enc_pre_depth, 35 | post_depth=enc_post_depth) 36 | 37 | self.dec = Decoder( 38 | dim_x=dim_x, 39 | dim_y=dim_y, 40 | dim_enc=dim_hid+dim_lat, 41 | dim_hid=dim_hid, 42 | depth=dec_depth) 43 | 44 | def predict(self, xc, yc, xt, z=None, num_samples=None): 45 | # statement added to make the model usable with botorch 46 | if xc.shape[-3] != xt.shape[-3]: 47 | xt = xt.transpose(-3, -2) 48 | 49 | theta = stack(self.denc(xc, yc), num_samples) 50 | if z is None: 51 | pz = self.lenc(xc, yc) 52 | z = pz.rsample() if num_samples is None \ 53 | else pz.rsample([num_samples]) 54 | encoded = torch.cat([theta, z], -1) 55 | encoded = stack(encoded, xt.shape[-2], -2) 56 | return self.dec(encoded, stack(xt, num_samples)) 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | if self.training: 61 | pz = self.lenc(batch.xc, batch.yc) 62 | qz = self.lenc(batch.x, batch.y) 63 | z = qz.rsample() if num_samples is None else \ 64 | qz.rsample([num_samples]) 65 | py = self.predict(batch.xc, batch.yc, batch.x, 66 | z=z, num_samples=num_samples) 67 | 68 | if num_samples > 1: 69 | # K * B * N 70 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 71 | # K * B 72 | log_qz = qz.log_prob(z).sum(-1) 73 | log_pz = pz.log_prob(z).sum(-1) 74 | 75 | # K * B 76 | log_w = recon.sum(-1) + log_pz - log_qz 77 | 78 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 79 
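# With K = num_samples > 1 latent samples this is an importance-weighted (IWAE-style) bound: loss = -logmeanexp_k[log p(y|z_k) + log p(z_k) - log q(z_k)] / N. The single-sample else-branch below falls back to the standard NP ELBO, reconstruction minus a KL term.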
| else: 80 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 81 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 82 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 83 | 84 | else: 85 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 86 | if num_samples is None: 87 | ll = py.log_prob(batch.y).sum(-1) 88 | else: 89 | y = torch.stack([batch.y]*num_samples) 90 | if reduce_ll: 91 | ll = logmeanexp(py.log_prob(y).sum(-1)) 92 | else: 93 | ll = py.log_prob(y).sum(-1) 94 | num_ctx = batch.xc.shape[-2] 95 | if reduce_ll: 96 | outs.ctx_loss = ll[...,:num_ctx].mean() 97 | outs.tar_loss = ll[...,num_ctx:].mean() 98 | else: 99 | outs.ctx_loss = ll[...,:num_ctx] 100 | outs.tar_loss = ll[...,num_ctx:] 101 | return outs 102 | -------------------------------------------------------------------------------- /regression/models/np.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | from models.modules import PoolingEncoder, Decoder 9 | 10 | class NP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_pre_depth=4, 17 | enc_post_depth=2, 18 | dec_depth=3): 19 | 20 | super().__init__() 21 | 22 | self.denc = PoolingEncoder( 23 | dim_x=dim_x, 24 | dim_y=dim_y, 25 | dim_hid=dim_hid, 26 | pre_depth=enc_pre_depth, 27 | post_depth=enc_post_depth) 28 | 29 | self.lenc = PoolingEncoder( 30 | dim_x=dim_x, 31 | dim_y=dim_y, 32 | dim_hid=dim_hid, 33 | dim_lat=dim_lat, 34 | pre_depth=enc_pre_depth, 35 | post_depth=enc_post_depth) 36 | 37 | self.dec = Decoder( 38 | dim_x=dim_x, 39 | dim_y=dim_y, 40 | dim_enc=dim_hid+dim_lat, 41 | dim_hid=dim_hid, 42 | depth=dec_depth) 43 | 44 | def predict(self, xc, yc, xt, z=None, num_samples=None): 45 | theta = stack(self.denc(xc, yc), num_samples) 46 | if z is None: 47 | pz = self.lenc(xc, yc) 48 | z = pz.rsample() if num_samples is None \ 49 | else pz.rsample([num_samples]) 50 | encoded = torch.cat([theta, z], -1) 51 | encoded = stack(encoded, xt.shape[-2], -2) 52 | return self.dec(encoded, stack(xt, num_samples)) 53 | 54 | def sample(self, xc, yc, xt, z=None, num_samples=None): 55 | pred_dist = self.predict(xc, yc, xt, z, num_samples) 56 | return pred_dist.loc 57 | 58 | def forward(self, batch, num_samples=None, reduce_ll=True): 59 | outs = AttrDict() 60 | if self.training: 61 | pz = self.lenc(batch.xc, batch.yc) 62 | qz = self.lenc(batch.x, batch.y) 63 | z = qz.rsample() if num_samples is None else \ 64 | qz.rsample([num_samples]) 65 | py = self.predict(batch.xc, batch.yc, batch.x, 66 | z=z, num_samples=num_samples) 67 | 68 | if num_samples > 1: 69 | # K * B * N 70 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 71 | # K * B 72 | log_qz = qz.log_prob(z).sum(-1) 73 | log_pz = pz.log_prob(z).sum(-1) 74 | 75 | # K * B 76 | log_w = recon.sum(-1) + log_pz - log_qz 77 | 78 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 79 | else: 80 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 81 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 82 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 83 | 84 | else: 85 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 86 | if num_samples is None: 87 | ll = py.log_prob(batch.y).sum(-1) 88 | else: 89 | y = torch.stack([batch.y]*num_samples) 90 | if reduce_ll: 91 
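# logmeanexp marginalizes over the K stacked samples in log space: log (1/K) sum_k p(y | z_k).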
| ll = logmeanexp(py.log_prob(y).sum(-1)) 92 | else: 93 | ll = py.log_prob(y).sum(-1) 94 | num_ctx = batch.xc.shape[-2] 95 | if reduce_ll: 96 | outs.ctx_ll = ll[...,:num_ctx].mean() 97 | outs.tar_ll = ll[...,num_ctx:].mean() 98 | else: 99 | outs.ctx_ll = ll[...,:num_ctx] 100 | outs.tar_ll = ll[...,num_ctx:] 101 | return outs 102 | -------------------------------------------------------------------------------- /bayesian_optimization/models/anp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from models.modules import CrossAttnEncoder, PoolingEncoder, Decoder 8 | 9 | 10 | class ANP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_v_depth=4, 17 | enc_qk_depth=2, 18 | enc_pre_depth=4, 19 | enc_post_depth=2, 20 | dec_depth=3): 21 | super(ANP, self).__init__() 22 | 23 | self.denc = CrossAttnEncoder( 24 | dim_x=dim_x, 25 | dim_y=dim_y, 26 | dim_hid=dim_hid, 27 | v_depth=enc_v_depth, 28 | qk_depth=enc_qk_depth) 29 | 30 | self.lenc = PoolingEncoder( 31 | dim_x=dim_x, 32 | dim_y=dim_y, 33 | dim_hid=dim_hid, 34 | dim_lat=dim_lat, 35 | self_attn=True, 36 | pre_depth=enc_pre_depth, 37 | post_depth=enc_post_depth) 38 | 39 | self.dec = Decoder( 40 | dim_x=dim_x, 41 | dim_y=dim_y, 42 | dim_enc=dim_hid + dim_lat, 43 | dim_hid=dim_hid, 44 | depth=dec_depth) 45 | 46 | def predict(self, xc, yc, xt, z=None, num_samples=None): 47 | # statement added to make the model usable with botorch 48 | if xc.shape[-3] != xt.shape[-3]: 49 | xt = xt.transpose(-3, -2) 50 | 51 | theta = stack(self.denc(xc, yc, xt), num_samples) 52 | if z is None: 53 | pz = self.lenc(xc, yc) 54 | z = pz.rsample() if num_samples is None \ 55 | else pz.rsample([num_samples]) 56 | z = stack(z, xt.shape[-2], dim=-2) 57 | encoded = torch.cat([theta, z], -1) 58 | return self.dec(encoded, stack(xt, num_samples)) 59 | 60 | def forward(self, batch, num_samples=None, reduce_ll=True): 61 | outs = AttrDict() 62 | 63 | if self.training: 64 | pz = self.lenc(batch.xc, batch.yc) 65 | qz = self.lenc(batch.x, batch.y) 66 | z = qz.rsample() if num_samples is None else \ 67 | qz.rsample([num_samples]) 68 | py = self.predict(batch.xc, batch.yc, batch.x, 69 | z=z, num_samples=num_samples) 70 | 71 | if num_samples > 1: 72 | # K * B * N 73 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 74 | # K * B 75 | log_qz = qz.log_prob(z).sum(-1) 76 | log_pz = pz.log_prob(z).sum(-1) 77 | 78 | # K * B 79 | log_w = recon.sum(-1) + log_pz - log_qz 80 | 81 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 82 | else: 83 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 84 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 85 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 86 | 87 | else: 88 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 89 | 90 | if num_samples is None: 91 | ll = py.log_prob(batch.y).sum(-1) 92 | else: 93 | y = torch.stack([batch.y] * num_samples) 94 | if reduce_ll: 95 | ll = logmeanexp(py.log_prob(y).sum(-1)) 96 | else: 97 | ll = py.log_prob(y).sum(-1) 98 | 99 | num_ctx = batch.xc.shape[-2] 100 | 101 | if reduce_ll: 102 | outs.ctx_ll = ll[..., :num_ctx].mean() 103 | outs.tar_ll = ll[..., num_ctx:].mean() 104 | else: 105 | outs.ctx_ll = ll[..., :num_ctx] 106 | outs.tar_ll = ll[..., num_ctx:] 107 | 108 | return outs 109 | 
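All of the NP-family models above expose the same evaluation interface. A minimal usage sketch follows; the tensor shapes and the trained `model` variable are illustrative assumptions, not part of the repository:

import torch
from attrdict import AttrDict

# Build an evaluation batch; the models slice batch.x/batch.y at num_ctx = batch.xc.shape[-2].
batch = AttrDict()
batch.xc = torch.randn(16, 30, 1)   # [B, Nc, dim_x] context inputs
batch.yc = torch.randn(16, 30, 1)   # [B, Nc, dim_y] context outputs
batch.xt = torch.randn(16, 50, 1)   # [B, Nt, dim_x] target inputs
batch.yt = torch.randn(16, 50, 1)   # [B, Nt, dim_y] target outputs
batch.x = torch.cat([batch.xc, batch.xt], dim=1)
batch.y = torch.cat([batch.yc, batch.yt], dim=1)

model.eval()                        # take the non-training branch of forward()
with torch.no_grad():
    outs = model(batch, num_samples=50)   # K samples, reduced via logmeanexp
    py = model.predict(batch.xc, batch.yc, batch.xt, num_samples=50)

# Note the naming difference: the regression models report outs.ctx_ll / outs.tar_ll,
# while the BO and bandit copies store the same log-likelihoods as outs.ctx_loss / outs.tar_loss.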
-------------------------------------------------------------------------------- /contextual_bandits/models/anp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from models.modules import CrossAttnEncoder, PoolingEncoder, Decoder 8 | 9 | 10 | class ANP(nn.Module): 11 | def __init__(self, 12 | dim_x=1, 13 | dim_y=1, 14 | dim_hid=128, 15 | dim_lat=128, 16 | enc_v_depth=4, 17 | enc_qk_depth=2, 18 | enc_pre_depth=4, 19 | enc_post_depth=2, 20 | dec_depth=3): 21 | super(ANP, self).__init__() 22 | 23 | self.denc = CrossAttnEncoder( 24 | dim_x=dim_x, 25 | dim_y=dim_y, 26 | dim_hid=dim_hid, 27 | self_attn=True, 28 | v_depth=enc_v_depth, 29 | qk_depth=enc_qk_depth) 30 | 31 | self.lenc = PoolingEncoder( 32 | dim_x=dim_x, 33 | dim_y=dim_y, 34 | dim_hid=dim_hid, 35 | dim_lat=dim_lat, 36 | self_attn=True, 37 | pre_depth=enc_pre_depth, 38 | post_depth=enc_post_depth) 39 | 40 | self.dec = Decoder( 41 | dim_x=dim_x, 42 | dim_y=dim_y, 43 | dim_enc=dim_hid + dim_lat, 44 | dim_hid=dim_hid, 45 | depth=dec_depth) 46 | 47 | def predict(self, xc, yc, xt, z=None, num_samples=None): 48 | # statement added to make the model usable with botorch 49 | if xc.shape[-3] != xt.shape[-3]: 50 | xt = xt.transpose(-3, -2) 51 | 52 | theta = stack(self.denc(xc, yc, xt), num_samples) 53 | if z is None: 54 | pz = self.lenc(xc, yc) 55 | z = pz.rsample() if num_samples is None \ 56 | else pz.rsample([num_samples]) 57 | z = stack(z, xt.shape[-2], dim=-2) 58 | encoded = torch.cat([theta, z], -1) 59 | return self.dec(encoded, stack(xt, num_samples)) 60 | 61 | def forward(self, batch, num_samples=None, reduce_ll=True): 62 | outs = AttrDict() 63 | 64 | if self.training: 65 | pz = self.lenc(batch.xc, batch.yc) 66 | qz = self.lenc(batch.x, batch.y) 67 | z = qz.rsample() if num_samples is None else \ 68 | qz.rsample([num_samples]) 69 | py = self.predict(batch.xc, batch.yc, batch.x, 70 | z=z, num_samples=num_samples) 71 | 72 | if num_samples > 1: 73 | # K * B * N 74 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 75 | # K * B 76 | log_qz = qz.log_prob(z).sum(-1) 77 | log_pz = pz.log_prob(z).sum(-1) 78 | 79 | # K * B 80 | log_w = recon.sum(-1) + log_pz - log_qz 81 | 82 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 83 | else: 84 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 85 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 86 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 87 | 88 | else: 89 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 90 | 91 | if num_samples is None: 92 | ll = py.log_prob(batch.y).sum(-1) 93 | else: 94 | y = torch.stack([batch.y] * num_samples) 95 | if reduce_ll: 96 | ll = logmeanexp(py.log_prob(y).sum(-1)) 97 | else: 98 | ll = py.log_prob(y).sum(-1) 99 | 100 | num_ctx = batch.xc.shape[-2] 101 | 102 | if reduce_ll: 103 | outs.ctx_ll = ll[..., :num_ctx].mean() 104 | outs.tar_ll = ll[..., num_ctx:].mean() 105 | else: 106 | outs.ctx_ll = ll[..., :num_ctx] 107 | outs.tar_ll = ll[..., num_ctx:] 108 | 109 | return outs 110 | -------------------------------------------------------------------------------- /regression/utils/log.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import logging 4 | from collections import OrderedDict 5 | import re 6 | import matplotlib 7 | from matplotlib import pyplot as 
plt 8 | from os.path import split, splitext 9 | 10 | def get_logger(filename, mode='a'): 11 | logging.basicConfig(level=logging.INFO, format='%(message)s') 12 | logger = logging.getLogger() 13 | # repeated runs would accumulate duplicate handlers on the root logger, so remove any existing ones first 14 | for hdlr in logger.handlers: 15 | logger.removeHandler(hdlr) 16 | logger.addHandler(logging.FileHandler(filename, mode=mode)) 17 | logger.addHandler(logging.StreamHandler()) 18 | return logger 19 | 20 | class RunningAverage(object): 21 | def __init__(self, *keys): 22 | self.sum = OrderedDict() 23 | self.cnt = OrderedDict() 24 | self.clock = time.time() 25 | for key in keys: 26 | self.sum[key] = 0 27 | self.cnt[key] = 0 28 | 29 | def update(self, key, val): 30 | if isinstance(val, torch.Tensor): 31 | val = val.item() 32 | if self.sum.get(key, None) is None: 33 | self.sum[key] = val 34 | self.cnt[key] = 1 35 | else: 36 | self.sum[key] = self.sum[key] + val 37 | self.cnt[key] += 1 38 | 39 | def reset(self): 40 | for key in self.sum.keys(): 41 | self.sum[key] = 0 42 | self.cnt[key] = 0 43 | self.clock = time.time() 44 | 45 | def clear(self): 46 | self.sum = OrderedDict() 47 | self.cnt = OrderedDict() 48 | self.clock = time.time() 49 | 50 | def keys(self): 51 | return self.sum.keys() 52 | 53 | def get(self, key): 54 | assert(self.sum.get(key, None) is not None) 55 | return self.sum[key] / self.cnt[key] 56 | 57 | def info(self, show_et=True): 58 | line = '' 59 | for key in self.sum.keys(): 60 | val = self.sum[key] / self.cnt[key] 61 | if type(val) == float: 62 | line += f'{key} {val:.4f} ' 63 | else: 64 | line += f'{key} {val} ' 65 | if show_et: 66 | line += f'({time.time()-self.clock:.3f} secs)' 67 | return line 68 | 69 | def get_log(fileroot): 70 | step = [] 71 | loss = [] 72 | train_time = [] 73 | eval_time = [] 74 | ctxll = [] 75 | tarll = [] 76 | with open(fileroot, "r") as file: 77 | lines = file.readlines() 78 | for line in lines: 79 | # training step 80 | if "step" in line: 81 | linesplit = line.split(" ") 82 | step += [int(linesplit[3])] 83 | _loss = linesplit[-3] 84 | loss += [100 if _loss=="nan" else float(_loss)] 85 | train_time += [float(linesplit[-2][1:])] 86 | # evaluation step 87 | elif "ctx_ll" in line: 88 | linesplit = line.split(" ") 89 | ctxll += [float(linesplit[-5])] 90 | tarll += [float(linesplit[-3])] 91 | eval_time += [float(linesplit[-2][1:])] 92 | 93 | return step, loss, None, ctxll, tarll 94 | 95 | 96 | def plot_log(fileroot, x_begin=None, x_end=None): 97 | step, loss, stepll, ctxll, tarll = get_log(fileroot) 98 | step = list(map(int, step)) 99 | loss = list(map(float, loss)) 100 | ctxll = list(map(float, ctxll)) 101 | tarll = list(map(float, tarll)) 102 | stepll = list(map(int, stepll)) if stepll else None 103 | 104 | if x_begin is None: 105 | x_begin = 0 106 | if x_end is None: 107 | x_end = step[-1] 108 | 109 | print_freq = 1 if len(step)==1 else step[1] - step[0] 110 | 111 | plt.clf() 112 | plt.plot(step[x_begin//print_freq:x_end//print_freq], 113 | loss[x_begin//print_freq:x_end//print_freq]) 114 | plt.xlabel('step') 115 | plt.ylabel('loss') 116 | 117 | directory, file = split(fileroot) 118 | filename = splitext(file)[0] 119 | plt.savefig(directory + "/" + filename + f"-{x_begin}-{x_end}.png") 120 | plt.clf() # clear current figure -------------------------------------------------------------------------------- /regression/models/anp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions import kl_divergence 4 | 
from attrdict import AttrDict 5 | 6 | from utils.misc import stack, logmeanexp 7 | from utils.sampling import sample_subset 8 | 9 | from models.modules import CrossAttnEncoder, PoolingEncoder, Decoder 10 | 11 | class ANP(nn.Module): 12 | def __init__(self, 13 | dim_x=1, 14 | dim_y=1, 15 | dim_hid=128, 16 | dim_lat=128, 17 | enc_v_depth=4, 18 | enc_qk_depth=2, 19 | enc_pre_depth=4, 20 | enc_post_depth=2, 21 | dec_depth=3): 22 | 23 | super().__init__() 24 | 25 | self.denc = CrossAttnEncoder( 26 | dim_x=dim_x, 27 | dim_y=dim_y, 28 | dim_hid=dim_hid, 29 | v_depth=enc_v_depth, 30 | qk_depth=enc_qk_depth) 31 | 32 | self.lenc = PoolingEncoder( 33 | dim_x=dim_x, 34 | dim_y=dim_y, 35 | dim_hid=dim_hid, 36 | dim_lat=dim_lat, 37 | self_attn=True, 38 | pre_depth=enc_pre_depth, 39 | post_depth=enc_post_depth) 40 | 41 | self.dec = Decoder( 42 | dim_x=dim_x, 43 | dim_y=dim_y, 44 | dim_enc=dim_hid+dim_lat, 45 | dim_hid=dim_hid, 46 | depth=dec_depth) 47 | 48 | def predict(self, xc, yc, xt, z=None, num_samples=None): 49 | theta = stack(self.denc(xc, yc, xt), num_samples) 50 | if z is None: 51 | pz = self.lenc(xc, yc) 52 | z = pz.rsample() if num_samples is None \ 53 | else pz.rsample([num_samples]) 54 | z = stack(z, xt.shape[-2], -2) 55 | encoded = torch.cat([theta, z], -1) 56 | return self.dec(encoded, stack(xt, num_samples)) 57 | 58 | def sample(self, xc, yc, xt, z=None, num_samples=None): 59 | pred_dist = self.predict(xc, yc, xt, z, num_samples) 60 | return pred_dist.loc 61 | 62 | def forward(self, batch, num_samples=None, reduce_ll=True): 63 | outs = AttrDict() 64 | if self.training: 65 | pz = self.lenc(batch.xc, batch.yc) 66 | qz = self.lenc(batch.x, batch.y) 67 | z = qz.rsample() if num_samples is None else \ 68 | qz.rsample([num_samples]) 69 | py = self.predict(batch.xc, batch.yc, batch.x, 70 | z=z, num_samples=num_samples) 71 | 72 | if num_samples > 1: 73 | # K * B * N 74 | recon = py.log_prob(stack(batch.y, num_samples)).sum(-1) 75 | # K * B 76 | log_qz = qz.log_prob(z).sum(-1) 77 | log_pz = pz.log_prob(z).sum(-1) 78 | 79 | # K * B 80 | log_w = recon.sum(-1) + log_pz - log_qz 81 | 82 | outs.loss = -logmeanexp(log_w).mean() / batch.x.shape[-2] 83 | else: 84 | outs.recon = py.log_prob(batch.y).sum(-1).mean() 85 | outs.kld = kl_divergence(qz, pz).sum(-1).mean() 86 | outs.loss = -outs.recon + outs.kld / batch.x.shape[-2] 87 | 88 | else: 89 | py = self.predict(batch.xc, batch.yc, batch.x, num_samples=num_samples) 90 | if num_samples is None: 91 | ll = py.log_prob(batch.y).sum(-1) 92 | else: 93 | y = torch.stack([batch.y]*num_samples) 94 | if reduce_ll: 95 | ll = logmeanexp(py.log_prob(y).sum(-1)) 96 | else: 97 | ll = py.log_prob(y).sum(-1) 98 | num_ctx = batch.xc.shape[-2] 99 | 100 | if reduce_ll: 101 | outs.ctx_ll = ll[...,:num_ctx].mean() 102 | outs.tar_ll = ll[...,num_ctx:].mean() 103 | else: 104 | outs.ctx_ll = ll[...,:num_ctx] 105 | outs.tar_ll = ll[...,num_ctx:] 106 | 107 | return outs 108 | -------------------------------------------------------------------------------- /bayesian_optimization/utils/acquisition.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from attrdict import AttrDict 5 | from botorch.acquisition import AnalyticAcquisitionFunction 6 | from botorch.utils.transforms import t_batch_mode_transform 7 | from torch import Tensor 8 | from torch.distributions import Normal 9 | from torch.nn import Module 10 | from typing import Union 11 | 12 | 13 | class EI(AnalyticAcquisitionFunction): 14 | def 
__init__( 15 | self, 16 | model: Module, 17 | observations: AttrDict, 18 | best_f: Union[float, Tensor], 19 | num_bs: int = 200, 20 | maximize: bool = True 21 | ): 22 | model.num_outputs = 1 23 | super(EI, self).__init__(model=model) 24 | 25 | self.obs = observations 26 | if not torch.is_tensor(best_f): 27 | best_f = torch.tensor(best_f) 28 | self.register_buffer("best_f", best_f) 29 | self.num_bs = num_bs 30 | self.maximize = maximize 31 | 32 | @t_batch_mode_transform(expected_q=1, assert_output_shape=False) 33 | def forward(self, X: Tensor) -> Tensor: 34 | self.best_f = self.best_f.to(X) 35 | 36 | posterior = self.model.predict(xc=self.obs.xc, 37 | yc=self.obs.yc, 38 | xt=X, 39 | num_samples=self.num_bs) 40 | mean, std = posterior.mean.squeeze(0), posterior.scale.squeeze(0) 41 | 42 | # shape: (num_samples, 1, num_points, 1) 43 | if mean.dim() == 4: 44 | var = std.pow(2).mean(dim=0) + mean.pow(2).mean(dim=0) - mean.mean(dim=0).pow(2) 45 | std = var.sqrt().squeeze(0) 46 | mean = mean.mean(dim=0).squeeze(0) 47 | 48 | batch_shape = mean.shape[:-2] if mean.dim() >= X.dim() else X.shape[:-2] 49 | mean = mean.view(batch_shape) 50 | std = std.clamp_min(np.sqrt(1e-9)).view(batch_shape) 51 | u = (mean - self.best_f.expand_as(mean)) / std 52 | if not self.maximize: 53 | u = -u 54 | normal = Normal(torch.zeros_like(u), torch.ones_like(u)) 55 | ucdf = normal.cdf(u) 56 | updf = torch.exp(normal.log_prob(u)) 57 | ei = std * (updf + u * ucdf) 58 | return ei 59 | 60 | 61 | class UCB(AnalyticAcquisitionFunction): 62 | def __init__( 63 | self, 64 | model: Module, 65 | observations: AttrDict, 66 | beta: Union[float, Tensor], 67 | num_bs: int = 200, 68 | maximize: bool = True 69 | ): 70 | model.num_outputs = 1 71 | super(UCB, self).__init__(model=model) 72 | 73 | self.obs = observations 74 | if not torch.is_tensor(beta): 75 | beta = torch.tensor(beta) 76 | self.register_buffer("beta", beta) 77 | self.num_bs = num_bs 78 | self.maximize = maximize 79 | 80 | @t_batch_mode_transform(expected_q=1) 81 | def forward(self, X: Tensor, return_mean=False) -> Tensor: 82 | self.beta = self.beta.to(X) 83 | 84 | posterior = self.model.predict(xc=self.obs.xc, 85 | yc=self.obs.yc, 86 | xt=X, 87 | num_samples=self.num_bs) 88 | mean, std = posterior.mean.squeeze(0), posterior.scale.squeeze(0) 89 | 90 | # shape: (num_samples, 1, num_points, 1) 91 | if mean.dim() == 4: 92 | var = std.pow(2).mean(dim=0) + mean.pow(2).mean(dim=0) - mean.mean(dim=0).pow(2) 93 | std = var.sqrt().squeeze(0) 94 | mean = mean.mean(dim=0).squeeze(0) 95 | 96 | batch_shape = X.shape[:-2] 97 | mean = mean.view(batch_shape) 98 | std = std.view(batch_shape) 99 | delta = self.beta.expand_as(mean).sqrt() * std 100 | if return_mean: 101 | return mean 102 | else: 103 | if self.maximize: 104 | return mean + delta 105 | else: 106 | return -mean + delta 107 | -------------------------------------------------------------------------------- /bayesian_optimization/utils/log.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import logging 4 | from collections import OrderedDict 5 | import re 6 | import matplotlib 7 | from matplotlib import pyplot as plt 8 | from os.path import split, splitext 9 | 10 | 11 | def get_logger(filename, mode='a'): 12 | logging.basicConfig(level=logging.INFO, format='%(message)s') 13 | logger = logging.getLogger() 14 | # repeated runs would accumulate duplicate handlers on the root logger, so remove any existing ones first 15 | for hdlr in logger.handlers: 16 | logger.removeHandler(hdlr) 17 | 
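# attach a fresh pair of handlers: one writing to the run's log file, one echoing to stdout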
logger.addHandler(logging.FileHandler(filename, mode=mode)) 18 | logger.addHandler(logging.StreamHandler()) 19 | return logger 20 | 21 | 22 | class RunningAverage(object): 23 | def __init__(self, *keys): 24 | self.sum = OrderedDict() 25 | self.cnt = OrderedDict() 26 | self.clock = time.time() 27 | for key in keys: 28 | self.sum[key] = 0 29 | self.cnt[key] = 0 30 | 31 | def update(self, key, val): 32 | if isinstance(val, torch.Tensor): 33 | val = val.item() 34 | if self.sum.get(key, None) is None: 35 | self.sum[key] = val 36 | self.cnt[key] = 1 37 | else: 38 | self.sum[key] = self.sum[key] + val 39 | self.cnt[key] += 1 40 | 41 | def reset(self): 42 | for key in self.sum.keys(): 43 | self.sum[key] = 0 44 | self.cnt[key] = 0 45 | self.clock = time.time() 46 | 47 | def clear(self): 48 | self.sum = OrderedDict() 49 | self.cnt = OrderedDict() 50 | self.clock = time.time() 51 | 52 | def keys(self): 53 | return self.sum.keys() 54 | 55 | def get(self, key): 56 | assert(self.sum.get(key, None) is not None) 57 | return self.sum[key] / self.cnt[key] 58 | 59 | def info(self, show_et=True): 60 | line = '' 61 | for key in self.sum.keys(): 62 | val = self.sum[key] / self.cnt[key] 63 | if type(val) == float: 64 | line += f'{key} {val:.4f} ' 65 | else: 66 | line += f'{key} {val} ' 67 | if show_et: 68 | line += f'({time.time()-self.clock:.3f} secs)' 69 | return line 70 | 71 | 72 | def get_log(fileroot): 73 | step = [] 74 | loss = [] 75 | train_time = [] 76 | eval_time = [] 77 | ctxll = [] 78 | tarll = [] 79 | with open(fileroot, "r") as file: 80 | lines = file.readlines() 81 | for line in lines: 82 | # training step 83 | if "step" in line: 84 | linesplit = line.split(" ") 85 | step += [int(linesplit[3])] 86 | _loss = linesplit[-3] 87 | loss += [100 if _loss=="nan" else float(_loss)] 88 | train_time += [float(linesplit[-2][1:])] 89 | # evaluation step 90 | elif "ctx_ll" in line: 91 | linesplit = line.split(" ") 92 | ctxll += [float(linesplit[-5])] 93 | tarll += [float(linesplit[-3])] 94 | eval_time += [float(linesplit[-2][1:])] 95 | 96 | return step, loss, None, ctxll, tarll 97 | 98 | 99 | def plot_log(fileroot, x_begin=None, x_end=None): 100 | step, loss, stepll, ctxll, tarll = get_log(fileroot) 101 | step = list(map(int, step)) 102 | loss = list(map(float, loss)) 103 | ctxll = list(map(float, ctxll)) 104 | tarll = list(map(float, tarll)) 105 | stepll = list(map(int, stepll)) if stepll else None 106 | 107 | if x_begin is None: 108 | x_begin = 0 109 | if x_end is None: 110 | x_end = step[-1] 111 | 112 | print_freq = 1 if len(step) == 1 else step[1] - step[0] 113 | 114 | plt.clf() 115 | plt.plot(step[x_begin//print_freq:x_end//print_freq], 116 | loss[x_begin//print_freq:x_end//print_freq]) 117 | plt.xlabel('step') 118 | plt.ylabel('loss') 119 | 120 | directory, file = split(fileroot) 121 | filename = splitext(file)[0] 122 | plt.savefig(directory + "/" + filename + f"-{x_begin}-{x_end}.png") 123 | plt.clf() # clear current figure 124 | -------------------------------------------------------------------------------- /bayesian_optimization/models/tnpnd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.modules import build_mlp 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPND(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 
num_layers, 21 | num_std_layers, 22 | cov_approx='cholesky', 23 | prj_dim=5, 24 | prj_depth=4, 25 | diag_depth=4 26 | ): 27 | super(TNPND, self).__init__( 28 | dim_x, 29 | dim_y, 30 | d_model, 31 | emb_depth, 32 | dim_feedforward, 33 | nhead, 34 | dropout, 35 | num_layers, 36 | ) 37 | 38 | assert cov_approx in ['cholesky', 'lowrank'] 39 | self.cov_approx = cov_approx 40 | 41 | self.mean_net = nn.Sequential( 42 | nn.Linear(d_model, dim_feedforward), 43 | nn.ReLU(), 44 | nn.Linear(dim_feedforward, dim_y) 45 | ) 46 | 47 | std_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 48 | self.std_encoder = nn.TransformerEncoder(std_encoder_layer, num_std_layers) 49 | 50 | self.projector = build_mlp(d_model, dim_feedforward, prj_dim*dim_y, prj_depth) 51 | 52 | if cov_approx == 'lowrank': 53 | self.diag_net = build_mlp(d_model, dim_feedforward, dim_y, diag_depth) 54 | 55 | def decode(self, out_encoder, batch_size, dim_y, num_target): 56 | mean_target = self.mean_net(out_encoder).view(batch_size, -1) 57 | 58 | out_std_encoder = self.std_encoder(out_encoder) 59 | std_prj = self.projector(out_std_encoder) 60 | std_prj = std_prj.view((batch_size, num_target*dim_y, -1)) 61 | if self.cov_approx == 'cholesky': 62 | std_tril = torch.bmm(std_prj, std_prj.transpose(1,2)) 63 | std_tril = std_tril.tril() 64 | if getattr(self, 'emnist', False): # guard: unlike the regression TNPND (bound_std), this constructor never sets this flag 65 | diag_ids = torch.arange(num_target*dim_y, device='cuda') 66 | std_tril[:, diag_ids, diag_ids] = 0.05 + 0.95*torch.tanh(std_tril[:, diag_ids, diag_ids]) 67 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, scale_tril=std_tril) 68 | else: 69 | diagonal = torch.exp(self.diag_net(out_encoder)).view((batch_size, -1, 1)) 70 | std = torch.bmm(std_prj, std_prj.transpose(1,2)) + torch.diag_embed(diagonal.squeeze(-1)) 71 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, covariance_matrix=std) 72 | 73 | return pred_tar 74 | 75 | def forward(self, batch, reduce_ll=True): 76 | batch_size = batch.x.shape[0] 77 | dim_y = batch.y.shape[-1] 78 | num_target = batch.xt.shape[1] 79 | 80 | out_encoder = self.encode(batch, autoreg=False) 81 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 82 | 83 | outs = AttrDict() 84 | yt = batch.yt.reshape(batch.yt.shape[0], -1) 85 | outs.loss = - (pred_tar.log_prob(yt).mean() / num_target) 86 | return outs 87 | 88 | 89 | def predict(self, xc, yc, xt, num_samples=None): 90 | if xc.shape[-3] != xt.shape[-3]: 91 | xt = xt.transpose(-3, -2) 92 | 93 | batch = AttrDict() 94 | batch.xc = xc 95 | batch.yc = yc 96 | batch.xt = xt 97 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 98 | 99 | batch_size = xc.shape[0] 100 | dim_y = yc.shape[-1] 101 | num_target = batch.xt.shape[1] 102 | 103 | out_encoder = self.encode(batch, autoreg=False) 104 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 105 | 106 | return Normal( 107 | pred_tar.mean.view(batch_size, num_target, -1), 108 | torch.diagonal(pred_tar.covariance_matrix, dim1=-2, dim2=-1).sqrt().reshape(batch_size, num_target, -1) # Normal expects a standard deviation, i.e. the square root of the covariance diagonal 109 | ) -------------------------------------------------------------------------------- /bayesian_optimization/data/highdim_gp.py: -------------------------------------------------------------------------------- 1 | import gpytorch 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | import torch 5 | import warnings 6 | sns.set() 7 | warnings.filterwarnings('ignore') 8 | 9 | from attrdict import AttrDict 10 
from gpytorch.kernels import ScaleKernel, RBFKernel 11 | from gpytorch.likelihoods import GaussianLikelihood 12 | from gpytorch.means import ConstantMean 13 | from gpytorch.models import ExactGP 14 | from gpytorch.priors import UniformPrior 15 | from typing import Union, List, Tuple 16 | 17 | 18 | class GaussianProcess(ExactGP): 19 | def __init__(self, x, y, likelihood, device): 20 | super(GaussianProcess, self).__init__(x, y, likelihood) 21 | self.mean_module = ConstantMean() 22 | 23 | self.length_prior = UniformPrior(0.1, 1.0) 24 | self.scale_prior = UniformPrior(0.1, 1.0) 25 | 26 | self.covar_module = ScaleKernel( 27 | RBFKernel(lengthscale_prior=self.length_prior), 28 | outputscale_prior=self.scale_prior 29 | ) 30 | self.device = device 31 | 32 | def forward(self, x, verbose=False, random_parameter=True): 33 | # Sample lengthscale and outputscale randomly 34 | if random_parameter: 35 | self.covar_module.base_kernel.lengthscale = self.length_prior.rsample().to(self.device) 36 | self.covar_module.outputscale = self.scale_prior.rsample().to(self.device) 37 | 38 | if verbose: 39 | print(f'Actual length scale: {self.covar_module.base_kernel.lengthscale}') 40 | print(f'Actual output scale: {self.covar_module.outputscale}') 41 | print('=' * 70) 42 | 43 | mean_x = self.mean_module(x) 44 | covar_x = self.covar_module(x) 45 | return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) 46 | 47 | 48 | class GPSampler: 49 | def __init__( 50 | self, 51 | dimension: int = 5, 52 | device: torch.device = torch.device('cpu'), 53 | seed: int = None 54 | ): 55 | # initialize likelihood and gp 56 | likelihood = GaussianLikelihood().to(device) 57 | self.gp = GaussianProcess(None, None, likelihood=likelihood, device=device).to(device) 58 | self.gp.eval() 59 | 60 | self.dim = dimension 61 | self.device = device 62 | self.seed = seed 63 | if seed is not None: 64 | torch.manual_seed(seed) 65 | torch.cuda.manual_seed(seed) 66 | 67 | def __call__( 68 | self, 69 | batch_size: int = 16, 70 | num_ctx: int = None, 71 | num_tar: int = None, 72 | max_num_points: int = 512, 73 | min_num_points: int = 128, 74 | x_range: Union[List, Tuple] = (-2, 2), 75 | random_parameter: bool = True 76 | ): 77 | lb, ub = x_range 78 | 79 | batch = AttrDict() 80 | 81 | num_ctx = num_ctx or torch.randint(min_num_points, max_num_points - min_num_points, size=[1]).item() 82 | num_tar = num_tar or torch.randint(min_num_points, max_num_points - num_ctx, size=[1]).item() 83 | 84 | num_points = num_ctx + num_tar 85 | batch.x = lb + (ub - lb) * torch.rand([batch_size, num_points, self.dim], device=self.device) 86 | batch.xc = batch.x[:, :num_ctx] 87 | batch.xt = batch.x[:, num_ctx:] 88 | 89 | with gpytorch.settings.prior_mode(True): 90 | batch.y = self.gp(batch.x, 91 | verbose=False, 92 | random_parameter=random_parameter).rsample().unsqueeze(-1) 93 | batch.yc = batch.y[:, :num_ctx] 94 | batch.yt = batch.y[:, num_ctx:] 95 | 96 | return batch 97 | 98 | 99 | if __name__ == '__main__': 100 | sampler = GPSampler(dimension=2) 101 | 102 | fig = plt.figure(figsize=(35, 35)) 103 | 104 | for i, p in enumerate([25, 500], 1): 105 | pts = sampler(num_ctx=p, num_tar=p, random_parameter=False) 106 | 107 | ax = fig.add_subplot(1, 2, i, projection='3d') 108 | ax.scatter(pts.x[0, :, 0].detach().numpy(), 109 | pts.x[0, :, 1].detach().numpy(), 110 | pts.y[0].detach().numpy()) 111 | plt.show() -------------------------------------------------------------------------------- /bayesian_optimization/bayeso_benchmarks/plot_benchmarks.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def plot_1d(obj_fun, 7 | str_fun, 8 | str_x_axis=r'$x$', 9 | str_y_axis=r'$f(x)$', 10 | str_figures='../figures', 11 | ): 12 | print(str_fun) 13 | bounds = obj_fun.get_bounds() 14 | print(bounds) 15 | assert bounds.shape[0] == 1 16 | 17 | X = np.linspace(bounds[0, 0], bounds[0, 1], 1000) 18 | Y = obj_fun.output(X[..., np.newaxis]).flatten() 19 | 20 | assert len(X.shape) == 1 21 | assert len(Y.shape) == 1 22 | assert X.shape[0] == Y.shape[0] 23 | 24 | plt.rc('text', usetex=True) 25 | 26 | _ = plt.figure(figsize=(10, 6)) 27 | ax = plt.gca() 28 | 29 | ax.plot(X, Y, 30 | linewidth=4, 31 | marker='None') 32 | 33 | ax.set_xlabel(str_x_axis, fontsize=36) 34 | ax.set_ylabel(str_y_axis, fontsize=36) 35 | ax.tick_params(labelsize=24) 36 | 37 | ax.set_xlim([np.min(X), np.max(X)]) 38 | ax.grid() 39 | 40 | plt.tight_layout() 41 | plt.savefig(os.path.join(str_figures, str_fun + '.pdf'), 42 | format='pdf', 43 | transparent=True, 44 | bbox_inches='tight') 45 | 46 | plt.show() 47 | 48 | def plot_2d(obj_fun, 49 | str_fun, 50 | str_x1_axis=r'$x_1$', 51 | str_x2_axis=r'$x_2$', 52 | str_y_axis=r'$f(\mathbf{x})$', 53 | str_figures='../figures', 54 | ): 55 | print(str_fun) 56 | bounds = obj_fun.get_bounds() 57 | print(bounds) 58 | assert bounds.shape[0] == 2 59 | 60 | X1 = np.linspace(bounds[0, 0], bounds[0, 1], 200) 61 | X2 = np.linspace(bounds[1, 0], bounds[1, 1], 200) 62 | X1, X2 = np.meshgrid(X1, X2) 63 | X = np.concatenate((X1[..., np.newaxis], X2[..., np.newaxis]), axis=2) 64 | X = np.reshape(X, (X.shape[0] * X.shape[1], X.shape[2])) 65 | 66 | Y = obj_fun.output(X).flatten() 67 | 68 | assert len(X.shape) == 2 69 | assert len(Y.shape) == 1 70 | assert X.shape[0] == Y.shape[0] 71 | 72 | Y = np.reshape(Y, (X1.shape[0], X2.shape[0])) 73 | 74 | plt.rc('text', usetex=True) 75 | 76 | _ = plt.figure(figsize=(8, 6)) 77 | ax = plt.axes(projection='3d') 78 | 79 | surf = ax.plot_surface(X1, X2, Y, 80 | cmap='coolwarm', 81 | linewidth=0) 82 | 83 | ax.set_xlabel(str_x1_axis, fontsize=24, labelpad=10) 84 | ax.set_ylabel(str_x2_axis, fontsize=24, labelpad=10) 85 | ax.set_zlabel(str_y_axis, fontsize=24, labelpad=10) 86 | ax.tick_params(labelsize=16) 87 | 88 | ax.set_xlim([np.min(X1), np.max(X1)]) 89 | ax.set_ylim([np.min(X2), np.max(X2)]) 90 | ax.grid() 91 | 92 | cbar = plt.colorbar(surf, 93 | shrink=0.6, 94 | aspect=12, 95 | pad=0.15, 96 | ) 97 | cbar.ax.tick_params(labelsize=16) 98 | 99 | if np.max(Y) > 1000: 100 | plt.ticklabel_format(axis='z', style='sci', scilimits=(0, 0), useMathText=True) 101 | ax.zaxis.get_offset_text().set_fontsize(14) 102 | 103 | plt.tight_layout() 104 | plt.savefig(os.path.join(str_figures, str_fun + '.pdf'), 105 | format='pdf', 106 | transparent=True, 107 | bbox_inches='tight') 108 | 109 | plt.show() 110 | 111 | 112 | if __name__ == '__main__': 113 | # one dim. 114 | 115 | from inf_dim_ackley import Ackley as target_class 116 | obj_fun = target_class(1) 117 | plot_1d(obj_fun, 'ackley_1d') 118 | 119 | from inf_dim_cosines import Cosines as target_class 120 | obj_fun = target_class(1) 121 | plot_1d(obj_fun, 'cosines_1d') 122 | 123 | 124 | # two dim. 
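# Editor's note (hedged sketch, not part of the original script): plot_2d
# below evaluates each benchmark on a 200x200 meshgrid and saves a
# transparent PDF surface plot. The remaining benchmark modules in this
# package follow the same import-and-plot pattern; assuming the
# dimension-generic modules expose classes named like the files (e.g.
# inf_dim_rastrigin providing Rastrigin), an additional panel would be:
#
# from inf_dim_rastrigin import Rastrigin as target_class
# obj_fun = target_class(2)
# plot_2d(obj_fun, 'rastrigin_2d')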
125 | from two_dim_dropwave import DropWave as target_class 126 | obj_fun = target_class() 127 | plot_2d(obj_fun, 'dropwave_2d') 128 | 129 | from two_dim_goldsteinprice import GoldsteinPrice as target_class 130 | obj_fun = target_class() 131 | plot_2d(obj_fun, 'goldsteinprice_2d') 132 | 133 | from two_dim_michalewicz import Michalewicz as target_class 134 | obj_fun = target_class() 135 | plot_2d(obj_fun, 'michalewicz_2d') 136 | 137 | from inf_dim_ackley import Ackley as target_class 138 | obj_fun = target_class(2) 139 | plot_2d(obj_fun, 'ackley_2d') 140 | 141 | from inf_dim_cosines import Cosines as target_class 142 | obj_fun = target_class(2) 143 | plot_2d(obj_fun, 'cosines_2d') 144 | 145 | -------------------------------------------------------------------------------- /regression/models/tnpnd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.modules import build_mlp 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPND(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | num_std_layers, 22 | bound_std=False, 23 | cov_approx='cholesky', 24 | prj_dim=5, 25 | prj_depth=4, 26 | diag_depth=4 27 | ): 28 | super(TNPND, self).__init__( 29 | dim_x, 30 | dim_y, 31 | d_model, 32 | emb_depth, 33 | dim_feedforward, 34 | nhead, 35 | dropout, 36 | num_layers, 37 | bound_std 38 | ) 39 | 40 | assert cov_approx in ['cholesky', 'lowrank'] 41 | self.cov_approx = cov_approx 42 | 43 | self.mean_net = nn.Sequential( 44 | nn.Linear(d_model, dim_feedforward), 45 | nn.ReLU(), 46 | nn.Linear(dim_feedforward, dim_y) 47 | ) 48 | 49 | std_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 50 | self.std_encoder = nn.TransformerEncoder(std_encoder_layer, num_std_layers) 51 | 52 | self.projector = build_mlp(d_model, dim_feedforward, prj_dim*dim_y, prj_depth) 53 | 54 | if cov_approx == 'lowrank': 55 | self.diag_net = build_mlp(d_model, dim_feedforward, dim_y, diag_depth) 56 | 57 | def decode(self, out_encoder, batch_size, dim_y, num_target): 58 | mean = self.mean_net(out_encoder).view(batch_size, -1) 59 | 60 | out_std_encoder = self.std_encoder(out_encoder) 61 | std_prj = self.projector(out_std_encoder) 62 | std_prj = std_prj.view((batch_size, num_target*dim_y, -1)) 63 | if self.cov_approx == 'cholesky': 64 | std_tril = torch.bmm(std_prj, std_prj.transpose(1,2)) 65 | std_tril = std_tril.tril() 66 | if self.bound_std: 67 | diag_ids = torch.arange(num_target*dim_y, device='cuda') 68 | std_tril[:, diag_ids, diag_ids] = 0.05 + 0.95*torch.tanh(std_tril[:, diag_ids, diag_ids]) 69 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean, scale_tril=std_tril) 70 | else: 71 | diagonal = torch.exp(self.diag_net(out_encoder)).view((batch_size, -1, 1)) 72 | std = torch.bmm(std_prj, std_prj.transpose(1,2)) + torch.diag_embed(diagonal.squeeze(-1)) 73 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean, covariance_matrix=std) 74 | 75 | return pred_tar 76 | 77 | def forward(self, batch, reduce_ll=True): 78 | batch_size = batch.x.shape[0] 79 | dim_y = batch.y.shape[-1] 80 | num_target = batch.xt.shape[1] 81 | 82 | out_encoder = self.encode(batch, autoreg=False) 83 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 84 | 85 | outs = 
AttrDict() 86 | outs.tar_ll = pred_tar.log_prob(batch.yt.reshape(batch_size, -1)) 87 | 88 | if not self.training: 89 | outs.tar_ll /= num_target 90 | 91 | if reduce_ll: 92 | outs.tar_ll = outs.tar_ll.mean() 93 | outs.loss = - (outs.tar_ll) 94 | outs.mean_std = torch.mean(pred_tar.covariance_matrix) 95 | else: 96 | outs.tar_ll = outs.tar_ll.unsqueeze(-1) 97 | 98 | return outs 99 | 100 | 101 | def predict(self, xc, yc, xt, num_samples=50, return_samples=False): 102 | batch_size = xc.shape[0] 103 | dim_y = yc.shape[-1] 104 | num_target = xt.shape[1] 105 | 106 | batch = AttrDict() 107 | batch.xc = xc 108 | batch.yc = yc 109 | batch.xt = xt 110 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 111 | 112 | out_encoder = self.encode(batch, autoreg=False) 113 | pred_tar = self.decode(out_encoder, batch_size, dim_y, num_target) 114 | 115 | yt_samples = pred_tar.rsample([num_samples]).view(num_samples, batch_size, num_target, -1) 116 | if return_samples: 117 | return yt_samples 118 | 119 | std = yt_samples.std(dim=0) 120 | return Normal(pred_tar.mean.view(batch_size, num_target, -1), std) 121 | 122 | 123 | def sample(self, xc, yc, xt, num_samples=50): 124 | return self.predict(xc, yc, xt, num_samples, return_samples=True) -------------------------------------------------------------------------------- /regression/models/tnpa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.distributions.normal import Normal 5 | from attrdict import AttrDict 6 | 7 | from utils.misc import stack 8 | from models.tnp import TNP 9 | 10 | 11 | class TNPA(TNP): 12 | def __init__( 13 | self, 14 | dim_x, 15 | dim_y, 16 | d_model, 17 | emb_depth, 18 | dim_feedforward, 19 | nhead, 20 | dropout, 21 | num_layers, 22 | bound_std=False, 23 | permute=False, 24 | ): 25 | super(TNPA, self).__init__( 26 | dim_x, 27 | dim_y, 28 | d_model, 29 | emb_depth, 30 | dim_feedforward, 31 | nhead, 32 | dropout, 33 | num_layers, 34 | bound_std 35 | ) 36 | 37 | self.predictor = nn.Sequential( 38 | nn.Linear(d_model, dim_feedforward), 39 | nn.ReLU(), 40 | nn.Linear(dim_feedforward, dim_y*2) 41 | ) 42 | 43 | self.permute = permute 44 | 45 | def forward(self, batch, reduce_ll=True): 46 | z_target = self.encode(batch, autoreg=True) 47 | out = self.predictor(z_target) 48 | mean, std = torch.chunk(out, 2, dim=-1) 49 | if self.bound_std: 50 | std = 0.05 + 0.95 * F.softplus(std) 51 | else: 52 | std = torch.exp(std) 53 | 54 | pred_tar = Normal(mean, std) 55 | 56 | outs = AttrDict() 57 | if reduce_ll: 58 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1).mean() 59 | else: 60 | outs.tar_ll = pred_tar.log_prob(batch.yt).sum(-1) 61 | outs.loss = - (outs.tar_ll) 62 | 63 | return outs 64 | 65 | def permute_sample_batch(self, xt, yt, num_samples, batch_size, num_target): 66 | # data in each batch is permuted identically 67 | perm_ids = torch.rand(num_samples, num_target, device='cuda').unsqueeze(1).repeat((1, batch_size, 1)) 68 | perm_ids = torch.argsort(perm_ids, dim=-1) 69 | deperm_ids = torch.argsort(perm_ids, dim=-1) 70 | dim_sample = torch.arange(num_samples, device='cuda').unsqueeze(-1).unsqueeze(-1).repeat((1,batch_size,num_target)) 71 | dim_batch = torch.arange(batch_size, device='cuda').unsqueeze(0).unsqueeze(-1).repeat((num_samples,1,num_target)) 72 | return xt[dim_sample, dim_batch, perm_ids], yt[dim_sample, dim_batch, perm_ids], dim_sample, dim_batch, deperm_ids 73 | 74 | def predict(self, 
xc, yc, xt, num_samples=50, return_samples=False): 75 | batch_size = xc.shape[0] 76 | num_target = xt.shape[1] 77 | 78 | def squeeze(x): 79 | return x.view(-1, x.shape[-2], x.shape[-1]) 80 | def unsqueeze(x): 81 | return x.view(num_samples, batch_size, x.shape[-2], x.shape[-1]) 82 | 83 | xc_stacked = stack(xc, num_samples) 84 | yc_stacked = stack(yc, num_samples) 85 | xt_stacked = stack(xt, num_samples) 86 | yt_pred = torch.zeros((batch_size, num_target, yc.shape[2]), device='cuda') 87 | yt_stacked = stack(yt_pred, num_samples) 88 | if self.permute: 89 | xt_stacked, yt_stacked, dim_sample, dim_batch, deperm_ids = self.permute_sample_batch(xt_stacked, yt_stacked, num_samples, batch_size, num_target) 90 | 91 | batch_stacked = AttrDict() 92 | batch_stacked.xc = squeeze(xc_stacked) 93 | batch_stacked.yc = squeeze(yc_stacked) 94 | batch_stacked.xt = squeeze(xt_stacked) 95 | batch_stacked.yt = squeeze(yt_stacked) 96 | 97 | for step in range(xt.shape[1]): 98 | z_target_stacked = self.encode(batch_stacked, autoreg=True) 99 | out = self.predictor(z_target_stacked) 100 | mean, std = torch.chunk(out, 2, dim=-1) 101 | if self.bound_std: 102 | std = 0.05 + 0.95 * F.softplus(std) 103 | else: 104 | std = torch.exp(std) 105 | mean, std = unsqueeze(mean), unsqueeze(std) 106 | batch_stacked.yt = unsqueeze(batch_stacked.yt) 107 | batch_stacked.yt[:, :, step] = Normal(mean[:, :, step], std[:, :, step]).sample() 108 | batch_stacked.yt = squeeze(batch_stacked.yt) 109 | 110 | if self.permute: 111 | mean, std = mean[dim_sample, dim_batch, deperm_ids], std[dim_sample, dim_batch, deperm_ids] 112 | 113 | if return_samples: 114 | return unsqueeze(batch_stacked.yt) 115 | 116 | return Normal(mean, std) 117 | 118 | def sample(self, xc, yc, xt, num_samples=50): 119 | return self.predict(xc, yc, xt, num_samples, return_samples=True) -------------------------------------------------------------------------------- /contextual_bandits/models/tnpnd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.distributions.normal import Normal 4 | from attrdict import AttrDict 5 | 6 | from models.modules import build_mlp 7 | from models.tnp import TNP 8 | 9 | 10 | class TNPND(TNP): 11 | def __init__( 12 | self, 13 | dim_x, 14 | dim_y, 15 | d_model, 16 | emb_depth, 17 | dim_feedforward, 18 | nhead, 19 | dropout, 20 | num_layers, 21 | num_std_layers, 22 | drop_y=0.5, 23 | cov_approx='cholesky', 24 | prj_dim=5, 25 | prj_depth=4, 26 | diag_depth=4 27 | ): 28 | super(TNPND, self).__init__( 29 | dim_x, 30 | dim_y, 31 | d_model, 32 | emb_depth, 33 | dim_feedforward, 34 | nhead, 35 | dropout, 36 | num_layers, 37 | drop_y 38 | ) 39 | 40 | assert cov_approx in ['cholesky', 'lowrank'] 41 | self.cov_approx = cov_approx 42 | 43 | self.mean_net = nn.Sequential( 44 | nn.Linear(d_model, dim_feedforward), 45 | nn.ReLU(), 46 | nn.Linear(dim_feedforward, dim_y) 47 | ) 48 | 49 | std_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True) 50 | self.std_encoder = nn.TransformerEncoder(std_encoder_layer, num_std_layers) 51 | 52 | self.projector = build_mlp(d_model, dim_feedforward, prj_dim*dim_y, prj_depth) 53 | 54 | if cov_approx == 'lowrank': 55 | self.diag_net = build_mlp(d_model, dim_feedforward, dim_y, diag_depth) 56 | 57 | def decode(self, out_encoder, mean_target, batch_size, dim_y, num_target): 58 | mean_target = mean_target.view(batch_size, -1) 59 | 60 | out_std_encoder = self.std_encoder(out_encoder) 61 | std_prj 
= self.projector(out_std_encoder) 62 | std_prj = std_prj.view((batch_size, num_target*dim_y, -1)) 63 | if self.cov_approx == 'cholesky': 64 | std_tril = torch.bmm(std_prj, std_prj.transpose(1,2)) 65 | std_tril = std_tril.tril() 66 | if getattr(self, 'bound_std', False): # fix: the original tested self.emnist, which is never set in this variant 67 | diag_ids = torch.arange(num_target*dim_y, device='cuda') 68 | std_tril[:, diag_ids, diag_ids] = 0.05 + 0.95*torch.tanh(std_tril[:, diag_ids, diag_ids]) 69 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, scale_tril=std_tril) 70 | else: 71 | diagonal = torch.exp(self.diag_net(out_encoder)).view((batch_size, -1, 1)) 72 | std = torch.bmm(std_prj, std_prj.transpose(1,2)) + torch.diag_embed(diagonal.squeeze(-1)) 73 | pred_tar = torch.distributions.multivariate_normal.MultivariateNormal(mean_target, covariance_matrix=std) 74 | 75 | return pred_tar 76 | 77 | def forward(self, batch, reduce_ll=True): 78 | batch_size = batch.x.shape[0] 79 | dim_y = batch.y.shape[-1] 80 | num_context = batch.xc.shape[1] 81 | num_target = batch.xt.shape[1] 82 | 83 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=True) 84 | mean = self.mean_net(out_encoder) 85 | mean_ctx = mean[:, :num_context] 86 | mean_target = mean[:, num_context:].reshape(batch_size, -1) 87 | pred_tar = self.decode(out_encoder[:, num_context:], mean_target, batch_size, dim_y, num_target) 88 | 89 | outs = AttrDict() 90 | yt = batch.yt.reshape(batch.yt.shape[0], -1) 91 | outs.loss_target = - (pred_tar.log_prob(yt).mean() / num_target) 92 | outs.loss_ctx = torch.sum((batch.yc - mean_ctx)**2, dim=-1).mean() 93 | outs.loss = outs.loss_ctx + outs.loss_target 94 | outs.mean_std = torch.mean(pred_tar.covariance_matrix) 95 | outs.rmse = torch.sqrt(torch.mean((yt - mean_target)**2)) # sqrt added so the logged value is actually an RMSE, as the name claims 96 | return outs 97 | 98 | 99 | def predict(self, xc, yc, xt, num_samples=100): 100 | batch = AttrDict() 101 | batch.xc = xc 102 | batch.yc = yc 103 | batch.xt = xt 104 | batch.yt = torch.zeros((xt.shape[0], xt.shape[1], yc.shape[2]), device='cuda') 105 | 106 | batch_size = xc.shape[0] 107 | dim_y = yc.shape[-1] 108 | num_context = batch.xc.shape[1] 109 | num_target = batch.xt.shape[1] 110 | 111 | out_encoder = self.encode(batch, autoreg=False, drop_ctx=False)[:, num_context:] 112 | mean_target = self.mean_net(out_encoder) 113 | pred_tar = self.decode(out_encoder, mean_target, batch_size, dim_y, num_target) 114 | 115 | yt_samples = pred_tar.rsample([num_samples]).reshape(num_samples, batch_size, num_target, -1) 116 | std = yt_samples.std(dim=0) 117 | outs = AttrDict() 118 | outs.loc = mean_target.unsqueeze(0) 119 | outs.scale = std.unsqueeze(0) 120 | outs.ys = Normal(outs.loc, outs.scale) 121 | return outs -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | name: tnp 2 | channels: 3 | - anaconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=conda_forge 8 | - _openmp_mutex=4.5=1_llvm 9 | - alsa-lib=1.2.3=h516909a_0 10 | - blas=1.0=mkl 11 | - bottleneck=1.3.2=py39hdd57654_1 12 | - brotli=1.0.9=h7f98852_6 13 | - brotli-bin=1.0.9=h7f98852_6 14 | - bzip2=1.0.8=h7f98852_4 15 | - ca-certificates=2020.10.14=0 16 | - certifi=2021.10.8=py39h06a4308_2 17 | - cffi=1.15.0=py39hd667e15_1 18 | - colorama=0.4.4=pyh9f0ad1d_0 19 | - cudatoolkit=11.5.1=hcf5317a_9 20 | - cudnn=8.2.1.32=h86fa8c9_0 21 | - cycler=0.11.0=pyhd8ed1ab_0 22 | - dbus=1.13.18=hb2f20db_0 23 | - expat=2.4.2=h9c3ff4c_0 24 | - fontconfig=2.13.1=hba837de_1005 25 | - fonttools=4.28.5=py39h3811e60_0
26 | - freetype=2.10.4=h0708190_1 27 | - future=0.18.2=py39hf3d152e_4 28 | - glib=2.69.1=h4ff587b_1 29 | - gst-plugins-base=1.14.0=hbbd80ab_1 30 | - gstreamer=1.14.0=h28cd5cc_2 31 | - icu=58.2=he6710b0_3 32 | - jbig=2.1=h7f98852_2003 33 | - jpeg=9d=h36c2ea0_0 34 | - kiwisolver=1.3.2=py39h1a9c180_1 35 | - krb5=1.19.2=hcc1bbae_3 36 | - lcms2=2.12=hddcbb42_0 37 | - ld_impl_linux-64=2.36.1=hea4e1c9_2 38 | - lerc=3.0=h9c3ff4c_0 39 | - libblas=3.9.0=12_linux64_mkl 40 | - libbrotlicommon=1.0.9=h7f98852_6 41 | - libbrotlidec=1.0.9=h7f98852_6 42 | - libbrotlienc=1.0.9=h7f98852_6 43 | - libcblas=3.9.0=12_linux64_mkl 44 | - libclang=11.1.0=default_ha53f305_1 45 | - libdeflate=1.8=h7f98852_0 46 | - libedit=3.1.20191231=he28a2e2_2 47 | - libevent=2.1.10=h9b69904_4 48 | - libffi=3.3=he6710b0_2 49 | - libgcc-ng=11.2.0=h1d223b6_11 50 | - libgfortran-ng=7.5.0=ha8ba4b0_17 51 | - libgfortran4=7.5.0=ha8ba4b0_17 52 | - libiconv=1.16=h516909a_0 53 | - liblapack=3.9.0=12_linux64_mkl 54 | - libllvm11=11.1.0=hf817b99_2 55 | - libnsl=2.0.0=h7f98852_0 56 | - libogg=1.3.4=h7f98852_1 57 | - libopus=1.3.1=h7f98852_1 58 | - libpng=1.6.37=h21135ba_2 59 | - libpq=13.5=hd57d9b9_1 60 | - libprotobuf=3.16.0=h780b84a_0 61 | - libstdcxx-ng=11.2.0=he4da1e4_11 62 | - libtiff=4.3.0=h6f004c6_2 63 | - libuuid=2.32.1=h7f98852_1000 64 | - libvorbis=1.3.7=h9c3ff4c_0 65 | - libwebp-base=1.2.1=h7f98852_0 66 | - libxcb=1.13=h7f98852_1004 67 | - libxkbcommon=1.0.3=he3ba5ed_0 68 | - libxml2=2.9.12=h03d6c58_0 69 | - libzlib=1.2.11=h36c2ea0_1013 70 | - llvm-openmp=12.0.1=h4bd325d_1 71 | - lz4-c=1.9.3=h9c3ff4c_1 72 | - magma=2.5.4=h6103c52_2 73 | - matplotlib=3.4.3=py39h06a4308_0 74 | - matplotlib-base=3.4.3=py39hbbc1b5f_0 75 | - mkl=2021.4.0=h8d4b97c_729 76 | - mkl-service=2.4.0=py39h7f8727e_0 77 | - munkres=1.1.4=pyh9f0ad1d_0 78 | - mysql-common=8.0.27=ha770c72_3 79 | - mysql-libs=8.0.27=hfa10184_3 80 | - nccl=2.11.4.1=hdc17891_0 81 | - ncurses=6.2=h58526e2_4 82 | - ninja=1.10.2=h4bd325d_1 83 | - nspr=4.32=h9c3ff4c_1 84 | - nss=3.74=hb5efdd6_0 85 | - numexpr=2.8.1=py39h6abb31d_0 86 | - numpy=1.22.2=py39h91f2184_0 87 | - olefile=0.46=pyh9f0ad1d_1 88 | - openjpeg=2.4.0=hb52868f_1 89 | - openssl=1.1.1m=h7f8727e_0 90 | - packaging=21.3=pyhd8ed1ab_0 91 | - pandas=1.3.5=py39h8c16a72_0 92 | - pcre=8.45=h9c3ff4c_0 93 | - pillow=8.4.0=py39ha612740_0 94 | - pip=21.2.4=py39h06a4308_0 95 | - pthread-stubs=0.4=h36c2ea0_1001 96 | - pycparser=2.21=pyhd8ed1ab_0 97 | - pyparsing=3.0.6=pyhd8ed1ab_0 98 | - pyqt=5.9.2=py39h2531618_6 99 | - pyqt5-sip=4.19.18=py39he80948d_8 100 | - python=3.9.7=h12debd9_1 101 | - python-dateutil=2.8.2=pyhd8ed1ab_0 102 | - python_abi=3.9=2_cp39 103 | - pytorch=1.9.0=cuda112py39hbeb36f3_1 104 | - pytorch-gpu=1.9.0=cuda112py39h0bbbad9_1 105 | - pytz=2020.1=py_0 106 | - qt=5.9.7=h5867ecd_1 107 | - readline=8.1=h46c0cb4_0 108 | - scipy=1.7.3=py39hc147768_0 109 | - seaborn=0.11.0=py_0 110 | - setuptools=58.0.4=py39h06a4308_0 111 | - sip=4.19.13=py39h295c915_0 112 | - six=1.16.0=pyh6c4a22f_0 113 | - sleef=3.5.1=h9b69904_2 114 | - sqlite=3.37.0=h9cd32fc_0 115 | - tbb=2021.5.0=h4bd325d_0 116 | - tk=8.6.11=h27826a3_1 117 | - tornado=6.1=py39h27cfd23_0 118 | - tqdm=4.62.3=pyhd8ed1ab_0 119 | - typing_extensions=4.0.1=pyha770c72_0 120 | - tzdata=2021e=he74cb21_0 121 | - wheel=0.37.1=pyhd8ed1ab_0 122 | - xorg-libxau=1.0.9=h7f98852_0 123 | - xorg-libxdmcp=1.1.3=h7f98852_0 124 | - xz=5.2.5=h516909a_1 125 | - zlib=1.2.11=h36c2ea0_1013 126 | - zstd=1.5.1=ha95c52a_0 127 | - pip: 128 | - attrdict==2.0.1 129 | - attrs==21.4.0 130 | - bayeso==0.5.2 
131 | - black==21.12b0 132 | - click==8.0.3 133 | - cma==3.1.0 134 | - gpytorch==1.6.0 135 | - iniconfig==1.1.1 136 | - joblib==1.1.0 137 | - mypy-extensions==0.4.3 138 | - pathspec==0.9.0 139 | - platformdirs==2.4.1 140 | - pluggy==1.0.0 141 | - py==1.11.0 142 | - pytest==6.2.5 143 | - pyyaml==6.0 144 | - qmcpy==1.2 145 | - scikit-learn==1.0.2 146 | - shapely==1.8.0 147 | - threadpoolctl==3.0.0 148 | - toml==0.10.2 149 | - tomli==1.2.3 150 | - uncertainty-toolbox==0.1.0 -------------------------------------------------------------------------------- /contextual_bandits/utils/log.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import logging 4 | import yaml 5 | import os 6 | import os.path as osp 7 | from attrdict import AttrDict 8 | from collections import OrderedDict 9 | from matplotlib import pyplot as plt 10 | from os.path import split, splitext 11 | 12 | 13 | def get_logger(filename, mode='a'): 14 | logging.basicConfig(level=logging.INFO, format='%(message)s') 15 | logger = logging.getLogger() 16 | # handlers accumulate on the root logger across repeated runs, so remove any existing ones first 17 | for hdlr in logger.handlers: 18 | logger.removeHandler(hdlr) 19 | logger.addHandler(logging.FileHandler(filename, mode=mode)) 20 | logger.addHandler(logging.StreamHandler()) 21 | return logger 22 | 23 | 24 | class RunningAverage(object): 25 | def __init__(self, *keys): 26 | self.sum = OrderedDict() 27 | self.cnt = OrderedDict() 28 | self.clock = time.time() 29 | for key in keys: 30 | self.sum[key] = 0 31 | self.cnt[key] = 0 32 | 33 | def update(self, key, val): 34 | if isinstance(val, torch.Tensor): 35 | val = val.item() 36 | if self.sum.get(key, None) is None: 37 | self.sum[key] = val 38 | self.cnt[key] = 1 39 | else: 40 | self.sum[key] = self.sum[key] + val 41 | self.cnt[key] += 1 42 | 43 | def reset(self): 44 | for key in self.sum.keys(): 45 | self.sum[key] = 0 46 | self.cnt[key] = 0 47 | self.clock = time.time() 48 | 49 | def clear(self): 50 | self.sum = OrderedDict() 51 | self.cnt = OrderedDict() 52 | self.clock = time.time() 53 | 54 | def keys(self): 55 | return self.sum.keys() 56 | 57 | def get(self, key): 58 | assert(self.sum.get(key, None) is not None) 59 | return self.sum[key] / self.cnt[key] 60 | 61 | def info(self, show_et=True): 62 | line = '' 63 | for key in self.sum.keys(): 64 | val = self.sum[key] / self.cnt[key] 65 | if type(val) == float: 66 | line += f'{key} {val:.4f} ' 67 | else: 68 | line += f'{key} {val} ' 69 | if show_et: 70 | line += f'({time.time()-self.clock:.3f} secs)' 71 | return line 72 | 73 | 74 | def get_log(fileroot): 75 | step = [] 76 | loss = [] 77 | train_time = [] 78 | eval_time = [] 79 | ctxll = [] 80 | tarll = [] 81 | with open(fileroot, "r") as file: 82 | lines = file.readlines() 83 | for line in lines: 84 | # training step 85 | if "step" in line: 86 | linesplit = line.split(" ") 87 | step += [int(linesplit[3])] 88 | _loss = linesplit[-3] 89 | loss += [100 if _loss=="nan" else float(_loss)] 90 | train_time += [float(linesplit[-2][1:])] 91 | # evaluation step 92 | elif "ctx_ll" in line: 93 | linesplit = line.split(" ") 94 | ctxll += [float(linesplit[-5])] 95 | tarll += [float(linesplit[-3])] 96 | eval_time += [float(linesplit[-2][1:])] 97 | 98 | return step, loss, None, ctxll, tarll 99 | 100 | 101 | def plot_log(fileroot, x_begin=None, x_end=None): 102 | plt.clf() # clear current figure 103 | 104 | step, loss, stepll, ctxll, tarll = get_log(fileroot) 105 | step = list(map(int, step)) 106 | loss =
list(map(float, loss)) 107 | ctxll = list(map(float, ctxll)) 108 | tarll = list(map(float, tarll)) 109 | stepll = list(map(int, stepll)) if stepll else None 110 | 111 | if x_begin is None: 112 | x_begin = 0 113 | if x_end is None: 114 | x_end = step[-1] 115 | 116 | print_freq = 1 if len(step)==1 else step[1] - step[0] 117 | 118 | plt.plot(step[x_begin//print_freq:x_end//print_freq], 119 | loss[x_begin//print_freq:x_end//print_freq]) 120 | plt.xlabel('step') 121 | plt.ylabel('loss') 122 | 123 | dir, file = split(fileroot) 124 | filename = splitext(file)[0] 125 | plt.savefig(dir + "/" + filename + f"-{x_begin}-{x_end}.png") 126 | 127 | 128 | def plot_freq_cov(): 129 | with open(osp.join("model_paths.yaml")) as f: 130 | model_paths = yaml.safe_load(f) 131 | root = model_paths["root"] 132 | model_paths = model_paths["models"] 133 | x_base = torch.linspace(-2, 2, 500).unsqueeze(-1) 134 | 135 | for kernel in ["rbf", "periodic", "matern"]: 136 | plt.clf() 137 | for model, path in model_paths.items(): 138 | freq_cov = torch.load(osp.join(root, model, path, f"freq_cov_{kernel}.pt")) 139 | # plt.scatter(x_base.cpu(), freq_cov.cpu(), s=3, alpha=1.0, label=model+f"-{freq_cov.mean():0.2f}") 140 | plt.scatter(x_base.cpu(), freq_cov.cpu(), s=3, alpha=1.0, label=f"{model}-{freq_cov.mean():0.2f}") 141 | plt.ylim([0,1]) 142 | 143 | # models = "_".join(model_paths.keys()) 144 | models = "all" 145 | plt.legend() 146 | plt.title(f"Frequentist Coverage - {kernel}") 147 | if not osp.exists(osp.join(root, "plot", "freq_cov", models)): 148 | os.makedirs(osp.join(root, "plot", "freq_cov", models)) 149 | plt.savefig(osp.join(root, "plot", "freq_cov", models, f"freq_cov_{kernel}.jpg")) 150 | 151 | if __name__ == "__main__": 152 | plot_freq_cov() 153 | -------------------------------------------------------------------------------- /regression/data/gp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import MultivariateNormal, StudentT 3 | from attrdict import AttrDict 4 | import math 5 | 6 | 7 | __all__ = ["GPPriorSampler", 'GPSampler', 'RBFKernel', 'PeriodicKernel', 'Matern52Kernel'] 8 | 9 | 10 | class GPPriorSampler(object): 11 | """ 12 | Used for Bayesian optimization. 13 | """ 14 | def __init__(self, kernel, t_noise=None): 15 | self.kernel = kernel 16 | self.t_noise = t_noise 17 | 18 | # bx: 1 * num_points * 1 19 | def sample(self, x, device): 20 | # 1 * num_points * num_points 21 | cov = self.kernel(x) 22 | mean = torch.zeros(1, x.shape[1], device=device) 23 | 24 | y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) 25 | 26 | if self.t_noise is not None: 27 | y += self.t_noise * StudentT(2.1).rsample(y.shape).to(device) 28 | 29 | return y 30 | 31 | 32 | class GPSampler(object): 33 | def __init__(self, kernel, t_noise=None, seed=None): 34 | self.kernel = kernel 35 | self.t_noise = t_noise 36 | if seed is not None: 37 | torch.manual_seed(seed) 38 | torch.cuda.manual_seed(seed) 39 | self.seed = seed 40 | 41 | def sample(self, 42 | batch_size=16, 43 | num_ctx=None, 44 | num_tar=None, 45 | max_num_points=50, 46 | x_range=(-2, 2), 47 | device='cpu'): 48 | 49 | batch = AttrDict() 50 | num_ctx = num_ctx or torch.randint(low=3, high=max_num_points-3, size=[1]).item() # Nc 51 | num_tar = num_tar or torch.randint(low=3, high=max_num_points-num_ctx, size=[1]).item() # Nt 52 | 53 | num_points = num_ctx + num_tar # N = Nc + Nt 54 | batch.x = x_range[0] + (x_range[1] - x_range[0]) \ 55 | * torch.rand([batch_size, num_points, 1], device=device) # 
[B,N,Dx=1] 56 | batch.xc = batch.x[:,:num_ctx] # [B,Nc,1] 57 | batch.xt = batch.x[:,num_ctx:] # [B,Nt,1] 58 | 59 | # batch_size * num_points * num_points 60 | cov = self.kernel(batch.x) # [B,N,N] 61 | mean = torch.zeros(batch_size, num_points, device=device) # [B,N] 62 | batch.y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) # [B,N,Dy=1] 63 | batch.yc = batch.y[:,:num_ctx] # [B,Nc,1] 64 | batch.yt = batch.y[:,num_ctx:] # [B,Nt,1] 65 | 66 | if self.t_noise is not None: 67 | if self.t_noise == -1: 68 | t_noise = 0.15 * torch.rand(batch.y.shape).to(device) # [B,N,1] 69 | else: 70 | t_noise = self.t_noise 71 | batch.y += t_noise * StudentT(2.1).rsample(batch.y.shape).to(device) 72 | return batch 73 | # {"x": [B,N,1], "xc": [B,Nc,1], "xt": [B,Nt,1], 74 | # "y": [B,N,1], "yc": [B,Nc,1], "yt": [B,Nt,1]} 75 | 76 | class RBFKernel(object): 77 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 78 | self.sigma_eps = sigma_eps 79 | self.max_length = max_length 80 | self.max_scale = max_scale 81 | 82 | # x: batch_size * num_points * dim [B,N,Dx=1] 83 | def __call__(self, x): 84 | length = 0.1 + (self.max_length-0.1) \ 85 | * torch.rand([x.shape[0], 1, 1, 1], device=x.device) 86 | scale = 0.1 + (self.max_scale-0.1) \ 87 | * torch.rand([x.shape[0], 1, 1], device=x.device) 88 | 89 | # batch_size * num_points * num_points * dim [B,N,N,1] 90 | dist = (x.unsqueeze(-2) - x.unsqueeze(-3))/length 91 | 92 | # batch_size * num_points * num_points [B,N,N] 93 | cov = scale.pow(2) * torch.exp(-0.5 * dist.pow(2).sum(-1)) \ 94 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 95 | 96 | return cov # [B,N,N] 97 | 98 | class Matern52Kernel(object): 99 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 100 | self.sigma_eps = sigma_eps 101 | self.max_length = max_length 102 | self.max_scale = max_scale 103 | 104 | # x: batch_size * num_points * dim 105 | def __call__(self, x): 106 | length = 0.1 + (self.max_length-0.1) \ 107 | * torch.rand([x.shape[0], 1, 1, 1], device=x.device) 108 | scale = 0.1 + (self.max_scale-0.1) \ 109 | * torch.rand([x.shape[0], 1, 1], device=x.device) 110 | 111 | # batch_size * num_points * num_points 112 | dist = torch.norm((x.unsqueeze(-2) - x.unsqueeze(-3))/length, dim=-1) 113 | 114 | cov = scale.pow(2)*(1 + math.sqrt(5.0)*dist + 5.0*dist.pow(2)/3.0) \ 115 | * torch.exp(-math.sqrt(5.0) * dist) \ 116 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 117 | 118 | return cov 119 | 120 | class PeriodicKernel(object): 121 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 122 | # the period p is sampled per batch in __call__ below 123 | self.sigma_eps = sigma_eps 124 | self.max_length = max_length 125 | self.max_scale = max_scale 126 | 127 | # x: batch_size * num_points * dim 128 | def __call__(self, x): 129 | p = 0.1 + 0.4*torch.rand([x.shape[0], 1, 1], device=x.device) 130 | length = 0.1 + (self.max_length-0.1) \ 131 | * torch.rand([x.shape[0], 1, 1], device=x.device) 132 | scale = 0.1 + (self.max_scale-0.1) \ 133 | * torch.rand([x.shape[0], 1, 1], device=x.device) 134 | 135 | dist = x.unsqueeze(-2) - x.unsqueeze(-3) 136 | cov = scale.pow(2) * torch.exp( 137 | - 2*(torch.sin(math.pi*dist.abs().sum(-1)/p)/length).pow(2)) \ 138 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 139 | 140 | return cov -------------------------------------------------------------------------------- /bayesian_optimization/data/gp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import
MultivariateNormal, StudentT 3 | from attrdict import AttrDict 4 | import math 5 | 6 | 7 | __all__ = ["GPPriorSampler", 'GPSampler', 'RBFKernel', 'PeriodicKernel', 'Matern52Kernel'] 8 | 9 | 10 | class GPPriorSampler(object): 11 | """ 12 | Used for Bayesian optimization. 13 | """ 14 | def __init__(self, kernel, t_noise=None): 15 | self.kernel = kernel 16 | self.t_noise = t_noise 17 | 18 | # bx: 1 * num_points * 1 19 | def sample(self, x, device): 20 | # 1 * num_points * num_points 21 | cov = self.kernel(x) 22 | mean = torch.zeros(1, x.shape[1], device=device) 23 | 24 | y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) 25 | 26 | if self.t_noise is not None: 27 | y += self.t_noise * StudentT(2.1).rsample(y.shape).to(device) 28 | 29 | return y 30 | 31 | 32 | class GPSampler(object): 33 | def __init__(self, kernel, t_noise=None, seed=None): 34 | self.kernel = kernel 35 | self.t_noise = t_noise 36 | if seed is not None: 37 | torch.manual_seed(seed) 38 | torch.cuda.manual_seed(seed) 39 | self.seed = seed 40 | 41 | def sample(self, 42 | batch_size=16, 43 | num_ctx=None, 44 | num_tar=None, 45 | max_num_points=50, 46 | x_range=(-2, 2), 47 | device='cpu'): 48 | 49 | batch = AttrDict() 50 | num_ctx = num_ctx or torch.randint(low=3, high=max_num_points-3, size=[1]).item() # Nc 51 | num_tar = num_tar or torch.randint(low=3, high=max_num_points-num_ctx, size=[1]).item() # Nt 52 | 53 | num_points = num_ctx + num_tar # N = Nc + Nt 54 | batch.x = x_range[0] + (x_range[1] - x_range[0]) \ 55 | * torch.rand([batch_size, num_points, 1], device=device) # [B,N,Dx=1] 56 | batch.xc = batch.x[:,:num_ctx] # [B,Nc,1] 57 | batch.xt = batch.x[:,num_ctx:] # [B,Nt,1] 58 | 59 | # batch_size * num_points * num_points 60 | cov = self.kernel(batch.x) # [B,N,N] 61 | mean = torch.zeros(batch_size, num_points, device=device) # [B,N] 62 | batch.y = MultivariateNormal(mean, cov).rsample().unsqueeze(-1) # [B,N,Dy=1] 63 | batch.yc = batch.y[:,:num_ctx] # [B,Nc,1] 64 | batch.yt = batch.y[:,num_ctx:] # [B,Nt,1] 65 | 66 | if self.t_noise is not None: 67 | if self.t_noise == -1: 68 | t_noise = 0.15 * torch.rand(batch.y.shape).to(device) # [B,N,1] 69 | else: 70 | t_noise = self.t_noise 71 | batch.y += t_noise * StudentT(2.1).rsample(batch.y.shape).to(device) 72 | return batch 73 | # {"x": [B,N,1], "xc": [B,Nc,1], "xt": [B,Nt,1], 74 | # "y": [B,N,1], "yc": [B,Nc,1], "yt": [B,Nt,1]} 75 | 76 | 77 | class RBFKernel(object): 78 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 79 | self.sigma_eps = sigma_eps 80 | self.max_length = max_length 81 | self.max_scale = max_scale 82 | 83 | # x: batch_size * num_points * dim [B,N,Dx=1] 84 | def __call__(self, x): 85 | length = 0.1 + (self.max_length-0.1) \ 86 | * torch.rand([x.shape[0], 1, 1, 1], device=x.device) 87 | scale = 0.1 + (self.max_scale-0.1) \ 88 | * torch.rand([x.shape[0], 1, 1], device=x.device) 89 | 90 | # batch_size * num_points * num_points * dim [B,N,N,1] 91 | dist = (x.unsqueeze(-2) - x.unsqueeze(-3))/length 92 | 93 | # batch_size * num_points * num_points [B,N,N] 94 | cov = scale.pow(2) * torch.exp(-0.5 * dist.pow(2).sum(-1)) \ 95 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 96 | 97 | return cov # [B,N,N] 98 | 99 | 100 | class Matern52Kernel(object): 101 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 102 | self.sigma_eps = sigma_eps 103 | self.max_length = max_length 104 | self.max_scale = max_scale 105 | 106 | # x: batch_size * num_points * dim 107 | def __call__(self, x): 108 | length = 0.1 + (self.max_length-0.1) \ 109 |
* torch.rand([x.shape[0], 1, 1, 1], device=x.device) 110 | scale = 0.1 + (self.max_scale-0.1) \ 111 | * torch.rand([x.shape[0], 1, 1], device=x.device) 112 | 113 | # batch_size * num_points * num_points 114 | dist = torch.norm((x.unsqueeze(-2) - x.unsqueeze(-3))/length, dim=-1) 115 | 116 | cov = scale.pow(2)*(1 + math.sqrt(5.0)*dist + 5.0*dist.pow(2)/3.0) \ 117 | * torch.exp(-math.sqrt(5.0) * dist) \ 118 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 119 | 120 | return cov 121 | 122 | 123 | class PeriodicKernel(object): 124 | def __init__(self, sigma_eps=2e-2, max_length=0.6, max_scale=1.0): 125 | # the period p is sampled per batch in __call__ below 126 | self.sigma_eps = sigma_eps 127 | self.max_length = max_length 128 | self.max_scale = max_scale 129 | 130 | # x: batch_size * num_points * dim 131 | def __call__(self, x): 132 | p = 0.1 + 0.4*torch.rand([x.shape[0], 1, 1], device=x.device) 133 | length = 0.1 + (self.max_length-0.1) \ 134 | * torch.rand([x.shape[0], 1, 1], device=x.device) 135 | scale = 0.1 + (self.max_scale-0.1) \ 136 | * torch.rand([x.shape[0], 1, 1], device=x.device) 137 | 138 | dist = x.unsqueeze(-2) - x.unsqueeze(-3) 139 | cov = scale.pow(2) * torch.exp( 140 | - 2*(torch.sin(math.pi*dist.abs().sum(-1)/p)/length).pow(2)) \ 141 | + self.sigma_eps**2 * torch.eye(x.shape[-2]).to(x.device) 142 | 143 | return cov 144 | --------------------------------------------------------------------------------
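A minimal end-to-end sketch of the data pipeline above (editor's addition, not a file in the repository): it draws one meta-learning batch from GPSampler with a randomly rescaled RBF kernel and inspects the context/target split. It assumes the interpreter is started from the regression/ directory so that data.gp is importable, and it runs on CPU.

import torch
from data.gp import GPSampler, RBFKernel  # assumes CWD is regression/

sampler = GPSampler(RBFKernel(), seed=0)
batch = sampler.sample(batch_size=4, max_num_points=50, device='cpu')

# batch.x is [B, N, 1]; the first Nc points are the context set, the rest the targets
print(batch.xc.shape, batch.yc.shape)  # torch.Size([4, Nc, 1]) twice
print(batch.xt.shape, batch.yt.shape)  # torch.Size([4, Nt, 1]) twice
assert torch.allclose(batch.x[:, :batch.xc.shape[1]], batch.xc)

A model's predict method (e.g. TNPND.predict above) consumes exactly this xc/yc/xt triple, so the sampler output can be fed to any of the models in this repository once batch and model live on the same device.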