├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── data └── download_data.py ├── examples └── uci │ ├── README_UCI.md │ ├── example_autompg.ipynb │ ├── outputs │ ├── autoMPG │ │ ├── cumulative_sobol_0.pdf │ │ ├── cumulative_sobol_1.pdf │ │ ├── cumulative_sobol_2.pdf │ │ ├── cumulative_sobol_3.pdf │ │ ├── cumulative_sobol_4.pdf │ │ ├── decomposition │ │ │ ├── acceleration (R=0.037).pdf │ │ │ ├── displacement (R=0.232).pdf │ │ │ ├── horsepower (R=0.137).pdf │ │ │ ├── weight (R=0.220).pdf │ │ │ └── year (R=0.348).pdf │ │ ├── out_0.npz │ │ ├── out_1.npz │ │ ├── out_2.npz │ │ ├── out_3.npz │ │ └── out_4.npz │ └── breast │ │ ├── cumulative_sobol_0.pdf │ │ ├── cumulative_sobol_1.pdf │ │ ├── cumulative_sobol_2.pdf │ │ ├── cumulative_sobol_3.pdf │ │ ├── cumulative_sobol_4.pdf │ │ ├── out_0.npz │ │ ├── out_1.npz │ │ ├── out_2.npz │ │ ├── out_3.npz │ │ └── out_4.npz │ ├── uci_classification_train.py │ ├── uci_plotting.py │ └── uci_regression_train.py ├── oak ├── __init__.py ├── input_measures.py ├── model_utils.py ├── normalising_flow.py ├── oak_kernel.py ├── ortho_binary_kernel.py ├── ortho_categorical_kernel.py ├── ortho_rbf_kernel.py ├── plotting_utils.py └── utils.py ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── conftest.py ├── test_categorical_kernel.py ├── test_kernel_properties.py ├── test_normalising_flow.py ├── test_oak_kernel.py ├── test_oak_model.py ├── test_optimisation.py ├── test_orthogonality.py ├── test_sobol.py ├── test_sobol_oak_kernel.py └── test_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *# 3 | *.swp 4 | 5 | *.DS_Store 6 | 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.egg-info/ 11 | 12 | /.coverage 13 | /.coverage.* 14 | /.cache 15 | /.pytest_cache 16 | /.mypy_cache 17 | dist 18 | /doc/_apidoc/ 19 | build 20 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. 
Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity.
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Orthogonal Additive Gaussian Processes 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Orthogonal Additive Gaussian Processes 2 | 3 | 4 | This is the code repository for the paper *Additive Gaussian Processes Revisited* (https://arxiv.org/pdf/2206.09861.pdf). 5 | 6 | 7 | ## Getting Started 8 | ### Installation 9 | Clone the repository (https://github.com/amzn/orthogonal-additive-gaussian-processes) and install the package with `pip install -e .`. The package is tested with Python 3.7. 10 | The main dependency is `gpflow`; we relied on `gpflow == 2.2.1`, which in particular implements the posteriors module. 11 | 12 | ### Tests 13 | Run `pytest` to run the tests in the `tests` folder. 14 | 15 | ### Key Components 16 | 17 | - Kernels: 18 | - `ortho_binary_kernel.py` implements the constrained binary kernel 19 | 20 | - `ortho_categorical_kernel.py` implements the constrained coregional kernel for categorical variables 21 | 22 | - `ortho_rbf_kernel.py` implements the constrained squared exponential (SE) kernel for continuous variables 23 | 24 | - `oak_kernel.py` multiplies and adds kernels over feature dimensions using the Newton-Girard method 25 | 26 | - Measures: 27 | - `input_measures.py` implements the uniform, (mixture of) Gaussian, and empirical measures for input distributions 28 | 29 | 30 | - Normalising Flow: 31 | - `normalising_flow.py` implements normalising flows to transform input densities into Gaussian random variables 32 | 33 | 34 | - Model API: 35 | - `model_utils.py` is the model API for inference, prediction, plotting, and Sobol index calculations 36 | 37 | - Utilities: 38 | - `utils.py` contains utility functions 39 | - `plotting_utils.py` contains utility functions for plotting 40 | 41 | 42 | ## Usage 43 | 44 | **Data** 45 | 46 | UCI benchmark data are saved in the `./data` directory. They are obtained from https://github.com/duvenaud/additive-gps/blob/master/data/. Run `python data/download_data.py` to download all the datasets. 47 | 48 | **Examples** 49 | 50 | Example tutorials and scripts are in the `./examples` directory. 51 | 52 | *UCI:* 53 | 54 | * Contains training scripts for UCI regression and classification 55 | benchmark datasets. See `./examples/uci/README_UCI.md` for details. 56 | 57 | 58 | ## Security 59 | 60 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 61 | 62 | ## License 63 | 64 | This project is licensed under the Apache-2.0 License. 65 | 66 | 67 | -------------------------------------------------------------------------------- /data/download_data.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import os 5 | import urllib.request 6 | 7 | # download UCI datasets from https://github.com/duvenaud/additive-gps/ and save them to the ./data directory 8 | 9 | 10 | data_path_prefix = os.path.abspath(os.path.join(os.path.dirname(__file__), "./")) + "/" 11 | 12 | regression_filenames = [ 13 | "autompg.mat", 14 | "housing.mat", 15 | "r_concrete_1030.mat", 16 | "pumadyn8nh.mat", 17 | ] 18 | 19 | classification_filenames = [ 20 | "breast.mat", 21 | "pima.mat", 22 | "sonar.mat", 23 | "ionosphere.mat", 24 | "r_liver.mat", 25 | "r_heart.mat", 26 | ] 27 | 28 | 29 | for filename in regression_filenames + classification_filenames: 30 | if not os.path.isfile(data_path_prefix + filename): # check the data directory, not the current working directory 31 | if filename == "autompg.mat": 32 | url = f'https://github.com/duvenaud/additive-gps/raw/master/data/regression/autompg/{filename}' 33 | else: 34 | if filename in regression_filenames: 35 | url = f'https://github.com/duvenaud/additive-gps/raw/master/data/regression/{filename}' 36 | else: 37 | url = f'https://github.com/duvenaud/additive-gps/raw/master/data/classification/{filename}' 38 | print(f"Downloading {filename}") 39 | urllib.request.urlretrieve(url, data_path_prefix + filename) 40 | 41 | -------------------------------------------------------------------------------- /examples/uci/README_UCI.md: -------------------------------------------------------------------------------- 1 | 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | SPDX-License-Identifier: Apache-2.0 5 | 6 | # UCI experiments 7 | 8 | ## Running scripts 9 | * To run the experiments on a regression dataset, run 10 | ``` 11 | python examples/uci/uci_regression_train.py --dataset_name=NAME 12 | ``` 13 | * on a classification dataset, run 14 | ``` 15 | python examples/uci/uci_classification_train.py --dataset_name=NAME 16 | ``` 17 | where ```NAME``` is the name of the dataset (regression data: ```autoMPG, Housing, concrete, pumadyn```; classification data: ```breast, pima, sonar, ionosphere, liver, heart```). The two scripts save the model and metrics in the ```examples/uci/outputs/``` folder. 18 | 19 | To visualise the functional decomposition, run 20 | ``` 21 | python examples/uci/uci_plotting.py --dataset_name=NAME 22 | ``` 23 | This will save the plots in the ```examples/uci/outputs/NAME/decomposition``` folder. 24 | For illustration, we have run the scripts for the autoMPG and breast datasets, with results saved in the above output folder. 25 | 26 | 27 | ## Example notebook 28 | We provide one example notebook on the AutoMPG UCI regression problem, in ```examples/uci/example_autompg.ipynb```.
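For a quick start outside the notebook, the core fit/predict/Sobol workflow follows ```uci_regression_train.py```. The snippet below is a minimal sketch: run ```data/download_data.py``` first, the data path assumes you launch Python from the repository root, and the hyperparameter values simply mirror the training script.

```python
from scipy import io

from oak.model_utils import oak_model

# autoMPG stores the response (mpg) in the first column of X
d = io.loadmat("data/autompg.mat")
X, y = d["X"][:, 1:], d["X"][:, :1]

# fit an OAK model allowing interactions up to the full input dimension
oak = oak_model(max_interaction_depth=X.shape[1], num_inducing=500)
oak.fit(X, y)

y_pred = oak.predict(X)  # predictions on the original scale of y
oak.get_sobol()          # compute normalised Sobol indices per additive term
print(oak.normalised_sobols)
```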
29 | 30 | 31 | ```python 32 | 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/cumulative_sobol_0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/cumulative_sobol_0.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/cumulative_sobol_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/cumulative_sobol_1.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/cumulative_sobol_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/cumulative_sobol_2.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/cumulative_sobol_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/cumulative_sobol_3.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/cumulative_sobol_4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/cumulative_sobol_4.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/decomposition/acceleration (R=0.037).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/decomposition/acceleration (R=0.037).pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/decomposition/displacement (R=0.232).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/decomposition/displacement (R=0.232).pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/decomposition/horsepower (R=0.137).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/decomposition/horsepower (R=0.137).pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/decomposition/weight (R=0.220).pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/decomposition/weight (R=0.220).pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/decomposition/year (R=0.348).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/decomposition/year (R=0.348).pdf -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/out_0.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/out_0.npz -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/out_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/out_1.npz -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/out_2.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/out_2.npz -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/out_3.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/out_3.npz -------------------------------------------------------------------------------- /examples/uci/outputs/autoMPG/out_4.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/autoMPG/out_4.npz -------------------------------------------------------------------------------- /examples/uci/outputs/breast/cumulative_sobol_0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/cumulative_sobol_0.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/breast/cumulative_sobol_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/cumulative_sobol_1.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/breast/cumulative_sobol_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/cumulative_sobol_2.pdf 
-------------------------------------------------------------------------------- /examples/uci/outputs/breast/cumulative_sobol_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/cumulative_sobol_3.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/breast/cumulative_sobol_4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/cumulative_sobol_4.pdf -------------------------------------------------------------------------------- /examples/uci/outputs/breast/out_0.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/out_0.npz -------------------------------------------------------------------------------- /examples/uci/outputs/breast/out_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/out_1.npz -------------------------------------------------------------------------------- /examples/uci/outputs/breast/out_2.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/out_2.npz -------------------------------------------------------------------------------- /examples/uci/outputs/breast/out_3.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/out_3.npz -------------------------------------------------------------------------------- /examples/uci/outputs/breast/out_4.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/examples/uci/outputs/breast/out_4.npz -------------------------------------------------------------------------------- /examples/uci/uci_classification_train.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import argparse 6 | import time 7 | import gpflow 8 | import numpy as np 9 | import tensorflow as tf 10 | import os 11 | import matplotlib.pyplot as plt 12 | from gpflow import set_trainable 13 | from oak.model_utils import oak_model, save_model 14 | from oak.utils import get_model_sufficient_statistics, get_prediction_component 15 | from scipy import io 16 | from scipy.cluster.vq import kmeans 17 | from sklearn.model_selection import KFold 18 | from pathlib import Path 19 | from sklearn.metrics import roc_auc_score 20 | import matplotlib 21 | 22 | 23 | matplotlib.rcParams.update({"font.size": 25}) 24 | 25 | data_path_prefix = os.path.abspath( 26 | os.path.join(os.path.dirname(__file__), "../../data") 27 | ) + '/' 28 | 29 | filenames = [ 30 | data_path_prefix + "breast.mat", 31 | data_path_prefix + "pima.mat", 32 | data_path_prefix + "sonar.mat", 33 | data_path_prefix + "ionosphere.mat", 34 | data_path_prefix + "r_liver.mat", 35 | data_path_prefix + "r_heart.mat", 36 | ] 37 | dataset_names = ["breast", "pima", "sonar", "ionosphere", "liver", "heart"] 38 | 39 | np.set_printoptions(formatter={"float": lambda x: "{0:0.5f}".format(x)}) 40 | np.random.seed(4) 41 | tf.random.set_seed(4) 42 | 43 | def inv_logit(x): 44 | jitter = 1e-3 45 | return tf.math.sigmoid(x) * (1 - 2 * jitter) + jitter 46 | 47 | 48 | def main(): 49 | """ 50 | :param dataset_name: name of the dataset, should be one of the above dataset_names 51 | :param k: number of train-test fold, default to 5. 52 | :return: fit OAK model on the dataset, saves the model, the model predictive performance, 53 | and the plot on cumulative Sobol indices. 54 | """ 55 | args_parser = argparse.ArgumentParser() 56 | args_parser.add_argument( 57 | "--dataset_name", default="breast", type=str, help="dataset name" 58 | ) 59 | args_parser.add_argument( 60 | "--k", type=int, default=5, help="k-fold train-test splits" 61 | ) 62 | 63 | args, unknown = args_parser.parse_known_args() 64 | 65 | dataset_name, k = ( 66 | args.dataset_name, 67 | args.k, 68 | ) 69 | 70 | # save results to outputs folder 71 | output_prefix = os.path.abspath( 72 | os.path.join(os.path.dirname(__file__), f"./outputs/{dataset_name}/") 73 | ) 74 | if not os.path.exists(output_prefix): 75 | os.mkdir(output_prefix) 76 | 77 | np.random.seed(4) 78 | tf.random.set_seed(4) 79 | 80 | filename = filenames[dataset_names.index(dataset_name)] 81 | 82 | d = io.loadmat(filename) 83 | X, y = d["X"], d["y"] 84 | y = (y + 1) / 2 85 | idx = np.random.permutation(range(X.shape[0])) 86 | X = X[idx, :] 87 | y = y[idx] 88 | 89 | kf = KFold(n_splits=k) 90 | fold = 0 91 | for train_index, test_index in kf.split(X): 92 | X_train, X_test = X[train_index], X[test_index] 93 | y_train, y_test = y[train_index], y[test_index] 94 | 95 | # limit maximum number of interactions due to computation, sonar has 60 features therefore limiting it to 2 96 | # Sonar has ~60 features, truncating the maximum order of interaction to 2 97 | depth = 4 if dataset_name != "sonar" else 2 98 | oak = oak_model(max_interaction_depth=depth, num_inducing=200) 99 | oak.fit(X_train, y_train, optimise=False) 100 | data = (oak.m.data[0], y_train) 101 | t_start = time.time() 102 | 103 | Z = ( 104 | kmeans(oak.m.data[0].numpy(), 200)[0] 105 | if X_train.shape[0] > 200 106 | else oak.m.data[0].numpy() 107 | ) 108 | likelihood = gpflow.likelihoods.Bernoulli(invlink=inv_logit) 109 | 110 | oak.m = gpflow.models.SVGP( 111 | kernel=oak.m.kernel, 112 | likelihood=likelihood, 113 | 
inducing_variable=Z, 114 | whiten=True, 115 | q_diag=True, 116 | ) 117 | 118 | set_trainable(oak.m.inducing_variable, False) 119 | opt = gpflow.optimizers.Scipy() 120 | opt.minimize( 121 | oak.m.training_loss_closure(data), 122 | oak.m.trainable_variables, 123 | method="BFGS", 124 | ) 125 | 126 | # test performance 127 | x_max, x_min = X_train.max(0), X_train.min(0) 128 | mu, var = oak.m.predict_f(oak._transform_x(np.clip(X_test, x_min, x_max))) 129 | prob = inv_logit(mu) 130 | classification_accuracy = np.sum( 131 | np.abs((prob > 0.5).numpy().astype(int)[:, 0] - y_test[:, 0]) 132 | ) / len(y_test[:, 0]) 133 | # clipping X_test to be in the range of training data to avoid nan error in calculating nll due to normalsing flow 134 | X_test_scaled = oak._transform_x(np.clip(X_test, x_min, x_max)) 135 | nll = -oak.m.predict_log_density((X_test_scaled, y_test)).numpy().mean() 136 | 137 | print(f"fold {fold}, training dataset has size {X_train.shape}") 138 | print( 139 | f"oak test percentage classification error = {np.round(classification_accuracy, 4)}, " 140 | f"nll = {np.round(nll,4)}" 141 | ) 142 | 143 | # calculate sobol 144 | oak.m.data = data 145 | Sobol = None 146 | try: 147 | oak.get_sobol() 148 | tuple_of_indices, normalised_sobols = ( 149 | oak.tuple_of_indices, 150 | oak.normalised_sobols, 151 | ) 152 | # aggregate sobol per order of interactions 153 | Sobol = np.zeros(len(tuple_of_indices[-1])) 154 | for i in range(len(tuple_of_indices)): 155 | Sobol[len(tuple_of_indices[i]) - 1] += normalised_sobols[i] 156 | except: 157 | # sobol calculation fails due to cholesky decomposition error 158 | print(f"Sobol calculation failed") 159 | pass 160 | print(f"sobol is {Sobol}") 161 | print(f"Computation took {time.time() - t_start:.1f} seconds\n") 162 | 163 | # cumulative Sobol as we add terms one by one ranked by their Sobol 164 | x_max, x_min = X_train.max(0), X_train.min(0) 165 | XT = oak._transform_x(np.clip(X_test, x_min, x_max)) 166 | oak.alpha = get_model_sufficient_statistics(oak.m, get_L=False) 167 | # get the predicted y for all the kernel components 168 | prediction_list = get_prediction_component( 169 | oak.m, 170 | oak.alpha, 171 | XT, 172 | ) 173 | # predicted y for the constant kernel 174 | constant_term = oak.alpha.numpy().sum() * oak.m.kernel.variances[0].numpy() 175 | print(f"constant_term = {constant_term}") 176 | y_pred_component = np.ones(y_test.shape[0]) * constant_term 177 | 178 | cumulative_sobol, auc_component = [], [] 179 | order = np.argsort(normalised_sobols)[::-1] 180 | for n in order: 181 | # add predictions of the terms one by one ranked by their Sobol index 182 | y_pred_component += prediction_list[n].numpy() 183 | prob = inv_logit(y_pred_component) 184 | auc_component.append(roc_auc_score(y_test, prob.numpy())) 185 | cumulative_sobol.append(normalised_sobols[n]) 186 | cumulative_sobol = np.cumsum(cumulative_sobol) 187 | 188 | # generate plots in Fig. 
5 (\ref{fig:sobol_plots}) of paper 189 | plt.figure(figsize=(8, 4)) 190 | fig, ax1 = plt.subplots() 191 | 192 | ax2 = ax1.twinx() 193 | ax1.plot(np.arange(len(order)), auc_component, "r", linewidth=4) 194 | ax2.plot(np.arange(len(order)), cumulative_sobol, "-.g", linewidth=4) 195 | 196 | ax1.set_xlabel("Number of Terms Added") 197 | ax1.set_ylabel("AUC", color="r") 198 | ax2.set_ylabel("Cumulative Sobol", color="g") 199 | 200 | plt.title(dataset_name) 201 | plt.tight_layout() 202 | plt.savefig(output_prefix + "/cumulative_sobol_%d.pdf" % fold) 203 | 204 | # aggregate sobol per order of interactions 205 | sobol_order = np.zeros(len(tuple_of_indices[-1])) 206 | for i in range(len(tuple_of_indices)): 207 | sobol_order[len(tuple_of_indices[i]) - 1] += normalised_sobols[i] 208 | # save learned model 209 | save_model( 210 | oak.m, 211 | filename=Path(output_prefix + f"/model_oak_%d" % fold), 212 | ) 213 | # save model performance metrics 214 | np.savez( 215 | output_prefix + "/out_%d" % fold, 216 | normalised_sobols=normalised_sobols, 217 | classification_accuracy=classification_accuracy, 218 | nll=nll, 219 | sobol_order=sobol_order, 220 | ) 221 | fold += 1 222 | 223 | if __name__ == "__main__": 224 | main() 225 | -------------------------------------------------------------------------------- /examples/uci/uci_plotting.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import argparse 6 | import gpflow 7 | import os 8 | import numpy as np 9 | import tensorflow as tf 10 | from gpflow import set_trainable 11 | from oak.model_utils import oak_model, load_model 12 | from scipy import io 13 | from pathlib import Path 14 | # - 15 | 16 | 17 | # this script use the saved model to plot top 5 important features (Sobol) 18 | # in the decomposition 19 | covariate_names = {} 20 | covariate_names["Housing"] = [ 21 | "crime", 22 | "zoned", 23 | "industrial", 24 | "river", 25 | "NOX", 26 | "rooms", 27 | "age", 28 | "empl. 
dist.", 29 | "highway acc.", 30 | "tax", 31 | "pupil ratio", 32 | "black pct", 33 | "low status pct", 34 | ] 35 | 36 | covariate_names["concrete"] = [ 37 | "Cement", 38 | "Blast Furnace Slag", 39 | "Fly Ash", 40 | "Water", 41 | "Superplasticizer", 42 | "Coarse Aggregate", 43 | "Fine Aggregate", 44 | "Age", 45 | ] 46 | # no covariate name found for pumadyn 47 | covariate_names["pumadyn"] = [f"input {i}" for i in range(8)] 48 | covariate_names["autoMPG"] = [ 49 | "cylinders", 50 | "displacement", 51 | "horsepower", 52 | "weight", 53 | "acceleration", 54 | "year", 55 | "origin", 56 | ] 57 | 58 | covariate_names["breast"] = [ 59 | "ClumpThickness", 60 | "CellSize", 61 | "CellShape", 62 | "MarginalAdhesion", 63 | "SingleEpithelialCellSize", 64 | "BareNuclei", 65 | "BlandChromatin", 66 | "NormalNucleoli", 67 | "Mitoses", 68 | ] 69 | covariate_names["pima"] = [ 70 | "Pregnancies", 71 | "Glucose", 72 | "BloodPressure", 73 | "SkinThickness", 74 | "Insulin", 75 | "BMI", 76 | "DiabetesPedigreeFunction", 77 | "Age", 78 | ] 79 | covariate_names["sonar"] = [f"input {i}" for i in range(60)] 80 | covariate_names["ionosphere"] = [f"input {i}" for i in range(32)] 81 | covariate_names["liver"] = ["mcv", "alkphos", "sgpt", "sgot", "gammagt", "drinks"] 82 | covariate_names["heart"] = [ 83 | "age", 84 | "sex", 85 | "cp", 86 | "trestbps", 87 | "chol", 88 | "fbs", 89 | "restecg", 90 | "thelach", 91 | "exang", 92 | "oldpeak", 93 | "slope", 94 | "ca", 95 | "thal", 96 | ] 97 | 98 | data_path_prefix = os.path.abspath( 99 | os.path.join(os.path.dirname(__file__), "../../data") 100 | ) + '/' 101 | 102 | filenames = [ 103 | data_path_prefix + "autompg.mat", 104 | data_path_prefix + "housing.mat", 105 | data_path_prefix + "r_concrete_1030.mat", 106 | data_path_prefix + "pumadyn8nh.mat", 107 | data_path_prefix + "breast.mat", 108 | data_path_prefix + "pima.mat", 109 | data_path_prefix + "sonar.mat", 110 | data_path_prefix + "ionosphere.mat", 111 | data_path_prefix + "r_liver.mat", 112 | data_path_prefix + "r_heart.mat", 113 | ] 114 | 115 | dataset_names = [ 116 | "autoMPG", 117 | "Housing", 118 | "concrete", 119 | "pumadyn", 120 | "breast", 121 | "pima", 122 | "sonar", 123 | "ionosphere", 124 | "liver", 125 | "heart", 126 | ] 127 | 128 | 129 | def inv_logit(x): 130 | jitter = 1e-3 131 | return tf.math.sigmoid(x) * (1 - 2 * jitter) + jitter 132 | 133 | 134 | def main(): 135 | """ 136 | :param dataset_name: name of the dataset, should be one of the above dataset_names 137 | :param fold: fold of the train-test splits to plot the model on, each fold has a model with different data 138 | :return: load model and plot the OAK decomposition 139 | """ 140 | args_parser = argparse.ArgumentParser() 141 | args_parser.add_argument( 142 | "--dataset_name", type=str, default="autoMPG", help="dataset name" 143 | ) 144 | args_parser.add_argument( 145 | "--fold", type=int, default=0, help="Train-test split fold" 146 | ) 147 | args, unknown = args_parser.parse_known_args() 148 | dataset_name, fold = args.dataset_name, args.fold 149 | 150 | filename = filenames[dataset_names.index(dataset_name)] 151 | 152 | print(f"dataset {dataset_name}\n") 153 | d = io.loadmat(filename) 154 | if dataset_name == "autoMPG": 155 | X, y = d["X"][:, 1:], d["X"][:, :1] 156 | else: 157 | X, y = d["X"], d["y"] 158 | if len(np.unique(y)) == 2: 159 | y = (y + 1) / 2 160 | 161 | # distinguish between regression and classification 162 | if len(np.unique(y)) > 2: 163 | oak = oak_model(max_interaction_depth=X.shape[1], lengthscale_bounds=None) 164 | oak.fit(X, y, 
optimise=False) 165 | 166 | else: 167 | depth = 4 if X.shape[1] < 40 else 2 168 | # number of inducing points was decided by the number of training instances (80% of the X) 169 | M = 200 if (X.shape[0] * 0.8) > 200 else int(X.shape[0] * 0.8) 170 | oak = oak_model( 171 | max_interaction_depth=depth, num_inducing=M, lengthscale_bounds=None 172 | ) 173 | oak.fit(X, y, optimise=False) 174 | data = (oak.m.data[0], y) 175 | # Z is set to be a placeholder to load the values from saved model 176 | Z = ( 177 | np.zeros((M, X.shape[1])) 178 | if M == 200 179 | else np.zeros((int(X.shape[0] * 0.8), X.shape[1])) 180 | ) 181 | likelihood = gpflow.likelihoods.Bernoulli(invlink=inv_logit) 182 | oak.m = gpflow.models.SVGP( 183 | kernel=oak.m.kernel, 184 | likelihood=likelihood, 185 | inducing_variable=Z, 186 | whiten=True, 187 | q_diag=True, 188 | ) 189 | set_trainable(oak.m.inducing_variable, False) 190 | oak.m.data = data 191 | 192 | # load the model and plot decomposition 193 | output_prefix = os.path.abspath( 194 | os.path.join(os.path.dirname(__file__), f"./outputs/{dataset_name}/") 195 | ) 196 | load_model( 197 | oak.m, 198 | filename=Path(output_prefix + f"/model_oak_%d.npz" % fold), 199 | load_all_parameters=True, 200 | ) 201 | oak.plot( 202 | top_n=5, 203 | semilogy=False, 204 | X_columns=covariate_names[dataset_name], 205 | save_fig=output_prefix + f"/decomposition", 206 | ) 207 | 208 | 209 | if __name__ == "__main__": 210 | main() 211 | -------------------------------------------------------------------------------- /examples/uci/uci_regression_train.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import argparse 6 | import os 7 | import numpy as np 8 | import tensorflow as tf 9 | import matplotlib.pyplot as plt 10 | import matplotlib 11 | from oak.model_utils import oak_model, save_model 12 | from oak.utils import get_model_sufficient_statistics, get_prediction_component 13 | from scipy import io 14 | from sklearn.model_selection import KFold 15 | from pathlib import Path 16 | matplotlib.rcParams.update({"font.size": 25}) 17 | 18 | # + 19 | # data from repo: https://github.com/duvenaud/additive-gps/blob/master/data/regression/ 20 | # this script is for experiments in Sec 5.1 for regression problems in the paper 21 | data_path_prefix = os.path.abspath( 22 | os.path.join(os.path.dirname(__file__), "../../data") 23 | ) + '/' 24 | 25 | filenames = [ 26 | data_path_prefix + "autompg.mat", 27 | data_path_prefix + "housing.mat", 28 | data_path_prefix + "r_concrete_1030.mat", 29 | data_path_prefix + "pumadyn8nh.mat", 30 | ] 31 | dataset_names = ["autoMPG", "Housing", "concrete", "pumadyn"] 32 | np.set_printoptions(formatter={"float": lambda x: "{0:0.5f}".format(x)}) 33 | 34 | 35 | # - 36 | 37 | 38 | def main(): 39 | """ 40 | :param dataset_name: name of the dataset, should be one of the above dataset_names 41 | :param k: number of train-test fold, default to 5. 42 | :return: fit OAK model on the dataset, saves the model, the model predictive performance, 43 | and the plot on cumulative Sobol indices. 
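Example (see README_UCI.md): python examples/uci/uci_regression_train.py --dataset_name=autoMPG --k=5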
44 | """ 45 | args_parser = argparse.ArgumentParser() 46 | args_parser.add_argument( 47 | "--dataset_name", default="autoMPG", type=str, help="dataset name" 48 | ) 49 | args_parser.add_argument( 50 | "--k", type=int, default=5, help="k-fold train-test splits" 51 | ) 52 | 53 | args, unknown = args_parser.parse_known_args() 54 | dataset_name, k = args.dataset_name, args.k 55 | 56 | # save results to outputs folder 57 | output_prefix = os.path.abspath( 58 | os.path.join(os.path.dirname(__file__), f"./outputs/{dataset_name}/") 59 | ) 60 | if not os.path.exists(output_prefix): 61 | os.mkdir(output_prefix) 62 | 63 | np.random.seed(4) 64 | tf.random.set_seed(4) 65 | filename = filenames[dataset_names.index(dataset_name)] 66 | 67 | print(f"dataset {dataset_name}\n") 68 | d = io.loadmat(filename) 69 | if dataset_name == "autoMPG": 70 | # for autoMPG dataset, the first column is the response y 71 | X, y = d["X"][:, 1:], d["X"][:, :1] 72 | else: 73 | X, y = d["X"], d["y"] 74 | 75 | idx = np.random.permutation(range(X.shape[0])) 76 | 77 | X = X[idx, :] 78 | y = y[idx] 79 | kf = KFold(n_splits=k) 80 | fold = 0 81 | for train_index, test_index in kf.split(X): 82 | X_train, X_test = X[train_index], X[test_index] 83 | y_train, y_test = y[train_index], y[test_index] 84 | 85 | oak = oak_model( 86 | max_interaction_depth=X.shape[1], 87 | num_inducing=500, 88 | ) 89 | oak.fit(X_train, y_train) 90 | 91 | # test performance 92 | x_max, x_min = X_train.max(0), X_train.min(0) 93 | y_pred = oak.predict(np.clip(X_test, x_min, x_max)) 94 | rss = ((y_pred - y_test[:, 0]) ** 2).mean() 95 | tss = ( 96 | (y_test[:, 0] - y_test[:, 0].mean() * np.ones(y_test[:, 0].shape)) ** 2 97 | ).mean() 98 | r2 = 1 - rss / tss 99 | rmse = np.sqrt(rss) 100 | 101 | # calculate sobol 102 | oak.get_sobol() 103 | tuple_of_indices, normalised_sobols = ( 104 | oak.tuple_of_indices, 105 | oak.normalised_sobols, 106 | ) 107 | 108 | # cumulative Sobol as we add terms one by one ranked by their Sobol 109 | x_max, x_min = X_train.max(0), X_train.min(0) 110 | XT = oak._transform_x(np.clip(X_test, x_min, x_max)) 111 | oak.alpha = get_model_sufficient_statistics(oak.m, get_L=False) 112 | # get the predicted y for all the kernel components 113 | prediction_list = get_prediction_component( 114 | oak.m, 115 | oak.alpha, 116 | XT, 117 | ) 118 | # predicted y for the constant kernel 119 | constant_term = oak.alpha.numpy().sum() * oak.m.kernel.variances[0].numpy() 120 | print(f"constant_term = {constant_term}") 121 | y_pred_component = np.ones(y_test.shape[0]) * constant_term 122 | 123 | cumulative_sobol, rmse_component = [], [] 124 | order = np.argsort(normalised_sobols)[::-1] 125 | for n in order: 126 | # add predictions of the terms one by one ranked by their Sobol index 127 | y_pred_component += prediction_list[n].numpy() 128 | y_pred_component_transformed = oak.scaler_y.inverse_transform( 129 | y_pred_component.reshape(-1, 1) 130 | ) 131 | error_component = np.sqrt( 132 | ((y_pred_component_transformed - y_test) ** 2).mean() 133 | ) 134 | rmse_component.append(error_component) 135 | cumulative_sobol.append(normalised_sobols[n]) 136 | cumulative_sobol = np.cumsum(cumulative_sobol) 137 | 138 | # sanity check that predictions by summing over the components is equal 139 | # to the prediction of the OAK model 140 | np.testing.assert_allclose(y_pred_component_transformed[:, 0], y_pred) 141 | # generate plots in Fig. 
5 (\ref{fig:sobol_plots}) of paper 142 | plt.figure(figsize=(8, 4)) 143 | fig, ax1 = plt.subplots() 144 | 145 | ax2 = ax1.twinx() 146 | ax1.plot(np.arange(len(order)), rmse_component, "r", linewidth=4) 147 | ax2.plot(np.arange(len(order)), cumulative_sobol, "-.g", linewidth=4) 148 | 149 | ax1.set_xlabel("Number of Terms Added") 150 | ax1.set_ylabel("RMSE", color="r") 151 | ax2.set_ylabel("Cumulative Sobol", color="g") 152 | 153 | plt.title(dataset_name) 154 | plt.tight_layout() 155 | plt.savefig(output_prefix + "/cumulative_sobol_%d.pdf" % fold) 156 | 157 | # aggregate sobol per order of interactions 158 | sobol_order = np.zeros(len(tuple_of_indices[-1])) 159 | for i in range(len(tuple_of_indices)): 160 | sobol_order[len(tuple_of_indices[i]) - 1] += normalised_sobols[i] 161 | 162 | nll = ( 163 | -oak.m.predict_log_density( 164 | ( 165 | oak._transform_x(np.clip(X_test, x_min, x_max)), 166 | oak.scaler_y.transform(y_test), 167 | ) 168 | ) 169 | .numpy() 170 | .mean() 171 | ) 172 | # printing 173 | print(f"fold {fold}, training dataset has size {X_train.shape}") 174 | print(f"sobol per interaction order is {sobol_order}") 175 | print(f"oak test rmse = {rmse}, r2 = {r2}\n") 176 | print(f"RBF test nll = {np.round(nll, 4)}\n") 177 | # save learned model 178 | save_model( 179 | oak.m, 180 | filename=Path(output_prefix + "/model_oak_%d" % fold), 181 | ) 182 | # save model performance metrics 183 | np.savez( 184 | output_prefix + "/out_%d" % fold, 185 | cumulative_sobol=cumulative_sobol, 186 | order=order, 187 | rmse=rmse, 188 | nll=nll, 189 | sobol_order=sobol_order, 190 | ) 191 | fold += 1 192 | 193 | 194 | if __name__ == "__main__": 195 | main() 196 | -------------------------------------------------------------------------------- /oak/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/oak/__init__.py -------------------------------------------------------------------------------- /oak/input_measures.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from typing import Optional 6 | # - 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | """ 12 | Input measure 13 | """ 14 | 15 | 16 | class Measure: 17 | pass 18 | 19 | 20 | class UniformMeasure(Measure): 21 | """ 22 | :param a: lower bound of the uniform distribution 23 | :param b: upper bound of the uniform distribution 24 | :return: Uniform measure for inputs 25 | """ 26 | 27 | def __init__(self, a: float, b: float): 28 | self.a, self.b = a, b 29 | 30 | 31 | class GaussianMeasure(Measure): 32 | """ 33 | :param mu: Mean of Gaussian measure 34 | :param var: variance of Gaussian measure 35 | :return: Gaussian measure for inputs 36 | """ 37 | 38 | def __init__(self, mu: float, var: float): 39 | self.mu, self.var = mu, var 40 | 41 | 42 | class EmpiricalMeasure(Measure): 43 | """ 44 | :param location: location of the input data 45 | :param weights: weights on the location of the data 46 | :return: Empirical dirac measure for inputs with weights on the locations 47 | """ 48 | 49 | def __init__(self, location: np.ndarray, weights: Optional[np.ndarray] = None): 50 | self.location = location 51 | if weights is None: 52 | weights = 1 / len(location) * np.ones((location.shape[0], 1)) 53 | assert np.isclose( 54 | weights.sum(), 1.0, atol=1e-6 55 | ), f"not close to 1 {weights.sum()}" 56 | self.weights = weights 57 | 58 | 59 | class MOGMeasure(Measure): 60 | """ 61 | :param means: mean of the Gaussian measures 62 | :param variances: variances of the Gaussian measures 63 | :param weights: weights on the Gaussian measures 64 | :return: mixture of Gaussian measure 65 | """ 66 | 67 | def __init__(self, means: np.ndarray, variances: np.ndarray, weights: np.ndarray): 68 | tf.debugging.assert_shapes( 69 | [(means, ("K",)), (variances, ("K",)), (weights, ("K",))] 70 | ) 71 | assert np.isclose( 72 | weights.sum(), 1.0, atol=1e-6 73 | ), f"Weights not close to 1 {weights.sum()}" 74 | self.means, self.variances, self.weights = ( 75 | means.astype(float), 76 | variances.astype(float), 77 | weights, 78 | ) 79 | -------------------------------------------------------------------------------- /oak/model_utils.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import os 6 | import time 7 | from pathlib import Path 8 | from typing import Callable, List, Optional, Union 9 | import gpflow 10 | import numpy as np 11 | import tensorflow as tf 12 | from gpflow import set_trainable 13 | from gpflow.inducing_variables import InducingPoints 14 | from gpflow.models import GPR, SGPR, GPModel 15 | from gpflow.models.training_mixins import RegressionData 16 | from sklearn import preprocessing 17 | from sklearn.cluster import KMeans 18 | from sklearn.mixture import GaussianMixture 19 | from tensorflow_probability import distributions as tfd 20 | from oak import plotting_utils 21 | from oak.input_measures import MOGMeasure 22 | from oak.normalising_flow import Normalizer 23 | from oak.oak_kernel import OAKKernel, get_list_representation 24 | from oak.plotting_utils import FigureDescription, save_fig_list 25 | from oak.utils import compute_sobol_oak, initialize_kmeans_with_categorical 26 | # - 27 | 28 | f64 = gpflow.utilities.to_default_float 29 | 30 | 31 | def get_kmeans_centers(X: np.ndarray, K: int = 500) -> np.ndarray: 32 | """ 33 | :param X: N * D input array 34 | :param K: number of clusters 35 | :return: K-means clustering of input X 36 | """ 37 | np.random.seed(44) 38 | tf.random.set_seed(44) 39 | kmeans = KMeans(n_clusters=K, random_state=0).fit(X) 40 | Z = kmeans.cluster_centers_ 41 | return Z 42 | 43 | 44 | def save_model( 45 | model: GPModel, 46 | filename: Path, 47 | ) -> None: 48 | """ 49 | :param model: GPflow model parameters to save 50 | :param filename: location to save the model to 51 | :return save model parameters to a local directory 52 | """ 53 | if isinstance(model, gpflow.models.SVGP): 54 | hyperparams = [ 55 | model.parameters[i].numpy() for i in range(len(model.parameters)) 56 | ] 57 | else: 58 | hyperparams = [ 59 | model.trainable_parameters[i].numpy() 60 | for i in range(len(model.trainable_parameters)) 61 | ] 62 | 63 | os.makedirs(filename.parents[0], exist_ok=True) 64 | np.savez(filename, hyperparams=hyperparams) 65 | 66 | 67 | def load_model( 68 | model: GPModel, 69 | filename: Path, 70 | load_all_parameters=False, 71 | ) -> None: 72 | """ 73 | :param model: GPflow model parameters to load 74 | :param filename: location to load the model from 75 | :param load_all_parameters: whether to load all parameters or only trainable parameters 76 | :return load model parameters from a local directory 77 | """ 78 | # We need allow_pickle=True because model parameters include objects (e.g. 
InducingPoints) 79 | model_params = np.load(str(filename), allow_pickle=True)["hyperparams"] 80 | 81 | if load_all_parameters: 82 | for i in range(len(model.parameters)): 83 | model.parameters[i].assign(model_params[i]) 84 | else: 85 | for i in range(len(model.trainable_parameters)): 86 | print(model_params[i], model.trainable_parameters[i]) 87 | model.trainable_parameters[i].assign(model_params[i]) 88 | 89 | 90 | def create_model_oak( 91 | data: RegressionData, 92 | max_interaction_depth: int = 2, 93 | constrain_orthogonal: bool = True, 94 | inducing_pts: np.ndarray = None, 95 | optimise=False, 96 | zfixed=True, 97 | p0=None, 98 | p=None, 99 | lengthscale_bounds=None, 100 | empirical_locations: Optional[List[float]] = None, 101 | empirical_weights: Optional[List[float]] = None, 102 | use_sparsity_prior: bool = True, 103 | gmm_measures: Optional[List[MOGMeasure]] = None, 104 | share_var_across_orders: Optional[bool] = True, 105 | ) -> GPModel: 106 | """ 107 | :param num_dims: number of dimensions of inputs 108 | :param max_interaction_depth: maximum order of interactions 109 | :param constrain_orthogonal: whether to use the orthogonal version of the kernel 110 | :param inducing_pts: inducing points, if None, it uses K-means centers 111 | :param optimise: whether to optimise the hyper parameters of the model 112 | :param zfixed: whether to fix or learn the inducing points 113 | :param p0: list of probability measures for binary kernels, set to None if it is not binary 114 | :param p: list of probability measures for categorical kernels, set to None if it is not categorical 115 | :param lengthscale_bounds: bounds of the lengthscale parameters 116 | :param empirical_locations: list of locations of empirical measure, set to None if not using the empirical measure 117 | :param empirical_weights: list of weights of empirical measure, set to None if not using the empirical measure 118 | :param use_sparsity_prior: whether to use sparse prior on the kernel variance parameters 119 | :param gmm_measures: list of Gaussian mixture measures 120 | :param share_var_across_orders: whether to use the same variance parameter across interaction order 121 | :return: a GP model with OAK kernel 122 | """ 123 | num_dims = data[0].shape[1] 124 | 125 | # create oak kernel 126 | if p0 is None: 127 | p0 = [None] * num_dims 128 | if p is None: 129 | p = [None] * num_dims 130 | base_kernels = [None] * num_dims 131 | for dim in range(num_dims): 132 | if (p0[dim] is None) and (p[dim] is None): 133 | base_kernels[dim] = gpflow.kernels.RBF 134 | 135 | k = OAKKernel( 136 | base_kernels, 137 | num_dims=num_dims, 138 | max_interaction_depth=max_interaction_depth, 139 | constrain_orthogonal=constrain_orthogonal, 140 | p0=p0, 141 | p=p, 142 | lengthscale_bounds=lengthscale_bounds, 143 | empirical_locations=empirical_locations, 144 | empirical_weights=empirical_weights, 145 | gmm_measures=gmm_measures, 146 | share_var_across_orders=share_var_across_orders, 147 | ) 148 | 149 | if inducing_pts is not None: 150 | model = SGPR( 151 | data, 152 | mean_function=None, 153 | kernel=k, 154 | inducing_variable=InducingPoints(inducing_pts), 155 | ) 156 | if zfixed: 157 | set_trainable(model.inducing_variable, False) 158 | else: 159 | model = GPR(data, mean_function=None, kernel=k) 160 | # set priors for variance 161 | if use_sparsity_prior: 162 | print("Using sparsity prior") 163 | if share_var_across_orders: 164 | for p in model.kernel.variances: 165 | p.prior = tfd.Gamma(f64(1.0), f64(0.2)) 166 | # Initialise likelihood variance to small 
value to avoid finding all-noise explanation minima
167 |         model.likelihood.variance.assign(0.01)
168 |     if optimise:
169 |         t_start = time.time()
170 |         opt = gpflow.optimizers.Scipy()
171 |         opt.minimize(
172 |             model.training_loss_closure(), model.trainable_variables, method="BFGS"
173 |         )
174 |         gpflow.utilities.print_summary(model, fmt="notebook")
175 |         print(f"Training took {time.time() - t_start:.1f} seconds.")
176 |     return model
177 | 
178 | 
179 | def apply_normalise_flow(X: tf.Tensor, input_flows: List[Normalizer]) -> tf.Tensor:
180 |     """
181 |     :param X: input to which the normalising flows are applied
182 |     :param input_flows: list of normalising flows to apply to each feature dimension
183 |     :return: inputs after applying the flow transformations
184 |     """
185 |     X_scaled = np.zeros((X.shape))
186 |     for ii in range(X.shape[1]):
187 |         if input_flows[ii] is None:
188 |             X_scaled[:, ii] = X[:, ii]
189 |         else:
190 |             X_scaled[:, ii] = input_flows[ii].bijector(X[:, ii])
191 |     return X_scaled
192 | 
193 | 
194 | class oak_model:
195 |     def __init__(
196 |         self,
197 |         max_interaction_depth=2,
198 |         num_inducing=200,
199 |         lengthscale_bounds=[1e-3, 1e3],
200 |         binary_feature: Optional[List[int]] = None,
201 |         categorical_feature: Optional[List[int]] = None,
202 |         empirical_measure: Optional[List[int]] = None,
203 |         use_sparsity_prior: bool = True,
204 |         gmm_measure: Optional[List[int]] = None,
205 |         sparse: bool = False,
206 |         use_normalising_flow: bool = True,
207 |         share_var_across_orders: bool = True,
208 |     ):
209 |         """
210 |         :param max_interaction_depth: maximum order of interactions to consider
211 |         :param num_inducing: number of inducing points
212 |         :param lengthscale_bounds: bounds for lengthscale parameters
213 |         :param binary_feature: list of indices for binary features
214 |         :param categorical_feature: list of indices for categorical features
215 |         :param empirical_measure: list of indices of features that use the empirical measure; set to None to use the Gaussian measure
216 |         :param use_sparsity_prior: use sparsity prior on kernel variances
217 |         :param gmm_measure: list with the number of Gaussian-mixture components per input dimension; an entry of 0 keeps the Gaussian measure for that input, otherwise a mixture with that many components is fitted to it
218 |         :param sparse: Boolean to indicate whether to use sparse GP with inducing points. Defaults to False.
219 |         :param use_normalising_flow: whether to use normalising flows; if not, continuous features are standardised
220 |         :param share_var_across_orders: whether to share the same variance across orders;
221 |             if False, it uses a kernel of the form \prod_i(1+k_i) as in Duvenaud et al. (2011).
222 |         :return: OAK model class with model fitting, prediction, attribution and plotting utils.
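
        Example (editor's sketch; the data below is hypothetical, the API is
        the one defined in this file):

            import numpy as np
            from oak.model_utils import oak_model

            rng = np.random.default_rng(0)
            X = rng.normal(size=(200, 2))
            Y = np.sin(X[:, :1]) + 0.1 * rng.normal(size=(200, 1))

            m = oak_model(max_interaction_depth=2)
            m.fit(X, Y)            # fits per-feature flows, then a GP with the OAK kernel
            y_hat = m.predict(X)   # predictions on the original scale of Y
            r = m.get_sobol()      # normalised Sobol index per additive term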
223 | """ 224 | self.max_interaction_depth = max_interaction_depth 225 | self.num_inducing = num_inducing 226 | self.lengthscale_bounds = lengthscale_bounds 227 | self.binary_feature = binary_feature 228 | self.categorical_feature = categorical_feature 229 | self.use_sparsity_prior = use_sparsity_prior 230 | 231 | # state filled in during fit call 232 | self.input_flows = None 233 | self.scaler_y = None 234 | self.Y_scaled = None 235 | self.X_scaled = None 236 | self.alpha = None 237 | self.continuous_index = None 238 | self.binary_index = None 239 | self.categorical_index = None 240 | self.empirical_measure = empirical_measure 241 | self.empirical_locations = None 242 | self.empirical_weights = None 243 | self.gmm_measure = gmm_measure 244 | self.estimated_gmm_measures = None # sklearn GMM estimates 245 | self.sparse = sparse 246 | self.use_normalising_flow = use_normalising_flow 247 | self.share_var_across_orders = share_var_across_orders 248 | 249 | def fit( 250 | self, 251 | X: tf.Tensor, 252 | Y: tf.Tensor, 253 | optimise: bool = True, 254 | initialise_inducing_points: bool = True, 255 | ): 256 | """ 257 | :param X, Y data to fit the model on 258 | :param optimise: whether to optimise the model 259 | :param initialise_inducing_points: whether to initialise inducing points with K-means 260 | """ 261 | self.xmin, self.xmax = X.min(0), X.max(0) 262 | self.num_dims = X.shape[1] 263 | 264 | ( 265 | self.continuous_index, 266 | self.binary_index, 267 | self.categorical_index, 268 | p0, 269 | p, 270 | ) = _calculate_features( 271 | X, 272 | categorical_feature=self.categorical_feature, 273 | binary_feature=self.binary_feature, 274 | ) 275 | # discrete_input_set = set(self.binary_index).union(set(self.categorical_index)) 276 | if self.empirical_measure is not None: 277 | if not set(self.empirical_measure).issubset(self.continuous_index): 278 | raise ValueError( 279 | f"Empirical measure={self.empirical_measure} should only be used on non-binary/categorical inputs {self.continuous_index}" 280 | ) 281 | if self.gmm_measure is not None: 282 | if len(self.gmm_measure) != self.num_dims: 283 | return ValueError( 284 | f"Must specify number of components for each inputs dimension 1..{X.shape[0]}" 285 | ) 286 | idx_gmm = np.flatnonzero(self.gmm_measure) 287 | if not set(idx_gmm).issubset(self.continuous_index): 288 | raise ValueError( 289 | f"GMM measure on inputs {idx_gmm} should only be used on continuous inputs {self.continuous_index}" 290 | ) 291 | 292 | # Measure 293 | self.estimated_gmm_measures = [None] * self.num_dims 294 | if self.gmm_measure is not None: 295 | for i_dim in np.flatnonzero(self.gmm_measure): 296 | K_for_input_i = self.gmm_measure[i_dim] 297 | self.estimated_gmm_measures[i_dim] = estimate_one_dim_gmm( 298 | K=K_for_input_i, X=X[:, i_dim] 299 | ) 300 | 301 | self.empirical_locations = [None] * self.num_dims 302 | self.empirical_weights = [None] * self.num_dims 303 | 304 | # scaling 305 | self.input_flows = [None] * self.num_dims 306 | for i in self.continuous_index: 307 | if (self.empirical_measure is not None) and (i in self.empirical_measure): 308 | continue # skip 309 | if self.estimated_gmm_measures[i] is not None: 310 | continue 311 | d = X[:, i] 312 | 313 | if self.use_normalising_flow: 314 | n = Normalizer(d) 315 | opt = gpflow.optimizers.Scipy() 316 | opt.minimize(n.KL_objective, n.trainable_variables) 317 | self.input_flows[i] = n 318 | 319 | self.alpha = None 320 | self.scaler_y = preprocessing.StandardScaler().fit(Y) 321 | self.Y_scaled = self.scaler_y.transform(Y) 
322 | # standardize features with empirical measure to avoid cholesky decomposition error 323 | if self.empirical_measure is not None: 324 | self.scaler_X_empirical = preprocessing.StandardScaler().fit( 325 | X[:, self.empirical_measure] 326 | ) 327 | if not self.use_normalising_flow: 328 | self.scaler_X_continuous = preprocessing.StandardScaler().fit( 329 | X[:, self.continuous_index] 330 | ) 331 | self.X_scaled = self._transform_x(X) 332 | 333 | # calculate empirical location and weights after applying scaling X 334 | if self.empirical_measure is not None: 335 | for ii in self.empirical_measure: 336 | self.empirical_locations[ii], self.empirical_weights[ii] = np.unique( 337 | self.X_scaled[:, ii], return_counts=True 338 | ) 339 | self.empirical_weights[ii] = ( 340 | self.empirical_weights[ii] / self.empirical_weights[ii].sum() 341 | ).reshape(-1, 1) 342 | self.empirical_locations[ii] = self.empirical_locations[ii].reshape( 343 | -1, 1 344 | ) 345 | 346 | assert np.allclose( 347 | self.X_scaled[:, self.binary_index], X[:, self.binary_index] 348 | ), "Flow applied to binary inputs" 349 | assert np.allclose( 350 | self.X_scaled[:, self.categorical_index], X[:, self.categorical_index] 351 | ), "Flow applied to categorical inputs" 352 | if self.gmm_measure is not None: 353 | assert np.allclose( 354 | self.X_scaled[:, np.flatnonzero(self.gmm_measure)], 355 | X[:, np.flatnonzero(self.gmm_measure)], 356 | ), "Flow applied to GMM measure inputs" 357 | if self.empirical_measure is not None: 358 | assert np.allclose( 359 | np.reshape( 360 | np.concatenate( 361 | [ 362 | self._get_x_inverse_transformer(i)(self.X_scaled[:, i]) 363 | for i in self.empirical_measure 364 | ] 365 | ), 366 | X[:, self.empirical_measure].shape, 367 | order="F", 368 | ), 369 | X[:, self.empirical_measure], 370 | ), "Flow applied to empirical measure inputs" 371 | 372 | Z = None 373 | # using sparse GP when size of data > 1000 374 | if X.shape[0] > 1000 or self.sparse: 375 | X_inducing = self.X_scaled 376 | 377 | if initialise_inducing_points: 378 | if (p0 is None) and (p is None): 379 | print("all features are continuous") 380 | kmeans = KMeans(n_clusters=self.num_inducing, random_state=0).fit( 381 | X_inducing 382 | ) 383 | Z = kmeans.cluster_centers_ 384 | else: 385 | Z = initialize_kmeans_with_categorical( 386 | X_inducing, 387 | binary_index=self.binary_index, 388 | categorical_index=self.categorical_index, 389 | continuous_index=self.continuous_index, 390 | n_clusters=self.num_inducing, 391 | ) 392 | else: 393 | Z = X_inducing[: self.num_inducing, :] 394 | 395 | self.m = create_model_oak( 396 | (self.X_scaled, self.Y_scaled), 397 | max_interaction_depth=self.max_interaction_depth, 398 | inducing_pts=Z, 399 | optimise=optimise, 400 | p0=p0, 401 | p=p, 402 | lengthscale_bounds=self.lengthscale_bounds, 403 | use_sparsity_prior=self.use_sparsity_prior, 404 | empirical_locations=self.empirical_locations, 405 | empirical_weights=self.empirical_weights, 406 | gmm_measures=self.estimated_gmm_measures, 407 | share_var_across_orders=self.share_var_across_orders, 408 | ) 409 | 410 | def optimise( 411 | self, 412 | compile: bool = True, 413 | ): 414 | 415 | print("Model prior to optimisation") 416 | gpflow.utilities.print_summary(self.m, fmt="notebook") 417 | self.alpha = None 418 | t_start = time.time() 419 | opt = gpflow.optimizers.Scipy() 420 | opt.minimize( 421 | self.m.training_loss_closure(), 422 | self.m.trainable_variables, 423 | method="BFGS", 424 | compile=compile, 425 | ) 426 | gpflow.utilities.print_summary(self.m, 
fmt="notebook") 427 | print(f"Training took {time.time() - t_start:.1f} seconds.") 428 | 429 | def predict(self, X: tf.Tensor, clip=False) -> tf.Tensor: 430 | """ 431 | :param X: inputs to predict the response on 432 | :param clip: whether to slip X between x_min and x_max along each dimension 433 | :return: predicted response on input X 434 | """ 435 | if clip: 436 | X_scaled = self._transform_x(np.clip(X, self.xmin, self.xmax)) 437 | else: 438 | X_scaled = self._transform_x(X) 439 | try: 440 | y_pred = self.m.predict_f(X_scaled)[0].numpy() 441 | return self.scaler_y.inverse_transform(y_pred)[:, 0] 442 | except ValueError: 443 | print("test X is outside the range of training input, try clipping X.") 444 | 445 | def get_loglik(self, X: tf.Tensor, y: tf.Tensor, clip=False) -> tf.Tensor: 446 | """ 447 | :param X,y: inputs and output 448 | :param clip: whether to slip X between x_min and x_max along each dimension 449 | :return log likelihood on (X,y) 450 | """ 451 | if clip: 452 | X_scaled = self._transform_x(np.clip(X, self.xmin, self.xmax)) 453 | else: 454 | X_scaled = self._transform_x(X) 455 | 456 | return ( 457 | self.m.predict_log_density((X_scaled, self.scaler_y.transform(y))) 458 | .numpy() 459 | .mean() 460 | ) 461 | 462 | def _transform_x(self, X: tf.Tensor) -> tf.Tensor: 463 | """ 464 | :param X: input to do transformation on 465 | :return: transformation for continuous features: normalising flow with Gaussian measure or standardization with empirical measure 466 | """ 467 | X = apply_normalise_flow(X, self.input_flows) 468 | if self.empirical_measure is not None: 469 | X[:, self.empirical_measure] = self.scaler_X_empirical.transform( 470 | X[:, self.empirical_measure] 471 | ) 472 | if not self.use_normalising_flow: 473 | X[:, self.continuous_index] = self.scaler_X_continuous.transform( 474 | X[:, self.continuous_index] 475 | ) 476 | return X 477 | 478 | def _get_x_inverse_transformer( 479 | self, i: int 480 | ) -> Optional[Union[Normalizer, Callable[[tf.Tensor], tf.Tensor]]]: 481 | """ 482 | :param i: index of feature i 483 | :return: inverse transformation for continuous feature i 484 | """ 485 | assert i in self.continuous_index 486 | 487 | if self.empirical_measure is not None and i in self.empirical_measure: 488 | continuous_i = self.empirical_measure.index(i) 489 | mean_i, std_i = self.scaler_X_empirical.mean_[continuous_i], np.sqrt( 490 | self.scaler_X_empirical.var_[continuous_i] 491 | ) 492 | transformer_x = lambda x: x * std_i + mean_i 493 | elif self.gmm_measure is not None and i in self.gmm_measure: 494 | transformer_x = None 495 | else: 496 | transformer_x = self.input_flows[i].bijector.inverse 497 | return transformer_x 498 | 499 | def get_sobol(self, likelihood_variance=False): 500 | """ 501 | :param likelihood_variance: whether to include likelihood noise in Sobol calculation 502 | :return: normalised Sobol indices for each additive term in the model 503 | """ 504 | num_dims = self.num_dims 505 | 506 | delta = 1 507 | mu = 0 508 | selected_dims, _ = get_list_representation(self.m.kernel, num_dims=num_dims) 509 | tuple_of_indices = selected_dims[1:] 510 | model_indices, sobols = compute_sobol_oak( 511 | self.m, 512 | delta, 513 | mu, 514 | share_var_across_orders=self.share_var_across_orders, 515 | ) 516 | if likelihood_variance: 517 | normalised_sobols = sobols / ( 518 | np.sum(sobols) + self.m.likelihood.variance.numpy() 519 | ) 520 | else: 521 | normalised_sobols = sobols / np.sum(sobols) 522 | self.normalised_sobols = normalised_sobols 523 | 
524 |         return normalised_sobols
525 | 
526 |     def plot(
527 |         self,
528 |         transformer_y=None,
529 |         X_columns=None,
530 |         X_lists=None,
531 |         top_n=None,
532 |         likelihood_variance=False,
533 |         semilogy=True,
534 |         save_fig: Optional[str] = None,
535 |         tikz_path: Optional[str] = None,
536 |         ylim: Optional[List[float]] = None,
537 |         quantile_range: Optional[List[float]] = None,
538 |         log_axis: Optional[List[bool]] = [False, False],
539 |         grid_range: Optional[List[np.ndarray]] = None,
540 |         log_bin: Optional[List[bool]] = None,
541 |         num_bin: Optional[int] = 100,
542 |     ):
543 |         """
544 |         :param transformer_y: transformation of the target (e.g. log); we plot the median and quantiles after the transformation
545 |         :param X_columns: list of feature names
546 |         :param X_lists: list of features from data 1 and data 2; if None, the training features are plotted on the histogram
547 |         :param top_n: plot the top n effects ranked by Sobol indices
548 |         :param likelihood_variance: whether to add the likelihood variance to the total Sobol
549 |         :param save_fig: directory to save the figures to
550 |         :param tikz_path: directory to save the latex for the figures to
551 |         :param ylim: list of limits on the y-axis for each feature
552 |         :param quantile_range: list of quantile ranges of each feature to plot; if None, use the whole range
553 |         :param log_axis: Booleans indicating whether to log the x-axis and y-axis of the contour plot
554 |         :param grid_range: list of ranges to plot functions on the contour plot for each feature; if None, use a linspace over the feature ranges
555 |         :param log_bin: list of Booleans indicating whether to log the histogram bins for each feature
556 |         :param num_bin: number of bins for the histograms
557 |         :return: plots of the individual effects
558 |         """
559 |         if X_columns is None:
560 |             X_columns = ["feature %d" % i for i in range(self.num_dims)]
561 | 
562 |         if X_lists is None:
563 |             X_lists = [None for i in range(len(X_columns))]
564 | 
565 |         if grid_range is None:
566 |             grid_range = [None for i in range(len(X_columns))]
567 | 
568 |         if ylim is None:
569 |             ylim = [None for i in range(len(X_columns))]
570 | 
571 |         if quantile_range is None:
572 |             quantile_range = [None for i in range(len(X_columns))]
573 | 
574 |         if log_bin is None:
575 |             log_bin = [False for i in range(len(X_columns))]
576 | 
577 |         num_dims = self.num_dims
578 |         selected_dims, _ = get_list_representation(self.m.kernel, num_dims=num_dims)
579 |         tuple_of_indices = selected_dims[1:]
580 | 
581 |         self.get_sobol(likelihood_variance=likelihood_variance)
582 |         order = np.argsort(self.normalised_sobols)[::-1]
583 |         fig_list: List[FigureDescription] = []
584 |         if top_n is None:
585 |             top_n = len(order)
586 |         for n in order[: min(top_n, len(order))]:
587 |             if len(tuple_of_indices[n]) == 1:
588 |                 i = tuple_of_indices[n][0]
589 |                 if i in self.continuous_index:
590 |                     fig_list.append(
591 |                         plotting_utils.plot_single_effect(
592 |                             m=self.m,
593 |                             i=i,
594 |                             covariate_name=X_columns[i],
595 |                             title=f"{X_columns[i]} (R={self.normalised_sobols[n]:.3f})",
596 |                             x_transform=self._get_x_inverse_transformer(i),
597 |                             y_transform=transformer_y,
598 |                             semilogy=semilogy,
599 |                             plot_corrected_data=False,
600 |                             plot_raw_data=False,
601 |                             X_list=X_lists[i],
602 |                             tikz_path=tikz_path,
603 |                             ylim=ylim[i],
604 |                             quantile_range=quantile_range[i],
605 |                             log_bin=log_bin[i],
606 |                             num_bin=num_bin,
607 |                         )
608 |                     )
609 | 
610 |                 elif i in self.binary_index:
611 |                     fig_list.append(
612 |
plotting_utils.plot_single_effect_binary( 613 | self.m, 614 | i, 615 | ["0", "1"], 616 | title=f"{X_columns[i]} (R={self.normalised_sobols[n]:.3f})", 617 | y_transform=transformer_y, 618 | semilogy=semilogy, 619 | tikz_path=tikz_path, 620 | ) 621 | ) 622 | else: 623 | fig_list.append( 624 | plotting_utils.plot_single_effect_categorical( 625 | self.m, 626 | i, 627 | [str(i) for i in range(self.m.kernel.kernels[i].num_cat)], 628 | title=f"{X_columns[i]} (R={self.normalised_sobols[n]:.3f})", 629 | y_transform=transformer_y, 630 | semilogy=semilogy, 631 | tikz_path=tikz_path, 632 | ) 633 | ) 634 | 635 | elif len(tuple_of_indices[n]) == 2: 636 | i = tuple_of_indices[n][0] 637 | j = tuple_of_indices[n][1] 638 | if i in self.continuous_index and j in self.continuous_index: 639 | fig_list.append( 640 | plotting_utils.plot_second_order( 641 | self.m, 642 | i, 643 | j, 644 | [X_columns[i], X_columns[j]], 645 | [ 646 | self._get_x_inverse_transformer(i), 647 | self._get_x_inverse_transformer(j), 648 | ], 649 | transformer_y, 650 | title=X_columns[i] 651 | + "&" 652 | + X_columns[j] 653 | + f" (R={self.normalised_sobols[n]:.3f})", 654 | tikz_path=tikz_path, 655 | quantile_range=[quantile_range[i], quantile_range[j]], 656 | log_axis=log_axis, 657 | xx=grid_range[i], 658 | yy=grid_range[j], 659 | num_bin=num_bin, 660 | ) 661 | ) 662 | 663 | elif i in self.continuous_index and j in self.binary_index: 664 | fig_list.append( 665 | plotting_utils.plot_second_order_binary( 666 | self.m, 667 | i, 668 | j, 669 | ["0", "1"], 670 | [X_columns[i], X_columns[j]], 671 | x_transforms=[self._get_x_inverse_transformer(i)], 672 | y_transform=transformer_y, 673 | title=f"{X_columns[i]} (R={self.normalised_sobols[n]:.3f})", 674 | tikz_path=tikz_path, 675 | ) 676 | ) 677 | 678 | elif i in self.binary_index and j in self.continuous_index: 679 | fig_list.append( 680 | plotting_utils.plot_second_order_binary( 681 | self.m, 682 | j, 683 | i, 684 | ["0", "1"], 685 | [X_columns[j], X_columns[i]], 686 | x_transforms=[self._get_x_inverse_transformer(j)], 687 | y_transform=transformer_y, 688 | title=X_columns[i] 689 | + "&" 690 | + X_columns[j] 691 | + f" (R={self.normalised_sobols[n]:.3f})", 692 | tikz_path=tikz_path, 693 | ) 694 | ) 695 | 696 | else: 697 | raise NotImplementedError 698 | 699 | if save_fig is not None: 700 | save_fig_list(fig_list=fig_list, dirname=Path(save_fig)) 701 | 702 | 703 | def _calculate_features( 704 | X: tf.Tensor, categorical_feature: List[int], binary_feature: List[int] 705 | ): 706 | """ 707 | Calculate features index set 708 | :param X: input data 709 | :param categorical_feature: index of categorical features 710 | :param binary_feature: index of binary features 711 | :return: 712 | continuous_index, binary_index, categorical_index: list of indices for type of feature 713 | p0: list of probability measure for binary kernels, for continuous/categorical kernel, it is set to None 714 | p: list of probability measure for categorical kernels, for continuous/binary kernel, it is set to None 715 | """ 716 | if binary_feature is None and categorical_feature is None: 717 | # all features are continuous 718 | p0 = None 719 | p = None 720 | continuous_index = list(range(X.shape[1])) 721 | binary_index = [] 722 | categorical_index = [] 723 | else: 724 | if binary_feature is not None and categorical_feature is not None: 725 | overlapping_set = set(binary_feature).intersection(categorical_feature) 726 | if len(overlapping_set) > 0: 727 | raise ValueError(f"Overlapping feature set {overlapping_set}") 728 | 
binary_index, categorical_index, continuous_index, p0, p = [], [], [], [], [] 729 | for j in range(X.shape[1]): 730 | if binary_feature is not None and j in binary_feature: 731 | p0.append(1 - X[:, j].mean()) 732 | p.append(None) 733 | binary_index.append(j) 734 | elif categorical_feature is not None and j in categorical_feature: 735 | p0.append(None) 736 | prob = [] 737 | for jj in np.unique(X[:, j]): 738 | prob.append(len(np.where(X[:, j] == jj)[0]) / len(X[:, j])) 739 | p.append(np.reshape(prob, (-1, 1))) 740 | assert np.abs(p[-1].sum() - 1) < 1e-6 741 | categorical_index.append(j) 742 | else: 743 | p.append(None) 744 | p0.append(None) 745 | continuous_index.append(j) 746 | print("indices of binary feature ", binary_index) 747 | print("indices of continuous feature ", continuous_index) 748 | print("indices of categorical feature ", categorical_index) 749 | 750 | return continuous_index, binary_index, categorical_index, p0, p 751 | 752 | 753 | def estimate_one_dim_gmm(K: int, X: np.ndarray) -> MOGMeasure: 754 | """ 755 | :param K: number of mixtures 756 | :param X: input data 757 | :return: estimated Gaussian mixture model on the data X 758 | """ 759 | tf.debugging.assert_shapes([(X, ("N",))]) 760 | assert K > 0 761 | gm = GaussianMixture( 762 | n_components=K, random_state=0, covariance_type="spherical" 763 | ).fit(X.reshape(-1, 1)) 764 | assert np.allclose(gm.weights_.sum(), 1.0) 765 | assert gm.means_.shape == (K, 1) 766 | assert gm.covariances_.shape == (K,) 767 | assert gm.weights_.shape == (K,) 768 | return MOGMeasure( 769 | weights=gm.weights_, means=gm.means_.reshape(-1), variances=gm.covariances_ 770 | ) 771 | -------------------------------------------------------------------------------- /oak/normalising_flow.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import gpflow 6 | import numpy as np 7 | import tensorflow as tf 8 | import tensorflow_probability as tfp 9 | from matplotlib import pyplot as plt 10 | from scipy import stats 11 | # - 12 | 13 | tfb = tfp.bijectors 14 | 15 | 16 | def make_sinharcsinh(): 17 | return tfb.SinhArcsinh( 18 | skewness=gpflow.Parameter(0.0), 19 | tailweight=gpflow.Parameter(1.0, transform=tfb.Exp()), 20 | ) 21 | 22 | 23 | def make_standardizer(x): 24 | return [ 25 | tfb.Scale(gpflow.Parameter(1.0 / np.std(x), transform=tfb.Exp())), 26 | tfb.Shift(gpflow.Parameter(-np.mean(x))), 27 | ] 28 | 29 | 30 | class Normalizer(gpflow.base.Module): 31 | """ 32 | :param x: input to transform 33 | :param log: whether to log x first before applying flows of transformations 34 | :return: flows of transformations to match x to standard Gaussian 35 | """ 36 | 37 | def __init__( 38 | self, 39 | x, 40 | log=True, 41 | **kwargs, 42 | ): 43 | super().__init__(**kwargs) 44 | self.x = x 45 | 46 | if log: 47 | offset = np.min(x) - 1.0 48 | self.bijector = tfb.Chain( 49 | [make_sinharcsinh() for _ in range(1)] 50 | + make_standardizer(np.log(x - offset)) 51 | + [tfb.Log(), tfb.Shift(-offset)] 52 | ) 53 | else: 54 | self.bijector = tfb.Chain( 55 | [make_sinharcsinh() for _ in range(1)] + make_standardizer(x) 56 | ) 57 | 58 | def plot(self, title='Normalising Flow'): 59 | f = plt.figure() 60 | ax = f.add_axes([0.3, 0.3, 0.65, 0.65]) 61 | x = self.x 62 | y = self.bijector(x).numpy() 63 | ax.plot(x, y, "k.", label="Gaussian") 64 | ax.legend() 65 | 66 | ax_x = f.add_axes([0.3, 0.05, 0.65, 0.25], sharex=ax) 67 | ax_x.hist(x, bins=20) 68 | ax_y = f.add_axes([0.05, 0.3, 0.25, 0.65], sharey=ax) 69 | ax_y.hist(y, bins=20, orientation="horizontal") 70 | ax_y.set_xlim(ax_y.get_xlim()[::-1]) 71 | plt.title(title) 72 | 73 | 74 | def KL_objective(self): 75 | return 0.5 * tf.reduce_mean( 76 | tf.square(self.bijector(self.x)) 77 | ) - tf.reduce_mean( 78 | self.bijector.forward_log_det_jacobian(self.x, event_ndims=0) 79 | ) 80 | 81 | def kstest(self): 82 | # Kolmogorov-Smirnov test for normality of transformed data 83 | s, pvalue = stats.kstest(self.bijector(self.x).numpy()[:,0], "norm") 84 | print("KS test statistic is %.3f, p-value is %.8f" % (s, pvalue)) 85 | return s, pvalue 86 | -------------------------------------------------------------------------------- /oak/oak_kernel.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | import itertools
6 | import gpflow
7 | import numpy as np
8 | import tensorflow as tf
9 | from functools import reduce
10 | from typing import List, Optional, Tuple, Type
11 | from oak.input_measures import (
12 |     EmpiricalMeasure,
13 |     GaussianMeasure,
14 |     MOGMeasure,
15 | )
16 | from oak.ortho_binary_kernel import OrthogonalBinary
17 | from oak.ortho_categorical_kernel import OrthogonalCategorical
18 | from oak.ortho_rbf_kernel import OrthogonalRBFKernel
19 | from tensorflow_probability import bijectors as tfb
20 | 
21 | 
22 | # -
23 | 
24 | def bounded_param(low: float, high: float, param: float) -> gpflow.Parameter:
25 |     """
26 |     :param low: lower bound of the parameter
27 |     :param high: upper bound of the parameter
28 |     :param param: initial parameter value
29 |     :return: gpflow Parameter with optimisation bounds enforced by a sigmoid transform
30 |     """
31 |     sigmoid = tfb.Sigmoid(low=tf.cast(low, tf.float64), high=tf.cast(high, tf.float64))
32 |     parameter = gpflow.Parameter(param, transform=sigmoid, dtype=tf.float64)
33 |     return parameter
34 | 
35 | 
36 | class OAKKernel(gpflow.kernels.Kernel):
37 |     """
38 |     Compute the OAK kernel
39 |     :param base_kernels: list of base kernel classes used to construct the Durrande kernel for non-binary inputs; deleted
40 |         after initialisation
41 |     :param num_dims: dimensionality of input data
42 |     :param max_interaction_depth: maximum order of interactions
43 |     :param active_dims: pass active_dims in case there is not one kernel per input dim
44 |         (e.g. in the distributional case)
45 |     :param constrain_orthogonal: whether to use the orthogonal kernel or not
46 |     :param p0: list of probability measures for binary kernels; entries for continuous/categorical kernels are set to None
47 |     :param p: list of probability measures for categorical kernels; entries for continuous/binary kernels are set to None
48 |     :param lengthscale_bounds: lower and upper bounds for the lengthscales, used to upper bound the normalisation in high dimensions.
49 |         The code currently supports a common bound for all lengthscales. In the future we could extend it to
50 |         have different bounds per dimension, but this is probably not needed given our use of input normalisation.
51 |     :param empirical_locations: list of locations for empirical measures; set to None if using the Gaussian measure
52 |     :param empirical_weights: list of weights for empirical measures; set to None if using the Gaussian measure
53 |     :param gmm_measures: Gaussian mixture model measures for continuous inputs.
54 | :param share_var_across_orders: whether to share the same variance across orders, 55 | if False, it uses original constrained kernel \prod_i(1+k_i); if True, this is the OAK kernel 56 | :return: OAK kernel 57 | """ 58 | 59 | def __init__( 60 | self, 61 | base_kernels: List[Type[gpflow.kernels.Kernel]], 62 | num_dims: int, 63 | max_interaction_depth: int, 64 | active_dims: Optional[List[List[int]]] = None, 65 | constrain_orthogonal: bool = False, 66 | p0: Optional[List[float]] = None, 67 | p: Optional[List[float]] = None, 68 | lengthscale_bounds: Optional[List[float]] = None, 69 | empirical_locations: Optional[List[float]] = None, 70 | empirical_weights: Optional[List[float]] = None, 71 | gmm_measures: Optional[List[MOGMeasure]] = None, 72 | share_var_across_orders: Optional[bool] = True, 73 | ): 74 | super().__init__(active_dims=range(num_dims)) 75 | if active_dims is None: 76 | active_dims = [[dim] for dim in range(num_dims)] 77 | # assert that active dims doesn't contain duplicates and doesn't exceed the total num_dims 78 | flat_dims = [dim for sublist in active_dims for dim in sublist] 79 | assert max(flat_dims) <= num_dims, "Active dims exceeding num dims." 80 | assert len(flat_dims) == len( 81 | np.unique(flat_dims) 82 | ), "Active dims contains duplicates." 83 | 84 | delta2 = 1 # prior measure process variance hardcoded to 1 85 | # set up kernels (without parameters for variance) 86 | self.base_kernels, self.max_interaction_depth = ( 87 | base_kernels, 88 | max_interaction_depth, 89 | ) 90 | self.share_var_across_orders = share_var_across_orders 91 | # p0 is a list of probability measures for binary kernels, set to None if it is not binary 92 | if p0 is None: 93 | p0 = [None] * len(active_dims) 94 | 95 | # p is a list of probability measures for categorical kernels, set to None if it is not categorical 96 | if p is None: 97 | p = [None] * len(active_dims) 98 | 99 | if constrain_orthogonal: 100 | if empirical_locations is None: 101 | assert ( 102 | empirical_weights is None 103 | ), "Cannot have weights without locations" 104 | empirical_locations = [None] * len(active_dims) 105 | empirical_weights = [None] * len(active_dims) 106 | else: 107 | if empirical_weights is not None: 108 | location_shapes = [ 109 | len(empirical_locations[dim]) 110 | if empirical_locations[dim] is not None 111 | else None 112 | for dim in range(len(active_dims)) 113 | ] 114 | location_weights = [ 115 | len(empirical_weights[dim]) 116 | if empirical_weights[dim] is not None 117 | else None 118 | for dim in range(len(active_dims)) 119 | ] 120 | print(location_shapes) 121 | assert ( 122 | location_shapes == location_weights 123 | ), f"Shape of empirical measure locations {location_shapes} do not match weights {location_weights}" 124 | 125 | if gmm_measures is None: 126 | gmm_measures = [None] * len(active_dims) 127 | 128 | self.kernels = [] 129 | 130 | for dim in range(len(active_dims)): 131 | # len(active_dims) can be < num_dims if some inputs are grouped 132 | if ( 133 | empirical_locations[dim] is not None 134 | and gmm_measures[dim] is not None 135 | ): 136 | raise ValueError( 137 | f"Both empirical and GMM measure defined for input {dim}" 138 | ) 139 | 140 | if (p0[dim] is None) and (p[dim] is None): 141 | if empirical_locations[dim] is not None: 142 | k = OrthogonalRBFKernel( 143 | base_kernels[dim](), 144 | EmpiricalMeasure( 145 | empirical_locations[dim], empirical_weights[dim] 146 | ), 147 | active_dims=active_dims[dim], 148 | ) 149 | elif gmm_measures[dim] is not None: 150 | k = 
OrthogonalRBFKernel( 151 | base_kernels[dim](), 152 | measure=gmm_measures[dim], 153 | active_dims=active_dims[dim], 154 | ) 155 | 156 | else: 157 | # Continuous input with Gaussian measure 158 | k = OrthogonalRBFKernel( 159 | base_kernels[dim](), 160 | GaussianMeasure(0, delta2), 161 | active_dims=active_dims[dim], 162 | ) 163 | if share_var_across_orders: 164 | k.base_kernel.variance = tf.ones( 165 | 1, dtype=gpflow.config.default_float() 166 | ) 167 | 168 | if lengthscale_bounds is not None: 169 | k.base_kernel.lengthscales = bounded_param( 170 | lengthscale_bounds[0], lengthscale_bounds[1], 1 171 | ) 172 | elif p[dim] is not None: 173 | assert base_kernels[dim] is None 174 | k = OrthogonalCategorical( 175 | p=p[dim], 176 | active_dims=active_dims[dim], 177 | ) 178 | if share_var_across_orders: 179 | k.variance = tf.ones(1, dtype=gpflow.config.default_float()) 180 | else: 181 | assert base_kernels[dim] is None 182 | k = OrthogonalBinary( 183 | p0=p0[dim], 184 | active_dims=active_dims[dim], 185 | ) 186 | if share_var_across_orders: 187 | k.variance = tf.ones(1, dtype=gpflow.config.default_float()) 188 | 189 | self.kernels.append(k) 190 | # unconstrained kernel with the additive model structure 191 | else: 192 | assert ( 193 | empirical_locations is None 194 | ), "Cannot have empirical locations without orthogonal constraint" 195 | assert ( 196 | empirical_weights is None 197 | ), "Cannot have empirical weights without orthogonal constraint" 198 | 199 | self.kernels = [] 200 | for dim in range(len(active_dims)): 201 | # point cases 202 | if p0[dim] is None: 203 | k = base_kernels[dim](active_dims=active_dims[dim]) 204 | 205 | else: 206 | assert base_kernels[dim] is None 207 | k = OrthogonalBinary(p0=p0[dim], active_dims=active_dims[dim]) 208 | if share_var_across_orders: 209 | k.variance = tf.ones(1, dtype=gpflow.config.default_float()) 210 | self.kernels.append(k) 211 | # add parameters to control the variances for various interaction orders (+1 for bias/constant term) 212 | if self.share_var_across_orders: 213 | self.variances = [ 214 | gpflow.Parameter(1.0, transform=gpflow.utilities.positive()) 215 | for _ in range(max_interaction_depth + 1) 216 | ] 217 | else: 218 | # only have additional variance for the constant kernel 219 | self.variances = [ 220 | gpflow.Parameter(1.0, transform=gpflow.utilities.positive()) 221 | ] 222 | 223 | def compute_additive_terms(self, kernel_matrices): 224 | """ 225 | Given a list of tensors (kernel matrices), compute a new list 226 | containing all products up to order self.max_interaction_depth. 227 | 228 | Example: 229 | input: [a, b, c, d] 230 | output: [1, (a+b+c+d), (ab+ac+ad+bc+bd+cd), (abc+abd+acd+bcd), abcd)] 231 | 232 | Uses the Girard Newton identity, as found in Duvenaud et al "Additive GPs". 
this avoid 233 | computing exponentially many terms, computations scale with O(D^2) (where D is the length of 234 | the kernel list or self.max_interaction_depth) 235 | """ 236 | s = [ 237 | reduce(tf.add, [tf.pow(k, p) for k in kernel_matrices]) 238 | for p in range(self.max_interaction_depth + 1) 239 | ] 240 | e = [tf.ones_like(kernel_matrices[0])] # start with constant term 241 | for n in range(1, self.max_interaction_depth + 1): 242 | e.append( 243 | (1.0 / n) 244 | * reduce( 245 | tf.add, 246 | [((-1) ** (k - 1)) * e[n - k] * s[k] for k in range(1, n + 1)], 247 | ) 248 | ) 249 | return e 250 | 251 | def K(self, X, X2=None): 252 | kernel_matrices = [ 253 | k(X, X2) for k in self.kernels 254 | ] # note that active dims gets applied by each kernel 255 | additive_terms = self.compute_additive_terms(kernel_matrices) 256 | if self.share_var_across_orders: 257 | return reduce( 258 | tf.add, 259 | [sigma2 * k for sigma2, k in zip(self.variances, additive_terms)], 260 | ) 261 | else: 262 | # add constant kernel 263 | return reduce( 264 | tf.add, [self.variances[0] * additive_terms[0]] + additive_terms[1:] 265 | ) 266 | 267 | def K_diag(self, X): 268 | kernel_diags = [k.K_diag(k.slice(X)[0]) for k in self.kernels] 269 | additive_terms = self.compute_additive_terms(kernel_diags) 270 | if self.share_var_across_orders: 271 | return reduce( 272 | tf.add, 273 | [sigma2 * k for sigma2, k in zip(self.variances, additive_terms)], 274 | ) 275 | else: 276 | return reduce( 277 | tf.add, [self.variances[0] * additive_terms[0]] + additive_terms[1:] 278 | ) 279 | 280 | 281 | class KernelComponenent(gpflow.kernels.Kernel): 282 | def __init__( 283 | self, 284 | oak_kernel: OAKKernel, 285 | iComponent_list: List[int], 286 | share_var_across_orders: Optional[bool] = True, 287 | ): 288 | # Orthogonal kernel + interactions kernel 289 | # sort out active_dims - it must be a list of integers 290 | super().__init__(active_dims=oak_kernel.active_dims) 291 | self.oak_kernel = oak_kernel 292 | self.iComponent_list = iComponent_list 293 | self.share_var_across_orders = share_var_across_orders 294 | self.kernels = [ 295 | k 296 | for i, k in enumerate(self.oak_kernel.kernels) 297 | if i in self.iComponent_list 298 | ] 299 | 300 | def K(self, X, X2=None): 301 | if len(self.iComponent_list) == 0: 302 | shape = ( 303 | [tf.shape(X)[0], tf.shape(X)[0]] 304 | if X2 is None 305 | else [tf.shape(X)[0], tf.shape(X2)[0]] 306 | ) 307 | return self.oak_kernel.variances[0] * tf.ones( 308 | shape, dtype=gpflow.default_float() 309 | ) # start with constant term 310 | else: 311 | # element wise product 312 | # compute kernel in iComponent_list only 313 | n_order = len(self.iComponent_list) # [0, 1] 314 | k_mats = [k(X, X2) for k in self.kernels] 315 | variances_n = ( 316 | self.oak_kernel.variances[n_order] 317 | if self.share_var_across_orders 318 | else 1 319 | ) 320 | return variances_n * tf.reduce_prod(k_mats, axis=0) 321 | 322 | def K_diag(self, X): 323 | if len(self.iComponent_list) == 0: 324 | return self.oak_kernel.variances[0] * tf.ones( 325 | tf.shape(X)[0], dtype=gpflow.default_float() 326 | ) # start with constant term 327 | else: 328 | n_order = len(self.iComponent_list) 329 | k_mats = [k.K_diag(k.slice(X)[0]) for k in self.kernels] 330 | variances_n = ( 331 | self.oak_kernel.variances[n_order] 332 | if self.share_var_across_orders 333 | else 1 334 | ) 335 | return variances_n * tf.reduce_prod(k_mats, axis=0) 336 | 337 | 338 | def get_list_representation( 339 | kernel: OAKKernel, 340 | num_dims: int, 341 | 
share_var_across_orders: Optional[bool] = True, 342 | ) -> Tuple[List[List[int]], List[KernelComponenent]]: 343 | """ 344 | Construct kernel list representation of OAK kernel 345 | """ 346 | assert isinstance(kernel, OAKKernel) 347 | selected_dims = [] 348 | kernel_list = [] 349 | selected_dims.append([]) # no dimensions for constant term 350 | kernel_list.append( 351 | KernelComponenent(kernel, [], share_var_across_orders=share_var_across_orders) 352 | ) # add constant 353 | if kernel.max_interaction_depth > 0: 354 | for ii in range(kernel.max_interaction_depth + 1): 355 | if ii > 0: 356 | tmp = [ 357 | list(tup) for tup in itertools.combinations(np.arange(num_dims), ii) 358 | ] 359 | selected_dims = selected_dims + tmp 360 | 361 | for jj in range(len(tmp)): 362 | kernel_list.append(KernelComponenent(kernel, tmp[jj])) 363 | 364 | return selected_dims, kernel_list 365 | -------------------------------------------------------------------------------- /oak/ortho_binary_kernel.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import gpflow 6 | import numpy as np 7 | import tensorflow as tf 8 | from gpflow.utilities import positive 9 | 10 | 11 | # - 12 | 13 | class OrthogonalBinary(gpflow.kernels.Kernel): 14 | """ 15 | :param p0: probability of binary measure 16 | :param active_dims: active dimension along which the kernel is to be applied 17 | :return: constrained binary kernel 18 | """ 19 | 20 | def __init__( 21 | self, 22 | p0: float = 0.5, 23 | active_dims: int = None, 24 | ): 25 | super().__init__(active_dims=active_dims) 26 | self.variance = gpflow.Parameter(1.0, transform=positive()) 27 | self.p0 = p0 28 | 29 | def output_covariance(self): 30 | p0 = self.p0 31 | p1 = 1.0 - p0 32 | B = np.array([[np.square(p1), -p0 * p1], [-p0 * p1, np.square(p0)]]) 33 | return B * self.variance 34 | 35 | def output_variance(self): 36 | p0 = self.p0 37 | p1 = 1.0 - p0 38 | return np.array([np.square(p1), np.square(p0)]) * self.variance 39 | 40 | def K(self, X, X2=None): 41 | shape_constraints = [ 42 | (X, [..., "N", 1]), 43 | ] 44 | if X2 is not None: 45 | shape_constraints.append((X2, [..., "M", 1])) 46 | tf.debugging.assert_shapes(shape_constraints) 47 | X = tf.cast(X[..., 0], tf.int32) 48 | if X2 is None: 49 | X2 = X 50 | else: 51 | X2 = tf.cast(X2[..., 0], tf.int32) 52 | B = self.output_covariance() 53 | return tf.gather(tf.transpose(tf.gather(B, X2)), X) 54 | 55 | def K_diag(self, X): 56 | tf.debugging.assert_shapes([(X, [..., "N", 1])]) 57 | X = tf.cast(X[..., 0], tf.int32) 58 | B_diag = self.output_variance() 59 | return tf.gather(B_diag, X) 60 | -------------------------------------------------------------------------------- /oak/ortho_categorical_kernel.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import gpflow 6 | import tensorflow as tf 7 | from gpflow.base import Parameter 8 | from gpflow.utilities import positive 9 | from typing import List 10 | 11 | 12 | # - 13 | 14 | class OrthogonalCategorical(gpflow.kernels.Kernel): 15 | """ 16 | :param p: list of probability measure for categorical kernels which sums up to 1, p_i = Prob(X = i) 17 | :param rank: the number of degrees of correlation between the outputs, see details in coregion kernel https://gpflow.readthedocs.io/en/master/_modules/gpflow/kernels/misc.html#Coregion 18 | :param active_dims: active dimension of input to apply this kernel to 19 | :return Constrained categorical kernel 20 | """ 21 | 22 | def __init__(self, p: List, rank: int = 2, active_dims: int = None): 23 | super().__init__(active_dims=active_dims) 24 | num_cat = len(p) 25 | self.num_cat = num_cat 26 | self.p = p 27 | self.variance = gpflow.Parameter(1.0, transform=positive()) 28 | W = tf.random.uniform(shape=[num_cat, rank]) 29 | kappa = tf.ones(self.num_cat) 30 | # kappa = tf.zeros(self.num_cat) 31 | self.W = Parameter(W) 32 | self.kappa = Parameter(kappa, transform=positive()) 33 | 34 | def output_covariance(self): 35 | A = tf.linalg.matmul(self.W, self.W, transpose_b=True) + tf.linalg.diag( 36 | self.kappa 37 | ) 38 | Ap = tf.linalg.matmul(A, self.p) 39 | B = A - tf.linalg.matmul(Ap, Ap, transpose_b=True) / ( 40 | tf.linalg.matmul(self.p, Ap, transpose_a=True)[0] 41 | ) 42 | return B * self.variance 43 | 44 | def output_variance(self): 45 | A = tf.linalg.matmul(self.W, self.W, transpose_b=True) + tf.linalg.diag( 46 | self.kappa 47 | ) 48 | Ap = tf.linalg.matmul(A, self.p) 49 | A_diag = tf.reduce_sum(tf.square(self.W), 1) + self.kappa 50 | B_diag = A_diag - tf.reduce_sum(tf.square(Ap), 1) / ( 51 | tf.linalg.matmul(self.p, Ap, transpose_a=True)[0] 52 | ) 53 | return B_diag * self.variance 54 | 55 | def K(self, X, X2=None): 56 | shape_constraints = [ 57 | (X, [..., "N", 1]), 58 | ] 59 | if X2 is not None: 60 | shape_constraints.append((X2, [..., "M", 1])) 61 | tf.debugging.assert_shapes(shape_constraints) 62 | X = tf.cast(X[..., 0], tf.int32) 63 | if X2 is None: 64 | X2 = X 65 | else: 66 | X2 = tf.cast(X2[..., 0], tf.int32) 67 | B = self.output_covariance() 68 | return tf.gather(tf.transpose(tf.gather(B, X2)), X) 69 | 70 | def K_diag(self, X): 71 | tf.debugging.assert_shapes([(X, [..., "N", 1])]) 72 | X = tf.cast(X[..., 0], tf.int32) 73 | B_diag = self.output_variance() 74 | return tf.gather(B_diag, X) 75 | -------------------------------------------------------------------------------- /oak/ortho_rbf_kernel.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import gpflow 6 | import numpy as np 7 | import tensorflow as tf 8 | from typing import Optional 9 | from oak.input_measures import ( 10 | Measure, 11 | EmpiricalMeasure, 12 | GaussianMeasure, 13 | MOGMeasure, 14 | UniformMeasure, 15 | ) 16 | 17 | 18 | # - 19 | 20 | class OrthogonalRBFKernel(gpflow.kernels.Kernel): 21 | """ 22 | :param base_kernel: base RBF kernel before applying orthogonality constraint 23 | :param measure: input measure 24 | :param active_dims: active dimension 25 | :return: constrained BRF kernel 26 | """ 27 | 28 | def __init__( 29 | self, base_kernel: gpflow.kernels.RBF, measure: Measure, active_dims=None 30 | ): 31 | super().__init__(active_dims=active_dims) 32 | self.base_kernel, self.measure = base_kernel, measure 33 | self.active_dims = self.active_dims 34 | if not isinstance(base_kernel, gpflow.kernels.RBF): 35 | raise NotImplementedError 36 | if not isinstance( 37 | measure, 38 | ( 39 | UniformMeasure, 40 | GaussianMeasure, 41 | EmpiricalMeasure, 42 | MOGMeasure, 43 | ), 44 | ): 45 | raise NotImplementedError 46 | 47 | if isinstance(self.measure, UniformMeasure): 48 | 49 | def cov_X_s(X): 50 | tf.debugging.assert_shapes([(X, ("N", 1))]) 51 | l = self.base_kernel.lengthscales 52 | sigma2 = self.base_kernel.variance 53 | return ( 54 | sigma2 55 | * l 56 | / (self.measure.b - self.measure.a) 57 | * np.sqrt(np.pi / 2) 58 | * ( 59 | tf.math.erf((self.measure.b - X) / np.sqrt(2) / l) 60 | - tf.math.erf((self.measure.a - X) / np.sqrt(2) / l) 61 | ) 62 | ) 63 | 64 | def var_s(): 65 | l = self.base_kernel.lengthscales 66 | sigma2 = self.base_kernel.variance 67 | y = (self.measure.b - self.measure.a) / np.sqrt(2) / l 68 | return ( 69 | 2.0 70 | / ((self.measure.b - self.measure.a) ** 2) 71 | * sigma2 72 | * l ** 2 73 | * ( 74 | np.sqrt(np.pi) * y * tf.math.erf(y) 75 | + tf.exp(-tf.square(y)) 76 | - 1.0 77 | ) 78 | ) 79 | 80 | if isinstance(self.measure, GaussianMeasure): 81 | 82 | def cov_X_s(X): 83 | tf.debugging.assert_shapes([(X, (..., "N", 1))]) 84 | l = self.base_kernel.lengthscales 85 | sigma2 = self.base_kernel.variance 86 | mu, var = self.measure.mu, self.measure.var 87 | return ( 88 | sigma2 89 | * l 90 | / tf.sqrt(l ** 2 + var) 91 | * tf.exp(-0.5 * ((X - mu) ** 2) / (l ** 2 + var)) 92 | ) 93 | 94 | def var_s(): 95 | l = self.base_kernel.lengthscales 96 | sigma2 = self.base_kernel.variance 97 | return sigma2 * l / tf.sqrt(l ** 2 + 2 * self.measure.var) 98 | 99 | if isinstance(self.measure, EmpiricalMeasure): 100 | 101 | def cov_X_s(X): 102 | location = self.measure.location 103 | weights = self.measure.weights 104 | tf.debugging.assert_shapes( 105 | [(X, ("N", 1)), (location, ("M", 1)), (weights, ("M", 1))] 106 | ) 107 | return tf.matmul(self.base_kernel(X, location), weights) 108 | 109 | def var_s(): 110 | location = self.measure.location 111 | weights = self.measure.weights 112 | tf.debugging.assert_shapes([(location, ("M", 1)), (weights, ("M", 1))]) 113 | return tf.squeeze( 114 | tf.matmul( 115 | tf.matmul( 116 | weights, self.base_kernel(location), transpose_a=True 117 | ), 118 | weights, 119 | ) 120 | ) 121 | 122 | if isinstance(self.measure, MOGMeasure): 123 | 124 | def cov_X_s(X): 125 | tf.debugging.assert_shapes([(X, ("N", 1))]) 126 | l = self.base_kernel.lengthscales 127 | sigma2 = self.base_kernel.variance 128 | mu, var, weights = ( 129 | self.measure.means, 130 | self.measure.variances, 131 | self.measure.weights, 132 | ) 133 | tmp = tf.exp(-0.5 * ((X - mu) ** 2) / (l ** 2 + var)) / tf.sqrt( 134 | l ** 2 
+ var 135 | ) 136 | 137 | return sigma2 * l * tf.matmul(tmp, tf.reshape(weights, (-1, 1))) 138 | 139 | def var_s(): 140 | l = self.base_kernel.lengthscales 141 | 142 | sigma2 = self.base_kernel.variance 143 | mu, var, w = ( 144 | self.measure.means, 145 | self.measure.variances, 146 | self.measure.weights, 147 | ) 148 | dists = tf.square(mu[:, None] - mu[None, :]) 149 | scales = tf.square(l) + var[:, None] + var[None, :] 150 | tmp = sigma2 * l / tf.sqrt(scales) * tf.exp(-0.5 * dists / scales) 151 | 152 | return tf.squeeze(tf.matmul(tf.matmul(w[None, :], tmp), w[:, None])) 153 | 154 | self.cov_X_s = cov_X_s 155 | self.var_s = var_s 156 | 157 | def K(self, X: np.ndarray, X2: Optional[np.ndarray] = None) -> np.ndarray: 158 | """ 159 | :param X: input array X 160 | :param X2: input array X2, if None, set to X 161 | :return: kernel matrix K(X,X2) 162 | """ 163 | cov_X_s = self.cov_X_s(X) 164 | if X2 is None: 165 | cov_X2_s = cov_X_s 166 | else: 167 | cov_X2_s = self.cov_X_s(X2) 168 | k = ( 169 | self.base_kernel(X, X2) 170 | - tf.tensordot(cov_X_s, tf.transpose(cov_X2_s), 1) / self.var_s() 171 | ) 172 | return k 173 | 174 | def K_diag(self, X): 175 | cov_X_s = self.cov_X_s(X) 176 | k = self.base_kernel.K_diag(X) - tf.square(cov_X_s[:, 0]) / self.var_s() 177 | return k 178 | -------------------------------------------------------------------------------- /oak/plotting_utils.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from dataclasses import dataclass 6 | from pathlib import Path 7 | from typing import Callable, List, Optional, Union 8 | import gpflow 9 | import matplotlib 10 | import numpy as np 11 | import tensorflow as tf 12 | import tikzplotlib 13 | from matplotlib import pyplot as plt 14 | from oak.utils import get_model_sufficient_statistics 15 | 16 | 17 | # - 18 | 19 | @dataclass 20 | class FigureDescription: 21 | fig: matplotlib.figure.Figure 22 | description: str 23 | 24 | 25 | def save_fig_list( 26 | fig_list: List[FigureDescription], 27 | dirname: Path, 28 | ): 29 | # save figures to local directory 30 | dirname.mkdir(parents=True, exist_ok=True) 31 | print(f"Saving figures to {dirname}") 32 | for f in fig_list: 33 | f.fig.savefig(dirname / (f.description + ".pdf"), bbox_inches="tight") 34 | 35 | 36 | def plot_single_effect( 37 | m: Union[gpflow.models.GPR, gpflow.models.SGPR, gpflow.models.SVGP], 38 | i: int, 39 | covariate_name: str = "", 40 | title: str = "", 41 | x_transform=None, 42 | y_transform=None, 43 | semilogy=False, 44 | plot_corrected_data=False, 45 | plot_raw_data=False, 46 | X_list=None, 47 | fontsize=22, 48 | tikz_path=None, 49 | ylim=None, 50 | quantile_range: Optional[List] = None, 51 | log_bin=False, 52 | num_bin: Optional[int] = 100, 53 | ): 54 | """ 55 | :param m: a gpflow GPR or SVGP instance, it is expected to contain an instance of the OAK Kernel. 
56 |     :param i: (integer) the index of the effect to plot
57 |     :param covariate_name: str, used for the plot title
58 |     :param title: title of the plot
59 |     :param x_transform: callable function that maps the i'th column of X back to original coordinates
60 |     :param y_transform: callable function that maps the Y-data back to original coordinates
61 |     :param semilogy: whether to log transform the y-axis
62 |     :param plot_corrected_data: whether to scatter plot the corrected data (with the other effects subtracted)
63 |     :param plot_raw_data: whether to scatter plot the raw data
64 |     :param X_list: optional list of input features [X0, X1]; if provided, plot histograms of the elements X0 and X1
65 |     :param tikz_path: path to save the tikz figure
66 |     :param ylim: range of the y-axis
67 |     :param quantile_range: quantile range of the features to plot; entries should be in [0, 100], e.g. [2, 98]
68 |     :param log_bin: whether to use bins on a log scale for the histograms
69 |     :param num_bin: number of bins for the histogram
70 |     :return: a plot of a single effect (aka main effect)
71 |     """
72 |     matplotlib.rcParams.update({"font.size": fontsize})
73 |     X, Y = m.data
74 | 
75 |     if isinstance(m, gpflow.models.SVGP):
76 |         posterior = m.posterior()
77 |         alpha, Qinv = posterior.alpha, posterior.Qinv[0]
78 |         # separate condition when we plot the latent effects
79 |         if i == m.data[0].shape[1]:
80 |             Xi = np.linspace(-3, 3, 100)
81 |             X_histogram = np.random.normal(size=1000)
82 |         else:
83 |             Xi = X[:, i].numpy()
84 |     else:
85 |         alpha, L = get_model_sufficient_statistics(m)
86 |         Xi = X[:, i].numpy()
87 | 
88 |     if isinstance(m, gpflow.models.GPR):
89 |         X_conditioned = X
90 |     elif isinstance(m, (gpflow.models.SGPR, gpflow.models.SVGP)):
91 |         X_conditioned = m.inducing_variable.Z
92 | 
93 |     if quantile_range is None:
94 |         quantile_range = [0, 100]
95 |     xmin, xmax = np.percentile(Xi, (quantile_range[0], quantile_range[1]))
96 |     xx = np.linspace(xmin, xmax, 100)
97 |     Kxx = (
98 |         m.kernel.kernels[i].K(xx[:, None], X_conditioned[:, i : i + 1])
99 |         * m.kernel.variances[1]
100 |     )
101 |     mu = tf.matmul(Kxx, alpha)[:, 0]
102 |     if isinstance(m, gpflow.models.SVGP):
103 |         Kxx = tf.transpose(Kxx)
104 |         tmp = tf.matmul(Kxx, Qinv @ Kxx, transpose_a=True)
105 |         var = m.kernel.kernels[i].K_diag(xx[:, None]) * m.kernel.variances[
106 |             1
107 |         ] - tf.linalg.diag_part(tmp)
108 |     else:
109 |         tmp = tf.linalg.triangular_solve(L, tf.transpose(Kxx))
110 |         var = m.kernel.kernels[i].K_diag(xx[:, None]) * m.kernel.variances[1] - np.sum(
111 |             tmp ** 2, axis=0
112 |         )
113 |     lower = mu - 2 * np.sqrt(var)
114 |     upper = mu + 2 * np.sqrt(var)
115 | 
116 |     # do "data correction" for what each component is seeing
117 |     if plot_corrected_data:
118 |         K_sub = m.kernel(X, X_conditioned)
119 |         K_sub -= (
120 |             m.kernel.kernels[i].K(X[:, i : i + 1], X_conditioned[:, i : i + 1])
121 |             * m.kernel.variances[1]
122 |         )
123 |         Y_corrected = Y - tf.matmul(K_sub, alpha)
124 | 
125 |     # rescale the x-data.
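    # Editor's note on the curves computed above (a sketch of the algebra for
    # the GPR branch, assuming get_model_sufficient_statistics returns the
    # usual GPR quantities alpha = (K + sigma_n^2 I)^{-1} y and the Cholesky
    # factor L of (K + sigma_n^2 I)):
    #   mu_i(x)  = sigma_1^2 * k_i(x, X) @ alpha
    #   var_i(x) = sigma_1^2 * k_i(x, x) - || L^{-1} (sigma_1^2 * k_i(X, x)) ||^2
    # with sigma_1^2 = m.kernel.variances[1], the variance shared by all
    # first-order terms; `lower` and `upper` are mu_i(x) -/+ 2 * sqrt(var_i(x)).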
126 | if x_transform is None: 127 | xx_rescaled = 1.0 * xx 128 | Xi_rescaled = 1.0 * Xi 129 | else: 130 | xx_rescaled = x_transform(xx) 131 | Xi_rescaled = x_transform(Xi) 132 | if not isinstance(xx_rescaled, np.ndarray): 133 | xx_rescaled = xx_rescaled.numpy() 134 | if not isinstance(Xi_rescaled, np.ndarray): 135 | Xi_rescaled = Xi_rescaled.numpy() 136 | 137 | # re-scale the predictions and the y-data 138 | if y_transform is None: 139 | mu_rescaled = 1.0 * mu 140 | lower_rescaled = 1.0 * lower 141 | upper_rescaled = 1.0 * upper 142 | Y_rescaled = Y * 1.0 143 | if plot_corrected_data: 144 | Y_corrected_rescaled = 1.0 * Y_corrected 145 | else: 146 | mu_rescaled = y_transform(mu) 147 | lower_rescaled = y_transform(lower) 148 | upper_rescaled = y_transform(upper) 149 | Y_rescaled = y_transform(Y) 150 | if plot_corrected_data: 151 | Y_corrected_rescaled = y_transform(Y_corrected) 152 | 153 | # do the actual plotting 154 | figure = plt.figure(figsize=(8, 4)) 155 | ax1 = figure.add_axes([0.2, 0.2, 0.75, 0.75]) 156 | ax1.plot(xx_rescaled, mu_rescaled, linewidth=1, color="k", zorder=11) 157 | ax1.plot(xx_rescaled, lower_rescaled, linewidth=0.5, color="k", zorder=11) 158 | ax1.plot(xx_rescaled, upper_rescaled, linewidth=0.5, color="k", zorder=11) 159 | ax1.fill_between(xx_rescaled, lower_rescaled, upper_rescaled, alpha=0.2, color="C0") 160 | if plot_corrected_data: 161 | ax1.plot( 162 | Xi_rescaled, 163 | Y_corrected_rescaled, 164 | "C0x", 165 | label="data with other effects removed", 166 | ) 167 | ax1.set_ylim(*np.percentile(Y_corrected_rescaled, (2, 98))) 168 | else: 169 | ax1.set_ylim( 170 | *np.percentile(Y_rescaled, (0, 98)) 171 | ) if ylim is None else ax1.set_ylim(ylim) 172 | ax1.set_xlim(xx_rescaled.min(), xx_rescaled.max()) 173 | 174 | if plot_raw_data: 175 | ax1a = ax1.twinx() 176 | ax1a.plot(Xi_rescaled, Y_rescaled, "C1x") 177 | ax1a.set_ylabel("Raw data", color="C1") 178 | ax1a.spines["bottom"].set_visible("False") 179 | ax1.set_zorder(ax1a.get_zorder() + 1) 180 | if semilogy: 181 | ax1a.semilogy() 182 | 183 | ax1.patch.set_visible(False) 184 | for tick in ax1.get_xticklabels(): 185 | tick.set_visible(False) 186 | 187 | ax1.set_ylabel("$f_{" + covariate_name + "}$") 188 | ax1.set_title(title) 189 | ax1.spines["bottom"].set_visible("False") 190 | 191 | ax2 = figure.add_axes([0.2, 0.05, 0.75, 0.15], sharex=ax1) 192 | bins = ( 193 | num_bin 194 | if not log_bin 195 | else np.logspace( 196 | start=np.log10(Xi_rescaled.min() + 1), 197 | stop=np.log10(Xi_rescaled.max() + 1), 198 | num=num_bin, 199 | ) 200 | ) 201 | if X_list is not None: 202 | assert len(X_list) == 2 203 | ax2.hist(X_list[0], alpha=0.3, color="orange", bins=bins, label="data 1") 204 | ax2.hist(X_list[1], alpha=0.3, color="blue", bins=bins, label="data 2") 205 | ax2.legend(loc="upper right", prop={"size": 12}) 206 | else: 207 | ax2.hist(Xi_rescaled.flatten(), alpha=0.2, color="grey", bins=bins) 208 | ax2.set_yticks([]) 209 | ax2.set_xlabel(covariate_name) 210 | if semilogy: 211 | ax1.semilogy() 212 | fig_list = FigureDescription(fig=figure, description=title) 213 | if tikz_path is not None: 214 | tikzplotlib.save(tikz_path + f"{title}.tex") 215 | return fig_list 216 | 217 | 218 | def plot_second_order( 219 | m: Union[gpflow.models.GPR, gpflow.models.SGPR, gpflow.models.SVGP], 220 | i: int, 221 | j: int, 222 | covariate_names: Optional[str] = None, 223 | x_transforms: Optional[Callable[[np.ndarray], np.ndarray]] = None, 224 | y_transform: Optional[Callable[[np.ndarray], np.ndarray]] = None, 225 | title: Optional[str] = "", 226 
| tikz_path: Optional[str] = None, 227 | quantile_range: Optional[List[List]] = [[2, 98], [2, 98]], 228 | log_axis: Optional[List[bool]] = [False, False], 229 | xx: Optional[np.ndarray] = None, 230 | yy: Optional[np.ndarray] = None, 231 | num_bin: Optional[int] = 100, 232 | ): 233 | """ 234 | :param m: gpflow model 235 | :param i: index of feature on the x-axis 236 | :param j: index of feature on the y-axis 237 | :param covariate_names: list of feature names to label on the axes 238 | :param x_transforms: inverse transformation of features from the standardized space back to original space 239 | :param y_transform: transformation of output to the original space 240 | :param title: title of plot 241 | :param tikz_path: path to save tikz figure 242 | :param quantile_range: list of range of features i and j to plot 243 | :param log_axis: list of boolean indicating whether to plot axis on log(x+1) space 244 | :param xx: x-value of the grid points to evaluate functions on, if None, use linspace of standardised feature i 245 | :param yy: y-value of the grid points to evaluate functions on, if None, use linspace of standardised feature j 246 | :param num_bin: number of bins for histogram 247 | """ 248 | if covariate_names is None: 249 | covariate_names = [f"input {i}", f"input {j}"] 250 | 251 | X, Y = m.data 252 | 253 | if isinstance(m, gpflow.models.SVGP): 254 | posterior = m.posterior() 255 | alpha = posterior.alpha 256 | else: 257 | alpha, _ = get_model_sufficient_statistics(m) 258 | Xi = X[:, i].numpy() 259 | Xj = X[:, j].numpy() 260 | 261 | if isinstance(m, gpflow.models.GPR): 262 | X_conditioned = X 263 | elif isinstance(m, (gpflow.models.SGPR, gpflow.models.SVGP)): 264 | X_conditioned = m.inducing_variable.Z 265 | 266 | if quantile_range[0] is not None: 267 | xmin, xmax = np.percentile(Xi, (quantile_range[0][0], quantile_range[0][1])) 268 | else: 269 | xmin, xmax = Xi.min(), Xi.max() 270 | if quantile_range[1] is not None: 271 | ymin, ymax = np.percentile(Xj, (quantile_range[1][0], quantile_range[1][1])) 272 | else: 273 | ymin, ymax = Xj.min(), Xj.max() 274 | 275 | xx_range = np.linspace(start=xmin, stop=xmax, num=50) if xx is None else xx 276 | yy_range = np.linspace(start=ymin, stop=ymax, num=50) if yy is None else yy 277 | 278 | xx, yy = np.meshgrid(xx_range, yy_range) 279 | XX = np.vstack([xx.flatten(), yy.flatten()]).T 280 | Kxx = ( 281 | m.kernel.kernels[i].K(XX[:, 0:1], X_conditioned[:, i : i + 1]) 282 | * m.kernel.variances[2] 283 | ) 284 | Kxx *= m.kernel.kernels[j].K(XX[:, 1:2], X_conditioned[:, j : j + 1]) 285 | mu = np.dot(Kxx, alpha) 286 | 287 | # rescale the x- and y-data. 
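    | # x_transforms[0] / x_transforms[1] map features i and j from the
    | # standardised space back to their original scales; they may return
    | # TensorFlow tensors, hence the .numpy() conversions below.
    | # (The grid values `mu` computed above are the posterior mean of the
    | # (i, j) interaction component alone: `Kxx` is the product of the two
    | # unidimensional orthogonal kernels scaled by the order-2 variance, and
    | # `alpha` is (K + sigma^2 I)^{-1} y for a full GP, or the analogous
    | # sparse statistic, from get_model_sufficient_statistics.)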
288 | if x_transforms is None: 289 | xx_rescaled = 1.0 * xx 290 | Xi_rescaled = 1.0 * Xi 291 | yy_rescaled = 1.0 * yy 292 | Xj_rescaled = 1.0 * Xj 293 | else: 294 | xx_rescaled = x_transforms[0](xx) 295 | Xi_rescaled = x_transforms[0](Xi) 296 | yy_rescaled = x_transforms[1](yy) 297 | Xj_rescaled = x_transforms[1](Xj) 298 | 299 | if not isinstance(xx_rescaled, np.ndarray): 300 | xx_rescaled = xx_rescaled.numpy() 301 | if not isinstance(Xi_rescaled, np.ndarray): 302 | Xi_rescaled = Xi_rescaled.numpy() 303 | if not isinstance(yy_rescaled, np.ndarray): 304 | yy_rescaled = yy_rescaled.numpy() 305 | if not isinstance(Xj_rescaled, np.ndarray): 306 | Xj_rescaled = Xj_rescaled.numpy() 307 | 308 | # re-scale the predictions 309 | if y_transform is None: 310 | mu_rescaled = 1.0 * mu 311 | else: 312 | mu_rescaled = y_transform(mu) 313 | 314 | # do the actual plotting 315 | figure = plt.figure(figsize=(8, 4)) 316 | ax1 = figure.add_axes([0.2, 0.2, 0.75, 0.75]) 317 | bins_i = bins_j = num_bin 318 | if log_axis[0] is True: 319 | ax1.set_xscale("log") 320 | # plot log(x+1) if on log scale 321 | xx_rescaled += 1 322 | Xi_rescaled += 1 323 | bins_i = np.logspace( 324 | start=np.log10(Xi_rescaled.min() + 1), 325 | stop=np.log10(Xi_rescaled.max() + 1), 326 | num=num_bin, 327 | ) 328 | 329 | if log_axis[1] is True: 330 | ax1.set_yscale("log") 331 | yy_rescaled += 1 332 | Xj_rescaled += 1 333 | bins_j = np.logspace( 334 | start=np.log10(Xj_rescaled.min() + 1), 335 | stop=np.log10(Xj_rescaled.max() + 1), 336 | num=num_bin, 337 | ) 338 | 339 | contours = ax1.contour( 340 | xx_rescaled, 341 | yy_rescaled, 342 | mu_rescaled.reshape(*xx.shape), 343 | linewidths=1.4, 344 | colors="C0", 345 | ) 346 | ax1.clabel(contours, inline=1, fontsize=20) 347 | ax1.set_title(title) 348 | 349 | ax2 = figure.add_axes([0.2, 0.05, 0.75, 0.15], sharex=ax1) 350 | ax2.hist(Xi_rescaled.flatten(), alpha=0.2, color="grey", bins=bins_i) 351 | ax2.set_yticks([]) 352 | ax2.set_xlabel(covariate_names[0]) 353 | 354 | ax3 = figure.add_axes([0.08, 0.2, 0.12, 0.75], sharey=ax1) 355 | ax3.hist( 356 | Xj_rescaled.flatten(), 357 | alpha=0.2, 358 | color="grey", 359 | bins=bins_j, 360 | orientation="horizontal", 361 | ) 362 | ax3.set_xticks([]) 363 | ax3.set_xlim(ax3.get_xlim()[::-1]) 364 | ax3.set_ylabel(covariate_names[1]) 365 | 366 | ax1.set_xlim(xx_rescaled.min(), xx_rescaled.max()) 367 | ax1.set_ylim(yy_rescaled.min(), yy_rescaled.max()) 368 | 369 | for tick in ax1.get_xticklabels() + ax1.get_yticklabels(): 370 | tick.set_visible(False) 371 | 372 | fig_list = FigureDescription(fig=figure, description=title) 373 | if tikz_path is not None: 374 | tikzplotlib.save(tikz_path + f"{title}.tex") 375 | return fig_list 376 | 377 | 378 | def plot_single_effect_binary( 379 | m: Union[gpflow.models.GPR, gpflow.models.SGPR, gpflow.models.SVGP], 380 | i: int, 381 | binary_name: list, 382 | covariate_name: str = "", 383 | title: str = "Output Effect", 384 | y_transform: Optional[Callable[[np.ndarray], np.ndarray]] = None, 385 | semilogy: bool = False, 386 | tikz_path=None, 387 | ): 388 | X, Y = m.data 389 | Xi = X[:, i].numpy() 390 | alpha, L = get_model_sufficient_statistics(m) 391 | if isinstance(m, gpflow.models.GPR): 392 | X_conditioned = X 393 | elif isinstance(m, (gpflow.models.SGPR, gpflow.models.SVGP)): 394 | X_conditioned = m.inducing_variable.Z 395 | 396 | xx = np.array([0, 1]) 397 | Kxx = ( 398 | m.kernel.kernels[i].K(xx[:, None], X_conditioned[:, i : i + 1]) 399 | * m.kernel.variances[1] 400 | ) 401 | mu = tf.matmul(Kxx, alpha)[:, 0] 402 | tmp = 
tf.linalg.triangular_solve(L, tf.transpose(Kxx))
403 |     var = m.kernel.kernels[i].K_diag(xx[:, None]) * m.kernel.variances[1] - np.sum(
404 |         tmp ** 2, axis=0
405 |     )
406 | 
407 |     lower = mu - 2 * np.sqrt(var)
408 |     upper = mu + 2 * np.sqrt(var)
409 | 
    |     # "data correction": remove the estimated effect of every other kernel
    |     # component from the data, mirroring the K_sub construction used in
    |     # plot_second_order_binary (the order-1 variance applies here); this
    |     # defines the corrected data shown in the boxplots below
    |     K_sub = m.kernel(X, X_conditioned) - (
    |         m.kernel.kernels[i].K(X[:, i : i + 1], X_conditioned[:, i : i + 1])
    |         * m.kernel.variances[1]
    |     )
    |     Y_corrected = (Y - tf.matmul(K_sub, alpha)).numpy()
    | 
410 |     if y_transform is None:
411 |         mu_rescaled = 1.0 * mu
412 |         lower_rescaled = 1.0 * lower
413 |         upper_rescaled = 1.0 * upper
    |         Y_corrected_rescaled = 1.0 * Y_corrected
414 |     else:
415 |         mu_rescaled = y_transform(mu)
416 |         lower_rescaled = y_transform(lower)
417 |         upper_rescaled = y_transform(upper)
    |         Y_corrected_rescaled = np.asarray(y_transform(Y_corrected))
418 | 
419 |     fig, ax1 = plt.subplots(1, 1, figsize=(10, 6))
420 | 
421 |     ax1.plot([0, 0], [lower_rescaled[0], upper_rescaled[0]], linewidth=8, color="r")
422 |     ax1.plot([1, 1], [lower_rescaled[1], upper_rescaled[1]], linewidth=8, color="r")
423 |     ax1a = ax1.twinx()
424 |     ax1.get_shared_y_axes().join(ax1, ax1a)
425 | 
426 |     Y_dict = {
427 |         binary_name[0]: Y_corrected_rescaled[Xi == 0][:, 0],
428 |         binary_name[1]: Y_corrected_rescaled[Xi == 1][:, 0],
429 |     }
430 |     ax1a.boxplot(
431 |         Y_dict.values(), positions=np.array(range(2))
432 |     )
433 | 
434 |     ax1a.set_xticklabels([binary_name[0], binary_name[1]])
435 |     ax1a.set_ylabel("data with other effects removed", color="k")
436 | 
437 |     ax1.plot(1, mu_rescaled[1], "x", linewidth=40, color="b")
438 |     ax1.plot(0, mu_rescaled[0], "x", linewidth=40, color="b")
439 |     plt.xticks(np.arange(2), [binary_name[0], binary_name[1]])
440 |     plt.xlim([-1, 2])
441 |     plt.tight_layout()
442 | 
443 |     ax1.set_ylabel(title, color="r")
444 |     ax1.set_title(covariate_name)
445 | 
446 |     if semilogy:
447 |         ax1.semilogy()
448 |         ax1a.semilogy()
449 | 
450 |     fig_list = FigureDescription(fig=fig, description=title)
451 |     if tikz_path is not None:
452 |         tikzplotlib.save(tikz_path + f"{title}.tex")
453 |     return fig_list
454 | 
455 | 
456 | def plot_second_order_binary(
457 |     m: Union[gpflow.models.GPR, gpflow.models.SGPR],
458 |     i: int,
459 |     j: int,
460 |     binary_name: list,
461 |     covariate_names: Optional[list] = None,
462 |     title: str = "",
463 |     x_transforms=None,
464 |     y_transform=None,
465 |     tikz_path=None,
466 | ):
467 |     """
468 |     :param m: GP model
469 |     :param i: index of continuous feature
470 |     :param j: index of binary feature
    |     :param binary_name: names of the two levels of the binary feature
471 |     :param covariate_names: list of continuous and binary feature names
472 |     :param x_transforms: transformation for continuous feature
473 |     :param y_transform: transformation for the output
474 | 
475 |     """
476 |     if covariate_names is None:
477 |         covariate_names = [f"input {i}", f"input {j}"]
478 | 
479 |     X, Y = m.data
480 |     Xi = X[:, i].numpy()
481 |     alpha, L = get_model_sufficient_statistics(m)
482 |     if isinstance(m, gpflow.models.GPR):
483 |         X_conditioned = X
484 |     elif isinstance(m, gpflow.models.SGPR):
485 |         X_conditioned = m.inducing_variable.Z
486 | 
487 |     xmin, xmax = np.percentile(Xi, (2, 98))
488 | 
489 |     xx, yy = np.mgrid[xmin:xmax:100j, 0:1:2j]
490 |     XX = np.vstack([xx.flatten(), yy.flatten()]).T
491 |     Kxx = (
492 |         m.kernel.kernels[i].K(XX[:, 0:1], X_conditioned[:, i : i + 1])
493 |         * m.kernel.variances[2]
494 |     )
495 |     Kxx *= m.kernel.kernels[j].K(XX[:, 1:2], X_conditioned[:, j : j + 1])
496 |     mu = np.dot(Kxx, alpha)[:, 0]
497 | 
498 |     tmp = tf.linalg.triangular_solve(L, tf.transpose(Kxx))
499 |     var = m.kernel.kernels[i].K_diag(XX[:, 0:1]) * m.kernel.kernels[j].K_diag(
500 |         XX[:, 1:2]
501 |     ) * m.kernel.variances[2] - np.sum(tmp ** 2, axis=0)
502 | 
503 |     lower = mu - 2 * np.sqrt(var)
504 |     upper = mu + 2 * np.sqrt(var)
505 | 
506 |     # do "data correction" for what each component is seeing
507 |     K_sub = m.kernel(X, X_conditioned)
508 |     K_sub
-= ( 509 | m.kernel.kernels[i].K(X[:, i : i + 1], X_conditioned[:, i : i + 1]) 510 | * m.kernel.kernels[j].K(X[:, j : j + 1], X_conditioned[:, j : j + 1]) 511 | * m.kernel.variances[2] 512 | ) 513 | 514 | if x_transforms is None: 515 | xx_rescaled = 1.0 * xx[:, 0] 516 | Xi_rescaled = 1.0 * Xi 517 | else: 518 | xx_rescaled = x_transforms[0](xx[:, 0]).numpy() 519 | Xi_rescaled = x_transforms[0](Xi).numpy() 520 | 521 | # re-scale the predictions and the y-data 522 | if y_transform is None: 523 | mu_rescaled = 1.0 * mu 524 | lower_rescaled = 1.0 * lower 525 | upper_rescaled = 1.0 * upper 526 | else: 527 | mu_rescaled = y_transform(mu) 528 | lower_rescaled = y_transform(lower) 529 | upper_rescaled = y_transform(upper) 530 | 531 | # do the actual plotting 532 | fig, axes = plt.subplots(nrows=2, ncols=1, sharex="col", figsize=(10, 6)) 533 | plt.subplots_adjust(left=0.25, bottom=0.25, right=1) 534 | 535 | ax1 = axes[0] 536 | ax2 = axes[1] 537 | 538 | mu_rescaled0 = mu_rescaled[yy.flatten() == 0] 539 | mu_rescaled1 = mu_rescaled[yy.flatten() == 1] 540 | lower_rescaled0 = lower_rescaled[yy.flatten() == 0] 541 | lower_rescaled1 = lower_rescaled[yy.flatten() == 1] 542 | upper_rescaled0 = upper_rescaled[yy.flatten() == 0] 543 | upper_rescaled1 = upper_rescaled[yy.flatten() == 1] 544 | 545 | ax1.plot(xx_rescaled, lower_rescaled0, linewidth=0.5, color="k", zorder=11) 546 | ax1.plot(xx_rescaled, upper_rescaled0, linewidth=0.5, color="k", zorder=11) 547 | ax1.plot( 548 | xx_rescaled, 549 | mu_rescaled0, 550 | linewidth=2, 551 | color="C0", 552 | zorder=10, 553 | label=binary_name[0], 554 | ) 555 | ax1.fill_between( 556 | xx_rescaled, lower_rescaled0, upper_rescaled0, alpha=0.2, color="C0" 557 | ) 558 | 559 | ax1.legend() 560 | 561 | ax2.plot( 562 | xx_rescaled, 563 | mu_rescaled1, 564 | linewidth=2, 565 | color="C0", 566 | zorder=10, 567 | label=binary_name[1], 568 | ) 569 | ax2.plot(xx_rescaled, lower_rescaled1, linewidth=0.5, color="k", zorder=11) 570 | ax2.plot(xx_rescaled, upper_rescaled1, linewidth=0.5, color="k", zorder=11) 571 | ax2.fill_between( 572 | xx_rescaled, lower_rescaled1, upper_rescaled1, alpha=0.2, color="C0" 573 | ) 574 | 575 | ax2.legend() 576 | ax1.set_title(title) 577 | 578 | ax1.set_xlim(xx_rescaled.min(), xx_rescaled.max()) 579 | 580 | ax3 = fig.add_axes([0.25, 0.02, 0.75, 0.15], sharex=ax2) 581 | ax3.hist(Xi_rescaled.flatten(), alpha=0.2, color="grey", bins=50) 582 | ax3.set_yticks([]) 583 | ax3.set_xlabel(covariate_names[0]) 584 | 585 | for tick in ax1.get_xticklabels() + ax2.get_xticklabels(): 586 | tick.set_visible(False) 587 | 588 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 6)) 589 | Y_dict = {binary_name[0]: mu_rescaled0, binary_name[1]: mu_rescaled1} # , \ 590 | 591 | ax.boxplot(Y_dict.values(), positions=np.array(range(2))) 592 | ax.set_xticklabels(Y_dict.keys()) 593 | ax.set_ylabel("Predicted Effect", color="k") 594 | ax.set_title(title) 595 | 596 | fig_list = FigureDescription(fig=fig, description=title) 597 | if tikz_path is not None: 598 | tikzplotlib.save(tikz_path + f"{title}.tex") 599 | return fig_list 600 | 601 | 602 | def plot_single_effect_categorical( 603 | m: Union[gpflow.models.GPR, gpflow.models.SGPR, gpflow.models.SVGP], 604 | i: int, 605 | categorical_name: list, 606 | title: str = "Output Effect", 607 | y_transform=None, 608 | semilogy=False, 609 | tikz_path=None, 610 | ): 611 | X, Y = m.data 612 | alpha, L = get_model_sufficient_statistics(m) 613 | if isinstance(m, gpflow.models.GPR): 614 | X_conditioned = X 615 | elif isinstance(m, 
(gpflow.models.SVGP, gpflow.models.SGPR)): 616 | X_conditioned = m.inducing_variable.Z 617 | 618 | num_cat = m.kernel.kernels[i].num_cat 619 | xx = np.arange(num_cat) 620 | Kxx = ( 621 | m.kernel.kernels[i].K(xx[:, None], X_conditioned[:, i : i + 1]) 622 | * m.kernel.variances[1] 623 | ) 624 | mu = tf.matmul(Kxx, alpha)[:, 0] 625 | tmp = tf.linalg.triangular_solve(L, tf.transpose(Kxx)) 626 | var = m.kernel.kernels[i].K_diag(xx[:, None]) * m.kernel.variances[1] - np.sum( 627 | tmp ** 2, axis=0 628 | ) 629 | 630 | lower = mu - 2 * np.sqrt(var) 631 | upper = mu + 2 * np.sqrt(var) 632 | 633 | if y_transform is None: 634 | mu_rescaled = 1.0 * mu 635 | lower_rescaled = 1.0 * lower 636 | upper_rescaled = 1.0 * upper 637 | else: 638 | mu_rescaled = y_transform(mu) 639 | lower_rescaled = y_transform(lower) 640 | upper_rescaled = y_transform(upper) 641 | 642 | fig, ax1 = plt.subplots(1, 1, figsize=(10, 6)) 643 | 644 | for ii in range(num_cat): 645 | ax1.plot( 646 | [ii, ii], 647 | [lower_rescaled[ii], upper_rescaled[ii]], 648 | linewidth=8, 649 | color="cornflowerblue", 650 | ) 651 | ax1.plot(ii, mu_rescaled[ii], "x", linewidth=20, color="r") 652 | 653 | plt.xticks(np.arange(num_cat), [categorical_name[ii] for ii in range(num_cat)]) 654 | plt.xlim([-1, num_cat]) 655 | plt.tight_layout() 656 | 657 | ax1.set_ylabel("Output Effect") 658 | ax1.set_title(title) 659 | 660 | if semilogy: 661 | ax1.semilogy() 662 | 663 | fig_list = FigureDescription(fig=fig, description=title) 664 | if tikz_path is not None: 665 | tikzplotlib.save(tikz_path + f"{title}.tex") 666 | return fig_list 667 | -------------------------------------------------------------------------------- /oak/utils.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from typing import List, Optional, Tuple 6 | import gpflow 7 | import numpy as np 8 | import tensorflow as tf 9 | import tensorflow_probability as tfp 10 | from gpflow import set_trainable 11 | from gpflow.config import default_float, default_jitter 12 | from gpflow.covariances.dispatch import Kuf, Kuu 13 | from sklearn.cluster import KMeans 14 | from gpflow.models import GPModel 15 | from oak.input_measures import EmpiricalMeasure, GaussianMeasure, MOGMeasure 16 | from oak.oak_kernel import ( 17 | KernelComponenent, 18 | OAKKernel, 19 | bounded_param, 20 | get_list_representation, 21 | ) 22 | from oak.ortho_binary_kernel import OrthogonalBinary 23 | from oak.ortho_categorical_kernel import OrthogonalCategorical 24 | from oak.ortho_rbf_kernel import OrthogonalRBFKernel 25 | # - 26 | 27 | opt = gpflow.optimizers.Scipy() 28 | tfd = tfp.distributions 29 | f64 = gpflow.utilities.to_default_float 30 | 31 | 32 | def model_to_kernel_list(model: GPModel, selected_dims: List): 33 | # exact list of kernels from the OAK model 34 | kernel = [] 35 | model_dims = extract_active_dims(model) 36 | for i in range(len(selected_dims)): 37 | for j in range(len(model.kernel.kernels) - 1): 38 | if model_dims[j] == selected_dims[i]: 39 | kernel.append(model.kernel.kernels[j]) 40 | # append offset kernel 41 | kernel.append(model.kernel.kernels[-1]) 42 | return kernel 43 | 44 | 45 | def extract_active_dims(m): 46 | # exact list of active dimensions from the OAK model m 47 | active_dims = [] 48 | for i in range(len(m.kernel.kernels) - 1): 49 | # interaction with product kernel 50 | if type(m.kernel.kernels[i]) == gpflow.kernels.base.Product: 51 | sub_m = m.kernel.kernels[i].kernels 52 | dims = [] 53 | for j in range(len(sub_m)): 54 | dim = sub_m[j].active_dims 55 | dims.append(dim[0]) 56 | else: 57 | dims = m.kernel.kernels[i].active_dims 58 | 59 | active_dims.append(list(dims)) 60 | return active_dims 61 | 62 | 63 | def grammer_to_kernel( 64 | selected_dims, 65 | offset, 66 | measure=GaussianMeasure(0, 10), 67 | lengthscales_lo=1e-3, 68 | lengthscales_hi=100, 69 | variance_lo=0.01, 70 | variance_hi=100, 71 | ): 72 | # construct list of kernels 73 | # selected_dims: list of kernel indices 74 | selected_kernels = [] 75 | for i in range(len(selected_dims)): 76 | # loop through depth 77 | k_list = [] 78 | for j in range(len(selected_dims[i])): 79 | 80 | lengthscales = np.random.uniform(low=lengthscales_lo, high=lengthscales_hi) 81 | variance = np.random.uniform(low=variance_lo, high=variance_hi) 82 | 83 | dim = selected_dims[i][j] + offset 84 | if isinstance(measure, EmpiricalMeasure): 85 | location = measure.location 86 | k = OrthogonalRBFKernel( 87 | gpflow.kernels.RBF(lengthscales=lengthscales, variance=variance), 88 | EmpiricalMeasure(np.reshape(location[:, dim], (-1, 1))), 89 | active_dims=[dim], 90 | ) 91 | else: 92 | k = OrthogonalRBFKernel( 93 | gpflow.kernels.RBF(lengthscales=lengthscales, variance=variance), 94 | measure, 95 | active_dims=[dim], 96 | ) 97 | k.base_kernel.lengthscales = bounded_param( 98 | lengthscales_lo, lengthscales_hi, lengthscales 99 | ) 100 | k.base_kernel.variance = bounded_param(variance_lo, variance_hi, variance) 101 | if j > 0: 102 | k.base_kernel.variance.assign(1) 103 | set_trainable(k.base_kernel.variance, False) 104 | 105 | k_list.append(k) 106 | k = np.prod(k_list) 107 | selected_kernels.append(k) 108 | 109 | # add a constant kernel 110 | k0 = gpflow.kernels.Constant(variance=10) 111 | selected_kernels.append(k0) 112 | 113 | 
return selected_kernels 114 | 115 | 116 | def f1(x, y, sigma, lengthscales, delta, mu): 117 | # eq (44) in Appendix G.1 of paper for calculating Sobol indices 118 | return ( 119 | sigma ** 4 120 | * lengthscales 121 | / np.sqrt(lengthscales ** 2 + 2 * delta ** 2) 122 | * np.exp(-((x - y) ** 2) / (4 * lengthscales ** 2)) 123 | * np.exp(-((mu - (x + y) / 2) ** 2) / (2 * delta ** 2 + lengthscales ** 2)) 124 | ) 125 | 126 | 127 | def f2(x, y, sigma, lengthscales, delta, mu): 128 | # eq (45) in Appendix G.1 of paper for calculating Sobol indices 129 | M = 1 / (lengthscales ** 2) + 1 / (lengthscales ** 2 + delta ** 2) 130 | m = 1 / M * (mu / (lengthscales ** 2 + delta ** 2) + x / lengthscales ** 2) 131 | C = ( 132 | x ** 2 / (lengthscales ** 2) 133 | + mu ** 2 / (lengthscales ** 2 + delta ** 2) 134 | - m ** 2 * M 135 | ) 136 | return ( 137 | sigma ** 4 138 | * lengthscales 139 | * np.sqrt((lengthscales ** 2 + 2 * delta ** 2) / (delta ** 2 * M + 1)) 140 | * np.exp(-C / 2) 141 | / (lengthscales ** 2 + delta ** 2) 142 | * np.exp(-((y - mu) ** 2) / (2 * (lengthscales ** 2 + delta ** 2))) 143 | * np.exp(-((m - mu) ** 2) / (2 * (1 / M + delta ** 2))) 144 | ) 145 | 146 | 147 | def f3(x, y, sigma, lengthscales, delta, mu): 148 | # eq (46) in Appendix G.1 of paper for calculating Sobol indices 149 | return f2(y, x, sigma, lengthscales, delta, mu) 150 | 151 | 152 | def f4(x, y, sigma, lengthscales, delta, mu): 153 | # eq (47) in Appendix G.1 of paper for calculating Sobol indices 154 | return ( 155 | sigma ** 4 156 | * lengthscales ** 2 157 | * (lengthscales ** 2 + 2 * delta ** 2) 158 | * np.sqrt( 159 | (lengthscales ** 2 + delta ** 2) / (lengthscales ** 2 + 3 * delta ** 2) 160 | ) 161 | / ((lengthscales ** 2 + delta ** 2) ** 2) 162 | * np.exp( 163 | -((x - mu) ** 2 + (y - mu) ** 2) / (2 * (lengthscales ** 2 + delta ** 2)) 164 | ) 165 | ) 166 | 167 | 168 | def get_model_sufficient_statistics(m, get_L=True): 169 | """ 170 | Compute a vector "alpha" and a matrix "L" which can be used for easy prediction. 
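    |     For a GPR model, alpha = (K + sigma^2 I)^{-1} Y and L is the Cholesky
    |     factor of K + sigma^2 I, so the posterior mean at test points Xs is
    |     K(Xs, X) @ alpha and the posterior variance is
    |     K_diag(Xs) - sum(triangular_solve(L, K(X, Xs)) ** 2, axis=0).
    |     The SGPR/SVGP branches below return the analogous sparse-GP statistics.
    | 
    |     A minimal usage sketch (illustrative data only):
    | 
    |     >>> import numpy as np
    |     >>> import gpflow
    |     >>> X = np.linspace(0, 1, 10)[:, None]
    |     >>> m = gpflow.models.GPR((X, np.sin(X)), kernel=gpflow.kernels.RBF())
    |     >>> alpha, L = get_model_sufficient_statistics(m)
    |     >>> mean_at_X = m.kernel(X, X) @ alpha  # posterior mean at the training inputs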
171 | """ 172 | 173 | X_data, Y_data = m.data 174 | if isinstance(m, gpflow.models.SVGP): 175 | posterior = m.posterior() 176 | # details of Qinv can be found https://github.com/GPflow/GPflow/blob/develop/gpflow/posteriors.py 177 | alpha = posterior.alpha 178 | if get_L: 179 | L = tf.linalg.cholesky(tf.linalg.inv(posterior.Qinv[0])) 180 | elif isinstance(m, gpflow.models.SGPR): 181 | 182 | num_inducing = len(m.inducing_variable) 183 | err = Y_data - m.mean_function(X_data) 184 | kuf = Kuf(m.inducing_variable, m.kernel, X_data) 185 | kuu = Kuu(m.inducing_variable, m.kernel, jitter=default_jitter()) 186 | 187 | sigma = tf.sqrt(m.likelihood.variance) 188 | L = tf.linalg.cholesky(kuu) 189 | A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma 190 | B = tf.linalg.matmul(A, A, transpose_b=True) + tf.eye( 191 | num_inducing, dtype=default_float() 192 | ) 193 | LB = tf.linalg.cholesky(B) 194 | Aerr = tf.linalg.matmul(A, err) 195 | c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma 196 | 197 | tmp1 = tf.linalg.solve(tf.transpose(LB), c) 198 | alpha = tf.linalg.solve(tf.transpose(L), tmp1) 199 | 200 | if get_L: 201 | # compute the effective L 202 | LAi = tf.linalg.triangular_solve(L, np.eye(L.shape[0])) 203 | LBiLAi = tf.linalg.triangular_solve(LB, LAi) 204 | L = tf.linalg.inv(LAi - LBiLAi) 205 | 206 | elif isinstance(m, gpflow.models.GPR): 207 | # prepare for prediction 208 | K = m.kernel(X_data) 209 | Ktilde = K + np.eye(X_data.shape[0]) * m.likelihood.variance 210 | L = np.linalg.cholesky(Ktilde) 211 | alpha = tf.linalg.cholesky_solve(L, Y_data) 212 | 213 | else: 214 | raise NotImplementedError 215 | if get_L: 216 | return alpha, L 217 | else: 218 | return alpha 219 | 220 | 221 | def compute_L( 222 | X: tf.Tensor, lengthscale: float, variance: float, dim: int, delta: float, mu: float 223 | ) -> np.ndarray: 224 | # calculate the integral in eq (40) of Appendix G.1 in paper 225 | N = X.shape[0] 226 | sigma = np.sqrt(variance) 227 | xx = X[:, dim] 228 | yy = X[:, dim] 229 | 230 | x = np.repeat(xx, N) 231 | y = np.tile(yy, N) 232 | L = ( 233 | f1(x, y, sigma, lengthscale, delta, mu) 234 | - f2(x, y, sigma, lengthscale, delta, mu) 235 | - f3(x, y, sigma, lengthscale, delta, mu) 236 | + f4(x, y, sigma, lengthscale, delta, mu) 237 | ) 238 | L = np.reshape(L, (N, N)) 239 | 240 | return L 241 | 242 | 243 | def compute_L_binary_kernel( 244 | X: tf.Tensor, p0: float, variance: float, dim: int 245 | ) -> np.ndarray: 246 | 247 | """ 248 | Compute L matrix needed for sobol index calculation for orthogonal binary kernels. 249 | :param X: training input tensor 250 | :param p0: probability measure for the data distribution (Prob(x=0)) 251 | :param variance: variance parameter for the binary kernel, default is 1 252 | :param dim: active dimension of the kernel 253 | :return: sobol value L matrix 254 | 255 | """ 256 | assert 0 <= p0 <= 1 257 | 258 | N = X.shape[0] 259 | xx = X[:, dim] 260 | yy = X[:, dim] 261 | 262 | x = np.repeat(xx, N) 263 | y = np.tile(yy, N) 264 | p1 = 1 - p0 265 | 266 | L = variance * ( 267 | p0 * (p1 ** 2 * (1 - x) - p0 * p1 * x) * (p1 ** 2 * (1 - y) - p0 * p1 * y) 268 | + p1 * (-p0 * p1 * (1 - x) + p0 ** 2 * x) * (-p0 * p1 * (1 - y) + p0 ** 2 * y) 269 | ) 270 | L = np.reshape(L, (N, N)) 271 | 272 | return L 273 | 274 | 275 | def compute_L_categorical_kernel( 276 | X: tf.Tensor, W: tf.Tensor, kappa: tf.Tensor, p: float, variance: float, dim: int 277 | ) -> np.ndarray: 278 | 279 | """ 280 | Compute L matrix needed for sobol index calculation for orthogonal categorical kernels. 
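    |     Writing B for the (num_cat, num_cat) constrained kernel matrix over the
    |     categories (including the variance scaling) and p for the vector of
    |     category probabilities, the entries below are the finite-sum analogue of
    |     the integral in eq (40) of Appendix G.1:
    |     L[n, m] = sum_c p[c] * B[c, x_n] * B[c, x_m].
    | 
    |     A small NumPy sketch of the same computation (B, p_ and x here are
    |     illustrative stand-ins, not the kernel's actual parameters):
    | 
    |     >>> import numpy as np
    |     >>> B = np.array([[1.0, -0.4], [-0.4, 1.0]])  # category-level kernel matrix
    |     >>> p_ = np.array([0.3, 0.7])                 # category probabilities
    |     >>> x = np.array([0, 1, 1])                   # data as category indices
    |     >>> K = B[:, x]                               # K[c, n] = B[c, x_n]
    |     >>> L = (K * p_[:, None]).T @ K               # L[n, m] as above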
281 |     :param X: training input tensor
282 |     :param W: parameter of categorical kernel
283 |     :param kappa: parameter of categorical kernel
284 |     :param p: column vector of category probabilities for the data distribution (must sum to one)
285 |     :param variance: variance parameter for the categorical kernel, default is 1
286 |     :param dim: active dimension of the kernel
287 |     :return: sobol value L matrix
288 | 
289 |     """
290 |     assert np.abs(p.sum() - 1) < 1e-6
291 | 
292 |     N = X.shape[0]
293 | 
294 |     A = tf.linalg.matmul(W, W, transpose_b=True) + tf.linalg.diag(kappa)
295 |     Ap = tf.linalg.matmul(A, p)
296 |     B = A - tf.linalg.matmul(Ap, Ap, transpose_b=True) / (
297 |         tf.linalg.matmul(p, Ap, transpose_a=True)[0]
298 |     )
299 |     B = B * variance
300 | 
301 |     xx = tf.range(len(p), dtype=gpflow.config.default_float())
302 | 
303 |     K = tf.gather(
304 |         tf.transpose(tf.gather(B, tf.cast(X[:, dim], tf.int32))), tf.cast(xx, tf.int32)
305 |     )
306 | 
307 |     L = tf.linalg.matmul(K, K * p, transpose_a=True)
308 | 
309 |     return L
310 | 
311 | 
312 | @tf.function
313 | def compute_L_empirical_measure(
314 |     x: tf.Tensor, w: tf.Tensor, kernel: OrthogonalRBFKernel, z: tf.Tensor
315 | ) -> tf.Tensor:
316 |     r"""
317 |     Compute L matrix needed for sobol index calculation with empirical measure
318 |     :param x: location of empirical measure
319 |     :param w: weights of empirical measure, input density of the form 1/(\sum_i w_i) * \sum_i w_i (x==x_i)
320 |     :param kernel: constrained kernel
321 |     :param z: training data in full GP or inducing points locations in sparse GP
322 |     :return: sobol value L matrix
323 |     """
324 | 
325 |     # number of training/inducing points
326 |     m = z.shape[0]
327 |     # number of empirical locations
328 |     n = x.shape[0]
329 | 
330 |     kxu = kernel.K(x, z)
331 |     tf.debugging.assert_shapes([(kxu, (n, m))])
332 |     w = tf.reshape(w, [1, n])
333 |     L = tf.matmul(w * tf.transpose(kxu), kxu)
334 | 
335 |     return L
336 | 
337 | 
338 | def compute_sobol_oak(
339 |     model: gpflow.models.BayesianModel,
340 |     delta: float,
341 |     mu: float,
342 |     share_var_across_orders: Optional[bool] = True,
343 | ) -> Tuple[List[List[int]], List[float]]:
344 |     """
345 |     Compute sobol indices for an OAK model (Duvenaud-style additive GP)
346 |     :param model: gpflow model
347 |     :param delta: prior variance of measure p(X)
348 |     :param mu: prior mean of measure p(X)
349 |     :param share_var_across_orders: whether to share the same variance across orders,
350 |         if False, it uses original OrthogonalRBFKernel kernel \prod_i(1+k_i).
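    |     Each additive component is constrained to integrate to zero under the
    |     input measure, so alpha^T L_u alpha (eq (40) in Appendix G.1) is the
    |     squared norm of the estimated component under that measure, i.e. its
    |     unnormalised sobol contribution. A minimal usage sketch (assumes `model`
    |     is a fitted GPR/SGPR/SVGP built with an OAKKernel, and that delta and mu
    |     describe the Gaussian input measure):
    | 
    |         dims, sobol = compute_sobol_oak(model, delta=1.0, mu=0.0)
    |         ranking = sorted(zip(sobol, dims), reverse=True)  # largest first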
351 | :return: list of input dimension indices and list of sobol indices 352 | """ 353 | print(model.kernel) 354 | assert isinstance(model.kernel, OAKKernel), "only work for OAK kernel" 355 | num_dims = model.data[0].shape[1] 356 | 357 | selected_dims_oak, kernel_list = get_list_representation( 358 | model.kernel, num_dims=num_dims 359 | ) 360 | selected_dims_oak = selected_dims_oak[1:] # skip constant term 361 | if isinstance(model, (gpflow.models.SGPR, gpflow.models.SVGP)): 362 | X = model.inducing_variable.Z 363 | else: 364 | X = model.data[0] 365 | N = X.shape[0] 366 | alpha = get_model_sufficient_statistics(model, get_L=False) 367 | sobol = [] 368 | L_list = [] 369 | for kernel in kernel_list: 370 | assert isinstance(kernel, KernelComponenent) 371 | if len(kernel.iComponent_list) == 0: 372 | continue # skip constant term 373 | L = np.ones((N, N)) 374 | n_order = len(kernel.kernels) 375 | for j in range(len(kernel.kernels)): 376 | if share_var_across_orders: 377 | if j < 1: 378 | v = kernel.oak_kernel.variances[n_order].numpy() 379 | else: 380 | v = 1 381 | else: 382 | v = kernel.kernels[j].base_kernel.variance.numpy() 383 | 384 | dim = kernel.kernels[j].active_dims[0] 385 | 386 | if isinstance(kernel.kernels[j], OrthogonalRBFKernel): 387 | 388 | if isinstance(kernel.kernels[j].base_kernel, gpflow.kernels.RBF) and ( 389 | not isinstance(kernel.kernels[j].measure, EmpiricalMeasure) 390 | and (not isinstance(kernel.kernels[j].measure, MOGMeasure)) 391 | ): 392 | l = kernel.kernels[j].base_kernel.lengthscales.numpy() 393 | L = L * compute_L( 394 | X, 395 | l, 396 | v, 397 | dim, 398 | delta, 399 | mu, 400 | ) 401 | 402 | elif isinstance(kernel.kernels[j].measure, EmpiricalMeasure): 403 | L = ( 404 | v ** 2 405 | * L 406 | * compute_L_empirical_measure( 407 | kernel.kernels[j].measure.location, 408 | kernel.kernels[j].measure.weights, 409 | kernel.kernels[j], 410 | tf.reshape(X[:, dim], [-1, 1]), 411 | ) 412 | ) 413 | else: 414 | raise NotImplementedError 415 | 416 | elif isinstance(kernel.kernels[j], OrthogonalBinary): 417 | p0 = kernel.kernels[j].p0 418 | L = L * compute_L_binary_kernel(X, p0, v, dim) 419 | 420 | elif isinstance(kernel.kernels[j], OrthogonalCategorical): 421 | p = kernel.kernels[j].p 422 | W = kernel.kernels[j].W 423 | kappa = kernel.kernels[j].kappa 424 | L = L * compute_L_categorical_kernel(X, W, kappa, p, v, dim) 425 | 426 | else: 427 | raise NotImplementedError 428 | L_list.append(L) 429 | mean_term = tf.tensordot( 430 | tf.tensordot(tf.transpose(alpha), L, axes=1), alpha, axes=1 431 | ).numpy()[0][0] 432 | sobol.append(mean_term) 433 | 434 | assert len(selected_dims_oak) == len(sobol) 435 | return selected_dims_oak, sobol 436 | 437 | 438 | def compute_sobol( 439 | model: GPModel, 440 | kernel_list: list, 441 | delta: float, 442 | mu: float, 443 | alpha: np.ndarray, 444 | sparse_gp: bool = True, 445 | ): 446 | # compute Sobol in eq (40) of G.1 of paper 447 | if sparse_gp: 448 | X = model.inducing_variable.Z 449 | else: 450 | X = model.data[0] 451 | N = X.shape[0] 452 | sobol = [] 453 | L_list = [] 454 | for kernel in kernel_list: 455 | assert not isinstance( 456 | kernel, KernelComponenent 457 | ), "should use duvenaud sobol calculation code" 458 | if isinstance(kernel, gpflow.kernels.base.Product): # exclude constant term 459 | L = np.ones((N, N)) 460 | for j in range(len(kernel.kernels)): 461 | l = kernel.kernels[j].base_kernel.lengthscales.numpy() 462 | v = kernel.kernels[j].base_kernel.variance.numpy() 463 | dim = kernel.kernels[j].active_dims[0] 464 | L = L * 
compute_L(X, l, v, dim, delta, mu) 465 | L_list.append(L) 466 | sobol.append( 467 | tf.tensordot( 468 | tf.tensordot(tf.transpose(alpha), L, axes=1), alpha, axes=1 469 | ).numpy()[0][0] 470 | ) 471 | 472 | else: 473 | if type(kernel) != gpflow.kernels.statics.Constant and not isinstance( 474 | kernel, KernelComponenent 475 | ): 476 | l = kernel.base_kernel.lengthscales.numpy() 477 | v = kernel.base_kernel.variance.numpy() 478 | dim = kernel.active_dims[0] 479 | L = compute_L(X, l, v, dim, delta, mu) 480 | 481 | L_list.append(L) 482 | sobol.append( 483 | tf.tensordot( 484 | tf.tensordot(tf.transpose(alpha), L, axes=1), alpha, axes=1 485 | ).numpy()[0][0] 486 | ) 487 | 488 | return sobol 489 | 490 | 491 | def get_prediction_component( 492 | m: gpflow.models.BayesianModel, 493 | alpha: tf.Tensor, 494 | X: np.ndarray = None, 495 | share_var_across_orders: Optional[bool] = True, 496 | ) -> list: 497 | """ 498 | Return predictive mean for dataset 1 and 2 499 | :param m: GP model 500 | :param X: concatenation of data to make predictions: first half of X are from dataset 1, 501 | last half of X are from dataset 2. If it is None, then X is set to be the training data. 502 | :param alpha: statistics used to make predictions, e.g. K^{-1}y 503 | :param share_var_across_orders: whether to share the same variance across orders, 504 | if False, it uses original OrthogonalRBFKernel kernel \prod_i(1+k_i) 505 | :return: prediction of each kernel component of two datasets (e.g., two different simulation runs), concatenated together 506 | """ 507 | if X is None: 508 | X = m.data[0] 509 | selected_dims, _ = get_list_representation(m.kernel, num_dims=X.shape[1]) 510 | tuple_of_indices = selected_dims[1:] 511 | out = [] 512 | if isinstance(m, gpflow.models.GPR): 513 | X_conditioned = m.data[0] 514 | elif isinstance(m, (gpflow.models.SGPR, gpflow.models.SVGP)): 515 | X_conditioned = m.inducing_variable.Z 516 | 517 | for n in range(len(tuple_of_indices)): 518 | Kxx = tf.ones([X.shape[0], alpha.shape[0]], dtype=tf.dtypes.float64) 519 | num_interaction = len(tuple_of_indices[n]) 520 | for ii in range(num_interaction): 521 | idx = tuple_of_indices[n][ii] 522 | Kxx *= m.kernel.kernels[idx].K( 523 | np.reshape(X[:, idx], (-1, 1)), X_conditioned[:, idx : idx + 1] 524 | ) 525 | if share_var_across_orders: 526 | Kxx *= m.kernel.variances[num_interaction] 527 | 528 | predictive_component_mean = tf.matmul(Kxx, alpha) 529 | out.append(predictive_component_mean[:, 0]) 530 | return out 531 | 532 | 533 | def initialize_kmeans_with_binary( 534 | X: tf.Tensor, 535 | binary_index: list, 536 | continuous_index: Optional[list] = None, 537 | n_clusters: Optional[int] = 200, 538 | ): 539 | # K-means with combination of continuous and binary feature 540 | Z = np.zeros([n_clusters, X.shape[1]]) 541 | 542 | for index in binary_index: 543 | kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X[:, index][:, None]) 544 | Z[:, index] = kmeans.cluster_centers_.astype(int)[:, 0] 545 | 546 | if continuous_index is not None: 547 | kmeans_continuous = KMeans(n_clusters=n_clusters, random_state=0).fit( 548 | X[:, continuous_index] 549 | ) 550 | Z[:, continuous_index] = kmeans_continuous.cluster_centers_ 551 | 552 | return Z 553 | 554 | 555 | def initialize_kmeans_with_categorical( 556 | X: tf.Tensor, 557 | binary_index: list, 558 | categorical_index: list, 559 | continuous_index: list, 560 | n_clusters: Optional[int] = 200, 561 | ): 562 | # K-means with combination of continuous and categorical feature 563 | Z = np.zeros([n_clusters, 
X.shape[1]])
564 | 
565 |     for index in binary_index + categorical_index:
566 |         kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X[:, index][:, None])
567 |         Z[:, index] = kmeans.cluster_centers_.astype(int)[:, 0]
568 | 
569 |     kmeans_continuous = KMeans(n_clusters=n_clusters, random_state=0).fit(
570 |         X[:, continuous_index]
571 |     )
572 |     Z[:, continuous_index] = kmeans_continuous.cluster_centers_
573 | 
574 |     return Z
575 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | gpflow
2 | pytest
3 | lint
4 | black
5 | mypy
6 | flake8
7 | jupytext
8 | seaborn
9 | jupyter
10 | tqdm
11 | numpy
12 | matplotlib
13 | IPython
14 | scikit-learn
15 | tikzplotlib
16 | scikit-learn-extra==0.2.0
17 | tensorflow==2.11.1
18 | tensorflow_probability==0.11.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # +
2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | from setuptools import find_packages, setup
6 | # -
7 | 
8 | setup(
9 |     name="oak",
10 |     version="0.0.1",
11 |     packages=find_packages(include=['oak', 'oak.*']),
12 |     install_requires=[
13 |         "gpflow==2.2.1",
14 |         "pytest==5.4.1",
15 |         "lint",
16 |         "black",
17 |         "mypy",
18 |         "flake8",
19 |         "jupytext",
20 |         "seaborn",
21 |         "ipython",
22 |         "jupyter",
23 |         "tqdm==4.44.1",
24 |         "tikzplotlib",
25 |         "scikit-learn",
26 |         "numpy",
27 |         "matplotlib",
28 |         "tensorflow==2.11.1",
29 |         "s3fs==0.4.0",
30 |         "scikit-learn-extra==0.2.0",
31 |         "tensorflow_probability==0.11.0",
32 |     ],
33 | )
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amzn/orthogonal-additive-gaussian-processes/1aae10e792f1ff7099f92d716bca377c9be830f3/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # +
2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | 9 | # - 10 | 11 | @pytest.fixture 12 | def concrete_normalised_10_rows_data(): 13 | X = np.array( 14 | [ 15 | [1.988, -1.536, -1.155, -0.274, 1.73, -0.3, -0.629], 16 | [1.988, -1.536, -1.155, -0.274, 1.996, -0.3, -0.629], 17 | [-0.061, 0.321, 0.855, -0.589, -0.188, -1.064, 1.261], 18 | [-0.061, 0.321, 0.855, -0.589, -0.188, -1.064, 2.003], 19 | [-1.383, 0.189, -0.241, -0.589, 0.636, 1.093, 1.964], 20 | [-0.718, -0.051, 0.855, -0.589, -0.188, -0.356, -0.145], 21 | [0.408, -0.298, 0.855, -0.589, -0.188, -1.064, 2.003], 22 | [0.408, -0.298, 0.855, -0.589, -0.188, -1.064, -0.629], 23 | [-0.718, -0.051, 0.855, -0.589, -0.188, -0.356, -0.629], 24 | [1.346, -1.536, 0.855, -0.589, -0.188, -1.064, -0.629], 25 | ] 26 | ) 27 | y = np.array( 28 | [ 29 | [3.346], 30 | [1.825], 31 | [0.008], 32 | [0.073], 33 | [0.346], 34 | [0.576], 35 | [0.296], 36 | [-0.313], 37 | [0.477], 38 | [-0.075], 39 | ] 40 | ) 41 | return X, y 42 | -------------------------------------------------------------------------------- /tests/test_categorical_kernel.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from oak.ortho_categorical_kernel import OrthogonalCategorical 8 | # - 9 | 10 | _THRESHOLD_NUMERICAL_ACCURACY = 1e-3 11 | 12 | 13 | def test_OrthogonalCategorical(): 14 | np.random.seed(44) 15 | tf.random.set_seed(44) 16 | N = 1000 17 | num_cat = 2 18 | p = np.ones((num_cat, 1)) / num_cat 19 | k = OrthogonalCategorical(p, rank=2, active_dims=[0]) 20 | xx = np.reshape(np.random.choice(num_cat, N, p=p[:, 0]), (-1, 1)) 21 | mu = np.zeros(N) 22 | f = np.random.multivariate_normal(mu, k.K(xx), size=2000) 23 | assert np.abs(f.mean()) < _THRESHOLD_NUMERICAL_ACCURACY 24 | -------------------------------------------------------------------------------- /tests/test_kernel_properties.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | """ 6 | Tests kernel behave as expected 7 | """ 8 | from functools import reduce 9 | from itertools import combinations 10 | import gpflow 11 | import numpy as np 12 | import pytest 13 | # - 14 | 15 | from oak.input_measures import ( 16 | EmpiricalMeasure, 17 | GaussianMeasure, 18 | MOGMeasure, 19 | UniformMeasure, 20 | ) 21 | from oak.oak_kernel import OAKKernel 22 | from oak.ortho_binary_kernel import OrthogonalBinary 23 | from oak.ortho_rbf_kernel import OrthogonalRBFKernel 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "kernel", 28 | ( 29 | OAKKernel( 30 | [gpflow.kernels.RBF], 31 | num_dims=1, 32 | max_interaction_depth=1, 33 | ), 34 | OAKKernel( 35 | [gpflow.kernels.RBF], 36 | num_dims=1, 37 | max_interaction_depth=1, 38 | constrain_orthogonal=True, 39 | ), 40 | OrthogonalBinary(), 41 | OrthogonalRBFKernel(gpflow.kernels.RBF(), GaussianMeasure(0, 1)), 42 | OrthogonalRBFKernel(gpflow.kernels.RBF(), UniformMeasure(0, 1)), 43 | OrthogonalRBFKernel( 44 | gpflow.kernels.RBF(), EmpiricalMeasure(np.array([[0.1], [0.5], [0.5]])) 45 | ), 46 | OrthogonalRBFKernel( 47 | gpflow.kernels.RBF(), GaussianMeasure(0, 1), 48 | ), 49 | OrthogonalRBFKernel( 50 | gpflow.kernels.RBF(), 51 | MOGMeasure( 52 | np.array([3.0, 2.0]), np.array([3.0, 10.0]), np.array([0.6, 0.4]) 53 | ), 54 | ), 55 | ), 56 | ) 57 | def test_kernel_1d(kernel: gpflow.kernels.Kernel): 58 | X = np.array([[0.1], [0.5], [0.5]]) 59 | np.testing.assert_allclose( 60 | np.diag(kernel.K(X, X)), 61 | kernel.K_diag(X), 62 | err_msg="diagonal calculation is not correct", 63 | ) 64 | np.testing.assert_allclose( 65 | kernel.K(X, X), kernel(X, X), err_msg="k and k.K not the same" 66 | ) 67 | 68 | 69 | @pytest.mark.parametrize("num_dims", [3, 4]) 70 | def test_newton_girard(num_dims): 71 | k = OAKKernel( 72 | [gpflow.kernels.RBF for i in range(num_dims)], 73 | num_dims=num_dims, 74 | max_interaction_depth=num_dims, 75 | ) 76 | xx = [np.random.randn(2, 2) for _ in range(num_dims)] 77 | result = k.compute_additive_terms(xx) 78 | 79 | # compute the result the hard way 80 | result_hard = [np.ones((2, 2))] + [ 81 | reduce(np.add, map(lambda x: np.prod(x, axis=0), combinations(xx, i))) 82 | for i in range(1, num_dims) 83 | ] 84 | 85 | for r1, r2 in zip(result, result_hard): 86 | np.testing.assert_allclose(r1, r2) 87 | 88 | 89 | @pytest.mark.parametrize("active_dims", [[0], [1]]) 90 | @pytest.mark.parametrize( 91 | "measure", 92 | ( 93 | GaussianMeasure(0, 1), 94 | UniformMeasure(0, 1), 95 | EmpiricalMeasure(np.array([[0.1], [0.5]])), 96 | MOGMeasure(np.array([3.0, 2.0]), np.array([3.0, 10.0]), np.array([0.6, 0.4])), 97 | MOGMeasure( 98 | np.array([3, 2], dtype=int), 99 | np.array([3, 10], dtype=int), 100 | np.array([0.6, 0.4]), 101 | ), 102 | ), 103 | ) 104 | def test_orthogonal_rbf_kernel_2d_with_active_dims(active_dims, measure): 105 | k = OrthogonalRBFKernel( 106 | gpflow.kernels.RBF(lengthscales=10), measure, active_dims=active_dims 107 | ) 108 | X = np.array([[0.1, 0.2], [0.5, 0.5], [0.5, 0.7]]) 109 | np.testing.assert_allclose( 110 | np.diag(k.K(X[:, active_dims], X[:, active_dims])), 111 | k.K_diag(X[:, active_dims]), 112 | err_msg="diagonal calculation is not correct", 113 | ) 114 | np.testing.assert_allclose( 115 | k.K(X[:, active_dims]), k(X, X), err_msg="k and k.K not the same" 116 | ) 117 | -------------------------------------------------------------------------------- /tests/test_normalising_flow.py: -------------------------------------------------------------------------------- 1 | # + 2 | 
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import gpflow 6 | import numpy as np 7 | import pytest 8 | import oak.normalising_flow as normalising_flow 9 | from unittest import mock 10 | from oak.normalising_flow import Normalizer 11 | 12 | 13 | # - 14 | 15 | # mock is used to test figures 16 | @mock.patch("%s.normalising_flow.plt" % __name__) 17 | def test_normalising_flow(mock_plt): 18 | np.random.seed(44) 19 | N = 100 20 | x = np.random.normal(2, 0.5, size=(N, 1)) 21 | 22 | # apply normalising flow on x to check it is transformed to N(0,1) 23 | n = Normalizer(x, log=False) 24 | kl_before_optimising = n.KL_objective() 25 | 26 | opt = gpflow.optimizers.Scipy() 27 | opt.minimize(n.KL_objective, n.trainable_variables) 28 | 29 | y = n.bijector(x).numpy() 30 | # check transformed x has mean = 0 and var = 1 31 | np.testing.assert_almost_equal(0, y.mean(), decimal=2) 32 | np.testing.assert_almost_equal(1, y.std(), decimal=2) 33 | 34 | # check Kolmogorov-Smirnov test https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test do not 35 | # reject null hypothesis that transformed x and N(0,1) are identical at 5% significant level 36 | s, pvalue = n.kstest() 37 | assert pvalue > 0.05 38 | 39 | # test KL objective has decreased 40 | kl_after_optimising = n.KL_objective() 41 | assert kl_after_optimising < kl_before_optimising 42 | 43 | # Assert plt.figure got called 44 | n.plot(title="NF") 45 | assert mock_plt.figure.called 46 | 47 | # Assert plt.title has been called with expected arg 48 | mock_plt.title.assert_called_once_with("NF") 49 | -------------------------------------------------------------------------------- /tests/test_oak_kernel.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import gpflow 6 | import numpy as np 7 | import pytest 8 | # - 9 | 10 | from oak.model_utils import create_model_oak 11 | from oak.oak_kernel import KernelComponenent, OAKKernel, get_list_representation 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "data", [[[0.0], [1.0], [2.0]], [[0.0, 1.0], [1.0, 1.0], [2.0, 2.0]]] 16 | ) 17 | @pytest.mark.parametrize("num_inducings", [0, 2]) 18 | @pytest.mark.parametrize("lengthscale_bounds", [[1e-6, 2], None]) 19 | def test_oak(data, num_inducings, lengthscale_bounds): 20 | X = np.array(data) 21 | y = np.array(data)[:, 0].reshape(-1, 1) # 1-D output 22 | Z = X[:num_inducings, :] if num_inducings > 0 else None 23 | model = create_model_oak( 24 | (X, y), 25 | inducing_pts=Z, 26 | lengthscale_bounds=lengthscale_bounds, 27 | ) 28 | 29 | assert not np.isnan(model.maximum_log_likelihood_objective()) 30 | 31 | 32 | def test_kernel_component_constant(concrete_normalised_10_rows_data): 33 | X, y = concrete_normalised_10_rows_data 34 | x_try = X[:, 1][:, None] 35 | k = OAKKernel( 36 | [gpflow.kernels.RBF for i in range(x_try.shape[1])], 37 | num_dims=x_try.shape[1], 38 | max_interaction_depth=0, 39 | constrain_orthogonal=True, 40 | ) 41 | k.variances[0].assign(0.3) 42 | np.testing.assert_allclose( 43 | k(x_try), KernelComponenent(k, [])(x_try), err_msg="0 order" 44 | ) 45 | 46 | 47 | def test_kernel_component_one_dimensional( 48 | concrete_normalised_10_rows_data 49 | ): 50 | X, y = concrete_normalised_10_rows_data 51 | x_try = X[:, 1][:, None] 52 | k = OAKKernel( 53 | [gpflow.kernels.RBF for i in range(x_try.shape[1])], 54 | num_dims=x_try.shape[1], 55 | max_interaction_depth=1, 56 | constrain_orthogonal=True, 57 | ) 58 | k.variances[0].assign(0.3) 59 | k.variances[1].assign(3.3) 60 | np.testing.assert_allclose( 61 | k(x_try), 62 | KernelComponenent(k, [])(x_try) + KernelComponenent(k, [0])(x_try), 63 | err_msg="1 order 1-D", 64 | ) 65 | 66 | 67 | def test_kernel_component_two_dimensional_order_one_effects( 68 | concrete_normalised_10_rows_data 69 | ): 70 | X, y = concrete_normalised_10_rows_data 71 | x_try = X[:, :2] 72 | k = OAKKernel( 73 | [gpflow.kernels.RBF for i in range(x_try.shape[1])], 74 | num_dims=x_try.shape[1], 75 | max_interaction_depth=1, 76 | constrain_orthogonal=True, 77 | ) 78 | np.testing.assert_allclose( 79 | k(x_try), 80 | KernelComponenent(k, [])(x_try) 81 | + KernelComponenent(k, [0])(x_try) 82 | + KernelComponenent(k, [1])(x_try), 83 | err_msg="1 order 2-D", 84 | ) 85 | 86 | 87 | def test_kernel_component_two_dimensional_order_two_effects( 88 | concrete_normalised_10_rows_data 89 | ): 90 | X, y = concrete_normalised_10_rows_data 91 | x_try = X[:, :2] 92 | k = OAKKernel( 93 | [gpflow.kernels.RBF for i in range(x_try.shape[1])], 94 | num_dims=x_try.shape[1], 95 | max_interaction_depth=2, 96 | constrain_orthogonal=True, 97 | ) 98 | k.variances[0].assign(1.3) # so we see if variance change 99 | k.variances[1].assign(3.3) 100 | k.variances[2].assign(4.3) 101 | np.testing.assert_allclose( 102 | k(x_try), 103 | KernelComponenent(k, [])(x_try) 104 | + KernelComponenent(k, [0])(x_try) 105 | + KernelComponenent(k, [1])(x_try) 106 | + KernelComponenent(k, [0, 1])(x_try), 107 | err_msg="2 order 2-D", 108 | ) 109 | np.testing.assert_allclose( 110 | k.K_diag(x_try), 111 | KernelComponenent(k, []).K_diag(x_try) 112 | + KernelComponenent(k, [0]).K_diag(x_try) 113 | + KernelComponenent(k, [1]).K_diag(x_try) 114 | + KernelComponenent(k, [0, 1]).K_diag(x_try), 115 | err_msg="2 order 2-D K_diag", 116 | ) 117 
| 118 | 119 | @pytest.mark.parametrize("num_dims", (2, 5, 7)) 120 | def test_get_list_representation_two_dimensional( 121 | num_dims, concrete_normalised_10_rows_data 122 | ): 123 | X, y = concrete_normalised_10_rows_data 124 | x_try = X[:, :2] 125 | k = OAKKernel( 126 | [gpflow.kernels.RBF for i in range(x_try.shape[1])], 127 | num_dims=x_try.shape[1], 128 | max_interaction_depth=2, 129 | constrain_orthogonal=True, 130 | ) 131 | selected_dims, kernel_list = get_list_representation(k, num_dims=2) 132 | if num_dims == 2: 133 | assert selected_dims == [[], [0], [1], [0, 1]] 134 | assert len(kernel_list) == len(selected_dims) 135 | 136 | # checking K_Diag 137 | np.testing.assert_allclose(k.K_diag(X), np.diag(k(X))) 138 | k_el_diag = [l.K_diag(X) for l in kernel_list] 139 | np.testing.assert_allclose(k.K_diag(X), np.sum(k_el_diag, axis=0)) 140 | 141 | # checking K 142 | K = k(X) 143 | k_el = [l(X) for l in kernel_list] 144 | np.testing.assert_allclose(K, np.sum(k_el, axis=0)) 145 | -------------------------------------------------------------------------------- /tests/test_oak_model.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numpy as np 6 | import pytest 7 | import tensorflow as tf 8 | from sklearn.metrics import mean_squared_error 9 | from sklearn.model_selection import train_test_split 10 | 11 | from oak.input_measures import MOGMeasure 12 | from oak.model_utils import oak_model 13 | 14 | 15 | @pytest.mark.parametrize("interaction_depth", [2]) 16 | @pytest.mark.parametrize("use_sparsity_prior", [True, False]) 17 | @pytest.mark.parametrize("initialise_inducing_points", [True, False]) 18 | @pytest.mark.parametrize("sparse", [True, False]) 19 | @pytest.mark.parametrize("clip", [True, False]) 20 | def test_oak_model( 21 | interaction_depth: int, 22 | use_sparsity_prior: bool, 23 | initialise_inducing_points: bool, 24 | sparse: bool, 25 | clip: bool, 26 | ): 27 | np.random.seed(44) 28 | tf.random.set_seed(44) 29 | 30 | N = 100 31 | X = np.random.normal(0, 1, (N, 3)) 32 | y = X[:, 0] ** 2 + X[:, 1] + X[:, 1] * X[:, 2] + np.random.normal(0, 0.01, (N,)) 33 | 34 | X_train, X_test, y_train, y_test = train_test_split( 35 | X, y[:, None], test_size=0.2, random_state=42 36 | ) 37 | 38 | oak = oak_model( 39 | num_inducing=50, 40 | max_interaction_depth=interaction_depth, 41 | use_sparsity_prior=use_sparsity_prior, 42 | sparse=sparse, 43 | ) 44 | oak.fit( 45 | X_train, 46 | y_train, 47 | initialise_inducing_points=initialise_inducing_points, 48 | optimise=False, 49 | ) 50 | 51 | y_pred = oak.predict(X_test, clip=clip) 52 | rss = mean_squared_error(y_pred, y_test[:, 0]) 53 | # check the model is better than predicting using the mean prediction 54 | assert rss < mean_squared_error( 55 | y_test.mean() * np.ones(y_test[:, 0].shape), y_test[:, 0] 56 | ) 57 | 58 | 59 | @pytest.mark.parametrize("interaction_depth", [1, 2]) 60 | @pytest.mark.parametrize("use_sparsity_prior", [True, False]) 61 | def test_oak_model_with_binary_and_categorical_data( 62 | interaction_depth: int, 63 | use_sparsity_prior: bool, 64 | ): 65 | np.random.seed(44) 66 | tf.random.set_seed(44) 67 | 68 | N = 20 69 | x_cat = np.random.choice([0, 1, 2, 3], size=N, p=[0.2, 0.2, 0.3, 0.3]) 70 | x_binary = np.random.choice([0, 1], size=N, p=[0.8, 0.2]) 71 | x_cont = np.random.randn(N) 72 | X = np.vstack([x_binary, x_cat, x_cont]).T 73 | 74 | y = np.sin(X[:, 2]) + 
np.random.normal(0, 0.01, (N,)) 75 | Y = y.reshape(-1, 1) 76 | 77 | oak = oak_model( 78 | binary_feature=[0], 79 | categorical_feature=[1], 80 | max_interaction_depth=interaction_depth, 81 | use_sparsity_prior=use_sparsity_prior, 82 | ) 83 | oak.fit(X, Y, optimise=False) 84 | log_lik = ( 85 | oak.m.log_marginal_likelihood() 86 | ) # check log likelihood calculation. This can reveal errors in shapes. 87 | assert not np.isnan(log_lik) 88 | 89 | 90 | @pytest.fixture 91 | def binary_5D_data(): 92 | N = 3 93 | D = 5 94 | np.random.seed(42) 95 | X = np.random.randint(0, 2, N * D).reshape(N, D).astype(float) 96 | Y = np.random.randn(N, 1) 97 | return X, Y 98 | 99 | 100 | @pytest.mark.parametrize( 101 | "binary_feature, categorical_feature, gmm_measure, empirical_measure", 102 | [ 103 | [ 104 | [0], 105 | [1], 106 | [0, 0, 2, 3, 0], 107 | [4], 108 | ], # GMM with 2 and 3 clusters 109 | [[0], [1], None, [2, 3]], 110 | ], 111 | ) 112 | def test_oak_model_creation( 113 | binary_5D_data, 114 | binary_feature, 115 | categorical_feature, 116 | gmm_measure, 117 | empirical_measure, 118 | ): 119 | X, Y = binary_5D_data 120 | oak = oak_model( 121 | num_inducing=3, 122 | binary_feature=binary_feature, 123 | categorical_feature=categorical_feature, 124 | gmm_measure=gmm_measure, 125 | empirical_measure=empirical_measure, 126 | ) 127 | oak.fit(X, Y, optimise=False) 128 | 129 | 130 | @pytest.mark.parametrize( 131 | "binary_feature, categorical_feature, gmm_measure, empirical_measure", 132 | [ 133 | [[0], [1], None, None], 134 | [[0], [1, 3], None, None], 135 | ], 136 | ) 137 | def test_oak_sobol_supported( 138 | binary_5D_data, binary_feature, categorical_feature, gmm_measure, empirical_measure 139 | ): 140 | X, Y = binary_5D_data 141 | # add random noise to input to make it continuous for continuous dimensions 142 | continuous_idx = list(set(np.arange(5)) - set(binary_feature + categorical_feature)) 143 | X[:,continuous_idx] = X[:,continuous_idx] + np.random.normal(0, 1, (X.shape[0],len(continuous_idx))) 144 | 145 | oak = oak_model( 146 | binary_feature=binary_feature, 147 | categorical_feature=categorical_feature, 148 | gmm_measure=gmm_measure, 149 | empirical_measure=empirical_measure, 150 | ) 151 | 152 | oak.fit(X, Y, optimise=False) 153 | 154 | sobol = oak.get_sobol() 155 | assert np.all(sobol >= 0) 156 | 157 | 158 | @pytest.mark.parametrize( 159 | "gmm_measure", 160 | [[0, 0, 3, 0, 0]], 161 | ) 162 | def test_oak_sobol_not_supported(binary_5D_data, gmm_measure): 163 | X, Y = binary_5D_data 164 | # add random noise to input to make it continuous 165 | X = X + np.random.normal(0, 1, X.shape) 166 | 167 | oak = oak_model( 168 | gmm_measure=gmm_measure 169 | ) 170 | oak.fit(X, Y, optimise=False) 171 | with pytest.raises(NotImplementedError): 172 | _ = oak.get_sobol() 173 | 174 | 175 | # empirical_measure should throw for Sobol 176 | # GMM should throw for Sobol 177 | # empirical measure for discrete data only.. 
so should throw if specified of continuous 178 | @pytest.mark.parametrize( 179 | "binary_feature, categorical_feature, gmm_measure, empirical_measure", 180 | [ 181 | [[0, 1], [2], [0,0,0,2,0], [4]], # empirical measure on binary 182 | [[0, 1], [2], None, [3, 4]], # No GMM measure 183 | ], 184 | ) 185 | def test_oak_good_model_creation_overlapping_indices( 186 | binary_5D_data, 187 | binary_feature, 188 | categorical_feature, 189 | gmm_measure, 190 | empirical_measure, 191 | ): 192 | X, Y = binary_5D_data 193 | oak = oak_model( 194 | binary_feature=binary_feature, 195 | categorical_feature=categorical_feature, 196 | gmm_measure=gmm_measure, 197 | empirical_measure=empirical_measure, 198 | ) 199 | oak.fit(X, Y, optimise=False) 200 | 201 | 202 | @pytest.mark.parametrize( 203 | "binary_feature, categorical_feature, gmm_measure, empirical_measure", 204 | [ 205 | [[0, 1], [1], [0] * 5, [3]], # overlapping binary & categorical 206 | [[0], [1], None, [0]], # empirical measure on binary input 207 | [[0], [1], None, [1]], # empirical measure on categorical input 208 | [ 209 | [0], 210 | [1], 211 | [2, 0, 0, 0, 0], 212 | [2, 4], 213 | ], # gmm specified on discrete input 214 | ], 215 | ) 216 | def test_oak_illegal_model_creation_overlapping_indices( 217 | binary_5D_data, 218 | binary_feature, 219 | categorical_feature, 220 | gmm_measure, 221 | empirical_measure, 222 | ): 223 | X, Y = binary_5D_data 224 | oak = oak_model( 225 | binary_feature=binary_feature, 226 | categorical_feature=categorical_feature, 227 | gmm_measure=gmm_measure, 228 | empirical_measure=empirical_measure, 229 | ) 230 | with pytest.raises(ValueError): 231 | oak.fit(X, Y, optimise=False) 232 | 233 | 234 | def test_oak_gmm_applied_without_flows(binary_5D_data): 235 | np.random.seed(44) 236 | X, Y = binary_5D_data 237 | # add random noise to input to make it continuous for first and second dimension 238 | X[:,:-1] = X[:,:-1] + np.random.normal(0, 1, (X.shape[0],4)) 239 | gmm_measure = [0, 0, 0, 0, 2] 240 | oak = oak_model(gmm_measure=gmm_measure) 241 | oak.fit(X, Y, optimise=False) 242 | assert oak.estimated_gmm_measures[:-1] == [None] * 4 243 | assert isinstance(oak.estimated_gmm_measures[-1], MOGMeasure) 244 | assert np.allclose( 245 | # check the means for the 3rd dimension is unchanged (0, 1) 246 | np.sort(oak.estimated_gmm_measures[-1].means), np.array([0, 1.0]) 247 | ) 248 | 249 | assert ( 250 | oak.input_flows[-1] is None 251 | ), f"Flow applied on GMM measure input {oak.input_flows[-1]}" 252 | assert ( 253 | np.array(oak.input_flows[:-1]) != None 254 | ).sum() == 4, f"Should have normalising flow for every continuous input without a GMM measure {oak.input_flows[:-1]}" 255 | 256 | -------------------------------------------------------------------------------- /tests/test_optimisation.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import gpflow 6 | import numpy as np 7 | import pytest 8 | from gpflow.inducing_variables import InducingPoints 9 | from gpflow.models import GPR, SGPR 10 | from oak.input_measures import GaussianMeasure 11 | from oak.model_utils import create_model_oak 12 | from oak.ortho_rbf_kernel import OrthogonalRBFKernel 13 | 14 | 15 | # - 16 | 17 | @pytest.mark.parametrize("num_inducings", [0, 2]) 18 | def test_OrthogonalRBFKernel_optimisation(num_inducings): 19 | data = [[0.0], [1.0], [2.0]] 20 | X = np.array(data) 21 | y = np.array(data)[:, 0].reshape(-1, 1) # 1-D output 22 | Z = X[:num_inducings, :] if num_inducings > 0 else None 23 | 24 | k = OrthogonalRBFKernel( 25 | gpflow.kernels.RBF(lengthscales=10), 26 | GaussianMeasure(0, 1), 27 | ) 28 | 29 | if Z is not None: 30 | model = SGPR((X, y), kernel=k, inducing_variable=InducingPoints(Z)) 31 | else: 32 | model = GPR((X, y), kernel=k) 33 | 34 | initial_log_likelihood = model.maximum_log_likelihood_objective() 35 | assert not np.isnan(initial_log_likelihood) 36 | 37 | opt = gpflow.optimizers.Scipy() 38 | opt.minimize( 39 | model.training_loss_closure(), 40 | model.trainable_variables, 41 | method="BFGS", 42 | compile=True, 43 | options=dict(disp=True, maxiter=2), 44 | ) 45 | assert initial_log_likelihood < model.maximum_log_likelihood_objective() 46 | 47 | 48 | @pytest.mark.parametrize("num_inducings", [0, 2]) 49 | def test_oak_optimisation(num_inducings): 50 | data = [[0.0], [1.0], [2.0]] 51 | X = np.array(data) 52 | y = np.array(data)[:, 0].reshape(-1, 1) # 1-D output 53 | Z = X[:num_inducings, :] if num_inducings > 0 else None 54 | 55 | model = create_model_oak( 56 | (X, y), inducing_pts=Z, optimise=False, zfixed=True, 57 | ) 58 | 59 | initial_log_likelihood = model.maximum_log_likelihood_objective() 60 | assert not np.isnan(initial_log_likelihood) 61 | 62 | opt = gpflow.optimizers.Scipy() 63 | opt.minimize( 64 | model.training_loss_closure(), 65 | model.trainable_variables, 66 | method="BFGS", 67 | compile=True, 68 | options=dict(disp=True, maxiter=2), 69 | ) 70 | assert initial_log_likelihood < model.maximum_log_likelihood_objective() 71 | -------------------------------------------------------------------------------- /tests/test_orthogonality.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
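# What this file checks, in one picture: cov_X_s(x) should match the integral
# E_{s~p}[k_base(x, s)] and var_s the double integral E_{s,s'~p}[k_base(s, s')],
# so a GP built from the orthogonal kernel has sample paths averaging to
# roughly zero under the input measure p. A Monte-Carlo restatement of that
# zero-mean property, as a hedged sketch (the helper name is ours, not library API):
def _demo_zero_mean_under_measure(n=1000, seed=0):
    import gpflow
    import numpy as np
    from oak.input_measures import GaussianMeasure
    from oak.ortho_rbf_kernel import OrthogonalRBFKernel

    rng = np.random.default_rng(seed)
    k = OrthogonalRBFKernel(gpflow.kernels.RBF(lengthscales=10), GaussianMeasure(0, 1))
    x = rng.normal(size=(n, 1))  # samples from the measure p = N(0, 1)
    f = rng.multivariate_normal(np.zeros(n), k.K(x).numpy())  # one GP draw at x
    return f.mean()  # expected to be close to 0, matching the tests below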
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | import gpflow
6 | import numpy as np
7 | import pytest
8 | # -
9 | 
10 | from oak.input_measures import (
11 |     EmpiricalMeasure,
12 |     GaussianMeasure,
13 |     MOGMeasure,
14 |     UniformMeasure,
15 | )
16 | from oak.model_utils import estimate_one_dim_gmm
17 | from oak.ortho_rbf_kernel import OrthogonalRBFKernel
18 | 
19 | _THRESHOLD_NUMERICAL_ACCURACY = 2
20 | np.random.seed(0)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "kernel",
25 |     [gpflow.kernels.RBF(lengthscales=10)],
26 | )
27 | def test_cov_X_s_Gaussian(kernel: gpflow.kernels.Kernel):
28 |     k = OrthogonalRBFKernel(kernel, GaussianMeasure(0, 1))
29 |     k_cov = k.cov_X_s(np.zeros((1, 1)))
30 | 
31 |     samples_std_normal = np.random.normal(size=10000)[:, None]
32 |     k_cov_numeric = np.mean(k.base_kernel.K(np.zeros((1, 1)), samples_std_normal))
33 |     print(np.abs(k_cov - k_cov_numeric))
34 |     np.testing.assert_almost_equal(
35 |         np.abs(k_cov - k_cov_numeric), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY
36 |     )
37 | 
38 | 
39 | @pytest.mark.parametrize(
40 |     "kernel",
41 |     [gpflow.kernels.RBF(lengthscales=10)],
42 | )
43 | def test_var_s_Gaussian(kernel: gpflow.kernels.Kernel):
44 |     k = OrthogonalRBFKernel(kernel, GaussianMeasure(0, 1))
45 |     k_var = k.var_s()
46 | 
47 |     samples_std_normal = np.random.normal(size=10000)[:, None]
48 |     k_var_numeric = np.mean(k.cov_X_s(samples_std_normal))
49 |     print(np.abs(k_var - k_var_numeric))
50 |     np.testing.assert_almost_equal(
51 |         np.abs(k_var - k_var_numeric), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY
52 |     )
53 | 
54 | 
55 | def test_cov_X_s_Uniform():
56 |     k = OrthogonalRBFKernel(gpflow.kernels.RBF(lengthscales=10), UniformMeasure(0, 1))
57 |     k_cov = k.cov_X_s(np.zeros((1, 1)))
58 | 
59 |     samples_uniform = np.random.uniform(size=10000)[:, None]
60 |     k_cov_numeric = np.mean(k.base_kernel.K(np.zeros((1, 1)), samples_uniform))
61 |     print(np.abs(k_cov - k_cov_numeric))
62 |     np.testing.assert_almost_equal(
63 |         np.abs(k_cov - k_cov_numeric), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY
64 |     )
65 | 
66 | 
67 | def test_var_s_Uniform():
68 |     k = OrthogonalRBFKernel(gpflow.kernels.RBF(lengthscales=10), UniformMeasure(0, 1))
69 |     k_var = k.var_s()
70 | 
71 |     samples_uniform = np.random.uniform(size=10000)[:, None]
72 |     k_var_numeric = np.mean(k.cov_X_s(samples_uniform))
73 |     print(np.abs(k_var - k_var_numeric))
74 |     np.testing.assert_almost_equal(
75 |         np.abs(k_var - k_var_numeric), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY
76 |     )
77 | 
78 | 
79 | @pytest.mark.parametrize(
80 |     "kernel",
81 |     [gpflow.kernels.RBF(lengthscales=10)],
82 | )
83 | def test_GaussianMeasure(kernel: gpflow.kernels.Kernel):
84 |     N = 1000
85 |     k = OrthogonalRBFKernel(kernel, GaussianMeasure(0, 1))
86 |     xx = np.random.normal(0, 1, (N, 1))
87 |     mu = np.zeros(N)
88 |     f = np.random.multivariate_normal(mu, k.K(xx), size=1)
89 |     np.testing.assert_almost_equal(f.mean(), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY)
90 | 
91 | 
92 | def test_UniformMeasure():
93 |     N = 1000
94 |     k = OrthogonalRBFKernel(gpflow.kernels.RBF(lengthscales=10), UniformMeasure(0, 1))
95 |     xx = np.random.uniform(0, 1, (N, 1))
96 |     mu = np.zeros(N)
97 |     f = np.random.multivariate_normal(mu, k.K(xx), size=1)
98 |     np.testing.assert_almost_equal(f.mean(), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY)
99 | 
100 | 
101 | def test_EmpiricalMeasure():
102 |     N = 1000
103 |     location = np.reshape(np.linspace(0, 1, N), (-1, 1))
104 |     k = OrthogonalRBFKernel(
105 |         gpflow.kernels.RBF(lengthscales=10), EmpiricalMeasure(location)
106 |     )
107 | 
108 |     mu
= np.zeros(N) 109 | f = np.random.multivariate_normal(mu, k.K(location), size=1) 110 | np.testing.assert_almost_equal(f.mean(), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY) 111 | 112 | 113 | def test_EmpiricalMeasure_with_weights(): 114 | np.random.seed(44) 115 | N = 10 116 | location = np.reshape(np.linspace(0, 1, N), (-1, 1)) 117 | weights = np.random.randn(N, 1) 118 | weights /= weights.sum() 119 | k = OrthogonalRBFKernel( 120 | gpflow.kernels.RBF(lengthscales=10), EmpiricalMeasure(location, weights) 121 | ) 122 | mu = np.zeros(N) 123 | f = np.random.multivariate_normal(mu, k.K(location), size=1) 124 | np.testing.assert_almost_equal( 125 | np.dot(f, weights).mean(), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY 126 | ) 127 | 128 | 129 | def test_MOGMeasure(): 130 | np.random.seed(44) 131 | K = 5 132 | N = 10 133 | means = np.random.randn(K) 134 | weights = np.random.rand(K) 135 | weights /= weights.sum() 136 | variances = np.random.rand(K) + 0.1 137 | k = OrthogonalRBFKernel( 138 | gpflow.kernels.RBF(lengthscales=10), MOGMeasure(means, variances, weights) 139 | ) 140 | 141 | # sample from the MOG 142 | xx = np.random.randn(N, K) * np.sqrt(variances) + means 143 | index = np.random.multinomial(1, weights, N).argmax(1) 144 | xx = xx[np.arange(N), index] 145 | 146 | # sample from the GP 147 | mu = np.zeros(N) 148 | f = np.random.multivariate_normal(mu, k.K(xx.reshape(-1, 1)), size=1) 149 | np.testing.assert_almost_equal(f.mean(), 0.0, decimal=_THRESHOLD_NUMERICAL_ACCURACY) 150 | 151 | 152 | def test_MOGMeasure_equivalence_to_GaussianMeasure(): 153 | mu = np.array([3.0, 3.0]) 154 | var = np.array([5.0, 5.0]) 155 | lengthscales = 10.0 156 | weights = np.array([0.2, 0.8]) 157 | k_gmm = OrthogonalRBFKernel( 158 | gpflow.kernels.RBF(lengthscales=lengthscales), MOGMeasure(mu, var, weights) 159 | ) 160 | k_gaussian = OrthogonalRBFKernel( 161 | gpflow.kernels.RBF(lengthscales=lengthscales), GaussianMeasure(3, 5) 162 | ) 163 | 164 | xx = np.array([[-2], [2.0], [3.0]]) 165 | np.testing.assert_allclose(k_gaussian.K(xx), k_gmm.K(xx)) 166 | 167 | 168 | def test_gmm_fit(): 169 | X = np.array([1.0, 1, 1, 10, 10, 10]) 170 | measure = estimate_one_dim_gmm(K=2, X=X) 171 | np.testing.assert_almost_equal(np.sort(measure.means), np.array([1.0, 10.0])) 172 | 173 | -------------------------------------------------------------------------------- /tests/test_sobol.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
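# The f1/f2/f4 tests below share one recipe: each closed form under test is an
# expectation over z ~ N(mu, delta^2) of a product of kernel terms, and the
# test checks it against a plain Monte-Carlo average of the same product. The
# generic pattern, as a sketch (helper name ours; f must accept an (n, 1) array):
def _demo_mc_expectation(f, mu=0.0, delta=1.0, n=100_000, seed=44):
    import numpy as np

    # approximate E_{z ~ N(mu, delta^2)}[f(z)] by averaging over n samples
    z = np.random.default_rng(seed).normal(mu, delta, size=(n, 1))
    return np.mean(f(z))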
3 | # SPDX-License-Identifier: Apache-2.0
4 | 
5 | import gpflow
6 | import numpy as np
7 | import pytest
8 | import tensorflow as tf
9 | from gpflow import set_trainable
10 | from gpflow.models import SGPR
11 | from sklearn.cluster import KMeans
12 | # -
13 | 
14 | from oak.input_measures import GaussianMeasure
15 | from oak.ortho_binary_kernel import OrthogonalBinary
16 | from oak.utils import get_model_sufficient_statistics
17 | 
18 | opt = gpflow.optimizers.Scipy()
19 | 
20 | 
21 | from oak.utils import (
22 |     compute_L_binary_kernel,
23 |     compute_sobol,
24 |     f1,
25 |     f2,
26 |     f4,
27 |     grammer_to_kernel,
28 |     model_to_kernel_list,
29 | )
30 | 
31 | TOL = 1e-3
32 | 
33 | 
34 | def test_f1():
35 | 
36 |     sigma = 1
37 |     delta = 1
38 |     lengthscales = 1
39 |     mu = 0
40 | 
41 |     def kk1(x, y):
42 |         return sigma ** 2 * np.exp(-((x - y) ** 2) / (2 * lengthscales ** 2))
43 | 
44 |     def kk2(x, y):
45 |         return (
46 |             sigma ** 2
47 |             * lengthscales
48 |             * np.sqrt(lengthscales ** 2 + 2 * delta ** 2)
49 |             / (lengthscales ** 2 + delta ** 2)
50 |             * np.exp(
51 |                 -((x - mu) ** 2 + (y - mu) ** 2)
52 |                 / (2 * (lengthscales ** 2 + delta ** 2))
53 |             )
54 |         )
55 | 
56 |     np.random.seed(44)
57 |     tf.random.set_seed(44)
58 |     Z = np.random.normal(0, 1, (2, 1))
59 | 
60 |     zz = np.random.normal(0, delta, size=(100000, 1))
61 | 
62 |     y_numerical = np.mean(
63 |         [kk1(Z[0, :], zz[jj]) * kk1(Z[1, :], zz[jj]) for jj in range(100000)]
64 |     )
65 |     y = f1(Z[0, :], Z[1, :], sigma, lengthscales, delta, mu)
66 | 
67 |     print(np.abs(y_numerical - y))
68 |     assert np.abs(y_numerical - y) < TOL
69 | 
70 | 
71 | def test_f2():
72 | 
73 |     sigma = 1
74 |     delta = 1
75 |     lengthscales = 1
76 |     mu = 0
77 | 
78 |     def kk1(x, y):
79 |         return sigma ** 2 * np.exp(-((x - y) ** 2) / (2 * lengthscales ** 2))
80 | 
81 |     def kk2(x, y):
82 |         return (
83 |             sigma ** 2
84 |             * lengthscales
85 |             * np.sqrt(lengthscales ** 2 + 2 * delta ** 2)
86 |             / (lengthscales ** 2 + delta ** 2)
87 |             * np.exp(
88 |                 -((x - mu) ** 2 + (y - mu) ** 2)
89 |                 / (2 * (lengthscales ** 2 + delta ** 2))
90 |             )
91 |         )
92 | 
93 |     np.random.seed(44)
94 |     tf.random.set_seed(44)
95 |     Z = np.random.normal(0, 1, (2, 1))
96 | 
97 |     zz = np.random.normal(0, delta, size=(100000, 1))
98 | 
99 |     y_numerical = np.mean(
100 |         [kk1(Z[0, :], zz[jj]) * kk2(Z[1, :], zz[jj]) for jj in range(100000)]
101 |     )
102 |     y = f2(Z[0, :], Z[1, :], sigma, lengthscales, delta, mu)
103 | 
104 |     assert np.abs(y_numerical - y) < TOL
105 | 
106 | 
107 | def test_f4():
108 | 
109 |     sigma = 1
110 |     delta = 1
111 |     lengthscales = 1
112 |     mu = 0
113 | 
114 |     def kk1(x, y):
115 |         return sigma ** 2 * np.exp(-((x - y) ** 2) / (2 * lengthscales ** 2))
116 | 
117 |     def kk2(x, y):
118 |         return (
119 |             sigma ** 2
120 |             * lengthscales
121 |             * np.sqrt(lengthscales ** 2 + 2 * delta ** 2)
122 |             / (lengthscales ** 2 + delta ** 2)
123 |             * np.exp(
124 |                 -((x - mu) ** 2 + (y - mu) ** 2)
125 |                 / (2 * (lengthscales ** 2 + delta ** 2))
126 |             )
127 |         )
128 | 
129 |     np.random.seed(44)
130 |     tf.random.set_seed(44)
131 |     Z = np.random.normal(0, 1, (2, 1))
132 | 
133 |     zz = np.random.normal(0, delta, size=(100000, 1))
134 | 
135 |     y_numerical = np.mean(
136 |         [kk2(Z[0, :], zz[jj]) * kk2(Z[1, :], zz[jj]) for jj in range(100000)]
137 |     )
138 |     y = f4(Z[0, :], Z[1, :], sigma, lengthscales, delta, mu)
139 | 
140 |     assert np.abs(y_numerical - y) < TOL
141 | 
142 | 
143 | 
144 | @pytest.mark.skip(
145 |     reason="Too slow: takes about 30 seconds; covered by test_sobol_indices, which is faster."
146 | )
147 | def test_compute_sobol():
148 | 
149 |     selected_dims = [[0], [1], [0, 1]]
150
| P_dims = []
151 |     offset = 0
152 |     delta = 1
153 |     mu = 0
154 | 
155 |     X_train = np.random.normal(0, delta, (500, 2))
156 |     Y_train = np.reshape(
157 |         X_train[:, 0] ** 2 + X_train[:, 1] * 2 + X_train[:, 0] * X_train[:, 1], (-1, 1)
158 |     )
159 | 
160 |     kmeans = KMeans(n_clusters=500, random_state=0).fit(X_train)
161 |     Z = kmeans.cluster_centers_
162 | 
163 |     data = (X_train, Y_train)
164 | 
165 |     sgpr = SGPR(
166 |         data,
167 |         kernel=np.sum(
168 |             grammer_to_kernel(
169 |                 selected_dims, P_dims, offset, GaussianMeasure(mu, delta ** 2)
170 |             )
171 |         ),
172 |         inducing_variable=Z,
173 |     )
174 | 
175 |     set_trainable(sgpr.inducing_variable, False)
176 | 
177 |     opt.minimize(sgpr.training_loss, sgpr.trainable_variables, method="BFGS")
178 | 
179 |     alpha = get_model_sufficient_statistics(sgpr, get_L=False)
180 |     kernel_list = model_to_kernel_list(sgpr, selected_dims)
181 |     sobol = compute_sobol(sgpr, kernel_list, delta, mu, alpha)
182 | 
183 |     assert np.abs(sobol - np.array([2, 4, 1])).max() < TOL
184 | 
185 | 
186 | # try a few values of p including the corner cases
187 | @pytest.mark.parametrize("p", (0.0, 0.77, 1.0))
188 | def test_compute_L_binary_kernel(p: float):
189 | 
190 |     TOL = 1e-16
191 | 
192 |     # this test verifies that the calculation of L in Appendix G.1 is correct.
193 | 
194 |     # generate training data X
195 |     X = tf.convert_to_tensor(np.reshape(np.random.binomial(1, p, 1000), (-1, 1)))
196 |     # use compute_L_binary_kernel function to calculate the L matrix
197 |     L = compute_L_binary_kernel(X, p, 1, 0)
198 | 
199 |     # calculate L using the binary kernel directly
200 |     x0 = np.reshape(0, (-1, 1))
201 |     x1 = np.reshape(1, (-1, 1))
202 | 
203 |     K = OrthogonalBinary(p0=p, active_dims=[0])
204 | 
205 |     L1 = np.matmul(K(X, x0), K(x0, X)) * p + np.matmul(K(X, x1), K(x1, X)) * (1 - p)
206 | 
207 |     print(np.max(L - L1))
208 |     assert np.max(L - L1) < TOL
209 | 
210 | 
-------------------------------------------------------------------------------- /tests/test_sobol_oak_kernel.py: --------------------------------------------------------------------------------
1 | # +
2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
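# Several tests here (and the skipped test_compute_sobol in test_sobol.py) fit
# Y = X1^2 + 2*X2 + X1*X2 with independent X1, X2 ~ N(0, 1) and expect Sobol
# values near [2, 4, 1]. Those targets follow from the ANOVA decomposition:
#   Var(X1^2)  = E[X1^4] - E[X1^2]^2 = 3 - 1 = 2
#   Var(2*X2)  = 4 * Var(X2)         = 4
#   Var(X1*X2) = E[X1^2] * E[X2^2]   = 1   (the cross term has mean zero)
# A quick Monte-Carlo confirmation of the three targets (helper name ours):
def _demo_sobol_targets(n=200_000, seed=0):
    import numpy as np

    rng = np.random.default_rng(seed)
    x1, x2 = rng.normal(size=n), rng.normal(size=n)
    return np.var(x1 ** 2), np.var(2 * x2), np.var(x1 * x2)  # ~ (2, 4, 1)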
3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from typing import List 6 | import gpflow 7 | import numpy as np 8 | import pytest 9 | import tensorflow as tf 10 | from gpflow import set_trainable 11 | from gpflow.models import GPR, SGPR 12 | from sklearn.cluster import KMeans 13 | from oak.input_measures import GaussianMeasure 14 | from oak.model_utils import create_model_oak, oak_model 15 | from oak.oak_kernel import get_list_representation 16 | from oak.ortho_rbf_kernel import EmpiricalMeasure, OrthogonalRBFKernel 17 | from oak.utils import ( 18 | compute_L_empirical_measure, 19 | compute_sobol, 20 | compute_sobol_oak, 21 | get_model_sufficient_statistics, 22 | get_prediction_component, 23 | grammer_to_kernel, 24 | initialize_kmeans_with_binary, 25 | model_to_kernel_list, 26 | ) 27 | 28 | 29 | # - 30 | 31 | @pytest.mark.parametrize("is_oak_kernel", (False, True)) 32 | @pytest.mark.parametrize("is_sgpr", (False, True)) 33 | @pytest.mark.parametrize("lengthscale_bounds", [[1e-6, 100], None]) 34 | @pytest.mark.parametrize("share_var_across_orders", [True, False]) 35 | def test_compute_sobol( 36 | is_sgpr: bool, 37 | is_oak_kernel: bool, 38 | lengthscale_bounds: list, 39 | share_var_across_orders: bool, 40 | ): 41 | selected_dims = [[0], [1], [0, 1]] 42 | offset = 0 43 | delta = 1 44 | mu = 0 45 | N = 500 46 | X_train = np.random.normal(0, delta, (N, 2)) 47 | Y_train = np.reshape( 48 | X_train[:, 0] ** 2 + X_train[:, 1] * 2 + X_train[:, 0] * X_train[:, 1], (-1, 1) 49 | ) 50 | if is_sgpr: 51 | kmeans = KMeans(n_clusters=300, random_state=0).fit(X_train) 52 | Z = kmeans.cluster_centers_ 53 | else: 54 | Z = None # GPR model 55 | data = (X_train, Y_train) 56 | 57 | if is_oak_kernel: 58 | model = create_model_oak( 59 | data, 60 | inducing_pts=Z, 61 | optimise=False, 62 | zfixed=False, 63 | lengthscale_bounds=lengthscale_bounds, 64 | share_var_across_orders=share_var_across_orders, 65 | ) 66 | if share_var_across_orders: 67 | model.kernel.variances[0].assign(0.76) 68 | model.kernel.variances[1].assign(96.935) 69 | model.kernel.variances[2].assign(128.27) 70 | else: 71 | model.kernel.variances[0].assign(0.01) 72 | model.kernel.kernels[0].base_kernel.variance.assign(1) 73 | model.kernel.kernels[1].base_kernel.variance.assign(1) 74 | model.kernel.kernels[0].base_kernel.lengthscales.assign(2.91) 75 | model.kernel.kernels[1].base_kernel.lengthscales.assign(9.20) 76 | 77 | model_indices, sobol = compute_sobol_oak( 78 | model, 79 | delta, 80 | mu, 81 | share_var_across_orders=share_var_across_orders, 82 | ) 83 | assert len(model_indices) == len(sobol) 84 | assert model_indices == selected_dims 85 | else: 86 | if is_sgpr: 87 | model = SGPR( 88 | data, 89 | kernel=np.sum( 90 | grammer_to_kernel( 91 | selected_dims, offset, GaussianMeasure(mu, delta ** 2) 92 | ) 93 | ), 94 | inducing_variable=Z, 95 | ) 96 | set_trainable(model.inducing_variable, False) 97 | sparse_gp = True 98 | else: 99 | model = GPR( 100 | data, 101 | kernel=np.sum( 102 | grammer_to_kernel( 103 | selected_dims, offset, GaussianMeasure(mu, delta ** 2) 104 | ) 105 | ), 106 | ) 107 | sparse_gp = False 108 | model.kernel.kernels[0].base_kernel.variance.assign(99.98) 109 | model.kernel.kernels[0].base_kernel.lengthscales.assign(2.75) 110 | model.kernel.kernels[1].base_kernel.variance.assign(99.99) 111 | model.kernel.kernels[1].base_kernel.lengthscales.assign(99.99) 112 | # interaction term 113 | model.kernel.kernels[2].kernels[0].base_kernel.lengthscales.assign(4.762) 114 | 
model.kernel.kernels[2].kernels[0].base_kernel.variance.assign(99.99)
115 |         model.kernel.kernels[2].kernels[1].base_kernel.lengthscales.assign(4.499)
116 |         model.kernel.kernels[2].kernels[1].base_kernel.variance.assign(1.00)
117 |         # constant term
118 |         model.kernel.kernels[3].variance.assign(1.00)
119 |         model.likelihood.variance.assign(1e-5)
120 |         kernel_list = model_to_kernel_list(model, selected_dims)
121 |         alpha = get_model_sufficient_statistics(model, get_L=False)
122 |         sobol = compute_sobol(model, kernel_list, delta, mu, alpha, sparse_gp=sparse_gp)
123 |         gpflow.utilities.print_summary(model)
124 |         np.testing.assert_array_almost_equal(
125 |             sobol, np.array([2, 4, 1], dtype=float), decimal=1
126 |         )
127 | 
128 | 
129 | def test_sobol_empirical_measure():
130 |     x = np.random.normal(0, 1, (10, 1))
131 |     y = x ** 2 + np.cos(x) + np.random.normal(0, 0.1, (10, 1))
132 |     kernel = OrthogonalRBFKernel(
133 |         gpflow.kernels.RBF(),
134 |         EmpiricalMeasure(x, np.ones(x.shape) / 10),
135 |         active_dims=[0],
136 |     )
137 | 
138 |     m = GPR((x, y), kernel=kernel)
139 | 
140 |     var_samples = np.var(m.predict_f(x)[0].numpy())
141 |     alpha = get_model_sufficient_statistics(m, get_L=False)
142 |     L = compute_L_empirical_measure(
143 |         tf.reshape(m.kernel.measure.location, [-1, 1]),
144 |         m.kernel.measure.weights,
145 |         m.kernel,
146 |         x,
147 |     )
148 |     var_sobol = tf.tensordot(
149 |         tf.tensordot(tf.transpose(alpha), L, axes=1), alpha, axes=1
150 |     ).numpy()[0][0]
151 |     gpflow.utilities.print_summary(m)
152 |     np.testing.assert_array_almost_equal(var_samples, var_sobol, decimal=5)
153 | 
154 | 
155 | @pytest.mark.parametrize("empirical_measure", [[0], [0, 1]])
156 | @pytest.mark.parametrize("share_var_across_orders", [True, False])
157 | def test_sobol_oak_kernel_empirical(
158 |     empirical_measure: List[float], share_var_across_orders: bool
159 | ):
160 |     # test that Sobol with an empirical measure works through the model API; the test
161 |     # compares the calculated Sobol indices with the empirical variances of the function components
162 |     np.random.seed(44)
163 |     n = 100
164 |     m = 50
165 |     X = np.random.normal(0, 1, (n, 2))
166 |     y = np.reshape(X[:, 0] ** 2 + X[:, 1] * 2 + X[:, 0] * X[:, 1], (-1, 1))
167 |     oak = oak_model(
168 |         max_interaction_depth=X.shape[1],
169 |         num_inducing=m,
170 |         sparse=True,
171 |         empirical_measure=empirical_measure,
172 |         share_var_across_orders=share_var_across_orders,
173 |     )
174 |     oak.fit(X, y, optimise=False)
175 |     # fix hyperparameters (rather than optimising) to reduce computation time
176 |     oak.m.kernel.kernels[0].base_kernel.lengthscales.assign(2)
177 |     oak.m.kernel.kernels[1].base_kernel.lengthscales.assign(5)
178 |     if share_var_across_orders:
179 |         oak.m.kernel.variances[0].assign(1e-3)
180 |         oak.m.kernel.variances[1].assign(90)
181 |         oak.m.kernel.variances[2].assign(15)
182 |     gpflow.utilities.print_summary(oak.m)
183 | 
184 |     oak.get_sobol()
185 |     alpha = get_model_sufficient_statistics(oak.m, get_L=False)
186 |     prediction_list = get_prediction_component(
187 |         oak.m,
188 |         alpha,
189 |         oak._transform_x(X),
190 |         share_var_across_orders=share_var_across_orders,
191 |     )
192 |     # calculate variance of each functional component with empirical data
193 |     var_samples = np.array(
194 |         [
195 |             np.var(prediction_list[0].numpy()),
196 |             np.var(prediction_list[1].numpy()),
197 |             np.var(prediction_list[2].numpy()),
198 |         ]
199 |     )
200 |     var_samples = var_samples / var_samples.sum()
201 |     np.testing.assert_array_almost_equal(var_samples, oak.normalised_sobols, decimal=1)
202 | 
203 | @pytest.mark.parametrize("num_dims", (2, 7))
204 | 
@pytest.mark.parametrize("zfixed", (True, False)) 205 | @pytest.mark.parametrize("normalisation", (True, False)) 206 | @pytest.mark.parametrize("share_var_across_orders", (True, False)) 207 | def test_sobol_indices( 208 | num_dims, 209 | zfixed, 210 | concrete_normalised_10_rows_data, 211 | normalisation, 212 | share_var_across_orders, 213 | ): 214 | X, y = concrete_normalised_10_rows_data 215 | max_interaction_depth = 2 216 | optimise = False 217 | Z = X[:3, :] 218 | 219 | def create_sgpr(X, y): 220 | sgpr = create_model_oak( 221 | (X, y), 222 | max_interaction_depth=max_interaction_depth, 223 | constrain_orthogonal=True, 224 | inducing_pts=Z, 225 | optimise=optimise, 226 | zfixed=zfixed, 227 | ) 228 | selected_dims, kernel_list = get_list_representation( 229 | sgpr.kernel, num_dims=num_dims 230 | ) 231 | alpha = get_model_sufficient_statistics(sgpr, get_L=False) 232 | return sgpr, selected_dims, kernel_list, alpha 233 | 234 | sgpr, selected_dims, kernel_list, alpha = create_sgpr(X, y) 235 | delta = 1 236 | mu = 0 237 | model_indices, sobol = compute_sobol_oak( 238 | sgpr, 239 | delta, 240 | mu, 241 | share_var_across_orders=share_var_across_orders, 242 | ) 243 | assert np.all(np.array(sobol) > 0) 244 | 245 | 246 | @pytest.mark.parametrize("is_sgpr", (False, True)) 247 | @pytest.mark.parametrize("both_binary", (False, True)) 248 | def test_compute_sobol_with_binary(is_sgpr: bool, both_binary: bool): 249 | # test sobol computation for two cases: 1) two binary kernel, 2) one continuous and one binary kernel 250 | delta = 1 251 | mu = 0 252 | N = 200 253 | 254 | p1 = 0.5 255 | np.random.seed(42) 256 | X1 = np.reshape(np.random.binomial(1, p1, N), (N, 1)) 257 | 258 | selected_dims = [[0], [1], [0, 1]] 259 | 260 | if both_binary: 261 | p2 = 0.9 262 | X2 = np.reshape(np.random.binomial(1, p2, N), (N, 1)) 263 | else: 264 | X2 = np.reshape(np.random.normal(mu, np.sqrt(delta), N), (N, 1)) 265 | 266 | X_train = np.concatenate((X1, X2), 1).astype("float64") 267 | Y_train = np.reshape( 268 | X_train[:, 0] 269 | + X_train[:, 1] 270 | + X_train[:, 0] * X_train[:, 1] 271 | + np.random.normal(0, 0.1, N), 272 | (-1, 1), 273 | ) 274 | 275 | Y_train = Y_train - Y_train.mean() 276 | if is_sgpr: 277 | Z = ( 278 | initialize_kmeans_with_binary(X_train, binary_index=[0, 1], n_clusters=100) 279 | if both_binary 280 | else initialize_kmeans_with_binary( 281 | X_train, binary_index=[0], continuous_index=[1], n_clusters=100 282 | ) 283 | ) 284 | else: 285 | Z = None 286 | 287 | data = (X_train, Y_train) 288 | 289 | p0 = [1 - p1, 1 - p2] if both_binary else [1 - p1, None] 290 | 291 | model = create_model_oak(data, inducing_pts=Z, optimise=False, zfixed=True, p0=p0) 292 | 293 | if not both_binary: 294 | model.kernel.kernels[1].base_kernel.lengthscales.assign(9.20) 295 | 296 | model_indices, sobol = compute_sobol_oak( 297 | model, 298 | delta, 299 | mu, 300 | ) 301 | 302 | assert len(model_indices) == len(sobol) 303 | assert model_indices == selected_dims 304 | assert np.all(np.array(sobol) >= 0) 305 | 306 | print(sobol) 307 | if both_binary: 308 | s1 = (1 + p2) ** 2 * p1 * (1 - p1) 309 | s2 = (1 + p1) ** 2 * p2 * (1 - p2) 310 | print( 311 | np.array( 312 | [ 313 | s1, 314 | s2, 315 | p1 316 | - p1 ** 2 317 | + p2 318 | - p2 ** 2 319 | + 5 * p1 * p2 320 | - p1 ** 2 * p2 ** 2 321 | - 2 * p1 ** 2 * p2 322 | - 2 * p1 * p2 ** 2 323 | - s1 324 | - s2, 325 | ], 326 | dtype=float, 327 | ) 328 | ) 329 | np.testing.assert_array_almost_equal( 330 | sobol, 331 | np.array( 332 | [ 333 | s1, 334 | s2, 335 | p1 336 | - p1 ** 2 337 
| + p2 338 | - p2 ** 2 339 | + 5 * p1 * p2 340 | - p1 ** 2 * p2 ** 2 341 | - 2 * p1 ** 2 * p2 342 | - 2 * p1 * p2 ** 2 343 | - s1 344 | - s2, 345 | ], 346 | dtype=float, 347 | ), 348 | decimal=1, 349 | ) 350 | 351 | else: 352 | s1 = p1 * (1 - p1) 353 | s2 = delta * (1 + p1) ** 2 354 | print( 355 | np.array( 356 | [s1, s2, delta + p1 * (1 - p1) + 3 * p1 * delta - s1 - s2], dtype=float 357 | ) 358 | ) 359 | np.testing.assert_array_almost_equal( 360 | sobol, 361 | np.array( 362 | [s1, s2, delta + p1 * (1 - p1) + 3 * p1 * delta - s1 - s2], dtype=float 363 | ), 364 | decimal=1, 365 | ) 366 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # + 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import numpy as np 6 | import pytest 7 | import tensorflow as tf 8 | # - 9 | 10 | from oak.model_utils import oak_model 11 | from oak.utils import ( 12 | get_model_sufficient_statistics, 13 | get_prediction_component, 14 | initialize_kmeans_with_binary, 15 | ) 16 | 17 | 18 | @pytest.mark.parametrize("binary_index", [[0, 1, 2], [0, 2], [1]]) 19 | @pytest.mark.parametrize("n_cluster", [1, 50]) 20 | def test_initialize_kmeans_with_binary(binary_index: list, n_cluster: int): 21 | np.random.seed(44) 22 | continuous_index = list(set(range(3)) - set(binary_index)) 23 | N = 100 24 | dim = len(binary_index) + len(continuous_index) 25 | X = np.zeros((N, dim)) 26 | for i in binary_index: 27 | print("shape of X ", X.shape) 28 | print("i = ", i) 29 | X[:, i] = np.random.binomial(1, 0.33, N) 30 | print("continuous_index", continuous_index) 31 | if len(continuous_index) > 0: 32 | for j in continuous_index: 33 | X[:, j] = np.random.normal(0, 4, N) 34 | else: 35 | continuous_index = None 36 | Z = initialize_kmeans_with_binary(X, binary_index, continuous_index, n_cluster) 37 | print(X) 38 | assert Z.shape == (n_cluster, dim) 39 | assert isinstance(Z, np.ndarray) 40 | 41 | 42 | @pytest.mark.parametrize("share_var_across_orders", [True, False]) 43 | def test_get_prediction_component(share_var_across_orders: bool): 44 | # sum of predictions for all the functional components equals the final prediction 45 | np.random.seed(44) 46 | tf.random.set_seed(44) 47 | 48 | N = 2000 49 | X = np.random.normal(0, 1, (N, 3)) 50 | y = ( 51 | X[:, 0] ** 2 + X[:, 1] + X[:, 1] * X[:, 2] + np.random.normal(0, 0.01, (N,)) 52 | ).reshape(-1, 1) 53 | 54 | oak = oak_model( 55 | num_inducing=50, 56 | max_interaction_depth=2, 57 | share_var_across_orders=share_var_across_orders, 58 | ) 59 | oak.fit(X, y, optimise=False) 60 | oak.m.kernel.variances[0].assign(1e-16) 61 | oak.alpha = get_model_sufficient_statistics(oak.m, get_L=False) 62 | 63 | prediction_list = get_prediction_component( 64 | oak.m, 65 | oak.alpha, 66 | oak._transform_x(X), 67 | share_var_across_orders=share_var_across_orders, 68 | ) 69 | out = np.zeros(y.shape[0]) 70 | for i in range(len(prediction_list)): 71 | out += prediction_list[i].numpy() 72 | 73 | out_all = oak.m.predict_f(oak._transform_x(X))[0].numpy()[:, 0] 74 | print(f"variance 0 = {oak.m.kernel.variances[0]}") 75 | np.testing.assert_allclose(out, out_all) 76 | 77 | --------------------------------------------------------------------------------
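# A note on the closed forms asserted in test_compute_sobol_with_binary
# (tests/test_sobol_oak_kernel.py): for f(x1, x2) = x1 + x2 + x1*x2 with
# independent Bernoulli inputs, E[f | x1] - E[f] = (x1 - p1) * (1 + p2), whose
# variance is s1 = (1 + p2)^2 * p1 * (1 - p1); s2 follows by symmetry, and the
# interaction variance is Var(f) - s1 - s2, which expands to the polynomial in
# that test. A hedged Monte-Carlo check of s1 (helper name ours):
def _demo_binary_main_effect(p1=0.5, p2=0.9, n=500_000, seed=0):
    import numpy as np

    rng = np.random.default_rng(seed)
    x1 = rng.binomial(1, p1, n)
    x2 = rng.binomial(1, p2, n)
    f = x1 + x2 + x1 * x2
    # empirical main effect of x1: E[f | x1] - E[f], estimated from the samples
    cond = np.where(x1 == 1, f[x1 == 1].mean(), f[x1 == 0].mean())
    return np.var(cond - f.mean())  # ~ (1 + p2)**2 * p1 * (1 - p1)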