├── Day1 └── probAI-day1.pdf ├── Day2-Evening └── probAI-day2_evening.pdf ├── Day2-AfterLunch ├── probAI-day2_after_lunch.pdf └── notebooks │ ├── Figures │ └── simple_pyro_exercise.png │ ├── solution_simple_gaussian_model_pyro.ipynb │ ├── student_simple_gaussian_model_pyro.ipynb │ └── students_bayesian_logistic_regression.ipynb ├── Day2-BeforeLunch ├── probAI-day2_before_lunch.pdf └── notebooks │ ├── Bayesian_linear_regression.png │ ├── Figures │ ├── updating_equations.png │ └── students_simple_model.png │ ├── solution_simple_model.ipynb │ └── students_simple_model.ipynb ├── README.md └── LICENSE /Day1/probAI-day1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day1/probAI-day1.pdf -------------------------------------------------------------------------------- /Day2-Evening/probAI-day2_evening.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day2-Evening/probAI-day2_evening.pdf -------------------------------------------------------------------------------- /Day2-AfterLunch/probAI-day2_after_lunch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day2-AfterLunch/probAI-day2_after_lunch.pdf -------------------------------------------------------------------------------- /Day2-BeforeLunch/probAI-day2_before_lunch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day2-BeforeLunch/probAI-day2_before_lunch.pdf -------------------------------------------------------------------------------- /Day2-AfterLunch/notebooks/Figures/simple_pyro_exercise.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day2-AfterLunch/notebooks/Figures/simple_pyro_exercise.png -------------------------------------------------------------------------------- /Day2-BeforeLunch/notebooks/Bayesian_linear_regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day2-BeforeLunch/notebooks/Bayesian_linear_regression.png -------------------------------------------------------------------------------- /Day2-BeforeLunch/notebooks/Figures/updating_equations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day2-BeforeLunch/notebooks/Figures/updating_equations.png -------------------------------------------------------------------------------- /Day2-BeforeLunch/notebooks/Figures/students_simple_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/2022-ProbAI/HEAD/Day2-BeforeLunch/notebooks/Figures/students_simple_model.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ProbAI 2022 - Probabilistic Programming, Variational Inference and Optimization Tutorial with Pyro 2 | 3 | 4 | ## Day 1 (June 13 -- 1.30pm - 4pm) 5 | 6 | * [Slides](https://github.com/PGM-Lab/2022-ProbAI/raw/main/Day1/probAI-day1.pdf) 7 | * Notebook: [students_PPLs_Intro](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day1/notebooks/students_PPLs_Intro.ipynb) 8 | * Notebook: [solutions_PPLs_Intro](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day1/notebooks/solutions_PPLs_Intro.ipynb) 9 | 10 | 11 | ## Day 2 - Before Lunch (June 14 -- 9am-12pm) 12 | * 
[Slides](https://github.com/PGM-Lab/2022-ProbAI/raw/main/Day2-BeforeLunch/probAI-day2_before_lunch.pdf) 13 | * Notebook: [students_simple_model](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-BeforeLunch/notebooks/students_simple_model.ipynb) 14 | * Notebook: [solutions_simple_model](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-BeforeLunch/notebooks/solution_simple_model.ipynb) 15 | * Notebook: [CAVI-linreg](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-BeforeLunch/notebooks/CAVI-linreg.ipynb) 16 | 17 | 18 | ## Day 2 - After Lunch (June 14 -- 1pm-4pm) 19 | * [Slides](https://github.com/PGM-Lab/2022-ProbAI/raw/main/Day2-AfterLunch/probAI-day2_after_lunch.pdf) 20 | * Notebook: [BayesianNeuralNetworks](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/BayesianNeuralNetworks.ipynb) 21 | * Notebook: [students_BBVI](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/students_BBVI.ipynb) 22 | * Notebook: [solutions_BBVI](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/solutions_BBVI.ipynb) 23 | * Notebook: [student_simple_gaussian_model_pyro](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/student_simple_gaussian_model_pyro.ipynb) 24 | * Notebook: [solution_simple_gaussian_model_pyro](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/solution_simple_gaussian_model_pyro.ipynb) 25 | * Notebook: [bayesian_linear_regression](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/bayesian_linear_regression.ipynb) 26 | * Notebook: [students_bayesian_logistic_regression](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/students_bayesian_logistic_regression.ipynb) 27 | 
* Notebook: [solutions_bayesian_logistic_regression](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-AfterLunch/notebooks/solutions_bayesian_logistic_regression.ipynb) 28 | 29 | 30 | ## Day 2 - Evening (June 14 -- 4.30pm-5.30pm) 31 | * [Slides](https://github.com/PGM-Lab/2022-ProbAI/raw/main/Day2-Evening/probAI-day2_evening.pdf) 32 | * Notebook: [students_VAE](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-Evening/notebooks/students_VAE.ipynb) 33 | * Notebook: [solutions_VAE](https://colab.research.google.com/github/PGM-Lab/2022-ProbAI/blob/main/Day2-Evening/notebooks/solutions_VAE.ipynb) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Day2-AfterLunch/notebooks/solution_simple_gaussian_model_pyro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "solution_simple_gaussian_model_pyro.ipynb", 7 | "provenance": [], 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "display_name": "Python 3", 12 | "language": "python", 13 | "name": "python3" 14 | }, 15 | "language_info": { 16 | "codemirror_mode": { 17 | "name": "ipython", 18 | "version": 3 19 | }, 20 | "file_extension": ".py", 21 | "mimetype": "text/x-python", 22 | "name": "python", 23 | "nbconvert_exporter": "python", 24 | "pygments_lexer": "ipython3", 25 | "version": "3.6.6" 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "view-in-github", 33 | "colab_type": "text" 34 | }, 35 | "source": [ 36 | "\"Open" 37 | ] 
38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "dUtkG-f8oeI4" 43 | }, 44 | "source": [ 45 | "" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "metadata": { 51 | "id": "45sg92iioeI7", 52 | "colab": { 53 | "base_uri": "https://localhost:8080/" 54 | }, 55 | "outputId": "d14dc0f2-69f7-42c8-8cc0-4736dbc0ce21" 56 | }, 57 | "source": [ 58 | "!pip install -q --upgrade pyro-ppl torch \n", 59 | "\n", 60 | "import numpy as np\n", 61 | "import torch\n", 62 | "from torch.distributions import constraints\n", 63 | "import matplotlib.pyplot as plt\n", 64 | "\n", 65 | "import pyro\n", 66 | "from pyro.distributions import Normal, Gamma, MultivariateNormal\n", 67 | "from pyro.infer import SVI, Trace_ELBO\n", 68 | "from pyro.optim import Adam\n", 69 | "import pyro.optim as optim" 70 | ], 71 | "execution_count": null, 72 | "outputs": [ 73 | { 74 | "output_type": "stream", 75 | "name": "stdout", 76 | "text": [ 77 | "\u001b[K |████████████████████████████████| 750.6 MB 10 kB/s \n", 78 | "\u001b[?25h" 79 | ] 80 | } 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "id": "upVGYRd6oeI8" 87 | }, 88 | "source": [ 89 | "## Generate some data" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "metadata": { 95 | "id": "x1SeB_bboeI8" 96 | }, 97 | "source": [ 98 | "# Sample data\n", 99 | "np.random.seed(123)\n", 100 | "N = 100\n", 101 | "correct_mean = 5\n", 102 | "correct_precision = 1\n", 103 | "data = torch.tensor(np.random.normal(loc=correct_mean, scale=np.sqrt(1./correct_precision), size=N), dtype=torch.float)\n" 104 | ], 105 | "execution_count": null, 106 | "outputs": [] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": { 111 | "id": "Ho5Uc-OToeI9" 112 | }, 113 | "source": [ 114 | "## Our model specification" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "metadata": { 120 | "id": "stcxQyx5oeI9" 121 | }, 122 | "source": [ 123 | "def model(data):\n", 124 | " gamma = pyro.sample(\"gamma\", 
Gamma(torch.tensor(1.), torch.tensor(1.)))\n", 125 | " mu = pyro.sample(\"mu\", Normal(torch.zeros(1), torch.tensor(10000.0)))\n", 126 | " with pyro.plate(\"data\", len(data)):\n", 127 | " pyro.sample(\"x\", Normal(loc=mu, scale=torch.sqrt(1. / gamma)), obs=data)" 128 | ], 129 | "execution_count": null, 130 | "outputs": [] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "id": "DsM46XV-oeI9" 136 | }, 137 | "source": [ 138 | "## Our guide specification" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "metadata": { 144 | "id": "N2ejdORDoeI-" 145 | }, 146 | "source": [ 147 | "def guide(data=None):\n", 148 | " alpha_q = pyro.param(\"alpha_q\", torch.tensor(1.), constraint=constraints.positive)\n", 149 | " beta_q = pyro.param(\"beta_q\", torch.tensor(1.), constraint=constraints.positive)\n", 150 | " pyro.sample(\"gamma\", Gamma(alpha_q, beta_q))\n", 151 | "\n", 152 | " mean_q = pyro.param(\"mean_q\", torch.tensor(0.))\n", 153 | " scale_q = pyro.param(\"scale_q\", torch.tensor(1.), constraint=constraints.positive)\n", 154 | " pyro.sample(\"mu\", Normal(mean_q, scale_q))" 155 | ], 156 | "execution_count": null, 157 | "outputs": [] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": { 162 | "id": "Cp_F7FgnoeI-" 163 | }, 164 | "source": [ 165 | "## Do learning" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "metadata": { 171 | "colab": { 172 | "base_uri": "https://localhost:8080/" 173 | }, 174 | "id": "D4CViT55oeI-", 175 | "outputId": "ee1de6f8-ff7b-40e7-ab4a-dad06f05aac2" 176 | }, 177 | "source": [ 178 | "# setup the optimizer\n", 179 | "adam_args = {\"lr\": 0.01}\n", 180 | "optimizer = Adam(adam_args)\n", 181 | "\n", 182 | "pyro.clear_param_store()\n", 183 | "svi = SVI(model, guide, optimizer, loss=Trace_ELBO())\n", 184 | "train_elbo = []\n", 185 | "# training loop\n", 186 | "for epoch in range(3000):\n", 187 | " loss = svi.step(data)\n", 188 | " train_elbo.append(-loss)\n", 189 | " if (epoch % 500) == 0:\n", 190 
| " print(\"[epoch %03d] average training loss: %.4f\" % (epoch, loss))" 191 | ], 192 | "execution_count": null, 193 | "outputs": [ 194 | { 195 | "output_type": "stream", 196 | "name": "stderr", 197 | "text": [ 198 | "/usr/local/lib/python3.7/dist-packages/pyro/infer/svi.py:53: FutureWarning: The `num_samples` argument to SVI is deprecated and will be removed in a future release. Use `pyro.infer.Predictive` class to draw samples from the posterior.\n", 199 | " \"number of iterations.\",\n" 200 | ] 201 | }, 202 | { 203 | "output_type": "stream", 204 | "name": "stdout", 205 | "text": [ 206 | "[epoch 000] average training loss: 4678.7293\n", 207 | "[epoch 500] average training loss: 277.2488\n", 208 | "[epoch 1000] average training loss: 251.4889\n", 209 | "[epoch 1500] average training loss: 217.9515\n", 210 | "[epoch 2000] average training loss: 174.1540\n", 211 | "[epoch 2500] average training loss: 165.9747\n" 212 | ] 213 | } 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "metadata": { 219 | "colab": { 220 | "base_uri": "https://localhost:8080/" 221 | }, 222 | "id": "lpmXAE6xoeJA", 223 | "outputId": "b6a193fd-e360-4faf-a1cd-76f7f7c52e7f" 224 | }, 225 | "source": [ 226 | "for name, value in pyro.get_param_store().items():\n", 227 | " print(name, pyro.param(name).data.numpy())" 228 | ], 229 | "execution_count": null, 230 | "outputs": [ 231 | { 232 | "output_type": "stream", 233 | "name": "stdout", 234 | "text": [ 235 | "alpha_q 3.4341245\n", 236 | "beta_q 4.0748396\n", 237 | "mean_q 5.015445\n", 238 | "scale_q 0.19180033\n" 239 | ] 240 | } 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "metadata": { 246 | "colab": { 247 | "base_uri": "https://localhost:8080/", 248 | "height": 279 249 | }, 250 | "id": "bb39F8-loeJB", 251 | "outputId": "2e57dc01-e3b2-423b-b6fa-f09ad8e5ac9a" 252 | }, 253 | "source": [ 254 | "plt.plot(range(len(train_elbo)), train_elbo)\n", 255 | "plt.xlabel(\"Number of iterations\")\n", 256 | "plt.ylabel(\"ELBO\")\n", 257 | 
"plt.show()" 258 | ], 259 | "execution_count": null, 260 | "outputs": [ 261 | { 262 | "output_type": "display_data", 263 | "data": { 264 | "text/plain": [ 265 | "
" 266 | ], 267 | "image/png": "\n" 268 | }, 269 | "metadata": { 270 | "needs_background": "light" 271 | } 272 | } 273 | ] 274 | } 275 | ] 276 | } -------------------------------------------------------------------------------- /Day2-AfterLunch/notebooks/student_simple_gaussian_model_pyro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "language_info": { 6 | "codemirror_mode": { 7 | "name": "ipython", 8 | "version": 3 9 | }, 10 | "file_extension": ".py", 11 | "mimetype": "text/x-python", 12 | "name": "python", 13 | "nbconvert_exporter": "python", 14 | "pygments_lexer": "ipython3", 15 | "version": "3.7.0" 16 | }, 17 | "colab": { 18 | "name": "student_simple_gaussian_model_pyro.ipynb", 19 | "provenance": [], 20 | "include_colab_link": true 21 | } 22 | }, 23 | "cells": [ 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "id": "view-in-github", 28 | "colab_type": "text" 29 | }, 30 | "source": [ 31 | "\"Open" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "wJ5e7RVcoKT-" 38 | }, 39 | "source": [ 40 | "\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "vNaU7LMtoKUB" 47 | }, 48 | "source": [ 49 | "!pip install -q --upgrade pyro-ppl torch \n", 50 | "\n", 51 | "\n", 52 | "import numpy as np\n", 53 | "import torch\n", 54 | "from torch.distributions import constraints\n", 55 | "import matplotlib.pyplot as plt\n", 56 | "\n", 57 | "import pyro\n", 58 | "from pyro.distributions import Normal, Gamma, MultivariateNormal\n", 59 | "from pyro.infer import SVI, Trace_ELBO\n", 60 | "from pyro.optim import Adam\n", 61 | "import pyro.optim as optim" 62 | ], 63 | "execution_count": null, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": { 69 | "id": "bIp4KvsEoKUB" 70 | }, 71 | "source": [ 72 | "## Generate some data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | 
"metadata": { 78 | "id": "IvSrjN_4oKUC" 79 | }, 80 | "source": [ 81 | "# Sample data\n", 82 | "np.random.seed(123)\n", 83 | "N = 100\n", 84 | "correct_mean = 5\n", 85 | "correct_precision = 1\n", 86 | "data = torch.tensor(np.random.normal(loc=correct_mean, scale=np.sqrt(1./correct_precision), size=N), dtype=torch.float)\n" 87 | ], 88 | "execution_count": null, 89 | "outputs": [] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "9TAlxpNToKUC" 95 | }, 96 | "source": [ 97 | "## Our model specification" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "jE4ItwMhoKUD" 104 | }, 105 | "source": [ 106 | "def model(data):\n", 107 | " gamma = pyro.sample(\"gamma\", Gamma(torch.tensor(1.), torch.tensor(1.)))\n", 108 | " mu = pyro.sample(\"mu\", Normal(torch.zeros(1), torch.tensor(10000.0)))\n", 109 | " with pyro.plate(\"data\", len(data)):\n", 110 | " pyro.sample(\"x\", Normal(loc=mu, scale=torch.sqrt(1. / gamma)), obs=data)" 111 | ], 112 | "execution_count": null, 113 | "outputs": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "U5AFBtuEoKUD" 119 | }, 120 | "source": [ 121 | "## Our guide specification" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "metadata": { 127 | "id": "venykT3VoKUD" 128 | }, 129 | "source": [ 130 | "# Define the right guide for the above model, including the variational parameters. 
\n", 131 | "def guide(data=None):\n" 132 | ], 133 | "execution_count": null, 134 | "outputs": [] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "id": "0G7u3JOLoKUE" 140 | }, 141 | "source": [ 142 | "## Do learning" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "metadata": { 148 | "id": "Sds04uVeoKUE", 149 | "outputId": "7779aa97-e02d-48df-d69d-341f96fc28c2" 150 | }, 151 | "source": [ 152 | "# setup the optimizer\n", 153 | "adam_args = {\"lr\": 0.01}\n", 154 | "optimizer = Adam(adam_args)\n", 155 | "\n", 156 | "pyro.clear_param_store()\n", 157 | "svi = SVI(model, guide, optimizer, loss=Trace_ELBO())\n", 158 | "train_elbo = []\n", 159 | "# training loop\n", 160 | "for epoch in range(3000):\n", 161 | " loss = svi.step(data)\n", 162 | " train_elbo.append(-loss)\n", 163 | " if (epoch % 500) == 0:\n", 164 | " print(\"[epoch %03d] average training loss: %.4f\" % (epoch, loss))" 165 | ], 166 | "execution_count": null, 167 | "outputs": [ 168 | { 169 | "output_type": "stream", 170 | "text": [ 171 | "[epoch 000] average training loss: 1599.6830\n", 172 | "[epoch 500] average training loss: 546.2211\n", 173 | "[epoch 1000] average training loss: 284.1279\n", 174 | "[epoch 1500] average training loss: 185.3350\n", 175 | "[epoch 2000] average training loss: 195.8745\n", 176 | "[epoch 2500] average training loss: 178.9226\n" 177 | ], 178 | "name": "stdout" 179 | } 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "metadata": { 185 | "id": "Kz5F9TzKoKUF", 186 | "outputId": "64b37913-0438-445e-81e6-c990d148b320" 187 | }, 188 | "source": [ 189 | "for name, value in pyro.get_param_store().items():\n", 190 | " print(name, pyro.param(name))" 191 | ], 192 | "execution_count": null, 193 | "outputs": [ 194 | { 195 | "output_type": "stream", 196 | "text": [ 197 | "rate tensor(1.8581, requires_grad=True)\n", 198 | "conc tensor(2.1757, requires_grad=True)\n", 199 | "mu_mean tensor(5.0201, requires_grad=True)\n", 200 | "mu_scale tensor(0.1044, 
requires_grad=True)\n" 201 | ], 202 | "name": "stdout" 203 | } 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "metadata": { 209 | "id": "-W3H0l_toKUG", 210 | "outputId": "1489c2ea-6b37-4f09-8a8c-523019601964" 211 | }, 212 | "source": [ 213 | "plt.plot(range(len(train_elbo)), train_elbo)\n", 214 | "plt.xlabel(\"Number of iterations\")\n", 215 | "plt.ylabel(\"ELBO\")\n", 216 | "plt.show()" 217 | ], 218 | "execution_count": null, 219 | "outputs": [ 220 | { 221 | "output_type": "display_data", 222 | "data": { 223 | "image/png": "\n", 224 | "text/plain": [ 225 | "
" 226 | ] 227 | }, 228 | "metadata": { 229 | "tags": [] 230 | } 231 | } 232 | ] 233 | } 234 | ] 235 | } -------------------------------------------------------------------------------- /Day2-AfterLunch/notebooks/students_bayesian_logistic_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "wXcRh2TQfyhp" 17 | }, 18 | "source": [ 19 | "## Setup\n", 20 | "Let's begin by installing and importing the modules we'll need." 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "id": "EggRgZ1gfyhq" 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "!pip install -q pyro-ppl torch\n", 32 | "\n", 33 | "import numpy as np\n", 34 | "import pandas as pd\n", 35 | "import seaborn as sns\n", 36 | "import torch\n", 37 | "import types\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "from pyro.infer import Predictive\n", 40 | "import pyro\n", 41 | "from pyro.distributions import Normal, Uniform, Delta, Gamma, Binomial\n", 42 | "from pyro.infer import SVI, Trace_ELBO\n", 43 | "from pyro.optim import Adam\n", 44 | "import torch.distributions.constraints as constraints\n", 45 | "import pyro.optim as optim\n", 46 | "from pyro.contrib.autoguide import AutoNormal\n", 47 | "import matplotlib.pyplot as plt\n", 48 | "\n", 49 | "import warnings\n", 50 | "warnings.simplefilter(action='ignore', category=FutureWarning)\n", 51 | "\n", 52 | "import ssl\n", 53 | "ssl._create_default_https_context = ssl._create_unverified_context\n", 54 | "\n", 55 | "# for CI testing\n", 56 | "pyro.set_rng_seed(1)\n", 57 | "pyro.enable_validation(True)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "KtC9nacRfyhq" 64 | }, 65 | "source": [ 66 | "# 
Dataset \n", 67 | "\n", 68 | "The following example is taken from \\[1\\]. We would like to explore the relationship between topographic heterogeneity of a nation as measured by the Terrain Ruggedness Index (variable *rugged* in the dataset) and its GDP per capita. In particular, it was noted by the authors in \\[1\\] that terrain ruggedness or bad geography is related to poorer economic performance outside of Africa, but rugged terrains have had a reverse effect on income for African nations. Let us look at the data \\[2\\] and investigate this relationship. We will be focusing on three features from the dataset:\n", 69 | " - `cont_africa`: whether the given nation is in Africa\n", 70 | " - `rugged`: quantifies the Terrain Ruggedness Index\n", 71 | " - `rgdppc_2000`: Real GDP per capita for the year 2000\n", 72 | " \n", 73 | " \n", 74 | "We will take the logarithm for the response variable GDP as it tends to vary exponentially. " 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "id": "akcHul9xfyhr" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "DATA_URL = \"https://raw.githubusercontent.com/pyro-ppl/brmp/master/brmp/examples/rugged_data.csv\"\n", 86 | "data = pd.read_csv(DATA_URL, encoding=\"ISO-8859-1\")\n", 87 | "df = data[[\"cont_africa\", \"rugged\", \"rgdppc_2000\"]]\n", 88 | "df = df[np.isfinite(df.rgdppc_2000)]\n", 89 | "df[\"rgdppc_2000\"] = np.log(df[\"rgdppc_2000\"])\n", 90 | "\n", 91 | "data = torch.tensor(df.values, dtype=torch.float)\n", 92 | "x_data, y_data = data[:, (1,2)], data[:, 0]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "id": "EjOGg_Dafyhu" 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "# Display first 10 entries \n", 104 | "display(df[0:10])" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "id": "itLI29xgqkaV" 112 | }, 113 | "outputs": [], 114 | "source": [ 
115 | "def prepare_figure(title='Scatter plot of data', x_data_ = None, y_data_ = None):\n", 116 | " \"\"\"\n", 117 | " Plot the data and return the figure axis for possible subsequent additional plotting.\n", 118 | " :param title: Title of the plot\n", 119 | " :param x_data_: Nx2 numpy array or torch tensor\n", 120 | " :param y_data_: Nx1 numpy array or torch tensor with the class labels.\n", 121 | " :return: Figure axis.\n", 122 | " \"\"\"\n", 123 | " if x_data_ is None and y_data_ is None:\n", 124 | " x_data_ = x_data\n", 125 | " y_data_ = y_data\n", 126 | "\n", 127 | " if type(x_data_) is torch.Tensor:\n", 128 | " x_data_ = x_data_.numpy()\n", 129 | " y_data_ = y_data_.numpy()\n", 130 | "\n", 131 | " xx, yy = np.mgrid[np.floor(np.min(x_data_[:, 0])):np.ceil(np.max(x_data_[:, 0])):.01,\n", 132 | " np.floor(np.min(x_data_[:, 1])):np.ceil(np.max(x_data_[:, 1])):.01]\n", 133 | "\n", 134 | " grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)\n", 135 | "\n", 136 | " f, ax = plt.subplots(figsize=(8, 6))\n", 137 | " f.suptitle(title, fontsize=16)\n", 138 | "\n", 139 | " ax.scatter(x_data[y_data_==0,0], x_data[y_data_==0, 1], c='g', s=50,\n", 140 | " cmap=\"RdBu\", vmin=-.2, vmax=1.2,\n", 141 | " edgecolor=\"white\", linewidth=1, label='Non-African')\n", 142 | "\n", 143 | " ax.scatter(x_data[y_data_==1,0], x_data[y_data_==1, 1], c='orange', s=50,\n", 144 | " cmap=\"RdBu\", vmin=-.2, vmax=1.2,\n", 145 | " edgecolor=\"white\", linewidth=1, label='African')\n", 146 | "\n", 147 | " ax.set(aspect=\"equal\",\n", 148 | " xlim=(0, 7), ylim=(6, 11),\n", 149 | " xlabel=\"Rugged\", ylabel=\"Log GDP\")\n", 150 | "\n", 151 | " ax.legend()\n", 152 | "\n", 153 | " return ax, grid, xx, yy" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "id": "sgTWhdZIqkaW" 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "prepare_figure()\n", 165 | "plt.show()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 
170 | "metadata": { 171 | "id": "vFJeNXiUfyhx" 172 | }, 173 | "source": [ 174 | "# 1. Logistic Regression\n", 175 | "\n", 176 | "Logistic Regression is one of the most commonly used supervised learning tasks in machine learning. Suppose we're given a dataset $\\mathcal{D}$ of the form\n", 177 | "\n", 178 | "$$ \\mathcal{D} = \\{ ({\\bf x_i}, y_i) \\} \\qquad \\text{for}\\qquad i=1,2,...,N$$\n", 179 | "\n", 180 | "where ${\\bf x_i}\\in {\\mathbb R}^m$ and $y_i\\in \\{0,1\\}$.\n", 181 | "\n", 182 | "The goal of logistic regression is to fit a model that correctly predicts the probabilities of the class labels:\n", 183 | "\n", 184 | "$$ p(y=1|{\\bf x}) = \\frac{1}{1+e^{-b -{\\bf w}^T {\\bf x} }}$$\n", 185 | "\n", 186 | "where ${\\bf w}$ and $b$ are learnable parameters. Specifically, ${\\bf w}$ is a vector of weights and $b$ is a bias term.\n", 187 | "\n", 188 | "First we implement a logistic regression model in PyTorch and learn point estimates for the parameters ${\\bf w}$ and $b$. Afterwards we'll see how to incorporate uncertainty into our estimates by using Pyro to do Bayesian logistic regression." 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": { 194 | "id": "hHG9QQYhfyhy" 195 | }, 196 | "source": [ 197 | "## 1.1 Model\n", 198 | "Using a logistic regression model, we want to predict whether a nation is African or not as a function of the terrain ruggedness index and log GDP per capita of a nation. \n", 199 | "\n", 200 | "Our input `x_data` is a tensor of size $N \\times 2$ and our output `y_data` is a tensor of size $N \\times 1$. The method `predict(self,x_data)` defines a sigmoid transformation of the form $\\mathit{sigmoid}({\\bf x}^T{\\bf w} + b)$, where ${\\bf w}$ is the weight vector and $b$ is the additive bias.\n", 201 | "\n", 202 | "The parameters of the model are defined using ``torch.nn.Parameter``, and will be learned during training. 
" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "id": "NBQBgFkPfyhz" 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "class LogisticRegressionModel():\n", 214 | " def __init__(self):\n", 215 | " self.w = torch.nn.Parameter(torch.zeros(1, 2))\n", 216 | " self.b = torch.nn.Parameter(torch.zeros(1, 1))\n", 217 | "\n", 218 | " def params(self):\n", 219 | " return {\"b\":self.b, \"w\": self.w}\n", 220 | "\n", 221 | " def predict(self, x_data):\n", 222 | " return torch.sigmoid(-self.b - torch.mm(self.w, torch.t(x_data))).squeeze(0)\n", 223 | "\n", 224 | " def logits(self, x_data):\n", 225 | " return (self.b + torch.mm(self.w, torch.t(x_data))).squeeze(0)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "id": "h80i4g8xqkaY" 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "logistic_regression_model = LogisticRegressionModel()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": { 242 | "id": "mlGU_7YPfyhz" 243 | }, 244 | "source": [ 245 | "## 1.2 Training\n", 246 | "For training we will use the cross entropy as our loss and Adam as our optimizer. We will use a somewhat large learning rate of `0.05` and run for 1000 iterations." 
247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "id": "_N6WPDJufyh0", 254 | "scrolled": true 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "def train(num_iterations = 1000):\n", 259 | " loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')\n", 260 | " optim = torch.optim.Adam(logistic_regression_model.params().values(), lr=0.05)\n", 261 | "\n", 262 | " for j in range(num_iterations):\n", 263 | " # run the model forward on the data\n", 264 | " logits = logistic_regression_model.logits(x_data)\n", 265 | " # calculate the cross-entropy loss\n", 266 | " loss = loss_fn(logits,y_data)\n", 267 | " # initialize gradients to zero\n", 268 | " optim.zero_grad()\n", 269 | " # backpropagate\n", 270 | " loss.backward()\n", 271 | " # take a gradient step\n", 272 | " optim.step()\n", 273 | " if (j + 1) % 500 == 0:\n", 274 | " print(\"[iteration %04d] loss: %.4f\" % (j + 1, loss.item()))\n", 275 | " # Inspect learned parameters\n", 276 | " print(\"Learned parameters:\")\n", 277 | " for name, param in logistic_regression_model.params().items():\n", 278 | " print(name, param.data.numpy())" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "id": "g_F9KDFyqkaZ" 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "train()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": { 295 | "id": "LyfAY0h-fyh0" 296 | }, 297 | "source": [ 298 | "## 1.3 Evaluating the model" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": { 304 | "id": "tmgazCZJfyh1" 305 | }, 306 | "source": [ 307 | "We now plot the decision line learned for African and non-African nations, relating the ruggedness index to the log GDP of the country." 
308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": { 314 | "id": "JWUs5dc1fyh1" 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "ax, grid, xx, yy = prepare_figure('Decision line')\n", 319 | "probs = logistic_regression_model.predict(grid).reshape(xx.shape).detach().numpy()\n", 320 | "ax.contour(xx, yy, probs, levels=[.5], cmap=\"Reds\", vmin=0, vmax=.6)\n", 321 | "plt.show()" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": { 327 | "id": "2yrEaqT5fyh3" 328 | }, 329 | "source": [ 330 | "# 2. Bayesian Logistic Regression\n", 331 | "\n", 332 | "\n", 333 | "[Bayesian modeling](http://mlg.eng.cam.ac.uk/zoubin/papers/NatureReprint15.pdf) offers a systematic framework for reasoning about model uncertainty. Instead of just learning point estimates, we're going to learn a _distribution_ over variables that are consistent with the observed data.\n", 334 | "\n", 335 | "In order to make our logistic regression Bayesian, we need to put priors on the parameters ${\\bf w}$ and $b$. These are distributions that represent our prior belief about reasonable values for ${\\bf w}$ and $b$ (before observing any data).\n", 336 | "\n", 337 | "A graphical representation would be as follows:\n", 338 | "\n", 339 | "\n" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": { 345 | "id": "kltwl1J9fyh3" 346 | }, 347 | "source": [ 348 | "## 2.1 Model\n", 349 | "\n", 350 | "We now have all the ingredients needed to specify our model. First we define priors over weights and bias. The prior on the intercept parameter is very flat as we would like this to be learnt from the data. We are using a weakly regularizing prior on the regression coefficients to avoid overfitting to the data.\n", 351 | "\n", 352 | "We use the `obs` argument to the `pyro.sample` statement to condition on the observed data `y_data`." 
353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": { 358 | "id": "NUWfkBXUfyh3" 359 | }, 360 | "source": [ 361 | "### Exercise \n", 362 | " \n", 363 | "* Define a random variable \"b\" to model the intercept. \n", 364 | "* Define the class random variable \"african/non-african\" for the predicted labels.\n", 365 | "* This random variable is defined as a Binomial distribution and is parametrized with the logits. \n", 366 | "* If time permits, explore and experiment with the notebook; e.g., specification of prior distributions, manually specified guides, and modifications to the model." 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": { 373 | "id": "_19buBJsfyh4" 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "def model(x_data, y_data):\n", 378 | " # weight and bias priors\n", 379 | " with pyro.plate(\"plate_w\", 2):\n", 380 | " w = pyro.sample(\"w\", Normal(torch.zeros(1,1), torch.ones(1,1)))\n", 381 | "\n", 382 | " # Define a random variable \"b\" to model the intercept.\n", 383 | " \n", 384 | "\n", 385 | " with pyro.plate(\"map\", len(x_data)):\n", 386 | " # Compute logits (i.e. log p(x=0)/p(x=1)) as a linear combination between data and weights.\n", 387 | " logits = (b + torch.mm(x_data,torch.t(w))).squeeze(-1)\n", 388 | " # Define a Binomial distribution as the observed value parameterized by the logits.\n", 389 | " " 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": { 395 | "id": "zcu6i1mYfyh6" 396 | }, 397 | "source": [ 398 | "## 2.2 Guide\n", 399 | "\n", 400 | "In order to do inference we're going to need a guide, i.e. a variational family of distributions. We will use Pyro's [autoguide library](https://docs.pyro.ai/en/stable/infer.autoguide.html). 
Under the hood, this defines a `guide` function, which in this case provides us with `Normal` variational distributions with learnable parameters, one for each `sample()` statement in the model." 
401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "id": "qtcC93Jkfyh7" 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "guide = AutoNormal(model)" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": { 417 | "id": "ktwd6CCUfyh8" 418 | }, 419 | "source": [ 420 | "## 2.3 Inference\n", 421 | "\n", 422 | "To do inference we'll use stochastic variational inference (SVI). Just like in the non-Bayesian logistic regression, each iteration of our training loop will take a gradient step, but now we will use the ELBO objective instead of binary cross entropy by constructing a `Trace_ELBO` object that we pass to `SVI`. " 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": { 428 | "id": "py-1QUeyfyh9" 429 | }, 430 | "source": [ 431 | "To take an ELBO gradient step we simply call the step method of SVI. Notice that the data argument we pass to step will be passed to both model() and guide(). " 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": { 438 | "id": "4Wh1Tyqjfyh9" 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "def train_vi(x_data, y_data, model, guide=None, num_iterations = 1000):\n", 443 | " optim = Adam({\"lr\": 0.1})\n", 444 | "\n", 445 | " # if no guide is provided, resort to an autoguide\n", 446 | " guide_ = guide if guide is not None else AutoNormal(model)\n", 447 | "\n", 448 | " svi = SVI(model, guide_, optim, loss=Trace_ELBO(), num_samples=10)\n", 449 | "\n", 450 | " pyro.clear_param_store()\n", 451 | " for j in range(num_iterations):\n", 452 | " # calculate the loss and take a gradient step\n", 453 | " loss = svi.step(x_data, y_data)\n", 454 | " if j % 500 == 0:\n", 455 | " print(\"[iteration %04d] loss: %.4f\" % (j + 1, loss / len(data)))" 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": { 461 | "id": "0_7bGZH8qkac" 462 | }, 463 | "source": [ 464 | "Learn the model" 465 | ] 
466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": { 471 | "id": "r11Qa4DSqkac" 472 | }, 473 | "outputs": [], 474 | "source": [ 475 | "guide = AutoNormal(model)\n", 476 | "train_vi(x_data, y_data, model, guide=guide)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": { 482 | "id": "b8GLwJV6qkac" 483 | }, 484 | "source": [ 485 | "Get the learned parameters" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": { 492 | "id": "NfaPyhfTfyh9" 493 | }, 494 | "outputs": [], 495 | "source": [ 496 | "for name, value in pyro.get_param_store().items():\n", 497 | " print(name, pyro.param(name).data.numpy())" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": { 503 | "id": "PKfYcpnYfyh-" 504 | }, 505 | "source": [ 506 | "As you can see, instead of just point estimates, we now have uncertainty estimates over our model parameters." 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": { 512 | "id": "pag7bEcmqkad" 513 | }, 514 | "source": [ 515 | "## 2.4 Model Evaluation: Model's Uncertainty\n", 516 | "We will sample different logistic regression lines to see how using a Bayesian approach can capture model uncertainty.\n", 517 | "\n", 518 | "Here we again rely on Pyro's Predictive class, which allows for easy sampling of the model parameters." 
519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": { 525 | "id": "AgYRa4h9qkad" 526 | }, 527 | "outputs": [], 528 | "source": [ 529 | "ax, grid, xx, yy = prepare_figure('Model evaluation')\n", 530 | "num_samples=10\n", 531 | "predictive = pyro.infer.Predictive(model, guide=guide, num_samples=num_samples)\n", 532 | "svi_samples = predictive(grid, None)\n", 533 | "\n", 534 | "# Plot the mean decision surface \n", 535 | "logits = torch.mean(torch.mm(grid, torch.t(svi_samples['w'].squeeze())) + svi_samples['b'].squeeze(), axis=1).squeeze(-1)\n", 536 | "probs = Binomial(logits = logits).mean\n", 537 | "ax.contour(xx, yy, probs.reshape(xx.shape).detach().numpy(), levels=[.5], cmap=\"Reds\", vmin=0, vmax=1.5)\n", 538 | "\n", 539 | "# Sample and plot decision surfaces\n", 540 | "for i in range(num_samples):\n", 541 | " logits = (torch.mm(grid, torch.t(svi_samples['w'][i,:])) + svi_samples['b'][i,:]).squeeze(-1)\n", 542 | " probs = Binomial(logits = logits).mean\n", 543 | " ax.contour(xx, yy, probs.reshape(xx.shape).detach().numpy(), levels=[.5], cmap=\"Greys\", vmin=0, vmax=1.5)\n", 544 | "\n", 545 | "plt.show()" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": { 551 | "id": "Pm7rm2Uhqkad" 552 | }, 553 | "source": [ 554 | "The above figure shows the uncertainty in our estimate of the logistic regression line. Note that for lower values of ruggedness there are many more data points, and as such, the regression lines are less uncertain than for high ruggedness values, where there is much more uncertainty. " 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": { 560 | "id": "CptrzXtXfyiG" 561 | }, 562 | "source": [ 563 | "## 2.5 The relationship between ruggedness and log GDP\n", 564 | "\n", 565 | "Finally, we can look at the uncertainty about the weights associated with terrain ruggedness and the logarithm of GDP. 
" 566 | ] 567 | }, 568 | { 569 | "cell_type": "markdown", 570 | "metadata": { 571 | "id": "EOvNBT14qkad" 572 | }, 573 | "source": [ 574 | "Recall the learned parameters:" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": { 581 | "id": "maJ-Mxw6qkad" 582 | }, 583 | "outputs": [], 584 | "source": [ 585 | "for name, value in pyro.get_param_store().items():\n", 586 | " print(name, pyro.param(name).data.numpy())" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": null, 592 | "metadata": { 593 | "id": "hrGPEIGLqkad" 594 | }, 595 | "outputs": [], 596 | "source": [ 597 | "import scipy.stats as stats\n", 598 | "\n", 599 | "f, ax = plt.subplots(1, 2, figsize=(8, 6), sharex=True)\n", 600 | "for i in range(2):\n", 601 | " mu = pyro.param('AutoNormal.locs.w')[0,i].data.numpy().squeeze()\n", 602 | " std = pyro.param('AutoNormal.scales.w')[0,i].data.numpy().squeeze()\n", 603 | " #x = np.linspace(mu - 3*std, mu + 3*std, 100)\n", 604 | " x = np.linspace(-2,1, 100)\n", 605 | " ax[i].plot(x, stats.norm.pdf(x, mu, std))\n", 606 | "ax[0].set_xlabel('Weight for ruggedness')\n", 607 | "ax[1].set_xlabel('Weight for log GDP')\n", 608 | "plt.show()" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": { 614 | "id": "hpo6kGPRfyiL" 615 | }, 616 | "source": [ 617 | "### References\n", 618 | " 1. McElreath, R., *Statistical Rethinking, Chapter 7*, 2016\n", 619 | " 2. Nunn, N. & Puga, D., *[Ruggedness: The blessing of bad geography in Africa](https://diegopuga.org/papers/rugged.pdf)*, Review of Economics and Statistics 94(1), Feb. 
2012" 620 | ] 621 | } 622 | ], 623 | "metadata": { 624 | "anaconda-cloud": {}, 625 | "colab": { 626 | "name": "students_bayesian_logistic_regression.ipynb", 627 | "provenance": [], 628 | "include_colab_link": true 629 | }, 630 | "kernelspec": { 631 | "display_name": "Python 3", 632 | "language": "python", 633 | "name": "python3" 634 | }, 635 | "language_info": { 636 | "codemirror_mode": { 637 | "name": "ipython", 638 | "version": 3 639 | }, 640 | "file_extension": ".py", 641 | "mimetype": "text/x-python", 642 | "name": "python", 643 | "nbconvert_exporter": "python", 644 | "pygments_lexer": "ipython3", 645 | "version": "3.9.0" 646 | } 647 | }, 648 | "nbformat": 4, 649 | "nbformat_minor": 0 650 | } -------------------------------------------------------------------------------- /Day2-BeforeLunch/notebooks/solution_simple_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "khKi5Kpyh3Po" 17 | }, 18 | "source": [ 19 | "# Exercise\n", 20 | "\n", 21 | "
\n", 22 | "\"Drawing\"\n", 23 | "
\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "nIxiDUQfh3Pr" 30 | }, 31 | "source": [ 32 | "### Imports" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "metadata": { 39 | "id": "spxajQOGh3Ps" 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import numpy as np\n", 44 | "from scipy import special, stats\n", 45 | "import matplotlib.pyplot as plt\n", 46 | "%matplotlib notebook" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "L2gwpsEMh3Pt" 53 | }, 54 | "source": [ 55 | "### Startup: Define priors, and sample artificial training data" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "metadata": { 62 | "id": "opTUXGnIh3Pt" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Define priors\n", 67 | "alpha_prior, beta_prior = 1E-2, 1E-2 # Parameters for the prior over gamma\n", 68 | "mu_prior = 0 # A priori mean for mu\n", 69 | "tau_prior = 1E-6 # A priori precision for mu\n", 70 | "\n", 71 | "# Sample data\n", 72 | "np.random.seed(123)\n", 73 | "N = 100\n", 74 | "correct_mean = 5\n", 75 | "correct_precision = 1\n", 76 | "x = np.random.normal(loc=correct_mean, scale=1./np.sqrt(correct_precision), size=N)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": { 82 | "id": "mBHQ4YWoh3Pt" 83 | }, 84 | "source": [ 85 | "## Helper-routine: Make plot of density" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 3, 91 | "metadata": { 92 | "id": "0WYMplwah3Pu" 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "#@title\n", 97 | "def plot_density(posterior_mean_mu, posterior_prec_mu,\n", 98 | " posterior_alpha_gamma, posterior_beta_gamma,\n", 99 | " correct_mean, correct_precision):\n", 100 | " mu_range = np.linspace(-15,15, 500).astype(np.float32)\n", 101 | " precision_range = np.linspace(1E-2, 3, 500).astype(np.float32)\n", 102 | " mu_mesh, precision_mesh = np.meshgrid(mu_range, precision_range)\n", 103 | " 
variational_log_pdf = \\\n", 104 | " stats.norm.logpdf(mu_mesh, loc=posterior_mean_mu, scale=1. / np.sqrt(posterior_prec_mu)) + \\\n", 105 | " stats.gamma.logpdf(x=precision_mesh,\n", 106 | " a=posterior_alpha_gamma,\n", 107 | " scale=1. / posterior_beta_gamma)\n", 108 | " plt.figure()\n", 109 | " plt.contour(mu_mesh, precision_mesh, variational_log_pdf, 25)\n", 110 | " plt.plot(correct_mean, correct_precision, \"bo\")\n", 111 | " plt.title('Density over $(\\mu, \\\\tau)$. Blue dot: True parameters')\n", 112 | " plt.xlabel(\"Mean $\\mu$\")\n", 113 | " plt.ylabel(\"Precision $\\\\tau$\")" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": { 119 | "id": "pU5flZ_sh3Pu" 120 | }, 121 | "source": [ 122 | "## Helper-routine: Calculate ELBO" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "metadata": { 129 | "id": "gi8hKiW0h3Pv", 130 | "cellView": "form" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "#@title\n", 135 | "def calculate_ELBO(data, tau, alpha, beta, nu_p, tau_p, alpha_p, beta_p):\n", 136 | " \"\"\"\n", 137 | " Helper routine: Calculate ELBO. 
Data is the sampled x-values, anything without a _p relates to the prior,\n", 138 | " everything _with_ a _p relates to the variational posterior.\n", 139 | " Note that we have no nu without a _p; we are simplifying by forcing this to be zero a priori\n", 140 | "\n", 141 | " Note: This function obviously only works when the model is as in this code challenge,\n", 142 | " and is not a general solution.\n", 143 | "\n", 144 | " :param data: The sampled data\n", 145 | " :param tau: prior precision for mu, the mean for the data generation\n", 146 | " :param alpha: prior shape of dist for gamma, the precision of the data generation\n", 147 | " :param beta: prior rate of dist for gamma, the precision of the data generation\n", 148 | " :param nu_p: VB posterior mean for the distribution of mu - the mean of the data generation\n", 149 | " :param tau_p: VB posterior precision for the distribution of mu - the mean of the data generation\n", 150 | " :param alpha_p: VB posterior shape of dist for gamma, the precision of the data generation\n", 151 | " :param beta_p: VB posterior shape of dist for gamma, the precision of the data generation\n", 152 | " :return: the ELBO\n", 153 | " \"\"\"\n", 154 | "\n", 155 | " # We calculate ELBO as E_q log p(x,z) - E_q log q(z)\n", 156 | " # log p(x,z) here is log p(mu) + log p(gamma) + \\sum_i log p(x_i | mu, gamma)\n", 157 | "\n", 158 | " # E_q log p(mu)\n", 159 | " log_p = -.5 * np.log(2 * np.pi) + .5 * np.log(tau) - .5 * tau * (1 / tau_p + nu_p * nu_p)\n", 160 | "\n", 161 | " # E_q log p(gamma)\n", 162 | " log_p = log_p + alpha * np.log(beta) + \\\n", 163 | " (alpha - 1) * (special.digamma(alpha_p) - np.log(beta_p)) - beta * alpha_p / beta_p\n", 164 | "\n", 165 | " # E_q log p(x_i|mu, gamma)\n", 166 | " for xi in data:\n", 167 | " log_p += -.5 * np.log(2 * np.pi) \\\n", 168 | " + .5 * (special.digamma(alpha_p) - np.log(beta_p)) \\\n", 169 | " - .5 * alpha_p / beta_p * (xi * xi - 2 * xi * nu_p + 1 / tau_p + nu_p * nu_p)\n", 170 | "\n", 171 
| " # Entropy of mu (Gaussian)\n", 172 | " entropy = .5 * np.log(2 * np.pi * np.exp(1) / tau_p)\n", 173 | " entropy += alpha_p - np.log(beta_p) + special.gammaln(alpha_p) \\\n", 174 | " + (1 - alpha_p) * special.digamma(alpha_p)\n", 175 | "\n", 176 | " return log_p + entropy\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": { 182 | "id": "U7xhZAEth3Pv" 183 | }, 184 | "source": [ 185 | "## Do the VB\n", 186 | "\n", 187 | "The task is to implement the variational updating equations appearing below." 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "source": [ 193 | "
\n", 194 | "\"Drawing\"\n", 195 | "
" 196 | ], 197 | "metadata": { 198 | "id": "CAo2PB1bUqmz" 199 | } 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 6, 204 | "metadata": { 205 | "id": "jG59YwICh3Px", 206 | "outputId": "76559bed-4cdf-417e-ba0d-81374a99035b", 207 | "colab": { 208 | "base_uri": "https://localhost:8080/" 209 | } 210 | }, 211 | "outputs": [ 212 | { 213 | "output_type": "stream", 214 | "name": "stdout", 215 | "text": [ 216 | "\n", 217 | "====================================================================================================\n", 218 | " VB iterations:\n", 219 | "====================================================================================================\n", 220 | " 1: ELBO: -786.1881435, alpha_q: 50.010, beta_q: 50001327.248, nu_q: 4.977, tau_q: 0.000\n", 221 | " 2: ELBO: -557.6915325, alpha_q: 50.010, beta_q: 495028.283, nu_q: 5.027, tau_q: 0.010\n", 222 | " 3: ELBO: -330.9361681, alpha_q: 50.010, beta_q: 5012.459, nu_q: 5.027, tau_q: 0.998\n", 223 | " 4: ELBO: -170.8624409, alpha_q: 50.010, beta_q: 113.771, nu_q: 5.027, tau_q: 43.957\n", 224 | " 5: ELBO: -164.1402693, alpha_q: 50.010, beta_q: 64.794, nu_q: 5.027, tau_q: 77.183\n", 225 | " 6: ELBO: -164.1388195, alpha_q: 50.010, beta_q: 64.304, nu_q: 5.027, tau_q: 77.771\n", 226 | " 7: ELBO: -164.1388193, alpha_q: 50.010, beta_q: 64.299, nu_q: 5.027, tau_q: 77.777\n", 227 | " 8: ELBO: -164.1388193, alpha_q: 50.010, beta_q: 64.299, nu_q: 5.027, tau_q: 77.777\n", 228 | " 9: ELBO: -164.1388193, alpha_q: 50.010, beta_q: 64.299, nu_q: 5.027, tau_q: 77.777\n", 229 | "\n", 230 | "====================================================================================================\n", 231 | " Result:\n", 232 | "====================================================================================================\n", 233 | "E[mu] = 5.027 with data average 5.027 and prior mean 0.000.\n", 234 | "E[gamma] = 0.778 with inverse of data covariance 0.778 and prior 1.000.\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 
239 | "# Initialization\n", 240 | "alpha_q = alpha_prior\n", 241 | "beta_q = beta_prior\n", 242 | "mu_q = 0\n", 243 | "tau_q = tau_prior\n", 244 | "previous_elbo = -np.inf\n", 245 | "\n", 246 | "# Start iterating\n", 247 | "print(\"\\n\" + 100 * \"=\" + \"\\n VB iterations:\\n\" + 100 * \"=\")\n", 248 | "for iteration in range(1000):\n", 249 | " # Update gamma distribution: q(\\gamma)=Gamma(\\alpha_q,\\beta_q)\n", 250 | " alpha_q = alpha_prior + .5 * N \n", 251 | " beta_q = beta_prior + .5 * np.sum(x * x) - mu_q * np.sum(x) + .5 * N * (1. / tau_q + mu_q * mu_q)\n", 252 | "\n", 253 | " # Update Gaussian distribution: q(\\mu)=N(\\mu_q,\\tau_q^{-1})\n", 254 | " expected_gamma = alpha_q / beta_q\n", 255 | " tau_q = tau_prior + N * expected_gamma\n", 256 | " mu_q = expected_gamma * np.sum(x) / tau_q\n", 257 | " \n", 258 | " # Calculate Lower-bound\n", 259 | " current_elbo = calculate_ELBO(data=x, tau=tau_prior, alpha=alpha_prior, beta=beta_prior,\n", 260 | " nu_p=mu_q, tau_p=tau_q, alpha_p=alpha_q, beta_p=beta_q)\n", 261 | " \n", 262 | " print(\"{:2d}: ELBO: {:12.7f}, alpha_q: {:6.3f}, beta_q: {:12.3f}, nu_q: {:6.3f}, tau_q: {:6.3f}\".format(\n", 263 | " iteration + 1, current_elbo, alpha_q, beta_q, mu_q, tau_q))\n", 264 | " \n", 265 | " if current_elbo < previous_elbo:\n", 266 | " raise ValueError(\"ELBO is decreasing. Something is wrong! Goodbye...\")\n", 267 | " \n", 268 | " if iteration > 0 and np.abs((current_elbo - previous_elbo) / previous_elbo) < 1E-20:\n", 269 | " # Very little improvement. We are done.\n", 270 | " break\n", 271 | " \n", 272 | " # If we didn't break we need to run again. 
Update the value for \"previous\"\n", 273 | " previous_elbo = current_elbo\n", 274 | " \n", 275 | "\n", 276 | "print(\"\\n\" + 100 * \"=\" + \"\\n Result:\\n\" + 100 * \"=\")\n", 277 | "print(\"E[mu] = {:5.3f} with data average {:5.3f} and prior mean {:5.3f}.\".format(mu_q, np.mean(x), 0.))\n", 278 | "print(\"E[gamma] = {:5.3f} with inverse of data covariance {:5.3f} and prior {:5.3f}.\".format(\n", 279 | " alpha_q / beta_q, 1. / np.cov(x), alpha_prior / beta_prior))" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": { 285 | "id": "f0pKpGZhh3Py" 286 | }, 287 | "source": [ 288 | "### Plot of the Prior density" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "source": [ 294 | "plot_density(mu_prior, tau_prior, alpha_prior, beta_prior, correct_mean, correct_precision)\n", 295 | "plt.show()" 296 | ], 297 | "metadata": { 298 | "id": "Gpi5rPBJXCwc", 299 | "outputId": "ae69839d-8293-4251-c16f-3845c0cc6e58", 300 | "colab": { 301 | "base_uri": "https://localhost:8080/", 302 | "height": 301 303 | } 304 | }, 305 | "execution_count": 7, 306 | "outputs": [ 307 | { 308 | "output_type": "display_data", 309 | "data": { 310 | "text/plain": [ 311 | "
" 312 | ], 313 | "image/png": "\n" 314 | }, 315 | "metadata": { 316 | "needs_background": "light" 317 | } 318 | } 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "source": [ 324 | "### Plot of the Variational Posterior density" 325 | ], 326 | "metadata": { 327 | "id": "Z8zn_7UFX2NZ" 328 | } 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 8, 333 | "metadata": { 334 | "id": "kpCATABph3Pz", 335 | "outputId": "d25d955f-65c0-45b6-d019-fec22a99b6ab", 336 | "colab": { 337 | "base_uri": "https://localhost:8080/", 338 | "height": 301 339 | } 340 | }, 341 | "outputs": [ 342 | { 343 | "output_type": "display_data", 344 | "data": { 345 | "text/plain": [ 346 | "
" 347 | ], 348 | "image/png": "\n" 349 | }, 350 | "metadata": { 351 | "needs_background": "light" 352 | } 353 | } 354 | ], 355 | "source": [ 356 | "plot_density(mu_q, tau_q, alpha_q, beta_q, correct_mean, correct_precision)\n", 357 | "plt.show()" 358 | ] 359 | } 360 | ], 361 | "metadata": { 362 | "colab": { 363 | "name": "solution_simple_model.ipynb", 364 | "provenance": [], 365 | "include_colab_link": true 366 | }, 367 | "kernelspec": { 368 | "display_name": "Python 3", 369 | "language": "python", 370 | "name": "python3" 371 | }, 372 | "language_info": { 373 | "codemirror_mode": { 374 | "name": "ipython", 375 | "version": 3 376 | }, 377 | "file_extension": ".py", 378 | "mimetype": "text/x-python", 379 | "name": "python", 380 | "nbconvert_exporter": "python", 381 | "pygments_lexer": "ipython3", 382 | "version": "3.6.6" 383 | } 384 | }, 385 | "nbformat": 4, 386 | "nbformat_minor": 0 387 | } -------------------------------------------------------------------------------- /Day2-BeforeLunch/notebooks/students_simple_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "khKi5Kpyh3Po" 17 | }, 18 | "source": [ 19 | "# Exercise\n", 20 | "\n", 21 | "
\n", 22 | "\"Drawing\"\n", 23 | "
\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "nIxiDUQfh3Pr" 30 | }, 31 | "source": [ 32 | "### Imports" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "id": "spxajQOGh3Ps" 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import numpy as np\n", 44 | "from scipy import special, stats\n", 45 | "import matplotlib.pyplot as plt\n", 46 | "%matplotlib notebook" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "L2gwpsEMh3Pt" 53 | }, 54 | "source": [ 55 | "### Startup: Define priors, and sample artificial training data" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "id": "opTUXGnIh3Pt" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Define priors\n", 67 | "alpha_prior, beta_prior = 1E-2, 1E-2 # Parameters for the prior over gamma\n", 68 | "mu_prior = 0 # A priori mean for mu\n", 69 | "tau_prior = 1E-6 # A priori precision for mu\n", 70 | "\n", 71 | "# Sample data\n", 72 | "np.random.seed(123)\n", 73 | "N = 100\n", 74 | "correct_mean = 5\n", 75 | "correct_precision = 1\n", 76 | "x = np.random.normal(loc=correct_mean, scale=1./np.sqrt(correct_precision), size=N)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": { 82 | "id": "mBHQ4YWoh3Pt" 83 | }, 84 | "source": [ 85 | "## Helper-routine: Make plot of density" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "id": "0WYMplwah3Pu", 93 | "cellView": "form" 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "#@title\n", 98 | "def plot_density(posterior_mean_mu, posterior_prec_mu,\n", 99 | " posterior_alpha_gamma, posterior_beta_gamma,\n", 100 | " correct_mean, correct_precision):\n", 101 | " mu_range = np.linspace(-15,15, 500).astype(np.float32)\n", 102 | " precision_range = np.linspace(1E-2, 3, 500).astype(np.float32)\n", 103 | " mu_mesh, precision_mesh = 
np.meshgrid(mu_range, precision_range)\n", 104 | " variational_log_pdf = \\\n", 105 | " stats.norm.logpdf(mu_mesh, loc=posterior_mean_mu, scale=1. / np.sqrt(posterior_prec_mu)) + \\\n", 106 | " stats.gamma.logpdf(x=precision_mesh,\n", 107 | " a=posterior_alpha_gamma,\n", 108 | " scale=1. / posterior_beta_gamma)\n", 109 | " plt.figure()\n", 110 | " plt.contour(mu_mesh, precision_mesh, variational_log_pdf, 25)\n", 111 | " plt.plot(correct_mean, correct_precision, \"bo\")\n", 112 | " plt.title('Density over $(\\mu, \\\\tau)$. Blue dot: True parameters')\n", 113 | " plt.xlabel(\"Mean $\\mu$\")\n", 114 | " plt.ylabel(\"Precision $\\\\tau$\")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "id": "pU5flZ_sh3Pu" 121 | }, 122 | "source": [ 123 | "## Helper-routine: Calculate ELBO" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "id": "gi8hKiW0h3Pv", 131 | "cellView": "form" 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "#@title\n", 136 | "def calculate_ELBO(data, tau, alpha, beta, nu_p, tau_p, alpha_p, beta_p):\n", 137 | " \"\"\"\n", 138 | " Helper routine: Calculate ELBO. 
Data is the sampled x-values, anything without a _p relates to the prior,\n", 139 | " everything _with_ a _p relates to the variational posterior.\n", 140 | " Note that we have no nu without a _p; we are simplifying by forcing this to be zero a priori\n", 141 | "\n", 142 | " Note: This function obviously only works when the model is as in this code challenge,\n", 143 | " and is not a general solution.\n", 144 | "\n", 145 | " :param data: The sampled data\n", 146 | " :param tau: prior precision for mu, the mean for the data generation\n", 147 | " :param alpha: prior shape of dist for gamma, the precision of the data generation\n", 148 | " :param beta: prior rate of dist for gamma, the precision of the data generation\n", 149 | " :param nu_p: VB posterior mean for the distribution of mu - the mean of the data generation\n", 150 | " :param tau_p: VB posterior precision for the distribution of mu - the mean of the data generation\n", 151 | " :param alpha_p: VB posterior shape of dist for gamma, the precision of the data generation\n", 152 | " :param beta_p: VB posterior rate of dist for gamma, the precision of the data generation\n", 153 | " :return: the ELBO\n", 154 | " \"\"\"\n", 155 | "\n", 156 | " # We calculate ELBO as E_q log p(x,z) - E_q log q(z)\n", 157 | " # log p(x,z) here is log p(mu) + log p(gamma) + \\sum_i log p(x_i | mu, gamma)\n", 158 | "\n", 159 | " # E_q log p(mu)\n", 160 | " log_p = -.5 * np.log(2 * np.pi) + .5 * np.log(tau) - .5 * tau * (1 / tau_p + nu_p * nu_p)\n", 161 | "\n", 162 | " # E_q log p(gamma)\n", 163 | " log_p = log_p + alpha * np.log(beta) + \\\n", 164 | " (alpha - 1) * (special.digamma(alpha_p) - np.log(beta_p)) - beta * alpha_p / beta_p\n", 165 | "\n", 166 | " # E_q log p(x_i|mu, gamma)\n", 167 | " for xi in data:\n", 168 | " log_p += -.5 * np.log(2 * np.pi) \\\n", 169 | " + .5 * (special.digamma(alpha_p) - np.log(beta_p)) \\\n", 170 | " - .5 * alpha_p / beta_p * (xi * xi - 2 * xi * nu_p + 1 / tau_p + nu_p * nu_p)\n", 171 | "\n", 172 
| " # Entropy of mu (Gaussian)\n", 173 | " entropy = .5 * np.log(2 * np.pi * np.exp(1) / tau_p)\n", 174 | " entropy += alpha_p - np.log(beta_p) + special.gammaln(alpha_p) \\\n", 175 | " + (1 - alpha_p) * special.digamma(alpha_p)\n", 176 | "\n", 177 | " return log_p + entropy\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": { 183 | "id": "U7xhZAEth3Pv" 184 | }, 185 | "source": [ 186 | "## Do the VB\n", 187 | "\n", 188 | "The task is to implement the variational updating equations appearing below." 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "source": [ 194 | "
\n", 195 | "\"Drawing\"\n", 196 | "
" 197 | ], 198 | "metadata": { 199 | "id": "CAo2PB1bUqmz" 200 | } 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "id": "jG59YwICh3Px", 207 | "outputId": "f69f7dcf-4c99-4738-a2a0-ea6586b0269d", 208 | "colab": { 209 | "base_uri": "https://localhost:8080/" 210 | } 211 | }, 212 | "outputs": [ 213 | { 214 | "output_type": "stream", 215 | "name": "stdout", 216 | "text": [ 217 | "\n", 218 | "====================================================================================================\n", 219 | " VB iterations:\n", 220 | "====================================================================================================\n", 221 | " 1: ELBO: -786.1881435, alpha_q: 50.010, beta_q: 50001327.248, nu_q: 4.977, tau_q: 0.000\n", 222 | " 2: ELBO: -557.6915325, alpha_q: 50.010, beta_q: 495028.283, nu_q: 5.027, tau_q: 0.010\n", 223 | " 3: ELBO: -330.9361681, alpha_q: 50.010, beta_q: 5012.459, nu_q: 5.027, tau_q: 0.998\n", 224 | " 4: ELBO: -170.8624409, alpha_q: 50.010, beta_q: 113.771, nu_q: 5.027, tau_q: 43.957\n", 225 | " 5: ELBO: -164.1402693, alpha_q: 50.010, beta_q: 64.794, nu_q: 5.027, tau_q: 77.183\n", 226 | " 6: ELBO: -164.1388195, alpha_q: 50.010, beta_q: 64.304, nu_q: 5.027, tau_q: 77.771\n", 227 | " 7: ELBO: -164.1388193, alpha_q: 50.010, beta_q: 64.299, nu_q: 5.027, tau_q: 77.777\n", 228 | " 8: ELBO: -164.1388193, alpha_q: 50.010, beta_q: 64.299, nu_q: 5.027, tau_q: 77.777\n", 229 | " 9: ELBO: -164.1388193, alpha_q: 50.010, beta_q: 64.299, nu_q: 5.027, tau_q: 77.777\n", 230 | "\n", 231 | "====================================================================================================\n", 232 | " Result:\n", 233 | "====================================================================================================\n", 234 | "E[mu] = 5.027 with data average 5.027 and prior mean 0.000.\n", 235 | "E[gamma] = 0.778 with inverse of data covariance 0.778 and prior 1.000.\n" 236 | ] 237 | } 238 | ], 239 | 
"source": [ 240 | "# Initialization\n", 241 | "alpha_q = alpha_prior\n", 242 | "beta_q = beta_prior\n", 243 | "mu_q = 0\n", 244 | "tau_q = tau_prior\n", 245 | "previous_elbo = -np.inf\n", 246 | "\n", 247 | "# Start iterating\n", 248 | "print(\"\\n\" + 100 * \"=\" + \"\\n VB iterations:\\n\" + 100 * \"=\")\n", 249 | "for iteration in range(1000):\n", 250 | " # Update gamma distribution\n", 251 | " alpha_q = 0 ## Code the updating equation\n", 252 | " beta_q = beta_prior + .5 * np.sum(x * x) - mu_q * np.sum(x) + .5 * N * (1. / tau_q + mu_q * mu_q)\n", 253 | "\n", 254 | " # Update Gaussian distribution\n", 255 | " expected_gamma = 0 ## Code the updating equation\n", 256 | " tau_q = 0 ## Code the updating equation\n", 257 | " mu_q = 0 ## Code the updating equation\n", 258 | " \n", 259 | " # Calculate Lower-bound\n", 260 | " current_elbo = calculate_ELBO(data=x, tau=tau_prior, alpha=alpha_prior, beta=beta_prior,\n", 261 | " nu_p=mu_q, tau_p=tau_q, alpha_p=alpha_q, beta_p=beta_q)\n", 262 | " \n", 263 | " print(\"{:2d}: ELBO: {:12.7f}, alpha_q: {:6.3f}, beta_q: {:12.3f}, nu_q: {:6.3f}, tau_q: {:6.3f}\".format(\n", 264 | " iteration + 1, current_elbo, alpha_q, beta_q, mu_q, tau_q))\n", 265 | " \n", 266 | " if current_elbo < previous_elbo:\n", 267 | " raise ValueError(\"ELBO is decreasing. Something is wrong! Goodbye...\")\n", 268 | " \n", 269 | " if iteration > 0 and np.abs((current_elbo - previous_elbo) / previous_elbo) < 1E-20:\n", 270 | " # Very little improvement. We are done.\n", 271 | " break\n", 272 | " \n", 273 | " # If we didn't break we need to run again. 
Update the value for \"previous\"\n", 274 | " previous_elbo = current_elbo\n", 275 | " \n", 276 | "\n", 277 | "print(\"\\n\" + 100 * \"=\" + \"\\n Result:\\n\" + 100 * \"=\")\n", 278 | "print(\"E[mu] = {:5.3f} with data average {:5.3f} and prior mean {:5.3f}.\".format(mu_q, np.mean(x), 0.))\n", 279 | "print(\"E[gamma] = {:5.3f} with inverse of data covariance {:5.3f} and prior {:5.3f}.\".format(\n", 280 | " alpha_q / beta_q, 1. / np.cov(x), alpha_prior / beta_prior))" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "id": "f0pKpGZhh3Py" 287 | }, 288 | "source": [ 289 | "### Plot of the Prior density" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "source": [ 295 | "plot_density(mu_prior, tau_prior, alpha_prior, beta_prior, correct_mean, correct_precision)\n", 296 | "plt.show()" 297 | ], 298 | "metadata": { 299 | "id": "Gpi5rPBJXCwc", 300 | "outputId": "34dba885-07ac-4af2-c949-4dfb9ba20ce5", 301 | "colab": { 302 | "base_uri": "https://localhost:8080/", 303 | "height": 301 304 | } 305 | }, 306 | "execution_count": null, 307 | "outputs": [ 308 | { 309 | "output_type": "display_data", 310 | "data": { 311 | "text/plain": [ 312 | "
" 313 | ], 314 | "image/png": "\n" 315 | }, 316 | "metadata": { 317 | "needs_background": "light" 318 | } 319 | } 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "source": [ 325 | "### Plot of the Variational Posterior density" 326 | ], 327 | "metadata": { 328 | "id": "Z8zn_7UFX2NZ" 329 | } 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": { 335 | "id": "kpCATABph3Pz", 336 | "outputId": "3f6bac10-c7f7-4337-c2f1-35504c85bdfc", 337 | "colab": { 338 | "base_uri": "https://localhost:8080/", 339 | "height": 301 340 | } 341 | }, 342 | "outputs": [ 343 | { 344 | "output_type": "display_data", 345 | "data": { 346 | "text/plain": [ 347 | "
" 348 | ], 349 | "image/png": "\n" 350 | }, 351 | "metadata": { 352 | "needs_background": "light" 353 | } 354 | } 355 | ], 356 | "source": [ 357 | "plot_density(mu_q, tau_q, alpha_q, beta_q, correct_mean, correct_precision)\n", 358 | "plt.show()" 359 | ] 360 | } 361 | ], 362 | "metadata": { 363 | "colab": { 364 | "name": "students_simple_model.ipynb", 365 | "provenance": [], 366 | "include_colab_link": true 367 | }, 368 | "kernelspec": { 369 | "display_name": "Python 3", 370 | "language": "python", 371 | "name": "python3" 372 | }, 373 | "language_info": { 374 | "codemirror_mode": { 375 | "name": "ipython", 376 | "version": 3 377 | }, 378 | "file_extension": ".py", 379 | "mimetype": "text/x-python", 380 | "name": "python", 381 | "nbconvert_exporter": "python", 382 | "pygments_lexer": "ipython3", 383 | "version": "3.6.6" 384 | } 385 | }, 386 | "nbformat": 4, 387 | "nbformat_minor": 0 388 | } --------------------------------------------------------------------------------