├── Day1 ├── .ipynb_checkpoints │ ├── students_Bayesian_regression-checkpoint.ipynb │ └── students_PPLs_Intro-checkpoint.ipynb ├── slides-L1.pdf ├── slides │ └── Figures │ │ ├── Ice-cream_shop_-_Florida.jpg │ │ ├── PGM-Tem-Sensor.png │ │ ├── PGM-Tem-Sensor2.png │ │ ├── PGM-Tem-Sensor3.png │ │ ├── africa-III.png │ │ ├── africa-IX.png │ │ ├── icecream-model-temporal.png │ │ ├── tempmodel-II-graph.png │ │ ├── tempmodel-V-b.png │ │ ├── tempmodel-VI-b.png │ │ └── tempmodel-temporal-III.png ├── solutions_PPLs_Intro.ipynb ├── solutions_bayesian_regression.ipynb ├── students_Bayesian_regression.ipynb └── students_PPLs_Intro.ipynb ├── Day2 ├── slides-L2.pdf ├── solution_lin_reg.ipynb ├── solution_simple_model.ipynb ├── students_lin_reg.ipynb └── students_simple_model.ipynb ├── Day3 ├── .ipynb_checkpoints │ ├── Bayesian_linear_regression-checkpoint.ipynb │ ├── FA-checkpoint.ipynb │ ├── VAE-checkpoint.ipynb │ ├── solution_BBVI-checkpoint.ipynb │ └── solution_simple_model-checkpoint.ipynb ├── BBVI-gradient-variance.eps ├── BBVI_exercise.png ├── Bayesian_linear_regression.ipynb ├── FA.ipynb ├── FA_model.png ├── VAE.ipynb ├── elbo_evolution.pdf ├── elbo_evolution_with_1_samples.pdf ├── reg_model.png ├── simple_pyro_exercise.png ├── slides-L3.pdf ├── solution_BBVI.ipynb ├── solution_simple_model.ipynb ├── student_BBVI.ipynb └── student_simple_model.ipynb ├── Readme.md └── environment.yml /Day1/slides-L1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides-L1.pdf -------------------------------------------------------------------------------- /Day1/slides/Figures/Ice-cream_shop_-_Florida.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/Ice-cream_shop_-_Florida.jpg 
-------------------------------------------------------------------------------- /Day1/slides/Figures/PGM-Tem-Sensor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/PGM-Tem-Sensor.png -------------------------------------------------------------------------------- /Day1/slides/Figures/PGM-Tem-Sensor2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/PGM-Tem-Sensor2.png -------------------------------------------------------------------------------- /Day1/slides/Figures/PGM-Tem-Sensor3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/PGM-Tem-Sensor3.png -------------------------------------------------------------------------------- /Day1/slides/Figures/africa-III.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/africa-III.png -------------------------------------------------------------------------------- /Day1/slides/Figures/africa-IX.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/africa-IX.png -------------------------------------------------------------------------------- /Day1/slides/Figures/icecream-model-temporal.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/icecream-model-temporal.png -------------------------------------------------------------------------------- /Day1/slides/Figures/tempmodel-II-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/tempmodel-II-graph.png -------------------------------------------------------------------------------- /Day1/slides/Figures/tempmodel-V-b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/tempmodel-V-b.png -------------------------------------------------------------------------------- /Day1/slides/Figures/tempmodel-VI-b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/tempmodel-VI-b.png -------------------------------------------------------------------------------- /Day1/slides/Figures/tempmodel-temporal-III.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day1/slides/Figures/tempmodel-temporal-III.png -------------------------------------------------------------------------------- /Day2/slides-L2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day2/slides-L2.pdf -------------------------------------------------------------------------------- 
/Day2/students_lin_reg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import seaborn as sns\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%matplotlib inline" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "In this code task you should implement your updating for the variational distribution of the intercept 'b', and use your implementation to learn a Bayesian linear regression model for the 'ruggedness' data, which we also considered yesterday. " 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Dataset \n", 28 | "\n", 29 | "The following example is adapted from \\[1\\]. We would like to explore the relationship between topographic heterogeneity of a nation as measured by the Terrain Ruggedness Index (variable *rugged* in the dataset) and its GDP per capita. In particular, it was noted by the authors in \\[1\\] that terrain ruggedness or bad geography is related to poorer economic performance outside of Africa, but rugged terrains have had a reverse effect on income for African nations. Let us look at the data \\[2\\] and investigate this relationship. We will be focusing on three features from the dataset:\n", 30 | " - `rugged`: quantifies the Terrain Ruggedness Index\n", 31 | " - `cont_africa`: whether the given nation is in Africa\n", 32 | " - `rgdppc_2000`: Real GDP per capita for the year 2000\n", 33 | " \n", 34 | "We will take the logarithm for the response variable GDP as it tends to vary exponentially." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "DATA_URL = \"https://d2fefpcigoriu7.cloudfront.net/datasets/rugged_data.csv\"\n", 44 | "data = pd.read_csv(DATA_URL, encoding=\"ISO-8859-1\")\n", 45 | "df = data[[\"cont_africa\", \"rugged\", \"rgdppc_2000\"]]\n", 46 | "df = df[np.isfinite(df.rgdppc_2000)]\n", 47 | "df[\"rgdppc_2000\"] = np.log(df[\"rgdppc_2000\"])\n", 48 | "df[\"african_rugged\"] = data[\"cont_africa\"] * data[\"rugged\"]\n", 49 | "df = df[[\"cont_africa\", \"rugged\", \"african_rugged\", \"rgdppc_2000\"]]\n", 50 | "\n", 51 | "# Divide the data into predictors and response and store the data in numpy arrays\n", 52 | "data = np.array(df)\n", 53 | "x_data = data[:, :-1]\n", 54 | "y_data = data[:, -1]" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": { 61 | "scrolled": true 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/html": [ 67 | "
\n", 68 | "\n", 81 | "\n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | "
cont_africaruggedafrican_ruggedrgdppc_2000
210.8580.8587.492609
403.4270.0008.216929
700.7690.0009.933263
800.7750.0009.407032
902.6880.0007.792343
1100.0060.0009.212541
1200.1430.00010.143191
1303.5130.00010.274632
1401.6720.0007.852028
1511.7801.7806.432380
\n", 164 | "
" 165 | ], 166 | "text/plain": [ 167 | " cont_africa rugged african_rugged rgdppc_2000\n", 168 | "2 1 0.858 0.858 7.492609\n", 169 | "4 0 3.427 0.000 8.216929\n", 170 | "7 0 0.769 0.000 9.933263\n", 171 | "8 0 0.775 0.000 9.407032\n", 172 | "9 0 2.688 0.000 7.792343\n", 173 | "11 0 0.006 0.000 9.212541\n", 174 | "12 0 0.143 0.000 10.143191\n", 175 | "13 0 3.513 0.000 10.274632\n", 176 | "14 0 1.672 0.000 7.852028\n", 177 | "15 1 1.780 1.780 6.432380" 178 | ] 179 | }, 180 | "metadata": {}, 181 | "output_type": "display_data" 182 | } 183 | ], 184 | "source": [ 185 | "# Display first 10 entries \n", 186 | "display(df[0:10])" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "# The model\n", 194 | "\n", 195 | "Following the approach from Day 1 we will model the data using a Bayesian linear regression model:\n", 196 | "\n", 197 | "The quantitative part of the model is specified as: \n", 198 | "- Number of data dim: $M$\n", 199 | "- Number of data inst: $N$\n", 200 | "- $Y_{i}|\\{{\\bf w}, {\\bf x}_i, b, \\theta \\} \\sim \\mathcal{N}({\\bf w}^T{\\bf x}_i +b, 1/\\theta)$ \n", 201 | "- ${\\bf W} \\sim {\\mathcal N}({\\bf 0}, \\gamma_w^{-1}{\\bf I}_{M\\times M})$\n", 202 | "- $b\\sim {\\mathcal N}(0,\\gamma_b^{-1})$" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "## Helper-routine: Calculate ELBO" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 4, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "def calculate_ELBO(x_data, y_data, gamma_w, gamma_b, theta, q_w_mean, q_w_prec, q_b_mean, q_b_prec):\n", 219 | " \"\"\"\n", 220 | " Helper routine: Calculate ELBO. Data is the sampled x and y values, gamma_w and gamma_b are the prior precisions \n", 221 | " over the weights and intercdpt, respectively, and theta is the prior precision associated with y. 
Everything \n", 222 | " prefixed with a 'q' relates to the variational posterior.\n", 223 | " \n", 224 | " Note: This function obviously only works for this particular model and is not a general solution.\n", 225 | "\n", 226 | " :param x_data: The predictors\n", 227 | " :param y_data: The response variable\n", 228 | " :param gamma_w: prior precision for the weights\n", 229 | " :param gamma_b: prior precision for the intercept\n", 230 | " :param theta: prior precision for y\n", 231 | " :param q_w_mean: VB posterior mean for the distribution of the weights w \n", 232 | " :param q_w_prec: VB posterior precision (diagonal matrix) for the distribution of the weights w \n", 233 | " :param q_b_mean: VB posterior mean for the intercept b\n", 234 | " :param q_b_prec: VB posterior precision for the intercept b\n", 235 | " :return: the ELBO\n", 236 | " \"\"\"\n", 237 | " \n", 238 | " # We calculate the ELBO as E_q log p(y,x,w,b) - E_q log q(w,b), where\n", 239 | " # log p(y,x,w) = sum_i log p(y|x,w,b) + log p(w) + log p(b)\n", 240 | " # log q(w,b) = log q(w) + log q(b)\n", 241 | "\n", 242 | " M = x_data.shape[1]\n", 243 | "\n", 244 | " # E_q log p(w)\n", 245 | " E_log_p = -0.5 * M * np.log(2 * np.pi) + 0.5 * M * gamma_w - 0.5 * gamma_w * np.sum(np.diagonal(np.linalg.inv(q_w_prec))\n", 246 | " + (q_w_mean*q_w_mean).flatten())\n", 247 | " # E_q log p(b)\n", 248 | " E_log_p += -0.5 * np.log(2 * np.pi) + 0.5 * np.log(gamma_b) - 0.5 * gamma_b * (1/q_b_prec + q_b_mean**2)\n", 249 | "\n", 250 | " # sum_i E_q log p(y|x,w,b)\n", 251 | " E_w_w = np.linalg.inv(q_w_prec) + q_w_mean @ q_w_mean.transpose()\n", 252 | " E_b_b = 1/q_b_prec + q_b_mean**2\n", 253 | " for i in range(x_data.shape[0]):\n", 254 | " E_x_ww_x = np.matmul(x_data[i, :].transpose(), np.matmul(E_w_w, x_data[i, :]))\n", 255 | " E_log_p += -0.5 * np.log(2 * np.pi) + 0.5 * np.log(theta) \\\n", 256 | " - 0.5 * theta * (y_data[i]**2 + E_x_ww_x + E_b_b\n", 257 | " + 2 * q_b_mean * np.matmul(q_w_mean.transpose(), x_data[i, 
:])\n", 258 | " - 2 * y_data[i] * np.matmul(q_w_mean.transpose(), x_data[i,:])\n", 259 | " - 2 * y_data[i] * q_b_mean)\n", 260 | "\n", 261 | " # Entropy of q_b\n", 262 | " ent = 0.5 * np.log(1 * np.pi * np.exp(1) / q_b_prec)\n", 263 | " ent += 0.5 * np.log(np.linalg.det(2 * np.pi * np.exp(1) * np.linalg.inv(q_w_prec)))\n", 264 | "\n", 265 | " return E_log_p - ent" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "# Full mean field\n", 273 | "First we consider a full mean field approach, where the variational approximation factorizes as\n", 274 | "$$\n", 275 | "q({\\bf w}, b) = q(b)\\prod _{i=1}^Mq(w_i)\n", 276 | "$$\n", 277 | "\n", 278 | "The following method codes the variational updating equation for the linear regression weights, $\\textbf{W}$, derived on slide number 11 (page 39). " 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "# The variational updating rule for weight component 'comp'. 
Observe that this is a direct implementation of the \n", 288 | "# updating rule from the slide.\n", 289 | "def update_w_comp(x_data, y_data, gamma_w, theta, q_w_mean, q_w_prec, q_b_mean, comp):\n", 290 | "\n", 291 | " # Length of weight vector\n", 292 | " M = x_data.shape[1]\n", 293 | " # The precision (a scalar)\n", 294 | " tau = gamma_w\n", 295 | " # The mean (a scalar)\n", 296 | " mu = 0.0\n", 297 | " for i in range(x_data.shape[0]):\n", 298 | " tau += theta * x_data[i, comp]**2\n", 299 | " mu += (y_data[i] - q_b_mean - (np.sum(x_data[i, :] @ q_w_mean) - x_data[i, comp]*q_w_mean[comp])) \\\n", 300 | " * x_data[i, comp]\n", 301 | " mu = theta * 1/tau * mu\n", 302 | "\n", 303 | " # Update the appropriate entries in the mean vector and precision matrix\n", 304 | " q_w_prec[comp, comp] = tau\n", 305 | " q_w_mean[comp] = mu.item()\n", 306 | "\n", 307 | " return q_w_prec, q_w_mean\n", 308 | "\n" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "Now you have to code the variational updating rule for the intercept $B$. This updating rule only depends on $\\textbf{x}$, $\\textbf{y}$, $\\gamma_b$, $\\theta$ and the mean of the variational posterior distribution over the weights $\\textbf{W}$." 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "# The variational updating rule for the intercept\n", 325 | "def update_b(x_data, y_data, gamma_b, theta, q_w_mean):\n", 326 | "\n", 327 | " # The precision (a scalar)\n", 328 | " tau = ???????\n", 329 | " # The mean (a scalar)\n", 330 | " mu = ???????\n", 331 | "\n", 332 | " return tau, mu" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "Once coded, you can test if it works by running the code below and checking that the ELBO increases monotonically. 
" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "## Do the VB (full mean field)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "# Initialize the variational distributions\n", 356 | "M = x_data.shape[1]\n", 357 | "gamma_w = 1\n", 358 | "gamma_b = 1\n", 359 | "theta = 1\n", 360 | "q_w_mean = np.random.normal(0, 1, (3, 1))\n", 361 | "q_w_prec = np.diag((1, 1, 1)) # We store the precisions for the weights in a diagonal matrix\n", 362 | "q_b_mean = np.random.normal(0, 1)\n", 363 | "q_b_prec = 1\n", 364 | "\n", 365 | "# Keep track of the ELBO values\n", 366 | "elbos = []\n", 367 | "\n", 368 | "# Calculate ELBO\n", 369 | "this_lb = calculate_ELBO(x_data, y_data, gamma_w, gamma_b, theta, q_w_mean, q_w_prec, q_b_mean, q_b_prec)\n", 370 | "elbos.append(this_lb)\n", 371 | "\n", 372 | "# Start iterating\n", 373 | "previous_lb = -np.inf\n", 374 | "print(\"\\n\" + 100 * \"=\" + \"\\n VB iterations:\\n\" + 100 * \"=\")\n", 375 | "for iteration in range(100):\n", 376 | "\n", 377 | " # Update the variational distributions; one update for each component in the weight vector\n", 378 | " for i in range(M):\n", 379 | " q_w_prec, q_w_mean = update_w_comp(x_data, y_data, gamma_w, theta, q_w_mean, q_w_prec, q_b_mean, i)\n", 380 | " q_b_prec, q_b_mean = update_b(x_data, y_data, gamma_b, theta, q_w_mean)\n", 381 | "\n", 382 | " # Calculate the ELBO\n", 383 | " this_lb = calculate_ELBO(x_data, y_data, gamma_w, gamma_b, theta, q_w_mean, q_w_prec, q_b_mean, q_b_prec)\n", 384 | " elbos.append(this_lb)\n", 385 | " print(f\"Iteration {iteration:2d}. ELBO: {this_lb.item():13.7f}\")\n", 386 | " if this_lb < previous_lb:\n", 387 | " raise ValueError(\"ELBO is decreasing. Something is wrong! Goodbye...\")\n", 388 | " \n", 389 | " if iteration > 0 and np.abs((this_lb - previous_lb) / previous_lb) < 1E-8:\n", 390 | " # Very little improvement. 
We are done.\n", 391 | " break\n", 392 | " \n", 393 | " # If we didn't break we need to run again. Update the value for \"previous\"\n", 394 | " previous_lb = this_lb\n", 395 | "print(\"\\n\" + 100 * \"=\" + \"\\n\")\n", 396 | "\n", 397 | "# Store the results\n", 398 | "w_mean_mf = q_w_mean\n", 399 | "w_prec_mf = q_w_prec\n", 400 | "b_mean_mf = q_b_mean\n", 401 | "b_prec_mf = q_b_prec\n", 402 | "\n", 403 | "plt.plot(range(len(elbos)), elbos)\n", 404 | "plt.xlabel('NUmber of iterations')\n", 405 | "plt.ylabel('ELBO')" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "## Model evaluation\n", 413 | "\n", 414 | "To get a sense of the robustness of the model we draw samples from the posterior variational distributions over the weights and intercept; each sample correspond to a regression line" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6), sharey=True)\n", 424 | "fig.suptitle(\"Uncertainty in Regression line \", fontsize=16)\n", 425 | "num_samples = 20\n", 426 | "\n", 427 | "ax[0].scatter(x_data[x_data[:,0]==0,1], y_data[x_data[:,0]==0])\n", 428 | "for _ in range(num_samples):\n", 429 | " b_sample = np.random.normal(loc=q_b_mean, scale=1/np.sqrt(q_b_prec))\n", 430 | " w_sample = np.random.multivariate_normal(mean=q_w_mean.flatten(), cov=np.linalg.inv(q_w_prec))\n", 431 | " ax[0].plot(x_data[x_data[:,0]==0,1], (x_data[x_data[:,0]==0,:] @ w_sample)+b_sample, 'r-')\n", 432 | "ax[0].set(xlabel=\"Terrain Ruggedness Index\",\n", 433 | " ylabel=\"log GDP (2000)\",\n", 434 | " title=\"Non African Nations\")\n", 435 | "\n", 436 | "ax[1].scatter(x_data[x_data[:,0]==1,1], y_data[x_data[:,0]==1])\n", 437 | "for _ in range(num_samples):\n", 438 | " b_sample = np.random.normal(loc=q_b_mean, scale=1/np.sqrt(q_b_prec))\n", 439 | " w_sample = 
np.random.multivariate_normal(mean=q_w_mean.flatten(), cov=np.linalg.inv(q_w_prec))\n", 440 | " ax[1].plot(x_data[x_data[:,0]==1,1], (x_data[x_data[:,0]==1,:] @ w_sample)+b_sample, 'r-')\n", 441 | "ax[1].set(xlabel=\"Terrain Ruggedness Index\",\n", 442 | " ylabel=\"log GDP (2000)\",\n", 443 | " title=\"African Nations\")\n", 444 | "\n", 445 | "plt.show()" 446 | ] 447 | } 448 | ], 449 | "metadata": { 450 | "kernelspec": { 451 | "display_name": "probabilistic.ai", 452 | "language": "python", 453 | "name": "probabilistic.ai" 454 | }, 455 | "language_info": { 456 | "codemirror_mode": { 457 | "name": "ipython", 458 | "version": 3 459 | }, 460 | "file_extension": ".py", 461 | "mimetype": "text/x-python", 462 | "name": "python", 463 | "nbconvert_exporter": "python", 464 | "pygments_lexer": "ipython3", 465 | "version": "3.7.0" 466 | } 467 | }, 468 | "nbformat": 4, 469 | "nbformat_minor": 2 470 | } 471 | -------------------------------------------------------------------------------- /Day2/students_simple_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Simple model description\n", 8 | "In this code-task we work with a fairly simple model, where we have observations $x_i$, $i=1,\\ldots N$, that we assume follow a Gaussian distribution. The mean and precision (inverse variance) are unknown, so we model them in Bayesian way: The mean denoted by the random variable $\\mu$ is a Gaussian with a priori mean $0$ and precision $\\tau$. The precision of the data generating process is modelled using the random variable $\\gamma$. 
$\\gamma$ is a priori Gamma distributed with parameters $\\alpha$ (shape) and $\\beta$ (rate).\n", 9 | "\n", 10 | "$$\n", 11 | "\\mu \\sim Normal(0,\\tau^{-1})\\\\\n", 12 | "\\gamma \\sim Gamma(\\alpha,\\beta)\\\\\n", 13 | "x_i \\sim Normal(\\mu, \\gamma^{-1})\n", 14 | "$$\n", 15 | "\n", 16 | "In total, the model is thus like this: $\\mu \\rightarrow X_i \\leftarrow \\gamma$ (hyper-parameters not shown)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### Imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy as np\n", 33 | "from scipy import special, stats\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "%matplotlib notebook" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Startup: Define priors, and sample data" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# Define priors\n", 52 | "alpha_prior, beta_prior = 1E-2, 1E-2 # Parameters for the prior for the precision of x\n", 53 | "tau_prior = 1E-6 # A priori precision of mu\n", 54 | "\n", 55 | "# Sample data\n", 56 | "np.random.seed(123)\n", 57 | "N = 4\n", 58 | "correct_mean = 5\n", 59 | "correct_precision = 1\n", 60 | "x = np.random.normal(loc=correct_mean, scale=1./np.sqrt(correct_precision), size=N)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Helper-routine: Make plot of posterior" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "def plot_posterior(posterior_mean_mu, posterior_prec_mu,\n", 77 | " posterior_alpha_gamma, posterior_beta_gamma,\n", 78 | " correct_mean, correct_precision):\n", 79 | " mu_range = np.linspace(posterior_mean_mu - 
5./np.sqrt(posterior_prec_mu),\n", 80 | " posterior_mean_mu + 5. / np.sqrt(posterior_prec_mu), 500).astype(np.float32)\n", 81 | " precision_range = np.linspace(1E-2, 3, 500).astype(np.float32)\n", 82 | " mu_mesh, precision_mesh = np.meshgrid(mu_range, precision_range)\n", 83 | " variational_log_pdf = \\\n", 84 | " stats.norm.logpdf(mu_mesh, loc=posterior_mean_mu, scale=1. / np.sqrt(posterior_prec_mu)) + \\\n", 85 | " stats.gamma.logpdf(x=precision_mesh,\n", 86 | " a=posterior_alpha_gamma,\n", 87 | " scale=1. / posterior_beta_gamma)\n", 88 | " plt.figure()\n", 89 | " plt.contour(mu_mesh, precision_mesh, variational_log_pdf, 25)\n", 90 | " plt.plot(correct_mean, correct_precision, \"bo\")\n", 91 | " plt.title('Posterior over $(\\mu, \\\\tau)$. Blue dot: True parameters')\n", 92 | " plt.xlabel(\"Mean $\\mu$\")\n", 93 | " plt.ylabel(\"Precision $\\\\tau$\")" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Helper-routine: Calculate ELBO" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 4, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "def calculate_lower_bound(data, tau, alpha, beta, nu_star, tau_star, alpha_star, beta_star):\n", 110 | " \"\"\"\n", 111 | " Helper routine: Calculate ELBO. 
Data is the sampled x-values, anything without a star relates to the prior,\n", 112 | " everything _with_ a star relates to the variational posterior.\n", 113 | " Note that we have no nu without a star; I am simplifying by forcing this to be zero a priori\n", 114 | "\n", 115 | " Note: This function obviously only works when the model is as in this code challenge,\n", 116 | " and is not a general solution.\n", 117 | "\n", 118 | " :param data: The sampled data\n", 119 | " :param tau: prior precision for mu, the mean for the data generation\n", 120 | " :param alpha: prior shape of dist for gamma, the precision of the data generation\n", 121 | " :param beta: prior rate of dist for gamma, the precision of the data generation\n", 122 | " :param nu_star: VB posterior mean for the distribution of mu - the mean of the data generation\n", 123 | " :param tau_star: VB posterior precision for the distribution of mu - the mean of the data generation\n", 124 | " :param alpha_star: VB posterior shape of dist for gamma, the precision of the data generation\n", 125 | " :param beta_star: VB posterior rate of dist for gamma, the precision of the data generation\n", 126 | " :return: the ELBO\n", 127 | " \"\"\"\n", 128 | "\n", 129 | " # We calculate ELBO as E_q log p(x,z) - E_q log q(z)\n", 130 | " # log p(x,z) here is log p(mu) + log p(gamma) + \sum_i log p(x_i | mu, gamma)\n", 131 | "\n", 132 | " # E_q log p(mu)\n", 133 | " log_p = -.5 * np.log(2 * np.pi) + .5 * np.log(tau) - .5 * tau * (1 / tau_star + nu_star * nu_star)\n", 134 | "\n", 135 | " # E_q log p(gamma)\n", 136 | " log_p = log_p + alpha * np.log(beta) + \\\n", 137 | " (alpha - 1) * (special.digamma(alpha_star) - np.log(beta_star)) - beta * alpha_star / beta_star\n", 138 | "\n", 139 | " # E_q log p(x_i|mu, gamma)\n", 140 | " for xi in data:\n", 141 | " log_p += -.5 * np.log(2 * np.pi) \\\n", 142 | " + .5 * (special.digamma(alpha_star) - np.log(beta_star)) \\\n", 143 | " - .5 * alpha_star / beta_star * (xi * xi - 2 * xi * 
nu_star + 1 / tau_star + nu_star * nu_star)\n", 144 | "\n", 145 | " # Entropy of mu (Gaussian)\n", 146 | " entropy = .5 * np.log(2 * np.pi * np.exp(1) / tau_star)\n", 147 | " entropy += alpha_star - np.log(beta_star) + special.gammaln(alpha_star) \\\n", 148 | " + (1 - alpha_star) * special.digamma(alpha_star)\n", 149 | "\n", 150 | " return log_p + entropy\n" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## Do the VB" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "The task is to compute a variational approximation of the posterior over the unknown parameters $\\mu$ and $\\gamma$, \n", 165 | "\n", 166 | "$$\n", 167 | "p(\\mu,\\gamma|x_1,\\ldots,x_n) \\approx q(\\mu)q(\\gamma)\n", 168 | "$$\n", 169 | "\n", 170 | "\n", 171 | "We are looking for VB posteriors over $\\mu$ and $\\gamma$. It turns out after some pencil pushing that the posteriors are in the same distributional families as the priors were, so $\\mu$ remains Gaussian, $\\gamma$ remains Gamma distributed. What we need is the updated parameters for these two distributions. We have two parameters to update $q(\\mu)$, which are denoted as `q_mu` and `q_tau`, and another two parameters to update $q(\\gamma)$, which are denoted as `q_alpha` and `q_beta`.\n", 172 | "The parameters of the (prior) distribution $p(\\cdot)$ are called something ending with `_prior`, like `alpha_prior` for $\\alpha$." 
173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "# Initialization\n", 182 | "q_alpha = alpha_prior\n", 183 | "q_beta = beta_prior\n", 184 | "q_mu = 0\n", 185 | "q_tau = tau_prior\n", 186 | "previous_lb = -np.inf\n", 187 | "\n", 188 | "# Start iterating\n", 189 | "print(\"\\n\" + 100 * \"=\" + \"\\n VB iterations:\\n\" + 100 * \"=\")\n", 190 | "for iteration in range(1000):\n", 191 | " # Update gamma distribution\n", 192 | " q_alpha = ?????\n", 193 | " q_beta = ?????\n", 194 | " expected_gamma = ?????\n", 195 | " \n", 196 | " # Update Gaussian distribution\n", 197 | " q_tau = ?????\n", 198 | " q_mu = ?????\n", 199 | " \n", 200 | " # Calculate Lower-bound\n", 201 | " this_lb = calculate_lower_bound(data=x, tau=tau_prior, alpha=alpha_prior, beta=beta_prior,\n", 202 | " nu_star=q_mu, tau_star=q_tau, alpha_star=q_alpha, beta_star=q_beta)\n", 203 | "\n", 204 | " print(\"{:2d}. alpha: {:6.3f}, beta: {:12.3f}, nu: {:6.3f}, tau: {:6.3f}, ELBO: {:12.7f}\".format(\n", 205 | " iteration + 1, q_alpha, q_beta, q_mu, q_tau, this_lb))\n", 206 | " \n", 207 | " if this_lb < previous_lb:\n", 208 | " raise ValueError(\"ELBO is decreasing. Something is wrong! Goodbye...\")\n", 209 | " \n", 210 | " if iteration > 0 and np.abs((this_lb - previous_lb) / previous_lb) < 1E-8:\n", 211 | " # Very little improvement. We are done.\n", 212 | " break\n", 213 | " \n", 214 | " # If we didn't break we need to run again. Update the value for \"previous\"\n", 215 | " previous_lb = this_lb\n", 216 | " \n", 217 | "\n", 218 | "print(\"\\n\" + 100 * \"=\" + \"\\n Result:\\n\" + 100 * \"=\")\n", 219 | "print(\"E[mu] = {:5.3f} with data average {:5.3f} and prior mean {:5.3f}.\".format(q_mu, np.mean(x), 0.))\n", 220 | "print(\"E[gamma] = {:5.3f} with inverse of data covariance {:5.3f} and prior {:5.3f}.\".format(\n", 221 | " q_alpha / q_beta, 1. 
/ np.cov(x), alpha_prior / beta_prior))" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "### Make plot of Variational Bayes posterior" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "plot_posterior(q_mu, q_tau, q_alpha, q_beta, correct_mean, correct_precision)\n", 238 | "plt.show()" 239 | ] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "probabilistic.ai", 245 | "language": "python", 246 | "name": "probabilistic.ai" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.7.0" 259 | } 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 2 263 | } 264 | -------------------------------------------------------------------------------- /Day3/.ipynb_checkpoints/VAE-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Adapt the variational auto encoder\n", 8 | "\n", 9 | "Below you will find an implementation of a VAE for the MNIST data. To allow for faster learning time, we only consider the digits 5, 6, and 7 and only the first 100 samples of those digits.\n", 10 | "\n", 11 | "In this exercise, you should familiarize yourself with the implementation below and experiment with the structure of the VAE specification in order to emphasize digit separation in the latent space and the generation of images when sampling from the latent space.\n", 12 | "\n", 13 | "Part of the implementation is based on code from the official Pyro examples." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import torch\n", 24 | "import torchvision.datasets as datasets\n", 25 | "import torch.nn as nn\n", 26 | "import torchvision.transforms as transforms\n", 27 | "import pyro\n", 28 | "import pyro.distributions as dist\n", 29 | "from pyro.infer import SVI, Trace_ELBO\n", 30 | "from pyro.optim import Adam\n", 31 | "import datetime\n", 32 | "import os\n", 33 | "import matplotlib.gridspec as gridspec\n", 34 | "from matplotlib import pyplot\n", 35 | "import matplotlib.pyplot as plt\n", 36 | "from scipy.stats import norm" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Get the MNIST data\n", 44 | "\n", 45 | "We will wrap the MNIST data set in a PyTorch data loader. " 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "scrolled": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def setup_data_loader(batch_size=64):\n", 57 | " #data = datasets.MNIST('./data', train=True, download=True,\n", 58 | " # transform=transforms.Compose([\n", 59 | " # transforms.ToTensor(),\n", 60 | " # transforms.Normalize((0.1307,), (0.3081,))\n", 61 | " # ]))\n", 62 | " data = datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))\n", 63 | " \n", 64 | " # We only select the digits 5, 6, and 7 and only the first 100 of each of these\n", 65 | " # digits\n", 66 | " selector = np.array([], dtype=int)\n", 67 | " for i in [5, 6, 7]:\n", 68 | " selector = np.concatenate((selector, np.where(data.targets == i)[0][:100]))\n", 69 | " data.data = data.data[selector, :, :]\n", 70 | " data.targets = data.targets[selector]\n", 71 | " \n", 72 | " # Binarize the data\n", 73 | " data.data[data.data<128] = 0\n", 74 | " data.data[data.data>=128] = 1\n", 75 | "\n", 76 | " data.data = 
data.data.type(torch.float)\n", 77 | " \n", 78 | " # Put the data within a data loader \n", 79 | " train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)\n", 80 | " return train_loader\n", 81 | "\n", 82 | "\n", 83 | "train_loader = setup_data_loader(batch_size=300)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 3, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAA5NJREFUeJzt3VFuwjAUAMG64v5Xdi9QoGowCd6ZbwQBafU+XhzGnPML6Pk++wKAc4gfosQPUeKHKPFDlPghSvwQJX6IEj9E3d75YWMMtxPCYnPO8ZfXmfwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RIkfosQPUeKHKPFDlPghSvwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0SJH6LED1Hih6jb2RfAtc05l733GGPZe/OcyQ9R4oco8UOU+CFK/BAlfogSP0TZ88et3OMf/Wz3Aaxl8kOU+CFK/BAlfogSP0SJH6Ks+jZ3dJW3ct125poRkx+yxA9R4oco8UOU+CFK/BAlfoiy59/AkX35lY/NOvK7lskPUeKHKPFDlPghSvwQJX6IEj9E2fNvzi6ce0x+iBI/RIkfosQPUeKHKPFDlPghyp7/A3i+PSuY/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RIkfosQPUeKHKPFDlPghSvwQJX6IEj9EiR+ixA9R4ocoj+7+ANW/2a5+73cx+SFK/BAlfogSP0SJH6LED1Hihyh7fpby9+LXZfJDlPghSvwQJX6IEj9EiR+ixA9R9vw8ZE+/L5MfosQPUeKHKPFDlPghSvwQJX6Isue/ALv03z37XTzX/xiTH6LED1HihyjxQ5T4IUr8EGXV9wJWdf9jVXcukx+ixA9R4oco8UOU+CFK/BAlfoiy53+BnffVR+9h2Pm3+XQmP0SJH6LED1HihyjxQ5T4IUr8EGXPH+dZBF0mP0SJH6LED1HihyjxQ5T4IUr8EGXPzyHO638ukx+ixA9R4oco8UOU+CFK/BBl1bc5R3a5x+SHKPFDlPghSvwQJX6IEj9EiR+i7Pl5yJHdfZn8ECV+iBI/RIkfosQPUeKHKPFDlD1/nD1+l8kPUeKHKPFDlPghSvwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0Q5z7855/W5x+SHKPFDlPghSvwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RI0559nXAJzA5Ico8UOU+CFK/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RIkfosQPUeKHKPFDlPghSvwQJX6IEj9EiR+ifgCJ5jwXeHW/1QAAAABJRU5ErkJggg==\n", 94 | "text/plain": [ 95 | "
" 96 | ] 97 | }, 98 | "metadata": {}, 99 | "output_type": "display_data" 100 | } 101 | ], 102 | "source": [ 103 | "def display_image(x):\n", 104 | " plt.axis('off')\n", 105 | " pyplot.imshow(x.reshape((28, 28)), cmap=\"gray\")\n", 106 | " \n", 107 | "toy_image = train_loader.dataset.data[215,:,:]\n", 108 | "display_image(toy_image)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "### Setup the decoder network" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "class Decoder(nn.Module):\n", 125 | " def __init__(self, z_dim, hidden_dim):\n", 126 | " super(Decoder, self).__init__()\n", 127 | " # setup the two linear transformations used\n", 128 | " self.fc1 = nn.Linear(z_dim, hidden_dim)\n", 129 | " self.fc21 = nn.Linear(hidden_dim, 784)\n", 130 | " # setup the non-linearities\n", 131 | " self.softplus = nn.Softplus()\n", 132 | " self.sigmoid = nn.Sigmoid()\n", 133 | "\n", 134 | " def forward(self, z):\n", 135 | " # define the forward computation on the latent z\n", 136 | " # first compute the hidden units\n", 137 | " hidden = self.softplus(self.fc1(z))\n", 138 | " # return the parameter for the output Bernoulli\n", 139 | " # each is of size batch_size x 784\n", 140 | " #loc_img = self.sigmoid(self.fc21(hidden))\n", 141 | " loc_img = self.fc21(hidden)\n", 142 | " return loc_img" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Setup the encoder network" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 5, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "class Encoder(nn.Module):\n", 159 | " def __init__(self, z_dim, hidden_dim):\n", 160 | " super(Encoder, self).__init__()\n", 161 | " # setup the three linear transformations used\n", 162 | " self.fc1 = nn.Linear(784, hidden_dim)\n", 163 | " self.fc21 = 
nn.Linear(hidden_dim, z_dim)\n", 164 | " self.fc22 = nn.Linear(hidden_dim, z_dim)\n", 165 | " # setup the non-linearities\n", 166 | " self.softplus = nn.Softplus()\n", 167 | "\n", 168 | " def forward(self, x):\n", 169 | " # define the forward computation on the image x\n", 170 | " # first shape the mini-batch to have pixels in the rightmost dimension\n", 171 | " x = x.reshape(-1, 784)\n", 172 | " # then compute the hidden units\n", 173 | " hidden = self.softplus(self.fc1(x))\n", 174 | " # then return a mean vector and a (positive) square root covariance\n", 175 | " # each of size batch_size x z_dim\n", 176 | " z_loc = self.fc21(hidden)\n", 177 | " z_scale = torch.exp(self.fc22(hidden))\n", 178 | " return z_loc, z_scale" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Packaging it all together" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 6, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "class VAE(nn.Module):\n", 195 | " # by default our latent space is 2-dimensional\n", 196 | " # and we use 400 hidden units\n", 197 | " def __init__(self, z_dim=2, hidden_dim=400, use_cuda=False):\n", 198 | " super(VAE, self).__init__()\n", 199 | " # create the encoder and decoder networks\n", 200 | " self.encoder = Encoder(z_dim, hidden_dim)\n", 201 | " self.decoder = Decoder(z_dim, hidden_dim)\n", 202 | " self.z_dim = z_dim\n", 203 | "\n", 204 | " # define the model p(x|z)p(z)\n", 205 | " def model(self, x):\n", 206 | " # register PyTorch module `decoder` with Pyro\n", 207 | " pyro.module(\"decoder\", self.decoder)\n", 208 | " with pyro.plate(\"data\", x.shape[0]):\n", 209 | " # setup hyperparameters for prior p(z)\n", 210 | " z_loc = x.new_zeros(torch.Size((x.shape[0], self.z_dim)))\n", 211 | " z_scale = x.new_ones(torch.Size((x.shape[0], self.z_dim)))\n", 212 | " # sample from prior (value will be sampled by guide when computing the ELBO)\n", 213 | " z = 
pyro.sample(\"latent\", dist.Normal(z_loc, z_scale).to_event(1))\n", 214 | " # decode the latent code z\n", 215 | " loc_img = self.decoder.forward(z)\n", 216 | " # score against actual images\n", 217 | " pyro.sample(\"obs\", dist.Bernoulli(logits=loc_img).to_event(1), obs=x.reshape(-1, 784))\n", 218 | " #pyro.sample(\"obs\", dist.Bernoulli(loc_img).to_event(1), obs=x.reshape(-1, 784))\n", 219 | "\n", 220 | " # define the guide (i.e. variational distribution) q(z|x)\n", 221 | " def guide(self, x):\n", 222 | " # register PyTorch module `encoder` with Pyro\n", 223 | " pyro.module(\"encoder\", self.encoder)\n", 224 | " with pyro.plate(\"data\", x.shape[0]):\n", 225 | " # use the encoder to get the parameters used to define q(z|x)\n", 226 | " z_loc, z_scale = self.encoder.forward(x)\n", 227 | " # sample the latent code z\n", 228 | " pyro.sample(\"latent\", dist.Normal(z_loc, z_scale).to_event(1))\n", 229 | "\n", 230 | " # define a helper function for reconstructing images\n", 231 | " def reconstruct_img(self, x):\n", 232 | " # encode image x\n", 233 | " z_loc, z_scale = self.encoder(x)\n", 234 | " # sample in latent space\n", 235 | " z = dist.Normal(z_loc, z_scale).sample()\n", 236 | " # decode the image (note we don't sample in image space)\n", 237 | " loc_img = self.decoder(z)\n", 238 | " return loc_img\n", 239 | "\n", 240 | " def sample_images(self, dim=10):\n", 241 | "\n", 242 | " plt.figure(figsize=(dim, dim))\n", 243 | " gs1 = gridspec.GridSpec(dim, dim)\n", 244 | " gs1.update(wspace=0.025, hspace=0.05) # set the spacing between axes.\n", 245 | "\n", 246 | " z_1 = norm.ppf(np.linspace(0.00001, 0.99999, dim), loc=0, scale=1)\n", 247 | " z_2 = norm.ppf(np.linspace(0.00001, 0.99999, dim), loc=0, scale=1)\n", 248 | " for j in range(dim):\n", 249 | " for i in range(dim):\n", 250 | " x_val = self.decoder.forward(torch.tensor([z_1[i], z_2[j]], dtype=torch.float32))\n", 251 | " plt.subplot(gs1[i*dim+j])\n", 252 | " plt.axis('off')\n", 253 | " 
plt.imshow(x_val.detach().numpy().reshape((28, 28)), cmap=\"gray_r\")\n", 254 | " plt.show()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "### Setup training (single epoch)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 7, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "def train(svi, train_loader):\n", 271 | " # initialize loss accumulator\n", 272 | " epoch_loss = 0.\n", 273 | " # do a training epoch over each mini-batch x returned\n", 274 | " # by the data loader\n", 275 | " for x, _ in train_loader:\n", 276 | " # do ELBO gradient and accumulate loss\n", 277 | " epoch_loss += svi.step(x)\n", 278 | "\n", 279 | " # return epoch loss\n", 280 | " normalizer_train = len(train_loader.dataset)\n", 281 | " total_epoch_loss_train = epoch_loss / normalizer_train\n", 282 | " return total_epoch_loss_train" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "### Perform learning" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 8, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "[epoch 000] average training loss: 561.9964\n", 302 | "[epoch 100] average training loss: 20.7505\n", 303 | "[epoch 200] average training loss: 19.0819\n", 304 | "[epoch 300] average training loss: 18.7468\n", 305 | "[epoch 400] average training loss: 18.1215\n", 306 | "[epoch 500] average training loss: 17.7618\n", 307 | "[epoch 600] average training loss: 17.4973\n", 308 | "[epoch 700] average training loss: 17.4621\n", 309 | "[epoch 800] average training loss: 17.3345\n", 310 | "[epoch 900] average training loss: 17.1521\n" 311 | ] 312 | }, 313 | { 314 | "data": { 315 | "image/png": "\n", 316 | "text/plain": [ 317 | "
" 318 | ] 319 | }, 320 | "metadata": {}, 321 | "output_type": "display_data" 322 | } 323 | ], 324 | "source": [ 325 | "vae = VAE(z_dim=2, hidden_dim=400)\n", 326 | "\n", 327 | "# Run options\n", 328 | "LEARNING_RATE = 1.0e-2\n", 329 | "\n", 330 | "# Run only for a single iteration for testing\n", 331 | "NUM_EPOCHS = 1000\n", 332 | "\n", 333 | "#train_loader = setup_data_loader(batch_size=300)\n", 334 | "\n", 335 | "# clear param store\n", 336 | "pyro.clear_param_store()\n", 337 | "\n", 338 | "# setup the optimizer\n", 339 | "adam_args = {\"lr\": LEARNING_RATE}\n", 340 | "optimizer = Adam(adam_args)\n", 341 | "\n", 342 | "# setup the inference algorithm\n", 343 | "svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())\n", 344 | "train_elbo = []\n", 345 | "# training loop\n", 346 | "for epoch in range(NUM_EPOCHS):\n", 347 | " total_epoch_loss_train = train(svi, train_loader)\n", 348 | " train_elbo.append(-total_epoch_loss_train)\n", 349 | " if (epoch % 100) == 0:\n", 350 | " print(\"[epoch %03d] average training loss: %.4f\" % (epoch, total_epoch_loss_train))\n", 351 | "\n", 352 | "plt.plot(range(len(train_elbo)), train_elbo)\n", 353 | "plt.xlabel(\"Number of iterations\")\n", 354 | "plt.ylabel(\"ELBO\")\n", 355 | "plt.show()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "### Plot the data in the embedding space" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 9, 368 | "metadata": { 369 | "scrolled": true 370 | }, 371 | "outputs": [ 372 | { 373 | "data": { 374 | "image/png": "\n", 375 | "text/plain": [ 376 | "
" 377 | ] 378 | }, 379 | "metadata": {}, 380 | "output_type": "display_data" 381 | } 382 | ], 383 | "source": [ 384 | "for x, x_l in train_loader:\n", 385 | " z_loc, z_scale = vae.encoder(x)\n", 386 | "\n", 387 | "legends = [\"Digit 5\", \"Digit 6\", \"Digit 7\"]\n", 388 | "z_loc = z_loc.detach().numpy()\n", 389 | "for idx, i in enumerate([5,6,7]):\n", 390 | " plt.scatter(z_loc[x_l.numpy()==i,0], z_loc[x_l.numpy()==i,1], label=legends[idx])\n", 391 | "plt.legend()\n", 392 | "plt.show()" 393 | ] 394 | } 395 | ], 396 | "metadata": { 397 | "kernelspec": { 398 | "display_name": "probabilistic.ai", 399 | "language": "python", 400 | "name": "probabilistic.ai" 401 | }, 402 | "language_info": { 403 | "codemirror_mode": { 404 | "name": "ipython", 405 | "version": 3 406 | }, 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python", 409 | "name": "python", 410 | "nbconvert_exporter": "python", 411 | "pygments_lexer": "ipython3", 412 | "version": "3.7.0" 413 | } 414 | }, 415 | "nbformat": 4, 416 | "nbformat_minor": 2 417 | } 418 | -------------------------------------------------------------------------------- /Day3/.ipynb_checkpoints/solution_simple_model-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import torch\n", 18 | "from torch.distributions import constraints\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "\n", 21 | "import pyro\n", 22 | "from pyro.distributions import Normal, Gamma, MultivariateNormal\n", 23 | "from pyro.infer import SVI, Trace_ELBO\n", 24 | "from pyro.optim import Adam\n", 25 | "import pyro.optim as optim" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Generate some data" 33 
| ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# Sample data\n", 42 | "np.random.seed(123)\n", 43 | "N = 100\n", 44 | "correct_mean = 5\n", 45 | "correct_precision = 1\n", 46 | "data = torch.tensor(np.random.normal(loc=correct_mean, scale=np.sqrt(1./correct_precision), size=N), dtype=torch.float)\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Our model specification" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "def model(data):\n", 63 | " gamma = pyro.sample(\"gamma\", Gamma(torch.tensor(1.), torch.tensor(1.)))\n", 64 | " mu = pyro.sample(\"mu\", Normal(torch.zeros(1), torch.tensor(10000.0)))\n", 65 | " with pyro.plate(\"data\", len(data)):\n", 66 | " pyro.sample(\"x\", Normal(loc=mu, scale=torch.sqrt(1. / gamma)), obs=data)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Our guide specification" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "def guide(data=None):\n", 83 | " rate = pyro.param(\"rate\", torch.tensor(1.))\n", 84 | " conc = pyro.param(\"conc\", torch.tensor(1.))\n", 85 | " pyro.sample(\"gamma\", Gamma(rate, conc))\n", 86 | "\n", 87 | " mu_mean = pyro.param(\"mu_mean\", torch.tensor(0.))\n", 88 | " mu_scale = pyro.param(\"mu_scale\", torch.tensor(1.))\n", 89 | " pyro.sample(\"mu\", Normal(mu_mean, mu_scale))" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## Do learning" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "[epoch 000] average training loss: 1443.1143\n", 109 | 
"[epoch 500] average training loss: 336.6232\n", 110 | "[epoch 1000] average training loss: 243.5829\n", 111 | "[epoch 1500] average training loss: 178.8034\n", 112 | "[epoch 2000] average training loss: 179.9048\n", 113 | "[epoch 2500] average training loss: 187.7421\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "# setup the optimizer\n", 119 | "adam_args = {\"lr\": 0.01}\n", 120 | "optimizer = Adam(adam_args)\n", 121 | "\n", 122 | "pyro.clear_param_store()\n", 123 | "svi = SVI(model, guide, optimizer, loss=Trace_ELBO(), num_samples=10)\n", 124 | "train_elbo = []\n", 125 | "# training loop\n", 126 | "for epoch in range(3000):\n", 127 | " loss = svi.step(data)\n", 128 | " train_elbo.append(-loss)\n", 129 | " if (epoch % 500) == 0:\n", 130 | " print(\"[epoch %03d] average training loss: %.4f\" % (epoch, loss))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 6, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "rate tensor(1.8184, requires_grad=True)\n", 143 | "conc tensor(2.1202, requires_grad=True)\n", 144 | "mu_mean tensor(5.0471, requires_grad=True)\n", 145 | "mu_scale tensor(0.0859, requires_grad=True)\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "for name, value in pyro.get_param_store().items():\n", 151 | " print(name, pyro.param(name))" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 7, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "image/png": "\n", 162 | "text/plain": [ 163 | "
" 164 | ] 165 | }, 166 | "metadata": {}, 167 | "output_type": "display_data" 168 | } 169 | ], 170 | "source": [ 171 | "plt.plot(range(len(train_elbo)), train_elbo)\n", 172 | "plt.xlabel(\"Number of iterations\")\n", 173 | "plt.ylabel(\"ELBO\")\n", 174 | "plt.show()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "probabilistic.ai", 188 | "language": "python", 189 | "name": "probabilistic.ai" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.7.0" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /Day3/BBVI-gradient-variance.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/BBVI-gradient-variance.eps -------------------------------------------------------------------------------- /Day3/BBVI_exercise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/BBVI_exercise.png -------------------------------------------------------------------------------- /Day3/FA_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/FA_model.png 
-------------------------------------------------------------------------------- /Day3/VAE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Adapt the variational auto encoder\n", 8 | "\n", 9 | "Below you will find an implementation of a VAE for the MNIST data. To allow for faster learning time, we only consider the digits 5, 6, and 7 and only the first 100 samples of those digits.\n", 10 | "\n", 11 | "In this exercise, you should familiarize yourself with the implementation below and experiment with the structure of the VAE specification in order to emphasize digit separation in the latent space and the generation of images when sampling from the latent space.\n", 12 | "\n", 13 | "Part of the implementation is based on code from the official Pyro examples." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import torch\n", 24 | "import torchvision.datasets as datasets\n", 25 | "import torch.nn as nn\n", 26 | "import torchvision.transforms as transforms\n", 27 | "import pyro\n", 28 | "import pyro.distributions as dist\n", 29 | "from pyro.infer import SVI, Trace_ELBO\n", 30 | "from pyro.optim import Adam\n", 31 | "import datetime\n", 32 | "import os\n", 33 | "import matplotlib.gridspec as gridspec\n", 34 | "from matplotlib import pyplot\n", 35 | "import matplotlib.pyplot as plt\n", 36 | "from scipy.stats import norm" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Get the MNIST data\n", 44 | "\n", 45 | "We will wrap the MNIST data set in a PyTorch data loader. 
" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "scrolled": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def setup_data_loader(batch_size=64):\n", 57 | " #data = datasets.MNIST('./data', train=True, download=True,\n", 58 | " # transform=transforms.Compose([\n", 59 | " # transforms.ToTensor(),\n", 60 | " # transforms.Normalize((0.1307,), (0.3081,))\n", 61 | " # ]))\n", 62 | " data = datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))\n", 63 | " \n", 64 | " # We only select the digits 5, 6, and 7 and only the first 100 of each of these\n", 65 | " # digits\n", 66 | " selector = np.array([], dtype=int)\n", 67 | " for i in [5, 6, 7]:\n", 68 | " selector = np.concatenate((selector, np.where(data.targets == i)[0][:100]))\n", 69 | " data.data = data.data[selector, :, :]\n", 70 | " data.targets = data.targets[selector]\n", 71 | " \n", 72 | " # Binarize the data\n", 73 | " data.data[data.data<128] = 0\n", 74 | " data.data[data.data>=128] = 1\n", 75 | "\n", 76 | " data.data = data.data.type(torch.float)\n", 77 | " \n", 78 | " # Put the data within a data loader \n", 79 | " train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)\n", 80 | " return train_loader\n", 81 | "\n", 82 | "\n", 83 | "train_loader = setup_data_loader(batch_size=300)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 3, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAA5NJREFUeJzt3VFuwjAUAMG64v5Xdi9QoGowCd6ZbwQBafU+XhzGnPML6Pk++wKAc4gfosQPUeKHKPFDlPghSvwQJX6IEj9E3d75YWMMtxPCYnPO8ZfXmfwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RIkfosQPUeKHKPFDlPghSvwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0SJH6LED1Hih6jb2RfAtc05l733GGPZe/OcyQ9R4oco8UOU+CFK/BAlfogSP0TZ88et3OMf/Wz3Aaxl8kOU+CFK/BAlfogSP0SJH6Ks+jZ3dJW3ct125poRkx+yxA9R4oco8UOU+CFK/BAlfoiy59/AkX35lY/NOvK7lskPUeKHKPFDlPghSvwQJX6IEj9E2fNvzi6ce0x+iBI/RIkfosQPUeKHKPFDlPghyp7/A3i+PSuY/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RIkfosQPUeKHKPFDlPghSvwQJX6IEj9EiR+ixA9R4ocoj+7+ANW/2a5+73cx+SFK/BAlfogSP0SJH6LED1Hihyh7fpby9+LXZfJDlPghSvwQJX6IEj9EiR+ixA9R9vw8ZE+/L5MfosQPUeKHKPFDlPghSvwQJX6Isue/ALv03z37XTzX/xiTH6LED1HihyjxQ5T4IUr8EGXV9wJWdf9jVXcukx+ixA9R4oco8UOU+CFK/BAlfoiy53+BnffVR+9h2Pm3+XQmP0SJH6LED1HihyjxQ5T4IUr8EGXPH+dZBF0mP0SJH6LED1HihyjxQ5T4IUr8EGXPzyHO638ukx+ixA9R4oco8UOU+CFK/BBl1bc5R3a5x+SHKPFDlPghSvwQJX6IEj9EiR+i7Pl5yJHdfZn8ECV+iBI/RIkfosQPUeKHKPFDlD1/nD1+l8kPUeKHKPFDlPghSvwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0Q5z7855/W5x+SHKPFDlPghSvwQJX6IEj9EiR+ixA9R4oco8UOU+CFK/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RI0559nXAJzA5Ico8UOU+CFK/BAlfogSP0SJH6LED1HihyjxQ5T4IUr8ECV+iBI/RIkfosQPUeKHKPFDlPghSvwQJX6IEj9EiR+ifgCJ5jwXeHW/1QAAAABJRU5ErkJggg==\n", 94 | "text/plain": [ 95 | "
" 96 | ] 97 | }, 98 | "metadata": {}, 99 | "output_type": "display_data" 100 | } 101 | ], 102 | "source": [ 103 | "def display_image(x):\n", 104 | " plt.axis('off')\n", 105 | " pyplot.imshow(x.reshape((28, 28)), cmap=\"gray\")\n", 106 | " \n", 107 | "toy_image = train_loader.dataset.data[215,:,:]\n", 108 | "display_image(toy_image)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "### Setup the decoder network" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "class Decoder(nn.Module):\n", 125 | " def __init__(self, z_dim, hidden_dim):\n", 126 | " super(Decoder, self).__init__()\n", 127 | " # setup the two linear transformations used\n", 128 | " self.fc1 = nn.Linear(z_dim, hidden_dim)\n", 129 | " self.fc21 = nn.Linear(hidden_dim, 784)\n", 130 | " # setup the non-linearities\n", 131 | " self.softplus = nn.Softplus()\n", 132 | " self.sigmoid = nn.Sigmoid()\n", 133 | "\n", 134 | " def forward(self, z):\n", 135 | " # define the forward computation on the latent z\n", 136 | " # first compute the hidden units\n", 137 | " hidden = self.softplus(self.fc1(z))\n", 138 | " # return the parameter for the output Bernoulli\n", 139 | " # each is of size batch_size x 784\n", 140 | " #loc_img = self.sigmoid(self.fc21(hidden))\n", 141 | " loc_img = self.fc21(hidden)\n", 142 | " return loc_img" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Setup the encoder network" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 5, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "class Encoder(nn.Module):\n", 159 | " def __init__(self, z_dim, hidden_dim):\n", 160 | " super(Encoder, self).__init__()\n", 161 | " # setup the three linear transformations used\n", 162 | " self.fc1 = nn.Linear(784, hidden_dim)\n", 163 | " self.fc21 = 
nn.Linear(hidden_dim, z_dim)\n", 164 | " self.fc22 = nn.Linear(hidden_dim, z_dim)\n", 165 | " # setup the non-linearities\n", 166 | " self.softplus = nn.Softplus()\n", 167 | "\n", 168 | " def forward(self, x):\n", 169 | " # define the forward computation on the image x\n", 170 | " # first shape the mini-batch to have pixels in the rightmost dimension\n", 171 | " x = x.reshape(-1, 784)\n", 172 | " # then compute the hidden units\n", 173 | " hidden = self.softplus(self.fc1(x))\n", 174 | " # then return a mean vector and a (positive) square root covariance\n", 175 | " # each of size batch_size x z_dim\n", 176 | " z_loc = self.fc21(hidden)\n", 177 | " z_scale = torch.exp(self.fc22(hidden))\n", 178 | " return z_loc, z_scale" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Packaging it all together" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 6, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "class VAE(nn.Module):\n", 195 | " # by default our latent space is 2-dimensional\n", 196 | " # and we use 400 hidden units\n", 197 | " def __init__(self, z_dim=2, hidden_dim=400, use_cuda=False):\n", 198 | " super(VAE, self).__init__()\n", 199 | " # create the encoder and decoder networks\n", 200 | " self.encoder = Encoder(z_dim, hidden_dim)\n", 201 | " self.decoder = Decoder(z_dim, hidden_dim)\n", 202 | " self.z_dim = z_dim\n", 203 | "\n", 204 | " # define the model p(x|z)p(z)\n", 205 | " def model(self, x):\n", 206 | " # register PyTorch module `decoder` with Pyro\n", 207 | " pyro.module(\"decoder\", self.decoder)\n", 208 | " with pyro.plate(\"data\", x.shape[0]):\n", 209 | " # setup hyperparameters for prior p(z)\n", 210 | " z_loc = x.new_zeros(torch.Size((x.shape[0], self.z_dim)))\n", 211 | " z_scale = x.new_ones(torch.Size((x.shape[0], self.z_dim)))\n", 212 | " # sample from prior (value will be sampled by guide when computing the ELBO)\n", 213 | " z = 
pyro.sample(\"latent\", dist.Normal(z_loc, z_scale).to_event(1))\n", 214 | " # decode the latent code z\n", 215 | " loc_img = self.decoder.forward(z)\n", 216 | " # score against actual images\n", 217 | " pyro.sample(\"obs\", dist.Bernoulli(logits=loc_img).to_event(1), obs=x.reshape(-1, 784))\n", 218 | " #pyro.sample(\"obs\", dist.Bernoulli(loc_img).to_event(1), obs=x.reshape(-1, 784))\n", 219 | "\n", 220 | " # define the guide (i.e. variational distribution) q(z|x)\n", 221 | " def guide(self, x):\n", 222 | " # register PyTorch module `encoder` with Pyro\n", 223 | " pyro.module(\"encoder\", self.encoder)\n", 224 | " with pyro.plate(\"data\", x.shape[0]):\n", 225 | " # use the encoder to get the parameters used to define q(z|x)\n", 226 | " z_loc, z_scale = self.encoder.forward(x)\n", 227 | " # sample the latent code z\n", 228 | " pyro.sample(\"latent\", dist.Normal(z_loc, z_scale).to_event(1))\n", 229 | "\n", 230 | " # define a helper function for reconstructing images\n", 231 | " def reconstruct_img(self, x):\n", 232 | " # encode image x\n", 233 | " z_loc, z_scale = self.encoder(x)\n", 234 | " # sample in latent space\n", 235 | " z = dist.Normal(z_loc, z_scale).sample()\n", 236 | " # decode the image (note we don't sample in image space)\n", 237 | " loc_img = self.decoder(z)\n", 238 | " return loc_img\n", 239 | "\n", 240 | " def sample_images(self, dim=10):\n", 241 | "\n", 242 | " plt.figure(figsize=(dim, dim))\n", 243 | " gs1 = gridspec.GridSpec(dim, dim)\n", 244 | " gs1.update(wspace=0.025, hspace=0.05) # set the spacing between axes.\n", 245 | "\n", 246 | " z_1 = norm.ppf(np.linspace(0.00001, 0.99999, dim), loc=0, scale=1)\n", 247 | " z_2 = norm.ppf(np.linspace(0.00001, 0.99999, dim), loc=0, scale=1)\n", 248 | " for j in range(dim):\n", 249 | " for i in range(dim):\n", 250 | " x_val = self.decoder.forward(torch.tensor([z_1[i], z_2[j]], dtype=torch.float32))\n", 251 | " plt.subplot(gs1[i*dim+j])\n", 252 | " plt.axis('off')\n", 253 | " 
plt.imshow(x_val.detach().numpy().reshape((28, 28)), cmap=\"gray_r\")\n", 254 | " plt.show()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "### Setup training (single epoch)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 7, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "def train(svi, train_loader):\n", 271 | " # initialize loss accumulator\n", 272 | " epoch_loss = 0.\n", 273 | " # do a training epoch over each mini-batch x returned\n", 274 | " # by the data loader\n", 275 | " for x, _ in train_loader:\n", 276 | " # do ELBO gradient and accumulate loss\n", 277 | " epoch_loss += svi.step(x)\n", 278 | "\n", 279 | " # return epoch loss\n", 280 | " normalizer_train = len(train_loader.dataset)\n", 281 | " total_epoch_loss_train = epoch_loss / normalizer_train\n", 282 | " return total_epoch_loss_train" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "### Perform learning" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 8, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "[epoch 000] average training loss: 561.9964\n", 302 | "[epoch 100] average training loss: 20.7505\n", 303 | "[epoch 200] average training loss: 19.0819\n", 304 | "[epoch 300] average training loss: 18.7468\n", 305 | "[epoch 400] average training loss: 18.1215\n", 306 | "[epoch 500] average training loss: 17.7618\n", 307 | "[epoch 600] average training loss: 17.4973\n", 308 | "[epoch 700] average training loss: 17.4621\n", 309 | "[epoch 800] average training loss: 17.3345\n", 310 | "[epoch 900] average training loss: 17.1521\n" 311 | ] 312 | }, 313 | { 314 | "data": { 315 | "image/png": "\n", 316 | "text/plain": [ 317 | "
" 318 | ] 319 | }, 320 | "metadata": {}, 321 | "output_type": "display_data" 322 | } 323 | ], 324 | "source": [ 325 | "vae = VAE(z_dim=2, hidden_dim=400)\n", 326 | "\n", 327 | "# Run options\n", 328 | "LEARNING_RATE = 1.0e-2\n", 329 | "\n", 330 | "# Run only for a single iteration for testing\n", 331 | "NUM_EPOCHS = 1000\n", 332 | "\n", 333 | "#train_loader = setup_data_loader(batch_size=300)\n", 334 | "\n", 335 | "# clear param store\n", 336 | "pyro.clear_param_store()\n", 337 | "\n", 338 | "# setup the optimizer\n", 339 | "adam_args = {\"lr\": LEARNING_RATE}\n", 340 | "optimizer = Adam(adam_args)\n", 341 | "\n", 342 | "# setup the inference algorithm\n", 343 | "svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())\n", 344 | "train_elbo = []\n", 345 | "# training loop\n", 346 | "for epoch in range(NUM_EPOCHS):\n", 347 | " total_epoch_loss_train = train(svi, train_loader)\n", 348 | " train_elbo.append(-total_epoch_loss_train)\n", 349 | " if (epoch % 100) == 0:\n", 350 | " print(\"[epoch %03d] average training loss: %.4f\" % (epoch, total_epoch_loss_train))\n", 351 | "\n", 352 | "plt.plot(range(len(train_elbo)), train_elbo)\n", 353 | "plt.xlabel(\"Number of iterations\")\n", 354 | "plt.ylabel(\"ELBO\")\n", 355 | "plt.show()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "### Plot the data in the embedding space" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 9, 368 | "metadata": { 369 | "scrolled": true 370 | }, 371 | "outputs": [ 372 | { 373 | "data": { 374 | "image/png": "\n", 375 | "text/plain": [ 376 | "
" 377 | ] 378 | }, 379 | "metadata": {}, 380 | "output_type": "display_data" 381 | } 382 | ], 383 | "source": [ 384 | "for x, x_l in train_loader:\n", 385 | " z_loc, z_scale = vae.encoder(x)\n", 386 | "\n", 387 | "legends = [\"Digit 5\", \"Digit 6\", \"Digit 7\"]\n", 388 | "z_loc = z_loc.detach().numpy()\n", 389 | "for idx, i in enumerate([5,6,7]):\n", 390 | " plt.scatter(z_loc[x_l.numpy()==i,0], z_loc[x_l.numpy()==i,1], label=legends[idx])\n", 391 | "plt.legend()\n", 392 | "plt.show()" 393 | ] 394 | } 395 | ], 396 | "metadata": { 397 | "kernelspec": { 398 | "display_name": "probabilistic.ai", 399 | "language": "python", 400 | "name": "probabilistic.ai" 401 | }, 402 | "language_info": { 403 | "codemirror_mode": { 404 | "name": "ipython", 405 | "version": 3 406 | }, 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python", 409 | "name": "python", 410 | "nbconvert_exporter": "python", 411 | "pygments_lexer": "ipython3", 412 | "version": "3.7.0" 413 | } 414 | }, 415 | "nbformat": 4, 416 | "nbformat_minor": 2 417 | } 418 | -------------------------------------------------------------------------------- /Day3/elbo_evolution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/elbo_evolution.pdf -------------------------------------------------------------------------------- /Day3/elbo_evolution_with_1_samples.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/elbo_evolution_with_1_samples.pdf -------------------------------------------------------------------------------- /Day3/reg_model.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/reg_model.png -------------------------------------------------------------------------------- /Day3/simple_pyro_exercise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/simple_pyro_exercise.png -------------------------------------------------------------------------------- /Day3/slides-L3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PGM-Lab/probabilisticAI_tutorials/40b644dccee2d26e469bb5fd32ae01e430e66200/Day3/slides-L3.pdf -------------------------------------------------------------------------------- /Day3/solution_simple_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import torch\n", 18 | "from torch.distributions import constraints\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "\n", 21 | "import pyro\n", 22 | "from pyro.distributions import Normal, Gamma, MultivariateNormal\n", 23 | "from pyro.infer import SVI, Trace_ELBO\n", 24 | "from pyro.optim import Adam\n", 25 | "import pyro.optim as optim" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Generate some data" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# Sample data\n", 42 | "np.random.seed(123)\n", 43 | "N = 100\n", 44 | "correct_mean = 5\n", 45 | "correct_precision = 1\n", 46 | "data = 
torch.tensor(np.random.normal(loc=correct_mean, scale=np.sqrt(1./correct_precision), size=N), dtype=torch.float)\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Our model specification" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "def model(data):\n", 63 | " gamma = pyro.sample(\"gamma\", Gamma(torch.tensor(1.), torch.tensor(1.)))\n", 64 | " mu = pyro.sample(\"mu\", Normal(torch.zeros(1), torch.tensor(10000.0)))\n", 65 | " with pyro.plate(\"data\", len(data)):\n", 66 | " pyro.sample(\"x\", Normal(loc=mu, scale=torch.sqrt(1. / gamma)), obs=data)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Our guide specification" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "def guide(data=None):\n", 83 | " rate = pyro.param(\"rate\", torch.tensor(1.), constraint=constraints.positive)\n", 84 | " conc = pyro.param(\"conc\", torch.tensor(1.), constraint=constraints.positive)\n", 85 | " pyro.sample(\"gamma\", Gamma(conc, rate))\n", 86 | "\n", 87 | " mu_mean = pyro.param(\"mu_mean\", torch.tensor(0.))\n", 88 | " mu_scale = pyro.param(\"mu_scale\", torch.tensor(1.), constraint=constraints.positive)\n", 89 | " pyro.sample(\"mu\", Normal(mu_mean, mu_scale))" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## Do learning" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "[epoch 000] average training loss: 1599.6830\n", 109 | "[epoch 500] average training loss: 546.2211\n", 110 | "[epoch 1000] average training loss: 284.1279\n", 111 | "[epoch 1500] average training loss: 185.3350\n", 112 | "[epoch 2000] 
average training loss: 195.8745\n", 113 | "[epoch 2500] average training loss: 178.9226\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "# setup the optimizer\n", 119 | "adam_args = {\"lr\": 0.01}\n", 120 | "optimizer = Adam(adam_args)\n", 121 | "\n", 122 | "pyro.clear_param_store()\n", 123 | "svi = SVI(model, guide, optimizer, loss=Trace_ELBO(), num_samples=10)\n", 124 | "train_elbo = []\n", 125 | "# training loop\n", 126 | "for epoch in range(3000):\n", 127 | " loss = svi.step(data)\n", 128 | " train_elbo.append(-loss)\n", 129 | " if (epoch % 500) == 0:\n", 130 | " print(\"[epoch %03d] average training loss: %.4f\" % (epoch, loss))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 6, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "rate tensor(1.8581, requires_grad=True)\n", 143 | "conc tensor(2.1757, requires_grad=True)\n", 144 | "mu_mean tensor(5.0201, requires_grad=True)\n", 145 | "mu_scale tensor(0.1044, requires_grad=True)\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "for name, value in pyro.get_param_store().items():\n", 151 | " print(name, pyro.param(name))" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 7, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "image/png": "\n", 162 | "text/plain": [ 163 | "
" 164 | ] 165 | }, 166 | "metadata": {}, 167 | "output_type": "display_data" 168 | } 169 | ], 170 | "source": [ 171 | "plt.plot(range(len(train_elbo)), train_elbo)\n", 172 | "plt.xlabel(\"Number of iterations\")\n", 173 | "plt.ylabel(\"ELBO\")\n", 174 | "plt.show()" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "probabilistic.ai", 181 | "language": "python", 182 | "name": "probabilistic.ai" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.7.0" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 2 199 | } 200 | -------------------------------------------------------------------------------- /Day3/student_BBVI.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Applying BBVI for a simple Gaussian Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 13, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "from scipy.stats import norm\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "import seaborn as sns" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Data" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 14, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Generate data from a simple model: Normal(10, 1)\n", 43 | "data = np.random.normal(loc = 10, scale = 1, size = 100)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Helper function: 
ELBO\n", 51 | "\n", 52 | "Calculate the exact value of the ELBO. Generally one would have to estimate this using sampling, but for this simple model we can evaluate it exactly " 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 15, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "def calculate_lower_bound(tau, q_mu):\n", 62 | " \"\"\"\n", 63 | " Helper routine: Calculate ELBO. Data is the sampled x-values, anything without a star relates to the prior,\n", 64 | " everything _with_ a star relates to the variational posterior.\n", 65 | " Note that we have no nu without a star; I am simplifying by forcing this to be zero a priori\n", 66 | "\n", 67 | " Note: This function obviously only works when the model is as in this code challenge,\n", 68 | " and is not a general solution.\n", 69 | "\n", 70 | " :param data: The sampled data\n", 71 | " :param tau: prior precision for mu, the mean for the data generation\n", 72 | " :param alpha: prior shape of dist for gamma, the precision of the data generation\n", 73 | " :param beta: prior rate of dist for gamma, the precision of the data generation\n", 74 | " :param nu_star: VB posterior mean for the distribution of mu - the mean of the data generation\n", 75 | " :param tau_star: VB posterior precision for the distribution of mu - the mean of the data generation\n", 76 | " :param alpha_star: VB posterior shape of dist for gamma, the precision of the data generation\n", 77 | " :param beta_star: VB posterior shape of dist for gamma, the precision of the data generation\n", 78 | " :return: the ELBO\n", 79 | " \"\"\"\n", 80 | "\n", 81 | " # We calculate ELBO as E_q log p(x,mu) - E_q log q(mu)\n", 82 | " # log p(x,z) here is log p(mu) + \\sum_i log p(x_i | mu, 1)\n", 83 | "\n", 84 | " # E_q log p(mu)\n", 85 | " log_p = -.5 * np.log(2 * np.pi) - .5 * (1/tau) * (1 + q_mu**2)\n", 86 | "\n", 87 | "\n", 88 | " # E_q log p(x_i|mu, 1)\n", 89 | " for xi in data:\n", 90 | " log_p += -.5 * np.log(2 * np.pi) - 
.5 * (xi * xi - 2 * xi * q_mu + 1 + q_mu**2)\n", 91 | "\n", 92 | " # Entropy of mu (Gaussian)\n", 93 | " entropy = .5 * np.log(2 * np.pi * np.exp(1))\n", 94 | "\n", 95 | " return log_p + entropy" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "# Manual estimation of the gradient of the ELBO for the above model" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 4, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Gradient estimator using sampling -- vanilla BBVI\n", 112 | "# We here assume the model X ~ Normal(mu, 1)\n", 113 | "# with unknown mu, that in itself is Normal, mean 0 and standard deviation 1000, \n", 114 | "# so effectively an uninformative prior. \n", 115 | "# The variational distribution for mu is also Normal, with parameter q_mu_lambda\n", 116 | "# -- taking the role of lambda in the calculations -- and variance 1.\n", 117 | "#\n", 118 | "# Note:\n", 119 | "# We can sample from a normal using:\n", 120 | "# * np.random.normal(loc=mu, scale=1, size=1)\n", 121 | "# We can evaluate the normal log-density using\n", 122 | "# * norm.logpdf(sample, loc = mu, scale = std. dev.)\n", 123 | "\n", 124 | "def grad_estimate(q_mu_lambda, samples = 1):\n", 125 | " # sum_grad_estimate will hold the sum as we move along over the samples. 
\n", 126 | " sum_grad_estimate = 0\n", 127 | " for i in range(samples):\n", 128 | " # Sample one example from current best guess for the variational distribution\n", 129 | " mu_sample = np.random.normal(loc=q_mu_lambda, scale=1, size=1)\n", 130 | " \n", 131 | " # Now we want to calculate the contribution from this sample, namely \n", 132 | " # [log p(x, mu_sample) - log q(mu|lambda) ] * grad( log q(mu_sample|lambda) )\n", 133 | " #\n", 134 | " value = ?\n", 135 | "\n", 136 | " # Next grad (log q(mu_sample|lambda))\n", 137 | " # The Normal distribution gives the score function with known variance as - \n", 138 | " grad_q = ?\n", 139 | "\n", 140 | " \n", 141 | " # grad ELBO for this sample is therefore in total given by\n", 142 | " sum_grad_estimate = sum_grad_estimate + grad_q * value\n", 143 | " \n", 144 | " # Divide by number of samples to get average value -- the estimated expectation \n", 145 | " return sum_grad_estimate/samples" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "# Perform BBVI using the estimated gradient" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 11, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | " 100 sample(s) -- Estimate: 9.91943; error 0.8% -- Calc.time: 17.40 sec.\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "import time\n", 170 | "no_loops = 500\n", 171 | "sample_count = 100\n", 172 | "##### Starting point\n", 173 | "q_mu = -10\n", 174 | "start = time.time()\n", 175 | "elbos = []\n", 176 | "lr = 1E-4 \n", 177 | "\n", 178 | "\n", 179 | "#loop a couple of times\n", 180 | "for t in range(no_loops):\n", 181 | " elbos.append(calculate_lower_bound(1000, q_mu))\n", 182 | " q_grad = grad_estimate(q_mu, samples=sample_count)\n", 183 | " q_mu = q_mu + lr * q_grad\n", 184 | "\n", 185 | "print(\"{:4d} sample(s) -- Estimate: {:9.5f}; error {:5.1f}% -- Calc.time: {:5.2f} 
sec.\".format(\n", 186 | " sample_count, float(q_mu), float(10*np.abs(q_mu-10)), time.time() - start))" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "### Exercise\n", 194 | "* Try varying the number of samples used for estimating the gradient. What effect does it have on the results?" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python (TFEnv conda)", 201 | "language": "python", 202 | "name": "tfenv" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.6.8" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /Day3/student_simple_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import torch\n", 18 | "from torch.distributions import constraints\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "\n", 21 | "import pyro\n", 22 | "from pyro.distributions import Normal, Gamma, MultivariateNormal\n", 23 | "from pyro.infer import SVI, Trace_ELBO\n", 24 | "from pyro.optim import Adam\n", 25 | "import pyro.optim as optim" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Generate some data" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# Sample data\n", 42 | "np.random.seed(123)\n", 43 
| "N = 100\n", 44 | "correct_mean = 5\n", 45 | "correct_precision = 1\n", 46 | "data = torch.tensor(np.random.normal(loc=correct_mean, scale=np.sqrt(1./correct_precision), size=N), dtype=torch.float)\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Our model specification" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "# Fill the method with code to define a simple Gaussian model with mean \\mu and precision \\gamma\n", 63 | "def model(data):\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Our guide specification" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "# Define the right guide for the above model, including the variational parameters. \n", 80 | "def guide(data=None):\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Do learning" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "[epoch 000] average training loss: 1599.6830\n", 100 | "[epoch 500] average training loss: 546.2211\n", 101 | "[epoch 1000] average training loss: 284.1279\n", 102 | "[epoch 1500] average training loss: 185.3350\n", 103 | "[epoch 2000] average training loss: 195.8745\n", 104 | "[epoch 2500] average training loss: 178.9226\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "# setup the optimizer\n", 110 | "adam_args = {\"lr\": 0.01}\n", 111 | "optimizer = Adam(adam_args)\n", 112 | "\n", 113 | "pyro.clear_param_store()\n", 114 | "svi = SVI(model, guide, optimizer, loss=Trace_ELBO(), num_samples=10)\n", 115 | "train_elbo = []\n", 116 | "# training loop\n", 117 | "for epoch in range(3000):\n", 118 | " loss = 
svi.step(data)\n", 119 | " train_elbo.append(-loss)\n", 120 | " if (epoch % 500) == 0:\n", 121 | " print(\"[epoch %03d] average training loss: %.4f\" % (epoch, loss))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "rate tensor(1.8581, requires_grad=True)\n", 134 | "conc tensor(2.1757, requires_grad=True)\n", 135 | "mu_mean tensor(5.0201, requires_grad=True)\n", 136 | "mu_scale tensor(0.1044, requires_grad=True)\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "for name, value in pyro.get_param_store().items():\n", 142 | " print(name, pyro.param(name))" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "image/png": "\n", 153 | "text/plain": [ 154 | "
" 155 | ] 156 | }, 157 | "metadata": {}, 158 | "output_type": "display_data" 159 | } 160 | ], 161 | "source": [ 162 | "plt.plot(range(len(train_elbo)), train_elbo)\n", 163 | "plt.xlabel(\"Number of iterations\")\n", 164 | "plt.ylabel(\"ELBO\")\n", 165 | "plt.show()" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "kernelspec": { 171 | "display_name": "probabilistic.ai", 172 | "language": "python", 173 | "name": "probabilistic.ai" 174 | }, 175 | "language_info": { 176 | "codemirror_mode": { 177 | "name": "ipython", 178 | "version": 3 179 | }, 180 | "file_extension": ".py", 181 | "mimetype": "text/x-python", 182 | "name": "python", 183 | "nbconvert_exporter": "python", 184 | "pygments_lexer": "ipython3", 185 | "version": "3.7.0" 186 | } 187 | }, 188 | "nbformat": 4, 189 | "nbformat_minor": 2 190 | } 191 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Day1-Day3 Tutorial of the Nordic Probabilistic AI School 2 | 3 | 4 | Make sure you have installed Python 3.6 (e.g. 
running the command *python -V* on the console) and the following python packages: 5 | - [Numpy](https://www.numpy.org/) 6 | - [Scipy](https://www.scipy.org/) 7 | - [Matplotlib](https://matplotlib.org/) 8 | - [Pandas](https://pandas.pydata.org/) 9 | - [seaborn](https://seaborn.pydata.org/) 10 | - [Pytorch](https://pytorch.org/) 11 | - [TorchVision](https://pypi.org/project/torchvision/) 12 | - [Pyro](http://pyro.ai/) 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: probai 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - blas=1.0=mkl 7 | - ca-certificates=2019.1.23=0 8 | - certifi=2019.3.9=py36_0 9 | - cffi=1.12.3=py36hb5b8e2f_0 10 | - cycler=0.10.0=py36hfc81398_0 11 | - freetype=2.9.1=hb4e5f40_0 12 | - intel-openmp=2019.3=199 13 | - jpeg=9b=he5867d9_2 14 | - kiwisolver=1.1.0=py36h0a44026_0 15 | - libcxx=4.0.1=hcfea43d_1 16 | - libcxxabi=4.0.1=hcfea43d_1 17 | - libedit=3.1.20181209=hb402a30_0 18 | - libffi=3.2.1=h475c297_4 19 | - libgfortran=3.0.1=h93005f0_2 20 | - libpng=1.6.37=ha441bb4_0 21 | - libtiff=4.0.10=hcb84e12_2 22 | - matplotlib=3.1.0=py36h54f8f79_0 23 | - mkl=2019.3=199 24 | - mkl_fft=1.0.12=py36h5e564d8_0 25 | - mkl_random=1.0.2=py36h27c97d8_0 26 | - ncurses=6.1=h0a44026_1 27 | - ninja=1.9.0=py36h04f5b5a_0 28 | - numpy=1.16.4=py36hacdab7b_0 29 | - numpy-base=1.16.4=py36h6575580_0 30 | - olefile=0.46=py36_0 31 | - openssl=1.1.1c=h1de35cc_1 32 | - pandas=0.24.2=py36h0a44026_0 33 | - patsy=0.5.1=py36_0 34 | - pillow=6.0.0=py36hb68e598_0 35 | - pip=19.1.1=py36_0 36 | - pycparser=2.19=py36_0 37 | - pyparsing=2.4.0=py_0 38 | - python=3.6.8=haf84260_0 39 | - python-dateutil=2.8.0=py36_0 40 | - pytorch=1.1.0=py3.6_0 41 | - pytz=2019.1=py_0 42 | - readline=7.0=h1de35cc_5 43 | - scipy=1.2.1=py36h1410ff5_0 44 | - seaborn=0.9.0=py36_0 45 | - setuptools=41.0.1=py36_0 46 | - 
six=1.12.0=py36_0 47 | - sqlite=3.28.0=ha441bb4_0 48 | - statsmodels=0.9.0=py36h1d22016_0 49 | - tk=8.6.8=ha441bb4_0 50 | - torchvision=0.3.0=py36_cuNone_1 51 | - tornado=6.0.2=py36h1de35cc_0 52 | - wheel=0.33.4=py36_0 53 | - xz=5.2.4=h1de35cc_4 54 | - zlib=1.2.11=h1de35cc_3 55 | - zstd=1.3.7=h5bba6e5_0 56 | prefix: /anaconda3/envs/probai 57 | --------------------------------------------------------------------------------