├── .gitignore ├── LICENSE.md ├── README.md ├── docs ├── css │ ├── default.css │ └── highlight.css ├── deodorant.acq-functions.html ├── deodorant.broadcast-functions.html ├── deodorant.core.html ├── deodorant.covar-functions.html ├── deodorant.default-params.html ├── deodorant.gp-toolbox.html ├── deodorant.helper-functions.html ├── deodorant.hmc.html ├── deodorant.hyper-priors.html ├── deodorant.scaling-functions.html ├── index.html └── js │ ├── highlight.min.js │ ├── jquery.min.js │ └── page_effects.js ├── gpl-3.0.txt ├── project.clj └── src └── deodorant ├── acq_functions.clj ├── broadcast_functions.clj ├── core.clj ├── covar_functions.clj ├── default_params.clj ├── gp_toolbox.clj ├── helper_functions.clj ├── hmc.clj ├── hyper_priors.clj └── scaling_functions.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright © Tom Rainforth, Tuan Anh Le, Jan-Willem van de Meent, Michael Osborne and Frank Wood 2 | 3 | Deodorant is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | Deodorant is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the [GNU General Public License](gpl-3.0.txt) along with Deodorant. If not, see [http://www.gnu.org/licenses/](http://www.gnu.org/licenses/). 
14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deodorant: Solving the problems of BO 2 | 3 | Deodorant is a Bayesian optimization package with three core features: 4 | 5 | 1. Domain scaling to exploit problem independent GP hyperpriors 6 | 2. A non-stationary mean function to allow unbounded optimization 7 | 3. External provision of the acquisition function optimizer so that this can incorporate the constraints of the problem (inc equality constraints) and ensure that no invalid points are evaluated. 8 | 9 | The main intended use of the package at present is as the BO component for [BOPP](https://github.com/probprog/bopp): 10 | 11 | Rainforth, T., Le, T. A., van de Meent, J.-W., Osborne, M. A., & Wood, F. (2016). Bayesian Optimization for Probabilistic Programs. In Advances in Neural Information Processing Systems. 12 | 13 | ``` 14 | @incollection{rainforth2016bayesian, 15 | title = {Bayesian Optimization for Probabilistic Programs}, 16 | author = {Rainforth, Tom and Le, Tuan Anh and van de Meent, Jan-Willem and Osborne, Michael A and Wood, Frank}, 17 | booktitle = {Advances in Neural Information Processing Systems 29}, 18 | pages = {280--288}, 19 | year = {2016}, 20 | url = {http://papers.nips.cc/paper/6421-bayesian-optimization-for-probabilistic-programs.pdf} 21 | } 22 | ``` 23 | 24 | which provides all the required inputs automatically given a program. Even when the intention is simply optimization, using BOPP rather than Deodorant directly is currently recommended. The rational of providing Deodorant as its own independent package is to separate out the parts of BOPP that are Anglican dependent and those that are not. As such, one may wish to integrate Deodorant into another similar package that provides all the required inputs. 
25 | 26 | For details on the working of Deodorant, the previously referenced paper and its supplementary material should be consulted. 27 | 28 | ## Installation ## 29 | 30 | To use Deodorant in your own [Leiningen](http://leiningen.org/) projects, just include the dependency in your `project.clj`: 31 | ``` 32 | (defproject foo 33 | ... 34 | :dependencies [... 35 | [deodorant "0.1.3"] 36 | ...]) 37 | ``` 38 | 39 | In your Clojure files, remember to require functions from `core.clj`, e.g.: 40 | ``` 41 | (ns bar 42 | (require [deodorant.core :refer :all])) 43 | ``` 44 | The full documentation can be found [here](https://probprog.github.io/deodorant/). Checkout [core/deodorant](https://probprog.github.io/deodorant/deodorant.core.html#var-deodorant) in particular. 45 | 46 | Though Deodorant has no direct dependency on Anglican, it has the same requirements in terms 47 | of java, Leiningen etc and so we refer the reader to http://www.robots.ox.ac.uk/~fwood/anglican/usage/index.html 48 | and recommend that users follow section 2 in the user start up guide. The above link is also a good starting 49 | point for further links on Clojure etc. 50 | 51 | ## License ## 52 | 53 | Copyright © Tom Rainforth, Tuan Anh Le, Jan-Willem van de Meent, Michael Osborne and Frank Wood 54 | 55 | Deodorant is free software: you can redistribute it and/or modify 56 | it under the terms of the GNU General Public License as published by 57 | the Free Software Foundation, either version 3 of the License, or 58 | (at your option) any later version. 59 | 60 | Deodorant is distributed in the hope that it will be useful, 61 | but WITHOUT ANY WARRANTY; without even the implied warranty of 62 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 63 | GNU General Public License for more details. 64 | 65 | You should have received a copy of the [GNU General Public License](gpl-3.0.txt) along with Deodorant. If not, see [http://www.gnu.org/licenses/](http://www.gnu.org/licenses/). 
66 | -------------------------------------------------------------------------------- /docs/css/default.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Helvetica, Arial, sans-serif; 3 | font-size: 15px; 4 | } 5 | 6 | pre, code { 7 | font-family: Monaco, DejaVu Sans Mono, Consolas, monospace; 8 | font-size: 9pt; 9 | margin: 15px 0; 10 | } 11 | 12 | h1 { 13 | font-weight: normal; 14 | font-size: 29px; 15 | margin: 10px 0 2px 0; 16 | padding: 0; 17 | } 18 | 19 | h2 { 20 | font-weight: normal; 21 | font-size: 25px; 22 | } 23 | 24 | h5.license { 25 | margin: 9px 0 22px 0; 26 | color: #555; 27 | font-weight: normal; 28 | font-size: 12px; 29 | font-style: italic; 30 | } 31 | 32 | .document h1, .namespace-index h1 { 33 | font-size: 32px; 34 | margin-top: 12px; 35 | } 36 | 37 | #header, #content, .sidebar { 38 | position: fixed; 39 | } 40 | 41 | #header { 42 | top: 0; 43 | left: 0; 44 | right: 0; 45 | height: 22px; 46 | color: #f5f5f5; 47 | padding: 5px 7px; 48 | } 49 | 50 | #content { 51 | top: 32px; 52 | right: 0; 53 | bottom: 0; 54 | overflow: auto; 55 | background: #fff; 56 | color: #333; 57 | padding: 0 18px; 58 | } 59 | 60 | .sidebar { 61 | position: fixed; 62 | top: 32px; 63 | bottom: 0; 64 | overflow: auto; 65 | } 66 | 67 | .sidebar.primary { 68 | background: #e2e2e2; 69 | border-right: solid 1px #cccccc; 70 | left: 0; 71 | width: 250px; 72 | } 73 | 74 | .sidebar.secondary { 75 | background: #f2f2f2; 76 | border-right: solid 1px #d7d7d7; 77 | left: 251px; 78 | width: 200px; 79 | } 80 | 81 | #content.namespace-index, #content.document { 82 | left: 251px; 83 | } 84 | 85 | #content.namespace-docs { 86 | left: 452px; 87 | } 88 | 89 | #content.document { 90 | padding-bottom: 10%; 91 | } 92 | 93 | #header { 94 | background: #3f3f3f; 95 | box-shadow: 0 0 8px rgba(0, 0, 0, 0.4); 96 | z-index: 100; 97 | } 98 | 99 | #header h1 { 100 | margin: 0; 101 | padding: 0; 102 | font-size: 18px; 103 | font-weight: 
lighter; 104 | text-shadow: -1px -1px 0px #333; 105 | } 106 | 107 | #header h1 .project-version { 108 | font-weight: normal; 109 | } 110 | 111 | .project-version { 112 | padding-left: 0.15em; 113 | } 114 | 115 | #header a, .sidebar a { 116 | display: block; 117 | text-decoration: none; 118 | } 119 | 120 | #header a { 121 | color: #f5f5f5; 122 | } 123 | 124 | .sidebar a { 125 | color: #333; 126 | } 127 | 128 | #header h2 { 129 | float: right; 130 | font-size: 9pt; 131 | font-weight: normal; 132 | margin: 4px 3px; 133 | padding: 0; 134 | color: #bbb; 135 | } 136 | 137 | #header h2 a { 138 | display: inline; 139 | } 140 | 141 | .sidebar h3 { 142 | margin: 0; 143 | padding: 10px 13px 0 13px; 144 | font-size: 19px; 145 | font-weight: lighter; 146 | } 147 | 148 | .sidebar h3 a { 149 | color: #444; 150 | } 151 | 152 | .sidebar h3.no-link { 153 | color: #636363; 154 | } 155 | 156 | .sidebar ul { 157 | padding: 7px 0 6px 0; 158 | margin: 0; 159 | } 160 | 161 | .sidebar ul.index-link { 162 | padding-bottom: 4px; 163 | } 164 | 165 | .sidebar li { 166 | display: block; 167 | vertical-align: middle; 168 | } 169 | 170 | .sidebar li a, .sidebar li .no-link { 171 | border-left: 3px solid transparent; 172 | padding: 0 10px; 173 | white-space: nowrap; 174 | } 175 | 176 | .sidebar li .no-link { 177 | display: block; 178 | color: #777; 179 | font-style: italic; 180 | } 181 | 182 | .sidebar li .inner { 183 | display: inline-block; 184 | padding-top: 7px; 185 | height: 24px; 186 | } 187 | 188 | .sidebar li a, .sidebar li .tree { 189 | height: 31px; 190 | } 191 | 192 | .depth-1 .inner { padding-left: 2px; } 193 | .depth-2 .inner { padding-left: 6px; } 194 | .depth-3 .inner { padding-left: 20px; } 195 | .depth-4 .inner { padding-left: 34px; } 196 | .depth-5 .inner { padding-left: 48px; } 197 | .depth-6 .inner { padding-left: 62px; } 198 | 199 | .sidebar li .tree { 200 | display: block; 201 | float: left; 202 | position: relative; 203 | top: -10px; 204 | margin: 0 4px 0 0; 205 | padding: 
0; 206 | } 207 | 208 | .sidebar li.depth-1 .tree { 209 | display: none; 210 | } 211 | 212 | .sidebar li .tree .top, .sidebar li .tree .bottom { 213 | display: block; 214 | margin: 0; 215 | padding: 0; 216 | width: 7px; 217 | } 218 | 219 | .sidebar li .tree .top { 220 | border-left: 1px solid #aaa; 221 | border-bottom: 1px solid #aaa; 222 | height: 19px; 223 | } 224 | 225 | .sidebar li .tree .bottom { 226 | height: 22px; 227 | } 228 | 229 | .sidebar li.branch .tree .bottom { 230 | border-left: 1px solid #aaa; 231 | } 232 | 233 | .sidebar.primary li.current a { 234 | border-left: 3px solid #a33; 235 | color: #a33; 236 | } 237 | 238 | .sidebar.secondary li.current a { 239 | border-left: 3px solid #33a; 240 | color: #33a; 241 | } 242 | 243 | .namespace-index h2 { 244 | margin: 30px 0 0 0; 245 | } 246 | 247 | .namespace-index h3 { 248 | font-size: 16px; 249 | font-weight: bold; 250 | margin-bottom: 0; 251 | } 252 | 253 | .namespace-index .topics { 254 | padding-left: 30px; 255 | margin: 11px 0 0 0; 256 | } 257 | 258 | .namespace-index .topics li { 259 | padding: 5px 0; 260 | } 261 | 262 | .namespace-docs h3 { 263 | font-size: 18px; 264 | font-weight: bold; 265 | } 266 | 267 | .public h3 { 268 | margin: 0; 269 | float: left; 270 | } 271 | 272 | .usage { 273 | clear: both; 274 | } 275 | 276 | .public { 277 | margin: 0; 278 | border-top: 1px solid #e0e0e0; 279 | padding-top: 14px; 280 | padding-bottom: 6px; 281 | } 282 | 283 | .public:last-child { 284 | margin-bottom: 20%; 285 | } 286 | 287 | .members .public:last-child { 288 | margin-bottom: 0; 289 | } 290 | 291 | .members { 292 | margin: 15px 0; 293 | } 294 | 295 | .members h4 { 296 | color: #555; 297 | font-weight: normal; 298 | font-variant: small-caps; 299 | margin: 0 0 5px 0; 300 | } 301 | 302 | .members .inner { 303 | padding-top: 5px; 304 | padding-left: 12px; 305 | margin-top: 2px; 306 | margin-left: 7px; 307 | border-left: 1px solid #bbb; 308 | } 309 | 310 | #content .members .inner h3 { 311 | font-size: 12pt; 
312 | } 313 | 314 | .members .public { 315 | border-top: none; 316 | margin-top: 0; 317 | padding-top: 6px; 318 | padding-bottom: 0; 319 | } 320 | 321 | .members .public:first-child { 322 | padding-top: 0; 323 | } 324 | 325 | h4.type, 326 | h4.dynamic, 327 | h4.added, 328 | h4.deprecated { 329 | float: left; 330 | margin: 3px 10px 15px 0; 331 | font-size: 15px; 332 | font-weight: bold; 333 | font-variant: small-caps; 334 | } 335 | 336 | .public h4.type, 337 | .public h4.dynamic, 338 | .public h4.added, 339 | .public h4.deprecated { 340 | font-size: 13px; 341 | font-weight: bold; 342 | margin: 3px 0 0 10px; 343 | } 344 | 345 | .members h4.type, 346 | .members h4.added, 347 | .members h4.deprecated { 348 | margin-top: 1px; 349 | } 350 | 351 | h4.type { 352 | color: #717171; 353 | } 354 | 355 | h4.dynamic { 356 | color: #9933aa; 357 | } 358 | 359 | h4.added { 360 | color: #508820; 361 | } 362 | 363 | h4.deprecated { 364 | color: #880000; 365 | } 366 | 367 | .namespace { 368 | margin-bottom: 30px; 369 | } 370 | 371 | .namespace:last-child { 372 | margin-bottom: 10%; 373 | } 374 | 375 | .index { 376 | padding: 0; 377 | font-size: 80%; 378 | margin: 15px 0; 379 | line-height: 16px; 380 | } 381 | 382 | .index * { 383 | display: inline; 384 | } 385 | 386 | .index p { 387 | padding-right: 3px; 388 | } 389 | 390 | .index li { 391 | padding-right: 5px; 392 | } 393 | 394 | .index ul { 395 | padding-left: 0; 396 | } 397 | 398 | .type-sig { 399 | clear: both; 400 | color: #088; 401 | } 402 | 403 | .type-sig pre { 404 | padding-top: 10px; 405 | margin: 0; 406 | } 407 | 408 | .usage code { 409 | display: block; 410 | color: #008; 411 | margin: 2px 0; 412 | } 413 | 414 | .usage code:first-child { 415 | padding-top: 10px; 416 | } 417 | 418 | p { 419 | margin: 15px 0; 420 | } 421 | 422 | .public p:first-child, .public pre.plaintext { 423 | margin-top: 12px; 424 | } 425 | 426 | .doc { 427 | margin: 0 0 26px 0; 428 | clear: both; 429 | } 430 | 431 | .public .doc { 432 | margin: 0; 433 
| } 434 | 435 | .namespace-index .doc { 436 | margin-bottom: 20px; 437 | } 438 | 439 | .namespace-index .namespace .doc { 440 | margin-bottom: 10px; 441 | } 442 | 443 | .markdown p, .markdown li, .markdown dt, .markdown dd, .markdown td { 444 | line-height: 22px; 445 | } 446 | 447 | .markdown li { 448 | padding: 2px 0; 449 | } 450 | 451 | .markdown h2 { 452 | font-weight: normal; 453 | font-size: 25px; 454 | margin: 30px 0 10px 0; 455 | } 456 | 457 | .markdown h3 { 458 | font-weight: normal; 459 | font-size: 20px; 460 | margin: 30px 0 0 0; 461 | } 462 | 463 | .markdown h4 { 464 | font-size: 15px; 465 | margin: 22px 0 -4px 0; 466 | } 467 | 468 | .doc, .public, .namespace .index { 469 | max-width: 680px; 470 | overflow-x: visible; 471 | } 472 | 473 | .markdown pre > code { 474 | display: block; 475 | padding: 10px; 476 | } 477 | 478 | .markdown pre > code, .src-link a { 479 | border: 1px solid #e4e4e4; 480 | border-radius: 2px; 481 | } 482 | 483 | .markdown code:not(.hljs), .src-link a { 484 | background: #f6f6f6; 485 | } 486 | 487 | pre.deps { 488 | display: inline-block; 489 | margin: 0 10px; 490 | border: 1px solid #e4e4e4; 491 | border-radius: 2px; 492 | padding: 10px; 493 | background-color: #f6f6f6; 494 | } 495 | 496 | .markdown hr { 497 | border-style: solid; 498 | border-top: none; 499 | color: #ccc; 500 | } 501 | 502 | .doc ul, .doc ol { 503 | padding-left: 30px; 504 | } 505 | 506 | .doc table { 507 | border-collapse: collapse; 508 | margin: 0 10px; 509 | } 510 | 511 | .doc table td, .doc table th { 512 | border: 1px solid #dddddd; 513 | padding: 4px 6px; 514 | } 515 | 516 | .doc table th { 517 | background: #f2f2f2; 518 | } 519 | 520 | .doc dl { 521 | margin: 0 10px 20px 10px; 522 | } 523 | 524 | .doc dl dt { 525 | font-weight: bold; 526 | margin: 0; 527 | padding: 3px 0; 528 | border-bottom: 1px solid #ddd; 529 | } 530 | 531 | .doc dl dd { 532 | padding: 5px 0; 533 | margin: 0 0 5px 10px; 534 | } 535 | 536 | .doc abbr { 537 | border-bottom: 1px dotted 
#333; 538 | font-variant: none; 539 | cursor: help; 540 | } 541 | 542 | .src-link { 543 | margin-bottom: 15px; 544 | } 545 | 546 | .src-link a { 547 | font-size: 70%; 548 | padding: 1px 4px; 549 | text-decoration: none; 550 | color: #5555bb; 551 | } 552 | -------------------------------------------------------------------------------- /docs/css/highlight.css: -------------------------------------------------------------------------------- 1 | /* 2 | github.com style (c) Vasily Polovnyov 3 | */ 4 | 5 | .hljs { 6 | display: block; 7 | overflow-x: auto; 8 | padding: 0.5em; 9 | color: #333; 10 | background: #f8f8f8; 11 | } 12 | 13 | .hljs-comment, 14 | .hljs-quote { 15 | color: #998; 16 | font-style: italic; 17 | } 18 | 19 | .hljs-keyword, 20 | .hljs-selector-tag, 21 | .hljs-subst { 22 | color: #333; 23 | font-weight: bold; 24 | } 25 | 26 | .hljs-number, 27 | .hljs-literal, 28 | .hljs-variable, 29 | .hljs-template-variable, 30 | .hljs-tag .hljs-attr { 31 | color: #008080; 32 | } 33 | 34 | .hljs-string, 35 | .hljs-doctag { 36 | color: #d14; 37 | } 38 | 39 | .hljs-title, 40 | .hljs-section, 41 | .hljs-selector-id { 42 | color: #900; 43 | font-weight: bold; 44 | } 45 | 46 | .hljs-subst { 47 | font-weight: normal; 48 | } 49 | 50 | .hljs-type, 51 | .hljs-class .hljs-title { 52 | color: #458; 53 | font-weight: bold; 54 | } 55 | 56 | .hljs-tag, 57 | .hljs-name, 58 | .hljs-attribute { 59 | color: #000080; 60 | font-weight: normal; 61 | } 62 | 63 | .hljs-regexp, 64 | .hljs-link { 65 | color: #009926; 66 | } 67 | 68 | .hljs-symbol, 69 | .hljs-bullet { 70 | color: #990073; 71 | } 72 | 73 | .hljs-built_in, 74 | .hljs-builtin-name { 75 | color: #0086b3; 76 | } 77 | 78 | .hljs-meta { 79 | color: #999; 80 | font-weight: bold; 81 | } 82 | 83 | .hljs-deletion { 84 | background: #fdd; 85 | } 86 | 87 | .hljs-addition { 88 | background: #dfd; 89 | } 90 | 91 | .hljs-emphasis { 92 | font-style: italic; 93 | } 94 | 95 | .hljs-strong { 96 | font-weight: bold; 97 | } 98 | 
-------------------------------------------------------------------------------- /docs/deodorant.acq-functions.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.acq-functions documentation

deodorant.acq-functions

Acquisition functions for Deodorant.
 4 | 

expected-improvement

(expected-improvement xi mu-best gp-predicter x*)
Expected improvement acquisition function with some trickery
 5 |  to overcome underflow when very low.  Specifically, we
 6 |  revert to using log(UCB) with v=2 as the acquisition function
 7 |  if log(EI/exp(UCB))<-8.
 8 | 
 9 | Accepts:
10 |   xi - option for expected improvement (scalar)
11 |   mu-best - best posterior mean evaluated at old data points (scalar)
12 |   gp-predicter - function that returns [mu var]
13 |                  from the weighted gp ensemble posterior given
14 |                  a single point to evaluate x.  See
15 |                  create-weighted-gp-predict-with-derivatives-function*
16 |   x* - new point to evaluate (D length vector)
17 | 
18 | Returns:
19 |   EI - log value of expected improvement at x*

integrated-aq-func

(integrated-aq-func base-aq-func mu-bests gp-predicters gp-weights x*)
Calculates the integrated acquisition function from a base
20 |  acquisition function and a weighted sum of GP predictors.
21 |  This is as per Snoek et al, NIPS 2012.
22 | 
23 | Accepts:
24 |   base-aq-func - Function which takes as inputs a gp-predictor
25 |                  and a point to evaluate, returning a utility of
26 |                  evaluating that point.
27 |   mu-bests - The best of the expected best point previously
28 |              evaluated for each GP, mu_j^+ in the paper.
29 |   gp-predicters - A collection of gp-prediction functions.  Each
30 |                   takes a point to evaluate and returns [mu sig]
31 |   gp-weights - Vector of weights for each gp-predictor.  The
32 |                acquisition function values will be weighted by
33 |                these weights
34 |   x* - The point to evaluate (D length vector)
35 | Returns:
36 |   The integrated acquisition function at x*
-------------------------------------------------------------------------------- /docs/deodorant.broadcast-functions.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.broadcast-functions documentation

deodorant.broadcast-functions

broadcast-function-NxD-MxD

(broadcast-function-NxD-MxD op xs zs & b-Flip)
Performs an operation between a NxD array
 4 |  and a MxD array, returning a NxDxM array.
 5 |  Note if both inputs are NxD then the output
 6 |  will be NxDxN.
 7 | 
 8 |  Accepts: op  - matrix operation to perform (must be from core.matrix)
 9 |           xs  - NxD  array
10 |           zs  - MxD  array
11 |           bFlip - if true then NN is treated as
12 |                   first input in op
13 | Returns: a NxDxM array

broadcast-function-NxDxN-NxN

(broadcast-function-NxDxN-NxN op NDN NN & b-Flip)
Performs an operation between a NxDxN array
14 |  and a NxN array.
15 | 
16 |  Accepts: op  - matrix operation to perform (must be from core.matrix)
17 |           NDN - NxDxN array
18 |           NN  - NxN   array
19 |           bFlip - if true then NN is treated as
20 |                   first input in op
21 | Returns: a NxDxN array

safe-broadcast-op

(safe-broadcast-op op M1 M2 & b-Flip)
Calls broadcast-function-NxD-MxD when given arrays and
22 | just does standard broadcasting if one is a vector.
23 | See broadcast-function-NxD-MxD for details on call
24 | structure.

safe-sum

(safe-sum X dim)
Sums out a dimension of nd-array.  Takes an array
25 |  of matrix type and a dimension to sum out. Ensures
26 |  no crash if X is nil.
27 | 
28 | Accepts: X - matrix
29 |        dim - dimension to remove.
30 | 
31 | Returns: X with dim summed out

scale-square-diff

(scale-square-diff x-sq-diff rho)
Scales the output from square-diff. Care should be taken
32 |  to ensure inputs are of matrix type as will be slow
33 |  otherwise.
34 | 
35 |  Accepts: x-sq-diff - NxDxM array to scale
36 |           rho - vector of length scales
37 | 
38 | Returns: the scaled version of x-sq-diff

square-diff

(square-diff xs)
Calculates array of squared differences. Accepts an [N D] array of
39 | points xs. Returns an [N D N] array in which entry [i d j] is given
40 | by (xs[i,d] - xs[j,d])^2.
-------------------------------------------------------------------------------- /docs/deodorant.core.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.core documentation

deodorant.core

Core functions of Deodorant.
  4 | 

bo-acquire

(bo-acquire X Y acq-optimizer scaling-funcs & {:keys [cov-fn-form grad-cov-fn-hyper-form mean-fn-form gp-hyperprior-form hmc-step-size hmc-num-leapfrog-steps hmc-num-mcmc-steps hmc-num-opt-steps hmc-num-chains hmc-burn-in-proportion hmc-max-gps verbose debug-folder plot-aq b-deterministic], :or {}})
Performs the acquisition step in Bayesian optimization. Accepts a
  5 | sequence of inputs and outputs and returns the next point x-next to
  6 | evaluate, the index of the point from those previously evaluated
  7 | that is considered to be the best, and the predicted mean and std-dev
  8 | of the evaluated points.  Note this std-dev is in the estimate of the
  9 | 'true' function value at this point and does not include the noise involved
 10 | in evaluating this function.
 11 | 
 12 | Accepts:
 13 |   X - matrix of input points, first dimension represents different points
 14 |     second dimension the different input dimensions
 15 |   Y - a vector of associated outputs
 16 |   acq-optimizer - a function that takes in the acquisition function as
 17 |     input and returns a point in the space of x that is the estimated
 18 |     optimum of the acquisition function, subject to the constraints of
 19 |     the model.
 20 |   scaling-funcs - scaling function object output from sf/setup-scaling-funcs
 21 | 
 22 | Options (each of these is provide as a key value pair, defaults are
 23 |              not provided as these are set by deodorant which calls this
 24 |              function which should be consulted for further info)
 25 |     cov-fn-form grad-cov-fn-hyper-form mean-fn-form
 26 |     gp-hyperprior-form hmc-step-size hmc-num-leapfrog-steps
 27 |     hmc-num-mcmc-steps hmc-num-opt-steps hmc-num-chains
 28 |     hmc-burn-in-proportion hmc-max-gps verbose debug-folder plot-aq
 29 | 
 30 | Returns:
 31 |   x-next - point that should be evaluated next (optimum of the acquisition
 32 |            function)
 33 |   i-best - index of the point expected to be most optimal under the mixture
 34 |            of GPs posterior
 35 |   means - estimated mean value for each point in X.
 36 |   std-devs - estimated standard deviation for each point in X.

deodorant

(deodorant f aq-optimizer theta-sampler & {:keys [initial-points num-scaling-thetas num-initial-points cov-fn-form grad-cov-fn-hyper-form mean-fn-form gp-hyperprior-form b-deterministic hmc-step-size hmc-num-leapfrog-steps hmc-num-mcmc-steps hmc-num-opt-steps hmc-num-chains hmc-burn-in-proportion hmc-max-gps verbose debug-folder plot-aq], :or {b-deterministic false, debug-folder nil, hmc-step-size 0.01, hmc-num-chains 4, cov-fn-form cf/matern32-plus-matern52-K, hmc-num-leapfrog-steps 2, num-scaling-thetas 1000, verbose false, hmc-num-opt-steps 10, gp-hyperprior-form dp/default-double-matern-hyperprior, hmc-num-mcmc-steps 20, num-initial-points 5, mean-fn-form dp/default-mean-fn-form, hmc-burn-in-proportion 0.5, plot-aq false, hmc-max-gps 20, initial-points nil, grad-cov-fn-hyper-form cf/matern32-plus-matern52-grad-K}})
Deodorant: solving the problems of Bayesian optimization.
 37 | 
 38 | Deodorant is a Bayesian optimization (BO) package with three core features:
 39 |   1) Domain scaling to exploit problem independent GP hyperpriors
 40 |   2) A non-stationary mean function to allow unbounded optimization
 41 |   3) External provision of the acquisition function optimizer so that this
 42 |      can incorporate the constraints of the problem (inc equality constraints)
 43 |      and ensure that no invalid points are evaluated.
 44 | 
 45 | The main intended use of the package at present is as the BO component
 46 | for BOPP (Bayesian Optimization for Probabilistic Programs. Rainforth T, Le TA,
 47 | van de Meent J-W, Osborne MA, Wood F. In NIPS 2016) which provides all the
 48 | required inputs automatically given a program.  Even when the intention is
 49 | simply optimization, using BOPP rather than Deodorant directly is currently
 50 | recommended.  The rationale of providing Deodorant as its own independent
 51 | package is to separate out the parts of BOPP that are Anglican dependent and
 52 | those that are not.  As such, one may wish to integrate Deodorant into
 53 | another similar package that provides all the required inputs.
 54 | 
 55 | For details on the working of Deodorant, the previously referenced paper and
 56 | its supplementary material should be consulted.
 57 | 
 58 | Accepts:
 59 |   f - target function.  Takes in a single input x and returns a pair
 60 |       [f(x), other-outputs(x)].  Here other-outputs allows for additional x
 61 |       dependent variables to be returned.  For example, in BOPP then
 62 |       other-outputs(x) is a vector of program outputs from the calling the
 63 |       marginal query, with one component for each sample output from
 64 |       this marginal query.
 65 |   acq-optimizer - a function that takes in the acquisition function as
 66 |       input and returns a point in the space of x that is the estimated
 67 |       optimum of the acquisition function, subject to the constraints of
 68 |       the model.
 69 |   theta-sampler - characterization of the input variables which can be
 70 |       sampled from to generate example inputs and initialize the scaling.
 71 |       Should be a function that takes no inputs and returns valid examples
 72 |       of the input variables.  Note that the input variables are currently
 73 |       called x in the inner functions.
 74 | 
 75 | Optional Inputs: (defined with key value pairs, default values shown below
 76 |                   in brackets)
 77 |   Initialization options:
 78 |     :initial-points - Points to initialize BO in addition to those sampled
 79 |                      by theta-sampler
 80 |       [nil]
 81 |     :num-scaling-thetas - Number of points used to initialize scaling
 82 |       [50]
 83 |     :num-initial-points - Number of points to initialize BO
 84 |       [5]
 85 | 
 86 |   GP options:
 87 |     :cov-fn-form - covariance function with unset hyperparameters
 88 |       [cp/matern32-plus-matern52-K]
 89 |     :grad-cov-fn-hyper - grad of the above with respect to the hyperparameters
 90 |       [cp/matern32-plus-matern52-grad-K]
 91 |     :mean-fn-form - mean function with unset dimensionality
 92 |       [dp/default-mean-fn-form]
 93 |     :gp-hyperprior-form - constructor for the gp hyperparameter hyperprior
 94 |       [dp/default-double-matern-hyperprior]
 95 |     :b-deterministic - whether to include noise in the GP
 96 |       [false]
 97 | 
 98 |   HMC options:
 99 |     :hmc-step-size - HMC step size
100 |       [0.3]
101 |     :hmc-num-leapfrog-steps - Number of HMC leap-frog steps
102 |       [5]
103 |     :hmc-num-chains - Number of samplers run in parallel
104 |       [50]
105 |     :hmc-burn-in-proportion - Proportion of samples to throw away as burn in
106 |       [8]
107 |     :hmc-max-gps - Maximum number of unique GPs to keep at the end so that
 108 |                    optimization of the acquisition function does not become
109 |                    too expensive.
110 |       [50]
111 |   Debug options:
112 |     :verbose - Allow debug print outs
113 |       [false]
114 |     :debug-folder - Path for the debug folder.  No output generated if path
 115 |               not provided.  These outputs include alphas (gp hyper parameters),
116 |               gp-weights (weights for each hyperparameter sample) etc
117 |       [empty]
118 |     :bo-plot-aq - Generate debugging csv of acquisition functions
119 |       [false]
120 | 
121 | Returns:
122 |   Lazy list of increasingly optimal triples
123 |   (theta, main output of f, other outputs of f).
-------------------------------------------------------------------------------- /docs/deodorant.covar-functions.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.covar-functions documentation

deodorant.covar-functions

Covariance functions for Deodorant.
  4 | 

matern-for-vector-input

(matern-for-vector-input dim K)
Converts a covariance function that takes pairs of log-sig-f
  5 |  and log-rho as inputs and converts them to one that accepts
  6 |  a vector with correctly ordered hyperparameters.
  7 | 
  8 | Accepts: dim           - Dimension of data
  9 |          K             - Relevant kernel function
 10 | 
 11 | Return: K-vec - the kernel function that now accepts x-diff-squared
 12 |                 followed by a vector

matern32-grad-K

(matern32-grad-K x-diff-squared log-sig-f log-rho)
Gradient for matern32.  Syntax as per matern32
 13 | except returns a DxNxN array giving derivatives
 14 | in the different directions.  The first entry
 15 | of the first dimension corresponds to the derivative
 16 | with respect to log-sig-f, with the others wrt
 17 | log-rho

matern32-grad-z

(matern32-grad-z xs-z-diff log-sig-f log-rho)
Jacobian of side kernel matrix w.r.t. new data point z for Matern 32.
 18 | If using a gradient based solver for the acquisition function, then
 19 | needed for calculating derivative of Expected Improvement, EI(z), as outlined
 20 | on page 3 of
 21 | http://homepages.mcs.vuw.ac.nz/~marcus/manuscripts/FreanBoyle-GPO-2008.pdf.
 22 | 
 23 | Accepts:
 24 | xs-z-diff   - NxD matrix whose (i, j)th entry is x_ij - z_j
 25 | log-sig-f   - scalar; parameter of kernel function
 26 | log-rho     - D-dimensional parameter of kernel function
 27 | 
 28 | Returns:
 29 | [NxD] Jacobian of side kernel matrix w.r.t. new data point where
 30 | (i, j)th entry is d(kernel(x_i, z)) / d(z_j).

matern32-K

(matern32-K x-diff-squared log-sig-f log-rho)
Covariance function for matern-32.
 31 | 
 32 |  Accepts: x-diff-squared - a NxDxN matrix of squared distances
 33 |                            or NxD matrix of squared distances of old points
 34 |                            and new point
 35 |           log-sig-f      - a scalar
 36 |           log-rho        - a vector
 37 | 
 38 | Returns: A matrix K

matern32-plus-matern52-grad-K

(matern32-plus-matern52-grad-K x-diff-squared log-sig-f-32 log-rho-32 log-sig-f-52 log-rho-52)
Gradient of compound covariance function for matern-32 and matern-52.
 39 | 
 40 | Accepts: x-diff-squared   - a NxDxN matrix of squared distances
 41 |          log-sig-f-32     - a scalar
 42 |          log-rho-32       - a vector
 43 |          log-sig-f-52     - a scalar
 44 |          log-rho-52       - a vector
 45 | 
 46 | Returns: An DxNxN array grad-K giving derivatives
 47 |          in the different directions.  The first entry
 48 |          of the first dimension corresponds to the derivative
 49 |          with respect to log-sig-f, with the others wrt
 50 |          log-rho

matern32-plus-matern52-grad-z

(matern32-plus-matern52-grad-z x-z-diff log-sig-f-32 log-rho-32 log-sig-f-52 log-rho-52)
Jacobian of side kernel matrix w.r.t. new data point z for Matern 32 + Matern 52.
 51 | If using a gradient based solver for the acquisition function, then
 52 | needed for calculating derivative of Expected Improvement, EI(z), as outlined
 53 | on page 3 of
 54 | http://homepages.mcs.vuw.ac.nz/~marcus/manuscripts/FreanBoyle-GPO-2008.pdf.
 55 | 
 56 | Accepts:
 57 | xs-z-diff   - NxD matrix whose (i, j)th entry is x_ij - z_j
 58 | log-sig-f   - scalar; parameter of kernel function
 59 | log-rho     - D-dimensional parameter of kernel function
 60 | 
 61 | Returns:
 62 | [NxD] Jacobian of side kernel matrix w.r.t. new data point where
 63 | (i, j)th entry is d(kernel(x_i, z)) / d(z_j).

matern32-plus-matern52-K

(matern32-plus-matern52-K x-diff-squared log-sig-f-32 log-rho-32 log-sig-f-52 log-rho-52)
Compound covariance function for matern-32 and matern-52.
 64 | 
 65 | Accepts: x-diff-squared   - a NxDxN matrix of squared distances
 66 |          log-sig-f-32     - a scalar
 67 |          log-rho-32       - a vector
 68 |          log-sig-f-52     - a scalar
 69 |          log-rho-52       - a vector
 70 | 
 71 | Returns: A matrix K

matern32-xs-z

(matern32-xs-z xs z log-sig-f log-rho)
Side covariance matrix for matern-32, i.e. vector k where
 72 | k_i = kernel(x_i, z).
 73 | 
 74 | Accepts:
 75 | xs         - a NxD vector of vectors of xs
 76 | z          - [Dx1] vector of new data point
 77 | log-sig-f  - a scalar
 78 | log-rho    - a vector
 79 | 
 80 | Returns: A vector k sized N.

matern52-grad-K

(matern52-grad-K x-diff-squared log-sig-f log-rho)
Gradient for matern52.  Syntax as per matern52
 81 | except returns a DxNxN array giving derivatives
 82 | in the different directions.  The first entry
 83 | of the first dimension corresponds to the derivative
 84 | with respect to log-sig-f, with the others wrt
 85 | log-rho

matern52-grad-z

(matern52-grad-z xs-z-diff log-sig-f log-rho)
Jacobian of side kernel matrix w.r.t. new data point z for Matern 52.
 86 | If using a gradient based solver for the acquisition function, then
 87 | needed for calculating derivative of Expected Improvement, EI(z), as outlined
 88 | on page 3 of
 89 | http://homepages.mcs.vuw.ac.nz/~marcus/manuscripts/FreanBoyle-GPO-2008.pdf.
 90 | 
 91 | Accepts:
 92 | xs-z-diff   - NxD matrix whose (i, j)th entry is x_ij - z_j
 93 | log-sig-f   - scalar; parameter of kernel function
 94 | log-rho     - D-dimensional parameter of kernel function
 95 | 
 96 | Returns:
 97 | [NxD] Jacobian of side kernel matrix w.r.t. new data point where
 98 | (i, j)th entry is d(kernel(x_i, z)) / d(z_j).

matern52-K

(matern52-K x-diff-squared log-sig-f log-rho)
Covariance function for matern-52.
 99 | 
100 |  Accepts: x-diff-squared - a NxDxN matrix of squared distances
101 |                            or NxD matrix of squared distances of old points
102 |                            and new point
103 |           log-sig-f      - a scalar
104 |           log-rho        - a vector
105 | 
106 | Returns: A matrix K

matern52-xs-z

(matern52-xs-z xs z log-sig-f log-rho)
Side covariance matrix for matern-52, i.e. vector k where
107 | k_i = kernel(x_i, z).
108 | 
109 | Accepts:
110 | xs         - a NxD vector of vectors of xs
111 | z          - [Dx1] vector of new data point
112 | log-sig-f  - a scalar
113 | log-rho    - a vector
114 | 
115 | Returns: A vector k sized N.
-------------------------------------------------------------------------------- /docs/deodorant.default-params.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.default-params documentation

deodorant.default-params

Helper functions for Deodorant.
 4 | 

default-double-matern-hyperprior

(default-double-matern-hyperprior dim b-deterministic)
Sets up a default hyperprior based on the composition
 5 | of a matern-32 and a matern-52 kernel.  Accepts the
 6 | dimensionality of the input space dim and returns a hash
 7 | map with fields :sampler, :log-p and :grad-log-p.  Each
 8 | of these operate on
 9 | [log-sig-n [log-sig-f-32 log-rho-32-dim1 log-rho-32-dim2 ....]
10 |            [log-sig-f-52 log-rho-52-dim1 log-rho-52-dim2 ....]].
11 | :sampler returns a set of samples of this form.
12 | :log-p returns a scalar given a set of parameters
13 | :grad-log-p returns a nested vector of the same size as sampler
14 |             does corresponding to the gradient of that hyperparameter

default-hmc-initializer

(default-hmc-initializer n-chains hyperprior)

default-mean-fn-form

(default-mean-fn-form dim)
-------------------------------------------------------------------------------- /docs/deodorant.gp-toolbox.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.gp-toolbox documentation

deodorant.gp-toolbox

GP functions for training, testing, calculating the marginal likelihood
 4 | and its derivatives.

->trained-gp-obj

(->trained-gp-obj prior-mean-fn x-obs L psi inv-K inv-L sigma-n log-likelihood x*-diff-fn k*-fn k-new-fn marginal-prior-var grad-prior-mean-fn-x grad-k*-fn)
Positional factory function for class deodorant.gp_toolbox.trained-gp-obj.
 5 | 

convert-output-to-std-dev

(convert-output-to-std-dev mu var & args)
Takes the output of a gp prediction function and converts the variance
 6 | terms to standard deviation terms for both the original and derivative terms

create-trained-gp-obj

(create-trained-gp-obj prior-mean-fn cov-fn x-obs y-obs alpha & [grad-prior-mean-fn-x grad-prior-cov-fn-x])
Creates a trained-gp-obj record that is used for efficient
 7 | prediction of gp at future points.
 8 | 
 9 | Accepts:     prior-mean-func
10 |              cov-fn                  Same form as sent to gp train
11 |              points                  Vector of pairs of [x y] observations
12 |              alpha                   Vector of hyperparameters in same form as sent to gp train
13 | 
14 | Optional Inputs:
15 |              grad-prior-mean-fn-x    Gradient of prior mean function.  Needed for some derivative
16 |                                      calculations but not for basic use.
17 |              grad-prior-cov-fn-x     Gradient of prior covariance function.  Needed for some derivative
18 |                                      calculations but not for basic use.
19 | 
20 | Returns:     trained-gp-obj
21 | 
22 | Note that current usage of BOPP does not set these optional inputs.  They would be needed for anything
23 | that requires taking gradients with respect to the GP inputs, for example solving the acquisition
24 | function using gradient methods and left in for potential future convenience / other toolbox use.

gp-grad-log-likelihood

(gp-grad-log-likelihood grad-cov-fn x-diff-sq alpha L psi)
Calculates the gradient of the gp-log-likelihood with
25 | respect to the hyperparameters.
26 | 
27 | Accepts:  grad-cov-fn  - Function to return grad of covariance func wrt
28 |                          the hyperparameters returned as DxNxN matrix
29 |           x-diff-sq    - see gp-train
30 |           L            - see gp-train
31 |           psi          - see gp-train

gp-log-likelihood

(gp-log-likelihood L psi y-bar)
Calculates the gp-log-likelihood given L, psi and y-bar
32 | 

gp-mixture-mu-sig

(gp-mixture-mu-sig gp-predictors gp-weights xs)
Calculates the mean and standard deviation from a weighted
33 | sum of gps, i.e. a gp mixture model.  Note that the resulting
34 | distribution is not a Gaussian, (the marginals are mixtures of
35 | Gaussians) but the mean and covariance is still analytic.
36 | 
37 | Accepts:
38 |  gp-predictors - A collection of gp prediction functions
39 |  gp-weights - The relative weights of the gps
40 |  xs - Positions to calculate the estimates at
41 | 
42 | Returns:
43 |  mus - The means of the points
44 |  sigs - The standard deviations of the points

gp-predict-mu-cov

(gp-predict-mu-cov gp x*)
Makes gp predictions for mu and
45 | covariance for multiple points simultaneously.
46 | 
47 | Accepts   gp        - of type trained-gp-obj
48 |           x*        - new points to evaluate   (MxD matrix)
49 |           & args    - if (first args) is true then the full covariance matrix
50 |                       is returned instead of just the marginal variance
51 | 
52 | Returns   mu        - predicted means (M length vector)
53 |           cov       - (MxM matrix) corresponding to the covariance between the prediction points

gp-predict-mu-sig

(gp-predict-mu-sig gp x*)
Makes gp predictions for mu and marginal standard deviation
54 | for multiple points simultaneously.
55 | 
56 | Accepts   gp        - of type trained-gp-obj
57 |           x*        - new points to evaluate   (MxD matrix)
58 | 
59 | Returns   mu        - predicted means (M length vector)
60 |           sig       - marginal predicted standard deviations

gp-predict-with-derivatives

(gp-predict-with-derivatives gp x*)
Makes gp predictions for mu, var, grad-mu and grad-var
61 | given a gp-object and a single query point
62 | 
63 | Accepts    gp        - of type trained-gp-obj
64 |            x*        - new points to evaluate   (D length vector)
65 | 
66 | Returns    mu        - predicted means                        (vector of length 1)
 67 |            var       - marginal predicted variance            (vector of length 1)
68 |            grad-mu   - derivative of the mean with respect to the dimensions of predicted points. (D length vector)
69 |            grad-var  - derivative of the variance with respect to the dimensions of predicted points. (D length vector)

gp-sample

(gp-sample gp x* n-samples)
Generates samples from a trained-gp-obj
70 | 
71 | Accepts   gp          - of type trained-gp-obj
72 |           x*          - points to evaluate   (MxD matrix)
73 |           n-samples   - number of samples to generate
74 | 
75 | Returns   f*          - sampled values for gp output (n-samples x M matrix).  Note that the y ~ N(f*,sigma-n^2)
76 |           dist-f*     - mvn distribution object that allows for further efficient sampling if required

gp-train

(gp-train cov-fn x-diff-sq y-bar alpha)
Trains a gp, returns L and psi - the lower triangular matrix
77 | and vector of differences required for prediction.
78 | 
79 | Accepts: cov-fn - a function taking inputs of x-diff-sq and
80 |                   a vector of hyperparameters (not including noise
81 |                   parameter and therefore corresponding to (rest alpha))
82 |          x-diff-sq - a NxDxN matrix of squared distances of points
83 |          y-bar  - observations minus the value of the prior mean function
84 |                   at those points
85 |          alpha  - a vector of hyperparameters ordered as
86 |                  [log-sig-n log-sig-f-k1 log-rho-k1-dim-1 log-rho-k1-dim-2 ... log-sig-f-k2 ...]
87 | 
88 | Returns:  L     - lower triangular matrix used for gp prediction
89 |           psi   - inv-K times y-bar

map->trained-gp-obj

(map->trained-gp-obj m__6522__auto__)
Factory function for class deodorant.gp_toolbox.trained-gp-obj, taking a map of keywords to field values.
90 | 

subtract-mean-fn

(subtract-mean-fn mean-fn x y)
Subtracts the GP mean. Accepts a mean-fn from arguments [x] to a
91 | scalar y, along with a collection of points [x y]. Returns a vector
92 | of values (- y (mean-fn x)).
-------------------------------------------------------------------------------- /docs/deodorant.helper-functions.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.helper-functions documentation

deodorant.helper-functions

Helper functions for Deodorant.
 4 | 

->mvn-distribution

(->mvn-distribution mean cov k Lcov unit-normal Z iLcov transform-sample)
Positional factory function for class deodorant.helper_functions.mvn-distribution.
 5 | 

->normal-distribution

(->normal-distribution mean sd dist15675)
Positional factory function for class deodorant.helper_functions.normal-distribution.
 6 | 

argmax

(argmax coll)
Index of maximum of a collection
 7 | 

cartesian

(cartesian colls)

defdist

macro

(defdist name & args)
defines distribution
 8 | 

distribution

protocol

random distribution
 9 | 

members

observe*

(observe* this value)
return the probability [density] of the value
10 | 

sample*

(sample* this)
draws a sample from the distribution
11 | 

erf

(erf x)
error function
12 | 

indexed-max

(indexed-max f coll)
Returns an indexed maximum. Accepts a function f and a collection
13 | coll. Returns a pair [y-max i-max] in which y-max is the largest
14 | value (f x-max) and i-max is the index such that (nth coll i-max)
15 | returns x-max.

map->mvn-distribution

(map->mvn-distribution m__6522__auto__)
Factory function for class deodorant.helper_functions.mvn-distribution, taking a map of keywords to field values.
16 | 

map->normal-distribution

(map->normal-distribution m__6522__auto__)
Factory function for class deodorant.helper_functions.normal-distribution, taking a map of keywords to field values.
17 | 

mean

(mean a dimension)(mean a)
mean of array slices along specified dimension
18 | 

multivariate-distribution

protocol

additional methods for multivariate distributions
19 | 

members

transform-sample

(transform-sample this samples)
accepts a vector of random values and generates
20 | a sample from the multivariate distribution

mvn

(mvn mean cov)
multivariate normal
21 | 

normal

(normal mean sd)
normal distribution (imported from apache)
22 | 

RNG

random number generator;
23 | used by Apache Commons Math distribution objects

sum

(sum a dimension)(sum a)
sums array slices along specified dimension
24 | 
-------------------------------------------------------------------------------- /docs/deodorant.hmc.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.hmc documentation

deodorant.hmc

Basic HMC sampler implementation.
 4 | 

burn-in-and-thin

(burn-in-and-thin burn-in-proportion thin-rate samples)
Takes the output of a markov chain, removes a number of burn-in samples
 5 | and thins
 6 | 
 7 | Accepts: burn-in-proportion
 8 | thin-rate
 9 | samples
10 | 
11 | Returns: samples(n-start:thin-rate:end)
12 | where n-start = (int (* (count samples) burn-in-proportion))

collapse-identical-samples

(collapse-identical-samples samples)
Takes an unweighted collection of samples and returns the unique values
13 | along with a vector of the number of times they occurred.  Ordering
14 | corresponds to the times of first appearance

hmc-chain

(hmc-chain u grad-u eps num-steps q-start)
Performs Hamiltonian Monte Carlo to construct a Markov Chain
15 | 
16 | Accepts functions u and grad-u with arguments [q], a parameter eps
17 | that specifies the integration step size, and a parameter num-steps
18 | that specifies the number of integration steps.
19 | 
20 | Returns a lazy sequence of samples q.

hmc-integrate

(hmc-integrate grad-u eps num-steps q p)
Performs leap-frog integration of trajectory.
21 | 

hmc-transition

(hmc-transition u grad-u eps num-steps q-start)
Performs one Hamiltonian Monte Carlo transition update.
22 | 
23 | Accepts functions u and grad-u with arguments [q], a parameter eps
24 | that specifies the integration step size, and a parameter num-steps
25 | that specifies the number of integration steps.
26 | 
27 | Returns a new sample q.

scale-vector

(scale-vector v factor)
Scale a vector by a scalar
28 | 
-------------------------------------------------------------------------------- /docs/deodorant.hyper-priors.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.hyper-priors documentation

deodorant.hyper-priors

Hyperprior functions for Deodorant.
4 | 

compose-hyperpriors

(compose-hyperpriors dim log-noise-mean log-noise-std & args)
Composes a number of hyperpriors
5 | to a form a single hyperprior.  Should still be used even
6 | if only composing a single hyperprior as adds in the
7 | derivative of sig-n and applies flatten / unflatten

constant-length-distance-hyperprior

(constant-length-distance-hyperprior dim log-sig-f-mean log-sig-f-std log-rho-mean log-rho-std)
Calls log-normal-sig-f-and-rho-hyperprior when provided with
8 | dim as first argument and uses the same value for the rho
9 | details in every dimension
-------------------------------------------------------------------------------- /docs/deodorant.scaling-functions.html: -------------------------------------------------------------------------------- 1 | 3 | deodorant.scaling-functions documentation

deodorant.scaling-functions

Scaling functions for Deodorant.
 4 | 

->scale-details-obj

(->scale-details-obj theta-min theta-max log-Z-min log-Z-max)
Positional factory function for class deodorant.scaling_functions.scale-details-obj.
 5 | 

->scaling-funcs-obj

(->scaling-funcs-obj theta-scaler theta-unscaler theta-unscaler-no-centering log-Z-scaler log-Z-unscaler log-Z-unscaler-no-centering)
Positional factory function for class deodorant.scaling_functions.scaling-funcs-obj.
 6 | 

flatten-unflatten

(flatten-unflatten x)
Returns functions for flattening and unflattening the thetas.  For example
 7 |  when sampling from a multivariate normal theta will be a nested vector
 8 | TODO make me work for matrices

map->scale-details-obj

(map->scale-details-obj m__6522__auto__)
Factory function for class deodorant.scaling_functions.scale-details-obj, taking a map of keywords to field values.
 9 | 

map->scaling-funcs-obj

(map->scaling-funcs-obj m__6522__auto__)
Factory function for class deodorant.scaling_functions.scaling-funcs-obj, taking a map of keywords to field values.
10 | 

scale

(scale data)
Normalizes data to lie inside a hypercube bounded at [-1 1] along each dimension.
11 | 
12 | Accepts a collection of data points [x] in which x may be a scalar or vector.
13 | 
14 | Returns a tuple [scaled-data unscale] containing the scaled data and
15 | a function that inverts the transformation.

scale-points

(scale-points points)
Rescales points to a hypercube bounded at [-1 1].
16 | 
 17 | Accepts a collection of points [x y] in which x is a D-dimensional
18 | vector and y is a scalar.
19 | 
 20 | Returns a tuple [x-scaled y-scaled unscale-x unscale-y] containing
21 | the scaled data and functions to revert the scaling.

setup-scaling-funcs

(setup-scaling-funcs scale-details)
Given a scale-details-obj returns a scaling-funcs-obj
22 | 

unflatten-from-sizes

(unflatten-from-sizes sizes x)

update-scale-details

(update-scale-details scale-details scaling-funcs theta-new log-Z-new)
-------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 3 | Deodorant 0.1.0

Deodorant 0.1.0

Released under the GNU General Public License Version 3

Deodorant: Solving the problems of Bayesian Optimization.

Installation

To install, add the following dependency to your project or build file:

[deodorant "0.1.0"]

Namespaces

deodorant.acq-functions

Acquisition functions for Deodorant.

Public variables and functions:

deodorant.core

Core functions of Deodorant.

Public variables and functions:

deodorant.default-params

Helper functions for Deodorant.

deodorant.hmc

Basic HMC sampler implementation.

deodorant.hyper-priors

Hyperprior functions for Deodorant.
-------------------------------------------------------------------------------- /docs/js/highlight.min.js: -------------------------------------------------------------------------------- 1 | /*! highlight.js v9.6.0 | BSD3 License | git.io/hljslicense */ 2 | !function(e){var n="object"==typeof window&&window||"object"==typeof self&&self;"undefined"!=typeof exports?e(exports):n&&(n.hljs=e({}),"function"==typeof define&&define.amd&&define([],function(){return n.hljs}))}(function(e){function n(e){return e.replace(/[&<>]/gm,function(e){return I[e]})}function t(e){return e.nodeName.toLowerCase()}function r(e,n){var t=e&&e.exec(n);return t&&0===t.index}function a(e){return k.test(e)}function i(e){var n,t,r,i,o=e.className+" ";if(o+=e.parentNode?e.parentNode.className:"",t=B.exec(o))return R(t[1])?t[1]:"no-highlight";for(o=o.split(/\s+/),n=0,r=o.length;r>n;n++)if(i=o[n],a(i)||R(i))return i}function o(e,n){var t,r={};for(t in e)r[t]=e[t];if(n)for(t in n)r[t]=n[t];return r}function u(e){var n=[];return function r(e,a){for(var i=e.firstChild;i;i=i.nextSibling)3===i.nodeType?a+=i.nodeValue.length:1===i.nodeType&&(n.push({event:"start",offset:a,node:i}),a=r(i,a),t(i).match(/br|hr|img|input/)||n.push({event:"stop",offset:a,node:i}));return a}(e,0),n}function c(e,r,a){function i(){return e.length&&r.length?e[0].offset!==r[0].offset?e[0].offset"}function u(e){l+=""}function c(e){("start"===e.event?o:u)(e.node)}for(var s=0,l="",f=[];e.length||r.length;){var g=i();if(l+=n(a.substr(s,g[0].offset-s)),s=g[0].offset,g===e){f.reverse().forEach(u);do c(g.splice(0,1)[0]),g=i();while(g===e&&g.length&&g[0].offset===s);f.reverse().forEach(o)}else"start"===g[0].event?f.push(g[0].node):f.pop(),c(g.splice(0,1)[0])}return l+n(a.substr(s))}function s(e){function n(e){return e&&e.source||e}function t(t,r){return new RegExp(n(t),"m"+(e.cI?"i":"")+(r?"g":""))}function r(a,i){if(!a.compiled){if(a.compiled=!0,a.k=a.k||a.bK,a.k){var u={},c=function(n,t){e.cI&&(t=t.toLowerCase()),t.split(" 
").forEach(function(e){var t=e.split("|");u[t[0]]=[n,t[1]?Number(t[1]):1]})};"string"==typeof a.k?c("keyword",a.k):E(a.k).forEach(function(e){c(e,a.k[e])}),a.k=u}a.lR=t(a.l||/\w+/,!0),i&&(a.bK&&(a.b="\\b("+a.bK.split(" ").join("|")+")\\b"),a.b||(a.b=/\B|\b/),a.bR=t(a.b),a.e||a.eW||(a.e=/\B|\b/),a.e&&(a.eR=t(a.e)),a.tE=n(a.e)||"",a.eW&&i.tE&&(a.tE+=(a.e?"|":"")+i.tE)),a.i&&(a.iR=t(a.i)),null==a.r&&(a.r=1),a.c||(a.c=[]);var s=[];a.c.forEach(function(e){e.v?e.v.forEach(function(n){s.push(o(e,n))}):s.push("self"===e?a:e)}),a.c=s,a.c.forEach(function(e){r(e,a)}),a.starts&&r(a.starts,i);var l=a.c.map(function(e){return e.bK?"\\.?("+e.b+")\\.?":e.b}).concat([a.tE,a.i]).map(n).filter(Boolean);a.t=l.length?t(l.join("|"),!0):{exec:function(){return null}}}}r(e)}function l(e,t,a,i){function o(e,n){var t,a;for(t=0,a=n.c.length;a>t;t++)if(r(n.c[t].bR,e))return n.c[t]}function u(e,n){if(r(e.eR,n)){for(;e.endsParent&&e.parent;)e=e.parent;return e}return e.eW?u(e.parent,n):void 0}function c(e,n){return!a&&r(n.iR,e)}function g(e,n){var t=N.cI?n[0].toLowerCase():n[0];return e.k.hasOwnProperty(t)&&e.k[t]}function h(e,n,t,r){var a=r?"":y.classPrefix,i='',i+n+o}function p(){var e,t,r,a;if(!E.k)return n(B);for(a="",t=0,E.lR.lastIndex=0,r=E.lR.exec(B);r;)a+=n(B.substr(t,r.index-t)),e=g(E,r),e?(M+=e[1],a+=h(e[0],n(r[0]))):a+=n(r[0]),t=E.lR.lastIndex,r=E.lR.exec(B);return a+n(B.substr(t))}function d(){var e="string"==typeof E.sL;if(e&&!x[E.sL])return n(B);var t=e?l(E.sL,B,!0,L[E.sL]):f(B,E.sL.length?E.sL:void 0);return E.r>0&&(M+=t.r),e&&(L[E.sL]=t.top),h(t.language,t.value,!1,!0)}function b(){k+=null!=E.sL?d():p(),B=""}function v(e){k+=e.cN?h(e.cN,"",!0):"",E=Object.create(e,{parent:{value:E}})}function m(e,n){if(B+=e,null==n)return b(),0;var t=o(n,E);if(t)return t.skip?B+=n:(t.eB&&(B+=n),b(),t.rB||t.eB||(B=n)),v(t,n),t.rB?0:n.length;var r=u(E,n);if(r){var a=E;a.skip?B+=n:(a.rE||a.eE||(B+=n),b(),a.eE&&(B=n));do E.cN&&(k+=C),E.skip||(M+=E.r),E=E.parent;while(E!==r.parent);return 
r.starts&&v(r.starts,""),a.rE?0:n.length}if(c(n,E))throw new Error('Illegal lexeme "'+n+'" for mode "'+(E.cN||"")+'"');return B+=n,n.length||1}var N=R(e);if(!N)throw new Error('Unknown language: "'+e+'"');s(N);var w,E=i||N,L={},k="";for(w=E;w!==N;w=w.parent)w.cN&&(k=h(w.cN,"",!0)+k);var B="",M=0;try{for(var I,j,O=0;;){if(E.t.lastIndex=O,I=E.t.exec(t),!I)break;j=m(t.substr(O,I.index-O),I[0]),O=I.index+j}for(m(t.substr(O)),w=E;w.parent;w=w.parent)w.cN&&(k+=C);return{r:M,value:k,language:e,top:E}}catch(T){if(T.message&&-1!==T.message.indexOf("Illegal"))return{r:0,value:n(t)};throw T}}function f(e,t){t=t||y.languages||E(x);var r={r:0,value:n(e)},a=r;return t.filter(R).forEach(function(n){var t=l(n,e,!1);t.language=n,t.r>a.r&&(a=t),t.r>r.r&&(a=r,r=t)}),a.language&&(r.second_best=a),r}function g(e){return y.tabReplace||y.useBR?e.replace(M,function(e,n){return y.useBR&&"\n"===e?"
":y.tabReplace?n.replace(/\t/g,y.tabReplace):void 0}):e}function h(e,n,t){var r=n?L[n]:t,a=[e.trim()];return e.match(/\bhljs\b/)||a.push("hljs"),-1===e.indexOf(r)&&a.push(r),a.join(" ").trim()}function p(e){var n,t,r,o,s,p=i(e);a(p)||(y.useBR?(n=document.createElementNS("http://www.w3.org/1999/xhtml","div"),n.innerHTML=e.innerHTML.replace(/\n/g,"").replace(//g,"\n")):n=e,s=n.textContent,r=p?l(p,s,!0):f(s),t=u(n),t.length&&(o=document.createElementNS("http://www.w3.org/1999/xhtml","div"),o.innerHTML=r.value,r.value=c(t,u(o),s)),r.value=g(r.value),e.innerHTML=r.value,e.className=h(e.className,p,r.language),e.result={language:r.language,re:r.r},r.second_best&&(e.second_best={language:r.second_best.language,re:r.second_best.r}))}function d(e){y=o(y,e)}function b(){if(!b.called){b.called=!0;var e=document.querySelectorAll("pre code");w.forEach.call(e,p)}}function v(){addEventListener("DOMContentLoaded",b,!1),addEventListener("load",b,!1)}function m(n,t){var r=x[n]=t(e);r.aliases&&r.aliases.forEach(function(e){L[e]=n})}function N(){return E(x)}function R(e){return e=(e||"").toLowerCase(),x[e]||x[L[e]]}var w=[],E=Object.keys,x={},L={},k=/^(no-?highlight|plain|text)$/i,B=/\blang(?:uage)?-([\w-]+)\b/i,M=/((^(<[^>]+>|\t|)+|(?:\n)))/gm,C="
",y={classPrefix:"hljs-",tabReplace:null,useBR:!1,languages:void 0},I={"&":"&","<":"<",">":">"};return e.highlight=l,e.highlightAuto=f,e.fixMarkup=g,e.highlightBlock=p,e.configure=d,e.initHighlighting=b,e.initHighlightingOnLoad=v,e.registerLanguage=m,e.listLanguages=N,e.getLanguage=R,e.inherit=o,e.IR="[a-zA-Z]\\w*",e.UIR="[a-zA-Z_]\\w*",e.NR="\\b\\d+(\\.\\d+)?",e.CNR="(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)",e.BNR="\\b(0b[01]+)",e.RSR="!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~",e.BE={b:"\\\\[\\s\\S]",r:0},e.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[e.BE]},e.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[e.BE]},e.PWM={b:/\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|like)\b/},e.C=function(n,t,r){var a=e.inherit({cN:"comment",b:n,e:t,c:[]},r||{});return a.c.push(e.PWM),a.c.push({cN:"doctag",b:"(?:TODO|FIXME|NOTE|BUG|XXX):",r:0}),a},e.CLCM=e.C("//","$"),e.CBCM=e.C("/\\*","\\*/"),e.HCM=e.C("#","$"),e.NM={cN:"number",b:e.NR,r:0},e.CNM={cN:"number",b:e.CNR,r:0},e.BNM={cN:"number",b:e.BNR,r:0},e.CSSNM={cN:"number",b:e.NR+"(%|em|ex|ch|rem|vw|vh|vmin|vmax|cm|mm|in|pt|pc|px|deg|grad|rad|turn|s|ms|Hz|kHz|dpi|dpcm|dppx)?",r:0},e.RM={cN:"regexp",b:/\//,e:/\/[gimuy]*/,i:/\n/,c:[e.BE,{b:/\[/,e:/\]/,r:0,c:[e.BE]}]},e.TM={cN:"title",b:e.IR,r:0},e.UTM={cN:"title",b:e.UIR,r:0},e.METHOD_GUARD={b:"\\.\\s*"+e.UIR,r:0},e});hljs.registerLanguage("clojure",function(e){var t={"builtin-name":"def defonce cond apply if-not if-let if not not= = < > <= >= == + / * - rem quot neg? pos? delay? symbol? keyword? true? false? integer? empty? coll? list? set? ifn? fn? associative? sequential? sorted? counted? reversible? number? decimal? class? distinct? isa? float? rational? reduced? ratio? odd? even? char? seq? vector? string? map? nil? contains? zero? instance? not-every? not-any? libspec? -> ->> .. . 
inc compare do dotimes mapcat take remove take-while drop letfn drop-last take-last drop-while while intern condp case reduced cycle split-at split-with repeat replicate iterate range merge zipmap declare line-seq sort comparator sort-by dorun doall nthnext nthrest partition eval doseq await await-for let agent atom send send-off release-pending-sends add-watch mapv filterv remove-watch agent-error restart-agent set-error-handler error-handler set-error-mode! error-mode shutdown-agents quote var fn loop recur throw try monitor-enter monitor-exit defmacro defn defn- macroexpand macroexpand-1 for dosync and or when when-not when-let comp juxt partial sequence memoize constantly complement identity assert peek pop doto proxy defstruct first rest cons defprotocol cast coll deftype defrecord last butlast sigs reify second ffirst fnext nfirst nnext defmulti defmethod meta with-meta ns in-ns create-ns import refer keys select-keys vals key val rseq name namespace promise into transient persistent! conj! assoc! dissoc! pop! disj! use class type num float double short byte boolean bigint biginteger bigdec print-method print-dup throw-if printf format load compile get-in update-in pr pr-on newline flush read slurp read-line subvec with-open memfn time re-find re-groups rand-int rand mod locking assert-valid-fdecl alias resolve ref deref refset swap! reset! set-validator! compare-and-set! alter-meta! reset-meta! commute get-validator alter ref-set ref-history-count ref-min-history ref-max-history ensure sync io! new next conj set! 
to-array future future-call into-array aset gen-class reduce map filter find empty hash-map hash-set sorted-map sorted-map-by sorted-set sorted-set-by vec vector seq flatten reverse assoc dissoc list disj get union difference intersection extend extend-type extend-protocol int nth delay count concat chunk chunk-buffer chunk-append chunk-first chunk-rest max min dec unchecked-inc-int unchecked-inc unchecked-dec-inc unchecked-dec unchecked-negate unchecked-add-int unchecked-add unchecked-subtract-int unchecked-subtract chunk-next chunk-cons chunked-seq? prn vary-meta lazy-seq spread list* str find-keyword keyword symbol gensym force rationalize"},r="a-zA-Z_\\-!.?+*=<>&#'",n="["+r+"]["+r+"0-9/;:]*",a="[-+]?\\d+(\\.\\d+)?",o={b:n,r:0},s={cN:"number",b:a,r:0},i=e.inherit(e.QSM,{i:null}),c=e.C(";","$",{r:0}),d={cN:"literal",b:/\b(true|false|nil)\b/},l={b:"[\\[\\{]",e:"[\\]\\}]"},m={cN:"comment",b:"\\^"+n},p=e.C("\\^\\{","\\}"),u={cN:"symbol",b:"[:]{1,2}"+n},f={b:"\\(",e:"\\)"},h={eW:!0,r:0},y={k:t,l:n,cN:"name",b:n,starts:h},b=[f,i,m,p,c,u,l,s,d,o];return f.c=[e.C("comment",""),y,h],h.c=b,l.c=b,{aliases:["clj"],i:/\S/,c:[f,i,m,p,c,u,l,s,d]}});hljs.registerLanguage("clojure-repl",function(e){return{c:[{cN:"meta",b:/^([\w.-]+|\s*#_)=>/,starts:{e:/$/,sL:"clojure"}}]}}); -------------------------------------------------------------------------------- /docs/js/page_effects.js: -------------------------------------------------------------------------------- 1 | function visibleInParent(element) { 2 | var position = $(element).position().top 3 | return position > -50 && position < ($(element).offsetParent().height() - 50) 4 | } 5 | 6 | function hasFragment(link, fragment) { 7 | return $(link).attr("href").indexOf("#" + fragment) != -1 8 | } 9 | 10 | function findLinkByFragment(elements, fragment) { 11 | return $(elements).filter(function(i, e) { return hasFragment(e, fragment)}).first() 12 | } 13 | 14 | function scrollToCurrentVarLink(elements) { 15 | var elements = 
$(elements); 16 | var parent = elements.offsetParent(); 17 | 18 | if (elements.length == 0) return; 19 | 20 | var top = elements.first().position().top; 21 | var bottom = elements.last().position().top + elements.last().height(); 22 | 23 | if (top >= 0 && bottom <= parent.height()) return; 24 | 25 | if (top < 0) { 26 | parent.scrollTop(parent.scrollTop() + top); 27 | } 28 | else if (bottom > parent.height()) { 29 | parent.scrollTop(parent.scrollTop() + bottom - parent.height()); 30 | } 31 | } 32 | 33 | function setCurrentVarLink() { 34 | $('.secondary a').parent().removeClass('current') 35 | $('.anchor'). 36 | filter(function(index) { return visibleInParent(this) }). 37 | each(function(index, element) { 38 | findLinkByFragment(".secondary a", element.id). 39 | parent(). 40 | addClass('current') 41 | }); 42 | scrollToCurrentVarLink('.secondary .current'); 43 | } 44 | 45 | var hasStorage = (function() { try { return localStorage.getItem } catch(e) {} }()) 46 | 47 | function scrollPositionId(element) { 48 | var directory = window.location.href.replace(/[^\/]+\.html$/, '') 49 | return 'scroll::' + $(element).attr('id') + '::' + directory 50 | } 51 | 52 | function storeScrollPosition(element) { 53 | if (!hasStorage) return; 54 | localStorage.setItem(scrollPositionId(element) + "::x", $(element).scrollLeft()) 55 | localStorage.setItem(scrollPositionId(element) + "::y", $(element).scrollTop()) 56 | } 57 | 58 | function recallScrollPosition(element) { 59 | if (!hasStorage) return; 60 | $(element).scrollLeft(localStorage.getItem(scrollPositionId(element) + "::x")) 61 | $(element).scrollTop(localStorage.getItem(scrollPositionId(element) + "::y")) 62 | } 63 | 64 | function persistScrollPosition(element) { 65 | recallScrollPosition(element) 66 | $(element).scroll(function() { storeScrollPosition(element) }) 67 | } 68 | 69 | function sidebarContentWidth(element) { 70 | var widths = $(element).find('.inner').map(function() { return $(this).innerWidth() }) 71 | return 
Math.max.apply(Math, widths) 72 | } 73 | 74 | function calculateSize(width, snap, margin, minimum) { 75 | if (width == 0) { 76 | return 0 77 | } 78 | else { 79 | return Math.max(minimum, (Math.ceil(width / snap) * snap) + (margin * 2)) 80 | } 81 | } 82 | 83 | function resizeSidebars() { 84 | var primaryWidth = sidebarContentWidth('.primary') 85 | var secondaryWidth = 0 86 | 87 | if ($('.secondary').length != 0) { 88 | secondaryWidth = sidebarContentWidth('.secondary') 89 | } 90 | 91 | // snap to grid 92 | primaryWidth = calculateSize(primaryWidth, 32, 13, 160) 93 | secondaryWidth = calculateSize(secondaryWidth, 32, 13, 160) 94 | 95 | $('.primary').css('width', primaryWidth) 96 | $('.secondary').css('width', secondaryWidth).css('left', primaryWidth + 1) 97 | 98 | if (secondaryWidth > 0) { 99 | $('#content').css('left', primaryWidth + secondaryWidth + 2) 100 | } 101 | else { 102 | $('#content').css('left', primaryWidth + 1) 103 | } 104 | } 105 | 106 | $(window).ready(resizeSidebars) 107 | $(window).ready(setCurrentVarLink) 108 | $(window).ready(function() { persistScrollPosition('.primary')}) 109 | $(window).ready(function() { 110 | $('#content').scroll(setCurrentVarLink) 111 | $(window).resize(setCurrentVarLink) 112 | }) 113 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject deodorant "0.1.4" 2 | :description "Deodorant: Solving the problems of Bayesian Optimization" 3 | :url "http://github.com/probprog/deodorant" 4 | :license {:name "GNU General Public License Version 3" 5 | :url "http://www.gnu.org/licenses/gpl.html"} 6 | :plugins [[lein-codox "0.10.2"]] 7 | :dependencies [[org.clojure/clojure "1.8.0"] 8 | [clojure-csv/clojure-csv "2.0.1"] 9 | [clatrix "0.5.0"] 10 | [org.clojure/core.memoize "0.5.8"] 11 | [org.apache.commons/commons-math3 "3.6.1"] 12 | [bozo/bozo "0.1.1"] 13 | [colt "1.2.0"] 14 | [net.mikera/core.matrix 
"0.49.0"] 15 | [net.mikera/core.matrix.stats "0.7.0"] 16 | [net.mikera/vectorz-clj "0.43.1"] 17 | [org.clojure/tools.namespace "0.2.11"] 18 | [com.taoensso/tufte "1.0.0-RC2"]]) 19 | -------------------------------------------------------------------------------- /src/deodorant/acq_functions.clj: -------------------------------------------------------------------------------- 1 | (ns deodorant.acq-functions 2 | "Acquisition functions for Deodorant." 3 | (:require [clojure.core.matrix :as mat] 4 | [deodorant.broadcast-functions :refer [safe-sum]] 5 | [deodorant.helper-functions :refer [erf observe* normal]])) 6 | 7 | (defn- normal-cdf 8 | "Normal cumulative density function. 9 | 10 | Accepts: mean, standard dev, x 11 | Returns: cdf(x)" 12 | [mean stdd x] 13 | (let [xp (/ (- x mean) (* stdd (Math/sqrt 2)))] 14 | (* 1/2 (+ 1 (if (> xp 0) 15 | (erf xp) 16 | (- (erf (- xp)))))))) 17 | 18 | (defn expected-improvement 19 | "Expected improvement acquisition function with some trickery 20 | to overcome underflow when very low. Specifically, we 21 | revert to using log(UCB) with v=2 as the acquisition function 22 | if log(EI/exp(UCB))<-8. 23 | 24 | Accepts: 25 | xi - option for expected improvement (scalar) 26 | mu-best - best posterior mean evaluated at old data points (scalar) 27 | gp-predicter - function that returns [mu var] 28 | from the weighted gp ensemble posterior given 29 | a single point to evaluate x. 
See 30 | create-weighted-gp-predict-with-derivatives-function* 31 | x* - new point to evaluate (D length vector) 32 | 33 | Returns: 34 | EI - log value of expected improvement at x*" 35 | [xi mu-best gp-predicter x*] 36 | (let [[mu sig] (gp-predicter x*) 37 | u (mat/div (mat/sub mu (+ mu-best xi)) sig) 38 | phi (mapv #(mat/exp (observe* (normal 0 1) %)) u) 39 | Phi (mapv #(normal-cdf 0 1 %) u) 40 | EI (mat/mul sig (mat/add (mat/mul u Phi) phi)) 41 | EI (mat/emap (fn [x] (if (Double/isNaN x) 0 (max 0 x))) EI) 42 | UCB (mat/add mu (mat/scale sig 2)) 43 | ratio (mat/div EI (mat/exp UCB)) 44 | ratio (mat/emap #(if (< % 1e-8) 1e-8 %) 45 | ratio) 46 | ;; The UCB component will cancel out below unless 47 | ;; the cap on the ratio is in force, when it becomes 48 | ;; (+ UCB (log 1e-8)) 49 | logEI (mat/add UCB (mat/log ratio)) 50 | EI (mat/exp logEI)] 51 | EI)) 52 | 53 | (defn integrated-aq-func 54 | "Calculates the integrated acquisition function from a base 55 | acquisition function and a weighted sum of GP predictors. 56 | This is as per Snoek et al, NIPS 2012. 57 | 58 | Accepts: 59 | base-aq-func - Function which takes as inputs a gp-predictor 60 | and a point to evaluate, returning a utility of 61 | evaluating that point. 62 | mu-bests - The best of the expected best point previously 63 | evaluated for each GP, mu_j^+ in the paper. 64 | gp-predicters - A collection of gp-prediction functions. Each 65 | takes a point to evaluate an returns [mu sig] 66 | gp-weights - Vector of weights for each gp-predictor. 
The 67 | acquisition function values will be weighted by 68 | these weights 69 | x* - The point to evaluate (D length vector) 70 | Returns: 71 | The integrated acquistion function at x*" 72 | [base-aq-func mu-bests gp-predicters gp-weights x*] 73 | (let [acq-vals (mapv (fn [w m p] (mat/mul w 74 | (base-aq-func m p x*))) 75 | gp-weights 76 | mu-bests 77 | gp-predicters)] 78 | (safe-sum acq-vals 0))) 79 | -------------------------------------------------------------------------------- /src/deodorant/broadcast_functions.clj: -------------------------------------------------------------------------------- 1 | (ns deodorant.broadcast-functions 2 | (:require [clojure.core.matrix :refer [matrix shape transpose add sub div broadcast slice-views pow]])) 3 | 4 | (defn square-diff 5 | "Calculates array of squared differences. Accepts an [N D] array of 6 | points xs. Returns an [N D N] array in which entry [i d j] is given 7 | by (xs[i,d] - xs[j,d])^2." 8 | [xs] 9 | (let [xm (matrix xs) 10 | [N D] (shape xm) 11 | xx (broadcast (transpose xm) [N D N])] 12 | (pow (sub xx (transpose xx)) 2))) 13 | 14 | (defn broadcast-function-NxD-MxD 15 | "Performs an operation between a NxD array 16 | and a MxD array, returning a NxDxM array. 17 | Note if both inputs are NxD then the output 18 | will be NxDxN. 
19 | 20 | Accepts: op - matrix operation to perform (must be from core.matrix) 21 | xs - NxD array 22 | zs - NxM array 23 | bFlip - if true then NN is treated as 24 | first input in op 25 | Retuns: a NxDxM array" 26 | [op xs zs & b-Flip] 27 | (let [xm (matrix xs) 28 | zm (matrix zs) 29 | [N D] (shape xm) 30 | [M D_z] (shape zm) 31 | _ (assert (= D D_z) "Second dimensions of xs and zs must be equal") 32 | x-plate (transpose xm) 33 | z-plate (transpose zm) 34 | x-volume (transpose (broadcast x-plate [M D N])) 35 | z-volume (broadcast z-plate [N D M])] 36 | (if b-Flip 37 | (op z-volume x-volume) 38 | (op x-volume z-volume)))) 39 | 40 | (defn scale-square-diff 41 | "Scales the output from square-diff. Care should be taken 42 | to ensure inputs are of matrix type as will be slow 43 | otherwise. 44 | 45 | Accepts: x-sq-diff - NxDxM array to scale 46 | rho - vector of length scales 47 | 48 | Returns: the scaled version of x-sq-diff" 49 | [x-sq-diff rho] 50 | (let [[N D M] (shape x-sq-diff) 51 | rrho (-> (broadcast rho [M D]) 52 | transpose 53 | (broadcast [N D M]))] 54 | (div x-sq-diff rrho))) 55 | 56 | (defn safe-sum 57 | "Sums out a dimension of nd-array. Takes an array 58 | of matrix type and a dimension to sum out. Ensures 59 | no crash if X is nil. 60 | 61 | Accepts: X - matrix 62 | dim - dimension to remove. 63 | 64 | Returns: X with dim summed out" 65 | [X dim] 66 | (if (or (= nil (first X)) (> dim (dec (count (shape X))))) 67 | X 68 | (reduce add (slice-views X dim)))) 69 | 70 | (defn broadcast-function-NxDxN-NxN 71 | "Performs an operation between a NxDxN array 72 | and a NxN array. 
73 | 74 | Accepts: op - matrix operation to perform (must be from core.matrix) 75 | NDN - NxDxN array 76 | NN - NxN array 77 | bFlip - if true then NN is treated as 78 | first input in op 79 | Retuns: a NxDxN array" 80 | [op NDN NN & b-Flip] 81 | (let [[N D _] (shape NDN) 82 | NNb (slice-views (broadcast NN [D N N]) 1)] 83 | (if b-Flip 84 | (op NNb NDN) 85 | (op NDN NNb)))) 86 | 87 | (defn safe-broadcast-op 88 | "Calls broadcast-function-NxD-MxD when given arrays and 89 | just does standard broadcasting if one is a vector. 90 | See broadcast-function-NxD-MxD for details on call 91 | structure." 92 | [op M1 M2 & b-Flip] 93 | (let [[_ D1] (shape M1) 94 | [_ D2] (shape M2)] 95 | (if (or (= D1 nil) (= D2 nil)) 96 | (let [out (if b-Flip 97 | (op M2 M1) 98 | (op M1 M2)) 99 | [No Do] (shape out)] 100 | (if (= Do nil) 101 | (matrix [out]) 102 | (matrix out))) 103 | (apply broadcast-function-NxD-MxD op M1 M2 b-Flip)))) 104 | -------------------------------------------------------------------------------- /src/deodorant/core.clj: -------------------------------------------------------------------------------- 1 | (ns deodorant.core 2 | "Core functions of Deodorant." 
3 | (:refer-clojure :exclude [rand rand-nth rand-int]) 4 | (:require [clojure.core.matrix :as mat] 5 | [clojure.core.matrix.operators :as mop] 6 | [taoensso.tufte :as tufte :refer (defnp profiled profile)] 7 | [deodorant.gp-toolbox :as gp] 8 | [deodorant.acq-functions :as acq] 9 | [deodorant.scaling-functions :as sf] 10 | [deodorant.hmc :as hmc] 11 | [deodorant.broadcast-functions :as bf] 12 | [deodorant.covar-functions :as cf] 13 | [deodorant.default-params :as dp] 14 | [deodorant.helper-functions :refer [indexed-max cartesian mean]] 15 | [bozo.core :refer [lbfgs]] 16 | [clojure-csv.core :refer :all])) 17 | 18 | (defn- hyper-prior-log-posterior 19 | "Gives the pdf for the posterior over gp hyperparameters 20 | 21 | Accepts: hyper-prior-log-p log-pdf for the hyperprior 22 | cov-fn for of covariance for gp without hyperparameters set 23 | x-diff-sq 24 | y-bar 25 | alpha current value of the hyperparameters 26 | 27 | Returns: log-p for the posterior including terms from gp" 28 | [hyper-prior-log-p cov-fn x-diff-sq y-bar alpha] 29 | (let [[L psi] (gp/gp-train cov-fn x-diff-sq y-bar alpha) 30 | lik-gp (gp/gp-log-likelihood L psi y-bar)] 31 | (+ lik-gp (hyper-prior-log-p alpha)))) 32 | 33 | (defn- grad-hyper-prior-log-posterior 34 | "Gives derivative of the pdf for the posterior over gp hyperparameters 35 | with respect to each of the hyperpriors. 
36 | 37 | Accepts: hyper-prior-grad-log-p gradient of log-pdf for the hyperprior 38 | cov-fn for of covariance for gp without hyperparameters set 39 | grad-cov-fn-hyper gradient of covariance function wrt hyperparameters 40 | x-diff-sq 41 | y-bar 42 | alpha current value of the hyperparameters 43 | 44 | Returns: grad-log-p for the posterior including terms from gp" 45 | [hyper-prior-grad-log-p cov-fn grad-cov-fn-hyper x-diff-sq y-bar b-deterministic alpha] 46 | (let [[L psi] (gp/gp-train 47 | cov-fn x-diff-sq y-bar alpha) 48 | grad-log-lik-gp (gp/gp-grad-log-likelihood 49 | grad-cov-fn-hyper x-diff-sq alpha L psi)] 50 | (mat/add grad-log-lik-gp (hyper-prior-grad-log-p alpha)))) 51 | 52 | (defn- call-lbfgs-ignoring-failure 53 | "Calls lbfgs within a try catch and just returns start point if it fails" 54 | [f x0 max-iters] 55 | (try 56 | (lbfgs f x0 {:maxit max-iters}) 57 | (catch Exception e 58 | x0))) 59 | 60 | (defn- lbfgs-maximize 61 | "Runs gradient ascent (LBFGS) to find a local optimum. 62 | . Uses bozo which unfortunately appears to not allow 63 | distribution (i.e. pmap) because it uses immutable objects that 64 | end up shared between runs. 65 | Accepts: 66 | x-start - Start points 67 | target-fn - Function to optimize. 68 | 69 | Returns: 70 | x* - D-dimensional vector argmax_x acq_fn(x)." 
71 | [x-start target-fn max-iters] 72 | (let [;; THIS WANTS TO BE PMAP INSTEAD OF MAP BUT THIS CAUSES RANDOM FUCKUPS IN BOZO 73 | x-start (double-array x-start) 74 | f (fn [x] (let [[y gy] (target-fn (vec x))] [(double (- y)) (double-array (mat/sub gy))])) 75 | x-max (vec (call-lbfgs-ignoring-failure f x-start max-iters)) ; Add :iprint [1 3] to options keymap for loads of print outs 76 | ] 77 | x-max)) 78 | 79 | 80 | (defn- infer-gp-hyper 81 | "Takes a mean-fn, a cov-fn (with unset hyperparameters), a 82 | series of points and hyper-prior and returns a weighted set 83 | of hyperparameter samples using a HMC sampler" 84 | [X Y & 85 | {:keys [mean-fn cov-fn grad-cov-fn-hyper gp-hyperprior hmc-step-size 86 | hmc-num-leapfrog-steps hmc-num-mcmc-steps hmc-num-opt-steps hmc-num-chains 87 | hmc-burn-in-proportion hmc-max-gps b-deterministic verbose] 88 | :or {}}] 89 | (let [;; working with a zero mean GP and adding mean in later 90 | y-bars (gp/subtract-mean-fn mean-fn X Y) 91 | 92 | ;; common term used everywhere 93 | x-diffs-sq (bf/square-diff X) 94 | 95 | ;; function and its gradient for the log posterior of the gp hyperparameters 96 | f (partial hyper-prior-log-posterior (:log-p gp-hyperprior) cov-fn x-diffs-sq y-bars) 97 | grad-f (partial grad-hyper-prior-log-posterior (:grad-log-p gp-hyperprior) cov-fn grad-cov-fn-hyper x-diffs-sq y-bars b-deterministic) 98 | 99 | ;; hmc needs negative of this 100 | u (fn [alpha] (- (f alpha))) ; MEMOIZE ME 101 | grad-u (fn [alpha] (mat/sub (grad-f alpha))) ; MEMOIZE ME 102 | 103 | ;; select the points at which to initialize the hmc chains 104 | alpha-starts (map vec (dp/default-hmc-initializer hmc-num-chains gp-hyperprior)) 105 | D (first (mat/shape (first alpha-starts))) 106 | f_grad_f (fn [x] (vec [(f x) (grad-f x)])) 107 | alpha-starts (map #(lbfgs-maximize % f_grad_f hmc-num-opt-steps) alpha-starts) 108 | ; Make sure that all alphas are within the range where they won't cause issue 109 | ; Can probably make this larger than 9 
110 | alpha-starts (mapv #(mop/min (repeat D 9) (mop/max (repeat D -9) %)) alpha-starts) 111 | 112 | _ (if (> verbose 1) 113 | (println :u-starts (mapv u alpha-starts))) 114 | 115 | hmc-step-size-scaled (vec (repeat D hmc-step-size)) 116 | call-hmc-sampler (fn [q-start] 117 | (take hmc-num-mcmc-steps 118 | (hmc/hmc-chain 119 | u grad-u hmc-step-size-scaled hmc-num-leapfrog-steps q-start (u q-start)))) 120 | thin-rate 1 ; Now using max-n-gps instead 121 | alphas-and-us (reduce concat 122 | (pmap #(hmc/burn-in-and-thin 123 | hmc-burn-in-proportion thin-rate 124 | (call-hmc-sampler %)) 125 | alpha-starts)) 126 | alpha-samples (mapv first alphas-and-us) 127 | u-vals (mapv second alphas-and-us) 128 | [alpha-samples weights] (hmc/collapse-identical-samples 129 | alpha-samples 130 | verbose) 131 | i-keep (take hmc-max-gps (shuffle (range (count weights)))) 132 | alpha-samples (mapv #(nth alpha-samples %) i-keep) 133 | weights (mapv #(nth weights %) i-keep) 134 | weights (hmc/scale-vector weights (/ 1 (reduce + weights))) 135 | alpha-particles (take hmc-max-gps 136 | (shuffle (mapv (fn [a b] [a b]) 137 | alpha-samples weights)))] 138 | alpha-particles)) 139 | 140 | (defn bo-acquire 141 | "Performs the acquisition step in Bayesian optimization. Accepts a 142 | sequence inputs and outputs and returns the next point x-next to 143 | evaluate, the index of the point from those previously evaluated 144 | that is considered to be the best, and the predicted mean and std-dev 145 | of the evaluated points. Note this std-dev is in the estimate of the 146 | 'true' function value at this point and does not include the noise involved 147 | in evaluating this function. 
148 | 149 | Accepts: 150 | X - matrix of input points, first dimension represents different points 151 | second dimension the different input dimensions 152 | Y - a vector of associated outputs 153 | acq-optimizer - a function that takes in the acquisition function as 154 | input and returns a point in the space of x that is the estimated 155 | optimum of the acquisition function, subject to the constraints of 156 | the model. 157 | scaling-funcs - scaling function object output from sf/setup-scaling-funcs 158 | 159 | Options (each of these is provided as a key value pair, defaults are 160 | not provided as these are set by deodorant which calls this 161 | function which should be consulted for further info) 162 | cov-fn-form grad-cov-fn-hyper-form mean-fn-form 163 | gp-hyperprior-form hmc-step-size hmc-num-leapfrog-steps 164 | hmc-num-mcmc-steps hmc-num-opt-steps hmc-num-chains 165 | hmc-burn-in-proportion hmc-max-gps verbose debug-folder plot-aq] 166 | 167 | Returns: 168 | x-next - point that should be evaluated next (optimum of the acquisition 169 | function) 170 | i-best - index of the point expected to be most optimal under the mixture 171 | of GPs posterior 172 | means - estimated mean value for each point in X. 173 | std-devs - estimated standard deviation for each point in X." 
174 | [X Y acq-optimizer scaling-funcs & 175 | {:keys [cov-fn-form grad-cov-fn-hyper-form mean-fn-form 176 | gp-hyperprior-form hmc-step-size hmc-num-leapfrog-steps 177 | hmc-num-mcmc-steps hmc-num-opt-steps hmc-num-chains 178 | hmc-burn-in-proportion hmc-max-gps verbose 179 | debug-folder plot-aq b-deterministic] 180 | :or {}}] 181 | (let [[_ D] (mat/shape X) 182 | mean-fn (mean-fn-form D) 183 | cov-fn (cf/matern-for-vector-input D cov-fn-form) 184 | grad-cov-fn-hyper (cf/matern-for-vector-input D grad-cov-fn-hyper-form) 185 | gp-hyperprior (gp-hyperprior-form D b-deterministic) 186 | 187 | ;; Obtain particle estimate of predictive distribution on GP 188 | ;; hyperparameters 189 | alpha-particles (tufte/p :hyper-infer 190 | (infer-gp-hyper X Y 191 | 192 | ;; BO options 193 | :mean-fn mean-fn 194 | :cov-fn cov-fn 195 | :grad-cov-fn-hyper grad-cov-fn-hyper 196 | :gp-hyperprior gp-hyperprior 197 | :b-deterministic b-deterministic ;; FIXME make this less of a hack 198 | 199 | ;; HMC options 200 | :hmc-step-size hmc-step-size 201 | :hmc-num-leapfrog-steps hmc-num-leapfrog-steps 202 | :hmc-num-mcmc-steps hmc-num-mcmc-steps 203 | :hmc-num-opt-steps hmc-num-opt-steps 204 | :hmc-num-chains hmc-num-chains 205 | :hmc-burn-in-proportion hmc-burn-in-proportion 206 | :hmc-max-gps hmc-max-gps 207 | 208 | :verbose verbose)) 209 | alphas (map first alpha-particles) 210 | gp-weights (map second alpha-particles) 211 | 212 | _ (if (> verbose 1) (println :n-gps-in-acq-function (count gp-weights))) 213 | 214 | ;; function to create a trained-gp-obj for a given alpha currently 215 | ;; create-trained-gp-obj takes x and y in the form of "points" as 216 | ;; passed to bo-acquire, this should be changed 217 | 218 | gp-trainer (partial gp/create-trained-gp-obj 219 | mean-fn cov-fn X Y) 220 | 221 | ;; make trained-gp-obj for each sampled alpha 222 | gps (mapv gp-trainer alphas) 223 | 224 | gp-predictors (mapv #(fn [x*] (gp/gp-predict-mu-sig % x*)) gps) 225 | 226 | ;; mean-best for each gp 
required for the respective acq-func 227 | all-means (mapv first (mapv #(% X) gp-predictors)) 228 | mean-bests (mapv #(first (indexed-max identity %)) all-means) 229 | 230 | ;; Setup the acquistion function (will be a function of the new point 231 | ;; x* that returns a value and a derivative) 232 | xi 0 ; For now we are just going to hard-code xi to 0 for simplicity 233 | base-acq-func (partial acq/expected-improvement xi) 234 | acq-fn (partial acq/integrated-aq-func base-acq-func mean-bests gp-predictors gp-weights) 235 | acq-fn-single #(acq-fn [%]) 236 | ;; Optimize the acquisition function to give the point to 237 | ;; evaluate next 238 | 239 | x-next (tufte/p :acq-opt 240 | (acq-optimizer 241 | #(first (acq-fn-single %)))) 242 | acq-opt (acq-fn-single x-next) 243 | 244 | ;; Establish which point is best so far and the mean and std dev 245 | ;; for each of the evaluated points. This is not only for sake 246 | ;; of the return arguments 247 | [means std-devs] (gp/gp-mixture-mu-sig gp-predictors gp-weights X) 248 | [_ i-best] (indexed-max identity means)] 249 | (if (> verbose 1) (do (println :acq-opt acq-opt) 250 | (println :i-best i-best))) 251 | ;; If the debug folder option is set, do some extra calculations and 252 | ;; output all the results 253 | 254 | (tufte/p :db-folder 255 | (if debug-folder 256 | (let [subfolder (str "bopp-debug-files/" debug-folder "/bo-step-scaled-" (System/currentTimeMillis)) 257 | alphas-exp (mat/exp alphas) 258 | [_ D] (mat/shape X) 259 | [_ n-alpha] (mat/shape alphas-exp) 260 | alphas-unscaled (loop [a-un ((:log-Z-unscaler-no-centering scaling-funcs) (mat/submatrix alphas-exp 1 [0 1])) 261 | i-s 1] 262 | (if (>= i-s n-alpha) 263 | a-un 264 | (recur (mat/join-along 1 a-un 265 | ((:log-Z-unscaler-no-centering scaling-funcs) (mat/submatrix alphas-exp 1 [i-s 1])) 266 | ((:theta-unscaler-no-centering scaling-funcs) (mat/submatrix alphas-exp 1 [(inc i-s) D]))) 267 | (+ i-s 1 D)))) 268 | alphas-csv (write-csv (map #(map str %) 
alphas-unscaled)) 269 | gp-weights-csv (write-csv (map (comp vector str double) gp-weights)) 270 | xs-unscaled ((:theta-unscaler scaling-funcs) X) 271 | ys-unscaled ((:log-Z-unscaler scaling-funcs) Y) 272 | xs-csv (write-csv (map #(map str %) xs-unscaled)) 273 | ys-csv (write-csv (map (comp vector str) ys-unscaled)) 274 | 275 | x-grid (if (<= D 2) 276 | (let [start -1.49 277 | end 1.49 278 | step (if (= D 1) 0.002 0.02)] ; can be more general but don't want to import things since this is only temporary 279 | (cartesian (repeat D (range start end step))))) 280 | x-grid-csv (if (<= D 2) (write-csv (map #(map str %) ((:theta-unscaler scaling-funcs) x-grid)))) 281 | prior-mean-vals (if (<= D 2) ((:log-Z-unscaler scaling-funcs) (mean-fn (mat/matrix x-grid)))) 282 | prior-mean-vals-csv (if (<= D 2) (write-csv (map (comp vector str) prior-mean-vals)))] 283 | (.mkdir (java.io.File. subfolder)) 284 | (spit (str subfolder "/alphas.csv") alphas-csv) 285 | (spit (str subfolder "/gp-weights.csv") gp-weights-csv) 286 | (spit (str subfolder "/xs.csv") xs-csv) 287 | (spit (str subfolder "/ys.csv") ys-csv) 288 | (if (<= D 2) (spit (str subfolder "/x-grid.csv") x-grid-csv)) 289 | (if (<= D 2) (spit (str subfolder "/prior-mean.csv") prior-mean-vals-csv)) 290 | 291 | (if (and plot-aq (<= D 2)) 292 | (let [acq-vals (acq-fn x-grid) 293 | [means std-devs] (gp/gp-mixture-mu-sig gp-predictors gp-weights x-grid) 294 | means-csv (write-csv (map (comp vector str) ((:log-Z-unscaler scaling-funcs) means))) 295 | std-devs-csv (write-csv (map (comp vector str) ((:log-Z-unscaler-no-centering scaling-funcs) std-devs))) 296 | acq-csv (write-csv (map (comp vector str) acq-vals))] 297 | 298 | (spit (str subfolder "/acq.csv") acq-csv) 299 | (spit (str subfolder "/mus.csv") means-csv) 300 | (spit (str subfolder "/sigs.csv") std-devs-csv))))) 301 | 302 | ;; Final return 303 | [x-next i-best means std-devs]))) 304 | 305 | (defn deodorant 306 | "Deodorant: solving the problems of Bayesian optimization. 
307 | 308 | Deodorant is a Bayesian optimization (BO) package with three core features: 309 | 1) Domain scaling to exploit problem independent GP hyperpriors 310 | 2) A non-stationary mean function to allow unbounded optimization 311 | 3) External provision of the acquisition function optimizer so that this 312 | can incorporate the constraints of the problem (inc equality constraints) 313 | and ensure that no invalid points are evaluated. 314 | 315 | The main intended use of the package at present is as the BO component 316 | for BOPP (Bayesian Optimization for Probabilistic Programs. Rainforth T, Le TA, 317 | van de Meent J-W, Osborne MA, Wood F. In NIPS 2016) which provides all the 318 | required inputs automatically given a program. Even when the intention is 319 | simply optimization, using BOPP rather than Deodorant directly is currently 320 | recommended. The rationale of providing Deodorant as its own independent 321 | package is to separate out the parts of BOPP that are Anglican dependent and 322 | those that are not. As such, one may wish to integrate Deodorant into 323 | another similar package that provides all the required inputs. 324 | 325 | For details on the working of Deodorant, the previously referenced paper and 326 | its supplementary material should be consulted. 327 | 328 | Accepts: 329 | f - target function. Takes in a single input x and returns a pair 330 | [f(x), other-outputs(x)]. Here other-outputs allows for additional x 331 | dependent variables to be returned. For example, in BOPP then 332 | other-outputs(x) is a vector of program outputs from calling the 333 | marginal query, with one component for each sample output from 334 | this marginal query. 335 | acq-optimizer - a function that takes in the acquisition function as 336 | input and returns a point in the space of x that is the estimated 337 | optimum of the acquisition function, subject to the constraints of 338 | the model. 
339 | theta-sampler - characterization of the input variables which can be 340 | sampled from to generate example inputs and initialize the scaling. 341 | Should be a function that takes no inputs and returns valid examples 342 | of the input variables. Note that the input variables are currently 343 | called x in the inner functions. 344 | 345 | Optional Inputs: (defined with key value pairs, default values shown below 346 | in brackets) 347 | Initialization options: 348 | :initial-points - Pre-evaluated points (i.e. theta and output) 349 | in addition to those sampled by theta-sampler. To see correct formatting, 350 | run without specifying and verbose > 1. 351 | [nil] 352 | :initial-thetas - Points to evaluate at start in addition to randomly sampled 353 | points (i.e. total number of initialization is points provided here + 354 | :num-initial-points). To see correct formatting, run without specifying and verbose > 1. 355 | :num-scaling-thetas - Number of points used to initialize scaling 356 | [50] 357 | :num-initial-points - Number of points to initialize BO 358 | [5] 359 | 360 | GP options: 361 | :cov-fn-form - covariance function with unset hyperparameters 362 | [cp/matern32-plus-matern52-K] 363 | :grad-cov-fn-hyper - grad of the above with respect to the hyperparameters 364 | [cp/matern32-plus-matern52-grad-K] 365 | :mean-fn-form - mean function with unset dimensionality 366 | [dp/default-mean-fn-form] 367 | :gp-hyperprior-form - constructor for the gp hyperparameter hyperprior 368 | [dp/default-double-matern-hyperprior] 369 | :b-deterministic - whether to include noise in the GP 370 | [false] 371 | 372 | HMC options: 373 | :hmc-step-size - HMC step size 374 | [0.01] 375 | :hmc-num-leapfrog-steps - Number of HMC leap-frog steps 376 | [5] 377 | :hmc-num-chains - Number of samplers run in parallel 378 | [50] 379 | :hmc-burn-in-proportion - Proportion of samples to throw away as burn in 380 | [8] 381 | :hmc-max-gps - Maximum number of unique GPs to keep at the 
end so that 382 | optimization of the acquisition function does not become 383 | too expensive. 384 | [50] 385 | Debug options: 386 | :verbose - debug print level: 0 (none) / 1 (iteration summaries) / 2 (detailed output) 387 | [0] 388 | :debug-folder - Path for the debug folder. No output generated if path 389 | not provided. These outputs include alphas (gp hyperparameters), 390 | gp-weights (weights for each hyperparameter sample) etc 391 | [empty] 392 | :plot-aq - Generate debugging csv of acquisition functions 393 | [false] 394 | :invert-output-display - Displays values of (- (f theta)) instead of (f theta). 395 | This is because we only consider maximization such that minimization is 396 | done by inverting f, in which case it may be preferable to print out 397 | the uninverted values (e.g. risk minimization in bopp). 398 | [false] 399 | 400 | Returns: 401 | Lazy list of increasingly optimal triples 402 | (theta, estimated value of (f theta) by gp, raw evaluated value of (f theta), other outputs of f)." 
403 | [f aq-optimizer theta-sampler & 404 | {:keys [initial-points initial-thetas num-scaling-thetas num-initial-points cov-fn-form 405 | grad-cov-fn-hyper-form mean-fn-form gp-hyperprior-form b-deterministic 406 | hmc-step-size hmc-num-leapfrog-steps hmc-num-mcmc-steps hmc-num-opt-steps 407 | hmc-num-chains hmc-burn-in-proportion hmc-max-gps verbose debug-folder plot-aq invert-output-display] 408 | :or {;; Initialization options 409 | initial-points nil 410 | initial-thetas nil 411 | num-scaling-thetas 1000 412 | num-initial-points 5 413 | 414 | ;; BO options 415 | cov-fn-form cf/matern32-plus-matern52-K 416 | grad-cov-fn-hyper-form cf/matern32-plus-matern52-grad-K 417 | mean-fn-form dp/default-mean-fn-form 418 | gp-hyperprior-form dp/default-double-matern-hyperprior 419 | b-deterministic false 420 | 421 | ;; HMC options 422 | hmc-step-size 0.01 423 | hmc-num-leapfrog-steps 2; 5 424 | hmc-num-mcmc-steps 20; 50 425 | hmc-num-opt-steps 10; 15 426 | hmc-num-chains 4; 8 427 | hmc-burn-in-proportion 0.5 428 | hmc-max-gps 20; 50 429 | 430 | ;; Debug options 431 | verbose 0 432 | debug-folder nil 433 | plot-aq false 434 | invert-output-display false}}] 435 | (if debug-folder 436 | (do 437 | (.mkdir (java.io.File. "bopp-debug-files")) 438 | (.mkdir (java.io.File. 
(str "bopp-debug-files/" debug-folder))))) 439 | (let [;; Back compatibility with verbose 440 | verbose (or verbose 0) 441 | verbose (if (= true verbose) 1 verbose) 442 | 443 | ;; Print options at high debug level 444 | _ (if (> verbose 1) 445 | (println 446 | :initial-points initial-points 447 | :initial-thetas initial-thetas 448 | :num-scaling-thetas num-scaling-thetas 449 | :num-initial-points num-initial-points 450 | :mean-fn-form mean-fn-form 451 | :cov-fn-form cov-fn-form 452 | :grad-cov-fn-hyper-form grad-cov-fn-hyper-form 453 | :gp-hyperprior-form gp-hyperprior-form 454 | :b-deterministic b-deterministic 455 | :hmc-step-size hmc-step-size 456 | :hmc-num-leapfrog-steps hmc-num-leapfrog-steps 457 | :hmc-num-mcmc-steps hmc-num-mcmc-steps 458 | :hmc-num-opt-steps hmc-num-opt-steps 459 | :hmc-num-chains hmc-num-chains 460 | :hmc-burn-in-proportion hmc-burn-in-proportion 461 | :hmc-max-gps hmc-max-gps 462 | :verbose verbose 463 | :debug-folder debug-folder 464 | :plot-aq plot-aq)) 465 | 466 | print-transform (if invert-output-display #(- %) identity) 467 | 468 | ;; Sample some thetas to use for scaling 469 | num-scaling-thetas (max num-initial-points num-scaling-thetas) 470 | scaling-thetas (theta-sampler num-scaling-thetas) 471 | [flat-f unflat-f] (sf/flatten-unflatten (first scaling-thetas)) 472 | scaling-thetas (mapv flat-f scaling-thetas) 473 | 474 | b-integer (mapv #(or (instance? Long %) (instance? 
Integer %)) (first scaling-thetas)) 475 | 476 | ;; FIXME add code to keep randomly sampling until distinct inputs and distinct outputs have been found 477 | 478 | ;; Choose a subset of scaling thetas and evaluate as the starting points 479 | initial-theta-samples (mapv #(unflat-f (nth scaling-thetas %)) 480 | (take num-initial-points 481 | (shuffle (range 0 (count scaling-thetas))))) 482 | 483 | initial-thetas (concat initial-thetas initial-theta-samples) 484 | 485 | _ (if (> verbose 1) 486 | (println :intial-thetas initial-thetas)) 487 | 488 | initial-points (concat initial-points 489 | (map #(into [] 490 | (cons % (f %))) 491 | initial-thetas)) 492 | 493 | _ (if (> verbose 1) 494 | (println :intial-points initial-points)) 495 | 496 | ;; Setup the scaling details 497 | theta-min (reduce clojure.core.matrix.operators/min scaling-thetas) 498 | theta-max (reduce clojure.core.matrix.operators/max scaling-thetas) 499 | initial-log-Zs (mapv second initial-points) 500 | log-Z-min (reduce min initial-log-Zs) 501 | log-Z-max (reduce max initial-log-Zs) 502 | scale-details-initial (sf/->scale-details-obj theta-min 503 | theta-max 504 | log-Z-min 505 | log-Z-max)] 506 | (letfn [(point-seq [points scale-details] 507 | (lazy-seq 508 | (let [_ (if (> verbose 0) (println "BO Iteration: " (inc (- (count points) (inc num-initial-points))))) 509 | scaling-funcs (sf/setup-scaling-funcs 510 | scale-details) 511 | 512 | theta-scaler (comp (:theta-scaler scaling-funcs) flat-f) 513 | log-Z-scaler (:log-Z-scaler scaling-funcs) 514 | 515 | aq-optimizer-scaled (fn [acq-fn] 516 | (theta-scaler 517 | (aq-optimizer 518 | (fn [theta & args] 519 | (apply acq-fn (theta-scaler theta) args))))) ; takes in a function to optimize 520 | 521 | [theta-next-sc i-best mean-thetas-sc std-dev-thetas-sc] 522 | (bo-acquire ((:theta-scaler scaling-funcs) 523 | (mapv flat-f 524 | (mapv first points))) 525 | (log-Z-scaler 526 | (mapv second points)) 527 | aq-optimizer-scaled 528 | scaling-funcs 529 | 530 | ;; 
TODO Make these non optional for bo-acquire 531 | 532 | ;; BO options 533 | :mean-fn-form mean-fn-form 534 | :cov-fn-form cov-fn-form 535 | :grad-cov-fn-hyper-form grad-cov-fn-hyper-form 536 | :gp-hyperprior-form gp-hyperprior-form 537 | :b-deterministic b-deterministic 538 | 539 | ;; HMC options 540 | :hmc-step-size hmc-step-size 541 | :hmc-num-leapfrog-steps hmc-num-leapfrog-steps 542 | :hmc-num-mcmc-steps hmc-num-mcmc-steps 543 | :hmc-num-opt-steps hmc-num-opt-steps 544 | :hmc-num-chains hmc-num-chains 545 | :hmc-burn-in-proportion hmc-burn-in-proportion 546 | :hmc-max-gps hmc-max-gps 547 | 548 | ;; Debug options 549 | :verbose verbose 550 | :debug-folder debug-folder 551 | :plot-aq plot-aq) 552 | 553 | theta-next ((:theta-unscaler scaling-funcs) 554 | theta-next-sc) 555 | ;; Anything that is discrete we need to make sure it 556 | ;; is the right type. Note that it shoud still be integer 557 | ;; valued, but it still needs its type to be changed. 558 | theta-next (mapv #(if %1 559 | (int (+ 0.49 %2)) 560 | %2) 561 | b-integer theta-next) 562 | theta-next (unflat-f theta-next) 563 | mean-thetas ((:log-Z-unscaler scaling-funcs) 564 | mean-thetas-sc) 565 | std-dev-thetas ((:log-Z-unscaler-no-centering scaling-funcs) 566 | std-dev-thetas-sc) 567 | 568 | _ (if (> verbose 0) 569 | (do (println "Theta to evaluate next: " theta-next))) 570 | 571 | [log-Z results] (f theta-next) 572 | points (conj points 573 | [theta-next log-Z results]) 574 | best-point (nth points (inc i-best)) 575 | return-val [(first best-point) 576 | (nth mean-thetas i-best) 577 | (second best-point) 578 | (last best-point)] 579 | 580 | ;;_ (if verbose (println :log-Z-i-best (second (nth points (inc i-best))))) 581 | _ (if (> verbose 0) 582 | (do (println "Best theta: " (first return-val)) 583 | (println "GP mixture estimate of (f best-theta): " (print-transform (second return-val))) 584 | (println "Evaluated (f best-theta): " (print-transform (nth return-val 2))))) 585 | _ (if (> verbose 0) 
(println "Function value at theta next: " (print-transform log-Z) "\n"))] 586 | (cons return-val 587 | (point-seq points 588 | (sf/update-scale-details 589 | scale-details 590 | scaling-funcs 591 | (flat-f theta-next) 592 | log-Z))))))] 593 | (point-seq initial-points scale-details-initial)))) 594 | -------------------------------------------------------------------------------- /src/deodorant/covar_functions.clj: -------------------------------------------------------------------------------- 1 | (ns deodorant.covar-functions 2 | "Covariance functions for Deodorant." 3 | (:require [clojure.core.matrix :refer [matrix shape transpose mul add sub div join broadcast slice-views sqrt exp pow]] 4 | [deodorant.broadcast-functions :as bf])) 5 | 6 | (defn- calc-d 7 | "Calculates scaled distance 8 | between points given log-rho and x-diff-squared 9 | 10 | Accepts: log-rho - a vector 11 | x-diff-squared - a NxDxM matrix of squared distances or 12 | NxD matrix of squared distances of old 13 | points and new point 14 | 15 | Returns: d - scaled distance between points 16 | d-squared - squared scaled distance between points 17 | sep-squared - squared seperations prior to summing 18 | over a dimension" 19 | ; MEMOIZE ME 20 | [log-rho x-diff-squared] 21 | (let [rho-sq (matrix (mapv (fn [x] (exp (* 2 x))) 22 | log-rho)) 23 | sep-squared (if (> (count (shape x-diff-squared)) 2) 24 | (bf/scale-square-diff x-diff-squared (matrix rho-sq)) 25 | (div x-diff-squared rho-sq)) 26 | d-squared (bf/safe-sum sep-squared 1)] 27 | [(sqrt d-squared) d-squared sep-squared])) 28 | 29 | (defn- exp-minus-sqrt3-d 30 | "Calculates sqrt(3)d and exp(-sqrt(3)d) allowing memoization" 31 | ; MEMOIZE ME 32 | [d] 33 | (let [sqrt-3-d (mul (sqrt 3) d)] 34 | [sqrt-3-d (exp (sub 0 sqrt-3-d))])) 35 | 36 | (defn- exp-minus-sqrt5-d 37 | "Calculates sqrt(5)d and exp(-sqrt(5)d) allowing memoization" 38 | ; MEMOIZE ME 39 | [d] 40 | (let [sqrt-5-d (mul (sqrt 5) d)] 41 | [sqrt-5-d (exp (sub 0 sqrt-5-d))])) 42 | 43 | (defn 
matern32-K 44 | "Covariance function for matern-32. 45 | 46 | Accepts: x-diff-squared - a NxDxN matrix of squared distances 47 | or NxD matrix of squared distances of old points 48 | and new point 49 | log-sig-f - a scalar 50 | log-rho - a vector 51 | 52 | Returns: A matrix K" 53 | ; MEMOIZE ME 54 | [x-diff-squared log-sig-f log-rho] 55 | (let [sig-f-sq (exp (* 2 log-sig-f)) 56 | [d _ _] (calc-d log-rho x-diff-squared) 57 | [sqrt-3-d exp-m-sqrt3-d] (exp-minus-sqrt3-d d)] 58 | (mul sig-f-sq (mul (add 1 sqrt-3-d) 59 | exp-m-sqrt3-d)))) 60 | 61 | (defn matern32-xs-z 62 | "Side covariance matrix for matern-32, i.e. vector k where 63 | k_i = kernel(x_i, z). 64 | 65 | Accepts: 66 | xs - a NxD vector of vectors of xs 67 | z - [Dx1] vector of new data point 68 | log-sig-f - a scalar 69 | log-rho - a vector 70 | 71 | Returns: A vector k sized N." 72 | [xs z log-sig-f log-rho] 73 | (let [sig-f-sq (exp (* 2 log-sig-f)) 74 | xs-z-diff-squared (pow (sub (matrix xs) (matrix z)) 2) 75 | [d _ _] (calc-d log-rho xs-z-diff-squared) 76 | [sqrt-3-d exp-m-sqrt3-d] (exp-minus-sqrt3-d d)] 77 | (mul sig-f-sq (mul (add 1 sqrt-3-d) 78 | exp-m-sqrt3-d)))) 79 | 80 | (defn matern52-K 81 | "Covariance function for matern-52. 82 | 83 | Accepts: x-diff-squared - a NxDxN matrix of squared distances 84 | or NxD matrix of squared distances of old points 85 | and new point 86 | log-sig-f - a scalar 87 | log-rho - a vector 88 | 89 | Returns: A matrix K" 90 | ; MEMOIZE ME 91 | [x-diff-squared log-sig-f log-rho] 92 | (let [sig-f-sq (exp (* 2 log-sig-f)) 93 | [d d-sq _] (calc-d log-rho x-diff-squared) 94 | [sqrt-5-d exp-m-sqrt5-d] (exp-minus-sqrt5-d d)] 95 | (mul sig-f-sq 96 | (add 1 sqrt-5-d (mul d-sq (/ 5 3))) 97 | exp-m-sqrt5-d))) 98 | 99 | (defn matern52-xs-z 100 | "Side covariance matrix for matern-52, i.e. vector k where 101 | k_i = kernel(x_i, z). 
102 | 103 | Accepts: 104 | xs - a NxD vector of vectors of xs 105 | z - [Dx1] vector of new data point 106 | log-sig-f - a scalar 107 | log-rho - a vector 108 | 109 | Returns: A vector k sized N." 110 | [xs z log-sig-f log-rho] 111 | (let [sig-f-sq (exp (* 2 log-sig-f)) 112 | xs-z-diff-squared (pow (sub (matrix xs) (matrix z)) 2) 113 | [d d-sq _] (calc-d log-rho xs-z-diff-squared) 114 | [sqrt-5-d exp-m-sqrt5-d] (exp-minus-sqrt5-d d)] 115 | (mul sig-f-sq (mul (add (add 1 sqrt-5-d) (mul d-sq (/ 5 3))) 116 | exp-m-sqrt5-d)))) 117 | 118 | (defn matern32-plus-matern52-K 119 | "Compound covariance function for matern-32 and matern-52. 120 | 121 | Accepts: x-diff-squared - a NxDxN matrix of squared distances 122 | log-sig-f-32 - a scalar 123 | log-rho-32 - a vector 124 | log-sig-f-52 - a scalar 125 | log-rho-52 - a vector 126 | 127 | Returns: A matrix K" 128 | [x-diff-squared log-sig-f-32 log-rho-32 log-sig-f-52 log-rho-52] 129 | (let [K-32 (matern32-K x-diff-squared log-sig-f-32 log-rho-32) 130 | K-52 (matern52-K x-diff-squared log-sig-f-52 log-rho-52)] 131 | (add K-32 K-52))) 132 | 133 | (defn- broadcast-as-required-in-grads 134 | "Calls broadcast-function-NxDxN-NxN when provided first matrix is NxDxN and 135 | just uses built in broadcast operations when NxD" 136 | [op M1 M2 & args] 137 | (let [n-dims (count (shape M1))] 138 | (if (> n-dims 2) 139 | (bf/broadcast-function-NxDxN-NxN op M1 M2 args) 140 | (transpose (op (transpose M1) M2))))) 141 | 142 | 143 | (defn matern32-grad-K 144 | "Gradient for matern32. Syntax as per matern32 145 | except returns a DxNxN array giving derivatives 146 | in the different directions. 
The first entry 147 | of the first dimension corresponds to the derivative 148 | with respect to log-sig-f, with the others wrt 149 | log-rho" 150 | [x-diff-squared log-sig-f log-rho] 151 | (let [sig-f-sq (exp (* 2.0 log-sig-f)) 152 | [d _ seq-squared] (calc-d log-rho x-diff-squared) 153 | [sqrt-3-d exp-m-sqrt3-d] (exp-minus-sqrt3-d d) 154 | grad-K32-sig-f (mul 2.0 (matern32-K x-diff-squared log-sig-f log-rho)) 155 | [N _] (shape grad-K32-sig-f) 156 | grad-K32-rho (-> (broadcast-as-required-in-grads mul seq-squared exp-m-sqrt3-d) 157 | (mul (* 3.0 sig-f-sq))) 158 | size-broad (vec (concat [1] (shape grad-K32-sig-f)))] 159 | (join (broadcast grad-K32-sig-f size-broad) (slice-views grad-K32-rho 1)))) 160 | 161 | (defn matern52-grad-K 162 | "Gradient for matern52. Syntax as per matern52 163 | except returns a DxNxN array giving derivatives 164 | in the different directions. The first entry 165 | of the first dimension corresponds to the derivative 166 | with respect to log-sig-f, with the others wrt 167 | log-rho" 168 | [x-diff-squared log-sig-f log-rho] 169 | (let [sig-f-sq (exp (* 2.0 log-sig-f)) 170 | [d _ seq-squared] (calc-d log-rho x-diff-squared) 171 | [sqrt-5-d exp-m-sqrt5-d] (exp-minus-sqrt5-d d) 172 | grad-K52-sig-f (mul 2.0 (matern52-K x-diff-squared log-sig-f log-rho)) 173 | [N _] (shape grad-K52-sig-f) 174 | grad-K52-rho (->> (add 1 sqrt-5-d) 175 | (mul exp-m-sqrt5-d) 176 | (broadcast-as-required-in-grads mul seq-squared) 177 | (mul (* (/ 5.0 3.0) sig-f-sq))) 178 | size-broad (vec (concat [1] (shape grad-K52-sig-f)))] 179 | (join (broadcast grad-K52-sig-f size-broad) (slice-views grad-K52-rho 1)))) 180 | 181 | (defn matern32-plus-matern52-grad-K 182 | "Gradient of compound covariance function for matern-32 and matern-52. 
183 | 184 | Accepts: x-diff-squared - a NxDxN matrix of squared distances 185 | log-sig-f-32 - a scalar 186 | log-rho-32 - a vector 187 | log-sig-f-52 - a scalar 188 | log-rho-52 - a vector 189 | 190 | Returns: An DxNxN array grad-K giving derivatives 191 | in the different directions. The first entry 192 | of the first dimension corresponds to the derivative 193 | with respect to log-sig-f, with the others wrt 194 | log-rho" 195 | [x-diff-squared log-sig-f-32 log-rho-32 log-sig-f-52 log-rho-52] 196 | (let [grad-K-32 (matern32-grad-K x-diff-squared log-sig-f-32 log-rho-32) 197 | grad-K-52 (matern52-grad-K x-diff-squared log-sig-f-52 log-rho-52)] 198 | (join grad-K-32 grad-K-52))) 199 | 200 | (defn matern-for-vector-input 201 | "Converts a covariance function that takes pairs of log-sig-f 202 | and log-rho as inputs and converts them to one that accepts 203 | a vector with correctly ordered hyperparameters. 204 | 205 | Accepts: dim - Dimension of data 206 | K - Relevant kernel function 207 | 208 | Return: K-vec - the kernel function that now accepts x-diff-squared 209 | followed by a vector" 210 | [dim K] 211 | (let [arrange-inputs (fn [x] 212 | (loop [in nil 213 | xl x] 214 | (if (= xl []) 215 | in 216 | (recur (concat in [(first xl) (subvec xl 1 (inc dim))]) 217 | (subvec xl (inc dim))))))] 218 | (fn [x-diff-squared hyper] (apply K x-diff-squared (arrange-inputs hyper))))) 219 | 220 | (defn matern32-grad-z 221 | "Jacobian of side kernel matrix w.r.t. new data point z for Matern 32. 222 | If using a gradient based solver for the acquisition funciton, then 223 | needed for calculating derivative of Expected Improvement, EI(z), as outlined 224 | on page 3 of 225 | http://homepages.mcs.vuw.ac.nz/~marcus/manuscripts/FreanBoyle-GPO-2008.pdf. 
226 | 227 | Accepts: 228 | xs-z-diff - NxD matrix whose (i, j)th entry is x_ij - z_j 229 | log-sig-f - scalar; parameter of kernel function 230 | log-rho - D-dimensional parameter of kernel function 231 | 232 | Returns: 233 | [NxD] Jacobian of side kernel matrix w.r.t. new data point where 234 | (i, j)th entry is d(kernel(x_i, z)) / d(z_j)." 235 | [xs-z-diff log-sig-f log-rho] 236 | (let [sig-f-sq (exp (* 2 log-sig-f)) 237 | rho-sq (exp (mul 2 log-rho)) 238 | xs-z-diff-squared (pow xs-z-diff 2) 239 | [d _ _] (calc-d log-rho xs-z-diff-squared) ;; N-dimensional vector 240 | [_ ex3] (exp-minus-sqrt3-d d)] 241 | (-> (sub 0 xs-z-diff) ;; matrix of (z_j - x_ij) where i = 1:N, j = 1:D 242 | (div rho-sq) ;; matrix (NxD) of (z_j - x_ij) / rho_j 243 | transpose 244 | (mul ex3) 245 | transpose ;; matrix of exp(-\sqrt 3d_i) (z_j - x_ij) / rho_j 246 | (mul (* -3 sig-f-sq))))) ;; matrix of -3\sigma^2 exp(-\sqrt 3d_i) (z_j - x_ij) / rho_j 247 | 248 | (defn matern52-grad-z 249 | "Jacobian of side kernel matrix w.r.t. new data point z for Matern 52. 250 | If using a gradient based solver for the acquisition funciton, then 251 | needed for calculating derivative of Expected Improvement, EI(z), as outlined 252 | on page 3 of 253 | http://homepages.mcs.vuw.ac.nz/~marcus/manuscripts/FreanBoyle-GPO-2008.pdf. 254 | 255 | Accepts: 256 | xs-z-diff - NxD matrix whose (i, j)th entry is x_ij - z_j 257 | log-sig-f - scalar; parameter of kernel function 258 | log-rho - D-dimensional parameter of kernel function 259 | 260 | Returns: 261 | [NxD] Jacobian of side kernel matrix w.r.t. new data point where 262 | (i, j)th entry is d(kernel(x_i, z)) / d(z_j)." 
263 | [xs-z-diff log-sig-f log-rho] 264 | (let [sig-f-sq (exp (* 2 log-sig-f)) 265 | rho-sq (exp (mul 2 log-rho)) 266 | xs-z-diff-squared (pow xs-z-diff 2) 267 | [d _ _] (calc-d log-rho xs-z-diff-squared) 268 | [sq5d ex5] (exp-minus-sqrt5-d d)] 269 | (-> (sub 0 xs-z-diff) ;; matrix of (z_j - x_ij) where i = 1:N, j = 1:D 270 | (div rho-sq) ;; matrix of (z_j - x_ij) / rho_j 271 | transpose 272 | (mul (add 1 sq5d)) 273 | (mul ex5) 274 | transpose ;; matrix of exp(-\sqrt5d_i) * (1 + \sqrt5d_i) * (z_j - x_ij) / rho_j 275 | (mul (* (- (/ 5 3)) sig-f-sq))))) 276 | 277 | (defn matern32-plus-matern52-grad-z 278 | "Jacobian of side kernel matrix w.r.t. new data point z for Matern 32 + Matern 52. 279 | If using a gradient based solver for the acquisition funciton, then 280 | needed for calculating derivative of Expected Improvement, EI(z), as outlined 281 | on page 3 of 282 | http://homepages.mcs.vuw.ac.nz/~marcus/manuscripts/FreanBoyle-GPO-2008.pdf. 283 | 284 | Accepts: 285 | xs-z-diff - NxD matrix whose (i, j)th entry is x_ij - z_j 286 | log-sig-f - scalar; parameter of kernel function 287 | log-rho - D-dimensional parameter of kernel function 288 | 289 | Returns: 290 | [NxD] Jacobian of side kernel matrix w.r.t. new data point where 291 | (i, j)th entry is d(kernel(x_i, z)) / d(z_j)." 292 | [x-z-diff log-sig-f-32 log-rho-32 log-sig-f-52 log-rho-52] 293 | (add (matern32-grad-z x-z-diff log-sig-f-32 log-rho-32) 294 | (matern52-grad-z x-z-diff log-sig-f-52 log-rho-52))) 295 | -------------------------------------------------------------------------------- /src/deodorant/default_params.clj: -------------------------------------------------------------------------------- 1 | (ns deodorant.default-params 2 | "Helper functions for Deodorant." 
3 | (:require [clojure.core.matrix :as mat] 4 | [deodorant.hyper-priors :as hyper] 5 | [deodorant.broadcast-functions :as bf])) 6 | 7 | (defn default-double-matern-hyperprior 8 | "Sets up a default hyperprior based on the composition 9 | of a matern-32 and a matern-52 kernel. Accepts the 10 | dimensionality of the input space dim and returns a hash 11 | map with fields :sampler, :log-p and :grad-log-p. Each 12 | of these operate on 13 | [log-sig-n [log-sig-f-32 log-rho-32-dim1 log-rho-32-dim2 ....] 14 | [log-sig-f-52 log-rho-52-dim1 log-rho-52-dim2 ....]]. 15 | :sampler returns a set of samples of this form. 16 | :log-p returns a scalar given a set of parameters 17 | :grad-log-p returns a nested vector of the same size as sampler 18 | does corresponding to the gradient of that hyperparameter" 19 | [dim b-deterministic] 20 | (let [log-sig-noise-mean (if b-deterministic -9 -5) 21 | log-sig-noise-std-dev (if b-deterministic 0.1 2) 22 | log-sig-f-mean-32 -7 23 | log-sig-f-std-32 0.5 24 | log-rho-mean-32 -1.5 25 | log-rho-std-dev-32 0.5 26 | log-sig-f-mean-52 -0.5 27 | log-sig-f-std-52 0.15 28 | log-rho-mean-52 -1 29 | log-rho-std-dev-52 0.5 30 | dist-h-32 (hyper/constant-length-distance-hyperprior 31 | dim log-sig-f-mean-32 log-sig-f-std-32 log-rho-mean-32 log-rho-std-dev-32) 32 | dist-h-52 (hyper/constant-length-distance-hyperprior 33 | dim log-sig-f-mean-52 log-sig-f-std-52 log-rho-mean-52 log-rho-std-dev-52)] 34 | (hyper/compose-hyperpriors 35 | dim log-sig-noise-mean log-sig-noise-std-dev dist-h-32 dist-h-52))) 36 | 37 | (defn- bump-function-1d 38 | "A bump function used in the gp mean. Takes a radius from 39 | an original point, a ridge-value, an inf-value and exponent 40 | and returns a function evaluation. Note r should be positive. 41 | When rinf-value then 42 | -inf is returned. Otherwise exponent*log(inf-value-r)+a*r+c is returned 43 | where a and c are set to give the required ridge-value. 
44 | 45 | Accepts [ridge-value inf-value exponent r] 46 | Returns scalar" 47 | [ridge-value inf-value exponent r] 48 | (let [a (- (/ (* exponent (Math/pow (- ridge-value inf-value) (dec exponent))) 49 | (Math/pow (- ridge-value inf-value) exponent))) 50 | c (- (+ (* exponent (Math/log (- inf-value ridge-value))) 51 | (* a ridge-value)))] 52 | (if (or (< r 0) (> r inf-value)) 53 | (- (/ 1.0 0.0)) 54 | (if (< r ridge-value) 55 | 0 56 | (+ (* exponent (Math/log (- inf-value r))) 57 | (* a r) 58 | c))))) 59 | 60 | (defn- bump-function 61 | "Calculates the radius of a point and uses it as the input 62 | to bump-function-1d 63 | 64 | Accepts [ridge-value inf-value exponent x] 65 | Returns scalar" 66 | [ridge-value inf-value exponent x] 67 | (let [r (mat/sqrt (bf/safe-sum (mat/pow x 2) 1))] 68 | (mapv #(bump-function-1d ridge-value inf-value exponent %) r))) 69 | 70 | (defn default-mean-fn-form [dim] 71 | (partial bump-function 1 (* 1.5 (Math/sqrt dim)) 1)) 72 | 73 | (defn default-hmc-initializer [n-chains hyperprior] 74 | ((:sampler hyperprior) n-chains)) 75 | -------------------------------------------------------------------------------- /src/deodorant/gp_toolbox.clj: -------------------------------------------------------------------------------- 1 | (ns deodorant.gp-toolbox 2 | "GP functions for training, testing, calculating the marginal likelihood 3 | and its derivatives." 4 | (:require [clojure.core.matrix :refer [matrix identity-matrix dot shape 5 | inverse transpose trace outer-product 6 | mul mmul add sub div join broadcast 7 | slice-views exp log pow sqrt]] 8 | [clatrix.core :as clx] 9 | [deodorant.helper-functions :refer [sample* mvn]] 10 | [deodorant.broadcast-functions :as bf])) 11 | 12 | (defn subtract-mean-fn 13 | "Subtracts the GP mean. Accepts a mean-fn from arguments [x] to a 14 | scalar y, along with a collection of points [x y]. Returns a vector 15 | of values (- y (mean-fn x))." 
16 | [mean-fn x y] 17 | (sub y (mean-fn x))) 18 | 19 | (defn gp-train 20 | "Trains a gp, returns L and psi - the lower triangular matrix 21 | and vector of differences required for prediction. 22 | 23 | Accepts: cov-fn - a function taking inputs of x-diff-sq and 24 | a vector of hyperparameters (not including noise 25 | parameter and therefore corresponding to (rest alpha)) 26 | x-diff-sq - a NxDxN matrix of squared distances of points 27 | y-bar - observations minus the value of the prior mean function 28 | at those points 29 | alpha - a vector of hyperparameters ordered as 30 | [log-sig-n log-sig-f-k1 log-rho-k1-dim-1 log-rho-k1-dim-2 ... log-sig-f-k2 ...] 31 | 32 | Returns: L - lower triangular matrix used for gp prediction 33 | psi - inv-K times y-bar" 34 | ; MEMOIZE ME 35 | [cov-fn x-diff-sq y-bar alpha] 36 | (let [_ (if (not (every? (fn [x] (< x 10)) (rest alpha))) 37 | (let [;_ (print :alphafail alpha) 38 | ] (throw (Exception. "Alphas are at silly values. Throwing exception before clatrix kills java."))) 39 | nil) 40 | K-no-noise (cov-fn x-diff-sq (vec (rest alpha))) 41 | [N D _] (shape x-diff-sq) 42 | sig-n-sq (exp (* 2 (first alpha))) 43 | ;; A little naughty but for numerical stability add a little onto sig-n-sq 44 | ;; note we don't bother checking sig-n at the start because of this 45 | sig-n-sq (+ sig-n-sq 0.00000001) 46 | K (add K-no-noise (mul sig-n-sq (identity-matrix N))) 47 | L (try 48 | (transpose (matrix (clx/cholesky (clx/matrix K)))) 49 | (catch Exception e 50 | (println :calc-L-failed-with-error e) 51 | (println :alpha alpha) 52 | (println :K K) 53 | (flush) 54 | (throw (Exception. "calculating L failed due to numerical instability")))) 55 | psi (try 56 | (clx/solve (clx/matrix (transpose L)) (clx/solve (clx/matrix L) (clx/vector y-bar))) 57 | (catch Exception e 58 | (println :calc-psi-failed-with-error e) 59 | (println :alpha alpha) 60 | (println :K K) 61 | (println :L L) 62 | (flush) 63 | (throw (Exception. 
"calculating psi failed due to numerical instability"))))] 64 | [(matrix L) (matrix (vec psi))])) 65 | 66 | 67 | (defn gp-log-likelihood 68 | "Calculates the gp-log-likelihood given L, psi and y-bar" 69 | ; MEMOIZE ME 70 | [L psi y-bar] 71 | (sub (mul -0.5 (mmul (transpose y-bar) psi)) 72 | (trace (log L)) 73 | (* 0.5 (count y-bar) (log (* 2 Math/PI))))) 74 | 75 | (defn gp-grad-log-likelihood 76 | "Calculates the gradient of the gp-log-likelihood with 77 | respect to the hyperparameters. 78 | 79 | Accepts: grad-cov-fn - Function to return grad of covariance func wrt 80 | the hyperparameters returned as DxNxN matrix 81 | x-diff-sq - see gp-train 82 | L - see gp-train 83 | psi - see gp-train" 84 | ; MEMOIZE ME 85 | [grad-cov-fn x-diff-sq alpha L psi] 86 | (let [grad-k-no-noise (grad-cov-fn x-diff-sq (vec (rest alpha))) 87 | [N _ _] (shape x-diff-sq) 88 | grad-k (join (broadcast (mul 2 89 | (exp (* 2 (first alpha))) 90 | (identity-matrix N)) 91 | [1 N N]) 92 | grad-k-no-noise) 93 | inv-L (inverse L) 94 | inv-K (mmul (transpose inv-L) inv-L) 95 | psi-psi-T (outer-product psi psi) 96 | psi-psi-T-minus-inv-K (sub psi-psi-T inv-K)] 97 | (mul 0.5 (mapv (fn [x] 98 | (trace (mmul psi-psi-T-minus-inv-K x))) 99 | grad-k)))) 100 | 101 | (defrecord trained-gp-obj 102 | [prior-mean-fn ; Prior function for mean. Calcs are done using a zero mean gp and this added back at the end 103 | x-obs ; Observed x values 104 | L ; Lower triangular decomposition of K 105 | psi ; inv-K times y-bar 106 | inv-K ; Inverse of covariance function for trained points 107 | inv-L ; Inverse of cholesky decomposition of K 108 | sigma-n ; Standard deviation of noise 109 | log-likelihood ; log likelihood of the GP 110 | x*-diff-fn ; Function that takes an array of new points and returns a NxDxM of seperations to the observed points 111 | k*-fn ; Function to calculate covariance of (M) new points to (N) old points, returns NxM matrix 112 | ; overloaded to work on both a single point (e.g. 
[1 0.5]) and array of points (e.g. [[1 0.5]] or [[1 0.5] [-0.2 0.3]]) 113 | k-new-fn ; Function to calculate covariance between (M) new points, returns MxM matrix 114 | ; overloaded to work on both a single point (e.g. [1 0.5]) and array of points (e.g. [[1 0.5]] or [[1 0.5] [-0.2 0.3]]) 115 | marginal-prior-var ; Result of calling k-new-fn on a single point (i.e. variance with itself under the prior) 116 | ;; Optional fields that are not currently used in BOPP. 117 | grad-prior-mean-fn-x ; Grad of the prior-mean-fn w.r.t x, returns different differentials as first dimension. 118 | grad-k*-fn ; Function giving the gradient of k* with respect to the different dimensions of the proposed point 119 | ; overloaded to work on both a single point (e.g. [1 0.5]) and array of points (e.g. [[1 0.5]] or [[1 0.5] [-0.2 0.3]]) 120 | ]) 121 | 122 | (defn create-trained-gp-obj 123 | "Created a trained-gp-obj record that is used for efficient 124 | prediction of gp at future points. 125 | 126 | Accepts: prior-mean-func 127 | cov-fn Same form as sent to gp train 128 | points Vector of pairs of [x y] observations 129 | alpha Vector of hyperparameters in same form as sent to gp train 130 | 131 | Optional Inputs: 132 | grad-prior-mean-fn-x Gradient of prior mean function. Needed for some derivative 133 | calculations but not for basic use. 134 | grad-prior-cov-fn-x Gradient of prior covariance function. Needed for some derivative 135 | calculations but not for basic use. 136 | 137 | Returns: trained-gp-obj 138 | 139 | Note that current usage of BOPP does not set these optional inputs. They would be needed for anything 140 | that requires taking gradients with respect to the GP inputs, for example solving the acquisition 141 | function using gradient methods and left in for potential future convenience / other toolbox use." 
142 | [prior-mean-fn cov-fn x-obs y-obs alpha 143 | & [grad-prior-mean-fn-x grad-prior-cov-fn-x]] 144 | (let [;; For details on these terms see the comments in trained-gp-obj 145 | y-bar (subtract-mean-fn prior-mean-fn x-obs y-obs) 146 | x-diff-sq (matrix (bf/square-diff x-obs)) 147 | sigma-n (exp (first alpha)) 148 | hypers (vec (rest alpha)) 149 | [L psi] (gp-train cov-fn x-diff-sq y-bar alpha) 150 | log-likelihood (gp-log-likelihood L psi y-bar) 151 | x*-diff-fn (fn [x*] 152 | (bf/safe-broadcast-op sub x-obs (matrix x*))) ; Returns a N-OBSxDxN* array 153 | k*-fn (fn [x*-diff] 154 | (cov-fn (pow x*-diff 2) hypers)) 155 | k-new-fn (fn [x*] 156 | (cov-fn (pow (bf/safe-broadcast-op sub (matrix x*) (matrix x*)) 2); Returns a N*xDxN* array 157 | hypers)) 158 | [N D] (shape x-obs) 159 | marginal-prior-var (first (k-new-fn (vec (repeat D 1)))) ; Variance of a single point under the prior 160 | inv-L (inverse L) 161 | inv-K (mmul (transpose inv-L) inv-L) 162 | grad-k*-fn (if (= nil grad-prior-cov-fn-x) 163 | nil 164 | (fn [x*-diff] 165 | (grad-prior-cov-fn-x x*-diff hypers)))] 166 | (->trained-gp-obj 167 | prior-mean-fn x-obs L psi inv-K inv-L sigma-n log-likelihood 168 | x*-diff-fn k*-fn k-new-fn marginal-prior-var grad-prior-mean-fn-x grad-k*-fn))) 169 | 170 | (defn gp-predict-mu-sig 171 | "Makes gp predictions for mu and marginal standard deviation 172 | for multiple points simultaneously. 
173 | 174 | Accepts gp - of type trained-gp-obj 175 | x* - new points to evaluate (MxD matrix) 176 | 177 | Returns mu - predicted means (M length vector) 178 | sig - marginal predicted standard deviations" 179 | [gp x*] 180 | (let [k* ((:k*-fn gp) ((:x*-diff-fn gp) x*)) 181 | psi (:psi gp) 182 | mu (add ((:prior-mean-fn gp) x*) (mmul (transpose k*) psi)) 183 | v (mmul (:inv-L gp) k*) 184 | sig (sqrt (sub (:marginal-prior-var gp) (bf/safe-sum (pow v 2) 0)))] 185 | [mu sig])) 186 | 187 | (defn gp-predict-mu-cov 188 | "Makes gp predictions for mu and 189 | covariance for multiple points simultaneously. 190 | 191 | Accepts gp - of type trained-gp-obj 192 | x* - new points to evaluate (MxD matrix) 193 | & args - if (first args) is true then the full covariance matrix 194 | is returned instead of just the marginal variance 195 | 196 | Returns mu - predicted means (M length vector) 197 | cov - (MxM matrix) corresponding to the covariance between the prediction points" 198 | [gp x* ] 199 | (let [k* ((:k*-fn gp) ((:x*-diff-fn gp) x*)) 200 | psi (:psi gp) 201 | mu (add ((:prior-mean-fn gp) x*) (mmul (transpose k*) psi)) 202 | v (mmul (:inv-L gp) k*) 203 | cov (sub ((:k-new-fn gp) x*) (mmul (transpose v) v))] 204 | [mu cov])) 205 | 206 | 207 | (defn gp-mixture-mu-sig 208 | "Calculates the mean and standard deviation from a weighted 209 | sum of gps, i.e. a gp mixture model. Note that the resulting 210 | distribution is not a Gaussian, (the marginals are mixtures of 211 | Gaussians) but the mean and covariance is still analytic. 
212 | 213 | Accepts: 214 | gp-predictors - A collection of gp prediction functions 215 | gp-weights - The relative weights of the gps 216 | xs - Positions to calculate the estimates at 217 | 218 | Returns: 219 | mus - The means of the points 220 | sigs - The standard deviations of the points" 221 | [gp-predictors gp-weights xs] 222 | (let [ 223 | ;; Though the weighted sum of gps is not a GP itself, the means 224 | ;; still add is if they were 225 | mu-sigs (mapv #(% xs) gp-predictors) 226 | 227 | mu-samples (transpose (mapv first mu-sigs)) 228 | sig-samples (transpose (mapv second mu-sigs)) 229 | 230 | mus (bf/safe-sum (mul mu-samples gp-weights) 1) 231 | 232 | ; eq A.1.8 in Mike Osborne's thesis 233 | sigs (sqrt (sub (bf/safe-sum (mul (add (pow sig-samples 2) 234 | (pow mu-samples 2)) 235 | gp-weights) 236 | 1) 237 | (pow mus 2)))] 238 | [mus sigs])) 239 | 240 | ;;;;;;;;;;;;;; These are not used by default usage of ;;;;;;;;;;;;;; 241 | ;;;;;;;;;;;;;; Deodorant but are useful GP functions ;;;;;;;;;;;;;; 242 | ;;;;;;;;;;;;;; one may wish to exploit in custom usage ;;;;;;;;;;;;;;s 243 | 244 | 245 | (defn gp-predict-with-derivatives 246 | "Makes gp predictions for mu, var, grad-mu and grad-var 247 | given a gp-object and a single query point 248 | 249 | Accepts gp - of type trained-gp-obj 250 | x* - new points to evaluate (D length vector) 251 | 252 | Returns mu - predicted means (vector of length 1) 253 | var - marginal predicted vartion (vector of length 1) 254 | grad-mu - derivative of the mean with respect to the dimensions of predicted points. (D length vector) 255 | grad-var - derivative of the variance with respect to the dimensions of predicted points. 
(D length vector)" 256 | [gp x*] 257 | (let [x*-diff ((:x*-diff-fn gp) x*) 258 | k* ((:k*-fn gp) x*-diff) 259 | psi (:psi gp) 260 | mu (add ((:prior-mean-fn gp) [x*]) (dot k* psi)) 261 | v (mmul (:inv-L gp) k*) 262 | var (sub ((:k-new-fn gp) x*) (dot v v)) 263 | grad-k*-t ((:grad-k*-fn gp) x*-diff) 264 | grad-mu (add ((:grad-prior-mean-fn gp) x*) (mmul (transpose grad-k*-t) psi)) 265 | grad-var (sub (mmul 2 (:inv-K gp) k* grad-k*-t))] 266 | [[mu] [var] grad-mu grad-var])) 267 | 268 | (defn gp-sample 269 | "Generates samples from a trained-gp-obj 270 | 271 | Accepts gp - of type trained-gp-obj 272 | x* - points to evaluate (MxD matrix) 273 | n-samples - number of samples to generate 274 | 275 | Returns f* - sampled values for gp output (n-samples x M matrix). Note that the y ~ N(f*,sigma-n^2) 276 | dist-f* - mvn distribution object that allows for further efficient sampling if required" 277 | [gp x* n-samples] 278 | (let [[mu cov] (gp-predict-mu-cov gp x* true) 279 | dist-f* (mvn mu cov) 280 | f* (matrix (repeatedly n-samples #(sample* dist-f*)))] 281 | [f* dist-f*])) 282 | 283 | (defn convert-output-to-std-dev 284 | "Takes the output of a gp prediction function and converts the variance 285 | terms to standard deviation terms for both original derivative" 286 | [mu var & args] 287 | (let [sig (sqrt var)] 288 | (if (empty? args) 289 | [mu sig] 290 | (let [grad-mu (first args) 291 | grad-sig (and (second args) (div (second args) (* 2 (first sig))))] 292 | [mu sig grad-mu grad-sig])))) 293 | -------------------------------------------------------------------------------- /src/deodorant/helper_functions.clj: -------------------------------------------------------------------------------- 1 | (ns deodorant.helper-functions 2 | "Helper functions for Deodorant." 
  (:require [clojure.core.matrix :as m]
            [clojure.core.matrix.linear :as ml]))

;; matrix library uses vectorz for protocol implementations
(m/set-current-implementation :vectorz)

(defn argmax
  "Index of maximum of a collection"
  [coll]
  ;; pair each element with its index and take the index of the
  ;; pair with the largest value
  (first
   (apply max-key
          second
          (map vector
               (range (count coll))
               coll))))

(defn indexed-max
  "Returns an indexed maximum. Accepts a function f and a collection
   coll. Returns a pair [y-max i-max] in which y-max is the largest
   value (f x-max) and i-max is the index such that (nth coll i-max)
   returns x-max."
  [f coll]
  ;; (/ 1 (- 0.0)) is negative infinity, so any y beats the initial best
  (loop [best [(/ 1 (- 0.0)) 0]
         i 0
         xs coll]
    (if-let [x (first xs)]
      (let [[y-max i-max] best
            y (f x)]
        (recur (if (> y y-max)
                 [y i]
                 best)
               (inc i)
               (rest xs)))
      best)))

(defn cartesian
  "Cartesian product of a collection of collections"
  [colls]
  (if (empty? colls)
    '(())
    (for [x (first colls)
          more (cartesian (rest colls))]
      (cons x more))))

;; Functions ported from Anglican

;; erf
(defn erf
  "error function"
  [x]
  (org.apache.commons.math3.special.Erf/erf x))

;; mean
(defn sum
  "sums array slices along specified dimension"
  ([a dimension]
   (reduce
    m/add
    (m/slices a dimension)))
  ([a]
   (sum a 0)))

(defn mean
  "mean of array slices along specified dimension"
  ([a dimension]
   (m/div (sum a dimension)
          (get (m/shape a) dimension)))
  ([a]
   (mean a 0)))

;; distributions
(defprotocol distribution
  "random distribution"
  (sample* [this]
    "draws a sample from the distribution")
  (observe* [this value]
    "return the probability [density] of the value"))

(def RNG
  "random number generator;
   used by Apache Commons Math distribution objects"
  (org.apache.commons.math3.random.SynchronizedRandomGenerator.
   (org.apache.commons.math3.random.Well19937c.)))

(defn ^:private qualify
  "accepts a symbol, returns the qualified symbol;
   intended to be called from a macro"
  [s]
  (symbol (format "%s/%s" *ns* s)))

(defmacro defdist
  "defines distribution; expands to a record implementing the
   distribution protocol plus a constructor function of the same name.
   Both the docstring and the [bindings] vector are optional."
  [name & args]
  (let [[docstring parameters & args]
        (if (string? (first args))
          args
          `(~(format "%s distribution" name) ~@args))
        [bindings & methods]
        (if (vector? (first args))
          args
          `[[] ~@args])
        record-name (symbol (format "%s-distribution" name))
        variables (take-nth 2 bindings)]
    `(do
       (declare ~name)
       (defrecord ~record-name [~@parameters ~@variables]
         Object
         (toString [~'this]
           (str (list '~(qualify name) ~@parameters)))
         distribution
         ~@methods)
       (defn ~name ~docstring ~parameters
         (let ~bindings
           (~(symbol (format "->%s" record-name))
            ~@parameters ~@variables)))
       (defmethod print-method ~record-name
         [~'o ~'m]
         (print-simple (str ~'o) ~'m)))))

(defmacro ^:private from-apache
  "wraps Apache Commons Math distribution"
  [name args type [apache-name & apache-args]]
  (let [dist (gensym "dist")]
    `(defdist ~(symbol name)
       ~(format "%s distribution (imported from apache)" name)
       ~args
       [~dist (~(symbol (format "org.apache.commons.math3.distribution.%sDistribution."
                                               apache-name))
               RNG ~@apache-args)]
       (~'sample* [~'this] (.sample ~dist))
       (~'observe* [~'this ~'value]
        ;; discrete distributions expose log-pmf, continuous log-pdf
        ~(case type
           :discrete `(~'.logProbability ~dist ~'value)
           :continuous `(~'.logDensity ~dist ~'value))))))

(defprotocol multivariate-distribution
  "additional methods for multivariate distributions"
  (transform-sample [this samples]
    "accepts a vector of random values and generates
     a sample from the multivariate distribution"))

(from-apache normal [mean sd] :continuous
  (Normal (double mean) (double sd)))

(defdist mvn
  "multivariate normal"
  [mean cov] [k (m/ecount mean) ; number of dimensions
              ;; lower-triangular Cholesky factor of the covariance
              Lcov (:L (ml/cholesky (m/matrix cov)))
              unit-normal (normal 0 1)
              ;; log normalization constant, computed lazily:
              ;; 0.5 k log(2 pi) + log|Lcov|  (|Lcov| = sqrt(|cov|))
              Z (delay (let [|Lcov| (reduce * (m/diagonal Lcov))]
                         (+ (* 0.5 k (Math/log (* 2 Math/PI)))
                            (Math/log |Lcov|))))
              iLcov (delay (m/inverse Lcov))
              ;; reparameterization: x = mean + Lcov z, with z ~ N(0, I)
              transform-sample (fn [samples]
                                 (m/add mean (m/mmul Lcov samples)))]
  (sample* [this] (transform-sample
                   (repeatedly k #(sample* unit-normal))))
  (observe* [this value]
    ;; log density via the whitened residual dx = Lcov^-1 (value - mean)
    (let [dx (m/mmul @iLcov (m/sub value mean))]
      (- (* -0.5 (m/dot dx dx)) @Z)))
  multivariate-distribution
  (transform-sample [this samples] (transform-sample samples)))
--------------------------------------------------------------------------------
/src/deodorant/hmc.clj:
--------------------------------------------------------------------------------

(ns deodorant.hmc
  "Basic HMC sampler implementation."
3 | (:require [clojure.core.matrix :as mat 4 | :refer [matrix mul add sub]] 5 | [deodorant.helper-functions :refer [sample* normal]])) 6 | 7 | (defn- sum 8 | [x & [dim & _]] 9 | (if (and dim (> dim 0)) 10 | (reduce mat/add (mat/slice-views x dim)) 11 | (reduce mat/add x))) 12 | 13 | (defn- mean 14 | [x & [dim & _]] 15 | (let [dim (or dim 0)] 16 | (mat/div (sum x dim) (get (mat/shape x) dim)))) 17 | 18 | (defn- sq 19 | [x] 20 | (mat/mul x x)) 21 | 22 | (defn scale-vector 23 | "Scale a vector by a scalar" 24 | [v factor] 25 | (mapv (partial * factor) v)) 26 | 27 | (defn hmc-integrate 28 | "Preforms leap-frog integration of trajectory." 29 | [grad-u eps num-steps q p] 30 | (loop [q q 31 | p (mat/sub p (mat/mul 0.5 eps (grad-u q))) 32 | n 1] 33 | (if (< n num-steps) 34 | (let [q-new (mat/add q (mat/mul eps p)) 35 | p-new (mat/sub p (mat/mul eps (grad-u q-new)))] 36 | (recur q-new p-new (inc n))) 37 | [q (mat/sub p (mat/mul 0.5 eps (grad-u q)))]))) 38 | 39 | (defn hmc-transition 40 | "Performs one Hamiltonian Monte Carlo transition update. 41 | 42 | Accepts functions u and grad-u with arguments [q], a parameter eps 43 | that specifies the integration step size, and a parameter num-steps 44 | that specifies the number of integration steps. 45 | 46 | Returns a new sample q and the value of u at q." 
  [u grad-u eps num-steps q-start u-start]
  (let [[accept-prob
         q-end
         u-end] (try
                  (let [;; sample an initial momentum from an isotropic Gaussian
                        p-start (mat/matrix
                                 (map sample*
                                      (repeat (count q-start)
                                              (normal 0 1))))
                        [q-end p-end] (hmc-integrate grad-u eps num-steps
                                                     q-start p-start)
                        ;; kinetic energies at the two ends of the trajectory
                        k-start (* 0.5 (sum (sq p-start)))
                        k-end (* 0.5 (sum (sq p-end)))
                        u-end (u q-end)
                        ;; Metropolis acceptance ratio on the total energy change
                        accept-prob (Math/exp (+ (- u-start u-end)
                                                 (- k-start k-end)))
                        ; To guard against rejecting huge improvements due to numerical
                        ; precision, accept anything where the probability increases by
                        ; a certain amount
                        accept_force_threshold 5
                        accept-prob (if (> (- u-start u-end) accept_force_threshold)
                                      1
                                      accept-prob)]
                    [accept-prob
                     q-end
                     u-end])
                  ;; deliberate best-effort: any numerical failure in the
                  ;; trajectory (e.g. a NaN in grad-u) counts as a rejection
                  (catch Exception e
                    [0.0 nil nil]))]
    (if (> accept-prob (rand))
      [q-end u-end]
      [q-start u-start])))

(defn hmc-chain
  "Performs Hamiltonian Monte Carlo to construct a Markov Chain

   Accepts functions u and grad-u with arguments [q], a parameter eps
   that specifies the integration step size, and a parameter num-steps
   that specifies the number of integration steps.

   Returns a lazy sequence of pairs of samples q and values of u at q."
  [u grad-u eps num-steps q-start u-start]
  (let [[q-next u-next] (hmc-transition u grad-u eps num-steps q-start u-start)]
    (lazy-seq
     (cons [q-next u-next] (hmc-chain u grad-u eps num-steps q-next u-next)))))

(defn burn-in-and-thin
  "Takes the output of a markov chain, removes a number of burn-in samples
   and thins

   Accepts: burn-in-proportion
            thin-rate
            samples

   Returns: samples(n-start:thin-rate:end)
            where n-start = (int (* (count samples) burn-in-proportion))"
  [burn-in-proportion thin-rate samples]
  (let [n-burn-in (int (Math/ceil (* (count samples) burn-in-proportion)))
        samples (take-nth thin-rate (vec (drop n-burn-in samples)))]
    samples))

(defn collapse-identical-samples
  "Takes an unweighted collection of samples and returns the unique values
   along with a vector of the number of times they occurred.  Ordering
   corresponds to the order of first appearance"
  [samples verbose]
  (let [freq (frequencies samples)
        ;; normalize the counts into weights that sum to one
        weights (mapv second freq)
        weights (scale-vector weights (/ 1 (reduce + weights)))
        ;; preserve the container type of the input
        unique-samples (if (vector? samples)
                         (mapv first freq)
                         (map first freq))]
    (if (> verbose 1)
      (println :prop-of-thined-hmc-samples-taken
               (double (/ (count unique-samples)
                          (count samples)))))
    [unique-samples weights]))
--------------------------------------------------------------------------------
/src/deodorant/hyper_priors.clj:
--------------------------------------------------------------------------------

(ns deodorant.hyper-priors
  "Hyperpriors for Deodorant."
  (:require [deodorant.helper-functions :refer [sample* observe* normal]]))

(defn- unflatten
  "Converts a vector to nested vector for
   data of dimension dim"
  [dim v]
  ;; first entry is the noise term on its own; each subsequent group of
  ;; (inc dim) entries is split into [[sig-f] [rho_1 ... rho_dim]]
  (loop [vr (vec (rest v))
         vc [[(first v)]]]
    (if (= [] vr)
      vc
      (recur (vec (drop (inc dim) vr))
             (vec (concat vc [[[(first vr)] (subvec vr 1 (inc dim))]]))))))

(defn- diff-log-normpdf
  "Derivative with respect to x of the log pdf of a 1d Gaussian
   with mean m and standard deviation s: (m - x) / s^2"
  [m s x]
  (/ (- m x) (Math/pow s 2)))

(defn- setup-log-normal
  "Given a mean and standard deviation, returns a sampler
   probability function and grad-probability function.
   Note log-p and grad-log-p expect as inputs the log of
   the raw hyperparameters."
  [m s]
  (let [;; one independent 1d Gaussian per dimension of m / s
        dists (mapv (fn [a b] (normal a b))
                    m s)
        sampler (fn [n-samples]
                  (vec (repeatedly n-samples
                                   #(mapv (fn [x] (sample* x)) dists))))
        ;; joint log density is the sum of the per-dimension log densities
        log-p (fn [x]
                (reduce +
                        (mapv (fn [xd dd] (observe* dd xd)) x dists)))
        grad-log-p (fn [x]
                     (mapv (fn [a b xd]
                             (diff-log-normpdf a b xd))
                           m s x))]
    [sampler log-p grad-log-p]))

(defn- log-normal-sig-f-and-rho-hyperprior
  "A gp hyperprior for distance based kernels
   such as squared exponential or Matern that apply
   a log normal to each of the hyperparameters"
  [log-sig-f-mean log-sig-f-std log-rho-mean log-rho-std]
  (let [[s-s p-s g-p-s] (setup-log-normal [log-sig-f-mean] [log-sig-f-std])
        [r-s p-r g-p-r] (setup-log-normal log-rho-mean log-rho-std)]
    {:sampler (fn [n-samples]
                (mapv (fn [s r] [s r]) (s-s n-samples) (r-s n-samples)))
     :log-p (fn [alpha]
              (+ (p-s (first alpha)) (p-r (second alpha))))
     :grad-log-p (fn [alpha]
                   [(g-p-s (first alpha)) (g-p-r (second alpha))])}))

(defn compose-hyperpriors
  "Composes a number of hyperpriors
   to form a single hyperprior.
   Should still be used even
   if only composing a single hyperprior as adds in the
   derivative of sig-n and applies flatten / unflatten"
  [dim log-noise-mean log-noise-std & args]
  (let [unflattener (partial unflatten dim)
        [n-s p-n g-p-n] (setup-log-normal [log-noise-mean] [log-noise-std])
        ;; NOTE(review): n_args is never used below
        n_args (count args)
        ;; draw noise samples and samples from each composed hyperprior,
        ;; then flatten each joint sample to a single vector
        sampler (fn [n-samples]
                  (map flatten (let [sig-samples (n-s n-samples)
                                     other-samples (mapv #((:sampler %) n-samples) args)]
                                 (mapv (fn [n] (reduce
                                                (fn [x y] (conj x (nth y n)))
                                                [(nth sig-samples n)] other-samples))
                                       (range n-samples)))))
        ;; joint log density: noise term plus each composed hyperprior
        log-p (fn [alpha]
                (let [alpha (unflattener alpha)]
                  (+ (p-n (first alpha))
                     (reduce + (mapv (fn [a i]
                                       ((:log-p i) a))
                                     (rest alpha) args)))))
        grad-log-p (fn [alpha]
                     (let [alpha (unflattener alpha)]
                       (flatten (vec (concat [[(g-p-n (first alpha))]]
                                             (mapv (fn [a i]
                                                     ((:grad-log-p i) a))
                                                   (rest alpha) args))))))]
    {:sampler sampler :log-p log-p :grad-log-p grad-log-p}))

(defn constant-length-distance-hyperprior
  "Calls log-normal-sig-f-and-rho-hyperprior when provided with
   dim as first argument and uses the same value for the rho
   details in every dimension"
  [dim log-sig-f-mean log-sig-f-std log-rho-mean log-rho-std]
  (log-normal-sig-f-and-rho-hyperprior
   log-sig-f-mean
   log-sig-f-std (vec (repeat dim log-rho-mean)) (vec (repeat dim log-rho-std))))
--------------------------------------------------------------------------------
/src/deodorant/scaling_functions.clj:
--------------------------------------------------------------------------------

(ns deodorant.scaling-functions
  "Scaling functions for Deodorant."
  (:require [clojure.core.matrix :as mat]
            [clojure.core.matrix.operators :as mop]))

(defn scale
  "Normalizes data to lie inside a hypercube bounded at [-1 1] along
   each dimension.

   Accepts a collection of data points [x] in which x may be a scalar or vector.

   Returns a tuple [scaled unscaler unscaler-without-centering scaler]
   containing the scaled data, a function that inverts the transformation,
   a function that inverts only the stretch (no re-centering, useful for
   differences), and the forward scaling function."
  [data]
  (let [dmax (reduce mop/max data)
        dmin (reduce mop/min data)
        dscale (mat/sub dmax dmin)
        ;; map [dmin dmax] linearly onto [-1 1]
        scaler (fn [d] (mat/mul (mat/sub (mat/div (mat/sub d dmin) dscale) 0.5) 2))
        scaled (scaler data)
        unscaler (fn [d] (mat/add (mat/mul (mat/add (mat/div d 2) 0.5) dscale) dmin))
        unscaler-without-centering (fn [d] (mat/mul (mat/div d 2) dscale))]
    [scaled unscaler unscaler-without-centering scaler]))

(defn scale-points
  "Rescales points to a hypercube bounded at [-1 1].

   Accepts a collection of points [x y] in which x is a D-dimensional
   vector and y is a scalar.

   Returns a tuple [x-scaled y-scaled unscale-x unscale-y
   unscale-x-no-centering unscale-y-no-centering] containing the scaled
   data and functions to revert the scaling."
  [points]
  (let [[X unscale-X unscale-X-no-centering] (scale (mapv first points))
        [Y unscale-Y unscale-Y-no-centering] (scale (mapv second points))]
    [X Y unscale-X unscale-Y unscale-X-no-centering unscale-Y-no-centering]))

;; Bounds of the data seen so far, from which scaling functions are derived
(defrecord scale-details-obj
    [theta-min
     theta-max
     log-Z-min
     log-Z-max])

;; Bundle of the forward / inverse scaling functions for theta and log-Z
(defrecord scaling-funcs-obj
    [theta-scaler
     theta-unscaler
     theta-unscaler-no-centering
     log-Z-scaler
     log-Z-unscaler
     log-Z-unscaler-no-centering])

(defn update-scale-details
  "Expands the recorded theta and log-Z bounds to include a new point.
   A new point whose scaled log-Z falls below -1 leaves the bounds
   unchanged; note log-Z-min is never updated."
  [scale-details scaling-funcs theta-new log-Z-new]
  (if (< ((:log-Z-scaler scaling-funcs) log-Z-new) -1)
    scale-details
    (let [log-Z-max (max (:log-Z-max scale-details) log-Z-new)
          theta-min (mop/min (:theta-min scale-details) theta-new)
          theta-max (mop/max (:theta-max scale-details) theta-new)]
      (->scale-details-obj
       theta-min theta-max (:log-Z-min scale-details) log-Z-max))))

(defn setup-scaling-funcs
"Given a scale-details-obj returns a scaling-funcs-obj" 62 | [scale-details] 63 | (let [[_ theta-unscaler theta-unscaler-no-centering theta-scaler] (scale [(:theta-min scale-details) (:theta-max scale-details)]) 64 | [_ log-Z-unscaler log-Z-unscaler-no-centering log-Z-scaler] (scale [(:log-Z-min scale-details) (:log-Z-max scale-details)])] 65 | (->scaling-funcs-obj 66 | theta-scaler theta-unscaler theta-unscaler-no-centering log-Z-scaler log-Z-unscaler log-Z-unscaler-no-centering))) 67 | 68 | (defn unflatten-from-sizes 69 | [sizes x] 70 | (let [sizes-this (first sizes) 71 | z-this (if (= (count sizes-this) 2) 72 | (-> (take (reduce * sizes-this) x) 73 | (mat/reshape sizes-this)) 74 | (if (= (first sizes-this) 1) 75 | (first x) 76 | (take (first sizes-this) x))) 77 | z-rest (if (empty? (rest sizes)) 78 | nil 79 | (unflatten-from-sizes (rest sizes) (into [] (drop (reduce * sizes-this) x))))] 80 | (into [] (concat [z-this] z-rest)))) 81 | 82 | (defn flatten-unflatten 83 | "Returns functions for flattening and unflattening the thetas. For example 84 | when sampling from a multivariate normal theta will be a nested vector 85 | TODO make me work for matrices" 86 | [x] 87 | (let [types (map type x) 88 | sizes (mapv (fn [v] (if (instance? mikera.vectorz.Vector v) 89 | (mat/shape v)) 90 | (if (or (vector? v) (list? v) (set? v) (coll? v) (seq? v)) 91 | [(count v)] 92 | [1])) 93 | x) 94 | flatten-f (fn [y] (into [] (flatten y))) 95 | unflatten-f (partial unflatten-from-sizes sizes)] 96 | [flatten-f unflatten-f])) 97 | --------------------------------------------------------------------------------