├── .gitignore ├── .scalafmt.conf ├── .travis.yml ├── LICENSE ├── README.md ├── build.sbt ├── project ├── build.properties └── plugins.sbt └── src ├── main └── scala │ └── thinkbayes │ ├── CategoricalCdf.scala │ ├── CategoricalPmf.scala │ ├── Cdf.scala │ ├── Pdf.scala │ ├── Pmf.scala │ ├── PmfFactory.scala │ ├── PmfLike.scala │ ├── SimpleSuite.scala │ ├── Suite.scala │ ├── extensions │ ├── Distributions.scala │ ├── Plotting.scala │ ├── Sampling.scala │ ├── Stats.scala │ ├── distributions │ │ ├── BetaBinomialPmf.scala │ │ ├── ClosedFormPmf.scala │ │ └── CommonsMathConversions.scala │ └── plotting │ │ ├── ShowControls.scala │ │ └── ThinkBayesChartTheme.scala │ └── package.scala └── test └── scala └── thinkbayes ├── PmfMatchers.scala ├── PmfSpec.scala ├── examples ├── DiceApp.scala ├── DungeonsApp.scala ├── EuroApp.scala ├── HockeyApp.scala ├── LocomotiveApp.scala ├── MMApp.scala ├── MontyApp.scala ├── PriceIsRightApp.scala └── RedLineApp.scala └── extensions ├── DistributionsSpec.scala ├── SamplingSpec.scala ├── StatsSpec.scala └── distributions └── BetaBinomialPmfSpec.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.sc 2 | showcases*.csv 3 | 4 | *.class 5 | *.log 6 | 7 | # sbt specific 8 | .bsp 9 | .cache 10 | .history 11 | .lib/ 12 | dist/* 13 | lib_managed/ 14 | project/boot/ 15 | project/plugins/project/ 16 | src_managed/ 17 | target/ 18 | 19 | # Scala-IDE specific 20 | .scala_dependencies 21 | .worksheet 22 | 23 | # IntelliJ specific 24 | .idea/ 25 | 26 | # ENSIME specific 27 | .ensime 28 | .ensime_cache 29 | 30 | # Metals specific 31 | .bloop/ 32 | .metals/ 33 | metals.sbt 34 | 35 | # vscode specific 36 | .vscode 37 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = 3.5.3 2 | 3 | runner.dialect = scala213 4 | align.preset = none 5 | maxColumn = 120 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.12 4 | - 2.12.10 5 | jdk: 6 | - openjdk8 7 | script: 8 | - sbt ++$TRAVIS_SCALA_VERSION clean coverage test 9 | after_success: 10 | - sbt ++$TRAVIS_SCALA_VERSION coverageReport coveralls 11 | - if [ "$TRAVIS_BRANCH" == "master" ]; then 12 | sbt ++$TRAVIS_SCALA_VERSION publish; 13 | fi 14 | cache: 15 | directories: 16 | - $HOME/.ivy2/cache 17 | - $HOME/.sbt/boot 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2017 Rui Gonçalves 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Think Bayes in Scala 2 | 3 | A Scala implementation of the classes and functions used in the great book _Think Bayes_ by Allen B. Downey, available for free (and open-source) [here](http://www.greenteapress.com/thinkbayes/). 4 | 5 | ## Quick start 6 | 7 | The code in this repository is available as a library and can be used in Scala 2.11.x projects by adding the following dependency to `build.sbt`: 8 | 9 | ```scala 10 | libraryDependencies += "net.ruippeixotog" %% "think-bayes" % "0.1" 11 | ``` 12 | 13 | ## Core classes 14 | 15 | ### Probability mass functions 16 | 17 | The `Pmf` class is arguably the core collection in _Think Bayes_, due to the latter's focus on problem solving using discrete approximations instead of continuous mathematics. The way to build a `Pmf` and manipulate it is pretty simple: 18 | 19 | ```scala 20 | scala> import thinkbayes._ 21 | import thinkbayes._ 22 | 23 | scala> val pmf = Pmf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 24 | pmf: thinkbayes.Pmf[Char] = Map(a -> 0.2, b -> 0.2, c -> 0.6) 25 | 26 | scala> pmf.prob('a') 27 | res0: Double = 0.2 28 | 29 | scala> pmf.prob(_ < 'c') 30 | res1: Double = 0.4 31 | 32 | scala> pmf.sample() 33 | res2: Char = c 34 | 35 | scala> pmf.printChart() 36 | a 0.2 ########## 37 | b 0.2 ########## 38 | c 0.6 ############################## 39 | ``` 40 | 41 | A `Pmf` is implemented as an immutable map and can be used as such: 42 | 43 | ```scala 44 | scala> pmf.size 45 | res3: Int = 3 46 | 47 | scala> pmf.map { case (k, v) => ((k + 1).toChar, v) } 48 | res4: thinkbayes.Pmf[Char] = Map(b -> 0.2, c -> 0.2, d -> 0.6) 49 | 50 | scala> pmf.filter(_._1 == 'a').normalized 51 | res5: thinkbayes.Pmf[Char] = Map(a -> 1.0) 52 | 53 | scala> pmf.foldLeft("")(_ + _._1) 54 | res6: String = abc 55 | 56 | scala> pmf.toList 57 | res7: List[(Char, Double)] = List((a,0.2), (b,0.2), (c,0.6)) 58 | ``` 59 | 60 | Specialized `Pmf` merging methods can model more complex problems in a very concise manner: 61 | 62 | ```scala 63 | scala> def die(n: Int) = Pmf(1 to n) 64 | die: (n: Int)thinkbayes.Pmf[Int] 65 | 66 | scala> die(6) 67 | res8: thinkbayes.Pmf[Int] = Map(5 -> 0.16666666666666666, 1 -> 0.16666666666666666, 6 -> 0.16666666666666666, 2 -> 0.16666666666666666, 3 -> 0.16666666666666666, 4 -> 0.16666666666666666) 68 | 69 | scala> die(6).mean 70 | res9: Double = 3.5 71 | 72 | scala> (die(6) ++ die(6)).printChart() // sum of two dice 73 | 2 0.0277 # 74 | 3 0.0555 ## 75 | 4 0.0833 #### 76 | 5 0.1111 ##### 77 | 6 0.1388 ###### 78 | 7 0.1666 ######## 79 | 8 0.1388 ###### 80 | 9 0.1111 ##### 81 | 10 0.0833 #### 82 | 11 0.0555 ## 83 | 12 0.0277 # 84 | 85 | scala> val bag = Pmf(List(die(4), die(6), die(8), die(12), die(20))) // a bag containing 5 different dice 86 | bag: thinkbayes.Pmf[thinkbayes.Pmf[Int]] = Map(Map(5 -> 0.08333333333333333, 10 -> 0.08333333333333333, 1 -> 0.08333333333333333, 6 -> 0.08333333333333333, 9 -> ... 87 | 88 | scala> bag.mixture.printChart() // roll of a random die from the bag 89 | 1 0.135 ###### 90 | 2 0.135 ###### 91 | 3 0.135 ###### 92 | 4 0.135 ###### 93 | 5 0.0850 #### 94 | 6 0.0850 #### 95 | 7 0.0516 ## 96 | 8 0.0516 ## 97 | 9 0.0266 # 98 | 10 0.0266 # 99 | 11 0.0266 # 100 | 12 0.0266 # 101 | 13 0.0100 102 | 14 0.0100 103 | 15 0.0100 104 | 16 0.0100 105 | 17 0.0100 106 | 18 0.0100 107 | 19 0.0100 108 | 20 0.0100 109 | ``` 110 | 111 | The `Distributions` extension provides methods for creating common `Pmf` such as Gaussian or Poisson distributions. 112 | 113 | ### Bayesian suites 114 | 115 | The implementation of `Suite` provided in this library does not extend `Pmf`; it is rather provided as a trait which applications can implement to model specific problems: 116 | 117 | ```scala 118 | scala> case class Dice(hypos: Seq[Int]) extends SimpleSuite[Int, Int] { 119 | | val pmf = Pmf(hypos) // which dice from `hypos` are we rolling? 120 | | def likelihood(data: Int, hypo: Int) = if(hypo < data) 0 else 1.0 / hypo 121 | | } 122 | defined class Dice 123 | 124 | scala> val prior = Dice(List(4, 6, 8, 12, 20)) 125 | prior: Dice = Dice(List(4, 6, 8, 12, 20)) 126 | 127 | scala> prior.printChart() 128 | 4 0.2 ########## 129 | 6 0.2 ########## 130 | 8 0.2 ########## 131 | 12 0.2 ########## 132 | 20 0.2 ########## 133 | 134 | scala> val posterior = prior.observed(6) // after a 6 is rolled 135 | posterior: thinkbayes.Suite[Int,Int] = thinkbayes.Suite$$anon$1@120fb03e 136 | 137 | scala> posterior.printChart() 138 | 4 0.0 139 | 6 0.3921 ################### 140 | 8 0.2941 ############## 141 | 12 0.1960 ######### 142 | 20 0.1176 ##### 143 | ``` 144 | 145 | The same prior could be built directly with: 146 | 147 | ```scala 148 | scala> val prior = Suite[Int, Int](Pmf(List(4, 6, 8, 12, 20))) { (d, h) => 149 | | if (h < d) 0 else 1.0 / h 150 | | } 151 | prior: thinkbayes.Suite[Int,Int]{val pmf: thinkbayes.Pmf[Int]} = thinkbayes.Suite$$anon$1@130dd39f 152 | ``` 153 | 154 | Multiple observations can be given to the `Suite` in bulk, which can yield results more stable numerically: 155 | 156 | ```scala 157 | scala> posterior.observed(6, 8, 7, 7, 5, 4).printChart() 158 | 4 0.0 159 | 6 0.0 160 | 8 0.9432 ############################################### 161 | 12 0.0552 ## 162 | 20 0.0015 163 | ``` 164 | 165 | ### Cumulative distribution functions 166 | 167 | A `Cdf` can be created just like a `Pmf`. It supports efficient querying for the cumulative probability on a given value (`prob`) and for the value at a given percentile (`value`): 168 | 169 | ```scala 170 | scala> val cdf = Cdf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 171 | cdf: thinkbayes.Cdf[Char] = CategoricalCdf(Vector((a,0.2), (b,0.4), (c,1.0))) 172 | 173 | scala> cdf.prob('b') 174 | res10: Double = 0.4 175 | 176 | scala> cdf.value(0.5) 177 | res11: Char = c 178 | 179 | scala> cdf.value(0.35) 180 | res12: Char = b 181 | 182 | scala> cdf.printChart() 183 | a 0.2 ########## 184 | b 0.4 #################### 185 | c 1.0 ################################################## 186 | ``` 187 | 188 | Unlike `Pmf`, `Cdf` does not implement the `Map` trait and, therefore, does not inherit the common Scala collection methods. If you need to use those, you can convert easily a `Cdf` to and from a `Pmf`: 189 | 190 | ```scala 191 | scala> cdf.toPmf 192 | res13: thinkbayes.Pmf[Char] = Map(a -> 0.2, b -> 0.2, c -> 0.6) 193 | 194 | scala> cdf.toPmf.toCdf 195 | res14: thinkbayes.Cdf[Char] = CategoricalCdf(Vector((a,0.2), (b,0.4), (c,1.0))) 196 | ``` 197 | 198 | ### Probability density functions 199 | 200 | A `Pdf` can be created from a Scala real-valued function and provides a `density` method for calculating the density at a given value: 201 | 202 | ```scala 203 | scala> val pdf = Pdf { x => math.max(-x * x + 1, 0) } 204 | pdf: thinkbayes.Pdf = thinkbayes.Pdf$$anon$3@744cb6e3 205 | 206 | scala> pdf.density(0) 207 | res15: Double = 1.0 208 | 209 | scala> pdf.density(0.5) 210 | res16: Double = 0.75 211 | ``` 212 | 213 | A `BoundedPdf` is a `Pdf` whose domain has known lower and upper bounds. 214 | 215 | ```scala 216 | scala> val bpdf = Pdf(-1.0, 1.0) { x => math.max(-x * x + 1, 0) } 217 | bpdf: thinkbayes.BoundedPdf{val lowerBound: Double; val upperBound: Double} = thinkbayes.Pdf$$anon$2@397820d5 218 | ``` 219 | 220 | Both can be converted to a `Pmf` given a range or sequence of discrete values to compute. A `BoundedPdf` can alternatively be given a step value only. In both cases, the probabilities of the returned `Pmf` are normalized: 221 | 222 | ```scala 223 | scala> pdf.toPmf(0.0 to 1.0 by 0.1).printChart() 224 | 0.0 0.1398 ###### 225 | 0.1 0.1384 ###### 226 | 0.2 0.1342 ###### 227 | 0.30000000000000004 0.1272 ###### 228 | 0.4 0.1174 ##### 229 | 0.5 0.1048 ##### 230 | 0.6000000000000001 0.0895 #### 231 | 0.7000000000000001 0.0713 ### 232 | 0.8 0.0503 ## 233 | 0.9 0.0265 # 234 | 1.0 0.0 235 | 236 | scala> bpdf.toPmf(0.2).printChart() 237 | -1.0 0.0 238 | -0.8 0.0545 ## 239 | -0.6 0.0969 #### 240 | -0.3999999999999999 0.1272 ###### 241 | -0.19999999999999996 0.1454 ####### 242 | 0.0 0.1515 ####### 243 | 0.20000000000000018 0.1454 ####### 244 | 0.40000000000000013 0.1272 ###### 245 | 0.6000000000000001 0.0969 #### 246 | 0.8 0.0545 ## 247 | 1.0 0.0 248 | ``` 249 | 250 | The `Distributions` extension provides methods for creating common `Pdf` such as Gaussian or Exponential distributions. 251 | 252 | ## Extensions 253 | 254 | This library was designed such that only the core operations needed for the creation and manipulation of the structures presented above are included in the class themselves. Additional features can be added by importing modules from the package `extensions`. 255 | 256 | ### Plotting 257 | 258 | The `Plotting` module provides support for graphical plotting, leveraging the powerful [JFreeChart](http://www.jfree.org/jfreechart/) library with a custom theme. `Pmf`, `Suite`, `Cdf` and `BoundedPdf` instances can be plotted, as long as their keys have an `Ordering` (for plotting bar charts) or `Numeric` (for plotting XY line charts) implicit in scope: 259 | 260 | ```scala 261 | scala> import thinkbayes.extensions.Plotting._ 262 | import thinkbayes.extensions.Plotting._ 263 | 264 | scala> val xyChart = bpdf.plotXY("-x^2 + 1") 265 | xyChart: scalax.chart.XYChart = scalax.chart.ChartFactories$XYLineChart$$anon$17@290e640d 266 | ``` 267 | 268 | ![plotxy](http://i.imgur.com/rG1d1vj.png) 269 | 270 | ```scala 271 | scala> val barChart = prior.plotBar("prior") 272 | barChart: scalax.chart.CategoryChart = scalax.chart.ChartFactories$BarChart$$anon$3@5c3e1ebe 273 | ``` 274 | 275 | ![plotbar_prior](http://i.imgur.com/etUUT9a.png) 276 | 277 | New series can be added to a previously created chart. This is useful for comparing differences between two distributions or Bayesian suites: 278 | 279 | ```scala 280 | scala> posterior.plotBarOn(barChart, "after a 6 is rolled") 281 | res17: barChart.type = scalax.chart.ChartFactories$BarChart$$anon$3@5c3e1ebe 282 | ``` 283 | 284 | ![plotbar_posterior](http://i.imgur.com/7Ak0pQu.png) 285 | 286 | Other attributes of the chart, such as the title and the axis labels, can be optionally specified. 287 | 288 | ### Distributions 289 | 290 | The `Distributions` module provides integration with the distribution implementations from [Apache Commons Math](http://commons.apache.org/proper/commons-math/), as well as several methods for creating `Pmf` and `Pdf` instances for common distributions: 291 | 292 | ```scala 293 | scala> import thinkbayes.extensions.Distributions._ 294 | import thinkbayes.extensions.Distributions._ 295 | 296 | scala> poissonPmf(3.0).plotBar("") 297 | res18: scalax.chart.CategoryChart = scalax.chart.ChartFactories$BarChart$$anon$3@6736cd9d 298 | ``` 299 | 300 | ![poisson](http://i.imgur.com/IkDOt6Z.png) 301 | 302 | ```scala 303 | scala> val tri: Pdf = new org.apache.commons.math3.distribution.TriangularDistribution(0.0, 0.5, 2.0) 304 | tri: thinkbayes.Pdf = thinkbayes.extensions.Distributions$$anon$1@7b5cdeb6 305 | 306 | scala> tri.bounded(0.0, 2.0).plotXY("") 307 | res19: scalax.chart.XYChart = scalax.chart.ChartFactories$XYLineChart$$anon$17@55a7c8a3 308 | ``` 309 | ![triangular](http://i.imgur.com/8WW3cjU.png) 310 | 311 | Finally, we can estimate a `Pdf` from a sequence of samples using kernel density estimation: 312 | 313 | ```scala 314 | scala> estimatePdf(Seq(1, 2, 2, 4, 4, 4, 9, 9, 9, 9, 11, 11, 15, 19)).bounded(0, 20).plotXY("") 315 | res20: scalax.chart.XYChart = scalax.chart.ChartFactories$XYLineChart$$anon$17@1c15725 316 | ``` 317 | 318 | ![kde](http://i.imgur.com/ijBYGPg.png) 319 | 320 | ### Stats 321 | 322 | The `Stats` module is a simple extension that provides the calculation of quantiles and credible intervals to `Pmf` and `Cdf` instances: 323 | 324 | ```scala 325 | scala> import thinkbayes.extensions.Stats._ 326 | import thinkbayes.extensions.Stats._ 327 | 328 | scala> normalPmf(2.5, 1.5).quantile(0.5) 329 | res21: Double = 2.5 330 | 331 | scala> normalPmf(0.0, 1.0).credibleInterval(0.9) 332 | res22: (Double, Double) = (-1.6440000000000001,1.6440000000000001) 333 | ``` 334 | 335 | ### Sampling 336 | 337 | Using `Pmf` merging methods such as `mixture` or `join` yield results as accurate as they can be, but they are also computationally expensive. The `Sampling` module aims to provide probabilistic alternatives based on sampling, which can be the only choice for large `Pmf`: 338 | 339 | ```scala 340 | scala> val dieList = Seq.fill(100)(die(6)) // a hundred dice 341 | dieList: Seq[thinkbayes.Pmf[Int]] = List(Map(5 -> 0.16666666666666666, 1 -> 0.16666666666666666, 6 -> 0.16666666666666666, 2 -> 0.16666666666666666, 3 -> 0.1666666666666666, 4 -> 0.16666666666666666),... 342 | 343 | scala> val xyChart = dieList.reduce(_ ++ _).plotXY("exact") 344 | xyChart: scalax.chart.XYChart = scalax.chart.ChartFactories$XYLineChart$$anon$17@30015846 345 | 346 | scala> sampleSum(dieList, 10000).plotXYOn(xyChart, "sampled") 347 | res23: xyChart.type = scalax.chart.ChartFactories$XYLineChart$$anon$17@81f0a53 348 | ``` 349 | 350 | ![sampling](http://i.imgur.com/LiYUyFL.png) 351 | 352 | ## Examples 353 | 354 | A number of examples and problems explored throughout _Think Bayes_ are implemented in the package `examples` in the [test directory](https://github.com/ruippeixotog/think-bayes-scala/tree/master/src/test/scala/thinkbayes/examples). They are always accompanied by the original problem description and I made an effort to make the steps of each problem as clear as possible. 355 | 356 | ## Copyright 357 | 358 | Copyright (c) 2014-2017 Rui Gonçalves. See LICENSE for details. 359 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | name := "think-bayes" 2 | organization := "net.ruippeixotog" 3 | version := "1.0-SNAPSHOT" 4 | 5 | scalaVersion := "2.12.19" 6 | crossScalaVersions := Seq("2.12.19") 7 | 8 | libraryDependencies ++= Seq( 9 | "de.sciss" %% "scala-chart" % "0.8.0", 10 | "nz.ac.waikato.cms.weka" % "weka-stable" % "3.8.6", 11 | "org.apache.commons" % "commons-math3" % "3.6.1", 12 | "org.specs2" %% "specs2-core" % "4.15.0" % "test" 13 | ) 14 | 15 | scalacOptions ++= Seq( 16 | "-deprecation", 17 | "-unchecked", 18 | "-feature", 19 | "-language:implicitConversions", 20 | "-language:higherKinds" 21 | ) 22 | 23 | scalafmtOnCompile := true 24 | 25 | console / initialCommands := """ 26 | import thinkbayes._ 27 | import thinkbayes.extensions.Plotting._ 28 | import thinkbayes.extensions.Distributions._""" 29 | 30 | publishTo := { 31 | val nexus = "https://oss.sonatype.org/" 32 | if (isSnapshot.value) 33 | Some("snapshots" at nexus + "content/repositories/snapshots") 34 | else 35 | Some("releases" at nexus + "service/local/staging/deploy/maven2") 36 | } 37 | 38 | publishMavenStyle := true 39 | 40 | Test / publishArtifact := false 41 | 42 | pomIncludeRepository := { _ => false } 43 | 44 | licenses := Seq("MIT License" -> url("http://www.opensource.org/licenses/mit-license.php")) 45 | homepage := Some(url("https://github.com/ruippeixotog/think-bayes-scala")) 46 | scmInfo := Some( 47 | ScmInfo( 48 | url("https://github.com/ruippeixotog/think-bayes-scala"), 49 | "scm:git:https://github.com/ruippeixotog/think-bayes-scala.git", 50 | "scm:git:git@github.com:ruippeixotog/think-bayes-scala.git" 51 | ) 52 | ) 53 | developers := List( 54 | Developer("ruippeixotog", "Rui Gonçalves", "ruippeixotog@gmail.com", url("https://www.ruippeixotog.net")) 55 | ) 56 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.0 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") 2 | addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.3.1") 3 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3") 4 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/CategoricalCdf.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | case class CategoricalCdf[K: Ordering](vals: IndexedSeq[(K, Double)]) extends Cdf[K] { 4 | def iterator = vals.iterator 5 | 6 | def prob(key: K): Double = searchBy[(K, Double), K](vals, key, _._1) match { 7 | case Left((_, p)) => p 8 | case Right(nextIdx) if nextIdx == 0 => 0.0 9 | case Right(nextIdx) => vals(nextIdx - 1)._2 10 | } 11 | 12 | def value(prob: Double): K = searchBy[(K, Double), Double](vals, prob, _._2) match { 13 | case Left((key, _)) => key 14 | case Right(nextIdx) if nextIdx == vals.length => vals.last._1 15 | case Right(nextIdx) => vals(nextIdx)._1 16 | } 17 | 18 | private[this] def searchBy[A, V <% Ordered[V]](xs: IndexedSeq[A], target: V, f: A => V): Either[A, Int] = { 19 | def bs(target: V, start: Int, end: Int): Either[A, Int] = { 20 | if (start == end) { 21 | if (start == xs.length || f(xs(start)) > target) Right(start) 22 | else Right(start + 1) 23 | } else { 24 | val mid = (end + start) / 2 25 | val midVal = f(xs(mid)) 26 | 27 | if (midVal == target) Left(xs(mid)) 28 | else if (midVal > target) bs(target, start, mid) 29 | else bs(target, mid + 1, end) 30 | } 31 | } 32 | bs(target, 0, xs.length) 33 | } 34 | } 35 | 36 | object CategoricalCdf { 37 | 38 | def apply[K: Ordering](values: (K, Double)*): CategoricalCdf[K] = { 39 | val (vals, total) = values.sorted.foldLeft(IndexedSeq.empty[(K, Double)], 0.0) { 40 | case ((acc, prevTotal), (key, prob)) => (acc :+ (key, prevTotal + prob), prevTotal + prob) 41 | } 42 | CategoricalCdf(vals.map { case (key, prob) => (key, prob / total) }) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/CategoricalPmf.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | import scala.collection.generic.CanBuildFrom 4 | 5 | case class CategoricalPmf[K](ps: Map[K, Double]) extends Pmf[K] with PmfLike[K, CategoricalPmf[K]] { 6 | override def empty: CategoricalPmf[K] = CategoricalPmf.empty 7 | 8 | def +(kv: (K, Double))(implicit dummy: DummyImplicit) = 9 | new CategoricalPmf(ps.updated(kv._1, ps.getOrElse(kv._1, 0.0) + kv._2)) 10 | 11 | def -(key: K) = new CategoricalPmf(ps - key) 12 | def iterator: Iterator[(K, Double)] = ps.iterator 13 | def get(key: K): Option[Double] = ps.get(key) 14 | 15 | override def mapValues(f: Double => Double)(implicit dummy: DummyImplicit) = new CategoricalPmf(ps.mapValues(f)) 16 | override def +[B1 >: Double](kv: (K, B1)) = ps + kv 17 | 18 | override def toCategoricalPmf = this 19 | } 20 | 21 | object CategoricalPmf extends PmfFactory[CategoricalPmf] { 22 | def empty[K] = new CategoricalPmf(Map.empty[K, Double]) 23 | 24 | implicit def canBuildFrom[K]: CanBuildFrom[Coll, (K, Double), CategoricalPmf[K]] = new PmfCanBuildFrom[K] 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/Cdf.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | trait Cdf[K] { 4 | def iterator: Iterator[(K, Double)] 5 | def prob(key: K): Double 6 | def value(prob: Double): K 7 | 8 | def pow(p: Double)(implicit num: Numeric[K]): Cdf[K] = 9 | Cdf(iterator.map { case (k, prob) => (k, math.pow(prob, p)) }.toSeq: _*) 10 | 11 | def toPmf: Pmf[K] = { 12 | val b = Pmf.newBuilder[K] 13 | var last = 0.0 14 | for ((k, prob) <- iterator) { b += (k -> (prob - last)); last = prob } 15 | b.result() 16 | } 17 | } 18 | 19 | object Cdf { 20 | def apply[K: Ordering](values: (K, Double)*): Cdf[K] = CategoricalCdf(values: _*) 21 | def apply[K: Ordering](keys: TraversableOnce[K]): Cdf[K] = Pmf(keys).toCdf 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/Pdf.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | trait Pdf { 4 | 5 | def density(x: Double): Double 6 | 7 | def bounded(lower: Double, upper: Double) = new BoundedPdf { 8 | def density(x: Double) = Pdf.this.density(x) 9 | def lowerBound = lower 10 | def upperBound = upper 11 | } 12 | 13 | def toPmf(values: TraversableOnce[Double]): Pmf[Double] = 14 | Pmf(values.map { k => (k, density(k)) }.toMap).normalized 15 | } 16 | 17 | trait BoundedPdf extends Pdf { 18 | def lowerBound: Double 19 | def upperBound: Double 20 | 21 | def toPmf(step: Double = (upperBound - lowerBound) / 2000): Pmf[Double] = 22 | toPmf(lowerBound to upperBound by step) 23 | } 24 | 25 | object Pdf { 26 | 27 | def apply(densityFunc: Double => Double): Pdf = new Pdf { 28 | def density(x: Double) = densityFunc(x) 29 | } 30 | 31 | def apply(lower: Double, upper: Double)(densityFunc: Double => Double) = new BoundedPdf { 32 | def density(x: Double) = densityFunc(x) 33 | val lowerBound = lower 34 | val upperBound = upper 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/Pmf.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | import scala.collection.generic.CanBuildFrom 4 | 5 | trait Pmf[K] extends Map[K, Double] with PmfLike[K, Pmf[K]] { 6 | override def empty: Pmf[K] = Pmf.empty[K] 7 | override def seq: Pmf[K] = this 8 | } 9 | 10 | object Pmf extends PmfFactory[Pmf] { 11 | def empty[K] = CategoricalPmf.empty[K] 12 | 13 | implicit def canBuildFrom[K]: CanBuildFrom[Coll, (K, Double), Pmf[K]] = new PmfCanBuildFrom[K] 14 | } 15 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/PmfFactory.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | import thinkbayes.PmfFactory.PmfBuilder 4 | 5 | import scala.collection.generic.CanBuildFrom 6 | import scala.collection.mutable 7 | 8 | abstract class PmfFactory[CC[K] <: Pmf[K] with PmfLike[K, CC[K]]] { 9 | type Coll = CC[_] 10 | 11 | def apply[K](ps: (K, Double)*): CC[K] = (newBuilder[K] ++= ps).result() 12 | 13 | def apply[K](ps: Map[K, Double]): CC[K] = (newBuilder[K] ++= ps).result() 14 | 15 | def apply[K](keys: TraversableOnce[K]): CC[K] = (newBuilder[K] ++= keys.map(_ -> 1.0)).result().normalized 16 | 17 | def newBuilder[K]: mutable.Builder[(K, Double), CC[K]] = new PmfBuilder[K, CC[K]](empty[K]) 18 | 19 | def empty[K]: CC[K] 20 | 21 | class PmfCanBuildFrom[K] extends CanBuildFrom[Coll, (K, Double), CC[K]] { 22 | def apply(from: Coll) = newBuilder[K] 23 | def apply() = newBuilder 24 | } 25 | } 26 | 27 | object PmfFactory { 28 | class PmfBuilder[K, Coll <: Pmf[K] with PmfLike[K, Coll]](empty: Coll) extends mutable.Builder[(K, Double), Coll] { 29 | var coll: Coll = empty 30 | 31 | def result() = coll 32 | def clear() { coll = empty } 33 | def +=(elem: (K, Double)) = { 34 | coll = (coll + elem).asInstanceOf[Coll] 35 | this 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/PmfLike.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | import thinkbayes.PmfFactory.PmfBuilder 4 | 5 | import scala.collection.MapLike 6 | import scala.collection.generic.CanBuildFrom 7 | import scala.util.Random 8 | 9 | trait PmfLike[K, +This <: PmfLike[K, This] with Pmf[K]] extends MapLike[K, Double, This] { 10 | 11 | protected[this] override def newBuilder = new PmfBuilder(empty) 12 | 13 | /** Adds a probability value to an outcome of this distribution, returning a new `Pmf`. 14 | * 15 | * This method can either cause the creation of a new outcome with the given probability or add the given probability 16 | * to an existing outcome. Either way, this method does _not_ normalize the `Pmf` before returning it - it is the 17 | * user's responsibility to do so after this operation. That way, the probability of several outcomes can be 18 | * increased, followed by a single normalization. 19 | * 20 | * @param kv 21 | * the outcome-probability pair to add 22 | * @return 23 | * a new `Pmf` with the added outcome-probability pair. 24 | */ 25 | def +(kv: (K, Double))(implicit dummy: DummyImplicit): Pmf[K] 26 | 27 | def +[B1 >: Double](kv: (K, B1)): Map[K, B1] = Map() ++ iterator + kv 28 | 29 | /** Returns the probability that an outcome occurs. 30 | * @param key 31 | * the outcome whose probability is to be returned 32 | * @return 33 | * the probability that the given outcome occurs. 34 | */ 35 | def prob(key: K): Double = getOrElse(key, 0.0) 36 | 37 | /** Returns the probability that an outcome that satisfies a given predicate occurs. 38 | * @param pred 39 | * the predicate used to test outcomes 40 | * @return 41 | * the probability that an outcome that satisfies the predicate `pred` occurs. 42 | */ 43 | def prob(pred: K => Boolean): Double = iterator.filter { case (k, _) => pred(k) }.map(_._2).sum 44 | 45 | /** Returns the mode of this distribution along with its probability. 46 | * @return 47 | * a pair containing the mode of this distribution and its probability. 48 | */ 49 | def maxProb: (K, Double) = maxBy(_._2) 50 | 51 | /** Returns the mode of this distribution. 52 | * @return 53 | * the mode of this distribution. 54 | */ 55 | def mode: K = maxBy(_._2)._1 56 | 57 | /** Returns the mean of this distribution. 58 | * @return 59 | * the mean of this distribution. 60 | */ 61 | def mean(implicit num: Numeric[K]): Double = 62 | iterator.map { case (h, prob) => num.toDouble(h) * prob }.sum 63 | 64 | /** Returns the variance of this distribution. 65 | * @return 66 | * the variance of this distribution. 67 | */ 68 | def variance(implicit num: Numeric[K]): Double = { 69 | val m = mean 70 | iterator.map { case (h, prob) => num.toDouble(h) * num.toDouble(h) * prob }.sum - (m * m) 71 | } 72 | 73 | /** Generates a random sample of this distribution. 74 | * @return 75 | * a random sample of this distribution. 76 | */ 77 | def sample(): K = { 78 | def get(rand: Double, it: Iterator[(K, Double)]): K = { 79 | val (k, prob) = it.next() 80 | if (rand < prob) k else get(rand - prob, it) 81 | } 82 | get(Random.nextDouble() * values.sum, iterator) 83 | } 84 | 85 | /** Transforms this `Pmf` by applying a function to every outcome. If the function maps two different outcomes to the 86 | * same value, their probabilities are combined. 87 | * 88 | * @param f 89 | * the function used to transform the outcomes of this `Pmf` 90 | * @tparam K2 91 | * the type of the resulting outcomes 92 | * @return 93 | * a new `Pmf` with every outcome of this `Pmf` transformed. 94 | */ 95 | def mapKeys[K2, That](f: K => K2)(implicit bf: CanBuildFrom[This, (K2, Double), That]): That = 96 | map { case (k, prob) => (f(k), prob) } 97 | 98 | /** Transforms this `Pmf` by applying a function to every probability. This method does _not_ normalize the `Pmf` 99 | * before returning it. 100 | * 101 | * @param f 102 | * the function used to transform the outcomes of this `Pmf` 103 | * @return 104 | * a new `Pmf` with the probability of every outcome of this `Pmf` transformed. 105 | */ 106 | def mapValues(f: Double => Double)(implicit dummy: DummyImplicit): This = { 107 | val b = newBuilder 108 | b ++= iterator.map { kv => (kv._1, f(kv._2)) } 109 | b.result() 110 | } 111 | 112 | override def filterKeys(p: K => Boolean): This = filter { kv => p(kv._1) } 113 | 114 | /** Normalizes this `Pmf` so the probabilities of all outcomes sum to 1.0. 115 | * @return 116 | * a new `Pmf` with its probabilities normalized. 117 | */ 118 | def normalized: This = { 119 | val sum = values.sum 120 | if (sum == 0.0 || sum == 1.0) repr else mapValues { prob: Double => prob / sum } 121 | } 122 | 123 | /** Joins two independent `Pmf`s by combining their outcomes using a function. 124 | * 125 | * Each outcome from this `Pmf` is combined with each outcome from `other`, and their joint probability is 126 | * calculated. If the function maps two different outcome pairs to the same value, their probabilities are combined. 127 | * This method always returns a normalized `Pmf`. 128 | * 129 | * @param other 130 | * the `Pmf` to combine with this `Pmf` 131 | * @param comb 132 | * the function used to combine outcomes from the two `Pmf`s 133 | * @tparam K2 134 | * the type of the outcomes of `other` 135 | * @tparam J 136 | * the type of the resulting outcomes 137 | * @return 138 | * a new `Pmf` resultant from combining this `Pmf` and `other` using the function `comb` 139 | */ 140 | def join[K2, J, That](other: Pmf[K2])(comb: (K, K2) => J): Pmf[J] = { 141 | val b = Pmf.newBuilder[J] 142 | for ((k, prob) <- this; (k2, prob2) <- other) 143 | b += (comb(k, k2) -> prob * prob2) 144 | b.result().normalized 145 | } 146 | 147 | /** Returns the distribution of the sum of two independent `Pmf`s. This method always returns a normalized `Pmf`. 148 | * 149 | * @param other 150 | * the `Pmf` to sum with this `Pmf` 151 | * @param num 152 | * an evidence that the outcomes of this distribution are numeric 153 | * @return 154 | * the distribution of the sum of this `Pmf` with `other`. 155 | */ 156 | def ++(other: Pmf[K])(implicit num: Numeric[K]): Pmf[K] = join(other)(num.plus) 157 | 158 | /** Returns the distribution of the difference of two independent `Pmf`s. This method always returns a normalized 159 | * `Pmf`. 160 | * 161 | * @param other 162 | * the `Pmf` to subtract to this `Pmf` 163 | * @param num 164 | * an evidence that the outcomes of this distribution are numeric 165 | * @return 166 | * the distribution of the difference between this `Pmf` and `other`. 167 | */ 168 | def --(other: Pmf[K])(implicit num: Numeric[K]): Pmf[K] = join(other)(num.minus) 169 | 170 | /** Returns the mixture distribution that results from the combination of the `Pmf` outcomes. This method always 171 | * returns a normalized `Pmf`. 172 | * 173 | * @param ev 174 | * an evidence that the outcomes of this distribution are also `Pmf`s 175 | * @tparam K2 176 | * the type of the outcomes in each outcome `Pmf` 177 | * @return 178 | * the mixture distribution that results from the combination of the `Pmf` outcomes. 179 | */ 180 | def mixture[K2](implicit ev: K <:< Pmf[K2]): Pmf[K2] = { 181 | val b = Pmf.newBuilder[K2] 182 | for ((outcome, weight) <- this; (k, prob) <- outcome) 183 | b += (k -> weight * prob) 184 | b.result().normalized 185 | } 186 | 187 | /** Returns a copy of this `Pmf` that is an instance of `CategoricalPmf`. This method can be used to force the 188 | * calculation of the probabilities of lazy `Pmf` instances or to convert `Pmf`s whose probabilities are described as 189 | * a closed-form expression into an enumeration of all outcomes and its probabilities (a 190 | * [[http://en.wikipedia.org/wiki/Categorical_distribution categorical distribution]]). 191 | * 192 | * @return 193 | * a copy of this `Pmf` that is an instance of `CategoricalPmf`. 194 | */ 195 | def toCategoricalPmf: CategoricalPmf[K] = CategoricalPmf(iterator.toSeq: _*) 196 | 197 | /** Returns a `Cdf` that represents this distribution. 198 | * @param ord 199 | * an evidence that the outcomes of this distribution have an ordering 200 | * @return 201 | * a `Cdf` that represents this distribution. 202 | */ 203 | def toCdf(implicit ord: Ordering[K]): Cdf[K] = Cdf(toSeq: _*) 204 | 205 | override def stringPrefix = "Pmf" 206 | } 207 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/SimpleSuite.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | trait SimpleSuite[H, D] extends Suite[H, D] { 4 | 5 | /** Computes the likelihood of a given data under an hypothesis. 6 | * @param data 7 | * the representation of the data whose likelihood is to be returned 8 | * @param hypo 9 | * the representation of the hypothesis 10 | * @return 11 | * the likelihood of the given data under the given hypothesis. 12 | */ 13 | def likelihood(data: D, hypo: H): Double 14 | 15 | def observed(data: D): Suite[H, D] = { 16 | val newPmf = pmf.map { case (h, prob) => (h, prob * likelihood(data, h)) }.normalized 17 | updatedPmf(newPmf) 18 | } 19 | 20 | override def observedSet(dataset: TraversableOnce[D]): Suite[H, D] = { 21 | val newPmf = dataset 22 | .foldLeft(pmf) { (acc, data) => 23 | acc.map { case (h, prob) => (h, prob * likelihood(data, h)) } 24 | } 25 | .normalized 26 | 27 | updatedPmf(newPmf) 28 | } 29 | 30 | /** Returns a new `Suite` with an updated `Pmf`. 31 | * @param newPmf 32 | * the `Pmf` of the `Suite` to be returned 33 | * @return 34 | * a new `Suite` with an updated `Pmf`. 35 | */ 36 | def updatedPmf(newPmf: Pmf[H]): Suite[H, D] = Suite(newPmf)(likelihood) 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/Suite.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | trait Suite[H, D] { 4 | 5 | def pmf: Pmf[H] 6 | 7 | /** Updates each hypothesis based on the given data. 8 | * @param data 9 | * the representation of the data to use to update the suite 10 | * @return 11 | * a new [[Suite]] with the updated hypotheses. 12 | */ 13 | def observed(data: D): Suite[H, D] 14 | 15 | def observed(dataset: D*): Suite[H, D] = observedSet(dataset) 16 | 17 | /** Updates each hypothesis based on the given dataset. This is more efficient than calling `update` repeatedly 18 | * because it waits until the end to `normalize`. 19 | * @param dataset 20 | * a sequence of data values to use to update the suite 21 | * @return 22 | * a new [[Suite]] with the updated hypotheses. 23 | */ 24 | def observedSet(dataset: TraversableOnce[D]): Suite[H, D] = dataset.foldLeft(this)(_.observed(_)) 25 | } 26 | 27 | object Suite { 28 | 29 | def apply[H, D](distr: Pmf[H])(likelihoodFunc: (D, H) => Double): Suite[H, D] = new SimpleSuite[H, D] { 30 | val pmf = distr 31 | def likelihood(data: D, hypo: H) = likelihoodFunc(data, hypo) 32 | override def updatedPmf(newPmf: Pmf[H]) = Suite(newPmf)(likelihoodFunc) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/Distributions.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions 2 | 3 | import org.apache.commons.math3.distribution._ 4 | import org.apache.commons.math3.random.Well19937c 5 | import thinkbayes._ 6 | import thinkbayes.extensions.distributions._ 7 | import thinkbayes.extensions.distributions.CommonsMathConversions._ 8 | import weka.estimators.KernelEstimator 9 | 10 | object Distributions extends CommonsMathConversions { 11 | private[this] val rndGen = new Well19937c() 12 | 13 | def estimatePdf[K](values: Seq[K], precision: Option[Double] = None)(implicit num: Numeric[K]): Pdf = { 14 | val doubleValues = values.map(num.toDouble) 15 | val kde = new KernelEstimator(precision.getOrElse(doubleValues.max / 10000)) 16 | for (v <- doubleValues) kde.addValue(v, 1) 17 | new Pdf { def density(x: Double) = kde.getProbability(x) } 18 | } 19 | 20 | def normalPdf(mean: Double, stdev: Double): Pdf = new NormalDistribution(rndGen, mean, stdev) 21 | 22 | def normalPmf(mean: Double, stdev: Double, numSigmas: Double = 4.0, steps: Int = 2000): Pmf[Double] = { 23 | val low = mean - numSigmas * stdev 24 | val high = mean + numSigmas * stdev 25 | new RealDistributionPmf(new NormalDistribution(rndGen, mean, stdev), low to high by ((high - low) / steps)) 26 | } 27 | 28 | def poissonPmf(lam: Double): Pmf[Int] = 29 | if (lam <= 0) Pmf.empty 30 | else 31 | new PoissonDistribution( 32 | rndGen, 33 | lam, 34 | PoissonDistribution.DEFAULT_EPSILON, 35 | PoissonDistribution.DEFAULT_MAX_ITERATIONS 36 | ) 37 | 38 | def exponentialPdf(lam: Double): Pdf = new ExponentialDistribution(rndGen, 1.0 / lam) 39 | 40 | def exponentialPmf( 41 | lam: Double, 42 | steps: Int = 2000, 43 | cutoff: Double = defaultCutoff, 44 | absCutoff: Double = Double.PositiveInfinity 45 | ): Pmf[Double] = { 46 | 47 | val distrib = new ExponentialDistribution(rndGen, 1.0 / lam) 48 | val high = if (absCutoff.isPosInfinity) approximateRealUpperBound(distrib, cutoff) else absCutoff 49 | new RealDistributionPmf(distrib, 0.0 to high by (high / steps)) 50 | } 51 | 52 | def binomialPmf(trials: Int, p: Double): Pmf[Int] = new BinomialDistribution(rndGen, trials, p) 53 | 54 | def hypergeometricPmf(popSize: Int, successCount: Int, sampleSize: Int): Pmf[Int] = 55 | if (sampleSize == 0 || successCount == 0) Pmf(0 -> 1.0) 56 | else if (popSize == successCount) Pmf(sampleSize -> 1.0) 57 | else new HypergeometricDistribution(rndGen, popSize, successCount, sampleSize) 58 | 59 | def betaPdf(alpha: Double, beta: Double): BoundedPdf = { 60 | val distrib = new BetaDistribution(alpha, beta) 61 | 62 | if (alpha < 1.0 || beta < 1.0) Pdf(0.0, 1.0)(distrib.density) 63 | else 64 | Pdf(0.0, 1.0) { 65 | case 0.0 => if (alpha == 1.0) beta else 0.0 66 | case 1.0 => if (beta == 1.0) alpha else 0.0 67 | case x => distrib.density(x) 68 | } 69 | } 70 | 71 | def betaBinomialPmf(trials: Int, alpha: Double, beta: Double) = new BetaBinomialPmf(trials, alpha, beta) 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/Plotting.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions 2 | 3 | import scala.util.Try 4 | 5 | import de.sciss.chart._ 6 | import de.sciss.chart.api._ 7 | import org.jfree.data.category.DefaultCategoryDataset 8 | import org.jfree.data.xy.XYSeriesCollection 9 | import thinkbayes._ 10 | import thinkbayes.extensions.plotting.{ShowControls, ThinkBayesChartTheme} 11 | 12 | trait Plotting { 13 | val defaultTheme = ThinkBayesChartTheme.Dark 14 | 15 | implicit def mapAsPlottable[K, V](map: Map[K, V])(implicit asNum: Numeric[V]) = new Plottable[K] { 16 | protected def plotData = map.mapValues(asNum.toDouble).toSeq 17 | } 18 | 19 | implicit def pmfAsPlottable[K](pmf: Pmf[K]) = new Plottable[K] { 20 | protected def plotData = pmf.toSeq 21 | override protected def defaultYLabel = "probability" 22 | } 23 | 24 | implicit def suiteAsPlottable[H](suite: Suite[H, _]) = new Plottable[H] { 25 | protected def plotData = suite.pmf.toSeq 26 | override protected def defaultYLabel = "probability" 27 | } 28 | 29 | implicit def cdfAsPlottable[K](cdf: Cdf[K]) = new Plottable[K] { 30 | protected def plotData = cdf.iterator.toSeq 31 | override protected def defaultYLabel = "probability" 32 | } 33 | 34 | implicit def boundedPdfAsPlottable[K](pdf: BoundedPdf) = new Plottable[Double] { 35 | protected def plotData = (pdf.lowerBound to pdf.upperBound by ((pdf.upperBound - pdf.lowerBound) / 10000)).map { 36 | k => (k, pdf.density(k)) 37 | } 38 | 39 | override protected def defaultYLabel = "probability" 40 | } 41 | 42 | trait Plottable[K] { 43 | protected def plotData: Seq[(K, Double)] 44 | protected def defaultXLabel = "" 45 | protected def defaultYLabel = "" 46 | 47 | /** Plots this object as a category series in a new chart. 48 | * @param seriesName 49 | * the unique name of the series 50 | * @param title 51 | * the title of the chart 52 | * @param xLabel 53 | * the label to draw on the X axis 54 | * @param yLabel 55 | * the label to draw on the Y axis 56 | * @return 57 | * the newly created chart object. 58 | */ 59 | def plotBar(seriesName: String, title: String = "", xLabel: String = defaultXLabel, yLabel: String = defaultYLabel)( 60 | implicit 61 | ord: K => Ordered[K], 62 | theme: ChartTheme = defaultTheme 63 | ): CategoryChart = { 64 | 65 | plotBarOn(emptyPlotBar(title, xLabel, yLabel), seriesName) 66 | } 67 | 68 | /** Plots this object as a category series in a new chart and opens it in a window afterwards. 69 | * @param seriesName 70 | * the unique name of the series 71 | * @param title 72 | * the title of the chart 73 | * @param xLabel 74 | * the label to draw on the X axis 75 | * @param yLabel 76 | * the label to draw on the Y axis 77 | * @return 78 | * the newly created chart object. 79 | */ 80 | def showBar(seriesName: String, title: String = "", xLabel: String = defaultXLabel, yLabel: String = defaultYLabel)( 81 | implicit 82 | ord: K => Ordered[K], 83 | theme: ChartTheme = defaultTheme 84 | ): CategoryChart = { 85 | 86 | plotBar(seriesName, title, xLabel, yLabel).showScalable() 87 | } 88 | 89 | /** Plots this object as a category series in the provided chart. If the given series name was used before, the data 90 | * of that series is replaced with the new data. 91 | * @param chart 92 | * the category chart to plot this object on 93 | * @param seriesName 94 | * the unique name of the series 95 | * @return 96 | * the provided chart object. 97 | */ 98 | def plotBarOn(chart: CategoryChart, seriesName: String)(implicit ord: K => Ordered[K]): chart.type = { 99 | chart.plot.getDataset match { 100 | case catDataset: DefaultCategoryDataset => 101 | Try(catDataset.removeRow(seriesName)) 102 | plotData.sorted.foreach { case (k, v) => catDataset.addValue(v, seriesName, k) } 103 | } 104 | chart 105 | } 106 | 107 | /** Plots this object as a XY series in a new chart. 108 | * @param seriesName 109 | * the unique name of the series 110 | * @param title 111 | * the title of the chart 112 | * @param xLabel 113 | * the label to draw on the X axis 114 | * @param yLabel 115 | * the label to draw on the Y axis 116 | * @return 117 | * the newly created chart object. 118 | */ 119 | def plotXY(seriesName: String, title: String = "", xLabel: String = defaultXLabel, yLabel: String = defaultYLabel)( 120 | implicit 121 | asNum: Numeric[K], 122 | theme: ChartTheme = defaultTheme 123 | ): XYChart = { 124 | 125 | plotXYOn(emptyPlotXY(title, xLabel, yLabel), seriesName) 126 | } 127 | 128 | /** Plots this object as a XY series in a new chart and opens it in a window afterwards. 129 | * @param seriesName 130 | * the unique name of the series 131 | * @param title 132 | * the title of the chart 133 | * @param xLabel 134 | * the label to draw on the X axis 135 | * @param yLabel 136 | * the label to draw on the Y axis 137 | * @return 138 | * the newly created chart object. 139 | */ 140 | def showXY(seriesName: String, title: String = "", xLabel: String = defaultXLabel, yLabel: String = defaultYLabel)( 141 | implicit 142 | asNum: Numeric[K], 143 | theme: ChartTheme = defaultTheme 144 | ): XYChart = { 145 | 146 | plotXY(seriesName, title, xLabel, yLabel).showScalable() 147 | } 148 | 149 | /** Plots this object as a XY series in the provided chart. If the given series name was used before, the data of 150 | * that series is replaced with the new data. 151 | * @param chart 152 | * the XY chart to plot this object on 153 | * @param seriesName 154 | * the unique name of the series 155 | * @return 156 | * the provided chart object. 157 | */ 158 | def plotXYOn[A <: XYChart](chart: A, seriesName: String)(implicit asNum: Numeric[K]): chart.type = { 159 | chart.plot.getDataset match { 160 | case seriesList: XYSeriesCollection => 161 | Try(seriesList.removeSeries(seriesList.getSeriesIndex(seriesName))) 162 | seriesList.addSeries(plotData.toXYSeries(seriesName)) 163 | } 164 | chart 165 | } 166 | } 167 | 168 | implicit class RichChart[C <: Chart](val chart: C) { 169 | 170 | /** Shows this chart in a new window. 171 | * @return 172 | * this chart. 173 | */ 174 | def showScalable(): chart.type = { 175 | val controls = new ShowControls(chart) 176 | controls.onHide(controls.dispose()) 177 | controls.show() 178 | chart 179 | } 180 | 181 | /** Returns a `ShowControls` instance for this chart. The returned object contains methods for showing the chart in 182 | * a window using Swing, as well as closing it programatically and adding hooks for open and close events. 183 | * 184 | * @return 185 | * a `ShowControls` instance for this chart. 186 | */ 187 | def showControls = new ShowControls(chart) 188 | } 189 | 190 | implicit class RichCategoryChart(val chart: CategoryChart) { 191 | 192 | /** Plots a category series in this chart. If the given series name was used before, the data of that series is 193 | * replaced with the new data. 194 | * @param plottable 195 | * the plottable object to draw 196 | * @param seriesName 197 | * the unique name of the series 198 | * @tparam K 199 | * the type of the keys 200 | * @return 201 | * this chart. 202 | */ 203 | def plotBar[K](plottable: Plottable[K], seriesName: String)(implicit ord: K => Ordered[K]): chart.type = 204 | plottable.plotBarOn(chart, seriesName) 205 | 206 | /** Removes a previously drawn series from this category chart. 207 | * @param seriesName 208 | * the unique name of the series 209 | * @return 210 | * this chart. 211 | */ 212 | def removeSeries(seriesName: String): chart.type = chart.plot.getDataset match { 213 | case catDataset: DefaultCategoryDataset => catDataset.removeRow(seriesName); chart 214 | } 215 | } 216 | 217 | implicit class RichXYChart(val chart: XYChart) { 218 | 219 | /** Plots a XY series in this chart. If the given series name was used before, the data of that series is replaced 220 | * with the new data. 221 | * @param plottable 222 | * the plottable object to draw 223 | * @param seriesName 224 | * the unique name of the series 225 | * @tparam K 226 | * the type of the keys 227 | * @return 228 | * this chart. 229 | */ 230 | def plotXY[K](plottable: Plottable[K], seriesName: String)(implicit asNum: Numeric[K]): chart.type = 231 | plottable.plotXYOn(chart, seriesName) 232 | 233 | /** Removes a previously drawn series from this chart. 234 | * @param seriesName 235 | * the unique name of the series 236 | * @return 237 | * this chart. 238 | */ 239 | def removeSeries(seriesName: String): chart.type = chart.plot.getDataset match { 240 | case seriesList: XYSeriesCollection => seriesList.removeSeries(seriesList.getSeriesIndex(seriesName)); chart 241 | } 242 | } 243 | 244 | /** Creates an empty chart for plotting category series. 245 | * @param title 246 | * the title of the chart 247 | * @param xLabel 248 | * the label to draw on the X axis 249 | * @param yLabel 250 | * the label to draw on the Y axis 251 | * @return 252 | * the newly created chart object. 253 | */ 254 | def emptyPlotBar(title: String = "", xLabel: String = "", yLabel: String = "")(implicit 255 | theme: ChartTheme = defaultTheme 256 | ): CategoryChart = { 257 | 258 | val chart = BarChart(Seq.empty[(String, Seq[(Int, Double)])]) 259 | chart.title = title 260 | chart.plot.domain.axis.label.text = xLabel 261 | chart.plot.range.axis.label.text = yLabel 262 | chart 263 | } 264 | 265 | /** Creates an empty chart for plotting XY series. 266 | * @param title 267 | * the title of the chart 268 | * @param xLabel 269 | * the label to draw on the X axis 270 | * @param yLabel 271 | * the label to draw on the Y axis 272 | * @return 273 | * the newly created chart object. 274 | */ 275 | def emptyPlotXY(title: String = "", xLabel: String = "", yLabel: String = "")(implicit 276 | theme: ChartTheme = defaultTheme 277 | ): XYChart = { 278 | 279 | val chart = XYLineChart(Seq.empty[(String, Seq[(Int, Double)])]) 280 | chart.title = title 281 | chart.plot.domain.axis.label.text = xLabel 282 | chart.plot.range.axis.label.text = yLabel 283 | chart 284 | } 285 | } 286 | 287 | object Plotting extends Plotting 288 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/Sampling.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions 2 | 3 | import scala.annotation.tailrec 4 | import scala.util.Random 5 | import thinkbayes.Pmf 6 | 7 | object Sampling { 8 | 9 | def randomJoin[K, J](pmfs: TraversableOnce[Pmf[K]], join: TraversableOnce[K] => J): J = 10 | join(pmfs.map(_.sample())) 11 | 12 | def sampleJoin[K, J](pmfs: TraversableOnce[Pmf[K]], n: Int, join: TraversableOnce[K] => J) = 13 | Pmf(Seq.fill(n)(randomJoin(pmfs, join))) 14 | 15 | def randomSum[K: Numeric](pmfs: TraversableOnce[Pmf[K]]) = randomJoin[K, K](pmfs, _.sum) 16 | def sampleSum[K: Numeric](pmfs: TraversableOnce[Pmf[K]], n: Int) = sampleJoin[K, K](pmfs, n, _.sum) 17 | def randomMax[K: Ordering](pmfs: TraversableOnce[Pmf[K]]): K = randomJoin[K, K](pmfs, _.max) 18 | def sampleMax[K: Ordering](pmfs: TraversableOnce[Pmf[K]], n: Int) = sampleJoin[K, K](pmfs, n, _.max) 19 | 20 | /** Adds sampling extensions to `Pmf` 21 | */ 22 | implicit class PmfSampling[K](val pmf: Pmf[K]) extends AnyVal { 23 | 24 | /** This implements the alias method as described in http://www.keithschwarz.com/darts-dice-coins/ 25 | * 26 | * It has a total initialization time of O(n) and generation time of O(1). 27 | * 28 | * @return 29 | * an infinite iterator of samples randomly drawn from the pmf. 30 | */ 31 | def samplesIterator: Iterator[K] = { 32 | val len = pmf.size 33 | val scale = len / pmf.values.sum 34 | val scaled = pmf.toList.map({ case (k, v) => k -> (v * scale) }) 35 | val (small, large) = scaled.partition(_._2 < 1.0) 36 | 37 | @tailrec 38 | def alias( 39 | small: List[(K, Double)], 40 | large: List[(K, Double)], 41 | rest: List[(K, Double, Option[K])] 42 | ): List[(K, Double, Option[K])] = { 43 | (small, large) match { 44 | case ((s, ps) :: ss, (l, pl) :: ll) => 45 | val remainder = (l, pl - (1.0 - ps)) 46 | val newRest = (s, ps, Some(l)) :: rest 47 | if (remainder._2 < 1) 48 | alias(remainder :: ss, ll, newRest) 49 | else 50 | alias(ss, remainder :: ll, newRest) 51 | 52 | case (_, (l, _) :: ll) => 53 | alias(small, ll, (l, 1.0, None) :: rest) 54 | 55 | case ((s, _) :: ss, _) => 56 | alias(ss, large, (s, 1.0, None) :: rest) 57 | 58 | case _ => 59 | rest 60 | } 61 | } 62 | 63 | val table = Vector() ++ alias(small, large, Nil) 64 | def select(p1: Double, p2: Double, table: Vector[(K, Double, Option[K])]): K = { 65 | table((p1 * len).toInt) match { 66 | case (a, _, None) => a 67 | case (a, p, Some(b)) => if (p2 <= p) a else b 68 | } 69 | } 70 | 71 | Iterator.continually(select(Random.nextDouble(), Random.nextDouble(), table)) 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/Stats.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions 2 | 3 | import thinkbayes._ 4 | 5 | object Stats { 6 | 7 | implicit class PmfStats[K](val pmf: Pmf[K]) extends AnyVal { 8 | 9 | /** Calculates a quantile of this `Pmf` according to the Nearest Rank definition. 10 | * @param p 11 | * the quantile to calculate 12 | * @return 13 | * the quantile of this `Pmf` according to the Nearest Rank definition. 14 | */ 15 | def quantile(p: Double)(implicit ord: Ordering[K]): K = { 16 | val domain = pmf.keysIterator.toIndexedSeq.sorted 17 | 18 | def loop(curr: Int, currProb: Double): K = 19 | if (curr >= domain.length - 1) domain(curr) 20 | else { 21 | val prob = pmf.prob(domain(curr)) 22 | if (currProb + prob >= p) domain(curr) 23 | else loop(curr + 1, currProb + prob) 24 | } 25 | 26 | loop(0, 0.0) 27 | } 28 | 29 | def credibleInterval(p: Double)(implicit ord: Ordering[K]): (K, K) = 30 | pmf.toCdf.credibleInterval(p) 31 | } 32 | 33 | implicit class CdfStats[K](val cdf: Cdf[K]) extends AnyVal { 34 | 35 | /** Calculates a quantile of this `Cdf` according to the Nearest Rank definition. 36 | * @param p 37 | * the quantile to calculate 38 | * @return 39 | * the quantile of this `Cdf` according to the Nearest Rank definition. 40 | */ 41 | def quantile(p: Double) = cdf.value(p) 42 | 43 | def credibleInterval(p: Double): (K, K) = { 44 | val distTail = (1.0 - p) / 2.0 45 | (quantile(distTail), quantile(1.0 - distTail)) 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/distributions/BetaBinomialPmf.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions.distributions 2 | 3 | import org.apache.commons.math3.special.Beta._ 4 | import org.apache.commons.math3.util.CombinatoricsUtils._ 5 | import org.apache.commons.math3.util.FastMath._ 6 | import thinkbayes.Pmf 7 | 8 | /** A `Pmf` of a beta-binomial distribution ([[http://en.wikipedia.org/wiki/Beta-binomial_distribution]]). 9 | * @param trials 10 | * the number of trials 11 | * @param alpha 12 | * the alpha parameter 13 | * @param beta 14 | * the beta parameter 15 | */ 16 | class BetaBinomialPmf(val trials: Int, val alpha: Double, val beta: Double) extends Pmf[Int] with ClosedFormPmf[Int] { 17 | 18 | private[this] def logP(k: Int) = 19 | binomialCoefficientLog(trials, k) + logBeta(k + alpha, trials - k + beta) - logBeta(alpha, beta) 20 | 21 | @inline private[this] def p(k: Int) = exp(logP(k)) 22 | 23 | def get(key: Int) = if (key < 0 || key > trials) None else Some(p(key)) 24 | def iterator = Iterator.tabulate(trials + 1) { key => (key, p(key)) } 25 | 26 | override def maxProb = { val m = mode; (m, prob(m)) } 27 | 28 | /** @inheritdoc 29 | * In `BetaBinomialPmf`, this method yields a close approximation of the real mode. 30 | * 31 | * @return 32 | * the mode of this distribution. 33 | */ 34 | override def mode = 35 | if (alpha > 1.0 && beta > 1.0) round(trials * (alpha - 1.0) / (alpha + beta - 2.0)).toInt 36 | else if (alpha == 1.0 && beta == 1.0) 0 // or any other value in the range [0, `trials`] 37 | else if (alpha < 1.0 && beta < 1.0) 0 // or `trials` 38 | else if (alpha < 1.0 && beta >= 1.0 || alpha == 1.0 && beta > 1.0) 0 39 | else trials 40 | 41 | override def mean(implicit num: Numeric[Int]) = trials * alpha / (alpha + beta) 42 | 43 | override def variance(implicit num: Numeric[Int]) = 44 | trials * alpha * beta * (alpha + beta + trials) / ((alpha + beta) * (alpha + beta) * (alpha + beta + 1)) 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/distributions/ClosedFormPmf.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions.distributions 2 | 3 | import thinkbayes._ 4 | 5 | /** Trait of `Pmf` instances whose probability value is calculated by a closed-form expression and, as such, do not 6 | * allow freely adding, removing or modifying outcomes and probabilities. Such operations create and return a new 7 | * categorical distribution reflecting the changes. 8 | * 9 | * Typically, implementations of `ClosedFormPmf` also have a closed-form expression for calculating their numerical 10 | * mean. If that is the case, they are expected to override `mean` to provide an efficient implementation. 11 | * 12 | * @tparam K 13 | * the type of the outcomes 14 | */ 15 | trait ClosedFormPmf[K] extends PmfLike[K, Pmf[K]] { 16 | def +(kv: (K, Double))(implicit dummy: DummyImplicit) = toCategoricalPmf + kv 17 | def -(key: K) = toCategoricalPmf - key 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/distributions/CommonsMathConversions.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions.distributions 2 | 3 | import org.apache.commons.math3.distribution.{IntegerDistribution, RealDistribution} 4 | import thinkbayes.extensions.distributions.CommonsMathConversions._ 5 | import thinkbayes._ 6 | 7 | trait CommonsMathConversions { 8 | implicit def integerDistributionAsPmf(distrib: IntegerDistribution): Pmf[Int] = new IntegerDistributionPmf(distrib) 9 | implicit def realDistributionAsPdf(distrib: RealDistribution): Pdf = Pdf(distrib.density) 10 | } 11 | 12 | object CommonsMathConversions { 13 | val defaultCutoff = 0.0001 14 | 15 | def approximateIntegerLowerBound(distrib: IntegerDistribution, cutoff: Double = defaultCutoff) = 16 | if (distrib.getSupportLowerBound != Int.MinValue) distrib.getSupportLowerBound 17 | else distrib.inverseCumulativeProbability(defaultCutoff) 18 | 19 | def approximateIntegerUpperBound(distrib: IntegerDistribution, cutoff: Double = defaultCutoff) = 20 | if (distrib.getSupportUpperBound != Int.MaxValue) distrib.getSupportUpperBound 21 | else distrib.inverseCumulativeProbability(1.0 - defaultCutoff) 22 | 23 | def approximateRealLowerBound(distrib: RealDistribution, cutoff: Double = defaultCutoff) = 24 | if (!distrib.getSupportLowerBound.isNegInfinity) distrib.getSupportLowerBound 25 | else distrib.inverseCumulativeProbability(defaultCutoff) 26 | 27 | def approximateRealUpperBound(distrib: RealDistribution, cutoff: Double = defaultCutoff) = 28 | if (!distrib.getSupportUpperBound.isPosInfinity) distrib.getSupportUpperBound 29 | else distrib.inverseCumulativeProbability(1.0 - defaultCutoff) 30 | 31 | class IntegerDistributionPmf(distrib: IntegerDistribution, cutoff: Double = defaultCutoff) 32 | extends Pmf[Int] 33 | with ClosedFormPmf[Int] { 34 | 35 | private[this] lazy val lowerBound = approximateIntegerLowerBound(distrib, cutoff) 36 | private[this] lazy val upperBound = approximateIntegerUpperBound(distrib, cutoff) 37 | 38 | def get(key: Int) = Some(distrib.probability(key)) 39 | def iterator = (lowerBound to upperBound).iterator.map { key => (key, distrib.probability(key)) } 40 | 41 | override def mean(implicit num: Numeric[Int]): Double = distrib.getNumericalMean 42 | override def variance(implicit num: Numeric[Int]): Double = distrib.getNumericalVariance 43 | override def toCdf(implicit ord: Ordering[Int]) = new IntegerDistributionCdf(distrib, cutoff) 44 | } 45 | 46 | class RealDistributionPmf(distrib: RealDistribution, domain: Seq[Double]) 47 | extends Pmf[Double] 48 | with ClosedFormPmf[Double] { 49 | 50 | def get(key: Double) = Some(distrib.density(key)) 51 | def iterator = domain.iterator.map { key => (key, distrib.density(key)) } 52 | 53 | override def toCdf(implicit ord: Ordering[Double]) = new RealDistributionCdf(distrib, domain) 54 | } 55 | 56 | object RealDistributionPmf { 57 | def apply(distrib: RealDistribution, steps: Int, cutoff: Double = defaultCutoff): RealDistributionPmf = { 58 | val lowerBound = approximateRealLowerBound(distrib, cutoff) 59 | val upperBound = approximateRealUpperBound(distrib, cutoff) 60 | new RealDistributionPmf(distrib, lowerBound to upperBound by ((upperBound - lowerBound) / steps)) 61 | } 62 | } 63 | 64 | class IntegerDistributionCdf(distrib: IntegerDistribution, cutoff: Double = defaultCutoff) extends Cdf[Int] { 65 | private[this] lazy val lowerBound = approximateIntegerLowerBound(distrib, cutoff) 66 | private[this] lazy val upperBound = approximateIntegerUpperBound(distrib, cutoff) 67 | 68 | def prob(key: Int): Double = distrib.cumulativeProbability(key) 69 | def value(prob: Double): Int = distrib.inverseCumulativeProbability(prob) 70 | def iterator = (lowerBound to upperBound).iterator.map { key => (key, distrib.cumulativeProbability(key)) } 71 | 72 | override def toPmf = new IntegerDistributionPmf(distrib, cutoff) 73 | } 74 | 75 | class RealDistributionCdf(distrib: RealDistribution, domain: Seq[Double]) extends Cdf[Double] { 76 | def prob(key: Double): Double = distrib.cumulativeProbability(key) 77 | def value(prob: Double): Double = distrib.inverseCumulativeProbability(prob) 78 | def iterator = domain.iterator.map { key => (key, distrib.cumulativeProbability(key)) } 79 | 80 | override def toPmf = new RealDistributionPmf(distrib, domain) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/plotting/ShowControls.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions.plotting 2 | 3 | import scala.swing.Frame 4 | import scala.swing.Swing._ 5 | import scala.swing.event.{WindowClosed, WindowOpened} 6 | 7 | import de.sciss.chart.Chart 8 | import org.jfree.chart.ChartFrame 9 | 10 | /** A wrapper for `Chart` instances making it easy for clients to show the chart in a window using Swing, as well as 11 | * closing it programatically and adding hooks for open and close events. 12 | * 13 | * @param chart 14 | * the chart to show 15 | */ 16 | class ShowControls(val chart: Chart) { 17 | private[this] var frame = Option.empty[Frame] 18 | private[this] var onShowHandler = Option.empty[() => Unit] 19 | private[this] var onHideHandler = Option.empty[() => Unit] 20 | 21 | /** Shows the chart in a window. If the window is already open, this method does nothing. 22 | */ 23 | def show() = frame match { 24 | case Some(fr) => fr.open() 25 | case None => frame = Some(showScalable(chart, chart.title, (1024, 768))) 26 | } 27 | 28 | /** Closes the chart window. If the window was already closed or it was never created, this method does nothing. 29 | */ 30 | def hide() = frame.foreach(_.close()) 31 | 32 | /** Closes the chart window and releases any resources associated with it. If the window was never created, this 33 | * method does nothing. 34 | */ 35 | def dispose() = frame.foreach(_.dispose()) 36 | 37 | /** Adds a hook to run each time the window is opened. 38 | * 39 | * @param callback 40 | * the hook to run each time the window is opened. 41 | */ 42 | def onShow(callback: => Unit) = { 43 | onShowHandler = Some(() => callback) 44 | } 45 | 46 | /** Adds a hook to run each time the window is closed. 47 | * 48 | * @param callback 49 | * the hook to run each time the window is closed. 50 | */ 51 | def onHide(callback: => Unit) = { 52 | onHideHandler = Some(() => callback) 53 | } 54 | 55 | private[this] def showScalable(chart: Chart, windowTitle: String, dim: (Int, Int)): Frame = { 56 | val frame = chart.toFrame(windowTitle) 57 | val panel = frame.peer.asInstanceOf[ChartFrame].getChartPanel 58 | panel.setMaximumDrawWidth(Int.MaxValue) 59 | panel.setMaximumDrawHeight(Int.MaxValue) 60 | frame.size = dim 61 | 62 | frame.reactions += { 63 | case WindowOpened(`frame`) => onShowHandler.foreach(_.apply) 64 | case WindowClosed(`frame`) => onHideHandler.foreach(_.apply) 65 | } 66 | frame.visible = true 67 | frame 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/extensions/plotting/ThinkBayesChartTheme.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions.plotting 2 | 3 | import java.awt.{Stroke, BasicStroke, Color, Font} 4 | import javax.swing.UIManager 5 | 6 | import de.sciss.chart.api._ 7 | import org.jfree.chart.StandardChartTheme 8 | import org.jfree.chart.block.LineBorder 9 | import org.jfree.chart.plot.DefaultDrawingSupplier 10 | import org.jfree.chart.renderer.category.{BarRenderer, CategoryItemRenderer, StandardBarPainter} 11 | import org.jfree.chart.renderer.xy.StandardXYBarPainter 12 | import org.jfree.chart.title.{LegendTitle, Title} 13 | import org.jfree.ui.RectangleInsets 14 | 15 | class ThinkBayesChartTheme(name: String) extends StandardChartTheme(name, false) { 16 | 17 | def strokeSequence: Array[Stroke] = Array(new BasicStroke(2.0f, BasicStroke.CAP_ROUND, BasicStroke.JOIN_ROUND)) 18 | 19 | def paintSequence: Array[Paint] = 20 | DefaultDrawingSupplier.DEFAULT_FILL_PAINT_SEQUENCE 21 | 22 | UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName) 23 | 24 | setExtraLargeFont(new Font("Helvetica Neue", Font.BOLD, 20)) 25 | setLargeFont(new Font("Helvetica Neue", Font.BOLD, 14)) 26 | setRegularFont(new Font("Helvetica Neue", Font.PLAIN, 12)) 27 | setSmallFont(new Font("Helvetica Neue", Font.PLAIN, 10)) 28 | 29 | setAxisOffset(RectangleInsets.ZERO_INSETS) 30 | 31 | setPlotOutlinePaint(ThinkBayesChartTheme.Transparent) 32 | 33 | setBarPainter(new StandardBarPainter()) 34 | setXYBarPainter(new StandardXYBarPainter()) 35 | 36 | setLegendBackgroundPaint(ThinkBayesChartTheme.Transparent) 37 | setLabelLinkPaint(ThinkBayesChartTheme.Transparent) 38 | 39 | setDrawingSupplier( 40 | new DefaultDrawingSupplier( 41 | paintSequence, 42 | DefaultDrawingSupplier.DEFAULT_FILL_PAINT_SEQUENCE, 43 | DefaultDrawingSupplier.DEFAULT_OUTLINE_PAINT_SEQUENCE, 44 | strokeSequence, 45 | DefaultDrawingSupplier.DEFAULT_OUTLINE_STROKE_SEQUENCE, 46 | DefaultDrawingSupplier.DEFAULT_SHAPE_SEQUENCE 47 | ) 48 | ) 49 | 50 | override def applyToCategoryItemRenderer(renderer: CategoryItemRenderer) { 51 | super.applyToCategoryItemRenderer(renderer) 52 | renderer match { 53 | case br: BarRenderer => br.setItemMargin(0.0) 54 | } 55 | } 56 | 57 | override def applyToTitle(title: Title) { 58 | super.applyToTitle(title) 59 | title match { 60 | case lt: LegendTitle => 61 | lt.setFrame(new LineBorder(new Color(0, 0, 0, 0), new BasicStroke(), RectangleInsets.ZERO_INSETS)) 62 | } 63 | } 64 | } 65 | 66 | object ThinkBayesChartTheme { 67 | final val Transparent = new Color(0, 0, 0, 0) 68 | 69 | object Light extends ThinkBayesChartTheme("think-bayes-light") { 70 | 71 | override def paintSequence = Array(new Color(236, 93, 87), new Color(112, 191, 65), new Color(81, 167, 249)) 72 | 73 | setChartBackgroundPaint(Color.white) 74 | setPlotBackgroundPaint(Color.white) 75 | 76 | setDomainGridlinePaint(Color.lightGray) 77 | setRangeGridlinePaint(Color.lightGray) 78 | } 79 | 80 | object Dark extends ThinkBayesChartTheme("think-bayes-dark") { 81 | 82 | override def paintSequence = 83 | Array(new Color(160, 255, 160, 128), new Color(255, 160, 160, 128), new Color(160, 160, 255, 128)) 84 | 85 | setTitlePaint(Color.white) 86 | setSubtitlePaint(Color.white) 87 | 88 | setChartBackgroundPaint(new Color(31, 32, 27)) 89 | setPlotBackgroundPaint(new Color(31, 32, 27)) 90 | 91 | setDomainGridlinePaint(Color.white) 92 | setRangeGridlinePaint(Color.white) 93 | 94 | setLegendItemPaint(Color.white) 95 | 96 | setAxisLabelPaint(Color.white) 97 | setTickLabelPaint(Color.white) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/main/scala/thinkbayes/package.scala: -------------------------------------------------------------------------------- 1 | package object thinkbayes { 2 | 3 | implicit class ProbabilityMapUtils[K](hist: Map[K, Double]) { 4 | 5 | private[this] def pad(str: String, n: Int): String = 6 | if (str.length > n) str.substring(0, n) else str + (" " * (n - str.length)) 7 | 8 | def toPmf = Pmf(hist) 9 | 10 | def print()(implicit ord: Ordering[K]) { 11 | if (hist.nonEmpty) { 12 | val keyLen = hist.keys.map(_.toString.length).max 13 | hist.toSeq 14 | .sortBy(_._1) 15 | .map { case (h, prob) => 16 | pad(h.toString, keyLen) + " " + prob 17 | } 18 | .foreach(println) 19 | } 20 | } 21 | 22 | def printChart()(implicit ord: Ordering[K]) { 23 | if (hist.nonEmpty) { 24 | val keyLen = hist.keys.map(_.toString.length).max 25 | hist.toSeq 26 | .sortBy(_._1) 27 | .map { case (h, prob) => 28 | pad(h.toString, keyLen).mkString + " " + 29 | pad(prob.toString, 6) + " " + 30 | ("#" * (50 * prob).toInt) 31 | } 32 | .foreach(println) 33 | } 34 | } 35 | } 36 | 37 | implicit def cdfProbMapUtils[K](cdf: Cdf[K]) = new ProbabilityMapUtils(cdf.iterator.toMap) 38 | implicit def suiteProbMapUtils[H](suite: Suite[H, _]) = new ProbabilityMapUtils(suite.pmf) 39 | } 40 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/PmfMatchers.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | import org.specs2.matcher.Matcher 4 | import org.specs2.mutable.SpecificationLike 5 | 6 | trait PmfMatchers { this: SpecificationLike => 7 | val defaultEpsilon = 0.00001 8 | val minRelativeError = 1e-8 9 | 10 | def beRelativelyCloseTo(expected: Double) = 11 | beCloseTo(expected, math.max(minRelativeError, math.abs(expected) * defaultEpsilon)) 12 | def beRelativelyCloseTo(expected: Int) = beCloseTo(expected, math.ceil(math.abs(expected) * defaultEpsilon)) 13 | 14 | def beCloseTo[K](otherPmf: Pmf[K]): Matcher[Pmf[K]] = { pmf: Pmf[K] => 15 | foreach(pmf) { case (k, prob) => 16 | prob aka s"The probability for $k" must beRelativelyCloseTo(otherPmf.prob(k)) 17 | } 18 | foreach(otherPmf) { case (k, prob) => 19 | pmf.prob(k) aka s"The probability for $k" must beRelativelyCloseTo(prob) 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/PmfSpec.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes 2 | 3 | import org.specs2.mutable.Specification 4 | 5 | class PmfSpec extends Specification with PmfMatchers { 6 | 7 | "A Pmf" should { 8 | 9 | "have a factory method for constructing an empty one" in { 10 | Pmf.empty[Char].toMap ==== Map.empty[Char, Double] 11 | } 12 | 13 | "have a factory method receiving outcomes and respective probabilities" in { 14 | Pmf("a" -> 0.2, "b" -> 0.2, "c" -> 0.6).toMap === Map("a" -> 0.2, "b" -> 0.2, "c" -> 0.6) 15 | Pmf('H' -> 0.4, 'T' -> 0.6).toMap === Map('H' -> 0.4, 'T' -> 0.6) 16 | 17 | Pmf(Map("a" -> 0.2, "b" -> 0.2, "c" -> 0.6)).toMap === Map("a" -> 0.2, "b" -> 0.2, "c" -> 0.6) 18 | Pmf(Map('H' -> 0.4, 'T' -> 0.6)).toMap === Map('H' -> 0.4, 'T' -> 0.6) 19 | } 20 | 21 | "have a factory method receiving a sequence of possible outcomes" in { 22 | Pmf(1 to 4).toMap === Map(1 -> 0.25, 2 -> 0.25, 3 -> 0.25, 4 -> 0.25) 23 | Pmf(Seq('H', 'T')).toMap === Map('H' -> 0.5, 'T' -> 0.5) 24 | } 25 | 26 | "have a factory method receiving a sequence of samples" in { 27 | Pmf(Seq(1, 1, 2, 2, 3, 3, 3)).toMap === Map(1 -> 2.0 / 7, 2 -> 2.0 / 7, 3 -> 3.0 / 7) 28 | Pmf(Seq(true, true, false)).toMap === Map(true -> 2 / 3.0, false -> 1 / 3.0) 29 | } 30 | 31 | "allow retrieving the probability of an outcome or a set of outcomes" in { 32 | val pmf = Pmf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 33 | pmf.prob('a') === 0.2 34 | pmf.prob(_ < 'c') === 0.4 35 | } 36 | 37 | "allow retrieving the outcome with maximum probability (mode)" in { 38 | val pmf = Pmf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 39 | pmf.maxProb === ('c', 0.6) 40 | pmf.mode === 'c' 41 | } 42 | 43 | "allow calculating its mean when its outcomes are numeric" in { 44 | Pmf(0 -> 0.2, 1 -> 0.2, 2 -> 0.6).mean === 0.2 + 2 * 0.6 45 | Pmf(2.0 -> 0.5, 3.0 -> 0.5).mean === 2.5 46 | } 47 | 48 | "allow calculating its variance when its outcomes are numeric" in { 49 | Pmf(0 -> 0.2, 1 -> 0.2, 2 -> 0.6).variance must 50 | beCloseTo(0.2 * (1.4 * 1.4) + 0.2 * (0.4 * 0.4) + 0.6 * (0.6 * 0.6), 0.00001) 51 | } 52 | 53 | "allow taking random samples from it" in { 54 | val nRuns = 10000 55 | val pmf = Pmf(0 -> 0.4, 1 -> 0.6) 56 | val samplePmf = Pmf(Iterator.fill(nRuns)(pmf.sample())) 57 | 58 | // Warning: this test will fail approximately one every billion runs. 59 | // You can check the failure probability using this very library: 60 | // scala> binomialPmf(10000, 0.6).credibleInterval(1.0 - 1.0 / 1e9) 61 | // res7: (Int, Int) = (5700,6298) 62 | samplePmf.prob(1) must beBetween(0.57, 0.63) 63 | } 64 | 65 | "allow re-normalizing the result to keep the sum of all probabilities equal to 1.0" in { 66 | val pmf = Pmf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 67 | pmf.filterKeys(_ != 'a') must beCloseTo(Pmf(Map('b' -> 0.2, 'c' -> 0.6))) 68 | pmf.filterKeys(_ != 'a').normalized must beCloseTo(Pmf('b' -> 0.25, 'c' -> 0.75)) 69 | } 70 | 71 | "provide Map-like methods that keep the Pmf original type when applicable" in { 72 | val pmf = Pmf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 73 | 74 | (pmf - 'a').normalized must beCloseTo(Pmf('b' -> 0.25, 'c' -> 0.75)) 75 | (pmf + ('d' -> 1.0)).normalized must beCloseTo(Pmf('a' -> 0.1, 'b' -> 0.1, 'c' -> 0.3, 'd' -> 0.5)) 76 | 77 | pmf.map { case (k, v) => ((k + 1).toChar, v) } ==== Pmf('b' -> 0.2, 'c' -> 0.2, 'd' -> 0.6) 78 | pmf.map { case (k, v) => ((k + 1).toChar, k) } ==== Map('b' -> 'a', 'c' -> 'b', 'd' -> 'c') 79 | pmf.mapKeys { k => (k + 1).toChar } ==== Pmf('b' -> 0.2, 'c' -> 0.2, 'd' -> 0.6) 80 | 81 | pmf.filter(_._1 == 'a').normalized ==== Pmf('a' -> 1.0) 82 | pmf.filterKeys(_ != 'a').normalized must beCloseTo(Pmf('b' -> 0.25, 'c' -> 0.75)) 83 | 84 | pmf.toSet ==== Set('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 85 | } 86 | 87 | "allow being summed or subtracted by another when their outcomes are numeric" in { 88 | val d6 = Pmf(1 to 6) 89 | d6 ++ d6 must beCloseTo(Pmf(for { i <- 1 to 6; j <- 1 to 6 } yield i + j)) 90 | d6 -- d6 must beCloseTo(Pmf(for { i <- 1 to 6; j <- 1 to 6 } yield i - j)) 91 | } 92 | 93 | "allow being combined with another using a custom join function" in { 94 | val d6 = Pmf(1 to 6) 95 | d6.join(d6)(math.max) must beCloseTo(Pmf(for { i <- 1 to 6; j <- 1 to 6 } yield math.max(i, j))) 96 | 97 | val coin = Pmf('H' -> 0.4, 'T' -> 0.6) 98 | coin.join(coin)(_.toString + _) must beCloseTo(Pmf("HH" -> 0.16, "HT" -> 0.24, "TH" -> 0.24, "TT" -> 0.36)) 99 | } 100 | 101 | "allow being flattened (mixtured) if its keys are also Pmfs" in { 102 | def die(n: Int) = Pmf(1 to n) 103 | val bag = Pmf(Seq(die(4), die(6))) // a bag containing 2 different dice 104 | 105 | val expectedMix = Pmf((for { dieN <- List(4, 6); i <- 1 to dieN } yield i -> 0.5 / dieN): _*) 106 | bag.mixture must beCloseTo(expectedMix) // roll of a random die from the bag 107 | } 108 | 109 | "allow being converted into a Cdf" in { 110 | val pmf = Pmf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 111 | pmf.toCdf === Cdf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 112 | pmf.toCdf.iterator.toSeq === Seq('a' -> 0.2, 'b' -> 0.4, 'c' -> 1.0) 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/DiceApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes._ 4 | 5 | /** Application for solving the dice problem (page 19): 6 | * 7 | * "Suppose I have a box of dice that contains a 4-sided die, a 6-sided die, an 8-sided die, a 12-sided die, and a 8 | * 20-sided die. If you have ever played Dungeons & Dragons, you know what I am talking about. 9 | * 10 | * Suppose I select a die from the box at random, roll it, and get a 6. What is the probability that I rolled each 11 | * die?" 12 | */ 13 | object DiceApp extends App { 14 | 15 | case class Dice(hypos: Seq[Int]) extends SimpleSuite[Int, Int] { 16 | val pmf = Pmf(hypos) 17 | 18 | def likelihood(data: Int, hypo: Int) = 19 | if (hypo < data) 0 else 1.0 / hypo 20 | } 21 | 22 | // --------- 23 | 24 | val prior = new Dice(List(4, 6, 8, 12, 20)) 25 | 26 | println("Priors:") 27 | prior.printChart() 28 | 29 | println() 30 | println("After a 6 is rolled:") 31 | val posterior = prior.observed(6) 32 | posterior.printChart() 33 | 34 | println() 35 | println("After 6, 8, 7, 7, 5, 4 are rolled after the first 6:") 36 | val posterior2 = posterior.observed(6, 8, 7, 7, 5, 4) 37 | posterior2.printChart() 38 | } 39 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/DungeonsApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes.Pmf 4 | import thinkbayes.extensions.Plotting._ 5 | import thinkbayes.extensions.Sampling._ 6 | 7 | /** Application for solving the Dungeons and Dragons d6 problem (page 42): 8 | * 9 | * "The first example is based on Dungeons & Dragons, a role-playing game where the results of players’ decisions are 10 | * usually determined by rolling dice. In fact, before game play starts, players generate each attribute of their 11 | * characters—strength, intelligence, wisdom, dexterity, constitution, and charisma—by rolling three 6-sided dice and 12 | * adding them up. 13 | * 14 | * So you might be curious to know the distribution of this sum." 15 | */ 16 | object DungeonsApp extends App { 17 | 18 | def die(sides: Int) = Pmf(1 to sides) 19 | 20 | // --------- 21 | 22 | // sum and maxima 23 | val three = Seq.fill(3)(die(6)) 24 | 25 | val threeSum = sampleSum(three, 1000).normalized 26 | val threeSumExact = three.reduce(_ ++ _).normalized 27 | 28 | val chartSum = threeSum.showXY("Sample", title = "Sum of three d6", xLabel = "Sum") 29 | threeSumExact.plotXYOn(chartSum, "Exact") 30 | 31 | val threeMax = sampleMax(three, 1000).normalized 32 | val threeMaxExp = die(6).toCdf.pow(3).toPmf 33 | 34 | val chartMax = threeMax.showXY("Sample", title = "Max of three d6", xLabel = "Max") 35 | threeMaxExp.plotXYOn(chartMax, "Exponential") 36 | 37 | // mixture 38 | val five = Pmf(List(4, 6, 8, 12, 20).map(die)) 39 | val mix = five.mixture 40 | 41 | mix.showBar("Outcome", title = "Outcome of random die from a box", xLabel = "Outcome") 42 | } 43 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/EuroApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes._ 4 | import thinkbayes.extensions.Distributions._ 5 | import thinkbayes.extensions.Plotting._ 6 | import thinkbayes.extensions.Stats._ 7 | 8 | /** Application for solving the Euro problem (page 29): 9 | * 10 | * "When spun on edge 250 times, a Belgian one-euro coin came up heads 140 times and tails 110. ‘It looks very 11 | * suspicious to me,’ said Barry Blight, a statistics lecturer at the London School of Economics. ‘If the coin were 12 | * unbiased, the chance of getting a result as extreme as that would be less than 7%.’ 13 | * 14 | * But do these data give evidence that the coin is biased rather than fair?" 15 | */ 16 | object EuroApp extends App { 17 | 18 | type CoinSide = Boolean 19 | val Heads = true 20 | val Tails = false 21 | 22 | def trianglePmf(unit: Double = 1.0): Pmf[Double] = { 23 | val hist = (0.0 to 100.0 by unit).map { hypo => 24 | (hypo, if (hypo <= 50.0) hypo else 100.0 - hypo) 25 | }.toMap 26 | 27 | Pmf(hist).normalized 28 | } 29 | 30 | case class Euro(unit: Double = 1.0, triangle: Boolean = false) extends SimpleSuite[Double, CoinSide] { 31 | val pmf = if (triangle) trianglePmf(unit) else Pmf(0.0 to 100.0 by unit) 32 | 33 | def likelihood(data: CoinSide, hypo: Double) = (if (data == Heads) hypo else 100.0 - hypo) / 100.0 34 | } 35 | 36 | case class EuroConjugate(alpha: Double = 1.0, beta: Double = 1.0) extends Suite[Double, CoinSide] { 37 | def pmf = betaPdf(alpha, beta).toPmf(0.0 to 1.0 by 0.0005) 38 | 39 | override def observed(data: CoinSide) = 40 | if (data == Heads) EuroConjugate(alpha + 1.0, beta) 41 | else EuroConjugate(alpha, beta + 1.0) 42 | 43 | override def observedSet(dataset: TraversableOnce[CoinSide]) = observedSet(dataset.foldLeft((0, 0)) { 44 | case ((t, f), data) => if (data == Heads) (t + 1, f) else (t, f + 1) 45 | }) 46 | 47 | def observedSet(dataCounts: (Int, Int)): Suite[Double, CoinSide] = 48 | EuroConjugate(alpha + dataCounts._1, beta + dataCounts._2) 49 | } 50 | 51 | // --------- 52 | 53 | val unifPrior = Euro() 54 | val triPrior = Euro(triangle = true) 55 | 56 | println("Plotting priors...") 57 | val priorPlot = unifPrior.showXY("Uniform", title = "Prior", xLabel = "Probability of heads (%)") 58 | triPrior.plotXYOn(priorPlot, "Triangle") 59 | 60 | println("Plotting posteriors after 140 heads and 110 tails are seen...") 61 | val dataset = Seq.fill(140)(Heads) ++ Seq.fill(110)(Tails) 62 | val unifPosterior = unifPrior.observedSet(dataset) 63 | val triPosterior = triPrior.observedSet(dataset) 64 | 65 | val postPlot = unifPosterior.showXY("Uniform", title = "Posterior", xLabel = "Probability of heads (%)") 66 | triPosterior.plotXYOn(postPlot, "Triangle") 67 | 68 | println() 69 | println("Posterior distribution stats with uniform prior:") 70 | println("Hypothesis with highest probability: " + unifPosterior.pmf.maxProb._1) 71 | println("Mean of the distribution: " + unifPosterior.pmf.mean) 72 | println("Median of the distribution: " + unifPosterior.pmf.quantile(0.5)) 73 | println("90%% credible interval: " + unifPosterior.pmf.credibleInterval(0.9)) 74 | 75 | println() 76 | println("Posterior distribution stats with triangle prior:") 77 | println("Hypothesis with highest probability: " + triPosterior.pmf.maxProb._1) 78 | println("Mean of the distribution: " + triPosterior.pmf.mean) 79 | println("Median of the distribution: " + triPosterior.pmf.quantile(0.5)) 80 | println("90%% credible interval: " + triPosterior.pmf.credibleInterval(0.9)) 81 | 82 | println() 83 | println("Plotting posterior using a beta distribution...") 84 | val conjPrior = EuroConjugate() 85 | val conjPosterior = conjPrior.observedSet(140, 110) 86 | conjPosterior.showXY("Beta", title = "Beta distribution", xLabel = "Probability of heads") 87 | } 88 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/HockeyApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes._ 4 | import thinkbayes.extensions.Distributions._ 5 | import thinkbayes.extensions.Plotting._ 6 | 7 | /** Application for studying the The Boston Bruins problem (page 65): 8 | * 9 | * "In the 2010-11 National Hockey League (NHL) Finals, my beloved Boston Bruins played a best-of-seven championship 10 | * series against the despised Vancouver Canucks. Boston lost the first two games 0-1 and 2-3, then won the next two 11 | * games 8-1 and 4-0. At this point in the series, what is the probability that Boston will win the next game, and what 12 | * is their probability of winning the championship?" 13 | */ 14 | object HockeyApp extends App { 15 | 16 | object Hockey extends SimpleSuite[Double, Int] { 17 | val pmf = normalPmf(2.7, 0.3, steps = 100) 18 | 19 | def likelihood(k: Int, lam: Double) = poissonPmf(lam).prob(k) 20 | } 21 | 22 | def numGoalsPmf(goalsPerGamePmf: Pmf[Double]): Pmf[Int] = 23 | goalsPerGamePmf.mapKeys(poissonPmf).mixture 24 | 25 | def goalTimePmf(goalsPerGamePmf: Pmf[Double]): Pmf[Double] = 26 | goalsPerGamePmf.mapKeys(exponentialPmf(_, absCutoff = 2.0)).mixture 27 | 28 | // --------- 29 | 30 | println("Plotting the posterior distribution of the average number of goals per game...") 31 | val perGameChartTitle = "Average number of goals per game" 32 | 33 | val bruinsPosterior = Hockey.observed(0, 2, 8, 4) 34 | val canucksPosterior = Hockey.observed(1, 3, 1, 0) 35 | 36 | val perGameChart = bruinsPosterior.showXY("Bruins", title = perGameChartTitle, xLabel = "Goals per game") 37 | canucksPosterior.plotXYOn(perGameChart, "Canucks") 38 | 39 | println("Plotting the distribution of goals in a single game...") 40 | val goalsChartTitle = "Goals in a single game" 41 | 42 | val bruinsGoalsPmf = numGoalsPmf(bruinsPosterior.pmf) 43 | val canucksGoalsPmf = numGoalsPmf(canucksPosterior.pmf) 44 | 45 | val goalsChart = bruinsGoalsPmf.showXY("Bruins", title = goalsChartTitle, xLabel = "Goals") 46 | canucksGoalsPmf.plotXYOn(goalsChart, "Canucks") 47 | 48 | println() 49 | println("Outcome at the end of regulation play:") 50 | 51 | val goalDiffPmf = bruinsGoalsPmf -- canucksGoalsPmf 52 | 53 | println("Win: %.2f%%".format(goalDiffPmf.prob(_ > 0) * 100.0)) 54 | println("Tie: %.2f%%".format(goalDiffPmf.prob(0) * 100.0)) 55 | println("Lose: %.2f%%".format(goalDiffPmf.prob(_ < 0) * 100.0)) 56 | 57 | println() 58 | println("Plotting the distribution of the time between goals...") 59 | val goalTimeChartTitle = "Time between goals" 60 | 61 | val bruinsGoalTimePmf = goalTimePmf(bruinsPosterior.pmf) 62 | val canucksGoalTimePmf = goalTimePmf(canucksPosterior.pmf) 63 | 64 | val goalTimeChart = bruinsGoalTimePmf.showXY("Bruins", title = goalTimeChartTitle, xLabel = "Games until goal") 65 | canucksGoalTimePmf.plotXYOn(goalTimeChart, "Canucks") 66 | 67 | println() 68 | println("Outcome if an overtime occurs:") 69 | 70 | val timeDiffPmf = bruinsGoalTimePmf -- canucksGoalTimePmf 71 | 72 | println("Win: %.2f%%".format(timeDiffPmf.prob(_ < 0) * 100.0)) 73 | println("Lose: %.2f%%".format(timeDiffPmf.prob(_ > 0) * 100.0)) 74 | 75 | println() 76 | 77 | val probWin = goalDiffPmf.prob(_ > 0) + goalDiffPmf.prob(0) * timeDiffPmf.prob(_ < 0) 78 | println("Overall probability of winning the next game: %.2f%%".format(probWin * 100.0)) 79 | 80 | val probWinSeries = probWin * probWin + 2 * probWin * (1 - probWin) * probWin 81 | println("Overall probability of winning the series: %.2f%%".format(probWinSeries * 100.0)) 82 | } 83 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/LocomotiveApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes._ 4 | import thinkbayes.extensions.Plotting._ 5 | import thinkbayes.extensions.Stats._ 6 | 7 | /** Application for solving the locomotive problem (page 20): 8 | * 9 | * "A railroad numbers its locomotives in order 1..N. One day you see a locomotive with the number 60. Estimate how 10 | * many locomotives the railroad has." 11 | */ 12 | object LocomotiveApp extends App { 13 | 14 | case class Locomotive(hypos: Seq[Int], alpha: Double = 0.0) extends SimpleSuite[Int, Int] { 15 | val pmf = Pmf(hypos.map { hypo => (hypo, math.pow(hypo, -alpha)) }.toMap).normalized 16 | 17 | def likelihood(data: Int, hypo: Int) = 18 | if (hypo < data) 0 else 1.0 / hypo 19 | } 20 | 21 | // --------- 22 | 23 | val prior = Locomotive(1 to 1000) 24 | val prior2 = Locomotive(1 to 1000, 1.0) 25 | 26 | println("Plotting priors...") 27 | val priorPlot = prior.showXY("Uniform", title = "Prior", xLabel = "Number of trains") 28 | prior2.plotXYOn(priorPlot, "Power law") 29 | 30 | println() 31 | println("Plotting posteriors after a train with number 60 is seen...") 32 | val posterior = prior.observed(60) 33 | val posterior2 = prior2.observed(60) 34 | val postPlot = posterior.showXY("Uniform", title = "After train #60", xLabel = "Number of trains") 35 | posterior2.plotXYOn(postPlot, "Power law") 36 | 37 | println() 38 | println("Mean of the distribution after #60 is seen:") 39 | println("Uniform prior: " + posterior.pmf.mean) 40 | println("Power law prior: " + posterior2.pmf.mean) 41 | 42 | println() 43 | println("90% credible interval after #60 is seen:") 44 | println("Uniform prior: " + posterior.pmf.credibleInterval(0.9)) 45 | println("Power law prior: " + posterior2.pmf.credibleInterval(0.9)) 46 | 47 | println() 48 | println("Mean of the distribution after #30 and #90 are seen after #60:") 49 | val posterior3 = posterior.observed(60, 90) 50 | val posterior4 = posterior2.observed(60, 90) 51 | println("Uniform prior: " + posterior3.pmf.mean) 52 | println("Power law prior: " + posterior4.pmf.mean) 53 | 54 | println() 55 | println("90% credible interval after #30 and #90 are seen after #60:") 56 | println("Uniform prior: " + posterior3.pmf.credibleInterval(0.9)) 57 | println("Power law prior: " + posterior4.pmf.credibleInterval(0.9)) 58 | } 59 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/MMApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes._ 4 | 5 | /** Application for solving the M&M problem (page 6): 6 | * 7 | * "M&M’s are small candy-coated chocolates that come in a variety of colors. Mars, Inc., which makes M&M’s, changes 8 | * the mixture of colors from time to time. 9 | * 10 | * In 1995, they introduced blue M&M’s. Before then, the color mix in a bag of plain M&M’s was 30% Brown, 20% Yellow, 11 | * 20% Red, 10% Green, 10% Orange, 10% Tan. Afterward it was 24% Blue , 20% Green, 16% Orange, 14% Yellow, 13% Red, 13% 12 | * Brown. 13 | * 14 | * Suppose a friend of mine has two bags of M&M’s, and he tells me that one is from 1994 and one from 1996. He won’t 15 | * tell me which is which, but he gives me one M&M from each bag. One is yellow and one is green. What is the 16 | * probability that the yellow one came from the 1994 bag?" 17 | */ 18 | object MMApp extends App { 19 | 20 | type Color = String 21 | type Mix = Map[Color, Int] 22 | type Bag = String 23 | 24 | val mix94 = Map("brown" -> 30, "yellow" -> 20, "red" -> 20, "green" -> 10, "orange" -> 10, "tan" -> 10) 25 | 26 | val mix96 = Map("blue" -> 24, "green" -> 20, "orange" -> 16, "yellow" -> 14, "red" -> 13, "brown" -> 13) 27 | 28 | case class MM(hypos: Seq[Char], hypoDefs: Map[Char, Map[Bag, Mix]]) extends SimpleSuite[Char, (Bag, Color)] { 29 | val pmf = Pmf(hypos) 30 | 31 | def likelihood(data: (Bag, Color), hypo: Char) = 32 | hypoDefs(hypo)(data._1).getOrElse(data._2, 0).toDouble 33 | } 34 | 35 | // --------- 36 | 37 | val hypoA = Map("bag1" -> mix94, "bag2" -> mix96) 38 | val hypoB = Map("bag1" -> mix96, "bag2" -> mix94) 39 | 40 | val prior = MM("AB", Map('A' -> hypoA, 'B' -> hypoB)) 41 | 42 | val posterior = prior.observed("bag1" -> "yellow", "bag2" -> "green") 43 | posterior.printChart() 44 | } 45 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/MontyApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes._ 4 | 5 | /** Application for solving the Monty Hall problem (page 7): 6 | * 7 | * "Monty Hall was the original host of the game show Let’s Make a Deal. The Monty Hall problem is based on one of the 8 | * regular games on the show. If you are on the show, here’s what happens: 9 | * 10 | * 17 | * 18 | * The question is, should you “stick” or “switch” or does it make no difference?" 19 | */ 20 | object MontyApp extends App { 21 | 22 | case class Monty(hypos: Seq[Char], firstChoice: Char) extends SimpleSuite[Char, Char] { 23 | val pmf = Pmf(hypos) 24 | 25 | def likelihood(opened: Char, hypo: Char) = 26 | if (opened == hypo) 0 // if the door was opened, it is surely not the winning door 27 | else if (hypo == firstChoice) 1.0 / (hypos.length - 1) // Monty can open any door other than the winning one 28 | else 1.0 / (hypos.length - 2) // Monty can open any door other than the winning one and the chosen one 29 | } 30 | 31 | // --------- 32 | 33 | val prior = Monty("ABC", 'A') // doors A, B and C, first choice is A 34 | 35 | println("Before any door is opened:") 36 | prior.printChart() // print the probability of each hypothesis 37 | 38 | println() 39 | println("After Monty opens door B:") 40 | val posterior = prior.observed('B') // Monty opens B 41 | posterior.printChart() // print the probability of each hypothesis 42 | } 43 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/PriceIsRightApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation 4 | import scala.io.Source 5 | import thinkbayes._ 6 | import thinkbayes.extensions.Distributions._ 7 | import thinkbayes.extensions.Plotting._ 8 | 9 | /** Application for studying the The Price is Right problem (page 51): 10 | * 11 | * "On November 1, 2007, contestants named Letia and Nathaniel appeared on The Price is Right, an American game show. 12 | * They competed in a game called The Showcase, where the objective is to guess the price of a showcase of prizes. The 13 | * contestant who comes closest to the actual price of the showcase, without going over, wins the prizes. 14 | * 15 | * Nathaniel went first. His showcase included a dishwasher, a wine cabinet, a laptop computer, and a car. He bid 16 | * \$26,000. 17 | * 18 | * Letia’s showcase included a pinball machine, a video arcade game, a pool table, and a cruise of the Bahamas. She bid 19 | * \$21,500. 20 | * 21 | * The actual price of Nathaniel’s showcase was \$25,347. His bid was too high, so he lost. 22 | * 23 | * The actual price of Letia’s showcase was \$21,578. She was only off by \$78, so she won her showcase and, because 24 | * her bid was off by less than \$250, she also won Nathaniel’s showcase. 25 | * 26 | * For a Bayesian thinker, this scenario suggests several questions:
  1. Before seeing the prizes, what prior 27 | * beliefs should the contestant have about the price of the showcase?
  2. After seeing the prizes, how should the 28 | * contestant update those beliefs?
  3. Based on the posterior distribution, what should the contestant bid?
  4. 29 | *
" 30 | * 31 | * Note: this application requires the files "showcases.2011.csv" and "showcases.2012.csv" to be in the root folder of 32 | * the project. The files are available at [[http://thinkbayes.com/showcases.2011.csv]] and 33 | * [[http://thinkbayes.com/showcases.2012.csv]]. 34 | */ 35 | object PriceIsRightApp extends App { 36 | 37 | val maxPrice = 75000.0 38 | val step = 100.0 39 | 40 | case class Player(prices: Array[Int], bids: Array[Int], diffs: Array[Int]) { 41 | val showcasePdf = estimatePdf(prices) 42 | val showcasePmf = showcasePdf.toPmf(0.0 to maxPrice by step) 43 | val diffCdf = Cdf(diffs) 44 | 45 | val errorMean = 0.0 46 | val errorStdev = new StandardDeviation().evaluate(diffs.map(_.toDouble)) 47 | val errorPdf = normalPdf(errorMean, errorStdev) 48 | 49 | def probOverbid: Double = diffCdf.prob(-1) 50 | def probWorseThan(diff: Int): Double = 1.0 - diffCdf.prob(diff) 51 | } 52 | 53 | case class Price(player: Player) extends SimpleSuite[Double, Double] { 54 | val pmf = player.showcasePmf 55 | 56 | def likelihood(guess: Double, showcase: Double): Double = 57 | player.errorPdf.density(showcase - guess) 58 | } 59 | 60 | case class GainCalculator(player: Player, opponent: Player, playerGuess: Int, opponentGuess: Int) { 61 | val playerSuite = Price(player).observed(playerGuess) 62 | val opponentSuite = Price(opponent).observed(opponentGuess) 63 | 64 | def probWin(diff: Int): Double = opponent.probOverbid + opponent.probWorseThan(diff) 65 | 66 | def gain(bid: Int, price: Int): Double = 67 | if (bid > price) 0 68 | else { 69 | val diff = price - bid 70 | val prob = probWin(diff) 71 | 72 | if (diff <= 250) 2 * price * prob 73 | else price * prob 74 | } 75 | 76 | def expectedGain(bid: Int): Double = 77 | playerSuite.pmf.map { case (price, prob) => prob * gain(bid, price.toInt) }.sum 78 | 79 | def optimalBid: (Double, Double) = 80 | (0.0 to maxPrice by step).map { bid => (expectedGain(bid.toInt), bid) }.max.swap 81 | } 82 | 83 | // --------- 84 | 85 | def getDataFromCsv(file: String) = Source 86 | .fromFile(file) 87 | .getLines() 88 | .filter { line => line.matches(".*(Bid|Showcase|Difference).*") } 89 | .map(_.split(",").drop(1).map(_.toInt)) 90 | 91 | val dataShowcase1 :: dataShowcase2 :: dataBids1 :: dataBids2 :: dataDiff1 :: dataDiff2 :: _ = 92 | getDataFromCsv("showcases.2011.csv") 93 | .zip(getDataFromCsv("showcases.2012.csv")) 94 | .map { case (row1, row2) => row1 ++ row2 } 95 | .toList 96 | 97 | val player1 = Player(dataShowcase1, dataBids1, dataDiff1) 98 | val player2 = Player(dataShowcase2, dataBids2, dataDiff2) 99 | 100 | // --------- 101 | 102 | println("Plotting the showcase price distributions...") 103 | val scChartTitle = "Prices of showcases 2011-2012" 104 | 105 | val scChart = player1.showcasePmf.showXY("Showcase 1", title = scChartTitle, xLabel = "Price ($)") 106 | player2.showcasePmf.plotXYOn(scChart, "Showcase 2") 107 | 108 | // --------- 109 | 110 | println("Plotting the distribution of the bid errors...") 111 | val diffChartTitle = "Difference between the players' bid and the actual price" 112 | 113 | val diffChart = player1.diffCdf.showXY("Player 1", title = diffChartTitle, xLabel = "Diff ($)") 114 | player2.diffCdf.plotXYOn(diffChart, "Player 2") 115 | 116 | // --------- 117 | 118 | println("Plotting prior and posterior distributions for player 1 based on a best guess of 20000$...") 119 | val guessChartTitle = "Distributions for player 1 based on a best guess of 20000$" 120 | 121 | val pricePrior = Price(player1) 122 | val guessChart = pricePrior.showXY("Prior", title = guessChartTitle, xLabel = "Price ($)") 123 | 124 | val pricePosterior = pricePrior.observed(20000) 125 | pricePosterior.plotXYOn(guessChart, "Posterior") 126 | 127 | // --------- 128 | 129 | println() 130 | println("Optimal bids when the best guess of player 1 is 20000$ and the best guess of player 2 is 40000$:") 131 | 132 | val (optimal1, gain1) = GainCalculator(player1, player2, 20000, 40000).optimalBid 133 | val (optimal2, gain2) = GainCalculator(player2, player1, 40000, 20000).optimalBid 134 | 135 | println("Player 1 bid: %.2f$, with expected gain of %.2f$".format(optimal1, gain1)) 136 | println("Player 2 bid: %.2f$, with expected gain of %.2f$".format(optimal2, gain2)) 137 | } 138 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/examples/RedLineApp.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.examples 2 | 3 | import thinkbayes._ 4 | import thinkbayes.extensions.Distributions._ 5 | import thinkbayes.extensions.Plotting._ 6 | 7 | /** Application for studying the The Red Line problem (page 77): 8 | * 9 | * "In Massachusetts, the Red Line is a subway that connects Cambridge and Boston. When I was working in Cambridge I 10 | * took the Red Line from Kendall Square to South Station and caught the commuter rail to Needham. During rush hour Red 11 | * Line trains run every 7–8 minutes, on average. 12 | * 13 | * When I arrived at the station, I could estimate the time until the next train based on the number of passengers on 14 | * the platform. If there were only a few people, I inferred that I just missed a train and expected to wait about 7 15 | * minutes. If there were more passengers, I expected the train to arrive sooner. But if there were a large number of 16 | * passengers, I suspected that trains were not running on schedule, so I would go back to the street level and get a 17 | * taxi. 18 | * 19 | * While I was waiting for trains, I thought about how Bayesian estimation could help predict my wait time and decide 20 | * when I should give up and take a taxi. This chapter presents the analysis I came up with." 21 | */ 22 | object RedLineApp extends App { 23 | 24 | val observedGapTimes = List(428.0, 705.0, 407.0, 465.0, 433.0, 425.0, 204.0, 506.0, 143.0, 351.0, 450.0, 598.0, 464.0, 25 | 749.0, 341.0, 586.0, 754.0, 256.0, 378.0, 435.0, 176.0, 405.0, 360.0, 519.0, 648.0, 374.0, 483.0, 537.0, 578.0, 26 | 534.0, 577.0, 619.0, 538.0, 331.0, 186.0, 629.0, 193.0, 360.0, 660.0, 484.0, 512.0, 315.0, 457.0, 404.0, 740.0, 27 | 388.0, 357.0, 485.0, 567.0, 160.0, 428.0, 387.0, 901.0, 187.0, 622.0, 616.0, 585.0, 474.0, 442.0, 499.0, 437.0, 28 | 620.0, 351.0, 286.0, 373.0, 232.0, 393.0, 745.0, 636.0, 758.0).map(_ / 60.0) 29 | 30 | val prec = 10.0 / 60.0 31 | 32 | def biasPmf(zPmf: Pmf[Double]): Pmf[Double] = 33 | zPmf.map { case (k, prob) => (k, prob * k) }.normalized 34 | 35 | def waitTimePmf(zbPmf: Pmf[Double]): Pmf[Double] = 36 | zbPmf.mapKeys { k => Pmf(0.0 to k by prec) }.mixture 37 | 38 | case class WaitTimeCalculator(zPmf: Pmf[Double]) { 39 | val zbPmf = biasPmf(zPmf) 40 | val xPmf = waitTimePmf(zbPmf) 41 | val yPmf = xPmf 42 | } 43 | 44 | case class ElapsedTime(pmf: Pmf[Double]) extends SimpleSuite[Double, (Double, Int)] { 45 | def likelihood(data: (Double, Int), x: Double) = poissonPmf(data._1 * x).prob(data._2) 46 | } 47 | 48 | case class ElapsedTimeEstimator(calc: WaitTimeCalculator, lam: Double = 2.0, numPasengers: Int = 15) { 49 | 50 | def predictWaitTime(xPmf: Pmf[Double]): Pmf[Double] = 51 | (calc.zbPmf -- xPmf).filterKeys(_ >= 0.0).normalized 52 | 53 | val xPriorSuite = ElapsedTime(calc.xPmf) 54 | val xPostSuite = xPriorSuite.observed((lam, numPasengers)) 55 | val yPmf = predictWaitTime(xPostSuite.pmf) 56 | } 57 | 58 | val observedArrivalRates = List((17, 4.6, 9), (22, 1.0, 0), (23, 1.4, 4), (18, 5.4, 12), (4, 5.8, 11)) 59 | 60 | case class ArrivalRate(hypos: Seq[Double]) extends SimpleSuite[Double, (Double, Int)] { 61 | val pmf = Pmf(hypos) 62 | def likelihood(data: (Double, Int), lam: Double) = poissonPmf(lam * data._1).prob(data._2) 63 | } 64 | 65 | case class ArrivalRateEstimator(data: Seq[(Int, Double, Int)]) { 66 | 67 | val lamPriorSuite = ArrivalRate(0.0 to 5.0 by prec) 68 | val lamPostSuite = lamPriorSuite.observedSet(data.map { case (k1, y, k2) => (y, k2) }) 69 | } 70 | 71 | // --------- 72 | 73 | println("Plotting the distribution of gap time between trains...") 74 | val gapChartTitle = "Time between trains" 75 | 76 | val zPmf = estimatePdf(observedGapTimes).toPmf(0.0 to 20.0 by prec / 2) 77 | val calc = WaitTimeCalculator(zPmf) 78 | 79 | val gapChart = zPmf.showXY("Actual (z)", title = gapChartTitle, xLabel = "Minutes") 80 | calc.zbPmf.plotXYOn(gapChart, "As seen by passengers (zb)") 81 | 82 | println("Plotting the CDF of gap and wait times...") 83 | val cdfTimesChartTitle = "CDF of gap and wait times" 84 | 85 | val cdfTimesChart = zPmf.toCdf.showXY("Actual gap time (z)", title = cdfTimesChartTitle, xLabel = "Minutes") 86 | 87 | calc.zbPmf.toCdf.plotXYOn(cdfTimesChart, "Biased gap time (zb)") 88 | calc.xPmf.toCdf.plotXYOn(cdfTimesChart, "Wait time (y)") 89 | 90 | println( 91 | "Plotting the CDF of wait times after seeing 15 passengers and considering 2 arrivals " + 92 | "per minute..." 93 | ) 94 | val postWaitChartTitle = "CDF of wait times after seeing 15 passengers, 2 arrivals/min" 95 | 96 | val ete = ElapsedTimeEstimator(calc) 97 | 98 | val postWaitChart = 99 | ete.xPriorSuite.pmf.toCdf.showXY("Prior x", title = postWaitChartTitle, xLabel = "Wait time (min)") 100 | 101 | ete.xPostSuite.pmf.toCdf.plotXYOn(postWaitChart, "Posterior x") 102 | ete.yPmf.toCdf.plotXYOn(postWaitChart, "Predicted y") 103 | 104 | // --------- 105 | 106 | println("Plotting the distribution of the arrival rate after five days of passenger data...") 107 | val arrivalRatesChartTitle = "CDF of arrival rates after five days of passenger data" 108 | 109 | val are = ArrivalRateEstimator(observedArrivalRates) 110 | 111 | val arrivalRatesChart = are.lamPriorSuite.pmf.toCdf 112 | .showXY("Prior λ", title = arrivalRatesChartTitle, xLabel = "Arrival rate (passengers / min)") 113 | 114 | are.lamPostSuite.pmf.toCdf.plotXYOn(arrivalRatesChart, "Posterior λ") 115 | 116 | // --------- 117 | 118 | println("Plotting the predictive distribution of y....") 119 | val predWaitChartTitle = "CDF of wait times considering the distribution of λ" 120 | 121 | val yPredPmf = are.lamPostSuite.pmf.mapKeys(ElapsedTimeEstimator(calc, _).yPmf).mixture 122 | 123 | val predWaitChart = yPredPmf.toCdf.showXY("Mix", title = predWaitChartTitle, xLabel = "Wait time (min)") 124 | } 125 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/extensions/DistributionsSpec.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions 2 | 3 | import org.specs2.mutable.Specification 4 | import thinkbayes._ 5 | import thinkbayes.extensions.Distributions._ 6 | 7 | class DistributionsSpec extends Specification with PmfMatchers { 8 | 9 | "The Distributions extension" should { 10 | 11 | "provide a way to create Pdfs for normal distributions" in { 12 | todo 13 | } 14 | 15 | "provide a way to create approximate Pmfs for normal distributions" in { 16 | forall(-5.0 to 5.0 by 1.0) { mean => 17 | forall(0.5 to 7.0 by 0.5) { sd => 18 | val pmf = normalPmf(mean, sd, numSigmas = 6).normalized 19 | pmf.mean must beRelativelyCloseTo(mean) 20 | pmf.variance must beRelativelyCloseTo(sd * sd) 21 | pmf.toCdf.value(0.5) must beRelativelyCloseTo(mean) 22 | } 23 | } 24 | } 25 | 26 | "provide a way to create Pmfs for Poisson distributions" in { 27 | def poissonFunc(lam: Double, k: Int) = math.pow(lam, k) * math.exp(-lam) / (1 to k).product 28 | 29 | poissonPmf(1.5).filterKeys(_ <= 5) must beCloseTo((0 to 5).map { k => (k, poissonFunc(1.5, k)) }.toMap.toPmf) 30 | foreach(Seq(0.1, 1.0, 3.9)) { lam => poissonPmf(lam).mean must beCloseTo(lam, 0.001) } 31 | } 32 | 33 | "provide a way to create Pdfs for exponential distributions" in { 34 | todo 35 | } 36 | 37 | "provide a way to create approximate Pmfs for exponential distributions" in { 38 | todo 39 | } 40 | 41 | "provide a way to create Pmfs for binomial distributions" in { 42 | binomialPmf(2, 0.6) must beCloseTo(Pmf(0 -> 0.16, 1 -> 0.48, 2 -> 0.36)) 43 | binomialPmf(1, 0.6) must beCloseTo(Pmf(0 -> 0.4, 1 -> 0.6)) 44 | binomialPmf(0, 0.6) must beCloseTo(Pmf(0 -> 1.0)) 45 | 46 | binomialPmf(100, 1.0) must beCloseTo(Pmf(100 -> 1.0)) 47 | binomialPmf(100, 0.0) must beCloseTo(Pmf(0 -> 1.0)) 48 | } 49 | 50 | "provide a way to create Pmfs for hypergeometric distributions" in { 51 | hypergeometricPmf(10, 2, 9) must beCloseTo(Pmf(1 -> 0.2, 2 -> 0.8)) 52 | hypergeometricPmf(10, 2, 2) must beCloseTo(Pmf(0 -> 28.0 / 45, 1 -> 16.0 / 45, 2 -> 1.0 / 45)) 53 | hypergeometricPmf(10, 2, 1) must beCloseTo(Pmf(0 -> 0.8, 1 -> 0.2)) 54 | hypergeometricPmf(10, 2, 0) must beCloseTo(Pmf(0 -> 1.0)) 55 | 56 | hypergeometricPmf(100, 100, 100) must beCloseTo(Pmf(100 -> 1.0)) 57 | hypergeometricPmf(100, 100, 50) must beCloseTo(Pmf(50 -> 1.0)) 58 | hypergeometricPmf(100, 100, 49) must beCloseTo(Pmf(49 -> 1.0)) 59 | hypergeometricPmf(100, 100, 0) must beCloseTo(Pmf(0 -> 1.0)) 60 | hypergeometricPmf(100, 50, 100) must beCloseTo(Pmf(50 -> 1.0)) 61 | hypergeometricPmf(100, 0, 50) must beCloseTo(Pmf(0 -> 1.0)) 62 | hypergeometricPmf(100, 0, 1) must beCloseTo(Pmf(0 -> 1.0)) 63 | hypergeometricPmf(100, 0, 0) must beCloseTo(Pmf(0 -> 1.0)) 64 | hypergeometricPmf(0, 0, 0) must beCloseTo(Pmf(0 -> 1.0)) 65 | } 66 | 67 | "provide a way to create bounded Pdfs for beta distributions" in { 68 | val epsilon = 0.00001 69 | 70 | betaPdf(3.5, 4.7).lowerBound === 0.0 71 | betaPdf(6.8, 10.1).upperBound === 1.0 72 | forall(0.0 to 1.0 by 0.1) { p => betaPdf(1.0, 1.0).density(p) === 1.0 } 73 | forall(0.0 to 1.0 by 0.1) { p => betaPdf(2.0, 1.0).density(p) must beCloseTo(2 * p, epsilon) } 74 | forall(0.0 to 1.0 by 0.1) { p => betaPdf(1.0, 2.0).density(p) must beCloseTo(2 * (1 - p), epsilon) } 75 | betaPdf(9.4, 2.3).density(0.45) must beCloseTo(172.285 * math.pow(0.45, 8.4) * math.pow(0.55, 1.3), epsilon) 76 | betaPdf(9.4, 2.3).density(0.78) must beCloseTo(172.285 * math.pow(0.78, 8.4) * math.pow(0.22, 1.3), epsilon) 77 | } 78 | 79 | "provide a way to create Pmfs for beta-binomial distributions" in { 80 | val epsilon = 0.00001 81 | 82 | forall(0 to 100 by 10) { t => 83 | betaBinomialPmf(t, 4.56, 7.54).keySet === (0 to t).toSet 84 | betaBinomialPmf(t, 1.0, 1.0).prob(t) must beCloseTo(1.0 / (t + 1), epsilon) 85 | } 86 | 87 | forall(1.0 to 5.0 by 0.5) { a => 88 | forall(1.0 to 5.0 by 0.5) { b => betaBinomialPmf(1, a, b).prob(0) must beCloseTo(b / (a + b), epsilon) } 89 | } 90 | 91 | betaBinomialPmf(18, 16.9, 9.4).prob(13) must beCloseTo(0.13846457651739186, epsilon) 92 | betaBinomialPmf(48, 12.8, 23.7).prob(29) must beCloseTo(0.004926859420972929, epsilon) 93 | betaBinomialPmf(48, 12.8, 23.7).prob(17) must beCloseTo(0.07817877873188939, epsilon) 94 | betaBinomialPmf(8, 27.6, 18.1).prob(3) must beCloseTo(0.12456374434810324, epsilon) 95 | } 96 | 97 | "provide a way to estimate a Pdf from a sequence of samples" in { 98 | todo 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/extensions/SamplingSpec.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions 2 | 3 | import org.specs2.mutable.Specification 4 | import scala.util.Random 5 | import thinkbayes._ 6 | import thinkbayes.extensions.Distributions._ 7 | import thinkbayes.extensions.Sampling._ 8 | import thinkbayes.extensions.Stats._ 9 | 10 | class SamplingSpec extends Specification { 11 | 12 | "A PmfSampling" should { 13 | 14 | "allow taking random samples from it" in { 15 | val nValues = 20 16 | val pmf = Pmf((0 until nValues).map(_ -> Random.nextDouble()).toMap).normalized 17 | val nSamples = 10000 18 | val samplePmf = Pmf(pmf.samplesIterator.take(nSamples)) 19 | 20 | (0 until nValues).forall { value => 21 | val p = pmf.prob(value) 22 | val (s, l) = binomialPmf(nSamples, p).credibleInterval(1.0 - 1.0 / 1e9) 23 | samplePmf.prob(value) must beBetween(s / nSamples.toDouble, l / nSamples.toDouble) 24 | } 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/extensions/StatsSpec.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions 2 | 3 | import org.specs2.mutable.Specification 4 | import thinkbayes._ 5 | import thinkbayes.extensions.Stats._ 6 | 7 | class StatsSpec extends Specification { 8 | 9 | "The Stats extension" should { 10 | val pmf1 = Pmf(List(15, 20, 35, 40, 50)) 11 | val pmf2 = Pmf(List(3, 6, 7, 8, 8, 10, 13, 15, 16, 20)) 12 | val pmf3 = Pmf('a' -> 0.2, 'b' -> 0.2, 'c' -> 0.6) 13 | 14 | "allow calculating quantiles of a Pmf" in { 15 | List(0.3, 0.4, 0.5, 1.0).map(pmf1.quantile) === Seq(20, 20, 35, 50) 16 | (0.0 to 1.0 by 0.25).map(pmf2.quantile) === Seq(3, 7, 8, 15, 20) 17 | 18 | pmf3.quantile(0.1) === 'a' 19 | pmf3.quantile(0.2) === 'a' 20 | pmf3.quantile(0.21) === 'b' 21 | pmf3.quantile(0.4) === 'b' 22 | pmf3.quantile(0.41) === 'c' 23 | } 24 | 25 | "allow calculating credible intervals of a Pmf" in { 26 | todo 27 | } 28 | 29 | "allow calculating quantiles of a Cdf" in { 30 | List(0.3, 0.4, 0.5, 1.0).map(pmf1.toCdf.quantile) === Seq(20, 20, 35, 50) 31 | (0.0 to 1.0 by 0.25).map(pmf2.toCdf.quantile) === Seq(3, 7, 8, 15, 20) 32 | 33 | val cdf3 = pmf3.toCdf 34 | cdf3.quantile(0.1) === 'a' 35 | cdf3.quantile(0.2) === 'a' 36 | cdf3.quantile(0.21) === 'b' 37 | cdf3.quantile(0.4) === 'b' 38 | cdf3.quantile(0.41) === 'c' 39 | } 40 | 41 | "allow calculating credible intervals of a Cdf" in { 42 | todo 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/scala/thinkbayes/extensions/distributions/BetaBinomialPmfSpec.scala: -------------------------------------------------------------------------------- 1 | package thinkbayes.extensions.distributions 2 | 3 | import org.specs2.mutable.Specification 4 | import thinkbayes.PmfMatchers 5 | 6 | import scala.util.Random 7 | 8 | class BetaBinomialPmfSpec extends Specification with PmfMatchers { 9 | 10 | "A BetaBinomialPmf" should { 11 | 12 | def randomBetaBinomialPmf(n: Int) = new BetaBinomialPmf(n, Random.nextDouble() * 20, Random.nextDouble() * 20) 13 | 14 | "define an iterator having all possible outcomes" in { 15 | foreach(0 to 100) { n => 16 | randomBetaBinomialPmf(n).iterator.length === n + 1 17 | } 18 | } 19 | 20 | "define the mean as a constant-time closed form expression" in { 21 | val table = Map( 22 | (1234, 4.5, 3.5) -> 694.125, 23 | (31234, 1.0, 1.0) -> 15617.0, 24 | (31234, 0.4, 1.0) -> 8924.0, 25 | (31234, 1.0, 0.5) -> 20822.666666667, 26 | (31234, 1.4, 1.0) -> 18219.833333333, 27 | (31234, 1.0, 1.5) -> 12493.6, 28 | (31234, 0.4, 0.5) -> 13881.777777778, 29 | (31234, 0.4, 1.5) -> 6575.57894733406, 30 | (31234, 1.4, 0.5) -> 23014.526315792507, 31 | (31234, 1.4, 1.5) -> 15078.482758214723, 32 | (31234, 47.5, 92.5) -> 10597.3, 33 | (312349, 6.2, 52.5) -> 32990.9, 34 | (312364973, 10483.2, 24681.3) -> 9.312188385882352e7 35 | ) 36 | 37 | foreach(0 to 100) { n => 38 | val pmf = randomBetaBinomialPmf(n) 39 | pmf.mean must beRelativelyCloseTo(pmf.toCategoricalPmf.mean) 40 | } 41 | 42 | foreach(table) { case ((n, a, b), res) => 43 | new BetaBinomialPmf(n, a, b).mean must beRelativelyCloseTo(res) 44 | } 45 | } 46 | 47 | "define the variance as a constant-time closed form expression" in { 48 | val table = Map( 49 | (1234, 4.5, 3.5) -> 41907.8, 50 | (31234, 1.0, 1.0) -> 8.130210200333697e7, 51 | (31234, 0.4, 1.0) -> 8.29597350004374e7, 52 | (31234, 1.0, 0.5) -> 8.67208539532947e7, 53 | (31234, 1.4, 1.0) -> 6.974534336258161e7, 54 | (31234, 1.0, 1.5) -> 6.690108623889974e7, 55 | (31234, 0.4, 0.5) -> 1.2678243879877478e8, 56 | (31234, 0.4, 1.5) -> 5.591491645461144e7, 57 | (31234, 1.4, 0.5) -> 6.5234069195641756e7, 58 | (31234, 1.4, 1.5) -> 6.2467514339533895e7, 59 | (31234, 47.5, 92.5) -> 1.5579648550531915e6, 60 | (312349, 6.2, 52.5) -> 1.5440533738189435e8, 61 | (312364973, 10483.2, 24681.3) -> 5.806437517739379e11 62 | ) 63 | 64 | foreach(0 to 100) { n => 65 | val pmf = randomBetaBinomialPmf(n) 66 | pmf.variance must beRelativelyCloseTo(pmf.toCategoricalPmf.variance) 67 | } 68 | 69 | foreach(table) { case ((n, a, b), res) => 70 | new BetaBinomialPmf(n, a, b).variance must beRelativelyCloseTo(res) 71 | } 72 | } 73 | 74 | "define the mode as a constant-time closed form expression" in { 75 | val table = Map( 76 | (1234, 4.5, 3.5) -> 720, 77 | (31234, 1.0, 1.0) -> 0, 78 | (31234, 0.4, 1.0) -> 0, 79 | (31234, 1.0, 0.5) -> 31234, 80 | (31234, 1.4, 1.0) -> 31234, 81 | (31234, 1.0, 1.5) -> 0, 82 | (31234, 0.4, 0.5) -> 0, 83 | (31234, 0.4, 1.5) -> 0, 84 | (31234, 1.4, 0.5) -> 31234, 85 | (31234, 1.4, 1.5) -> 13882, 86 | (31234, 47.5, 92.5) -> 10524, 87 | (312349, 6.2, 52.5) -> 28645, 88 | (312364973, 10483.2, 24681.3) -> 93118643 89 | ) 90 | 91 | foreach(0 to 100) { n => 92 | val pmf = randomBetaBinomialPmf(n) 93 | pmf.variance must beRelativelyCloseTo(pmf.toCategoricalPmf.variance) 94 | } 95 | 96 | foreach(table) { case ((n, a, b), res) => 97 | new BetaBinomialPmf(n, a, b).mode.toDouble must beRelativelyCloseTo(res) 98 | } 99 | } 100 | } 101 | } 102 | --------------------------------------------------------------------------------