├── examples ├── C4-PCA │ ├── test.dat │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── pca.scala │ ├── project │ │ └── build.properties │ ├── .gitignore │ ├── Readme.md │ └── build.sbt ├── C4-GammaTest │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── gamma-test.scala │ ├── project │ │ └── build.properties │ ├── .gitignore │ ├── Readme.md │ └── build.sbt ├── C5-Metropolis │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── metropolis.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── .gitignore │ └── build.sbt ├── C5-MonteCarlo │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── monte-carlo.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── .gitignore │ └── build.sbt ├── C6-Regression │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── regression.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ └── build.sbt ├── C8-SparkJob │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── spark.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ └── build.sbt ├── C6-Rainier │ ├── project │ │ ├── build.properties │ │ └── plugins.sbt │ ├── target │ │ └── mdoc │ │ │ ├── b0.png │ │ │ └── b1.png │ ├── .gitignore │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── rainier-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── rainier.scala │ ├── Readme.md │ ├── build.sbt │ └── docs │ │ └── LogisticRegression.md ├── C6-Smile │ ├── project │ │ ├── build.properties │ │ └── plugins.sbt │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── smile-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── smile.scala │ ├── .gitignore │ ├── build.sbt │ ├── Readme.md │ ├── docs │ │ 
└── smile-example.md │ └── target │ │ └── mdoc │ │ └── smile-example.md ├── C7-EvilPlot │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── build.sbt │ └── src │ │ └── main │ │ └── scala │ │ └── evilplot-examples.scala ├── C7-MetropAssembly │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── metropolis.scala │ ├── project │ │ ├── build.properties │ │ └── plugins.sbt │ ├── rscala.R │ ├── build.sbt │ └── Readme.md ├── C7-Vegas │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── C7-Vegas-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── C7-Vegas.scala │ ├── .gitignore │ └── build.sbt ├── C6-ScalaGlm │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── src │ │ ├── main │ │ │ └── scala │ │ │ │ └── scala-glm-example.scala │ │ └── test │ │ │ └── scala │ │ │ └── scala-glm-example-test.scala │ ├── .gitignore │ └── build.sbt ├── C9-ScalablePF │ ├── project │ │ └── build.properties │ ├── README.md │ ├── .gitignore │ ├── build.sbt │ └── src │ │ ├── main │ │ └── scala │ │ │ └── pfilter │ │ │ └── pfilter.scala │ │ └── test │ │ └── scala │ │ └── pfilter-test.scala ├── C6-DataFrames │ ├── smiledf │ │ ├── project │ │ │ └── build.properties │ │ ├── build.sbt │ │ └── src │ │ │ └── main │ │ │ └── scala │ │ │ └── smile-df.scala │ ├── r │ │ ├── df.R │ │ └── gen-csv.R │ ├── sparkdf │ │ └── spark.scala │ ├── framian │ │ ├── build.sbt │ │ └── framian.scala │ ├── datatable │ │ ├── build.sbt │ │ └── datatable.scala │ ├── saddle │ │ ├── CsvDf.scala │ │ └── build.sbt │ └── README.md ├── C1-HelloWorld │ ├── HelloWorld.scala │ └── Readme.md ├── C3-Pi │ ├── Readme.md │ └── pi.scala ├── Readme.md └── C2-LogFactorial │ ├── Readme.md │ └── log-fact.scala ├── app-template ├── src │ ├── test │ │ └── scala │ │ │ └── app-template-test.scala │ └── main │ │ └── scala │ │ └── app-template │ │ └── app-template.scala ├── project │ └── build.properties ├── build.sbt └── 
.gitignore ├── exercises ├── option │ ├── project │ │ └── build.properties │ ├── src │ │ ├── main │ │ │ └── scala │ │ │ │ └── option.scala │ │ └── test │ │ │ └── scala │ │ │ └── option-test.scala │ ├── build.sbt │ └── Readme.md ├── bisection │ ├── project │ │ └── build.properties │ ├── src │ │ ├── main │ │ │ └── scala │ │ │ │ └── bisect.scala │ │ └── test │ │ │ └── scala │ │ │ └── bisect-test.scala │ ├── build.sbt │ └── Readme.md ├── Readme.md ├── Basics.md ├── Spark.md ├── Collections.md ├── Monte.md ├── Breeze.md ├── Intro.md ├── Stats.md ├── Tools.md └── Advanced.md ├── sbt-test ├── project │ └── build.properties ├── src │ ├── main │ │ └── scala │ │ │ └── sbt-test.scala │ └── test │ │ └── scala │ │ ├── sbt-test-flatspec.scala │ │ ├── sbt-test-test.scala │ │ └── sbt-test-scalacheck.scala ├── .gitignore ├── build.sbt └── Readme.md ├── scscala.pdf ├── sbt ├── sbt-0.13.13.zip └── Readme.md ├── fragments ├── intro.scala ├── Readme.md ├── advanced.scala ├── tools.scala ├── basics.scala └── monte.scala ├── IntelliJ.md ├── SelfStudyGuide.md ├── StartHere.md ├── UsefulLinks.md ├── ScalaIDE.md ├── Ensime.md ├── README.md └── Setup.md /examples/C4-PCA/test.dat: -------------------------------------------------------------------------------- 1 | 1.0,2.0 2 | 3.0,4.0 3 | -------------------------------------------------------------------------------- /app-template/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C4-PCA/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app-template/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | 
-------------------------------------------------------------------------------- /app-template/src/main/scala/app-template/app-template.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C4-PCA/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C6-Regression/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exercises/option/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /sbt-test/project/build.properties: 
-------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | 3 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-Rainier/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.2 2 | -------------------------------------------------------------------------------- /examples/C6-Smile/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C7-Vegas/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /exercises/bisection/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | 
-------------------------------------------------------------------------------- /examples/C5-Metropolis/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-Regression/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.13 2 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/smiledf/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /scscala.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/scscala.pdf 
-------------------------------------------------------------------------------- /sbt/sbt-0.13.13.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/sbt/sbt-0.13.13.zip -------------------------------------------------------------------------------- /examples/C6-Rainier/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "1.3.6") 2 | 3 | -------------------------------------------------------------------------------- /examples/C6-Smile/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "1.3.6") 2 | 3 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10") 2 | 3 | -------------------------------------------------------------------------------- /examples/C6-Rainier/target/mdoc/b0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/examples/C6-Rainier/target/mdoc/b0.png -------------------------------------------------------------------------------- /examples/C6-Rainier/target/mdoc/b1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/examples/C6-Rainier/target/mdoc/b1.png -------------------------------------------------------------------------------- /examples/C6-Smile/src/test/scala/smile-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | 6 | 7 | } 8 | 
9 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/Readme.md: -------------------------------------------------------------------------------- 1 | # EvilPlot examples 2 | 3 | Some EvilPlot examples, largely taken from the documentation on the [EvilPlot website](https://cibotech.github.io/evilplot/). Should just `sbt run`. 4 | 5 | -------------------------------------------------------------------------------- /fragments/intro.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | multi-line 4 | comment 5 | */ 6 | 7 | object MyApp { 8 | 9 | def main(args: Array[String]): Unit = 10 | println("Hello, world!") 11 | 12 | } // single line comment 13 | 14 | -------------------------------------------------------------------------------- /examples/C1-HelloWorld/HelloWorld.scala: -------------------------------------------------------------------------------- 1 | /* 2 | multi-line 3 | comment 4 | */ 5 | 6 | object MyApp { 7 | 8 | def main(args: Array[String]): Unit = 9 | println("Hello, world!") 10 | 11 | } // single line comment 12 | 13 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/Readme.md: -------------------------------------------------------------------------------- 1 | # Metropolis MCMC sampler 2 | 3 | Illustration of a simple MCMC algorithm coded in Scala, in several different ways. Also illustrating more or less functional ways of handling (infinite) data streams in Scala. 4 | -------------------------------------------------------------------------------- /exercises/bisection/src/main/scala/bisect.scala: -------------------------------------------------------------------------------- 1 | /* 2 | bisect.scala 3 | 4 | */ 5 | 6 | object Bisect { 7 | 8 | 9 | def findRoot(low: Double, high: Double)(f: Double => Double): Double = ??? 
10 | 11 | 12 | } 13 | 14 | /* eof */ 15 | 16 | -------------------------------------------------------------------------------- /examples/C1-HelloWorld/Readme.md: -------------------------------------------------------------------------------- 1 | # Hello world 2 | 3 | Just a single scala file containing a runnable main method with no library dependencies. This will just `sbt run` without arranging it in a proper project directory structure, but Scala isn't often used this way. 4 | 5 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/rscala.R: -------------------------------------------------------------------------------- 1 | ## rscala.R 2 | 3 | library(rscala) 4 | 5 | sc = scala( 6 | "target/scala-2.12/metropolis-assembly-assembly-0.1.jar" 7 | ) 8 | 9 | met = sc * 'Metropolis.chain.take(10000).toArray' 10 | 11 | library(smfsb) 12 | mcmcSummary(matrix(met,ncol=1)) 13 | 14 | ## eof 15 | 16 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/README.md: -------------------------------------------------------------------------------- 1 | # A scalable particle filter in Scala 2 | 3 | Code examples for the blog post: 4 | 5 | https://darrenjw.wordpress.com/2016/07/22/a-scalable-particle-filter-in-scala/ 6 | 7 | Just `sbt run` it. Implements a parallel particle filter. 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /examples/C7-Vegas/Readme.md: -------------------------------------------------------------------------------- 1 | # Vegas 2 | 3 | This is supposed to be an example for [Vegas](https://github.com/vegas-viz/Vegas), the Scala wrapper for [vega-lite](https://vega.github.io/vega-lite/). However, it no longer runs for me, as the latest version is very old, and seems to be incompatible with recent versions of OpenJFX. 
4 | 5 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/Readme.md: -------------------------------------------------------------------------------- 1 | # Scala GLM 2 | 3 | Very simple example illustrating use of my `scala-glm` library. As usual, it can be run with `sbt run`. 4 | 5 | This is essentially just the giter8 template for the library. You can create a new template project for yourself with: 6 | 7 | ```bash 8 | sbt new darrenjw/scala-glm.g8 9 | ``` 10 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/r/df.R: -------------------------------------------------------------------------------- 1 | # df.R 2 | # Example of processing a CSV-derived data frame using R 3 | 4 | df=read.csv("cars93.csv") 5 | print(dim(df)) 6 | 7 | df=df[df$EngineSize<=4.0,] 8 | print(dim(df)) 9 | 10 | df$WeightKG=df$Weight*0.453592 11 | print(dim(df)) 12 | 13 | write.csv(df,"cars93m.csv",row.names=FALSE) 14 | 15 | # eof 16 | 17 | -------------------------------------------------------------------------------- /examples/C3-Pi/Readme.md: -------------------------------------------------------------------------------- 1 | # Parallel Monte Carlo estimation of Pi 2 | 3 | Again, just a single-file application, with no dependencies. Just to illustrate how easy it is to do parallel computing with Scala. 4 | 5 | Note however, that parallel collections are slightly different in Scala 2.13, so you need to make sure you run this under Scala 2.12 (or 2.11). 
6 | 7 | 8 | -------------------------------------------------------------------------------- /examples/C7-Vegas/src/test/scala/C7-Vegas-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | "A Poisson(10.0)" should "have mean 10.0" in { 6 | import breeze.stats.distributions.Poisson 7 | val p = Poisson(10.0) 8 | val m = p.mean 9 | assert(math.abs(m - 10.0) < 0.000001) 10 | } 11 | 12 | } 13 | 14 | -------------------------------------------------------------------------------- /exercises/option/src/main/scala/option.scala: -------------------------------------------------------------------------------- 1 | /* 2 | option.scala 3 | 4 | */ 5 | 6 | object OptionBisect { 7 | 8 | // Part A 9 | 10 | def findRootOpt(low: Double, high: Double)(f: Double => Double): Option[Double] = ??? 11 | 12 | 13 | // Part B 14 | 15 | def solveQuad(a: Double): Option[Double] = ??? 16 | 17 | 18 | } 19 | 20 | /* eof */ 21 | 22 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/src/main/scala/scala-glm-example.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Example scala-glm code 3 | */ 4 | 5 | object ScalaGlmApp { 6 | 7 | import scalaglm.Pca 8 | import breeze.linalg._ 9 | 10 | def main(args: Array[String]): Unit = { 11 | val X = DenseMatrix((1.0,1.5),(1.5,2.0),(2.0,1.5)) 12 | val pca = Pca(X, List("V1","V2")) 13 | pca.summary 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/Readme.md: -------------------------------------------------------------------------------- 1 | # Monte Carlo integration 2 | 3 | Benchmarking different serial and parallel approaches to Monte Carlo estimation of a univariate integral. 
4 | 5 | Note that here we are doing the benchmarking in a fairly simple way, using `System.nanoTime`, but there exist proper benchmarking libraries available for this purpose, and we will mention a couple of these later. 6 | 7 | -------------------------------------------------------------------------------- /sbt-test/src/main/scala/sbt-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | sbt-test.scala 3 | 4 | 5 | */ 6 | 7 | 8 | object SbtTest { 9 | 10 | def main(args: Array[String]): Unit = { 11 | println("") 12 | println("") 13 | println("") 14 | println("") 15 | println("") 16 | println("SBT IS INSTALLED AND WORKING") 17 | println("") 18 | println("") 19 | println("") 20 | } 21 | 22 | } 23 | 24 | /* eof */ 25 | 26 | -------------------------------------------------------------------------------- /app-template/build.sbt: -------------------------------------------------------------------------------- 1 | name := "app-template" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 11 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 12 | "org.scalanlp" %% "breeze" % "1.0", 13 | "org.scalanlp" %% "breeze-natives" % "1.0" 14 | ) 15 | 16 | scalaVersion := "2.12.10" 17 | 18 | 19 | -------------------------------------------------------------------------------- /app-template/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 
| .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C4-PCA/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/r/gen-csv.R: -------------------------------------------------------------------------------- 1 | # gen-csv.R 2 | # Generate a CSV file for subsequent analysis 3 | 4 | package=function(somepackage) 5 | { 6 | cpackage <- as.character(substitute(somepackage)) 7 | if(!require(cpackage,character.only=TRUE)){ 8 | install.packages(cpackage) 9 | library(cpackage,character.only=TRUE) 10 | } 11 | } 12 | 13 | package(MASS) 14 | 15 | write.csv(Cars93,"cars93.csv",row.names=FALSE) 16 | 17 | 18 | 19 | # eof 20 | 21 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- 
/examples/C6-Rainier/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C6-Smile/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C7-Vegas/.gitignore: 
-------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /exercises/Readme.md: -------------------------------------------------------------------------------- 1 | # Exercises 2 | 3 | ## Practical exercises to follow each Chapter of the course notes 4 | 5 | 1. [Introduction](Intro.md) 6 | 2. [Scala and FP Basics](Basics.md) 7 | 3. [Collections](Collections.md) 8 | 4. [Scala Breeze](Breeze.md) 9 | 5. [Monte Carlo](Monte.md) 10 | 6. [Statistical modelling](Stats.md) 11 | 7. [Tools](Tools.md) 12 | 8. [Apache Spark](Spark.md) 13 | 9. 
[Advanced topics](Advanced.md) 14 | 15 | 16 | #### eof 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /sbt-test/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | /bin/ 32 | -------------------------------------------------------------------------------- /examples/C4-PCA/Readme.md: -------------------------------------------------------------------------------- 1 | # PCA 2 | 3 | Doing principal components analysis (PCA) using Breeze. 4 | 5 | Note that this a proper `sbt` project template layout, so navigate around to see where everything is, ideally before you do `sbt run`, and then again after, to see the additional files that `sbt` generates. 6 | 7 | This is our first example of doing data analysis using Scala, so study the code and the output carefully to make sure that you understand what is going on. 
8 | 9 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- 
/examples/C6-Rainier/src/test/scala/rainier-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.flatspec.AnyFlatSpec 2 | import org.scalatest.matchers.should.Matchers 3 | 4 | 5 | // Example unit tests 6 | class CatsSpec extends AnyFlatSpec with Matchers { 7 | 8 | import cats._ 9 | import cats.implicits._ 10 | 11 | "A List" should "combine" in { 12 | val l = List(1,2) |+| List(3,4) 13 | l should be (List(1,2,3,4)) 14 | } 15 | 16 | } 17 | 18 | 19 | 20 | 21 | // eof 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/src/test/scala/scala-glm-example-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | import scalaglm.Utils.backSolve 6 | import breeze.linalg._ 7 | 8 | "backSolve" should "invert correctly" in { 9 | val A = DenseMatrix((4,1),(0,2)) map (_.toDouble) 10 | val x = DenseVector(3.0,-2.0) 11 | val y = A * x 12 | val xx = backSolve(A,y) 13 | assert (norm(x-xx) < 0.00001) 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /examples/C6-Regression/Readme.md: -------------------------------------------------------------------------------- 1 | # Regression modelling 2 | 3 | Illustration of how to implement regression modelling "from scratch" in Scala. Contains some useful techniques for those interested in statistical computing and numerical algorithms development. 4 | 5 | For "real" regression analysis projects, you'll want to use a library, such as [scala-glm](https://github.com/darrenjw/scala-glm) or, more likely, [smile](http://haifengl.github.io/), or for "big data", [Spark MLlib](https://spark.apache.org/mllib/). 
6 | 7 | 8 | -------------------------------------------------------------------------------- /examples/Readme.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | ## A collection of complete runnables examples 3 | 4 | Each subdirectory of this directory contains a complete runnable Scala program, numbered according to the Chapter of the notes that they most closely relate to. It should be possible to run most of these directly from the command line of the relevant directory by typing `sbt run` at the OS command prompt. 5 | 6 | Note that there is a separate Readme in each directory, and that these contain further details specific to each example. 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/Readme.md: -------------------------------------------------------------------------------- 1 | # Testing stochastic simulation from a Gamma distribution 2 | 3 | Simple example of how to test a non-uniform random number generation scheme. 4 | 5 | Note that this a proper `sbt` project template layout, so navigate around to see where everything is, ideally before you do `sbt run`, and then again after, to see the additional files that `sbt` generates. 6 | 7 | Note that we will consider proper testing frameworks later, including both unit-testing and property-based testing frameworks. 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/Readme.md: -------------------------------------------------------------------------------- 1 | # Self-contained Spark application 2 | 3 | Note that you can't just `sbt run` this example. 
4 | 5 | Build it with: 6 | 7 | ```bash 8 | sbt package 9 | ``` 10 | 11 | Then submit it to a Spark cluster with: 12 | 13 | ```bash 14 | spark-submit --class "SparkApp" \ 15 | --master local[4] \ 16 | target/scala-2.11/spark-template_2.11-0.1.jar 17 | ``` 18 | 19 | This works fine here, since the application has no third-party dependencies. If it did, you would need to build and submit an assembly JAR. 20 | 21 | 22 | -------------------------------------------------------------------------------- /examples/C2-LogFactorial/Readme.md: -------------------------------------------------------------------------------- 1 | # Log-factorial 2 | 3 | Illustration of the log-factorial function as a tail recursion. Again, this is just a single Scala file with no dependencies, so will just `sbt run`. 4 | 5 | To run with command line arguments, pass them in. From the `sbt` prompt, do, say, `run 100000`. From the OS prompt, do 6 | ```bash 7 | sbt "run 100000" 8 | ``` 9 | 10 | Try a non-tail-recursive version of the log-factorial function. Check that it works correctly for small argument, and that it overflows the stack for large arguments, like 100000. 
11 | 12 | -------------------------------------------------------------------------------- /examples/C2-LogFactorial/log-fact.scala: -------------------------------------------------------------------------------- 1 | /* 2 | log-fact.scala 3 | Program to compute the log-factorial function 4 | */ 5 | 6 | object LogFact { 7 | 8 | import annotation.tailrec 9 | import math.log 10 | 11 | @tailrec 12 | def logfact(n: Int, acc: Double = 0.0): Double = 13 | if (n == 1) acc else 14 | logfact(n-1, acc + log(n)) 15 | 16 | def main(args: Array[String]): Unit = { 17 | val n = if (args.length == 1) args(0).toInt else 5 18 | val lfn = logfact(n) 19 | println(s"logfact($n) = $lfn") 20 | } 21 | 22 | } 23 | 24 | // eof 25 | 26 | -------------------------------------------------------------------------------- /examples/C6-Rainier/Readme.md: -------------------------------------------------------------------------------- 1 | # Rainier 2 | 3 | Simple example of using [Rainier](https://rainier.fit/) to do Bayesian logistic regression on some synthetic data. It should just `sbt run`. Work through the Rainier documentation for further information. 4 | 5 | Note that this example also illustrates the use of `mdoc` for producing reports. This includes the use of EvilPlot figures generated by Rainier diagnostics. An [example report](docs/LogisticRegression.md) is provided, and the [generated Markdown document](target/mdoc/LogisticRegression.md) includes generated output and plots. 
6 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/smiledf/build.sbt: -------------------------------------------------------------------------------- 1 | name := "smile" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 11 | "com.github.haifengl" %% "smile-scala" % "2.1.1" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 16 | "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at 18 | "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.13.1" 22 | 23 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/build.sbt: -------------------------------------------------------------------------------- 1 | name := "scala-glm-example" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 11 | "com.github.darrenjw" %% "scala-glm" % "0.3" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 16 | "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at 18 | "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.12.1" 22 | 23 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/sparkdf/spark.scala: -------------------------------------------------------------------------------- 1 | /* 2 | spark.scala 3 | 4 | code for a "spark-shell" session 5 | 6 | spark-shell --master local[4] 7 | 8 | */ 9 | 10 | val df = spark.read. 11 | option("header", "true"). 12 | option("inferSchema","true"). 
13 | csv("../r/cars93.csv") 14 | val df2=df.filter("EngineSize <= 4.0") 15 | val col=df2.col("Weight")*0.453592 16 | val df3=df2.withColumn("WeightKG",col) 17 | df3.write.format("com.databricks.spark.csv"). 18 | option("header","true"). 19 | save("out-csv") 20 | 21 | 22 | // eof 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/C6-Smile/build.sbt: -------------------------------------------------------------------------------- 1 | name := "smile" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | enablePlugins(MdocPlugin) 10 | 11 | libraryDependencies ++= Seq( 12 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 13 | "com.github.haifengl" %% "smile-scala" % "2.2.1" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at 18 | "https://oss.sonatype.org/content/repositories/snapshots/", 19 | "Sonatype Releases" at 20 | "https://oss.sonatype.org/content/repositories/releases/" 21 | ) 22 | 23 | scalaVersion := "2.12.10" 24 | 25 | -------------------------------------------------------------------------------- /exercises/bisection/build.sbt: -------------------------------------------------------------------------------- 1 | name := "bisection" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 9 | "org.scalanlp" %% "breeze" % "1.0", 10 | "org.scalanlp" %% "breeze-natives" % "1.0" 11 | ) 12 | 13 | resolvers ++= Seq( 14 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 15 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 16 | ) 17 | 18 | scalaVersion := "2.12.10" 19 | 20 | -------------------------------------------------------------------------------- /exercises/option/build.sbt: 
-------------------------------------------------------------------------------- 1 | name := "bisection-option" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 9 | "org.scalanlp" %% "breeze" % "1.0", 10 | "org.scalanlp" %% "breeze-natives" % "1.0" 11 | ) 12 | 13 | resolvers ++= Seq( 14 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 15 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 16 | ) 17 | 18 | scalaVersion := "2.12.10" 19 | 20 | -------------------------------------------------------------------------------- /examples/C3-Pi/pi.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pi.scala 3 | Simple (parallel) Monte Carlo estimate of Pi using rejection sampling on points randomly scattered on the unit square 4 | */ 5 | 6 | object Pi { 7 | 8 | def main(args: Array[String]): Unit = { 9 | val N = 10000000 10 | println("Estimating pi based on "+N+" draws") 11 | println("Creating random vector..") 12 | val z2 = (1 to N).par map (i => { 13 | val x = math.random 14 | val y = math.random 15 | x*x + y*y 16 | }) 17 | println("Counting successes...") 18 | val c = z2 count (_ < 1) 19 | val mypi = 4.0*c/N 20 | println("Esimate of pi: "+mypi) 21 | } 22 | 23 | } 24 | 25 | // eof 26 | 27 | 28 | -------------------------------------------------------------------------------- /examples/C7-Vegas/src/main/scala/C7-Vegas.scala: -------------------------------------------------------------------------------- 1 | /* 2 | C7-Vegas.scala 3 | Example Vegas app 4 | */ 5 | 6 | object MyVegasApp { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | import vegas._ 11 | import vegas.render.WindowRenderer._ 12 | 13 | val plot = Vegas("Country Pop"). 
14 | withData( 15 | Seq( 16 | Map("country" -> "USA", "population" -> 314), 17 | Map("country" -> "UK", "population" -> 64), 18 | Map("country" -> "DK", "population" -> 80))). 19 | encodeX("country", Nom). 20 | encodeY("population", Quant). 21 | mark(Bar) 22 | 23 | plot.show 24 | 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /examples/C6-Regression/build.sbt: -------------------------------------------------------------------------------- 1 | name := "regression" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 11 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 12 | "org.scalanlp" %% "breeze" % "1.0", 13 | "org.scalanlp" %% "breeze-natives" % "1.0" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at 18 | "https://oss.sonatype.org/content/repositories/snapshots/", 19 | "Sonatype Releases" at 20 | "https://oss.sonatype.org/content/repositories/releases/" 21 | ) 22 | 23 | scalaVersion := "2.12.10" 24 | 25 | 26 | -------------------------------------------------------------------------------- /sbt-test/src/test/scala/sbt-test-flatspec.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.flatspec.AnyFlatSpec 2 | 3 | // Tests using the "FlatSpec" style... 
4 | 5 | class SetSpec extends AnyFlatSpec { 6 | 7 | "An empty Set" should "have size 0" in { 8 | assert(Set.empty.size == 0) 9 | } 10 | 11 | it should "produce NoSuchElementException when head is invoked" in { 12 | assertThrows[NoSuchElementException] { 13 | Set.empty.head 14 | } 15 | } 16 | 17 | "A Gamma(3.0,4.0)" should "have mean 12.0" in { 18 | import breeze.stats.distributions.Gamma 19 | val g = Gamma(3.0,4.0) 20 | val m = g.mean 21 | assert(math.abs(m - 12.0) < 0.000001) 22 | } 23 | 24 | } 25 | 26 | // eof 27 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/build.sbt: -------------------------------------------------------------------------------- 1 | name := "pfilter" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 9 | "org.scalanlp" %% "breeze" % "0.13", 10 | "org.scalanlp" %% "breeze-natives" % "1.0", 11 | "org.scalanlp" %% "breeze-viz" % "1.0" 12 | 13 | ) 14 | 15 | resolvers ++= Seq( 16 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 18 | ) 19 | 20 | scalaVersion := "2.12.10" 21 | 22 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/build.sbt: -------------------------------------------------------------------------------- 1 | name := "gamma-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 
"https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.12.10" 20 | 21 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/build.sbt: -------------------------------------------------------------------------------- 1 | name := "monte-carlo" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.12.10" 20 | 21 | -------------------------------------------------------------------------------- /fragments/Readme.md: -------------------------------------------------------------------------------- 1 | # Code Fragments 2 | 3 | Raw code fragments auto-extracted from the course notes, to allow copy-and-pasting of bits of code into a REPL or editor. 4 | 5 | For complete runnable examples from the notes, instead see the [examples directory](../examples/). 6 | 7 | 8 | ### Code fragments by chapter 9 | 10 | 1. [Introduction](intro.scala) 11 | 2. [Scala and FP Basics](basics.scala) 12 | 3. [Scala collections library](collections.scala) 13 | 4. [Scala Breeze](breeze.scala) 14 | 5. [Monte Carlo](monte.scala) 15 | 6. [Statistical modelling](stats.scala) 16 | 7. [Tools](tools.scala) 17 | 8. [Apache Spark](spark.scala) 18 | 9. 
[Advanced topics](advanced.scala) 19 | 20 | 21 | 22 | #### eof 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/C4-PCA/build.sbt: -------------------------------------------------------------------------------- 1 | name := "pca" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 11 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 12 | "org.scalanlp" %% "breeze" % "1.0", 13 | "org.scalanlp" %% "breeze-natives" % "1.0", 14 | "org.scalanlp" %% "breeze-viz" % "1.0" 15 | ) 16 | 17 | resolvers ++= Seq( 18 | "Sonatype Snapshots" at 19 | "https://oss.sonatype.org/content/repositories/snapshots/", 20 | "Sonatype Releases" at 21 | "https://oss.sonatype.org/content/repositories/releases/" 22 | ) 23 | 24 | scalaVersion := "2.12.10" 25 | 26 | 27 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/build.sbt: -------------------------------------------------------------------------------- 1 | name := "metropolis-assembly" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.12.10" 20 | 21 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/build.sbt: -------------------------------------------------------------------------------- 1 | name := 
"spark-template" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 9 | "org.apache.spark" %% "spark-core" % "2.4.5" % Provided, 10 | "org.apache.spark" %% "spark-sql" % "2.4.5" % Provided, 11 | "org.apache.spark" %% "spark-mllib" % "2.4.5" % Provided 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.11.12" 20 | 21 | -------------------------------------------------------------------------------- /examples/C7-Vegas/build.sbt: -------------------------------------------------------------------------------- 1 | name := "C7-Vegas" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | // "org.scalatest" %% "scalatest" % "3.0.8" % "test", 11 | // "org.scalanlp" %% "breeze" % "1.0", 12 | // "org.scalanlp" %% "breeze-viz" % "1.0", 13 | // "org.scalanlp" %% "breeze-natives" % "1.0", 14 | "org.vegas-viz" %% "vegas" % "0.3.11" 15 | ) 16 | 17 | resolvers ++= Seq( 18 | "Sonatype Snapshots" at 19 | "https://oss.sonatype.org/content/repositories/snapshots/", 20 | "Sonatype Releases" at 21 | "https://oss.sonatype.org/content/repositories/releases/" 22 | ) 23 | 24 | scalaVersion := "2.11.8" 25 | 26 | fork := true 27 | 28 | 29 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/framian/build.sbt: -------------------------------------------------------------------------------- 1 | name := "framian-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | 
"org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "com.pellucid" %% "framian" % "0.3.3" 11 | ) 12 | 13 | resolvers ++= Seq( 14 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 15 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/", 16 | "Pellucid Bintray" at "http://dl.bintray.com/pellucid/maven" 17 | ) 18 | 19 | scalaVersion := "2.11.2" 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/build.sbt: -------------------------------------------------------------------------------- 1 | name := "metropolis" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0", 12 | "org.scalanlp" %% "breeze-viz" % "1.0" 13 | ) 14 | 15 | resolvers ++= Seq( 16 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 18 | ) 19 | 20 | scalaVersion := "2.12.10" 21 | 22 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/datatable/build.sbt: -------------------------------------------------------------------------------- 1 | name := "datatable-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "com.github.tototoshi" %% "scala-csv" % "1.1.2", 11 | "com.github.martincooper" %% "scala-datatable" % "0.7.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 
"https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.11.7" 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/saddle/CsvDf.scala: -------------------------------------------------------------------------------- 1 | 2 | object CsvDf { 3 | 4 | def main(args: Array[String]): Unit = { 5 | 6 | import org.saddle.Index 7 | import org.saddle.io._ 8 | 9 | val file = CsvFile("../r/cars93.csv") 10 | val df = CsvParser.parse(file).withColIndex(0) 11 | println(df) 12 | val df2 = df.rfilter(_("EngineSize").mapValues(CsvParser.parseDouble).at(0)<=4.0) 13 | println(df2) 14 | val wkg=df2.col("Weight").mapValues(CsvParser.parseDouble).mapValues(_*0.453592).setColIndex(Index("WeightKG")) 15 | val df3=df2.joinPreserveColIx(wkg.mapValues(_.toString)) 16 | println(df3) 17 | 18 | import CsvImplicits._ 19 | import scala.language.reflectiveCalls 20 | df3.writeCsvFile("saddle-out.csv") 21 | 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /examples/C6-Smile/Readme.md: -------------------------------------------------------------------------------- 1 | # Smile as a Scala library 2 | 3 | Redoing the yacht hydrodynamics OLS regression analyis using [Smile](https://haifengl.github.io/). If you want to do simple statistical modelling and machine learning for small-to-medium sized data sets in Scala, then this currently looks to be a good option, so is probably worth spending some time getting to grips with it. 4 | 5 | This example also illustrates the use of Scala's [mdoc](https://scalameta.org/mdoc/) Markdown-based documentation system. After enabling the plugin, Markdown files in [docs](docs/) get compiled to [target/mdoc](target/mdoc/). 
6 | 7 | Note that the latest version of Smile, version 2.2.1, works fine with Scala 2.11, 2.12 and 2.13 (some earlier releases only worked with Scala 2.13). 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/smiledf/src/main/scala/smile-df.scala: -------------------------------------------------------------------------------- 1 | /* 2 | smile-df.scala 3 | 4 | Testing the use of Smile DataFrames 5 | 6 | */ 7 | 8 | object SmileApp { 9 | 10 | 11 | def main(args: Array[String]): Unit = { 12 | 13 | val df2 = smile.read.csv("../r/cars93.csv") 14 | val df3 = df2.filter{ _("EngineSize").asInstanceOf[Double] <= 4.0 } 15 | val w = df3.select("Weight") 16 | val wkg = w map {_(0).asInstanceOf[Int] * 0.453592} 17 | val wkgdf = smile.data.DataFrame.of(wkg.toArray.map(Array(_)),"WKG") 18 | val adf = df3 merge wkgdf 19 | smile.write.csv(adf,"cars-smile.csv") 20 | 21 | // read it back for good measure... 22 | val rdf = smile.read.csv("cars-smile.csv") 23 | println(rdf) 24 | println(rdf.summary) 25 | 26 | } 27 | 28 | } 29 | 30 | // eof 31 | 32 | -------------------------------------------------------------------------------- /sbt-test/src/test/scala/sbt-test-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.funsuite.AnyFunSuite 2 | 3 | // Here using FunSuite style - but other possibilities... 
4 | 5 | class SetSuite extends AnyFunSuite { 6 | 7 | test("An empty Set should have size 0") { 8 | assert(Set.empty.size == 0) 9 | } 10 | 11 | test("A Gaussian sample of length 10 should have length 10") { 12 | import breeze.stats.distributions.Gaussian 13 | val x = Gaussian(2.0,4.0).sample(10) 14 | assert(x.length === 10) 15 | } 16 | 17 | test("Cats map merge") { 18 | import cats.instances.all._ 19 | import cats.syntax.semigroup._ 20 | val m1 = Map("a"->1,"b"->2) 21 | val m2 = Map("b"->2,"c"->1) 22 | val m3 = m1 |+| m2 23 | val m4 = Map("b" -> 4, "c" -> 1, "a" -> 1) 24 | assert(m3 === m4) 25 | } 26 | 27 | } 28 | 29 | 30 | // eof 31 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/saddle/build.sbt: -------------------------------------------------------------------------------- 1 | name := "csv-manipulation" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "org.scalanlp" %% "breeze" % "0.11.2", 11 | "org.scalanlp" %% "breeze-natives" % "0.11.2", 12 | "org.scalanlp" %% "breeze-viz" % "0.11.2", 13 | "org.scala-saddle" %% "saddle-core" % "1.3.+" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 18 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.11.6" 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/src/main/scala/metropolis.scala: -------------------------------------------------------------------------------- 1 | /* 2 | metropolis.scala 3 | 4 | Example of a project which can be built as an assembly JAR, allowing stand-alone deployment and calling from R 5 | 6 | */ 7 | 8 | object 
Metropolis { 9 | 10 | import breeze.stats.distributions._ 11 | 12 | def kernel(x: Double): Rand[Double] = for { 13 | innov <- Uniform(-0.5, 0.5) 14 | can = x + innov 15 | oldll = Gaussian(0.0, 1.0).logPdf(x) 16 | loglik = Gaussian(0.0, 1.0).logPdf(can) 17 | loga = loglik - oldll 18 | u <- Uniform(0.0, 1.0) 19 | } yield if (math.log(u) < loga) can else x 20 | 21 | val chain = Stream.iterate(0.0)(kernel(_).draw) 22 | 23 | def main(args: Array[String]): Unit = { 24 | val n = if (args.size == 0) 10 else args(0).toInt 25 | chain.take(n).toArray.foreach(println) 26 | } 27 | 28 | } 29 | 30 | // eof 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/framian/framian.scala: -------------------------------------------------------------------------------- 1 | /* 2 | framian.scala 3 | 4 | Test of "framian" 5 | 6 | */ 7 | 8 | import java.io.{File,PrintWriter} 9 | import framian.{Index,Cols} 10 | import framian.csv.{Csv,CsvFormat} 11 | 12 | object FramianTest { 13 | 14 | def main(args: Array[String]) = { 15 | println("Hello") 16 | val df=Csv.parseFile(new File("../r/cars93.csv")).labeled.toFrame 17 | println(""+df.rows+" "+df.cols) 18 | val df2=df.filter(Cols("EngineSize").as[Double])( _ <= 4.0 ) 19 | println(""+df2.rows+" "+df2.cols) 20 | val df3=df2.map(Cols("Weight").as[Int],"WeightKG")(r=>r.toDouble*0.453592) 21 | println(""+df3.rows+" "+df3.cols) 22 | println(df3.colIndex) 23 | val csv = Csv.fromFrame(new CsvFormat(",", header = true))(df3) 24 | new PrintWriter("out.csv") { write(csv.toString); close } 25 | println("Done") 26 | } 27 | 28 | } 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/build.sbt: -------------------------------------------------------------------------------- 1 | name := "evilplot-examples" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | 
libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.1.0-SNAP13" % "test", 11 | "com.cibo" %% "evilplot" % "0.6.3", // 0.7.0 12 | "com.cibo" %% "evilplot-repl" % "0.6.3", // 0.7.0 13 | "org.scalanlp" %% "breeze" % "1.0", 14 | // "org.scalanlp" %% "breeze-viz" % "1.0", 15 | "org.scalanlp" %% "breeze-natives" % "1.0" 16 | ) 17 | 18 | 19 | resolvers += Resolver.bintrayRepo("cibotech", "public") 20 | 21 | resolvers ++= Seq( 22 | "Sonatype Snapshots" at 23 | "https://oss.sonatype.org/content/repositories/snapshots/", 24 | "Sonatype Releases" at 25 | "https://oss.sonatype.org/content/repositories/releases/" 26 | ) 27 | 28 | scalaVersion := "2.12.8" 29 | 30 | fork := true 31 | 32 | 33 | -------------------------------------------------------------------------------- /sbt-test/build.sbt: -------------------------------------------------------------------------------- 1 | name := "sbt-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature","-language:implicitConversions") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.1.1" % "test", 9 | "org.scalacheck" %% "scalacheck" % "1.14.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0", 12 | "org.scalanlp" %% "breeze-viz" % "1.0", 13 | "org.typelevel" %% "cats-core" % "1.0.0" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 18 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | 22 | addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full) 23 | 24 | 25 | scalaVersion := "2.12.10" 26 | 27 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/src/main/scala/spark.scala: -------------------------------------------------------------------------------- 1 | /* 2 | spark.scala 3 | 4 | Build with: 5 | 
sbt package 6 | 7 | Then submit with: 8 | spark-submit --class "SparkApp" \ 9 | --master local[4] \ 10 | target/scala-2.11/spark-template_2.11-0.1.jar 11 | 12 | */ 13 | 14 | import org.apache.spark.SparkContext 15 | import org.apache.spark.SparkContext._ 16 | import org.apache.spark.SparkConf 17 | 18 | object SparkApp { 19 | 20 | def main(args: Array[String]): Unit = { 21 | 22 | val spark = new SparkConf(). 23 | setAppName("Spark Application") 24 | val sc = new SparkContext(spark) 25 | 26 | sc.textFile("/usr/share/dict/words"). 27 | map(_.trim). 28 | map(_.toLowerCase). 29 | flatMap(_.toCharArray). 30 | filter(_ > '/'). 31 | filter(_ < '}'). 32 | map{(_,1)}. 33 | reduceByKey(_+_). 34 | sortBy(_._2,false). 35 | collect. 36 | foreach(println) 37 | 38 | } 39 | 40 | } 41 | 42 | // eof 43 | 44 | 45 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/README.md: -------------------------------------------------------------------------------- 1 | # Scala data tables and frames 2 | 3 | Code samples associated with my blog post "Scala data frames and tables" which can be found at: 4 | 5 | https://darrenjw.wordpress.com/2015/08/21/data-frames-and-tables-in-scala/ 6 | 7 | See the post for explanation of the examples. 8 | 9 | Note, however, the addition here of the [Smile](http://haifengl.github.io/) `DataFrame` example, which is a welcome development. 10 | 11 | Note that you must run the script r/gen-csv.R in an R session FIRST, in order to generate the CSV file required for the Scala examples. 12 | 13 | If you have R installed, then: 14 | ```bash 15 | cd r 16 | R CMD BATCH gen-csv.R 17 | ``` 18 | should generate the file `cars93.csv` required by all of the scripts. 19 | 20 | The other directories contain Scala examples. Each can be run by going in to the relevant directory and doing `sbt run`, except for the Spark example, which needs to be run in a Spark shell (to be covered later). 
21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/Readme.md: -------------------------------------------------------------------------------- 1 | # Creating an assembly JAR (for calling from R) 2 | 3 | This directory shows how to create an assembly JAR for a Scala project, bundling all dependencies. This can be convenient for deployment reasons, generally, but also for calling from R. 4 | 5 | Just typing 6 | ```bash 7 | sbt assembly 8 | ``` 9 | should build the fat JAR. This will be placed in `target/scala-2.12`, along with the regular artefact. Comparing the file sizes is enlightening! The `assembly` SBT task is provided by the relevant line in `project/plugins.sbt`. 10 | 11 | To run the code directly in the JVM, use something like 12 | ```bash 13 | java -jar target/scala-2.12/metropolis-assembly-assembly-0.1.jar 14 | ``` 15 | or 16 | ```bash 17 | java -jar target/scala-2.12/metropolis-assembly-assembly-0.1.jar 20 18 | ``` 19 | The file `rscala.R` shows how to call this from R using the `rscala` library. You can run this with 20 | ```bash 21 | R CMD BATCH rscala.R 22 | ``` 23 | if you have R installed. 24 | 25 | -------------------------------------------------------------------------------- /exercises/Basics.md: -------------------------------------------------------------------------------- 1 | # Scala and FP basics 2 | 3 | ## Practical exercises 4 | 5 | Exercises following the material presented in Chapter 2 6 | 7 | ### 1. LogFactorial example 8 | 9 | Find and run the complete log-factorial example using SBT, using `run` from the SBT prompt. Pass in an argument using, eg. `run 1000`. Next, drop into the REPL with `console`. Then enter 10 | ```scala 11 | import LogFact._ 12 | logfact(100) 13 | ``` 14 | So we can use the SBT REPL to run functions within our project. This is extremely useful for debugging. 15 | 16 | ### 2. 
Interval bisection 17 | 18 | Implement and test a recursive function for root-finding using interval bisection. See the [detailed instructions](bisection/Readme.md) for further details. 19 | 20 | ### 3. Scala exercises 21 | 22 | Start to work through the interactive [Scala tutorial](https://www.scala-exercises.org/scala_tutorial/) from [Scala exercises](https://www.scala-exercises.org/). Do the first couple of sections. Try to work through everything up to and including "Syntactic Conveniences" in any spare time you have over the next couple of days. 23 | 24 | 25 | #### eof 26 | 27 | -------------------------------------------------------------------------------- /examples/C6-Smile/docs/smile-example.md: -------------------------------------------------------------------------------- 1 | # Smile example 2 | 3 | ## Some mdoc documentation 4 | 5 | This is some documentation prepared using `mdoc`. The original file is in `docs`, but the `sbt` task `mdoc` will typecheck and execute the code blocks, and put the compiled markdown document in `target/mdoc`. 6 | 7 | We begin by reading the data (we assume that the file "yacht.csv" already exists). 8 | ```scala mdoc 9 | val df = smile.read.csv("yacht.csv") 10 | df 11 | ``` 12 | We can get a quick summary of the data as follows. 13 | ```scala mdoc 14 | df.summary 15 | ``` 16 | We can now carry out OLS regression after a couple of imports 17 | ```scala mdoc 18 | import smile.data.formula._ 19 | import scala.language.postfixOps 20 | val mod = smile.regression.ols("Resist" ~, df) 21 | mod 22 | ``` 23 | If we don't want to regress on everything, we can just choose what we'd like to regress on. 24 | ```scala mdoc 25 | smile.regression.ols("Resist" ~ "Froude", df) 26 | smile.regression.ols("Resist" ~ "Froude" + "LongPos", df) 27 | ``` 28 | 29 | ### Summary 30 | 31 | This brief document has illustrated how easy and convenient it is to produce executable documentation and reports for Scala. 
32 | 33 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/src/main/scala/gamma-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | gamma-test.scala 3 | 4 | Test the gamma random number generator in Breeze 5 | 6 | */ 7 | 8 | object GammaTest { 9 | 10 | import math.{abs,sqrt} 11 | import breeze.stats.meanAndVariance 12 | import breeze.stats.distributions.Gamma 13 | 14 | def gammaTest(N: Int, a: Double, b: Double): Unit = { 15 | println(s"Testing Gamma($a,$b) with $N trials") 16 | val mean = a*b 17 | val variance = a*b*b 18 | val gammas = Gamma(a,b).sample(N) 19 | val stats = meanAndVariance(gammas) 20 | val xbar = stats.mean 21 | val s2 = stats.variance 22 | println(s"True mean: $mean Sample mean: $xbar") 23 | val zscore = (xbar - mean)/sqrt(variance/N) 24 | println(s"z-score is $zscore") 25 | assert(abs(zscore) < 3.0) 26 | println(s"True variance: $variance Sample variance: $s2") 27 | } 28 | 29 | def main(args: Array[String]): Unit = { 30 | println("Testing Breeze's Gamma generator") 31 | val N = 10000000 32 | gammaTest(N,2.0,3.0) 33 | gammaTest(N,1.0,2.0) 34 | gammaTest(N,5.0,1.0) 35 | gammaTest(N,5.0,0.1) 36 | gammaTest(N,1.0,5.0) 37 | gammaTest(N,0.5,3.0) 38 | gammaTest(N,0.2,1.0) 39 | gammaTest(N,0.2,0.1) 40 | gammaTest(N,0.2,4.0) 41 | println("Test complete") 42 | } 43 | 44 | } 45 | 46 | // eof 47 | 48 | -------------------------------------------------------------------------------- /sbt/Readme.md: -------------------------------------------------------------------------------- 1 | # sbt installation and testing 2 | 3 | `sbt` is the simple/scala build tool. It is the standard build tool for Scala. Other than a recent Java installation, `sbt` is all you need for building Scala projects. 
4 | 5 | Some useful links relating to `sbt` are given below: 6 | 7 | * [sbt](http://www.scala-sbt.org/) 8 | * [Documentation](http://www.scala-sbt.org/documentation.html) 9 | * [Download](https://www.scala-sbt.org/download.html) 10 | 11 | Please follow the relevant instructions for installing `sbt` on your OS. 12 | 13 | If at all possible, please install `sbt` on your system in advance of the start of the course, and test that it works by typing `sbt run` from the `sbt-test` directory of this code repository. You will need an Internet connection the first time that you run this, and it will take some time to run while it downloads Scala, the Scala compiler, the Scala standard library and all of the libraries that we will be using in the course. If the test runs correctly, it should finish by printing the message "SBT IS INSTALLED AND WORKING" to the console. Once these libraries are downloaded and cached on your system, subsequent builds should be much faster, and should not require an Internet connection. 14 | 15 | Once you have `sbt` installed and working, see the [Readme](../sbt-test/Readme.md) in the `sbt-test` directory for further information. 
16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /examples/C6-Rainier/build.sbt: -------------------------------------------------------------------------------- 1 | // build.sbt 2 | 3 | name := "rainier" 4 | 5 | version := "0.1-SNAPSHOT" 6 | 7 | scalacOptions ++= Seq( 8 | "-unchecked", "-deprecation", "-feature", "-language:higherKinds", 9 | "-language:implicitConversions", "-Ypartial-unification" 10 | ) 11 | 12 | enablePlugins(MdocPlugin) 13 | 14 | addCompilerPlugin("org.typelevel" %% "kind-projector" % "0.11.0" cross CrossVersion.full) 15 | addCompilerPlugin("org.scalamacros" %% "paradise" % "2.1.1" cross CrossVersion.full) 16 | 17 | libraryDependencies ++= Seq( 18 | "org.scalatest" %% "scalatest" % "3.1.1" % "test", 19 | "org.scalactic" %% "scalactic" % "3.0.8", 20 | "org.typelevel" %% "cats-core" % "2.0.0", 21 | "org.typelevel" %% "simulacrum" % "1.0.0", 22 | "com.cibo" %% "evilplot" % "0.6.3", // 0.7.0 23 | "com.cibo" %% "evilplot-repl" % "0.6.3", // 0.7.0 24 | "com.stripe" %% "rainier-core" % "0.3.0", 25 | "com.stripe" %% "rainier-notebook" % "0.3.0" 26 | 27 | ) 28 | 29 | resolvers += Resolver.bintrayRepo("cibotech", "public") // for EvilPlot 30 | 31 | resolvers ++= Seq( 32 | "Sonatype Snapshots" at 33 | "https://oss.sonatype.org/content/repositories/snapshots/", 34 | "Sonatype Releases" at 35 | "https://oss.sonatype.org/content/repositories/releases/", 36 | "jitpack" at "https://jitpack.io" // for Jupiter/notebook 37 | 38 | ) 39 | 40 | scalaVersion := "2.12.10" 41 | 42 | 43 | // eof 44 | 45 | -------------------------------------------------------------------------------- /sbt-test/Readme.md: -------------------------------------------------------------------------------- 1 | # SBT test 2 | 3 | This directory contains a Scala SBT project with numerous dependencies. Assuming that `sbt` is installed, you should be able to compile and run the project by typing `sbt run` from this directory. 
After downloading and caching any required libraries, it will compile the code (in `src/main`) and run it. If the program runs successfully, it will print the message "SBT IS INSTALLED AND WORKING" to the console. 4 | 5 | For reference, the file [build.sbt](build.sbt) shows how to include a dependency on many of the libraries most commonly required for statistical computing applications. 6 | 7 | For good measure, you might also want to run `sbt test` from this directory. This should compile and run a few tests (in `src/test`). Note that there are some simple example tests using both ScalaTest (in different styles) and ScalaCheck (for property-based testing) in this directory, so these provide useful templates for test code. 8 | 9 | This directory is also useful for starting a REPL including commonly used dependencies. Just running `sbt console` from this directory will give a Scala console including dependencies on libraries such as Breeze, Breeze-viz and Cats, which can be very useful for interactive experiments. 10 | 11 | For further information about sbt, read through the [sbt getting started guide](https://www.scala-sbt.org/1.x/docs/Getting-Started.html). 12 | 13 | #### eof 14 | 15 | 16 | -------------------------------------------------------------------------------- /IntelliJ.md: -------------------------------------------------------------------------------- 1 | # IntelliJ installation and setup 2 | 3 | IntelliJ now seems to be the most popular IDE for Scala. 
4 | 5 | ## Installation 6 | 7 | * To get started with IntelliJ, Scala and SBT, follow the official Scala [getting started guide](http://docs.scala-lang.org/getting-started.html) 8 | - [Get started with IntelliJ and Scala](https://www.scala-lang.org/documentation/getting-started-intellij-track/getting-started-with-scala-in-intellij.html) 9 | - [Get started with IntelliJ and Sbt](https://www.scala-lang.org/documentation/getting-started-intellij-track/building-a-scala-project-with-intellij-and-sbt.html) 10 | - [Using ScalaTest with IntelliJ](http://docs.scala-lang.org/getting-started-intellij-track/testing-scala-in-intellij-with-scalatest.html) 11 | 12 | ## Tips 13 | 14 | * Always import SBT project into IntelliJ as SBT projects - IntelliJ will then examine the SBT build file to figure out all appropriate dependencies 15 | - do **Import Project** and select the `build.sbt` file within the project directory 16 | - the default import options are mostly fine, though you probably want to build with the *SBT shell* 17 | * IntelliJ can get confused if you try and import two different SBT projects with the same name 18 | - So, if you copy the `app-template` directory, you should edit the project name in `build.sbt` *before* trying to import it into IntelliJ 19 | - **Note** that you may prefer to use `sbt new darrenjw/breeze.g8` to directly create an app template with an *appropriate name* and then import that 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /sbt-test/src/test/scala/sbt-test-scalacheck.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.matchers.should.Matchers 2 | 3 | import org.scalacheck._ 4 | import org.scalacheck.Prop.{forAll,propBoolean} 5 | 6 | class SqrtSpecification extends Properties("Sqrt") with Matchers { 7 | 8 | property("math.sqrt should square to give original") = 9 | forAll { a: Double => 10 | (a >= 0.0) ==> { 11 | val s = 
math.sqrt(a) 12 | val tol = 1e-8 * a 13 | s*s === a +- tol 14 | } 15 | } 16 | 17 | } 18 | 19 | class GammaSpec extends Properties("Gamma") with Matchers { 20 | 21 | import breeze.stats.distributions.Gamma 22 | 23 | val tol = 1e-8 24 | val big = 1e100 25 | 26 | property("mean") = 27 | forAll { (a: Double, b: Double) => 28 | ((a > tol) && (a < big) && (b > tol) && (b < big)) ==> { 29 | Gamma(a,b).mean === a*b +- tol 30 | } 31 | } 32 | 33 | } 34 | 35 | class StringSpecification extends Properties("String") with Matchers { 36 | 37 | property("startwith first string") = 38 | forAll { (a: String, b: String) => 39 | (a+b).startsWith(a) 40 | } 41 | 42 | property("concatenate bound") = 43 | forAll { (a: String, b: String) => 44 | (a+b).length >= a.length && (a+b).length >= b.length 45 | } 46 | 47 | property("concatenate length") = 48 | forAll { (a: String, b: String) => 49 | (a+b).length == a.length + b.length 50 | } 51 | 52 | property("substring") = 53 | forAll { (a: String, b: String, c: String) => 54 | (a+b+c).substring(a.length, a.length+b.length) == b 55 | } 56 | 57 | } 58 | 59 | // eof 60 | 61 | -------------------------------------------------------------------------------- /exercises/bisection/src/test/scala/bisect-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | bisect-test.scala 3 | 4 | Tests for bisection exercise 5 | 6 | */ 7 | 8 | import org.scalatest._ 9 | import org.scalatest.Matchers._ 10 | 11 | class MyTestSuite extends FlatSpec { 12 | 13 | "1+2" should "=3" in { 14 | assert(1 + 2 === 3) 15 | } 16 | 17 | val tol = 1.0e-8 18 | 19 | def approxEq(test: Double, should: Double): Boolean = { 20 | if (math.abs(test - should) < tol) true else { 21 | println("approxEq test failed: found " + test + " but expected " + should + " with tolerance " + tol) 22 | false 23 | } 24 | } 25 | 26 | "1.0" should "approxEq 1.0" in { 27 | assert(approxEq(1.0, 1.0)) 28 | } 29 | 30 | import Bisect._ 31 | 32 | 
"findRoot(-10.0,10.0)(x => x+1.0)" should "=-1.0" in { 33 | assert(approxEq(findRoot(-10.0, 10.0)(x => x + 1.0), -1.0)) 34 | } 35 | 36 | "findRoot(-5.0, 10.0)(x => 2.0 - x)" should "=2.0" in { 37 | assert(approxEq(findRoot(-5.0, 10.0)(x => 2.0 - x), 2.0)) 38 | } 39 | 40 | "findRoot(0.0, 5.0)(x => x - 1.0)" should "= 1.0" in { 41 | assert(approxEq(findRoot(0.0, 5.0)(x => x - 1.0), 1.0)) 42 | 43 | } 44 | 45 | "findRoot(0.0, 2.0)(x => (x + 1.0) * (x - 1.0))" should "= 1.0" in { 46 | assert(approxEq(findRoot(0.0, 2.0)(x => (x + 1.0) * (x - 1.0)), 1.0)) 47 | } 48 | 49 | "findRoot(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0))" should "= -1.0" in { 50 | assert(approxEq(findRoot(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0)), -1.0)) 51 | } 52 | 53 | "findRoot(0.0, 2.0)(x => x * x - 2.0)" should "= math.sqrt(2.0)" in { 54 | assert(approxEq(findRoot(0.0, 2.0)(x => x * x - 2.0), math.sqrt(2.0))) 55 | } 56 | 57 | } 58 | 59 | /* eof */ 60 | 61 | -------------------------------------------------------------------------------- /exercises/bisection/Readme.md: -------------------------------------------------------------------------------- 1 | # FP Basics 2 | 3 | ## Exercise: Interval bisection 4 | 5 | Implement a function to find the (approximate) *root* of a simple function of type `Double => Double` using interval bisection - that is, find the input value `x` which makes the output of the function equal zero. 6 | 7 | Your function should have the type signature: 8 | 9 | ```scala 10 | findRoot(low: Double, high: Double)(f: Double => Double): Double 11 | ``` 12 | 13 | You may *assume* that the sign of `f(low)` is different to the sign of `f(high)` - do not test or check for this in your code (yet) - we will come back to this later. Similarly, just *assume* that `low < high`. 14 | 15 | The function should be recursive, evaluating the function at the given end points and mid-point, and then calling itself on a smaller interval where the function changes sign. 
16 | 17 | You will obviously need some kind of termination criterion for the recursion. 18 | 19 | Just find a single root. *Do not* worry about identifying or tracking multiple roots. 20 | 21 | Some test cases are given below. Note that since your method is approximate, you can only expect approximate equality. 22 | 23 | ```scala 24 | findRoot(-10.0,10.0)(x => x+1.0) == -1.0 25 | 26 | findRoot(-5.0,10.0)(x => 2.0-x) == 2.0 27 | 28 | findRoot(0.0,5.0)(x => x-1.0) == 1.0 29 | 30 | findRoot(0.0,2.0)(x => (x+1.0)*(x-1.0)) == 1.0 31 | 32 | findRoot(-2.0,0.0)(x => (x+1.0)*(x-1.0)) == -1.0 33 | 34 | findRoot(0.0,2.0)(x => x*x-2.0) == math.sqrt(2.0) 35 | ``` 36 | 37 | This directory contains a template for a Scala implementation, including the above test cases. Run the `~test` task from `sbt` to check your implementation. 38 | 39 | **Optional:** experts may want to consider using continuation passing to avoid repeated evaluation of the function at the same values. 40 | 41 | 42 | -------------------------------------------------------------------------------- /examples/C6-Smile/src/main/scala/smile.scala: -------------------------------------------------------------------------------- 1 | /* 2 | smile.scala 3 | 4 | Testing the use of Smile as a Scala library for data analysis 5 | 6 | */ 7 | 8 | object SmileApp { 9 | 10 | 11 | def main(args: Array[String]): Unit = { 12 | println("Hi") 13 | val url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data" 14 | val fileName = "yacht.csv" 15 | 16 | // download the file to disk if it hasn't been already 17 | val file = new java.io.File(fileName) 18 | if (!file.exists) { 19 | println("Downloading file...") 20 | val s = new java.io.PrintWriter(file) 21 | s.write("LongPos,PrisCoef,LDR,BDR,LBR,Froude,Resist\n") 22 | val data = scala.io.Source.fromURL(url).getLines 23 | data.foreach(l => s.write(l.trim.split(' ').filter(_ != "").mkString("",",","\n"))) 24 | s.close 25 | println("File downloaded.") 26 
| } 27 | 28 | println("Read the data from CSV into a DataFrame") 29 | val df = smile.read.csv(fileName) 30 | println(df) 31 | println(df.summary) 32 | 33 | println("Simple OLS regression") 34 | import smile.data.formula._ 35 | import scala.language.postfixOps 36 | val mod = smile.regression.ols("Resist" ~, df) 37 | println(mod) 38 | println(smile.regression.ols("Resist" ~ "Froude", df)) 39 | println(smile.regression.ols("Resist" ~ "Froude" + "LongPos", df)) 40 | 41 | println("Understand formula parsing...") 42 | println(buildFormula("Resist" ~).y(df)) 43 | println(buildFormula("Resist" ~).y(df).toDoubleArray) 44 | println(buildFormula("Resist" ~).matrix(df, true)) 45 | println(buildFormula("Resist" ~).matrix(df, true).toArray) 46 | println(buildFormula("Resist" ~).x(df)) 47 | println(buildFormula("Resist" ~).x(df).summary) 48 | 49 | 50 | } 51 | 52 | } 53 | 54 | // eof 55 | 56 | -------------------------------------------------------------------------------- /exercises/Spark.md: -------------------------------------------------------------------------------- 1 | # Spark 2 | 3 | ## Practical exercises 4 | 5 | ### 1. Unpack, test and configure Spark 6 | 7 | * Carefully work through the Spark chapter in the notes, unpacking, testing and configuring Spark as you go. Make sure that you can reproduce the first few examples before proceeding further. 8 | 9 | ### 2. Review some Spark documentation 10 | 11 | * The official [Spark Documentation](http://spark.apache.org/docs/2.4.5/) is pretty good. Read through the [Quick start guide](http://spark.apache.org/docs/2.4.5/quick-start.html), then quickly skim the [Programming guide](http://spark.apache.org/docs/2.4.5/rdd-programming-guide.html), then the [ML guide](http://spark.apache.org/docs/2.4.5/ml-guide.html), especially the section on [Classification and regression](http://spark.apache.org/docs/2.4.5/ml-classification-regression.html). 
Briefly familiarise yourself with the [API docs](http://spark.apache.org/docs/2.4.5/api/scala/index.html#org.apache.spark.package). 12 | 13 | ### 3. Logistic regression for the SpamBase dataset 14 | 15 | * This exercise will be concerned with analysis of the old SpamBase dataset. After skimming the documentation: 16 | * ftp://ftp.ics.uci.edu/pub/machine-learning-databases/spambase/ download the dataset: 17 | * ftp://ftp.ics.uci.edu/pub/machine-learning-databases/spambase/spambase.data 18 | to your machine and move it somewhere sensible for subsequent analysis. It actually isn't very big, so don't worry about size/memory issues. 19 | * The data is a simple CSV file, so can be parsed easily with Spark's built-in CSV parser. Write a Spark shell script to read the data and fit a simple logistic regression model for the final column (Spam or not) given the other variables. 20 | * Use Lasso regression to shrink out some of the variables. Choose your Lasso regularisation parameter by cross-validation. How many of the 57 predictor variables drop out of the regression in this case? 21 | * Create a Spark application for this analysis, package it, and submit it to Spark using `spark-submit`. 
22 | 23 | 24 | #### eof 25 | -------------------------------------------------------------------------------- /examples/C6-Rainier/src/main/scala/rainier.scala: -------------------------------------------------------------------------------- 1 | /* 2 | rainier.scala 3 | 4 | Simple Rainier logistic regression example 5 | 6 | */ 7 | 8 | object RainierLogRegApp { 9 | 10 | import com.stripe.rainier.core._ 11 | import com.stripe.rainier.compute._ 12 | import com.stripe.rainier.sampler._ 13 | import com.stripe.rainier.notebook._ 14 | import com.cibo.evilplot._ 15 | import com.cibo.evilplot.plot._ 16 | 17 | def main(args: Array[String]): Unit = { 18 | 19 | // first simulate some data from a logistic regression model 20 | implicit val rng = ScalaRNG(3) 21 | val N = 1000 22 | val beta0 = 0.1 23 | val beta1 = 0.3 24 | val x = (1 to N) map { _ => 25 | 3.0 * rng.standardNormal 26 | } 27 | val theta = x map { xi => 28 | beta0 + beta1 * xi 29 | } 30 | def expit(x: Double): Double = 1.0 / (1.0 + math.exp(-x)) 31 | val p = theta map expit 32 | val yb = p map (pi => (rng.standardUniform < pi)) 33 | val y = yb map (b => if (b) 1L else 0L) 34 | println(y.take(10)) 35 | println(x.take(10)) 36 | 37 | // now build Rainier model 38 | val b0 = Normal(0, 5).latent 39 | val b1 = Normal(0, 5).latent 40 | val model = Model.observe(y, Vec.from(x).map{xi => 41 | val theta = b0 + b1*xi 42 | val p = 1.0 / (1.0 + (-theta).exp) 43 | Bernoulli(p) 44 | }) 45 | 46 | // now sample from the model 47 | val sampler = EHMC(warmupIterations = 2000, iterations = 1000) 48 | println("Sampling...\nthis can take a while...") 49 | val bt = model.sample(sampler) 50 | println("Finished sampling.") 51 | val b0t = bt.predict(b0) 52 | println(b0t.sum/b0t.length) 53 | show("b0", density(b0t)) // only works in Jupyter and mdoc 54 | val b1t = bt.predict(b1) 55 | println(b1t.sum/b1t.length) 56 | show("b1", density(b1t)) // only works in Jupyter and mdoc 57 | displayPlot(density(b0t).render()) // hack for app/repl 58 | 
displayPlot(density(b1t).render()) // hack for app/repl 59 | 60 | } 61 | 62 | } 63 | 64 | // eof 65 | 66 | -------------------------------------------------------------------------------- /exercises/Collections.md: -------------------------------------------------------------------------------- 1 | # Collections 2 | 3 | ## Practical exercises 4 | 5 | Exercises following the material presented in Chapter 3 6 | 7 | ### 1. Review official documentation 8 | 9 | Briefly review the official [collections overview](http://docs.scala-lang.org/overviews/collections/overview.html), concentrating in particular on [immutable collection classes](http://docs.scala-lang.org/overviews/collections/concrete-immutable-collection-classes.html), and also the [parallel collections overview](http://docs.scala-lang.org/overviews/parallel-collections/overview.html). Try some code examples in a REPL. 10 | 11 | ### 2. Computing the sample mean and standard deviation 12 | 13 | a. By copying the `app-template` directory (or otherwise), create a new Scala SBT project. Write a function with signature 14 | ```scala 15 | meanAndSD(x: Vector[Double]): (Double, Double) 16 | ``` 17 | which returns a tuple containing the [sample mean](http://mathworld.wolfram.com/SampleMean.html) and [sample standard deviation](https://en.wikipedia.org/wiki/Standard_deviation) of the collection of numbers. 18 | 19 | b. When you get it working, write some tests to check it works on a few trivial examples. 20 | 21 | c. Generalise it so that it works for any collection of `Doubles`, and check that it works for parallel as well as serial collections. 22 | 23 | d. Test your function on huge collections of random *U(0,1)* quantities. What should the true mean and standard deviation be? Can you detect a difference in speed between the serial and parallel versions? 24 | 25 | e. (optional) You have probably written this code so that it computes the mean and SD using two passes over the data. 
Can you figure out a way to implement it using just a single pass? 26 | 27 | f. (optional) You have probably completed task e. using a sequential fold which can not easily be parallelised. Can you make it parallelisable by replacing your `fold` with `aggregate`. You will have to look up how `aggregate` works. 28 | 29 | 30 | ### 3. Wrap interval bisection code in an Option 31 | 32 | Starting from the code you wrote for [interval bisection](./bisection/Readme.md) previously, make it safe by wrapping it in an Option. See the [detailed instructions](option/Readme.md) for further information. 33 | 34 | 35 | #### eof 36 | 37 | -------------------------------------------------------------------------------- /exercises/Monte.md: -------------------------------------------------------------------------------- 1 | # Monte Carlo methods 2 | 3 | ## Practical exercises 4 | 5 | ### 1. Simple Monte Carlo 6 | 7 | A mixture random variable is constructed as a `Binomial` random quantity with sample size taken from a `Poisson` distribution with mean 20 and success probability drawn independently from a `Beta(4,4)` distribution. 8 | 9 | * Monadically, or otherwise, construct a function for drawing samples from this random variable. Note that fresh `Poisson` and `Beta` draws are required for each `Binomial` draw. 10 | * Take 10,000 draws and plot the distribution. 11 | * Average the draws to get an empirical estimate of the mean of this random variable. 12 | * What is the theoretical mean? 13 | 14 | ### 2. Bayesian inference for a normal random sample 15 | 16 | Consider a vector of iid sample observations `x` from a Gaussian distribution with unknown mean and variance. 
We can define a log-likelihood function with 17 | ```scala 18 | import breeze.stats.distributions.Gaussian 19 | import scala.collection.GenSeq 20 | def ll(x: GenSeq[Double])(mean: Double,stdev: Double): Double = { 21 | val gau = Gaussian(mean,stdev) 22 | x map (gau.logPdf) reduce (_+_) 23 | } 24 | ``` 25 | 26 | * Assuming a flat prior the log-posterior is the log-likelihood. In this case, write a Metropolis sampler to sample from the posterior distribution by using the log-posterior as the log-target. For a proposal kernel, use a bivariate normal distribution, constructed using the `MultivariateGaussian` distribution in Breeze. Centre the proposal on the current value, and use a proposal variance matrix which is a scaled version of the 2x2 identity matrix. Start off with a scaling of 1. 27 | * Test your implementation on simulated data by conditioning on a large `x` sampled with a mean and variance you know. Manually tune the scaling factor of your Metropolis algorithm to get reasonable mixing. Check that the posterior mean and standard deviation are close to the true values. 28 | * I deliberately parameterised the log likelihood with a `GenSeq`. Run your MCMC algorithm in parallel by passing in `x.par` instead of `x`. Time the runs to see what speed-up (if any) you get. You will probably only get significant speed-up for large `x` (for me, the parallel version is significantly quicker for a sample size of 10k). 29 | 30 | 31 | 32 | 33 | #### eof 34 | 35 | -------------------------------------------------------------------------------- /examples/C6-Rainier/docs/LogisticRegression.md: -------------------------------------------------------------------------------- 1 | # Logistic regression 2 | 3 | We will walk through a logistic regression example in Rainier. First some imports. 
4 | 5 | ```scala mdoc 6 | import com.stripe.rainier.core._ 7 | import com.stripe.rainier.compute._ 8 | import com.stripe.rainier.sampler._ 9 | import com.stripe.rainier.notebook._ 10 | import com.cibo.evilplot._ 11 | import com.cibo.evilplot.plot._ 12 | ``` 13 | 14 | Now simulate some synthetic data from a logistic regression model that we can used to test our inference algorithm. 15 | ```scala mdoc 16 | implicit val rng = ScalaRNG(3) 17 | val N = 1000 18 | val beta0 = 0.1 19 | val beta1 = 0.3 20 | val x = (1 to N) map { _ => 21 | 3.0 * rng.standardNormal 22 | } 23 | val theta = x map { xi => 24 | beta0 + beta1 * xi 25 | } 26 | def expit(x: Double): Double = 1.0 / (1.0 + math.exp(-x)) 27 | val p = theta map expit 28 | val yb = p map (pi => (rng.standardUniform < pi)) 29 | val y = yb map (b => if (b) 1L else 0L) 30 | println(y.take(10)) 31 | println(x.take(10)) 32 | ``` 33 | Now we have some data, we can build a Rainier model. 34 | ```scala mdoc 35 | val b0 = Normal(0, 5).latent 36 | val b1 = Normal(0, 5).latent 37 | val model = Model.observe(y, Vec.from(x).map{xi => 38 | val theta = b0 + b1*xi 39 | val p = 1.0 / (1.0 + (-theta).exp) 40 | Bernoulli(p) 41 | }) 42 | ``` 43 | This completes specification of the Bayesian model. We now need to sample from the implied posterior distribution. 44 | ```scala mdoc 45 | val sampler = EHMC(warmupIterations = 2000, iterations = 1000) 46 | println("Sampling...\nthis can take a while...") 47 | val bt = model.sample(sampler) 48 | println("Finished sampling.") 49 | val b0t = bt.predict(b0) 50 | println(b0t.sum/b0t.length) 51 | ``` 52 | We can plot the marginal posteriors using `show`, which works in both mdoc and Jupyter notebooks, but doesn't currently work from the Scala REPL. 
53 | ```scala mdoc:image:b0.png 54 | show("b0", density(b0t)) // only works in Jupyter and mdoc 55 | ``` 56 | 57 | ```scala mdoc 58 | val b1t = bt.predict(b1) 59 | println(b1t.sum/b1t.length) 60 | ``` 61 | 62 | ```scala mdoc:image:b1.png 63 | show("b1", density(b1t)) // only works in Jupyter and mdoc 64 | ``` 65 | So we see that mdoc documents provide a nice way to document Rainier modelling workflows, similar to the way people often document R workflows using R Markdown. 66 | -------------------------------------------------------------------------------- /SelfStudyGuide.md: -------------------------------------------------------------------------------- 1 | # Self Study Guide 2 | 3 | This course is currently configured to be delivered as a (very!) intensive three-day short course, covering three chapters of notes each day. If you like what you see here, please consider signing up for the next iteration - see the [front page](README.md) for details. 4 | 5 | However, since all essential materials are now freely available on-line, it is perfectly possible to self-study this course. Use the hashtag `#scscala` when discussing this course on-line to allow others to engage with you. 6 | 7 | Although I deliver this material in three days, it is not realistic to cover this material in three days of self-study. Even if you have the luxury of being able to study this course full-time, you should allow one full day per chapter. In other words, you should allow roughly two weeks to cover the full course, based on more-or-less full-time study. 8 | 9 | In the more typical case where you are studying this course on top of full-time study or employment, covering one chapter per week is probably more realistic. This will make for a nine-week course, covering material at roughly the same rate as a MOOC such as Coursera. 10 | 11 | However you study the course, the plan of study should be roughly the same. For each Chapter: 12 | 13 | 1. 
Read the [course notes](https://github.com/darrenjw/scala-course/raw/master/scscala.pdf) for the Chapter (one Chapter only) 14 | 2. Run the code examples from the Chapter. If you don't like typing, copy-and-paste code examples from the [fragments](fragments/Readme.md) directory. Note that copying-and-pasting from the PDF of the course notes doesn't work well. 15 | 3. Inspect and run all of the [examples](examples/) associated with the Chapter. 16 | 4. Work through all of the [exercises](exercises/Readme.md) associated with the Chapter. 17 | 5. Don't move on to the next Chapter until you have had a *serious* attempt at all of the end-of-Chapter exercises. 18 | 19 | You learn programming by *programming* and not by reading. Although it is tempting to just read through the notes and skip everything else, if you do this you are likely to get to the end feeling like you've sort-of understood everything but not actually be able to sit down and write code. 20 | 21 | Further information, including [laptop setup instructions](Setup.md), can be obtained from the [start here](StartHere.md) page. 22 | 23 | #### eof 24 | 25 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/src/main/scala/monte-carlo.scala: -------------------------------------------------------------------------------- 1 | /* 2 | monte-carlo.scala 3 | Integration via rejection sampling 4 | Integrate the standard normal PDF from -5 to 5 to get an estimate close to 1... 
5 | Simulate points uniformly over a bounding box and look at fraction of points 6 | falling under the PDF 7 | */ 8 | 9 | import scala.math._ 10 | import breeze.stats.distributions.Uniform 11 | import breeze.linalg._ 12 | import scala.annotation.tailrec 13 | 14 | object MonteCarlo { 15 | 16 | def f(x: Double): Double = math.exp(-x * x / 2) / math.sqrt(2 * Pi) 17 | 18 | // Idiomatic Breeze solution 19 | def mc1(its: Int): Int = { 20 | val x = runif(its, -5.0, 5.0) 21 | val y = runif(its, 0.0, 0.5) 22 | val fx = x map { f(_) } 23 | sum((y <:< fx) map { xi => if (xi == true) 1 else 0 }) 24 | } 25 | 26 | // Fast, memory-efficient tail call 27 | def mc2(its: Long): Long = { 28 | @tailrec def mc(its: Long, acc: Long): Long = { 29 | if (its == 0) acc else { 30 | val x = runif(-5.0, 5.0) 31 | val y = runif(0.0, 0.5) 32 | if (y < f(x)) mc(its - 1, acc + 1) else mc(its - 1, acc) 33 | } 34 | } 35 | mc(its, 0) 36 | } 37 | 38 | // Parallel version 39 | def mc3(its: Long,NP: Int = 8): Long = { 40 | val N = its / NP // assuming NP | its 41 | (1 to NP).par.map { x => mc2(N) }.sum 42 | } 43 | 44 | // R-like functions for Uniform random numbers 45 | def runif(n: Int, l: Double, u: Double) = DenseVector[Double](Uniform(l, u).sample(n).toArray) 46 | def runif(l: Double, u: Double) = Uniform(l, u).draw 47 | 48 | // Function for timing 49 | def time[A](f: => A) = { 50 | val s = System.nanoTime 51 | val ret = f 52 | println("time: " + (System.nanoTime - s) / 1e6 + "ms") 53 | ret 54 | } 55 | 56 | // Main method for running the code 57 | def main(args: Array[String]) = { 58 | val N = 10000000 // 10^7 is as big as mc1() can really cope with 59 | println("Running with " + N + " iterations") 60 | println("Idiomatic vectorised solution") 61 | time { println(5.0 * mc1(N) / N) } 62 | println("Fast efficient (serial) tail call") 63 | time { println(5.0 * mc2(N) / N) } 64 | println("Parallelised version") 65 | time { println(5.0 * mc3(N) / N) } 66 | println("Vary size of parallel collection") 67 | 
(1 to 12).foreach{ i => 68 | println("NP = "+i) 69 | time(mc3(N,i)) 70 | } 71 | println("Done") 72 | } 73 | 74 | 75 | 76 | } 77 | 78 | -------------------------------------------------------------------------------- /StartHere.md: -------------------------------------------------------------------------------- 1 | # Start Here 2 | 3 | ## Main jump-off page for the Scala for Statistical Computing and Data Science Short Course 4 | 5 | Course participants should bookmark this page: https://github.com/darrenjw/scala-course/blob/master/StartHere.md 6 | 7 | * [Course outline](README.md) - front page of the repo, with brief summary overview 8 | * [Setup instructions](Setup.md) - details of how to set up your laptop for programming in Scala. Please follow these instructions carefully *in advance of the start of the course*. 9 | 10 | Registered course participants should not print the [**course notes**](https://github.com/darrenjw/scala-course/raw/master/scscala.pdf), as a printed copy of the latest version will be given to participants at the start of the course. Others are welcome to self-study this course - please see the [self-study guide](SelfStudyGuide.md). Use the hashtag `#scscala` for discussing the course and the course notes on Twitter and other social media platforms. 11 | 12 | ### Rough Schedule 13 | 14 | * 9.15: Setup 15 | * 9.30 Chapter 16 | * 10.30 Exercises 17 | * 11.30 Chapter 18 | * 12.30 Lunch 19 | * 1.30 Exercises 20 | * 2.30 Chapter 21 | * 3.30 Exercises 22 | 23 | ### Resources 24 | 25 | 26 | * [Useful links](UsefulLinks.md) - selective and curated collection of some important additional on-line resources 27 | * [app-template](app-template/) - Scala sbt "seed" project, for copying and editing to create a new Scala sbt project. Minimal dependencies in the sbt build file (just Breeze). However, if you have an internet connection, it is typically better to use `sbt new darrenjw/breeze.g8` to create a new project, as described in the notes. 
28 | * [sbt-test](sbt-test/) - simple Scala sbt project with lots of dependencies. See the [build.sbt](sbt-test/build.sbt) for list of dependencies. Primarily for testing correct installation of sbt and caching of commonly required dependencies. Also useful for spinning up a REPL (`sbt console`) with lots of dependencies for interactive experiments. The [src/test](sbt-test/src/test/scala/) subdirectory tree contains some basic examples of how to write test code. 29 | 30 | * [Fragments](fragments/Readme.md) - raw fragments of code from the course notes, auto-extracted by chapter 31 | * [Examples](examples/) - complete runnable code examples, split corresponding to each chapter of the course notes 32 | * [Exercises](exercises/Readme.md) - simple programming exercises, to be tackled following the presentation of each chapter of the notes. 33 | 34 | 35 | 36 | #### eof 37 | 38 | 39 | -------------------------------------------------------------------------------- /exercises/Breeze.md: -------------------------------------------------------------------------------- 1 | # Breeze 2 | 3 | ## Practical exercises 4 | 5 | #### Useful links: 6 | 7 | * [Breeze](https://github.com/scalanlp/breeze/) 8 | * [Wiki](https://github.com/scalanlp/breeze/wiki) 9 | * [API Docs](http://www.scalanlp.org/api/breeze/) 10 | 11 | ### 1. Review the on-line documentation 12 | 13 | Begin by reading through the [quickstart guide](https://github.com/scalanlp/breeze/wiki/Quickstart) and then read through the [linear algebra cheat sheet](https://github.com/scalanlp/breeze/wiki/Linear-Algebra-Cheat-Sheet). Then quickly check a few other pages on the [Breeze wiki](https://github.com/scalanlp/breeze/wiki). Finally, have a quick look at the [API docs](http://www.scalanlp.org/api/breeze/) - for example, search the docs for `Gamma` and see how Breeze parameterises the gamma distribution. 
Note that the docs are often very terse, so sometimes there's no alternative than to browse the [source code](https://github.com/scalanlp/breeze/tree/master/math/src/main/scala/breeze). Also, the [test code](https://github.com/scalanlp/breeze/tree/master/math/src/test/scala/breeze) can sometimes be useful for figuring out how to use a Breeze function. 14 | 15 | ### 2. Multivariate normal 16 | 17 | * Write a function with type signature 18 | ```scala 19 | rmvn(n: Int, mean: DenseVector[Double], cov: DenseMatrix[Double]): DenseMatrix[Double] 20 | ``` 21 | which returns a matrix with `n` rows, each row representing an iid draw from a multivariate normal with the given mean and variance matrix. Note that this can be accomplished by *post*-multiplying a matrix of iid *N(0,1)* random quantities by the *upper* Cholesky factor of the variance matrix (on the right), and then adding the mean to each row of the result (don't use the built-in Breeze function for simulating multivariate Gaussians unless you're stuck). Study my [PCA example](../examples/C4-PCA/src/main/scala/pca.scala) for ideas. 22 | * How can you test your code to ensure that you have implemented it correctly? See the [gamma testing](../examples/C4-GammaTest/src/main/scala/gamma-test.scala) example for clues. Also, `breeze.stats.covmat` may be of use. 23 | 24 | ### 3. Scatter-plot 25 | 26 | Write a function with type signature 27 | ```scala 28 | pairs(mat: DenseMatrix[Double]): Figure 29 | ``` 30 | which produces a scatterplot matrix similar to that produced by the `pairs()` function in R. eg. for a matrix with `k` columns, the function should plot a `k * k` array of scatter plots showing each variable against each other. Test your code on some simulated data generated using your `rmvn` function. 
31 | 32 | 33 | #### eof 34 | -------------------------------------------------------------------------------- /exercises/Intro.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | ## Practical exercises 4 | 5 | These exercises are to be undertaken following presentation of the material from Chapter 1 of the course notes. 6 | 7 | 8 | ### 1. SBT and editor setup 9 | 10 | Make sure that you have this repo cloned/downloaded on your system, that SBT is installed and working, and that you have a usable editor/IDE. Check through the [laptop set-up instructions](../Setup.md) and make sure you have done everything required. In particular, make sure that typing `sbt run` from the `sbt-test` directory correctly runs the test script. 11 | 12 | ### 2. Explore the repo 13 | 14 | Explore some of the directories in this repo. In particular, find the code fragments automatically extracted from each chapter, and the complete runnable examples. Examine the complete runnable `HelloWorld` example from Chapter 1. Type `sbt run` from the relevant directory to compile and run it. Note that no `build.sbt` file or fancy directory structure is required for a simple single-file project with no dependencies. 15 | 16 | ### 3. Create your own Scala SBT project 17 | 18 | Try *not* to run SBT from the `app-template` directory in order to keep it clean. Copy that directory (and its contents) somewhere on your system and create your own SBT project. Just copy the source code file for the `HelloWorld` example into the correct source code sub-directory and check that it works by first running `sbt` and then `run` from the SBT prompt. Open up the source code in your editor/IDE and edit the message that is printed, save, then `run` again from SBT. Then type `~run` in SBT, go back to your editor and change the message again. 
As you save the buffer, note that SBT detects that the source file has changed and automatically re-compiles and re-runs the project. 19 | 20 | ### 4. Use the REPL 21 | 22 | From the SBT prompt, type `console` to get a REPL. Enter `1+2` to check it works. 23 | 24 | ### 5. Scala basics tour 25 | 26 | Start working through the [basic tour](https://docs.scala-lang.org/tour/tour-of-scala.html) from the official [Scala documentation](http://docs.scala-lang.org/). When you get to the appropriate point in the tour, open Scala Fiddle in another browser tab and interactively explore Scala in the browser. Try to understand as much as possible as you go along. You should only attempt the first two or three sections for now. If you get these finished, browse some of the other official Scala documentation. You will want to bookmark this material to return to and work through some additional sections later. 27 | 28 | 29 | #### eof 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/C4-PCA/src/main/scala/pca.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pca.scala 3 | 4 | PCA for the dataset: 5 | 6 | http://archive.ics.uci.edu/ml/datasets/Iris 7 | 8 | from the Machine learning repository: 9 | 10 | http://archive.ics.uci.edu/ml/datasets.html 11 | 12 | */ 13 | 14 | import breeze.linalg._ 15 | import breeze.stats._ 16 | 17 | object PCA { 18 | 19 | case class Pca(mat: DenseMatrix[Double]) { 20 | // via SVD of the centred data matrix 21 | val xBar = mean(mat(::,*)).t 22 | val x = mat(*,::) - xBar 23 | val SVD = svd.reduced(x) 24 | val loadings = SVD.Vt.t 25 | val sdev = SVD.S / math.sqrt(x.rows - 1) 26 | lazy val scores = x * loadings 27 | } 28 | 29 | // Main runner method 30 | def main(args: Array[String]): Unit = { 31 | 32 | val url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" 33 | val fileName = "iris.csv" 34 | val imap = Map( 35 | "Iris-setosa" 
-> 0, 36 | "Iris-versicolor" -> 1, 37 | "Iris-virginica" -> 2 38 | ) 39 | 40 | // download the file to disk if it hasn't been already 41 | val file = new java.io.File(fileName) 42 | if (!file.exists) { 43 | val s = new java.io.PrintWriter(file) 44 | val data = scala.io.Source.fromURL(url).getLines 45 | data.foreach(l => s.write(l.trim.split(','). 46 | map(x=>imap.getOrElse(x,x)).mkString("",",","\n"))) 47 | s.close 48 | } 49 | 50 | // read the file from disk 51 | val mat = csvread(new java.io.File(fileName)) 52 | println("Mat Dim: " + mat.rows + " " + mat.cols) 53 | val x = mat(::,0 to 3) 54 | println("X Dim: " + x.rows + " " + x.cols) 55 | val clas = mat(::,4).toDenseVector 56 | 57 | println("PCA with built-in Breeze version (like R princomp):") 58 | val pca = new PCA(x,covmat(x)) 59 | println("Loadings:") 60 | println(pca.loadings) 61 | println("Stdev:") 62 | println(pca.sdev) 63 | println(pca.scores(0 to 5,::)) 64 | 65 | println("Now my version (like R prcomp):") 66 | val myPca = Pca(x) 67 | println(myPca.loadings) // loadings transposed 68 | println(myPca.sdev) 69 | println(myPca.scores(0 to 5,::)) 70 | 71 | // scatter plot first 2 principal components 72 | import breeze.plot._ 73 | val fig = Figure("PCA") 74 | val p = fig.subplot(0) 75 | val ind0 = (0 until x.rows) filter (i => clas(i) == 0) 76 | p += plot(myPca.scores(ind0,0).toDenseVector, 77 | myPca.scores(ind0,1).toDenseVector,'.',colorcode="blue") 78 | val ind1 = (0 until x.rows) filter (i => clas(i) == 1) 79 | p += plot(myPca.scores(ind1,0).toDenseVector, 80 | myPca.scores(ind1,1).toDenseVector,'.',colorcode="red") 81 | val ind2 = (0 until x.rows) filter (i => clas(i) == 2) 82 | p += plot(myPca.scores(ind2,0).toDenseVector, 83 | myPca.scores(ind2,1).toDenseVector,'.',colorcode="green") 84 | } 85 | 86 | } 87 | 88 | // eof 89 | 90 | -------------------------------------------------------------------------------- /exercises/option/Readme.md: 
-------------------------------------------------------------------------------- 1 | # Collections 2 | 3 | ## Exercise: Wrapping a root-finder in an Option 4 | 5 | ### Part A 6 | 7 | Copy your previous `findRoot` function from the [previous exercise](../bisection/Readme.md), and add a new function `findRootOpt` which wraps it, so that instead of returning a `Double` it returns `Option[Double]`. The new signature is: 8 | 9 | ```scala 10 | findRootOpt(low: Double, high: Double)(f: Double => Double): Option[Double] 11 | ``` 12 | 13 | Add checks that `low < high` and that the sign of `f(low)` is different from the sign of `f(high)` and return `None` if either check fails. Otherwise your function should behave as previously, returning the root in a `Some`. 14 | 15 | All of the previous test cases translate obviously as follows: 16 | 17 | ```scala 18 | findRootOpt(-10.0,10.0)(x => x+1.0) == Some(-1.0) 19 | 20 | findRootOpt(-5.0,10.0)(x => 2.0-x) == Some(2.0) 21 | 22 | findRootOpt(0.0,5.0)(x => x-1.0) == Some(1.0) 23 | 24 | findRootOpt(0.0,2.0)(x => (x+1.0)*(x-1.0)) == Some(1.0) 25 | 26 | findRootOpt(-2.0,0.0)(x => (x+1.0)*(x-1.0)) == Some(-1.0) 27 | 28 | findRootOpt(0.0,2.0)(x => x*x-2.0) == Some(math.sqrt(2.0)) 29 | ``` 30 | 31 | In addition, we can add some new test cases which test the initial assumptions: 32 | 33 | ```scala 34 | findRootOpt(2.0,0.0)(x => x-1.0) == None 35 | 36 | findRootOpt(-1.0,-3.0)(x => x+2.0) == None 37 | 38 | findRootOpt(0.0,2.0)(x => x+1.0) == None 39 | 40 | findRootOpt(0.0,2.0)(x => x-5.0) == None 41 | 42 | ``` 43 | 44 | Again, these test cases are all included in the associated Scala template in this directory, and can be run with the `~testOnly PartA` task in `sbt`. 45 | 46 | 47 | ### Part B (if time permits) 48 | 49 | The quadratic curve `y = a*x*x` for any fixed `a > 0` intersects the unit circle `x*x + y*y = 1` exactly once for `0 <= x <= 1`. Our task is to use our function `findRootOpt` to find this `x`. 
50 | 51 | Using just a tiny bit of maths, we can write the solution to this problem as the solution to the triangular system: 52 | 53 | ```scala 54 | y - a*(1-y*y) = 0 55 | 56 | x*x + y*y -1 = 0 57 | 58 | ``` 59 | 60 | The left hand side of first equation will clearly be negative at `y=0` and positive at `y=1`. Then for `0 <= y <= 1`, the left hand side of the second equation will be negative at `x=0` and positive at `x=1`. 61 | 62 | Write a function, `solveQuad`, which accepts a value `a`, and uses a for-expression with `findRootOpt` to obtain the solution for `x`. It should have signature: 63 | 64 | ```scala 65 | solveQuad(a: Double): Option[Double] 66 | ``` 67 | 68 | We can test this function by picking an `a`, solving for `x`, computing `y = a*x*x`, then checking whether `x*x + y*y = 1`. Some example tests are included in the Scala template in this directory. 69 | 70 | You can run all tests for Part A and Part B with the `~test` task in `sbt`, or just the specific tests for Part B with `~testOnly PartB`. 
71 | 72 | 73 | -------------------------------------------------------------------------------- /UsefulLinks.md: -------------------------------------------------------------------------------- 1 | # Useful Links 2 | 3 | ## A curated set of links to useful additional on-line resources 4 | 5 | * [Official Scala website](http://www.scala-lang.org/) 6 | * [Documentation](http://docs.scala-lang.org/) 7 | * [Getting started](https://docs.scala-lang.org/getting-started/) 8 | * [IntelliJ](https://docs.scala-lang.org/getting-started/intellij-track/getting-started-with-scala-in-intellij.html) 9 | * [Scaladoc](http://docs.scala-lang.org/overviews/scaladoc/overview.html) 10 | * [API Docs (2.12.10)](https://www.scala-lang.org/api/2.12.10/) 11 | * [Scala Exercises](https://www.scala-exercises.org/) 12 | * [Scala tutorial](https://www.scala-exercises.org/scala_tutorial/) 13 | * [Standard library](https://www.scala-exercises.org/std_lib/) 14 | * [sbt](http://www.scala-sbt.org/) - build tool 15 | * [giter8 templates](https://github.com/foundweekends/giter8/wiki/giter8-templates) 16 | * [Breeze](https://github.com/scalanlp/breeze/) - numerical computing library 17 | * [Wiki](https://github.com/scalanlp/breeze/wiki) 18 | * [Quickstart](https://github.com/scalanlp/breeze/wiki/Quickstart) 19 | * [Linear algebra cheat sheet](https://github.com/scalanlp/breeze/wiki/Linear-Algebra-Cheat-Sheet) 20 | * [API Docs](http://www.scalanlp.org/api/breeze/) 21 | * [Spire](https://typelevel.org/spire) - numeric types library 22 | * [Smile](http://haifengl.github.io/) - basic stats and ML 23 | * [Rainier](https://rainier.fit/) - Bayesian modelling and probabilistic programming 24 | * [EvilPlot](https://cibotech.github.io/evilplot/) - plotting library 25 | * [Mdoc](https://scalameta.org/mdoc/) - typechecked Markdown for Scala 26 | * [ScalaTest](http://www.scalatest.org/) - popular unit testing library 27 | * [ScalaCheck](https://www.scalacheck.org/) - property-based testing 28 | * [Apache 
Spark](http://spark.apache.org/) - big data framework 29 | * [Downloads](http://spark.apache.org/downloads.html) 30 | * [Documentation](http://spark.apache.org/docs/latest/) 31 | * [Quick start](http://spark.apache.org/docs/latest/quick-start.html) 32 | * [RDD Programming guide](http://spark.apache.org/docs/latest/rdd-programming-guide.html) 33 | * [SQL, DataFrames and Datasets](http://spark.apache.org/docs/latest/sql-programming-guide.html) 34 | * [MLlib](http://spark.apache.org/docs/latest/ml-guide.html) 35 | * [ML Pipelines](http://spark.apache.org/docs/latest/ml-pipeline.html) 36 | * [Classification and regression](http://spark.apache.org/docs/latest/ml-classification-regression.html) 37 | * [API Docs](http://spark.apache.org/docs/latest/api/scala/) 38 | * [Cats](http://typelevel.org/cats/) 39 | * [Type classes](http://typelevel.org/cats/typeclasses.html) 40 | * [Data types](http://typelevel.org/cats/datatypes.html) 41 | * [API Docs](http://typelevel.org/cats/api/cats/) 42 | * [Simulacrum](https://github.com/typelevel/simulacrum) - type class support 43 | 44 | * [Darren's Scala links](https://github.com/darrenjw/djwhacks/blob/master/scala/ScalaLinks.md) - a much less selective and less well curated set of Scala links 45 | 46 | #### eof 47 | 48 | 49 | -------------------------------------------------------------------------------- /ScalaIDE.md: -------------------------------------------------------------------------------- 1 | # Installing the Scala IDE 2 | 3 | ### N.B. 
I'm leaving this page for historical reasons, but the Scala IDE is now considered obsolete 4 | 5 | 6 | ## Useful links 7 | 8 | * [ScalaIDE](http://scala-ide.org/) - based on Eclipse 9 | * [Download](http://scala-ide.org/download/sdk.html) 10 | * [Documentation](http://scala-ide.org/documentation.html) 11 | * [sbteclipse](https://github.com/typesafehub/sbteclipse) - sbt plugin for eclipse 12 | * [Documentation](https://github.com/typesafehub/sbteclipse/wiki) 13 | * [Installation](https://github.com/typesafehub/sbteclipse/wiki/Installing-sbteclipse) 14 | * [User guide](https://github.com/typesafehub/sbteclipse/wiki/Using-sbteclipse) 15 | 16 | ## Installation 17 | 18 | The ScalaIDE is based on Eclipse, which is a JVM application, and is therefore easy to install as a user without admin/root privileges. 19 | 20 | **IMPORTANT** *As we are using Scala 2.12.1 for this course, it is necessary to use a Scala IDE from the 4.6.x series. The 4.5.x series does not have proper support for Scala 2.12.* 21 | 22 | From the [download site](http://scala-ide.org/download/sdk.html), select the version of the IDE for your OS. Unpack this in a convenient place on your system and follow any installation instructions. Running it should be a simple matter of running the `eclipse` executable in the top-level directory. See the [Documentation](http://scala-ide.org/documentation.html) for further details. 23 | 24 | To use the ScalaIDE with sbt projects, you must also install the eclipse plugin for sbt, `sbteclipse`. This should be as simple as adding the line: 25 | ```scala 26 | addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.1.0") 27 | ``` 28 | to your `~/.sbt/0.13/plugins/plugins.sbt` file. Create this file if you don't already have it. See the [sbteclipse](https://github.com/typesafehub/sbteclipse) page for further details. 
29 | 30 | ## Using the ScalaIDE with sbt projects 31 | 32 | The main thing to understand is that the ScalaIDE needs to know about the structure of your sbt project. This information is encoded in Eclipse project files in the top-level directory of your sbt project (where the file `build.sbt` will often be present). An initial set of project files for an sbt project can be generated using the `eclipse` sbt task provided by the `sbteclipse` plugin. 33 | 34 | So, before using the ScalaIDE with a particular sbt project for the first time, first run 35 | ```bash 36 | sbt eclipse 37 | ``` 38 | to analyse the project and create eclipse project files for it. Then start the ScalaIDE. If it asks about a workspace, make sure you select something *different to* the sbt project directory. Then import the project using the *Import Wizard* (under the File menu) to import *Existing Projects into Workspace*. You may need to repeat this process if you make significant changes to the `build.sbt` file. 39 | 40 | Once you are up-and-running, Eclipse provides fairly sophisticated IDE functionality. Some commonly used commands include: 41 | 42 | * Shift-Ctrl-F - Reformat source file 43 | * Shift-Ctrl-W - Close all windows (from package explorer) 44 | * Shift-Ctrl-P - Go to matching bracket 45 | * Ctrl-Space - Content assist 46 | 47 | ### Scala worksheet 48 | 49 | * Shift-Ctrl-B - Re-run all code 50 | 51 | See the [ScalaIDE Documentation](http://scala-ide.org/documentation.html) for further information. 
52 | 53 | 54 | 55 | 56 | #### eof 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/datatable/datatable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | datatable.scala 3 | 4 | Test of "scala-datatable" and "scala-csv" 5 | 6 | */ 7 | 8 | import java.io.{File,FileReader} 9 | import com.github.tototoshi.csv._ 10 | import com.github.martincooper.datatable._ 11 | import scala.annotation.tailrec 12 | import scala.util.Try 13 | 14 | object StringCol 15 | 16 | object DatatableTest { 17 | 18 | def readCsv(name: String, file: FileReader, colTypes: Map[String,Object]): DataTable = { 19 | val reader=CSVReader.open(file) 20 | val all=reader.allWithHeaders() 21 | reader.close() 22 | val ks=colTypes.keys 23 | val colSet=ks map {key => (key,all map {row => row(key)}) } 24 | val dataCols=colSet map {pair => colTypes(pair._1) match { 25 | case StringCol => new DataColumn[String](pair._1,pair._2) 26 | case Int => new DataColumn[Int](pair._1,pair._2 map {x=> 27 | Try(x.toInt).toOption.getOrElse(-99)}) 28 | case Double => new DataColumn[Double](pair._1,pair._2 map {x=> 29 | Try(x.toDouble).toOption.getOrElse(-99.0)}) 30 | } 31 | } 32 | DataTable(name,dataCols).get 33 | } 34 | 35 | def writeCsv(df: DataTable,out: File): Unit = { 36 | val writer = CSVWriter.open(out) 37 | writer.writeRow(df.columns.map{_.name}) 38 | df.foreach{r=>writer.writeRow(r.values)} 39 | writer.close() 40 | } 41 | 42 | 43 | def main(args: Array[String]) = { 44 | 45 | val colTypes=Map("DriveTrain" -> StringCol, 46 | "Min.Price" -> Double, 47 | "Cylinders" -> Int, 48 | "Horsepower" -> Int, 49 | "Length" -> Int, 50 | "Make" -> StringCol, 51 | "Passengers" -> Int, 52 | "Width" -> Int, 53 | "Fuel.tank.capacity" -> Double, 54 | "Origin" -> StringCol, 55 | "Wheelbase" -> Int, 56 | "Price" -> Double, 57 | "Luggage.room" -> Double, 58 | "Weight" -> Int, 59 | "Model" -> StringCol, 60 | "Max.Price" -> 
Double, 61 | "Manufacturer" -> StringCol, 62 | "EngineSize" -> Double, 63 | "AirBags" -> StringCol, 64 | "Man.trans.avail" -> StringCol, 65 | "Rear.seat.room" -> Double, 66 | "RPM" -> Int, 67 | "Turn.circle" -> Double, 68 | "MPG.highway" -> Int, 69 | "MPG.city" -> Int, 70 | "Rev.per.mile" -> Int, 71 | "Type" -> StringCol) 72 | val df=readCsv("Cars93",new FileReader("../r/cars93.csv"),colTypes) 73 | println(df.length,df.columns.length) 74 | 75 | val df2=df.filter(row=>row.as[Double]("EngineSize")<=4.0).toDataTable 76 | println(df2.length,df2.columns.length) 77 | 78 | val oldCol=df2.columns("Weight").as[Int] 79 | val newCol=new DataColumn[Double]("WeightKG",oldCol.data.map{_.toDouble*0.453592}) 80 | val df3=df2.columns.add(newCol).get 81 | println(df3.length,df3.columns.length) 82 | 83 | writeCsv(df3,new File("out.csv")) 84 | 85 | //println("Done") 86 | } 87 | 88 | 89 | 90 | } 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /exercises/option/src/test/scala/option-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | ex2-test.scala 3 | 4 | Tests for Exercise 2 5 | 6 | */ 7 | 8 | import org.scalatest._ 9 | import org.scalatest.Matchers._ 10 | 11 | class PartA extends FlatSpec { 12 | 13 | "1+2" should "=3" in { 14 | assert(1 + 2 === 3) 15 | } 16 | 17 | val tol = 1.0e-8 18 | 19 | def approxEq(test: Double, should: Double): Boolean = { 20 | if (math.abs(test - should) < tol) true else { 21 | println("approxEq test failed: found " + test + " but expected " + should + " with tolerance " + tol) 22 | false 23 | } 24 | } 25 | 26 | "1.0 " should "approxEq 1.0" in { 27 | assert(approxEq(1.0, 1.0)) 28 | } 29 | 30 | import OptionBisect._ 31 | 32 | "findRootOpt(-10.0,10.0)(x => x+1.0)" should "= Some(-1.0)" in { 33 | assert(approxEq(findRootOpt(-10.0, 10.0)(x => x + 1.0).getOrElse(0.0), -1.0)) 34 | } 35 | 36 | "findRootOpt(-5.0, 10.0)(x => 2.0 - x)" should "= Some(2.0)" in { 
37 | assert(approxEq(findRootOpt(-5.0, 10.0)(x => 2.0 - x).getOrElse(0.0), 2.0)) 38 | } 39 | 40 | "findRootOpt(0.0, 5.0)(x => x - 1.0)" should "= Some(1.0)" in { 41 | assert(approxEq(findRootOpt(0.0, 5.0)(x => x - 1.0).getOrElse(0.0), 1.0)) 42 | } 43 | 44 | "findRootOpt(0.0, 2.0)(x => (x + 1.0) * (x - 1.0))" should "= Some(1.0)" in { 45 | assert(approxEq(findRootOpt(0.0, 2.0)(x => (x + 1.0) * (x - 1.0)).getOrElse(0.0), 1.0)) 46 | } 47 | 48 | "findRootOpt(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0))" should "= Some(-1.0)" in { 49 | assert(approxEq(findRootOpt(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0)).getOrElse(0.0), -1.0)) 50 | } 51 | 52 | "findRootOpt(0.0, 2.0)(x => x * x - 2.0)" should "= Some(math.sqrt(2.0))" in { 53 | assert(approxEq(findRootOpt(0.0, 2.0)(x => x * x - 2.0).getOrElse(0.0), math.sqrt(2.0))) 54 | } 55 | 56 | "findRootOpt(2.0,0.0)(x => x-1.0)" should "= None" in { 57 | assert(findRootOpt(2.0, 0.0)(x => x - 1.0) == None) 58 | } 59 | 60 | "findRootOpt(-1.0,-3.0)(x => x+2.0)" should "= None" in { 61 | assert(findRootOpt(-1.0, -3.0)(x => x + 2.0) == None) 62 | } 63 | 64 | "findRootOpt(0.0,2.0)(x => x+1.0)" should "= None" in { 65 | assert(findRootOpt(0.0, 2.0)(x => x + 1.0) == None) 66 | } 67 | 68 | "findRootOpt(0.0,2.0)(x => x-5.0)" should "= None" in { 69 | assert(findRootOpt(0.0, 2.0)(x => x - 5.0) == None) 70 | } 71 | 72 | } 73 | 74 | class PartB extends FlatSpec { 75 | 76 | "1+2" should "=3" in { 77 | assert(1 + 2 === 3) 78 | } 79 | 80 | val tol = 1.0e-8 81 | 82 | def approxEq(test: Double, should: Double): Boolean = { 83 | if (math.abs(test - should) < tol) true else { 84 | println("approxEq test failed: found " + test + " but expected " + should + " with tolerance " + tol) 85 | false 86 | } 87 | } 88 | 89 | "1.0 " should "approxEq 1.0" in { 90 | assert(approxEq(1.0, 1.0)) 91 | } 92 | 93 | import OptionBisect._ 94 | 95 | def testX(a: Double, x: Double): Boolean = { 96 | val y = a * x * x 97 | approxEq(x * x + y * y, 1.0) 98 | } 99 | 100 | "solveQuad(0.1)" 
should "work" in { 101 | assert(testX(0.1, solveQuad(0.1).getOrElse(0.0))) 102 | } 103 | 104 | "solveQuad(1.0)" should "work" in { 105 | assert(testX(1.0, solveQuad(1.0).getOrElse(0.0))) 106 | } 107 | 108 | "solveQuad(10.0)" should "work" in { 109 | assert(testX(10.0, solveQuad(10.0).getOrElse(0.0))) 110 | } 111 | 112 | "solveQuad(0.01)" should "work" in { 113 | assert(testX(0.01, solveQuad(0.01).getOrElse(0.0))) 114 | } 115 | 116 | 117 | 118 | } 119 | 120 | /* eof */ 121 | 122 | -------------------------------------------------------------------------------- /exercises/Stats.md: -------------------------------------------------------------------------------- 1 | # Statistical modelling 2 | 3 | ## Practical exercises 4 | 5 | You should selectively choose from this collection of exercises according to your personal interests. 6 | 7 | ### 1. Linear regression modelling 8 | 9 | * Run the [regression example](../examples/C6-Regression/) for the [yacht hydrodynamics dataset](http://archive.ics.uci.edu/ml/datasets/Yacht+Hydrodynamics), and go through the code carefully to understand exactly how it works. 10 | * When you are happy with it, make a copy and edit it to do a regression analysis for the [airfoil self-noise dataset](http://archive.ics.uci.edu/ml/datasets/Airfoil+Self-Noise). Which variables are significant for predicting scaled sound pressure? 11 | 12 | ### 2. IRLS code optimisation 13 | 14 | * Make sure you can run the logistic regression example from the notes using the simple IRLS function that was provided. 15 | * The IRLS function is illustrative rather than efficient. There are many ways in which the code could be made more efficient. We will start with the weight matrix, `W`. This is an `n` x `n` matrix, which is bad-news if `n` is large. But it's diagonal, so it could easily be represented by an `n`-vector. Modify the code to make `W` a vector rather than a matrix, and check it gives the same results as the previous version. 
Time it on some big problems to see if it's perceptibly faster. 16 | * (optional) Google the efficient implementation of IRLS (using QR decomposition), and implement it. Check it works and that it's faster. 17 | 18 | ### 3. Scala-Glm library 19 | 20 | I've created a small library for fitting linear and generalised linear models, based on the code examples from this course. See the [scala-glm](https://github.com/darrenjw/scala-glm) repo for further details. 21 | 22 | * Try it out and make sure you know how to use it. 23 | * Once you have figured out how it works, take some time to browse the source code. This is a small library with a relatively simple structure. It serves as an example of how to create a small library with a few source files and a few test files. It is a little bit bigger than the very small examples we have been focussing on in this course, but a lot smaller than a large library like Breeze, which can be a bit daunting at first. 24 | * Look at how I've (re-)structured the GLM code, and how I've implemented the IRLS algorithm. 25 | 26 | ### 4. Smile 27 | 28 | [Smile](http://haifengl.github.io/) has lots of functionality relating to EDA, statistical modelling and machine learning, and can be used as a library from Scala. It's probably worth figuring out how to use it. I have an [example project](../examples/C6-Smile/) to show how to use it for a the yacht hydrodynamics linear regression example. 29 | 30 | * Run this example, and study the code to make sure you know how it works. 31 | * Adapt the code to analyse the airfoil self-noise data (from Exercise 1). 32 | * Write code to fit a logistic regression model to some simulated/synthetic data, and make sure that your Smile model recovers the true values used to simulate the data to a reasonable degree. 33 | * Try downloading and running Smile as a standalone piece of software, using the Smile shell/REPL. 34 | 35 | ### 5. 
Rainier 36 | 37 | If you want to go beyond simple statistical models, then a library for MCMC-based Bayesian hierarchical modelling is highly desirable. [Rainier](https://rainier.fit/) is an HMC-based Scala library, that is very useful for fitting random and mixed effects models in Scala. I have an [example project](../examples/C6-Rainier/) to show how to use it for a simple Bayesian logistic regression model. 38 | 39 | * Run this example, and study the code to make sure you know how it works. 40 | * Work through the Rainier docs tutorial, replicating the examples in the REPL. The `sbt console` associated with the above example project should be suitable for this. 41 | 42 | 43 | 44 | #### eof 45 | 46 | -------------------------------------------------------------------------------- /exercises/Tools.md: -------------------------------------------------------------------------------- 1 | # Tools 2 | 3 | ## Practical exercises 4 | 5 | Again, choose selectively from these exercises according to interests and your previous selections. 6 | 7 | ### 1. ScalaDoc 8 | 9 | * Go back to your linear regression example from the Chapter 6 exercises, and add ScalaDoc documentation to the `backSolve` method and `Lm` case class. Generate HTML documentation and check it with your web browser. 10 | 11 | ### 2. Testing 12 | 13 | * Continuing with the same example, add some ScalaTest unit tests. For testing `backsolve`, just add a couple of tests using some simple 2x2 examples picked by hand. 14 | * For testing `Lm`, start by testing it with two or three points on a known straight line. 15 | * Try adding some property-based tests to your code, using ScalaCheck. 16 | 17 | ### 3. Interfacing with R 18 | 19 | * One way we could check that our logistic regression code is working as it should would be to read in or simulate a fairly small dataset and fit it with our code, then send the dataset to R and re-fit it with the `glm` function in R. 
Then bring the fitted coefficients back to Scala for comparison. Take a look at the tests for the [scala-glm](https://github.com/darrenjw/scala-glm/) library, which uses exactly this strategy. 20 | * If you are using simulated data, you could easily loop this to check for agreement on a range of small simulated datasets (ideally using ScalaCheck). 21 | 22 | ### 4. Interfacing with Python 23 | 24 | Not covered in the course, but it seems that calling Python from Scala is covered by the [ScalaPy](https://github.com/shadaj/scalapy) library. Calling Python machine learning libraries from Scala seems to be a standard use-case. Calling Scala from Python is less obvious. [pySpark](https://spark.apache.org/docs/latest/api/python/) uses [py4j](https://www.py4j.org/), which is a library for calling Java from Python, so that is probably as good a solution as any. 25 | 26 | * See if you can figure out how to call Python from Scala. 27 | * If you get it working, see if you can call figure out how to call a [scikit-learn](https://scikit-learn.org/) function from Scala. 28 | 29 | ### 5. EvilPlot 30 | 31 | [EvilPlot](https://cibotech.github.io/evilplot/) is a nice library for generating high-quality plots and charts using Scala. I have an [example project](../examples/C7-EvilPlot/) which shows how to use it to generate a range of plots and charts, based mainly on examples from the EvilPlot documentation. 32 | 33 | * Run the example project, and inspect the code to see how it works 34 | * Read through some of the EvilPlot documentation 35 | * Produce some nice charts and plots for one or more of the examples you have previously considered, such as a regression model, but previously charted using breeze-viz. 36 | 37 | ### 6. Mdoc 38 | 39 | [Mdoc](https://scalameta.org/mdoc/) is a great framework for documenting libraries and workflows using executable Scala code blocks within Markdown documents. 
A couple of the examples we have already seen had some mdoc documentation associated with them. 40 | 41 | * The [Smile example](../examples/C6-Smile/) has an mdoc document in `docs`, and the `mdoc` sbt task compiles this, and puts generated Markdown in `target/mdoc`. Make sure you know how it works. 42 | * The [Rainier example](../examples/C6-Rainier/) has an mdoc document as well. Note that Rainier has built-in support for generating EvilPlot figures, and hooks for including these in mdoc documents and Jupyter notebooks. The mdoc document associated with this example illustrates how to use this functionality to embed Rainier EvilPlot figures into a mdoc document. Study it to see how it works. Note that Rainier acheives this by making use of mdoc PostModifier hooks - you can read more about those [here](https://scalameta.org/mdoc/docs/modifiers.html#postmodifier). 43 | * Add some Mdoc tutorial documentation to one of the examples you have developed during this course, in order to document your workflow. 44 | 45 | 46 | 47 | 48 | #### eof 49 | -------------------------------------------------------------------------------- /Ensime.md: -------------------------------------------------------------------------------- 1 | # Installing Ensime 2 | 3 | ## Useful links 4 | 5 | Some useful links for using Emacs and Ensime with sbt: 6 | 7 | * [Ensime](http://ensime.org/) 8 | * [Learning Emacs](http://ensime.org/editors/emacs/learning) 9 | * [Installing with Emacs](http://ensime.org/editors/emacs/install/) 10 | * [Sbt plugin for Ensime](http://ensime.org/build_tools/sbt/) 11 | * [Emacs Ensime User Guide](http://ensime.org/editors/emacs/userguide/) 12 | 13 | ## Installation 14 | 15 | I am assuming that you are already familar with Emacs and have it installed on your system. If this is not the case, I recommend using the [Scala IDE](ScalaIDE.md) for the short course, as Emacs has a fairly steep learning curve. 
You can always investigate Emacs and Ensime later once you are more familiar with Scala. 16 | 17 | Ensime is installed using [MELPA](http://melpa.org/) - the Emacs package archive. If you don't currently use MELPA, you must first enable it by copying a snippet of code like: 18 | ```lisp 19 | ;; MELPA package manager 20 | (require 'package) 21 | (setq 22 | package-archives '(("gnu" . "http://elpa.gnu.org/packages/") 23 | ("org" . "http://orgmode.org/elpa/") 24 | ("melpa" . "http://melpa.org/packages/") 25 | ("melpa-stable" . "http://stable.melpa.org/packages/")) 26 | package-archive-priorities '(("melpa-stable" . 1))) 27 | 28 | (package-initialize) 29 | (when (not package-archive-contents) 30 | (package-refresh-contents) 31 | (package-install 'use-package)) 32 | (require 'use-package) 33 | ``` 34 | into your `.emacs` or `.emacs.d/init.el` file. Try restarting Emacs and check there are no errors. If for some reason this doesn't work, you could try adding the snippet: 35 | ```lisp 36 | (unless (package-installed-p 'use-package) 37 | (package-refresh-contents) 38 | (package-install 'use-package)) 39 | ``` 40 | immediately before the final line. See the [Learning Emacs](http://ensime.org/editors/emacs/learning) page for further details. 41 | 42 | 43 | 44 | Once you have MELPA set up, installing Ensime should be as simple as copying the snippet: 45 | ```lisp 46 | (use-package ensime 47 | :ensure t 48 | :pin melpa) 49 | ``` 50 | to the end of your init file and restarting Emacs, but see the [Installing with Emacs](http://ensime.org/editors/emacs/install/) page for further details. 51 | 52 | To use Ensime with sbt, you also need to install the Ensime plugin for sbt. This should be as simple as adding the line: 53 | ```scala 54 | addSbtPlugin("org.ensime" % "sbt-ensime" % "1.12.6") 55 | ``` 56 | to your `~/.sbt/0.13/plugins/plugins.sbt` file. Create this file if you don't already have it. 
It's also a good idea to add the lines: 57 | ```scala 58 | import org.ensime.EnsimeCoursierKeys._ 59 | ensimeServerVersion in ThisBuild := "2.0.0-SNAPSHOT" 60 | ``` 61 | to your `~/sbt/0.13/global.sbt` file (again, create it if you don't have it). See the [Sbt plugin for Ensime](http://ensime.org/build_tools/sbt/) page for further details. 62 | 63 | ## Using Ensime 64 | 65 | The main thing to understand is that Ensime needs to know about the structure of your sbt project. This information is encoded in a file `.ensime` in the top-level directory of your sbt project (where the file `build.sbt` will often be present). An initial `.ensime` file for an sbt project can be generated using the `ensimeConfig` sbt task provided by the `sbt-ensime` plugin. 66 | 67 | So, before using Emacs/Ensime with a particular sbt project for the first time, first run 68 | ```bash 69 | sbt ensimeConfig 70 | ``` 71 | to analyse the project and create a `.ensime` file for it. You should probably re-run this after editing `build.sbt` or other build configuration files. Then start emacs with a command like `emacs src/main/scala/blah/*.scala &`. This will start up emacs and some basic syntax highlighting will be provided by `scala-mode`. However, you still need to start up Ensime with `M-x ensime`. Once you are up-and-running, Ensime provides fairly sophisticated IDE functionality. Some commonly used commands include: 72 | 73 | * M-x ensime - Start up Ensime 74 | * C-c C-v d - Scaladoc for symbol at cursor 75 | * C-c C-v f - Reformat source code in this buffer 76 | * C-c C-b c - sbt compile 77 | * C-c C-b r - sbt run 78 | 79 | See the [Emacs Ensime User Guide](http://ensime.org/editors/emacs/userguide/) for further details. 
80 | 81 | 82 | 83 | #### eof 84 | 85 | -------------------------------------------------------------------------------- /examples/C6-Regression/src/main/scala/regression.scala: -------------------------------------------------------------------------------- 1 | /* 2 | regression.scala 3 | 4 | Linear regression for the dataset: 5 | 6 | http://archive.ics.uci.edu/ml/datasets/Yacht+Hydrodynamics 7 | 8 | from the Machine learning repository: 9 | 10 | http://archive.ics.uci.edu/ml/datasets.html 11 | 12 | */ 13 | 14 | import breeze.linalg._ 15 | import com.github.fommil.netlib.BLAS.{ getInstance => blas } 16 | 17 | object Regression { 18 | 19 | def backSolve(A: DenseMatrix[Double], 20 | y: DenseVector[Double]): DenseVector[Double] = { 21 | val yc = y.copy 22 | blas.dtrsv("U", "N", "N", A.cols, A.toArray, 23 | A.rows, yc.data, 1) 24 | yc 25 | } 26 | 27 | case class Lm(y: DenseVector[Double], 28 | X: DenseMatrix[Double], names: List[String]) { 29 | require(y.size == X.rows) 30 | require(names.length == X.cols) 31 | require(X.rows >= X.cols) 32 | val QR = qr.reduced(X) 33 | val q = QR.q 34 | val r = QR.r 35 | val qty = q.t * y 36 | val coefficients = backSolve(r, qty) 37 | import breeze.stats._ 38 | import org.apache.commons.math3.special.Beta 39 | def tCDF(t: Double, df: Double): Double = { 40 | val xt = df / (t * t + df) 41 | 1.0 - 0.5 * Beta.regularizedBeta(xt, 0.5 * df, 0.5) 42 | } 43 | def fCDF(x: Double, d1: Double, d2: Double) = { 44 | val xt = x * d1 / (x * d1 + d2) 45 | Beta.regularizedBeta(xt, 0.5 * d1, 0.5 * d2) 46 | } 47 | lazy val fitted = q * qty 48 | lazy val residuals = y - fitted 49 | lazy val n = X.rows 50 | lazy val pp = X.cols 51 | lazy val df = n - pp 52 | lazy val rss = sum(residuals ^:^ 2.0) 53 | lazy val rse = math.sqrt(rss / df) 54 | lazy val ri = inv(r) 55 | lazy val xtxi = ri * (ri.t) 56 | lazy val se = breeze.numerics.sqrt(diag(xtxi)) * rse 57 | lazy val t = coefficients / se 58 | lazy val p = t.map { 1.0 - tCDF(_, df) }.map { _ * 2 } 59 | lazy 
val ybar = mean(y) 60 | lazy val ymyb = y - ybar 61 | lazy val ssy = sum(ymyb ^:^ 2.0) 62 | lazy val rSquared = (ssy - rss) / ssy 63 | lazy val adjRs = 1.0 - ((n - 1.0) / (n - pp)) * (1 - rSquared) 64 | lazy val k = pp - 1 65 | lazy val f = (ssy - rss) / k / (rss / df) 66 | lazy val pf = 1.0 - fCDF(f, k, df) 67 | def summary: Unit = { 68 | println( 69 | "Estimate\t S.E.\t t-stat\tp-value\t\tVariable") 70 | println( 71 | "---------------------------------------------------------") 72 | (0 until pp).foreach(i => printf( 73 | "%8.4f\t%6.3f\t%6.3f\t%6.4f %s\t%s\n", 74 | coefficients(i), se(i), t(i), p(i), 75 | if (p(i) < 0.05) "*" else " ", 76 | names(i))) 77 | printf( 78 | "\nResidual standard error: %8.4f on %d degrees of freedom\n", 79 | rse, df) 80 | printf( 81 | "Multiple R-squared: %6.4f, Adjusted R-squared: %6.4f\n", 82 | rSquared, adjRs) 83 | printf( 84 | "F-statistic: %6.4f on %d and %d DF, p-value: %6.5f\n\n", 85 | f, k, df, pf) 86 | } 87 | } 88 | 89 | // Main runner method 90 | def main(args: Array[String]): Unit = { 91 | 92 | val url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data" 93 | val fileName = "yacht.csv" 94 | 95 | // download the file to disk if it hasn't been already 96 | val file = new java.io.File(fileName) 97 | if (!file.exists) { 98 | val s = new java.io.PrintWriter(file) 99 | val data = scala.io.Source.fromURL(url).getLines 100 | data.foreach(l => s.write(l.trim.split(' ').filter(_ != "").mkString("",",","\n"))) 101 | s.close 102 | } 103 | 104 | // read the file from disk 105 | val mat = csvread(new java.io.File(fileName)) 106 | println("Dim: " + mat.rows + " " + mat.cols) 107 | val y = mat(::, 6) // response is the final column 108 | val x = mat(::, 0 to 5) 109 | // first fit without an intercept 110 | Lm(y,x,List("LongPos","PrisCoef","LDR","BDR","LBR","Froude")).summary 111 | // add an intercept and re-fit 112 | val X = DenseMatrix.horzcat( 113 | DenseVector.ones[Double](x.rows).toDenseMatrix.t,x) 
114 | val mod = Lm(y,X,List("(Intercept)","LongPos","PrisCoef","LDR","BDR","LBR","Froude")) 115 | mod.summary 116 | 117 | } // main 118 | 119 | 120 | } 121 | 122 | // eof 123 | 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scala for Statistical Computing and Data Science Short Course 2 | 3 | **I occasionally run this course in-house for companies - [email me](mailto:darrenjwilkinson@btinternet.com) if your company is interested in this. Also note that I run an advanced course on [Category theory for pure FP in Scala](https://github.com/darrenjw/fps-course)** 4 | 5 | *Registered course participants should bookmark the [Start Here](StartHere.md) page. Please carefully follow the [laptop setup instructions](Setup.md) in advance of the start of the course.* 6 | 7 | ## Outline course description 8 | 9 | This course is aimed at statisticians and data scientists already familiar with a dynamic programming language (such as R, Python or Octave) who would like to learn how to use [Scala](http://www.scala-lang.org/). Scala is a free modern, powerful, strongly-typed, functional programming language, well-suited to statistical computing and data science applications. In particular, it is fast and efficient, runs on the Java virtual machine (JVM), and is designed to easily exploit modern multi-core and distributed computing architectures. 10 | 11 | The course will begin with an introduction to the Scala language and basic concepts of [functional programming](https://en.wikipedia.org/wiki/Functional_programming) (FP), as well as essential Scala tools such as [SBT](http://www.scala-sbt.org/) for managing builds and library dependencies. 
The course will continue with an overview of the [Scala collections library](http://docs.scala-lang.org/overviews/collections/overview.html), including [parallel collections](http://docs.scala-lang.org/overviews/parallel-collections/overview.html), and we will see how parallel collections enable trivial parallelisation of many statistical computing algorithms on multi-core hardware. We will next survey the wider Scala library ecosystem, paying particular attention to [Breeze](https://github.com/scalanlp/breeze), the Scala library for scientific computing and numerical linear algebra. We will see how to exploit non-uniform random number generation and matrix computations in Breeze for statistical applications. Both maximum-likelihood and simulation-based Bayesian statistical inference algorithms will be considered. Much of the final day will be dedicated to understanding [Apache Spark](http://spark.apache.org/), the distributed Big Data analytics platform for Scala. We will understand how Spark relates to the parallel collections we have already examined, and see how it can be used not only for the processing of very large data sets, but also for the parallel and distributed analysis of large or otherwise computationally-intensive models. As time permits, we will discuss more [advanced FP concepts](https://typelevel.org/cats/), such as typeclasses, higher-kinded types, monoids, functors, monads, applicatives, streams and streaming data, and see how these enable the development of flexible, scalable, generic code in strongly-typed functional languages. 12 | 13 | #### Prerequisite 14 | 15 | The course assumes a basic familiarity with essential concepts in statistical computing, as well as some basic programming experience. It is assumed that participants will be familiar with writing their own functions in a language such as R, including essential control structures such as "for-loops" and "if-statements". 
The course is not suitable for people completely new to programming. However, no prior knowledge of Scala or functional programming is assumed. All participants will be expected to bring their own (multi-core) laptop and to have a recent version of Java pre-installed. Other set-up instructions will be provided in advance to registered participants. 16 | 17 | #### Course structure 18 | 19 | The course will be delivered through a combination of lectures, live demos and hands-on practical sessions. For the practical sessions, participants will be expected to actively engage with the material, run demos, follow examples, and write code to solve simple problems. 20 | 21 | #### Presenters 22 | 23 | The course will be delivered by [Prof Darren Wilkinson](https://darrenjw.github.io/) (Newcastle University, U.K.). Prof Wilkinson is co-Director of Newcastle's [EPSRC Centre for Doctoral Training in Cloud Computing for Big Data](http://www.bigdata-cdt.ac.uk/), and a [Turing Fellow](https://www.turing.ac.uk/people/researchers/darren-wilkinson). He is a well-known expert in computational Bayesian statistics and a leading proponent of the use of strongly-typed FP languages (such as Scala) for scalable statistical computing. 24 | 25 | 26 | -------------------------------------------------------------------------------- /exercises/Advanced.md: -------------------------------------------------------------------------------- 1 | # Advanced topics 2 | 3 | ## Practical exercises 4 | 5 | Start off with exercise 1 (Cats), then pick and choose according to your interests. 6 | 7 | ### 1. Playing with Cats 8 | 9 | * [Cats](http://typelevel.org/cats/) is one of many useful libraries that we haven't had time to explore properly in this short course. [Scala exercises](https://www.scala-exercises.org/) has some [Cats exercises](https://www.scala-exercises.org/cats) which are worth working through to learn a little about how it works. 10 | 11 | ### 2. 
Simulacrum for typeclass programming 12 | 13 | * [Simulacrum](https://github.com/typelevel/simulacrum) is another useful library for FP in Scala. Read about how it works and then re-do the `CsvRow` and `Thinnable` typeclass examples from the notes using Simulacrum. Note how much cleaner they are. Note that Cats has a dependence on Simulacrum, so if you have a project or REPL with a Cats dependency you *may* not need to add an additional dependence on Simulacrum. However, you *do* need to enable the "macro paradise" compiler plugin, by adding the line 14 | ```scala 15 | addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full) 16 | ``` 17 | to your `build.sbt` file. The `sbt-test` example project is set up to allow experimenting with both Simulacrum and Cats from the REPL. 18 | 19 | ### 3. Monocle 20 | 21 | [Monocle](https://julien-truffaut.github.io/Monocle/) is an *optics* library for Scala, intended to make it easier to work with immutable data structures based on (nested) algebraic data types (ADTs). 22 | 23 | * Work through the Getting started guide to get a bit of a feel for the problem that the library solves. If it seems interesting, continue to work through the rest of the documentation. 24 | 25 | ### 4. Monix 26 | 27 | [Monix](https://monix.io/) is a library for asyncronous and concurrent programming in Scala, including stream-based functional reactive programming (FRP), using a datatype known as `Observable`. It is one of many options for working with (real time) data streams in Scala. It also contains `Task`, which is a much better version of Scala's `Future` monad. 28 | 29 | * Start working through the documentation for [Observable](https://monix.io/docs/3x/reactive/observable.html), and then investigate further if the library seems interesting. 30 | 31 | ### 5. Frameless 32 | 33 | [Frameless](https://typelevel.org/frameless/) is a library which provides a safter, more idiomatic, Scala interface to Spark. 
If you intend to work a lot with Spark, it is worth trying to understand this library and the potential benefits it can bring. 34 | * Start by reading through the Introduction, then continue with the library documentation, learning first about `TypedDataset`. 35 | 36 | ### 6. Probabilistic programming with Figaro 37 | 38 | * [Figaro](https://github.com/p2t2/figaro) is a library for probabilistic programming in Scala. Use the remaining time to read through the [Quick start guide](https://github.com/p2t2/figaro/raw/master/doc/Figaro%20Quick%20Start%20Guide.pdf) and then skim the [Tutorial](https://www.cra.com/sites/default/files/pdf/Figaro_Tutorial.pdf). Try to build and run the example from the quick start guide, noting that the examples can be found [here](https://github.com/p2t2/figaro/tree/master/FigaroExamples/src/main/scala/com/cra/figaro/example). 39 | * Note that from a clean SBT session (say, run from an empty/temp directory), a REPL with a Figaro dependency can be started with: 40 | ```scala 41 | set libraryDependencies += "com.cra.figaro" %% "figaro" % "5.0.0.0" 42 | set scalaVersion := "2.12.10" 43 | console 44 | ``` 45 | 46 | ### 7. Scala.js 47 | 48 | [Scala.js](https://www.scala-js.org/) is a framework for compiling Scala code to Javascript for client-side execution in web applications. If you do any front-end work, Scala.ja is one of the nicest ways to develop web applications. Many of the libraries we have considered, including EvilPlot, Cats, Simulacrum, Monocle, and Monix, are available for Scala.js as well as the usual JVM version of Scala. This makes it very easy to develop web applications which share code between the front and the back-end. Some of the web sites we have used in this course, such as [scala-fiddle](https://scalafiddle.io/) and [scala-exercises](https://www.scala-exercises.org/) are powered by Scala.js. It is particularly useful for developing web-based interactive dashboards for data science applications. 
49 | * Try one of the [tutorials](https://www.scala-js.org/doc/tutorial/) to get started 50 | 51 | ### 8. Scala-native 52 | 53 | [Scala-native](https://github.com/scala-native/scala-native) is a framework for compiling Scala to native code, rather than JVM bytecode. This is useful for systems programming, for interfacing with C libraries, and for developing lightweight command-line tools. Although many people imagine that Scala code compiled to native code will execute much faster than JVM bytecode, this is typically not the case, and is certainly not the main intended use of Scala-native. See the [documentation](https://www.scala-native.org/) for further details. 54 | 55 | 56 | #### eof 57 | 58 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/src/main/scala/pfilter/pfilter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pfilter.scala 3 | 4 | Top level code for pfilter blog post 5 | 6 | */ 7 | 8 | package pfilter 9 | 10 | object PFilter { 11 | 12 | import scala.language.higherKinds 13 | import scala.collection.parallel.immutable.ParVector 14 | import scala.collection.GenTraversable 15 | 16 | // Hardcode LogLik type 17 | type LogLik = Double 18 | // Use blank typeclasses for State, Observation, and Parameter 19 | trait State[T] 20 | trait Observation[T] 21 | trait Parameter[T] 22 | 23 | // My generic collection typeclass 24 | trait GenericColl[C[_]] { 25 | def map[A, B](ca: C[A])(f: A => B): C[B] 26 | def reduce[A](ca: C[A])(f: (A, A) => A): A 27 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: C[A])(f: A => D[B]): C[B] 28 | def zip[A, B](ca: C[A])(cb: C[B]): C[(A, B)] 29 | def length[A](ca: C[A]): Int 30 | } 31 | // Syntax for the typeclass 32 | implicit class GenericCollSyntax[A, C[A]](value: C[A]) { 33 | def map[B](f: A => B)(implicit inst: GenericColl[C]): C[B] = inst.map(value)(f) 34 | def reduce(f: (A, A) => A)(implicit inst: GenericColl[C]): A = 
inst.reduce(value)(f) 35 | def flatMap[B, D[B] <: GenTraversable[B]](f: A => D[B])(implicit inst: GenericColl[C]): C[B] = inst.flatMap(value)(f) 36 | def zip[B](cb: C[B])(implicit inst: GenericColl[C]): C[(A, B)] = inst.zip(value)(cb) 37 | def length(implicit inst: GenericColl[C]): Int = inst.length(value) 38 | } 39 | 40 | // Implementation for Vector 41 | implicit val vGC: GenericColl[Vector] = new GenericColl[Vector] { 42 | def map[A, B](ca: Vector[A])(f: A => B): Vector[B] = ca map f 43 | def reduce[A](ca: Vector[A])(f: (A, A) => A): A = ca reduce f 44 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: Vector[A])(f: A => D[B]): Vector[B] = ca flatMap f 45 | def zip[A, B](ca: Vector[A])(cb: Vector[B]): Vector[(A, B)] = ca zip cb 46 | def length[A](ca: Vector[A]) = ca.length 47 | } 48 | 49 | // Implementation for ParVector 50 | implicit val pvGC: GenericColl[ParVector] = new GenericColl[ParVector] { 51 | def map[A, B](ca: ParVector[A])(f: A => B): ParVector[B] = ca map f 52 | def reduce[A](ca: ParVector[A])(f: (A, A) => A): A = ca reduce f 53 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: ParVector[A])(f: A => D[B]): ParVector[B] = ca flatMap f 54 | def zip[A, B](ca: ParVector[A])(cb: ParVector[B]): ParVector[(A, B)] = ca zip cb 55 | def length[A](ca: ParVector[A]) = ca.length 56 | } 57 | 58 | // TODO: Implementation for Spark RDDs 59 | 60 | // Single step of a bootstrap particle filter 61 | def update[S: State, O: Observation, C[_]: GenericColl]( 62 | dataLik: (S, O) => LogLik, stepFun: S => S 63 | )(x: C[S], o: O): (LogLik, C[S]) = { 64 | import breeze.stats.distributions.Poisson 65 | val xp = x map (stepFun(_)) 66 | val lw = xp map (dataLik(_, o)) 67 | val max = lw reduce (math.max(_, _)) 68 | val rw = lw map (lwi => math.exp(lwi - max)) 69 | val srw = rw reduce (_ + _) 70 | val l = rw.length 71 | val z = rw zip xp 72 | val rx = z flatMap { case (rwi, xpi) => 73 | Vector.fill(Poisson(rwi * l / srw).draw)(xpi) } 74 | (max + math.log(srw / l), rx) 75 | } 76 | 
77 | // Run a bootstrap particle filter over a collection of observations 78 | def pFilter[S: State, O: Observation, C[_]: GenericColl, D[O] <: GenTraversable[O]]( 79 | x0: C[S], data: D[O], dataLik: (S, O) => LogLik, stepFun: S => S 80 | ): (LogLik, C[S]) = { 81 | val updater = update[S, O, C](dataLik, stepFun) _ 82 | data.foldLeft((0.0, x0))((prev, o) => { 83 | val (oll, ox) = prev 84 | val (ll, x) = updater(ox, o) 85 | (oll + ll, x) 86 | }) 87 | } 88 | 89 | // Marginal log likelihood estimation 90 | def pfMll[S: State, P: Parameter, O: Observation, C[_]: GenericColl, D[O] <: GenTraversable[O]]( 91 | simX0: P => C[S], stepFun: P => S => S, dataLik: P => (S, O) => LogLik, data: D[O] 92 | ): (P => LogLik) = (th: P) => pFilter(simX0(th), data, dataLik(th), stepFun(th))._1 93 | 94 | // Main method 95 | def main(args: Array[String]): Unit = { 96 | println("Hi") 97 | import Examples._ 98 | arTest 99 | println("Bye") 100 | } 101 | 102 | } 103 | 104 | object Examples { 105 | 106 | import PFilter._ 107 | 108 | // Simple test for an AR(1) model 109 | def arTest: Unit = { 110 | import breeze.linalg._ 111 | import breeze.stats.distributions._ 112 | println("AR(1) test start") 113 | // simulate some data from an AR(1) model with noise 114 | val inNoise = Gaussian(0.0, 1.0).sample(99) 115 | val state = DenseVector(inNoise.scanLeft(0.0)((s, i) => 0.8 * s + i).toArray) 116 | val noise = DenseVector(Gaussian(0.0, 2.0).sample(100).toArray) 117 | val data = (state + noise).toArray.toList 118 | import breeze.plot._ 119 | val f = Figure() 120 | val p0 = f.subplot(0) 121 | val idx = linspace(1, 100, 100) 122 | p0 += plot(idx, state) 123 | p0 += plot(idx, data, '.') 124 | p0.xlabel = "Time" 125 | p0.ylabel = "Value" 126 | // now try to recover autoregression coefficient 127 | implicit val dState = new State[Double] {} 128 | implicit val dObs = new Observation[Double] {} 129 | implicit val dPar = new Parameter[Double] {} 130 | val mll = pfMll( 131 | (th: Double) => Gaussian(0.0, 
10.0).sample(10000).toVector.par, 132 | (th: Double) => (s: Double) => Gaussian(th * s, 1.0).draw, 133 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 134 | data 135 | ) 136 | val x = linspace(0.0, 0.99, 100) 137 | val y = x map (mll(_)) 138 | //println(y) 139 | val p1 = f.subplot(2, 1, 1) 140 | p1 += plot(x, y) 141 | p1.xlabel = "theta" 142 | p1.ylabel = "mll" 143 | f.saveas("plot.png") 144 | println("AR(1) test finish") 145 | } 146 | 147 | } 148 | 149 | // eof 150 | 151 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/src/test/scala/pfilter-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pfilter-test.scala 3 | 4 | Test code for pfilter 5 | 6 | */ 7 | 8 | package pfilter 9 | 10 | import org.scalatest._ 11 | import org.scalatest.junit._ 12 | import org.junit.runner.RunWith 13 | 14 | import scala.language.higherKinds 15 | import PFilter._ 16 | 17 | @RunWith(classOf[JUnitRunner]) 18 | class MyTestSuite extends FunSuite { 19 | 20 | test("1+2=3") { 21 | assert(1 + 2 === 3) 22 | } 23 | 24 | // test generic functions to check that the typeclass works as intended 25 | def doubleIt[C[_]: GenericColl](ca: C[Int]): C[Int] = ca map (_ * 2) 26 | def addThem[C[_]: GenericColl](ca: C[Int]): Int = ca reduce (_ + _) 27 | def repeatThem[C[_]: GenericColl](ca: C[Int]): C[Int] = ca flatMap (x => List(x, x, x)) 28 | def zipThem[C[_]: GenericColl](ci: C[Int], cd: C[Double]): C[(Int, Double)] = ci zip cd 29 | def getLength[C[_]: GenericColl](ci: C[Int]): Int = ci.length 30 | 31 | test("Vector in generic function including map") { 32 | val v = Vector(5, 10, 15, 20) 33 | val v2 = v map (_ * 2) 34 | val v3 = doubleIt(v) 35 | assert(v2 === v3) 36 | } 37 | 38 | test("Vector in generic function including flatMap") { 39 | val v = Vector(5, 10, 15) 40 | val v2 = v flatMap (x => Array(x, x, x)) 41 | //println(v2) 42 | val v3 = repeatThem(v) 43 | assert(v2 === 
v3) 44 | } 45 | 46 | test("Vector in generic function including reduce") { 47 | val v = Vector(5, 10, 15) 48 | val s = addThem(v) 49 | assert(s === 30) 50 | } 51 | 52 | test("Vector in generic zipping function") { 53 | val v1 = Vector(1, 2, 3) 54 | val v2 = Vector(2.0, 4.0, 6.0) 55 | val v3 = v1 zip v2 56 | val v4 = zipThem(v1, v2) 57 | assert(v4 === v3) 58 | } 59 | 60 | test("Vector in generic length function") { 61 | val v1 = Vector(1, 2, 3, 4) 62 | val l = getLength(v1) 63 | assert(l === 4) 64 | } 65 | 66 | test("ParVector in generic function including map") { 67 | val v = Vector(5, 10, 15, 30).par 68 | val v2 = v map (_ * 2) 69 | //println(v2) 70 | val v3 = doubleIt(v) 71 | assert(v2 === v3) 72 | } 73 | 74 | test("ParVector in generic function including flatMap") { 75 | val v = Vector(5, 10, 15, 10).par 76 | val v2 = v flatMap (x => Vector(x, x, x)) 77 | //println(v2) 78 | val v3 = repeatThem(v) 79 | assert(v2 === v3) 80 | } 81 | 82 | test("ParVector in generic function including reduce") { 83 | val v = Vector(5, 10, 15).par 84 | val s = addThem(v) 85 | assert(s === 30) 86 | } 87 | 88 | test("ParVector in generic zipping function") { 89 | val v1 = Vector(1, 2, 3).par 90 | val v2 = Vector(2.0, 4.0, 6.0).par 91 | val v3 = v1 zip v2 92 | //println(v3) 93 | val v4 = zipThem(v1, v2) 94 | assert(v4 === v3) 95 | } 96 | 97 | test("ParVector in generic length function") { 98 | val v1 = Vector(1, 2, 3, 4).par 99 | val l = getLength(v1) 100 | assert(l === 4) 101 | } 102 | 103 | test("Vector update test") { 104 | import breeze.stats.distributions.Gaussian 105 | implicit val dState = new State[Double] {} 106 | implicit val dObs = new Observation[Double] {} 107 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector 108 | val p2 = update((s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw)(p1, 5.0) 109 | assert(p2._2.length > 90000) 110 | } 111 | 112 | test("ParVector update test") { 113 | import breeze.stats.distributions.Gaussian 114 | 
implicit val dState = new State[Double] {} 115 | implicit val dObs = new Observation[Double] {} 116 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector.par 117 | val p2 = update((s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw)(p1, 5.0) 118 | assert(p2._2.length > 90000) 119 | } 120 | 121 | test("Vector pFilter test") { 122 | import breeze.stats.distributions.Gaussian 123 | implicit val dState = new State[Double] {} 124 | implicit val dObs = new Observation[Double] {} 125 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector 126 | val pn = pFilter(p1, List(2.0, 2.0, 3.0, 4.0), (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw) 127 | assert(pn._2.length > 90000) 128 | } 129 | 130 | test("ParVector pFilter test") { 131 | import breeze.stats.distributions.Gaussian 132 | implicit val dState = new State[Double] {} 133 | implicit val dObs = new Observation[Double] {} 134 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector.par 135 | val pn = pFilter(p1, List(2.0, 2.0, 3.0, 4.0), (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw) 136 | assert(pn._2.length > 90000) 137 | } 138 | 139 | test("Vector pfMll test") { 140 | import breeze.stats.distributions.Gaussian 141 | implicit val dState = new State[Double] {} 142 | implicit val dObs = new Observation[Double] {} 143 | implicit val dPar = new Parameter[Double] {} 144 | val mll = pfMll( 145 | (th: Double) => Gaussian(0.0, 10.0).sample(100000).toVector, 146 | (th: Double) => (s: Double) => Gaussian(s, 1.0).draw, 147 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 148 | List(2.0, 2.0, 3.0, 4.0) 149 | ) 150 | val ll1 = mll(1.0) 151 | val ll2 = mll(2.0) 152 | assert(math.abs(ll1 - ll2) < 0.1) 153 | } 154 | 155 | test("ParVector pfMll test") { 156 | import breeze.stats.distributions.Gaussian 157 | implicit val dState = new State[Double] {} 158 | implicit val dObs = new 
Observation[Double] {} 159 | implicit val dPar = new Parameter[Double] {} 160 | val mll = pfMll( 161 | (th: Double) => Gaussian(0.0, 10.0).sample(100000).toVector.par, 162 | (th: Double) => (s: Double) => Gaussian(s, 1.0).draw, 163 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 164 | List(2.0, 2.0, 3.0, 4.0) 165 | ) 166 | val ll1 = mll(1.0) 167 | val ll2 = mll(2.0) 168 | assert(math.abs(ll1 - ll2) < 0.1) 169 | } 170 | 171 | } 172 | 173 | // eof 174 | -------------------------------------------------------------------------------- /Setup.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | ## Setting up your laptop for the course 4 | 5 | It will save time during the course if everyone sets up their laptop with some essential required software in advance. Since Scala runs on the JVM, and the JVM is platform independent, it doesn't really matter what OS is used - in particular, Linux, Windows and Mac should all be fine. The basic requirements are, *in order*: 6 | 7 | * Download and install Java 8 (OpenJDK is fine) - *requires root/administrator access* 8 | * Download (or clone) this course code repository 9 | * Download, install and test `sbt` 10 | * Install a Scala-aware editor or IDE 11 | * Download (but don't install) Apache Spark 12 | 13 | Further information is given below. For avoidance of doubt, I am *not* assuming that you will have done a system-wide installation of Scala or the Scala compiler, and I don't particularly recommend doing so. It is not necessary if you are using `sbt`. 14 | 15 | ### Download and install Java 16 | 17 | *This step requires root/administrator access to your laptop, so if you don't have this, you will need help from your system administrator* 18 | 19 | Versions of Scala prior to 2.12.x worked with Java 6 and Java 7, but the 2.12.x Scala releases require Java 8, as Java 8 introduced a number of features which make it a better target for Scala compilation. 
I will be using Scala 2.12.10 in the course, so Java 8 is required. A more recent version of Java (e.g. Java 11) should also be fine.
Linux users can download from a terminal with a command like: 39 | ```bash 40 | wget https://github.com/darrenjw/scala-course/archive/master.zip 41 | ``` 42 | 43 | ### Download and install sbt 44 | 45 | `sbt` is the Scala build tool. You should download, install and test this before the course starts. The *testing* part is particularly important, as it will download and cache a lot of Scala libraries on your system ready for use during the course. See my [sbt installation page](sbt/Readme.md) for further details. 46 | 47 | ### Install a Scala IDE 48 | 49 | People starting out with programming in Scala are likely to benefit from writing code using an editor which can provide instant feedback and assistance. There are many possible options here, but it is not possible to provide support for every Scala-aware editor in existence. The course presenter uses [Emacs](https://www.gnu.org/software/emacs/) together with [Ensime](http://ensime.org/editors/emacs/install/), and considers this to be a good option for people already comfortable with the Emacs text editor. However, this is probably not a good option for people unfamiliar with Emacs. For everyone else, [IntelliJ](IntelliJ.md) is probably a safer bet, and the course presenter has some familiarity with it, so should be able to provide basic support. The course presenter(s) will not be able to provide support for any other editor or IDE. It is therefore strongly recommended that participants comfortable with Emacs set up Emacs with Ensime, and that everyone else installs IntelliJ. Switching to another editor/IDE in the future will be quite straightforward, but it will save a lot of time during the course if everyone uses one of the two recommended IDEs. 
See one of the following pages for further details: 50 | 51 | * [Installing IntelliJ](IntelliJ.md) 52 | * [Installing Ensime](Ensime.md) (for Emacs users only) 53 | * [Installing the ScalaIDE](ScalaIDE.md) (obsolete) 54 | 55 | ### Download Apache Spark 56 | 57 | In case of a poor Internet connection during the course, it will be helpful if everyone could download this [Apache Spark 2.4.5](https://downloads.apache.org/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz) package to their system in advance. Linux users can download from a terminal with a command like: 58 | ```bash 59 | wget https://downloads.apache.org/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz 60 | ``` 61 | You should make sure that you have a tool on your system which can unpack a "tgz" file (no issue for Linux users), but there is no need to "install" Spark - we will walk through installation/setup as part of the course. 62 | 63 | ## Further information 64 | 65 | Daniel Spiewak has a nice guide to [getting started in Scala](https://gist.github.com/djspiewak/cb72c41ac335a3a9b28b3307be04aa43) that could be a useful source of additional information. However, I will not be assuming that you have followed all of the advice in his guide. In particular, although the Ammonite REPL is very nice, I understand that there are issues with it on Windows. We will therefore not be using it in this course. 66 | 67 | 68 | -------------------------------------------------------------------------------- /fragments/advanced.scala: -------------------------------------------------------------------------------- 1 | 2 | Vector(1,2,3).sum 3 | // res0: Int = 6 4 | List(1.0,5.0).sum 5 | // res1: Double = 6.0 6 | 7 | 8 | Vector(1,2,3).mean 9 | // :8: error: value mean is not a member of 10 | // Vector[Int] 11 | // Vector(1,2,3).mean 12 | // ^ 13 | 14 | 15 | object Meanable { 16 | def mean[T: Numeric](it: Iterable[T]): Double = 17 | it.map(implicitly[Numeric[T]].toDouble(_)). 
18 | sum / it.size 19 | } 20 | 21 | 22 | object Meanable { 23 | def mean[T](it: Iterable[T])( 24 | implicit num: Numeric[T]): Double = 25 | it.map(num.toDouble(_)).sum / it.size 26 | } 27 | 28 | 29 | import Meanable._ 30 | // import Meanable._ 31 | mean(Vector(1,2,3)) 32 | // res3: Double = 2.0 33 | mean(List(1.0,5.0)) 34 | // res4: Double = 3.0 35 | 36 | 37 | implicit class MeanableInstance[T: Numeric]( 38 | it: Iterable[T]) { 39 | def mean[T] = Meanable.mean(it) 40 | } 41 | 42 | 43 | Vector(1,2,3).mean 44 | // res5: Double = 2.0 45 | List(1.0,3.0,5.0,7.0).mean 46 | // res6: Double = 4.0 47 | 48 | 49 | trait CsvRow[T] { 50 | def toCsv(row: T): String 51 | } 52 | 53 | 54 | implicit class CsvRowSyntax[T](row: T) { 55 | def toCsv(implicit inst: CsvRow[T]) = inst.toCsv(row) 56 | } 57 | 58 | 59 | def printRows[T: CsvRow](it: Iterable[T]): Unit = 60 | it.foreach(row => println(row.toCsv)) 61 | 62 | 63 | case class MyState(x: Int, y: Double) 64 | 65 | 66 | implicit val myStateCsvRow = new CsvRow[MyState] { 67 | def toCsv(row: MyState) = row.x.toString+","+row.y 68 | } 69 | 70 | 71 | MyState(1,2.0).toCsv 72 | // res7: String = 1,2.0 73 | printRows(List(MyState(1,2.0),MyState(2,3.0))) 74 | // 1,2.0 75 | // 2,3.0 76 | 77 | 78 | implicit val vectorDoubleCsvRow = 79 | new CsvRow[Vector[Double]] { 80 | def toCsv(row: Vector[Double]) = row.mkString(",") 81 | } 82 | // vectorDoubleCsvRow: CsvRow[Vector[Double]] = 83 | // $anon$1@4604e051 84 | 85 | Vector(1.0,2.0,3.0).toCsv 86 | // res9: String = 1.0,2.0,3.0 87 | printRows(List(Vector(1.0,2.0),Vector(4.0,5.0), 88 | Vector(3.0,3.0))) 89 | // 1.0,2.0 90 | // 4.0,5.0 91 | // 3.0,3.0 92 | 93 | 94 | import scala.language.higherKinds 95 | trait Thinnable[F[_]] { 96 | def thin[T](f: F[T], th: Int): F[T] 97 | } 98 | 99 | 100 | implicit class ThinnableSyntax[T,F[T]](value: F[T]) { 101 | def thin(th: Int)(implicit inst: Thinnable[F]): F[T] = 102 | inst.thin(value,th) 103 | } 104 | 105 | 106 | implicit val streamThinnable: Thinnable[Stream] 
= 107 | new Thinnable[Stream] { 108 | def thin[T](s: Stream[T],th: Int): Stream[T] = { 109 | val ss = s.drop(th-1) 110 | if (ss.isEmpty) Stream.empty else 111 | ss.head #:: thin(ss.tail, th) 112 | } 113 | } 114 | 115 | 116 | Stream.iterate(0)(_ + 1). 117 | drop(10). 118 | thin(2). 119 | take(5). 120 | toArray 121 | // res11: Array[Int] = Array(11, 13, 15, 17, 19) 122 | 123 | 124 | trait GenericColl[C[_]] { 125 | def map[A, B](ca: C[A])(f: A => B): C[B] 126 | def reduce[A](ca: C[A])(f: (A, A) => A): A 127 | def flatMap[A, B, D[B] <: GenTraversable[B]]( 128 | ca: C[A])(f: A => D[B]): C[B] 129 | def zip[A, B](ca: C[A])(cb: C[B]): C[(A, B)] 130 | def length[A](ca: C[A]): Int 131 | } 132 | 133 | 134 | def update[S: State, O: Observation, C[_]: GenericColl]( 135 | dataLik: (S, O) => LogLik, stepFun: S => S 136 | )(x: C[S], o: O): (LogLik, C[S]) = { 137 | import breeze.stats.distributions.Poisson 138 | val xp = x map (stepFun(_)) 139 | val lw = xp map (dataLik(_, o)) 140 | val max = lw reduce (math.max(_, _)) 141 | val rw = lw map (lwi => math.exp(lwi - max)) 142 | val srw = rw reduce (_ + _) 143 | val l = rw.length 144 | val z = rw zip xp 145 | val rx = z flatMap { case (rwi, xpi) => 146 | Vector.fill(Poisson(rwi * l / srw).draw)(xpi) } 147 | (max + math.log(srw / l), rx) 148 | } 149 | 150 | 151 | def pFilter[S: State, O: Observation, 152 | C[_]: GenericColl, D[O] <: GenTraversable[O]]( 153 | x0: C[S], data: D[O], 154 | dataLik: (S, O) => LogLik, stepFun: S => S 155 | ): (LogLik, C[S]) = { 156 | val updater = update[S, O, C](dataLik, stepFun) _ 157 | data.foldLeft((0.0, x0))((prev, o) => { 158 | val (oll, ox) = prev 159 | val (ll, x) = updater(ox, o) 160 | (oll + ll, x) 161 | }) 162 | } 163 | 164 | 165 | def pfMll[S: State, P: Parameter, O: Observation, 166 | C[_]: GenericColl, D[O] <: GenTraversable[O]]( 167 | simX0: P => C[S], stepFun: P => S => S, 168 | dataLik: P => (S, O) => LogLik, data: D[O] 169 | ): (P => LogLik) = (th: P) => 170 | pFilter(simX0(th), data, 
dataLik(th), stepFun(th))._1 171 | 172 | 173 | val inNoise = Gaussian(0.0, 1.0).sample(99) 174 | val state = DenseVector(inNoise.scanLeft(0.0)( 175 | (s, i) => 0.8 * s + i).toArray) 176 | val noise = DenseVector( 177 | Gaussian(0.0, 2.0).sample(100).toArray) 178 | val data = (state + noise).toArray.toList 179 | 180 | 181 | val mll = pfMll( 182 | (th: Double) => Gaussian(0.0, 10.0). 183 | sample(10000).toVector.par, 184 | (th: Double) => (s: Double) => 185 | Gaussian(th * s, 1.0).draw, 186 | (th: Double) => (s: Double, o: Double) => 187 | Gaussian(s, 2.0).logPdf(o), 188 | data 189 | ) 190 | 191 | 192 | libraryDependencies += "org.typelevel" %% "cats-core" % "1.0.0" 193 | 194 | 195 | import cats.Monoid 196 | // import cats.Monoid 197 | import cats.syntax.semigroup._ 198 | // import cats.syntax.semigroup._ 199 | import cats.instances.all._ 200 | // import cats.instances.all._ 201 | 202 | 203 | 1 |+| 3 204 | // res0: Int = 4 205 | 1.0 |+| 2.0 206 | // res1: Double = 3.0 207 | "Hi" |+| "There" 208 | // res2: String = HiThere 209 | List(1,2,3) |+| List(4,5) 210 | // res3: List[Int] = List(1, 2, 3, 4, 5) 211 | 212 | 213 | val m1 = Map("a" -> 2, "b" -> 3) 214 | // m1: Map[String,Int] = Map(a -> 2, b -> 3) 215 | val m2 = Map("b" -> 4, "c" -> 5) 216 | // m2: Map[String,Int] = Map(b -> 4, c -> 5) 217 | m1 |+| m2 218 | // res3: Map[String,Int] = Map(b -> 7, c -> 5, a -> 2) 219 | 220 | 221 | scala.io.Source. 222 | fromFile("/usr/share/dict/words"). 223 | getLines. 224 | map(_.trim). 225 | map(_.toLowerCase). 226 | flatMap(_.toCharArray). 227 | filter(_ > '/'). 228 | filter(_ < '}'). 229 | map(ch => Map(ch -> 1)). 230 | reduce(_ |+| _) 231 | // res4: Map[Char,Int] = Map(e -> 88833, s -> 90113, 232 | // x -> 2124, n -> 57144, j -> 1948, y -> 12652, 233 | // t -> 53006, u -> 26118, f -> 10675, a -> 64439, ... 
234 | 235 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/src/main/scala/metropolis.scala: -------------------------------------------------------------------------------- 1 | /* 2 | mcmc-stream.scala 3 | 4 | 5 | */ 6 | 7 | import breeze.linalg._ 8 | import breeze.plot._ 9 | import breeze.stats.distributions._ 10 | import breeze.stats.meanAndVariance 11 | import annotation.tailrec 12 | 13 | object MCMC { 14 | 15 | def mcmcSummary(dv: DenseVector[Double]): Figure = { 16 | val len = dv.length 17 | val mav = meanAndVariance(dv) 18 | val mean = mav.mean 19 | val variance = mav.variance 20 | println(s"Iters=$len, Mean=$mean, variance=$variance") 21 | val f = Figure("MCMC Summary") 22 | f.height = 1000 23 | f.width = 1200 24 | val p0 = f.subplot(1, 2, 0) 25 | p0 += plot(linspace(1, len, len), dv) 26 | p0.xlabel = "Iteration" 27 | p0.ylabel = "Value" 28 | p0.title = "Trace plot" 29 | val p1 = f.subplot(1, 2, 1) 30 | p1 += hist(dv, 100) 31 | p1.xlabel = "Value" 32 | p1.title = "Marginal density" 33 | f 34 | } 35 | 36 | def time[A](f: => A) = { 37 | val s = System.nanoTime 38 | val ret = f 39 | println("time: " + (System.nanoTime - s) / 1e6 + "ms") 40 | ret 41 | } 42 | 43 | def metrop1(n: Int = 1000, eps: Double = 0.5): DenseVector[Double] = { 44 | val vec = DenseVector.fill(n)(0.0) 45 | var x = 0.0 46 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 47 | vec(0) = x 48 | (1 until n).foreach { i => 49 | val can = x + Uniform(-eps, eps).draw 50 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 51 | val loga = loglik - oldll 52 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 53 | x = can 54 | oldll = loglik 55 | } 56 | vec(i) = x 57 | } 58 | vec 59 | } 60 | 61 | def metrop2(n: Int = 1000, eps: Double = 0.5): Unit = { 62 | var x = 0.0 63 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 64 | (1 to n).foreach { i => 65 | val can = x + Uniform(-eps, eps).draw 66 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 67 | val loga = loglik - oldll 
68 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 69 | x = can 70 | oldll = loglik 71 | } 72 | println(x) 73 | } 74 | } 75 | 76 | @tailrec 77 | def metrop3(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Unit = { 78 | if (n > 0) { 79 | println(x) 80 | val can = x + Uniform(-eps, eps).draw 81 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 82 | val loga = loglik - oldll 83 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 84 | metrop3(n - 1, eps, can, loglik) 85 | else 86 | metrop3(n - 1, eps, x, oldll) 87 | } 88 | } 89 | 90 | @tailrec 91 | def metrop4(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue, acc: List[Double] = Nil): DenseVector[Double] = { 92 | if (n == 0) 93 | DenseVector(acc.reverse.toArray) 94 | else { 95 | val can = x + Uniform(-eps, eps).draw 96 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 97 | val loga = loglik - oldll 98 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 99 | metrop4(n - 1, eps, can, loglik, can :: acc) 100 | else 101 | metrop4(n - 1, eps, x, oldll, x :: acc) 102 | } 103 | } 104 | 105 | def newState(x: Double, oldll: Double, eps: Double): (Double, Double) = { 106 | val can = x + Uniform(-eps, eps).draw 107 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 108 | val loga = loglik - oldll 109 | if (math.log(Uniform(0.0, 1.0).draw) < loga) (can, loglik) else (x, oldll) 110 | } 111 | 112 | @tailrec 113 | def metrop5(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Unit = { 114 | if (n > 0) { 115 | println(x) 116 | val ns = newState(x, oldll, eps) 117 | metrop5(n - 1, eps, ns._1, ns._2) 118 | } 119 | } 120 | 121 | @tailrec 122 | def metrop5b(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Unit = { 123 | if (n > 0) { 124 | println(x) 125 | val (nx, ll) = newState(x, oldll, eps) 126 | metrop5b(n - 1, eps, nx, ll) 127 | } 128 | } 129 | 130 | @tailrec 131 | def metrop6(n: Int = 1000, eps: Double = 0.5, x: 
Double = 0.0, oldll: Double = Double.MinValue, acc: List[Double] = Nil): DenseVector[Double] = { 132 | if (n == 0) DenseVector(acc.reverse.toArray) else { 133 | val (nx, ll) = newState(x, oldll, eps) 134 | metrop6(n - 1, eps, nx, ll, nx :: acc) 135 | } 136 | } 137 | 138 | def nextState(eps: Double)(state: (Double, Double)): (Double, Double) = { 139 | val (x, oldll) = state 140 | val can = x + Uniform(-eps, eps).draw 141 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 142 | val loga = loglik - oldll 143 | if (math.log(Uniform(0.0, 1.0).draw) < loga) (can, loglik) else (x, oldll) 144 | } 145 | 146 | def metrop7(eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Stream[Double] = 147 | Stream.iterate((x, oldll))(nextState(eps)) map (_._1) 148 | 149 | def thin[T](s: Stream[T], th: Int): Stream[T] = { 150 | val ss = s.drop(th - 1) 151 | if (ss.isEmpty) Stream.empty else 152 | ss.head #:: thin(ss.tail, th) 153 | } 154 | 155 | def kernel(x: Double): Rand[Double] = for { 156 | innov <- Uniform(-0.5, 0.5) 157 | can = x + innov 158 | oldll = Gaussian(0.0, 1.0).logPdf(x) 159 | loglik = Gaussian(0.0, 1.0).logPdf(can) 160 | loga = loglik - oldll 161 | u <- Uniform(0.0, 1.0) 162 | } yield if (math.log(u) < loga) can else x 163 | 164 | def main(arg: Array[String]): Unit = { 165 | println("Hi") 166 | metrop1(10).foreach(println) 167 | metrop2(10) 168 | metrop3(10) 169 | metrop4(10).foreach(println) 170 | metrop5(10) 171 | metrop6(10).foreach(println) 172 | metrop7().take(10).foreach(println) 173 | val ms = Stream.iterate(0.0)(kernel(_).draw) 174 | ms.take(10).foreach(println) 175 | // plot output to check it looks OK 176 | mcmcSummary(DenseVector(ms.take(100000).toArray)) 177 | // timings... 
178 | val N=1000000 179 | println("metrop1:") 180 | time(metrop1(N)) 181 | println("metrop4:") 182 | time(metrop4(N)) 183 | println("metrop6:") 184 | time(metrop6(N)) 185 | println("metrop7:") 186 | time(metrop7().take(N).toArray) 187 | println("MarkovChain with custom kernel") 188 | time(Stream.iterate(0.0)(kernel(_).draw).take(N).toArray) 189 | 190 | println("Bye") 191 | } 192 | 193 | } 194 | 195 | // eof 196 | 197 | -------------------------------------------------------------------------------- /fragments/tools.scala: -------------------------------------------------------------------------------- 1 | 2 | set scalaVersion := "2.12.10" 3 | 4 | 5 | set libraryDependencies+="org.scalanlp"%%"breeze"%"1.0" 6 | set libraryDependencies+="org.scalanlp"%%"breeze-natives"%"1.0" 7 | 8 | 9 | object Metropolis { 10 | 11 | import breeze.stats.distributions._ 12 | 13 | def kernel(x: Double): Rand[Double] = for { 14 | innov <- Uniform(-0.5, 0.5) 15 | can = x + innov 16 | oldll = Gaussian(0.0, 1.0).logPdf(x) 17 | loglik = Gaussian(0.0, 1.0).logPdf(can) 18 | loga = loglik - oldll 19 | u <- Uniform(0.0, 1.0) 20 | } yield if (math.log(u) < loga) can else x 21 | 22 | val chain = Stream.iterate(0.0)(kernel(_).draw) 23 | 24 | def main(args: Array[String]): Unit = { 25 | val n = if (args.size == 0) 10 else args(0).toInt 26 | chain.take(n).toArray.foreach(println) 27 | } 28 | 29 | } 30 | 31 | 32 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10") 33 | 34 | 35 | set scalaVersion := "2.12.10" 36 | set libraryDependencies+="org.ddahl"%%"rscala"%"3.2.18" 37 | console 38 | 39 | 40 | val R = org.ddahl.rscala.RClient() 41 | // R: org.ddahl.rscala.RClient = RClient@9fc5dc1 42 | 43 | 44 | org.ddahl.rscala.RClient.defaultRCmd 45 | // res0: String = R 46 | 47 | 48 | val d0 = R.evalD0("rnorm(1)") 49 | // d0: Double = 0.945922465932532 50 | 51 | 52 | val d1 = R.evalD1("rnorm(5)") 53 | // d1: Array[Double] = Array(-0.8272179841496433, ... 
54 | 55 | 56 | val d2 = R.evalD2("matrix(rnorm(6),nrow=2)") 57 | // d2: Array[Array[Double]] = Array(Array( 58 | // -0.7545734628207127, ... 59 | 60 | 61 | 62 | R.eval("vec = %-", (1 to 10).toArray) // send data to R 63 | R.evalI1("vec") 64 | // res9: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 65 | 66 | 67 | R eval """ 68 | vec2 = rep(vec,3) 69 | vec3 = vec2 + 1 70 | mat1 = matrix(vec3,ncol=5) 71 | """ 72 | 73 | 74 | R.evalI2("mat1") // get data back from R 75 | // res3: Array[Array[Int]] = Array(Array(2, 8, 4, ... 76 | 77 | 78 | import breeze.stats.distributions._ 79 | import breeze.linalg._ 80 | import org.ddahl.rscala.RClient 81 | val x = Uniform(50,60).sample(1000) 82 | // x: IndexedSeq[Double] = Vector(50.54008541753607, ... 83 | val eta = x map (xi => (xi * 0.1) - 3) 84 | // eta: IndexedSeq[Double] = Vector(2.054008541753607, ... 85 | val mu = eta map math.exp 86 | // mu: IndexedSeq[Double] = Vector(7.799101554600703, ... 87 | val y = mu map (Poisson(_).draw) 88 | // y: IndexedSeq[Int] = Vector(8, 15, 12, ... 
89 | 90 | 91 | val R = RClient() // initialise an R interpreter 92 | // R: RClient = RClient@661e0a99 93 | R.eval("x = %-", x.toArray) // send x to R 94 | R.eval("y = %-", y.toArray) // send y to R 95 | R.eval("mod = glm(y~x,family=poisson())") // fit in R 96 | // pull the fitted coefficents back into scala 97 | DenseVector[Double](R.evalD1("mod$coefficients")) 98 | // res9: DenseVector[Double] = DenseVector( 99 | // -2.93361267743947, 0.09875286320703261) 100 | 101 | 102 | require(1 == 1) // satisfied 103 | // require(1 == 2) // throws exception 104 | assert (1 == 1) // satisfied 105 | // assert (1 == 2) // throws exception 106 | 107 | 108 | def sqrt(x: Double): Double = { 109 | require(x >= 0.0) // pre-condition 110 | val ans = math.sqrt(x) 111 | assert(math.abs(x-ans*ans) < 0.00001) // post-condition 112 | ans 113 | } 114 | 115 | sqrt(2.0) // works as expected 116 | // sqrt(-2.0) // throws exception 117 | 118 | 119 | scalacOptions += "-Xdisable-assertions" 120 | 121 | 122 | class SetSpec extends AnyFlatSpec { 123 | 124 | "An empty Set" should "have size 0" in { 125 | assert(Set.empty.size == 0) 126 | } 127 | 128 | it should "produce NoSuchElementException when head is invoked" in { 129 | assertThrows[NoSuchElementException] { 130 | Set.empty.head 131 | } 132 | } 133 | 134 | } 135 | 136 | 137 | "A Gamma(3.0,4.0)" should "have mean 12.0" in { 138 | import breeze.stats.distributions.Gamma 139 | val g = Gamma(3.0,4.0) 140 | val m = g.mean 141 | assert(math.abs(m - 12.0) < 0.000001) 142 | } 143 | 144 | 145 | "org.scalacheck" %% "scalacheck" % "1.14.1" % "test" 146 | 147 | 148 | import org.scalatest.matchers.should.Matchers 149 | 150 | import org.scalacheck._ 151 | import org.scalacheck.Prop.{forAll, propBoolean} 152 | 153 | class StringSpec extends Properties("String") with Matchers { 154 | 155 | property("startwith first string") = 156 | forAll { (a: String, b: String) => 157 | (a+b).startsWith(a) 158 | } 159 | 160 | property("concatenate length") = 161 | forAll { 
(a: String, b: String) => 162 | (a+b).length == a.length + b.length 163 | } 164 | 165 | property("substring") = 166 | forAll { (a: String, b: String, c: String) => 167 | (a+b+c).substring(a.length, a.length+b.length) == b 168 | } 169 | 170 | } 171 | 172 | 173 | class SqrtSpecification extends Properties("Sqrt") with Matchers { 174 | 175 | property("math.sqrt should square to give original") = 176 | forAll { a: Double => 177 | (a >= 0.0) ==> { 178 | val s = math.sqrt(a) 179 | val tol = 1e-8 * a 180 | s*s === a +- tol 181 | } 182 | } 183 | 184 | } 185 | 186 | 187 | /** 188 | * Take every th value from the stream s of type T 189 | * 190 | * @param s A Stream to be thinned 191 | * @param th Thinning interval 192 | * 193 | * @return The thinned stream, with values of 194 | * the same type as the input stream 195 | */ 196 | def thinStream[T](s: Stream[T],th: Int): Stream[T] = { 197 | val ss = s.drop(th-1) 198 | if (ss.isEmpty) Stream.empty else 199 | ss.head #:: thinStream(ss.tail, th) 200 | } 201 | 202 | 203 | val x = 3 + 2 204 | // x: Int = 5 205 | 206 | 207 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "1.3.6") 208 | 209 | 210 | enablePlugins(MdocPlugin) 211 | 212 | 213 | resolvers += Resolver.bintrayRepo("cibotech", "public") 214 | libraryDependencies += "com.cibo" %% "evilplot" % "0.6.3" 215 | libraryDependencies += "com.cibo" %% "evilplot-repl" % "0.6.3" 216 | 217 | 218 | import scala.util.Random 219 | import com.cibo.evilplot._ 220 | import com.cibo.evilplot.plot._ 221 | import com.cibo.evilplot.numeric._ 222 | import com.cibo.evilplot.plot.renderers.PointRenderer 223 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 224 | 225 | val points = Seq.fill(150) { 226 | Point(Random.nextDouble(), Random.nextDouble()) 227 | } :+ Point(0.0, 0.0) 228 | val years = Seq.fill(150)(Random.nextDouble()) :+ 1.0 229 | val yearMap = (points zip years).toMap.withDefaultValue(0.0) 230 | val plot = ScatterPlot( 231 | points, 232 | pointRenderer = 
Some(PointRenderer.depthColor((p: Point) => 233 | p.x, 0.0, 500.0, None, None)) 234 | ).standard() 235 | .xLabel("x") 236 | .yLabel("y") 237 | .trend(1, 0) 238 | .rightLegend() 239 | .render() 240 | displayPlot(plot) 241 | 242 | 243 | val im = plot.asBufferedImage 244 | 245 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/src/main/scala/evilplot-examples.scala: -------------------------------------------------------------------------------- 1 | /* 2 | evilplot-examples.scala 3 | 4 | EvilPlot examples 5 | 6 | */ 7 | 8 | object EvilPlotExamples { 9 | 10 | import scala.util.Random 11 | 12 | import com.cibo.evilplot._ 13 | import com.cibo.evilplot.plot._ 14 | import com.cibo.evilplot.numeric._ 15 | import com.cibo.evilplot.plot.renderers.PointRenderer 16 | 17 | def scatterExample() = { 18 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 19 | val points = Seq.fill(150) { 20 | Point(Random.nextDouble(), Random.nextDouble()) 21 | } :+ Point(0.0, 0.0) 22 | val years = Seq.fill(150)(Random.nextDouble()) :+ 1.0 23 | val yearMap = (points zip years).toMap.withDefaultValue(0.0) 24 | ScatterPlot( 25 | points, 26 | //pointRenderer = Some(PointRenderer.depthColor(p => yearMap(p), 0.0, 1.0, None, None)) 27 | pointRenderer = Some(PointRenderer.depthColor((p: Point) => p.x, 0.0, 500.0, None, None)) 28 | ) 29 | .standard() 30 | .xLabel("x") 31 | .yLabel("y") 32 | .trend(1, 0) 33 | .rightLegend() 34 | .render() 35 | } 36 | 37 | def scatterHist() = { 38 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 39 | import com.cibo.evilplot.colors.RGB 40 | import com.cibo.evilplot.geometry.Extent 41 | import com.cibo.evilplot.geometry.LineStyle.DashDot 42 | 43 | val allYears = (2007 to 2013).toVector 44 | val data = Seq.fill(150)(Point(Random.nextDouble(), Random.nextDouble())) 45 | val years = Seq.fill(150)(allYears(Random.nextInt(allYears.length))) 46 | val yearMap = (data zip years).toMap 47 | 48 | val xhist = 
Histogram(data.map(_.x), bins = 50) 49 | val yhist = Histogram(data.map(_.y), bins = 40) 50 | ScatterPlot( 51 | data = data, 52 | //pointRenderer = Some(PointRenderer.colorByCategory(data, p => yearMap(p))) 53 | ).topPlot(xhist) 54 | .rightPlot(yhist) 55 | .standard() 56 | .title("Measured vs Actual") 57 | .xLabel("measured") 58 | .yLabel("actual") 59 | .trend(1, 0, color = RGB(45, 45, 45), lineStyle = DashDot) 60 | .overlayLegend(x = 0.95, y = 0.8) 61 | .render(Extent(600, 400)) 62 | } 63 | 64 | def functionPlot() = { 65 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 66 | import com.cibo.evilplot.colors.HTMLNamedColors 67 | import com.cibo.evilplot.numeric.Bounds 68 | Overlay( 69 | FunctionPlot.series(x => x * x, "y = x^2", 70 | HTMLNamedColors.dodgerBlue, xbounds = Some(Bounds(-1, 1))), 71 | FunctionPlot.series(x => math.pow(x, 3), "y = x^3", 72 | HTMLNamedColors.crimson, xbounds = Some(Bounds(-1, 1))), 73 | FunctionPlot.series(x => math.pow(x, 4), "y = x^4", 74 | HTMLNamedColors.green, xbounds = Some(Bounds(-1, 1))) 75 | ).title("A bunch of polynomials.") 76 | .overlayLegend() 77 | .standard() 78 | .render() 79 | } 80 | 81 | def barChart() = { 82 | import com.cibo.evilplot.colors.RGB 83 | import com.cibo.evilplot.geometry.{Align, Drawable, Extent, Rect, Text} 84 | import com.cibo.evilplot.plot._ 85 | import com.cibo.evilplot.plot.aesthetics 86 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme.{DefaultFonts} 87 | import com.cibo.evilplot.plot.renderers.BarRenderer 88 | implicit val theme = aesthetics.DefaultTheme.DefaultTheme.copy( 89 | fonts = DefaultFonts. 
90 | copy(tickLabelSize = 14, legendLabelSize = 14, fontFace = "'Lato', sans-serif") 91 | ) 92 | val percentChange = Seq[Double](-10, 5, 12, 68, -22) 93 | val labels = Seq("one", "two", "three", "four", "five") 94 | val labeledByColor = new BarRenderer { 95 | val positive = RGB(241, 121, 6) 96 | val negative = RGB(226, 56, 140) 97 | def render(plot: Plot, extent: Extent, category: Bar): Drawable = { 98 | val rect = Rect(extent) 99 | val value = category.values.head 100 | val color = if (value >= 0) positive else negative 101 | Align.center(rect filled color, Text(s"$value%", size = 20) 102 | .filled(theme.colors.label) 103 | ).group 104 | } 105 | } 106 | BarChart 107 | .custom(percentChange.map(Bar.apply), spacing = Some(20), 108 | barRenderer = Some(labeledByColor) 109 | ) 110 | .standard(xLabels = labels) 111 | .hline(0) 112 | .render() 113 | } 114 | 115 | def clusteredBar() = { 116 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 117 | val data = Seq[Seq[Double]]( 118 | Seq(1, 2, 3), 119 | Seq(4, 5, 6), 120 | Seq(3, 4, 1), 121 | Seq(2, 3, 4) 122 | ) 123 | BarChart 124 | .clustered( 125 | data, 126 | labels = Seq("one", "two", "three") 127 | ) 128 | .title("Clustered Bar Chart Demo") 129 | .xAxis(Seq("a", "b", "c", "d")) 130 | .yAxis() 131 | .frame() 132 | .bottomLegend() 133 | .render() 134 | } 135 | 136 | def boxPlot() = { 137 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 138 | val data = Seq.fill(10)(Seq.fill(Random.nextInt(30))(Random.nextDouble())) 139 | BoxPlot(data) 140 | .standard(xLabels = (1 to 10).map(_.toString)) 141 | .render() 142 | } 143 | 144 | def pairsPlot() = { 145 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 146 | val labels = Vector("a", "b", "c", "d") 147 | val data = for (i <- 1 to 4) yield { 148 | (labels(i - 1), Seq.fill(10) { Random.nextDouble() * 10 }) 149 | } 150 | val plots = for ((xlabel, xdata) <- data) yield { 151 | for ((ylabel, ydata) <- data) yield { 152 | val points = (xdata, ydata).zipped.map 
{ (a, b) => Point(a, b) } 153 | if (ylabel == xlabel) { 154 | Histogram(xdata, bins = 4) 155 | } else { 156 | ScatterPlot(points) 157 | } 158 | } 159 | } 160 | Facets(plots) 161 | .standard() 162 | .title("Pairs Plot with Histograms") 163 | .topLabels(data.map { _._1 }) 164 | .rightLabels(data.map { _._1 }) 165 | .render() 166 | } 167 | 168 | def contourPlot() = { 169 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 170 | val data = Seq.fill(100) { 171 | Point(Random.nextDouble() * 20, Random.nextDouble() * 20) 172 | } 173 | ContourPlot(data) 174 | .standard() 175 | .xbounds(0, 20) 176 | .ybounds(0, 20) 177 | .render() 178 | } 179 | 180 | def heatMap() = { 181 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 182 | val x = 100 ; val y = 50 183 | //val x = 500 ; val y = 500 184 | val data = Vector.fill(y)(Vector.fill(x)(Random.nextDouble())) 185 | Heatmap(data,256) 186 | .standard() 187 | .render() 188 | } 189 | 190 | 191 | def main(args: Array[String]): Unit = { 192 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 193 | // some plots 194 | displayPlot(scatterExample()) 195 | displayPlot(scatterHist()) 196 | displayPlot(functionPlot()) 197 | displayPlot(barChart()) 198 | displayPlot(clusteredBar()) 199 | displayPlot(boxPlot()) 200 | displayPlot(pairsPlot()) 201 | displayPlot(heatMap()) 202 | displayPlot(contourPlot()) 203 | // writing a plot to a bitmap image 204 | val bitmap = contourPlot().asBufferedImage 205 | javax.imageio.ImageIO.write(bitmap, "png", new java.io.File("image.png")) 206 | } 207 | 208 | 209 | } 210 | 211 | // eof 212 | -------------------------------------------------------------------------------- /examples/C6-Smile/target/mdoc/smile-example.md: -------------------------------------------------------------------------------- 1 | # Smile example 2 | 3 | ## Some mdoc documentation 4 | 5 | This is some documentation prepared using `mdoc`. 
The original file is in `docs`, but the `sbt` task `mdoc` will typecheck and execute the code blocks, and put the compiled markdown document in `target/mdoc`. 6 | 7 | We begin by reading the data (we assume that the file "yacht.csv" already exists). 8 | 9 | ```scala 10 | val df = smile.read.csv("yacht.csv") 11 | // df: smile.data.DataFrame = [LongPos: double, PrisCoef: double, LDR: double, BDR: double, LBR: double, Froude: double, Resist: double] 12 | // +-------+--------+----+----+----+------+------+ 13 | // |LongPos|PrisCoef| LDR| BDR| LBR|Froude|Resist| 14 | // +-------+--------+----+----+----+------+------+ 15 | // | -2.3| 0.568|4.78|3.99|3.17| 0.125| 0.11| 16 | // | -2.3| 0.568|4.78|3.99|3.17| 0.15| 0.27| 17 | // | -2.3| 0.568|4.78|3.99|3.17| 0.175| 0.47| 18 | // | -2.3| 0.568|4.78|3.99|3.17| 0.2| 0.78| 19 | // | -2.3| 0.568|4.78|3.99|3.17| 0.225| 1.18| 20 | // | -2.3| 0.568|4.78|3.99|3.17| 0.25| 1.82| 21 | // | -2.3| 0.568|4.78|3.99|3.17| 0.275| 2.61| 22 | // | -2.3| 0.568|4.78|3.99|3.17| 0.3| 3.76| 23 | // | -2.3| 0.568|4.78|3.99|3.17| 0.325| 4.99| 24 | // | -2.3| 0.568|4.78|3.99|3.17| 0.35| 7.16| 25 | // +-------+--------+----+----+----+------+------+ 26 | // 298 more rows... 
27 | // 28 | df 29 | // res0: smile.data.DataFrame = [LongPos: double, PrisCoef: double, LDR: double, BDR: double, LBR: double, Froude: double, Resist: double] 30 | // +-------+--------+----+----+----+------+------+ 31 | // |LongPos|PrisCoef| LDR| BDR| LBR|Froude|Resist| 32 | // +-------+--------+----+----+----+------+------+ 33 | // | -2.3| 0.568|4.78|3.99|3.17| 0.125| 0.11| 34 | // | -2.3| 0.568|4.78|3.99|3.17| 0.15| 0.27| 35 | // | -2.3| 0.568|4.78|3.99|3.17| 0.175| 0.47| 36 | // | -2.3| 0.568|4.78|3.99|3.17| 0.2| 0.78| 37 | // | -2.3| 0.568|4.78|3.99|3.17| 0.225| 1.18| 38 | // | -2.3| 0.568|4.78|3.99|3.17| 0.25| 1.82| 39 | // | -2.3| 0.568|4.78|3.99|3.17| 0.275| 2.61| 40 | // | -2.3| 0.568|4.78|3.99|3.17| 0.3| 3.76| 41 | // | -2.3| 0.568|4.78|3.99|3.17| 0.325| 4.99| 42 | // | -2.3| 0.568|4.78|3.99|3.17| 0.35| 7.16| 43 | // +-------+--------+----+----+----+------+------+ 44 | // 298 more rows... 45 | // 46 | ``` 47 | 48 | We can get a quick summary of the data as follows. 49 | 50 | ```scala 51 | df.summary 52 | // res1: smile.data.DataFrame = [column: String, count: long, min: double, avg: double, max: double] 53 | // +--------+-----+-----+---------+-----+ 54 | // | column|count| min| avg| max| 55 | // +--------+-----+-----+---------+-----+ 56 | // | LongPos| 308| -5|-2.381818| 0| 57 | // |PrisCoef| 308| 0.53| 0.564136| 0.6| 58 | // | LDR| 308| 4.34| 4.788636| 5.14| 59 | // | BDR| 308| 2.81| 3.936818| 5.35| 60 | // | LBR| 308| 2.73| 3.206818| 3.64| 61 | // | Froude| 308|0.125| 0.2875| 0.45| 62 | // | Resist| 308| 0.01|10.495357|62.42| 63 | // +--------+-----+-----+---------+-----+ 64 | // 65 | ``` 66 | 67 | We can now carry out OLS regression after a couple of imports 68 | 69 | ```scala 70 | import smile.data.formula._ 71 | import scala.language.postfixOps 72 | val mod = smile.regression.ols("Resist" ~, df) 73 | // mod: smile.regression.LinearModel = Linear Model: 74 | // 75 | // Residuals: 76 | // Min 1Q Median 3Q Max 77 | // -11.7700 -7.5578 -1.8198 6.1620 
31.5715 78 | // 79 | // Coefficients: 80 | // Estimate Std. Error t value Pr(>|t|) 81 | // Intercept -19.2367 27.1133 -0.7095 0.4786 82 | // LongPos 0.1938 0.3381 0.5734 0.5668 83 | // PrisCoef -6.4194 44.1590 -0.1454 0.8845 84 | // LDR 4.2330 14.1651 0.2988 0.7653 85 | // BDR -1.7657 5.5212 -0.3198 0.7493 86 | // LBR -4.5164 14.2000 -0.3181 0.7507 87 | // Froude 121.6676 5.0658 24.0175 0.0000 *** 88 | // --------------------------------------------------------------------- 89 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 90 | // 91 | // Residual standard error: 8.9596 on 301 degrees of freedom 92 | // Multiple R-squared: 0.6576, Adjusted R-squared: 0.6507 93 | // F-statistic: 96.3327 on 6 and 301 DF, p-value: 4.526e-67 94 | // 95 | mod 96 | // res2: smile.regression.LinearModel = Linear Model: 97 | // 98 | // Residuals: 99 | // Min 1Q Median 3Q Max 100 | // -11.7700 -7.5578 -1.8198 6.1620 31.5715 101 | // 102 | // Coefficients: 103 | // Estimate Std. Error t value Pr(>|t|) 104 | // Intercept -19.2367 27.1133 -0.7095 0.4786 105 | // LongPos 0.1938 0.3381 0.5734 0.5668 106 | // PrisCoef -6.4194 44.1590 -0.1454 0.8845 107 | // LDR 4.2330 14.1651 0.2988 0.7653 108 | // BDR -1.7657 5.5212 -0.3198 0.7493 109 | // LBR -4.5164 14.2000 -0.3181 0.7507 110 | // Froude 121.6676 5.0658 24.0175 0.0000 *** 111 | // --------------------------------------------------------------------- 112 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 113 | // 114 | // Residual standard error: 8.9596 on 301 degrees of freedom 115 | // Multiple R-squared: 0.6576, Adjusted R-squared: 0.6507 116 | // F-statistic: 96.3327 on 6 and 301 DF, p-value: 4.526e-67 117 | // 118 | ``` 119 | 120 | If we don't want to regress on everything, we can just choose what we'd like to regress on. 
121 | 122 | ```scala 123 | smile.regression.ols("Resist" ~ "Froude", df) 124 | // res3: smile.regression.LinearModel = Linear Model: 125 | // 126 | // Residuals: 127 | // Min 1Q Median 3Q Max 128 | // -11.2396 -7.6662 -1.7111 6.4039 32.1537 129 | // 130 | // Coefficients: 131 | // Estimate Std. Error t value Pr(>|t|) 132 | // Intercept -24.4841 1.5336 -15.9654 0.0000 *** 133 | // Froude 121.6676 5.0339 24.1698 0.0000 *** 134 | // --------------------------------------------------------------------- 135 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 136 | // 137 | // Residual standard error: 8.9031 on 306 degrees of freedom 138 | // Multiple R-squared: 0.6562, Adjusted R-squared: 0.6551 139 | // F-statistic: 584.1803 on 1 and 306 DF, p-value: 6.233e-73 140 | // 141 | smile.regression.ols("Resist" ~ "Froude" + "LongPos", df) 142 | // res4: smile.regression.LinearModel = Linear Model: 143 | // 144 | // Residuals: 145 | // Min 1Q Median 3Q Max 146 | // -11.2361 -7.4169 -1.7970 6.3781 32.1378 147 | // 148 | // Coefficients: 149 | // Estimate Std. Error t value Pr(>|t|) 150 | // Intercept -24.0234 1.7315 -13.8743 0.0000 *** 151 | // Froude 121.6676 5.0394 24.1434 0.0000 *** 152 | // LongPos 0.1934 0.3362 0.5754 0.5655 153 | // --------------------------------------------------------------------- 154 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 155 | // 156 | // Residual standard error: 8.9129 on 305 degrees of freedom 157 | // Multiple R-squared: 0.6566, Adjusted R-squared: 0.6544 158 | // F-statistic: 291.6172 on 2 and 305 DF, p-value: 1.604e-71 159 | // 160 | ``` 161 | 162 | ### Summary 163 | 164 | This brief document has illustrated how easy and convenient it is to produce executable documentation and reports for Scala. 
165 | 166 | -------------------------------------------------------------------------------- /fragments/basics.scala: -------------------------------------------------------------------------------- 1 | 2 | Welcome to Scala 2.12.10 (OpenJDK 64-Bit Server VM, 3 | Java 1.8.0_121). 4 | Type in expressions for evaluation. Or try :help. 5 | 6 | scala> val a = 5 7 | a: Int = 5 8 | 9 | scala> a 10 | res0: Int = 5 11 | 12 | 13 | scala> a = 6 14 | :8: error: reassignment to val 15 | a = 6 16 | ^ 17 | scala> a 18 | res1: Int = 5 19 | 20 | 21 | scala> var b = 7 22 | b: Int = 7 23 | 24 | scala> b 25 | res2: Int = 7 26 | 27 | scala> b = 8 28 | b: Int = 8 29 | 30 | scala> b 31 | res3: Int = 8 32 | 33 | 34 | scala> val c = List(3,4,5,6) 35 | c: List[Int] = List(3, 4, 5, 6) 36 | 37 | scala> c(1) 38 | res4: Int = 4 39 | 40 | scala> c.sum 41 | res5: Int = 18 42 | 43 | scala> c.length 44 | res6: Int = 4 45 | 46 | scala> c.product 47 | res7: Int = 360 48 | 49 | 50 | scala> c.foldLeft(0)((x,y) => x+y) 51 | res8: Int = 18 52 | 53 | 54 | scala> c.foldLeft(0)(_+_) 55 | res9: Int = 18 56 | 57 | scala> c.foldLeft(1)(_*_) 58 | res10: Int = 360 59 | 60 | 61 | scala> c.reduce(_*_) 62 | res11: Int = 360 63 | 64 | 65 | scala> val d = Vector(2,3,4,5,6,7,8,9) 66 | d: Vector[Int] = Vector(2, 3, 4, 5, 6, 7, 8, 9) 67 | 68 | scala> d 69 | res11: Vector[Int] = Vector(2, 3, 4, 5, 6, 7, 8, 9) 70 | 71 | scala> d.slice(3,6) 72 | res12: Vector[Int] = Vector(5, 6, 7) 73 | 74 | scala> val e = d.updated(3,0) 75 | e: Vector[Int] = Vector(2, 3, 4, 0, 6, 7, 8, 9) 76 | 77 | scala> d 78 | res13: Vector[Int] = Vector(2, 3, 4, 5, 6, 7, 8, 9) 79 | 80 | scala> e 81 | res14: Vector[Int] = Vector(2, 3, 4, 0, 6, 7, 8, 9) 82 | 83 | 84 | scala> val f=(1 to 10).toList 85 | f: List[Int] = List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 86 | 87 | scala> f 88 | res15: List[Int] = List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 89 | 90 | scala> f.map(x => x*x) 91 | res16: List[Int] = List(1, 4, 9, 16, 25, 36, 49, 64, 92 | 81, 100) 93 | 94 | scala> f 
map {x => x*x} 95 | res17: List[Int] = List(1, 4, 9, 16, 25, 36, 49, 64, 96 | 81, 100) 97 | 98 | scala> f filter {_ > 4} 99 | res18: List[Int] = List(5, 6, 7, 8, 9, 10) 100 | 101 | 102 | math.log(2.0) 103 | // res0: Double = 0.6931471805599453 104 | math.sin(1.0) 105 | // res1: Double = 0.8414709848078965 106 | log(2.0) 107 | // :8: error: not found: value log 108 | // log(2.0) 109 | // ^ 110 | import math.log 111 | // import math.log 112 | log(2.0) 113 | // res3: Double = 0.6931471805599453 114 | import math._ 115 | // import math._ 116 | sin(Pi/2) 117 | // res4: Double = 1.0 118 | exp(log(sin(Pi/2))) 119 | // res5: Double = 1.0 120 | sin(asin(0.1)) 121 | // res6: Double = 0.1 122 | atan(1)*4 123 | // res7: Double = 3.141592653589793 124 | log(sqrt(exp(1))) 125 | // res8: Double = 0.5 126 | abs(min(-1,2)) 127 | // res9: Int = 1 128 | pow(2,8) 129 | // res10: Double = 256.0 130 | random 131 | // res11: Double = 0.0954535018607291 132 | random 133 | // res12: Double = 0.5669552981874513 134 | random 135 | // res13: Double = 0.9598287994663521 136 | floor(random*3) 137 | // res14: Double = 2.0 138 | floor(random*3) 139 | // res15: Double = 1.0 140 | floor(random*3) 141 | // res16: Double = 1.0 142 | floor(random*3) 143 | // res17: Double = 1.0 144 | 145 | 146 | val a1 = 1 147 | // a1: Int = 1 148 | val a2: Int = 1 149 | // a2: Int = 1 150 | val l1 = List(1, 2, 3) 151 | // l1: List[Int] = List(1, 2, 3) 152 | val l2: List[Int] = List(2, 3, 4) 153 | // l2: List[Int] = List(2, 3, 4) 154 | 155 | 156 | val a3: Double = 1 157 | // a3: Double = 1.0 158 | val a4: Int = 1.0 159 | // :7: error: type mismatch; 160 | // found : Double(1.0) 161 | // required: Int 162 | // val a4: Int = 1.0 163 | // ^ 164 | 165 | 166 | def fact1(n: Int): Int = (1 to n).product 167 | // fact1: (n: Int)Int 168 | fact1(5) 169 | // res0: Int = 120 170 | 171 | 172 | def fact2(n: Int): Int = { 173 | var acc = 1 174 | var i = 2 175 | while (i <= n) { 176 | acc *= i 177 | i += 1 178 | } 179 | acc 180 | } 
181 | // fact2: (n: Int)Int 182 | fact2(5) 183 | // res1: Int = 120 184 | 185 | 186 | def fact3(n: Int): Int = { 187 | if (n == 1) 1 else 188 | n * fact3(n-1) 189 | } 190 | // fact3: (n: Int)Int 191 | fact3(5) 192 | // res2: Int = 120 193 | 194 | 195 | @annotation.tailrec 196 | def fact4(n: Int, acc: Int = 1): Int = { 197 | if (n == 1) acc else 198 | fact4(n-1, acc*n) 199 | } 200 | // fact4: (n: Int, acc: Int)Int 201 | fact4(5) 202 | // res3: Int = 120 203 | 204 | 205 | math.log(fact4(5)) 206 | // res4: Double = 4.787491742782046 207 | 208 | def lfact(n: Int): Double = { 209 | if (n == 1) 0.0 else 210 | math.log(n) + lfact(n-1) 211 | } 212 | // lfact: (n: Int)Double 213 | lfact(5) 214 | // res5: Double = 4.787491742782046 215 | // lfact(10000) // will cause a stack overflow 216 | 217 | 218 | @annotation.tailrec 219 | def lfacttr(n: Int, acc: Double = 0.0): Double = { 220 | if (n == 1) acc else 221 | lfacttr(n-1, acc + math.log(n)) 222 | } 223 | // lfacttr: (n: Int, acc: Double)Double 224 | lfacttr(5) 225 | // res6: Double = 4.787491742782046 226 | lfacttr(10000) 227 | // res7: Double = 82108.92783681446 228 | 229 | 230 | @annotation.tailrec 231 | def factbi(n: BigInt, acc: BigInt = 1): BigInt = { 232 | if (n == 1) acc else 233 | factbi(n-1, acc*n) 234 | } 235 | // factbi: (n: BigInt, acc: BigInt)BigInt 236 | factbi(5) 237 | // res8: BigInt = 120 238 | factbi(10000) 239 | // res9: BigInt = 2846259680917054518906413212119... 
240 | 241 | 242 | /* 243 | log-fact.scala 244 | Program to compute the log-factorial function 245 | */ 246 | 247 | object LogFact { 248 | 249 | import annotation.tailrec 250 | import math.log 251 | 252 | @tailrec 253 | def logfact(n: Int, acc: Double = 0.0): Double = 254 | if (n == 1) acc else 255 | logfact(n-1, acc + log(n)) 256 | 257 | def main(args: Array[String]): Unit = { 258 | val n = if (args.length == 1) args(0).toInt else 5 259 | val lfn = logfact(n) 260 | println(s"logfact($n) = $lfn") 261 | } 262 | 263 | } 264 | 265 | // eof 266 | 267 | 268 | val l1 = List(1,2,3) 269 | 270 | 271 | val l2 = 4 :: l1 272 | // List(4, 1, 2, 3) 273 | 274 | 275 | val l3 = l2 map { x => x*x } 276 | // List(16, 1, 4, 9) 277 | 278 | 279 | val l4 = l2.map(x => x*x) 280 | 281 | 282 | import breeze.plot._ 283 | def plotFun(fun: Double => Double, xmin: Double = 284 | -3.0, xmax: Double = 3.0): Figure = { 285 | val f = Figure() 286 | val p = f.subplot(0) 287 | import breeze.linalg._ 288 | val x = linspace(xmin, xmax) 289 | p += plot(x, x map fun) 290 | p.xlabel = "x" 291 | p.ylabel = "f(x)" 292 | f 293 | } 294 | 295 | 296 | plotFun(x => x*x) 297 | 298 | 299 | def myQuad1(x: Double): Double = x*x - 2*x + 1 300 | plotFun(myQuad1) 301 | def myQuad2(x: Double): Double = x*x - 3*x - 1 302 | plotFun(myQuad2) 303 | 304 | 305 | val myQuad3: (Double => Double) = x => -x*x + 2 306 | plotFun(myQuad3) 307 | 308 | 309 | def quadratic(a: Double, b: Double, c: Double, 310 | x: Double): Double = 311 | a*x*x + b*x + c 312 | 313 | 314 | plotFun(x => quadratic(3,2,1,x)) 315 | 316 | 317 | def quadFun(a: Double, b: Double, c: Double): 318 | Double => Double = x => quadratic(a,b,c,x) 319 | val myQuad4 = quadFun(2,1,3) 320 | plotFun(myQuad4) 321 | plotFun(quadFun(1,2,3)) 322 | 323 | 324 | val quadFunF: (Double,Double,Double) => Double => 325 | Double = (a,b,c) => x => quadratic(a,b,c,x) 326 | val myQuad5 = quadFunF(-1,1,2) 327 | plotFun(myQuad5) 328 | plotFun(quadFunF(1,-2,3)) 329 | 330 | 331 | val myQuad6 
= quadratic(1,2,3,_: Double) 332 | plotFun(myQuad6) 333 | 334 | 335 | def quad(a: Double, b: Double, c: Double)(x: Double): 336 | Double = a*x*x + b*x + c 337 | plotFun(quad(1,2,-3)) 338 | val myQuad7 = quad(1,0,1) _ 339 | plotFun(myQuad7) 340 | 341 | 342 | def quadCurried = (quadratic _).curried 343 | plotFun(quadCurried(1)(2)(3)) 344 | 345 | 346 | val quadraticF: (Double,Double,Double,Double) => Double = 347 | (a,b,c,x) => a*x*x + b*x + c 348 | def quadCurried2 = quadraticF.curried 349 | plotFun(quadCurried2(-1)(2)(3)) 350 | 351 | 352 | val aLongString = (1 to 10000).map(_.toString). 353 | reduce(_+_) 354 | // aLongString: String = 1234567891011121314151617... 355 | 356 | val stringLength: String => Int = s => s.length 357 | // stringLength: String => Int = 358 | 359 | stringLength(aLongString) 360 | // res0: Int = 38894 361 | 362 | 363 | def convertToK: Int => Double = i => i.toDouble/1024 364 | // convertToK: Int => Double 365 | 366 | def stringLengthInK1(s: String): Double = { 367 | val l = stringLength(s) 368 | val lk = convertToK(l) 369 | lk 370 | } 371 | // stringLengthInK1: (s: String)Double 372 | 373 | stringLengthInK1(aLongString) 374 | // res1: Double = 37.982421875 375 | 376 | 377 | val stringLengthInK2: String => Double = 378 | s => convertToK(stringLength(s)) 379 | // stringLengthInK2: String => Double = 380 | 381 | stringLengthInK2(aLongString) 382 | // res2: Double = 37.982421875 383 | 384 | 385 | val stringLengthInK3: String => Double = 386 | s => (convertToK compose stringLength)(s) 387 | // stringLengthInK3: String => Double = 388 | 389 | stringLengthInK3(aLongString) 390 | // res3: Double = 37.982421875 391 | 392 | 393 | val stringLengthInK4: String => Double = 394 | convertToK compose stringLength 395 | // stringLengthInK4: String => Double = 396 | 397 | stringLengthInK4(aLongString) 398 | // res4: Double = 37.982421875 399 | 400 | -------------------------------------------------------------------------------- /fragments/monte.scala: 
-------------------------------------------------------------------------------- 1 | 2 | import java.util.concurrent.ThreadLocalRandom 3 | import scala.math.exp 4 | import scala.annotation.tailrec 5 | 6 | val N = 1000000L 7 | def rng = ThreadLocalRandom.current() 8 | 9 | def mc(its: Long): Double = { 10 | @tailrec def sum(its: Long, acc: Double): Double = { 11 | if (its == 0) acc else { 12 | val u = rng.nextDouble() 13 | sum(its-1, acc + exp(-u*u)) 14 | } 15 | } 16 | sum(its,0.0)/its 17 | } 18 | 19 | mc(N) 20 | // res0: Double = 0.7469182341226777 21 | 22 | 23 | def mcp(its: Long,np: Int = 4): Double = 24 | (1 to np).par.map(i => mc(its/np)).sum/np 25 | 26 | mcp(N) 27 | // res1: Double = 0.7468289488326496 28 | 29 | 30 | def time[A](f: => A) = { 31 | val s = System.nanoTime 32 | val ret = f 33 | println("time: "+(System.nanoTime-s)/1e6+"ms") 34 | ret 35 | } 36 | 37 | 38 | val bigN = 100000000L 39 | // bigN: Long = 100000000 40 | 41 | time(mc(bigN)) 42 | // time: 6225.859951ms 43 | // res2: Double = 0.7468159872240743 44 | time(mcp(bigN)) 45 | // time: 2197.872294ms 46 | // res3: Double = 0.7468246533834739 47 | 48 | 49 | (1 to 12).foreach{i => 50 | println("np = "+i) 51 | (1 to 3).foreach(j => time(mcp(bigN,i))) 52 | } 53 | // np = 1 54 | // time: 6201.480532ms 55 | // time: 6186.176627ms 56 | // time: 6198.14735ms 57 | // np = 2 58 | // time: 3127.512337ms 59 | // time: 3122.648652ms 60 | // time: 3148.509354ms 61 | // np = 3 62 | // time: 2488.273962ms 63 | // time: 2402.957878ms 64 | // time: 2555.286948ms 65 | // np = 4 66 | // time: 2133.996ms 67 | // time: 2238.847511ms 68 | // time: 2177.260599ms 69 | // np = 5 70 | // time: 2867.889727ms 71 | // time: 2890.128312ms 72 | // time: 2784.020295ms 73 | // np = 6 74 | // time: 3358.373499ms 75 | // time: 2600.759805ms 76 | // time: 2559.704485ms 77 | // np = 7 78 | // time: 3248.162029ms 79 | // time: 3359.006061ms 80 | // time: 2882.463352ms 81 | // np = 8 82 | // time: 1847.027762ms 83 | // time: 2545.40533ms 
84 | // time: 2556.063328ms 85 | // np = 9 86 | // time: 2344.998373ms 87 | // time: 2253.718886ms 88 | // time: 2260.407902ms 89 | // np = 10 90 | // time: 2158.32923ms 91 | // time: 2125.176623ms 92 | // time: 2049.69822ms 93 | // np = 11 94 | // time: 1945.826366ms 95 | // time: 1945.175903ms 96 | // time: 1952.519595ms 97 | // np = 12 98 | // time: 1822.598809ms 99 | // time: 1827.48165ms 100 | // time: 2722.349404ms 101 | 102 | 103 | def metrop1(n: Int = 1000, eps: Double = 0.5): 104 | DenseVector[Double] = { 105 | val vec = DenseVector.fill(n)(0.0) 106 | var x = 0.0 107 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 108 | vec(0) = x 109 | (1 until n).foreach { i => 110 | val can = x + Uniform(-eps, eps).draw 111 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 112 | val loga = loglik - oldll 113 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 114 | x = can 115 | oldll = loglik 116 | } 117 | vec(i) = x 118 | } 119 | vec 120 | } 121 | 122 | 123 | def metrop2(n: Int = 1000, eps: Double = 0.5): Unit = 124 | { 125 | var x = 0.0 126 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 127 | (1 to n).foreach { i => 128 | val can = x + Uniform(-eps, eps).draw 129 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 130 | val loga = loglik - oldll 131 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 132 | x = can 133 | oldll = loglik 134 | } 135 | println(x) 136 | } 137 | } 138 | 139 | 140 | @tailrec 141 | def metrop3(n: Int = 1000, eps: Double = 0.5, 142 | x: Double = 0.0, oldll: Double = Double.MinValue): 143 | Unit = { 144 | if (n > 0) { 145 | println(x) 146 | val can = x + Uniform(-eps, eps).draw 147 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 148 | val loga = loglik - oldll 149 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 150 | metrop3(n - 1, eps, can, loglik) 151 | else 152 | metrop3(n - 1, eps, x, oldll) 153 | } 154 | } 155 | 156 | 157 | @tailrec 158 | def metrop4(n: Int = 1000, eps: Double = 0.5, 159 | x: Double = 0.0, oldll: Double = Double.MinValue, 160 | acc: List[Double] = 
Nil): DenseVector[Double] = { 161 | if (n == 0) 162 | DenseVector(acc.reverse.toArray) 163 | else { 164 | val can = x + Uniform(-eps, eps).draw 165 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 166 | val loga = loglik - oldll 167 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 168 | metrop4(n - 1, eps, can, loglik, can :: acc) 169 | else 170 | metrop4(n - 1, eps, x, oldll, x :: acc) 171 | } 172 | } 173 | 174 | 175 | def newState(x: Double, oldll: Double, eps: Double): 176 | (Double, Double) = { 177 | val can = x + Uniform(-eps, eps).draw 178 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 179 | val loga = loglik - oldll 180 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 181 | (can, loglik) else (x, oldll) 182 | } 183 | 184 | 185 | @tailrec 186 | def metrop5(n: Int = 1000, eps: Double = 0.5, 187 | x: Double = 0.0, 188 | oldll: Double = Double.MinValue): Unit = { 189 | if (n > 0) { 190 | println(x) 191 | val ns = newState(x, oldll, eps) 192 | metrop5(n - 1, eps, ns._1, ns._2) 193 | } 194 | } 195 | 196 | 197 | @tailrec 198 | def metrop5b(n: Int = 1000, eps: Double = 0.5, 199 | x: Double = 0.0, 200 | oldll: Double = Double.MinValue): Unit = { 201 | if (n > 0) { 202 | println(x) 203 | val (nx, ll) = newState(x, oldll, eps) 204 | metrop5b(n - 1, eps, nx, ll) 205 | } 206 | } 207 | 208 | 209 | @tailrec 210 | def metrop6(n: Int = 1000, eps: Double = 0.5, 211 | x: Double = 0.0, oldll: Double = Double.MinValue, 212 | acc: List[Double] = Nil): DenseVector[Double] = { 213 | if (n == 0) DenseVector(acc.reverse.toArray) else { 214 | val (nx, ll) = newState(x, oldll, eps) 215 | metrop6(n - 1, eps, nx, ll, nx :: acc) 216 | } 217 | } 218 | 219 | 220 | def nextState(eps: Double)(state: (Double, Double)): 221 | (Double, Double) = { 222 | val (x, oldll) = state 223 | val can = x + Uniform(-eps, eps).draw 224 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 225 | val loga = loglik - oldll 226 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 227 | (can, loglik) else (x, oldll) 228 | } 229 | 230 
| 231 | def metrop7(eps: Double = 0.5, x: Double = 0.0, 232 | oldll: Double = Double.MinValue): Stream[Double] = 233 | Stream.iterate((x,oldll))(nextState(eps)) map (_._1) 234 | 235 | 236 | def kernel(x: Double): Rand[Double] = for { 237 | innov <- Uniform(-0.5, 0.5) 238 | can = x + innov 239 | oldll = Gaussian(0.0, 1.0).logPdf(x) 240 | loglik = Gaussian(0.0, 1.0).logPdf(can) 241 | loga = loglik - oldll 242 | u <- Uniform(0.0, 1.0) 243 | } yield if (math.log(u) < loga) can else x 244 | 245 | 246 | val ms = Stream.iterate(0.0)(kernel(_).draw) 247 | ms. 248 | drop(1000). 249 | take(10000). 250 | foreach(println) 251 | 252 | 253 | case class State(x: Double, y: Double) 254 | // defined class State 255 | 256 | 257 | val s = State(1.0,2.0) 258 | // s: State = State(1.0,2.0) 259 | s.x 260 | // res0: Double = 1.0 261 | s.y 262 | // res1: Double = 2.0 263 | s.copy() 264 | // res2: State = State(1.0,2.0) 265 | s.copy(y=3) 266 | // res3: State = State(1.0,3.0) 267 | 268 | 269 | import breeze.stats.distributions._ 270 | // import breeze.stats.distributions._ 271 | 272 | def nextState(state: State): State = { 273 | val sy = state.y 274 | val x = Gamma(3.0,1.0/(sy*sy+4)).draw 275 | val y = Gaussian(1.0/(x+1),1.0/math.sqrt(2*x+2)).draw 276 | State(x,y) 277 | } 278 | 279 | 280 | val gs = Stream.iterate(State(1.0,1.0))(nextState) 281 | // gs: scala.collection.immutable.Stream[State] = 282 | // Stream(State(1.0,1.0), ?) 283 | val output = gs.drop(1000).take(100000).toArray 284 | // output: Array[State] = Array( 285 | // State(0.20703194113971382,0.874650780098001), 286 | // State(0.5813103371812548,0.4780234809903935), ... 
287 | 288 | 289 | import breeze.linalg._ 290 | val xv = DenseVector(output map (_.x)) 291 | val yv = DenseVector(output map (_.y)) 292 | 293 | import breeze.plot._ 294 | val fig = Figure("Bivariate Gibbs sampler") 295 | fig.subplot(2,2,0)+=hist(xv,50) 296 | fig.subplot(2,2,1)+=hist(yv,50) 297 | fig.subplot(2,2,2)+=plot(xv,yv,'.') 298 | 299 | 300 | def thin[T](s: Stream[T], th: Int): Stream[T] = { 301 | val ss = s.drop(th - 1) 302 | if (ss.isEmpty) Stream.empty else 303 | ss.head #:: thin(ss.tail, th) 304 | } 305 | 306 | 307 | thin(gs.drop(1000),10).take(10000).toArray 308 | 309 | 310 | // gs.drop(1000).thin(10).take(10000) 311 | 312 | 313 | def kernel(state: State): Rand[State] = for { 314 | x <- Gamma(3.0,1.0/(state.y*state.y+4)) 315 | y <- Gaussian(1.0/(x+1),1.0/math.sqrt(2*x+2)) 316 | ns = State(x,y) 317 | } yield ns 318 | 319 | val out3 = Stream.iterate(State(1.0,1.0))(kernel(_).draw). 320 | drop(1000). 321 | take(10000). 322 | toArray 323 | 324 | 325 | (th: P) => { 326 | val x0 = simx0(n, t0, th).par 327 | @tailrec def pf(ll: LogLik, x: ParVector[S], t: Time, 328 | deltas: List[Time], obs: List[O]): LogLik = 329 | obs match { 330 | case Nil => ll 331 | case head :: tail => { 332 | val xp = if (deltas.head == 0) x else 333 | (x map { stepFun(_, t, deltas.head, th) }) 334 | val w = xp map { dataLik(_, head, th) } 335 | val rows = sample(n, DenseVector(w.toArray)).par 336 | val xpp = rows map { xp(_) } 337 | pf(ll + math.log(mean(w)), xpp, t + deltas.head, 338 | deltas.tail, tail) 339 | } 340 | } 341 | pf(0, x0, t0, deltas, obs) 342 | } 343 | 344 | --------------------------------------------------------------------------------