├── examples ├── C4-PCA │ ├── test.dat │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── pca.scala │ ├── project │ │ └── build.properties │ ├── .gitignore │ ├── Readme.md │ └── build.sbt ├── C4-GammaTest │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── gamma-test.scala │ ├── project │ │ └── build.properties │ ├── .gitignore │ ├── Readme.md │ └── build.sbt ├── C5-Metropolis │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── metropolis.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── .gitignore │ └── build.sbt ├── C5-MonteCarlo │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── monte-carlo.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── .gitignore │ └── build.sbt ├── C6-Regression │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── regression.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ └── build.sbt ├── C8-SparkJob │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── spark.scala │ ├── project │ │ └── build.properties │ ├── Readme.md │ └── build.sbt ├── C6-Rainier │ ├── project │ │ ├── build.properties │ │ └── plugins.sbt │ ├── target │ │ └── mdoc │ │ │ ├── b0.png │ │ │ └── b1.png │ ├── .gitignore │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── rainier-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── rainier.scala │ ├── Readme.md │ ├── build.sbt │ └── docs │ │ └── LogisticRegression.md ├── C6-Smile │ ├── project │ │ ├── build.properties │ │ └── plugins.sbt │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── smile-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── smile.scala │ ├── .gitignore │ ├── build.sbt │ ├── Readme.md │ ├── docs │ │ 
└── smile-example.md │ └── target │ │ └── mdoc │ │ └── smile-example.md ├── C7-EvilPlot │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── build.sbt │ └── src │ │ └── main │ │ └── scala │ │ └── evilplot-examples.scala ├── C7-MetropAssembly │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── app-template-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── metropolis.scala │ ├── project │ │ ├── build.properties │ │ └── plugins.sbt │ ├── rscala.R │ ├── build.sbt │ └── Readme.md ├── C7-Vegas │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── src │ │ ├── test │ │ │ └── scala │ │ │ │ └── C7-Vegas-test.scala │ │ └── main │ │ │ └── scala │ │ │ └── C7-Vegas.scala │ ├── .gitignore │ └── build.sbt ├── C6-ScalaGlm │ ├── project │ │ └── build.properties │ ├── Readme.md │ ├── src │ │ ├── main │ │ │ └── scala │ │ │ │ └── scala-glm-example.scala │ │ └── test │ │ │ └── scala │ │ │ └── scala-glm-example-test.scala │ ├── .gitignore │ └── build.sbt ├── C9-ScalablePF │ ├── project │ │ └── build.properties │ ├── README.md │ ├── .gitignore │ ├── build.sbt │ └── src │ │ ├── main │ │ └── scala │ │ │ └── pfilter │ │ │ └── pfilter.scala │ │ └── test │ │ └── scala │ │ └── pfilter-test.scala ├── C6-DataFrames │ ├── smiledf │ │ ├── project │ │ │ └── build.properties │ │ ├── build.sbt │ │ └── src │ │ │ └── main │ │ │ └── scala │ │ │ └── smile-df.scala │ ├── r │ │ ├── df.R │ │ └── gen-csv.R │ ├── sparkdf │ │ └── spark.scala │ ├── framian │ │ ├── build.sbt │ │ └── framian.scala │ ├── datatable │ │ ├── build.sbt │ │ └── datatable.scala │ ├── saddle │ │ ├── CsvDf.scala │ │ └── build.sbt │ └── README.md ├── C1-HelloWorld │ ├── HelloWorld.scala │ └── Readme.md ├── C3-Pi │ ├── Readme.md │ └── pi.scala ├── Readme.md └── C2-LogFactorial │ ├── Readme.md │ └── log-fact.scala ├── app-template ├── src │ ├── test │ │ └── scala │ │ │ └── app-template-test.scala │ └── main │ │ └── scala │ │ └── app-template │ │ └── app-template.scala ├── project │ └── build.properties ├── build.sbt └── 
.gitignore ├── exercises ├── option │ ├── project │ │ └── build.properties │ ├── src │ │ ├── main │ │ │ └── scala │ │ │ │ └── option.scala │ │ └── test │ │ │ └── scala │ │ │ └── option-test.scala │ ├── build.sbt │ └── Readme.md ├── bisection │ ├── project │ │ └── build.properties │ ├── src │ │ ├── main │ │ │ └── scala │ │ │ │ └── bisect.scala │ │ └── test │ │ │ └── scala │ │ │ └── bisect-test.scala │ ├── build.sbt │ └── Readme.md ├── Readme.md ├── Basics.md ├── Spark.md ├── Collections.md ├── Monte.md ├── Breeze.md ├── Intro.md ├── Stats.md ├── Tools.md └── Advanced.md ├── sbt-test ├── project │ └── build.properties ├── src │ ├── main │ │ └── scala │ │ │ └── sbt-test.scala │ └── test │ │ └── scala │ │ ├── sbt-test-flatspec.scala │ │ ├── sbt-test-test.scala │ │ └── sbt-test-scalacheck.scala ├── .gitignore ├── build.sbt └── Readme.md ├── scscala.pdf ├── sbt ├── sbt-0.13.13.zip └── Readme.md ├── fragments ├── intro.scala ├── Readme.md ├── advanced.scala ├── tools.scala ├── basics.scala └── monte.scala ├── IntelliJ.md ├── SelfStudyGuide.md ├── StartHere.md ├── UsefulLinks.md ├── ScalaIDE.md ├── Ensime.md ├── README.md └── Setup.md /examples/C4-PCA/test.dat: -------------------------------------------------------------------------------- 1 | 1.0,2.0 2 | 3.0,4.0 3 | -------------------------------------------------------------------------------- /app-template/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C4-PCA/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app-template/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | 
-------------------------------------------------------------------------------- /app-template/src/main/scala/app-template/app-template.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C4-PCA/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C6-Regression/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exercises/option/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /sbt-test/project/build.properties: 
-------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | 3 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-Rainier/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.2 2 | -------------------------------------------------------------------------------- /examples/C6-Smile/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/src/test/scala/app-template-test.scala: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/C7-Vegas/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /exercises/bisection/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | 
-------------------------------------------------------------------------------- /examples/C5-Metropolis/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-Regression/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.13 2 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/smiledf/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.3 2 | -------------------------------------------------------------------------------- /scscala.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/scscala.pdf 
-------------------------------------------------------------------------------- /sbt/sbt-0.13.13.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/sbt/sbt-0.13.13.zip -------------------------------------------------------------------------------- /examples/C6-Rainier/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "1.3.6") 2 | 3 | -------------------------------------------------------------------------------- /examples/C6-Smile/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "1.3.6") 2 | 3 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10") 2 | 3 | -------------------------------------------------------------------------------- /examples/C6-Rainier/target/mdoc/b0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/examples/C6-Rainier/target/mdoc/b0.png -------------------------------------------------------------------------------- /examples/C6-Rainier/target/mdoc/b1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/scala-course/HEAD/examples/C6-Rainier/target/mdoc/b1.png -------------------------------------------------------------------------------- /examples/C6-Smile/src/test/scala/smile-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | 6 | 7 | } 8 | 
9 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/Readme.md: -------------------------------------------------------------------------------- 1 | # EvilPlot examples 2 | 3 | Some EvilPlot examples, largely taken from the documentation on the [EvilPlot website](https://cibotech.github.io/evilplot/). Should just `sbt run`. 4 | 5 | -------------------------------------------------------------------------------- /fragments/intro.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | multi-line 4 | comment 5 | */ 6 | 7 | object MyApp { 8 | 9 | def main(args: Array[String]): Unit = 10 | println("Hello, world!") 11 | 12 | } // single line comment 13 | 14 | -------------------------------------------------------------------------------- /examples/C1-HelloWorld/HelloWorld.scala: -------------------------------------------------------------------------------- 1 | /* 2 | multi-line 3 | comment 4 | */ 5 | 6 | object MyApp { 7 | 8 | def main(args: Array[String]): Unit = 9 | println("Hello, world!") 10 | 11 | } // single line comment 12 | 13 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/Readme.md: -------------------------------------------------------------------------------- 1 | # Metropolis MCMC sampler 2 | 3 | Illustration of a simple MCMC algorithm coded in Scala, in several different ways. Also illustrating more or less functional ways of handling (infinite) data streams in Scala. 4 | -------------------------------------------------------------------------------- /exercises/bisection/src/main/scala/bisect.scala: -------------------------------------------------------------------------------- 1 | /* 2 | bisect.scala 3 | 4 | */ 5 | 6 | object Bisect { 7 | 8 | 9 | def findRoot(low: Double, high: Double)(f: Double => Double): Double = ??? 
10 | 11 | 12 | } 13 | 14 | /* eof */ 15 | 16 | -------------------------------------------------------------------------------- /examples/C1-HelloWorld/Readme.md: -------------------------------------------------------------------------------- 1 | # Hello world 2 | 3 | Just a single scala file containing a runnable main method with no library dependencies. This will just `sbt run` without arranging it in a proper project directory structure, but Scala isn't often used this way. 4 | 5 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/rscala.R: -------------------------------------------------------------------------------- 1 | ## rscala.R 2 | 3 | library(rscala) 4 | 5 | sc = scala( 6 | "target/scala-2.12/metropolis-assembly-assembly-0.1.jar" 7 | ) 8 | 9 | met = sc * 'Metropolis.chain.take(10000).toArray' 10 | 11 | library(smfsb) 12 | mcmcSummary(matrix(met,ncol=1)) 13 | 14 | ## eof 15 | 16 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/README.md: -------------------------------------------------------------------------------- 1 | # A scalable particle filter in Scala 2 | 3 | Code examples for the blog post: 4 | 5 | https://darrenjw.wordpress.com/2016/07/22/a-scalable-particle-filter-in-scala/ 6 | 7 | Just `sbt run` it. Implements a parallel particle filter. 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /examples/C7-Vegas/Readme.md: -------------------------------------------------------------------------------- 1 | # Vegas 2 | 3 | This is supposed to be an example for [Vegas](https://github.com/vegas-viz/Vegas), the Scala wrapper for [vega-lite](https://vega.github.io/vega-lite/). However, it no longer runs for me, as the latest version is very old, and seems to be incompatible with recent versions of OpenJFX. 
4 | 5 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/Readme.md: -------------------------------------------------------------------------------- 1 | # Scala GLM 2 | 3 | Very simple example illustrating use of my `scala-glm` library. As usual, it can be run with `sbt run`. 4 | 5 | This is essentially just the giter8 template for the library. You can create a new template project for yourself with: 6 | 7 | ```bash 8 | sbt new darrenjw/scala-glm.g8 9 | ``` 10 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/r/df.R: -------------------------------------------------------------------------------- 1 | # df.R 2 | # Example of processing a CSV-derived data frame using R 3 | 4 | df=read.csv("cars93.csv") 5 | print(dim(df)) 6 | 7 | df=df[df$EngineSize<=4.0,] 8 | print(dim(df)) 9 | 10 | df$WeightKG=df$Weight*0.453592 11 | print(dim(df)) 12 | 13 | write.csv(df,"cars93m.csv",row.names=FALSE) 14 | 15 | # eof 16 | 17 | -------------------------------------------------------------------------------- /examples/C3-Pi/Readme.md: -------------------------------------------------------------------------------- 1 | # Parallel Monte Carlo estimation of Pi 2 | 3 | Again, just a single-file application, with no dependencies. Just to illustrate how easy it is to do parallel computing with Scala. 4 | 5 | Note however, that parallel collections are slightly different in Scala 2.13, so you need to make sure you run this under Scala 2.12 (or 2.11). 
6 | 7 | 8 | -------------------------------------------------------------------------------- /examples/C7-Vegas/src/test/scala/C7-Vegas-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | "A Poisson(10.0)" should "have mean 10.0" in { 6 | import breeze.stats.distributions.Poisson 7 | val p = Poisson(10.0) 8 | val m = p.mean 9 | assert(math.abs(m - 10.0) < 0.000001) 10 | } 11 | 12 | } 13 | 14 | -------------------------------------------------------------------------------- /exercises/option/src/main/scala/option.scala: -------------------------------------------------------------------------------- 1 | /* 2 | option.scala 3 | 4 | */ 5 | 6 | object OptionBisect { 7 | 8 | // Part A 9 | 10 | def findRootOpt(low: Double, high: Double)(f: Double => Double): Option[Double] = ??? 11 | 12 | 13 | // Part B 14 | 15 | def solveQuad(a: Double): Option[Double] = ??? 16 | 17 | 18 | } 19 | 20 | /* eof */ 21 | 22 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/src/main/scala/scala-glm-example.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Example scala-glm code 3 | */ 4 | 5 | object ScalaGlmApp { 6 | 7 | import scalaglm.Pca 8 | import breeze.linalg._ 9 | 10 | def main(args: Array[String]): Unit = { 11 | val X = DenseMatrix((1.0,1.5),(1.5,2.0),(2.0,1.5)) 12 | val pca = Pca(X, List("V1","V2")) 13 | pca.summary 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/Readme.md: -------------------------------------------------------------------------------- 1 | # Monte Carlo integration 2 | 3 | Benchmarking different serial and parallel approaches to Monte Carlo estimation of a univariate integral. 
4 | 5 | Note that here we are doing the benchmarking in a fairly simple way, using `System.nanoTime`, but there exist proper benchmarking libraries available for this purpose, and we will mention a couple of these later. 6 | 7 | -------------------------------------------------------------------------------- /sbt-test/src/main/scala/sbt-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | sbt-test.scala 3 | 4 | 5 | */ 6 | 7 | 8 | object SbtTest { 9 | 10 | def main(args: Array[String]): Unit = { 11 | println("") 12 | println("") 13 | println("") 14 | println("") 15 | println("") 16 | println("SBT IS INSTALLED AND WORKING") 17 | println("") 18 | println("") 19 | println("") 20 | } 21 | 22 | } 23 | 24 | /* eof */ 25 | 26 | -------------------------------------------------------------------------------- /app-template/build.sbt: -------------------------------------------------------------------------------- 1 | name := "app-template" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 11 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 12 | "org.scalanlp" %% "breeze" % "1.0", 13 | "org.scalanlp" %% "breeze-natives" % "1.0" 14 | ) 15 | 16 | scalaVersion := "2.12.10" 17 | 18 | 19 | -------------------------------------------------------------------------------- /app-template/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 
| .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C4-PCA/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/r/gen-csv.R: -------------------------------------------------------------------------------- 1 | # gen-csv.R 2 | # Generate a CSV file for subsequent analysis 3 | 4 | package=function(somepackage) 5 | { 6 | cpackage <- as.character(substitute(somepackage)) 7 | if(!require(cpackage,character.only=TRUE)){ 8 | install.packages(cpackage) 9 | library(cpackage,character.only=TRUE) 10 | } 11 | } 12 | 13 | package(MASS) 14 | 15 | write.csv(Cars93,"cars93.csv",row.names=FALSE) 16 | 17 | 18 | 19 | # eof 20 | 21 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- 
/examples/C6-Rainier/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C6-Smile/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C7-Vegas/.gitignore: 
-------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /exercises/Readme.md: -------------------------------------------------------------------------------- 1 | # Exercises 2 | 3 | ## Practical exercises to follow each Chapter of the course notes 4 | 5 | 1. [Introduction](Intro.md) 6 | 2. [Scala and FP Basics](Basics.md) 7 | 3. [Collections](Collections.md) 8 | 4. [Scala Breeze](Breeze.md) 9 | 5. [Monte Carlo](Monte.md) 10 | 6. [Statistical modelling](Stats.md) 11 | 7. [Tools](Tools.md) 12 | 8. [Apache Spark](Spark.md) 13 | 9. 
[Advanced topics](Advanced.md) 14 | 15 | 16 | #### eof 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /sbt-test/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | /bin/ 32 | -------------------------------------------------------------------------------- /examples/C4-PCA/Readme.md: -------------------------------------------------------------------------------- 1 | # PCA 2 | 3 | Doing principal components analysis (PCA) using Breeze. 4 | 5 | Note that this a proper `sbt` project template layout, so navigate around to see where everything is, ideally before you do `sbt run`, and then again after, to see the additional files that `sbt` generates. 6 | 7 | This is our first example of doing data analysis using Scala, so study the code and the output carefully to make sure that you understand what is going on. 
8 | 9 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- 
/examples/C6-Rainier/src/test/scala/rainier-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.flatspec.AnyFlatSpec 2 | import org.scalatest.matchers.should.Matchers 3 | 4 | 5 | // Example unit tests 6 | class CatsSpec extends AnyFlatSpec with Matchers { 7 | 8 | import cats._ 9 | import cats.implicits._ 10 | 11 | "A List" should "combine" in { 12 | val l = List(1,2) |+| List(3,4) 13 | l should be (List(1,2,3,4)) 14 | } 15 | 16 | } 17 | 18 | 19 | 20 | 21 | // eof 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/src/test/scala/scala-glm-example-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | import scalaglm.Utils.backSolve 6 | import breeze.linalg._ 7 | 8 | "backSolve" should "invert correctly" in { 9 | val A = DenseMatrix((4,1),(0,2)) map (_.toDouble) 10 | val x = DenseVector(3.0,-2.0) 11 | val y = A * x 12 | val xx = backSolve(A,y) 13 | assert (norm(x-xx) < 0.00001) 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /examples/C6-Regression/Readme.md: -------------------------------------------------------------------------------- 1 | # Regression modelling 2 | 3 | Illustration of how to implement regression modelling "from scratch" in Scala. Contains some useful techniques for those interested in statistical computing and numerical algorithms development. 4 | 5 | For "real" regression analysis projects, you'll want to use a library, such as [scala-glm](https://github.com/darrenjw/scala-glm) or, more likely, [smile](http://haifengl.github.io/), or for "big data", [Spark MLlib](https://spark.apache.org/mllib/). 
6 | 7 | 8 | -------------------------------------------------------------------------------- /examples/Readme.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | ## A collection of complete runnables examples 3 | 4 | Each subdirectory of this directory contains a complete runnable Scala program, numbered according to the Chapter of the notes that they most closely relate to. It should be possible to run most of these directly from the command line of the relevant directory by typing `sbt run` at the OS command prompt. 5 | 6 | Note that there is a separate Readme in each directory, and that these contain further details specific to each example. 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/Readme.md: -------------------------------------------------------------------------------- 1 | # Testing stochastic simulation from a Gamma distribution 2 | 3 | Simple example of how to test a non-uniform random number generation scheme. 4 | 5 | Note that this a proper `sbt` project template layout, so navigate around to see where everything is, ideally before you do `sbt run`, and then again after, to see the additional files that `sbt` generates. 6 | 7 | Note that we will consider proper testing frameworks later, including both unit-testing and property-based testing frameworks. 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/Readme.md: -------------------------------------------------------------------------------- 1 | # Self-contained Spark application 2 | 3 | Note that you can't just `sbt run` this example. 
4 | 5 | Build it with: 6 | 7 | ```bash 8 | sbt package 9 | ``` 10 | 11 | Then submit it to a Spark cluster with: 12 | 13 | ```bash 14 | spark-submit --class "SparkApp" \ 15 | --master local[4] \ 16 | target/scala-2.11/spark-template_2.11-0.1.jar 17 | ``` 18 | 19 | This works fine here, since the application has no third-party dependencies. If it did, you would need to build and submit an assembly JAR. 20 | 21 | 22 | -------------------------------------------------------------------------------- /examples/C2-LogFactorial/Readme.md: -------------------------------------------------------------------------------- 1 | # Log-factorial 2 | 3 | Illustration of the log-factorial function as a tail recursion. Again, this is just a single Scala file with no dependencies, so will just `sbt run`. 4 | 5 | To run with command line arguments, pass them in. From the `sbt` prompt, do, say, `run 100000`. From the OS prompt, do 6 | ```bash 7 | sbt "run 100000" 8 | ``` 9 | 10 | Try a non-tail-recursive version of the log-factorial function. Check that it works correctly for small argument, and that it overflows the stack for large arguments, like 100000. 
11 | 12 | -------------------------------------------------------------------------------- /examples/C2-LogFactorial/log-fact.scala: -------------------------------------------------------------------------------- 1 | /* 2 | log-fact.scala 3 | Program to compute the log-factorial function 4 | */ 5 | 6 | object LogFact { 7 | 8 | import annotation.tailrec 9 | import math.log 10 | 11 | @tailrec 12 | def logfact(n: Int, acc: Double = 0.0): Double = 13 | if (n == 1) acc else 14 | logfact(n-1, acc + log(n)) 15 | 16 | def main(args: Array[String]): Unit = { 17 | val n = if (args.length == 1) args(0).toInt else 5 18 | val lfn = logfact(n) 19 | println(s"logfact($n) = $lfn") 20 | } 21 | 22 | } 23 | 24 | // eof 25 | 26 | -------------------------------------------------------------------------------- /examples/C6-Rainier/Readme.md: -------------------------------------------------------------------------------- 1 | # Rainier 2 | 3 | Simple example of using [Rainier](https://rainier.fit/) to do Bayesian logistic regression on some synthetic data. It should just `sbt run`. Work through the Rainier documentation for further information. 4 | 5 | Note that this example also illustrates the use of `mdoc` for producing reports. This includes the use of EvilPlot figures generated by Rainier diagnostics. An [example report](docs/LogisticRegression.md) is provided, and the [generated Markdown document](target/mdoc/LogisticRegression.md) includes generated output and plots. 
6 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/smiledf/build.sbt: -------------------------------------------------------------------------------- 1 | name := "smile" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 11 | "com.github.haifengl" %% "smile-scala" % "2.1.1" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 16 | "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at 18 | "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.13.1" 22 | 23 | -------------------------------------------------------------------------------- /examples/C6-ScalaGlm/build.sbt: -------------------------------------------------------------------------------- 1 | name := "scala-glm-example" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 11 | "com.github.darrenjw" %% "scala-glm" % "0.3" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 16 | "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at 18 | "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.12.1" 22 | 23 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/sparkdf/spark.scala: -------------------------------------------------------------------------------- 1 | /* 2 | spark.scala 3 | 4 | code for a "spark-shell" session 5 | 6 | spark-shell --master local[4] 7 | 8 | */ 9 | 10 | val df = spark.read. 11 | option("header", "true"). 12 | option("inferSchema","true"). 
13 | csv("../r/cars93.csv") 14 | val df2=df.filter("EngineSize <= 4.0") 15 | val col=df2.col("Weight")*0.453592 16 | val df3=df2.withColumn("WeightKG",col) 17 | df3.write.format("com.databricks.spark.csv"). 18 | option("header","true"). 19 | save("out-csv") 20 | 21 | 22 | // eof 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/C6-Smile/build.sbt: -------------------------------------------------------------------------------- 1 | name := "smile" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | enablePlugins(MdocPlugin) 10 | 11 | libraryDependencies ++= Seq( 12 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 13 | "com.github.haifengl" %% "smile-scala" % "2.2.1" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at 18 | "https://oss.sonatype.org/content/repositories/snapshots/", 19 | "Sonatype Releases" at 20 | "https://oss.sonatype.org/content/repositories/releases/" 21 | ) 22 | 23 | scalaVersion := "2.12.10" 24 | 25 | -------------------------------------------------------------------------------- /exercises/bisection/build.sbt: -------------------------------------------------------------------------------- 1 | name := "bisection" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 9 | "org.scalanlp" %% "breeze" % "1.0", 10 | "org.scalanlp" %% "breeze-natives" % "1.0" 11 | ) 12 | 13 | resolvers ++= Seq( 14 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 15 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 16 | ) 17 | 18 | scalaVersion := "2.12.10" 19 | 20 | -------------------------------------------------------------------------------- /exercises/option/build.sbt: 
-------------------------------------------------------------------------------- 1 | name := "bisection-option" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 9 | "org.scalanlp" %% "breeze" % "1.0", 10 | "org.scalanlp" %% "breeze-natives" % "1.0" 11 | ) 12 | 13 | resolvers ++= Seq( 14 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 15 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 16 | ) 17 | 18 | scalaVersion := "2.12.10" 19 | 20 | -------------------------------------------------------------------------------- /examples/C3-Pi/pi.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pi.scala 3 | Simple (parallel) Monte Carlo estimate of Pi using rejection sampling on points randomly scattered on the unit square 4 | */ 5 | 6 | object Pi { 7 | 8 | def main(args: Array[String]): Unit = { 9 | val N = 10000000 10 | println("Estimating pi based on "+N+" draws") 11 | println("Creating random vector..") 12 | val z2 = (1 to N).par map (i => { 13 | val x = math.random 14 | val y = math.random 15 | x*x + y*y 16 | }) 17 | println("Counting successes...") 18 | val c = z2 count (_ < 1) 19 | val mypi = 4.0*c/N 20 | println("Esimate of pi: "+mypi) 21 | } 22 | 23 | } 24 | 25 | // eof 26 | 27 | 28 | -------------------------------------------------------------------------------- /examples/C7-Vegas/src/main/scala/C7-Vegas.scala: -------------------------------------------------------------------------------- 1 | /* 2 | C7-Vegas.scala 3 | Example Vegas app 4 | */ 5 | 6 | object MyVegasApp { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | import vegas._ 11 | import vegas.render.WindowRenderer._ 12 | 13 | val plot = Vegas("Country Pop"). 
14 | withData( 15 | Seq( 16 | Map("country" -> "USA", "population" -> 314), 17 | Map("country" -> "UK", "population" -> 64), 18 | Map("country" -> "DK", "population" -> 80))). 19 | encodeX("country", Nom). 20 | encodeY("population", Quant). 21 | mark(Bar) 22 | 23 | plot.show 24 | 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /examples/C6-Regression/build.sbt: -------------------------------------------------------------------------------- 1 | name := "regression" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 11 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 12 | "org.scalanlp" %% "breeze" % "1.0", 13 | "org.scalanlp" %% "breeze-natives" % "1.0" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at 18 | "https://oss.sonatype.org/content/repositories/snapshots/", 19 | "Sonatype Releases" at 20 | "https://oss.sonatype.org/content/repositories/releases/" 21 | ) 22 | 23 | scalaVersion := "2.12.10" 24 | 25 | 26 | -------------------------------------------------------------------------------- /sbt-test/src/test/scala/sbt-test-flatspec.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.flatspec.AnyFlatSpec 2 | 3 | // Tests using the "FlatSpec" style... 
4 | 5 | class SetSpec extends AnyFlatSpec { 6 | 7 | "An empty Set" should "have size 0" in { 8 | assert(Set.empty.size == 0) 9 | } 10 | 11 | it should "produce NoSuchElementException when head is invoked" in { 12 | assertThrows[NoSuchElementException] { 13 | Set.empty.head 14 | } 15 | } 16 | 17 | "A Gamma(3.0,4.0)" should "have mean 12.0" in { 18 | import breeze.stats.distributions.Gamma 19 | val g = Gamma(3.0,4.0) 20 | val m = g.mean 21 | assert(math.abs(m - 12.0) < 0.000001) 22 | } 23 | 24 | } 25 | 26 | // eof 27 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/build.sbt: -------------------------------------------------------------------------------- 1 | name := "pfilter" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 9 | "org.scalanlp" %% "breeze" % "0.13", 10 | "org.scalanlp" %% "breeze-natives" % "1.0", 11 | "org.scalanlp" %% "breeze-viz" % "1.0" 12 | 13 | ) 14 | 15 | resolvers ++= Seq( 16 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 18 | ) 19 | 20 | scalaVersion := "2.12.10" 21 | 22 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/build.sbt: -------------------------------------------------------------------------------- 1 | name := "gamma-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 
"https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.12.10" 20 | 21 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/build.sbt: -------------------------------------------------------------------------------- 1 | name := "monte-carlo" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.12.10" 20 | 21 | -------------------------------------------------------------------------------- /fragments/Readme.md: -------------------------------------------------------------------------------- 1 | # Code Fragments 2 | 3 | Raw code fragments auto-extracted from the course notes, to allow copy-and-pasting of bits of code into a REPL or editor. 4 | 5 | For complete runnable examples from the notes, instead see the [examples directory](../examples/). 6 | 7 | 8 | ### Code fragments by chapter 9 | 10 | 1. [Introduction](intro.scala) 11 | 2. [Scala and FP Basics](basics.scala) 12 | 3. [Scala collections library](collections.scala) 13 | 4. [Scala Breeze](breeze.scala) 14 | 5. [Monte Carlo](monte.scala) 15 | 6. [Statistical modelling](stats.scala) 16 | 7. [Tools](tools.scala) 17 | 8. [Apache Spark](spark.scala) 18 | 9. 
[Advanced topics](advanced.scala) 19 | 20 | 21 | 22 | #### eof 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/C4-PCA/build.sbt: -------------------------------------------------------------------------------- 1 | name := "pca" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 11 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 12 | "org.scalanlp" %% "breeze" % "1.0", 13 | "org.scalanlp" %% "breeze-natives" % "1.0", 14 | "org.scalanlp" %% "breeze-viz" % "1.0" 15 | ) 16 | 17 | resolvers ++= Seq( 18 | "Sonatype Snapshots" at 19 | "https://oss.sonatype.org/content/repositories/snapshots/", 20 | "Sonatype Releases" at 21 | "https://oss.sonatype.org/content/repositories/releases/" 22 | ) 23 | 24 | scalaVersion := "2.12.10" 25 | 26 | 27 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/build.sbt: -------------------------------------------------------------------------------- 1 | name := "metropolis-assembly" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.12.10" 20 | 21 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/build.sbt: -------------------------------------------------------------------------------- 1 | name := 
"spark-template" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 9 | "org.apache.spark" %% "spark-core" % "2.4.5" % Provided, 10 | "org.apache.spark" %% "spark-sql" % "2.4.5" % Provided, 11 | "org.apache.spark" %% "spark-mllib" % "2.4.5" % Provided 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.11.12" 20 | 21 | -------------------------------------------------------------------------------- /examples/C7-Vegas/build.sbt: -------------------------------------------------------------------------------- 1 | name := "C7-Vegas" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | // "org.scalatest" %% "scalatest" % "3.0.8" % "test", 11 | // "org.scalanlp" %% "breeze" % "1.0", 12 | // "org.scalanlp" %% "breeze-viz" % "1.0", 13 | // "org.scalanlp" %% "breeze-natives" % "1.0", 14 | "org.vegas-viz" %% "vegas" % "0.3.11" 15 | ) 16 | 17 | resolvers ++= Seq( 18 | "Sonatype Snapshots" at 19 | "https://oss.sonatype.org/content/repositories/snapshots/", 20 | "Sonatype Releases" at 21 | "https://oss.sonatype.org/content/repositories/releases/" 22 | ) 23 | 24 | scalaVersion := "2.11.8" 25 | 26 | fork := true 27 | 28 | 29 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/framian/build.sbt: -------------------------------------------------------------------------------- 1 | name := "framian-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | 
"org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "com.pellucid" %% "framian" % "0.3.3" 11 | ) 12 | 13 | resolvers ++= Seq( 14 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 15 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/", 16 | "Pellucid Bintray" at "http://dl.bintray.com/pellucid/maven" 17 | ) 18 | 19 | scalaVersion := "2.11.2" 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/build.sbt: -------------------------------------------------------------------------------- 1 | name := "metropolis" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.13.4" % "test", 9 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0", 12 | "org.scalanlp" %% "breeze-viz" % "1.0" 13 | ) 14 | 15 | resolvers ++= Seq( 16 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 18 | ) 19 | 20 | scalaVersion := "2.12.10" 21 | 22 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/datatable/build.sbt: -------------------------------------------------------------------------------- 1 | name := "datatable-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "com.github.tototoshi" %% "scala-csv" % "1.1.2", 11 | "com.github.martincooper" %% "scala-datatable" % "0.7.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at 
"https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.11.7" 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/saddle/CsvDf.scala: -------------------------------------------------------------------------------- 1 | 2 | object CsvDf { 3 | 4 | def main(args: Array[String]): Unit = { 5 | 6 | import org.saddle.Index 7 | import org.saddle.io._ 8 | 9 | val file = CsvFile("../r/cars93.csv") 10 | val df = CsvParser.parse(file).withColIndex(0) 11 | println(df) 12 | val df2 = df.rfilter(_("EngineSize").mapValues(CsvParser.parseDouble).at(0)<=4.0) 13 | println(df2) 14 | val wkg=df2.col("Weight").mapValues(CsvParser.parseDouble).mapValues(_*0.453592).setColIndex(Index("WeightKG")) 15 | val df3=df2.joinPreserveColIx(wkg.mapValues(_.toString)) 16 | println(df3) 17 | 18 | import CsvImplicits._ 19 | import scala.language.reflectiveCalls 20 | df3.writeCsvFile("saddle-out.csv") 21 | 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /examples/C6-Smile/Readme.md: -------------------------------------------------------------------------------- 1 | # Smile as a Scala library 2 | 3 | Redoing the yacht hydrodynamics OLS regression analyis using [Smile](https://haifengl.github.io/). If you want to do simple statistical modelling and machine learning for small-to-medium sized data sets in Scala, then this currently looks to be a good option, so is probably worth spending some time getting to grips with it. 4 | 5 | This example also illustrates the use of Scala's [mdoc](https://scalameta.org/mdoc/) Markdown-based documentation system. After enabling the plugin, Markdown files in [docs](docs/) get compiled to [target/mdoc](target/mdoc/). 
6 | 7 | Note that the latest version of Smile, version 2.2.1, works fine with Scala 2.11, 2.12 and 2.13 (some earlier releases only worked with Scala 2.13). 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/smiledf/src/main/scala/smile-df.scala: -------------------------------------------------------------------------------- 1 | /* 2 | smile-df.scala 3 | 4 | Testing the use of Smile DataFrames 5 | 6 | */ 7 | 8 | object SmileApp { 9 | 10 | 11 | def main(args: Array[String]): Unit = { 12 | 13 | val df2 = smile.read.csv("../r/cars93.csv") 14 | val df3 = df2.filter{ _("EngineSize").asInstanceOf[Double] <= 4.0 } 15 | val w = df3.select("Weight") 16 | val wkg = w map {_(0).asInstanceOf[Int] * 0.453592} 17 | val wkgdf = smile.data.DataFrame.of(wkg.toArray.map(Array(_)),"WKG") 18 | val adf = df3 merge wkgdf 19 | smile.write.csv(adf,"cars-smile.csv") 20 | 21 | // read it back for good measure... 22 | val rdf = smile.read.csv("cars-smile.csv") 23 | println(rdf) 24 | println(rdf.summary) 25 | 26 | } 27 | 28 | } 29 | 30 | // eof 31 | 32 | -------------------------------------------------------------------------------- /sbt-test/src/test/scala/sbt-test-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.funsuite.AnyFunSuite 2 | 3 | // Here using FunSuite style - but other possibilities... 
4 | 5 | class SetSuite extends AnyFunSuite { 6 | 7 | test("An empty Set should have size 0") { 8 | assert(Set.empty.size == 0) 9 | } 10 | 11 | test("A Gaussian sample of length 10 should have length 10") { 12 | import breeze.stats.distributions.Gaussian 13 | val x = Gaussian(2.0,4.0).sample(10) 14 | assert(x.length === 10) 15 | } 16 | 17 | test("Cats map merge") { 18 | import cats.instances.all._ 19 | import cats.syntax.semigroup._ 20 | val m1 = Map("a"->1,"b"->2) 21 | val m2 = Map("b"->2,"c"->1) 22 | val m3 = m1 |+| m2 23 | val m4 = Map("b" -> 4, "c" -> 1, "a" -> 1) 24 | assert(m3 === m4) 25 | } 26 | 27 | } 28 | 29 | 30 | // eof 31 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/saddle/build.sbt: -------------------------------------------------------------------------------- 1 | name := "csv-manipulation" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "org.scalanlp" %% "breeze" % "0.11.2", 11 | "org.scalanlp" %% "breeze-natives" % "0.11.2", 12 | "org.scalanlp" %% "breeze-viz" % "0.11.2", 13 | "org.scala-saddle" %% "saddle-core" % "1.3.+" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 18 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.11.6" 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/src/main/scala/metropolis.scala: -------------------------------------------------------------------------------- 1 | /* 2 | metropolis.scala 3 | 4 | Example of a project which can be built as an assembly JAR, allowing stand-alone deployment and calling from R 5 | 6 | */ 7 | 8 | object 
Metropolis { 9 | 10 | import breeze.stats.distributions._ 11 | 12 | def kernel(x: Double): Rand[Double] = for { 13 | innov <- Uniform(-0.5, 0.5) 14 | can = x + innov 15 | oldll = Gaussian(0.0, 1.0).logPdf(x) 16 | loglik = Gaussian(0.0, 1.0).logPdf(can) 17 | loga = loglik - oldll 18 | u <- Uniform(0.0, 1.0) 19 | } yield if (math.log(u) < loga) can else x 20 | 21 | val chain = Stream.iterate(0.0)(kernel(_).draw) 22 | 23 | def main(args: Array[String]): Unit = { 24 | val n = if (args.size == 0) 10 else args(0).toInt 25 | chain.take(n).toArray.foreach(println) 26 | } 27 | 28 | } 29 | 30 | // eof 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/framian/framian.scala: -------------------------------------------------------------------------------- 1 | /* 2 | framian.scala 3 | 4 | Test of "framian" 5 | 6 | */ 7 | 8 | import java.io.{File,PrintWriter} 9 | import framian.{Index,Cols} 10 | import framian.csv.{Csv,CsvFormat} 11 | 12 | object FramianTest { 13 | 14 | def main(args: Array[String]) = { 15 | println("Hello") 16 | val df=Csv.parseFile(new File("../r/cars93.csv")).labeled.toFrame 17 | println(""+df.rows+" "+df.cols) 18 | val df2=df.filter(Cols("EngineSize").as[Double])( _ <= 4.0 ) 19 | println(""+df2.rows+" "+df2.cols) 20 | val df3=df2.map(Cols("Weight").as[Int],"WeightKG")(r=>r.toDouble*0.453592) 21 | println(""+df3.rows+" "+df3.cols) 22 | println(df3.colIndex) 23 | val csv = Csv.fromFrame(new CsvFormat(",", header = true))(df3) 24 | new PrintWriter("out.csv") { write(csv.toString); close } 25 | println("Done") 26 | } 27 | 28 | } 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/build.sbt: -------------------------------------------------------------------------------- 1 | name := "evilplot-examples" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | 
libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.1.0-SNAP13" % "test", 11 | "com.cibo" %% "evilplot" % "0.6.3", // 0.7.0 12 | "com.cibo" %% "evilplot-repl" % "0.6.3", // 0.7.0 13 | "org.scalanlp" %% "breeze" % "1.0", 14 | // "org.scalanlp" %% "breeze-viz" % "1.0", 15 | "org.scalanlp" %% "breeze-natives" % "1.0" 16 | ) 17 | 18 | 19 | resolvers += Resolver.bintrayRepo("cibotech", "public") 20 | 21 | resolvers ++= Seq( 22 | "Sonatype Snapshots" at 23 | "https://oss.sonatype.org/content/repositories/snapshots/", 24 | "Sonatype Releases" at 25 | "https://oss.sonatype.org/content/repositories/releases/" 26 | ) 27 | 28 | scalaVersion := "2.12.8" 29 | 30 | fork := true 31 | 32 | 33 | -------------------------------------------------------------------------------- /sbt-test/build.sbt: -------------------------------------------------------------------------------- 1 | name := "sbt-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature","-language:implicitConversions") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalatest" %% "scalatest" % "3.1.1" % "test", 9 | "org.scalacheck" %% "scalacheck" % "1.14.1" % "test", 10 | "org.scalanlp" %% "breeze" % "1.0", 11 | "org.scalanlp" %% "breeze-natives" % "1.0", 12 | "org.scalanlp" %% "breeze-viz" % "1.0", 13 | "org.typelevel" %% "cats-core" % "1.0.0" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 18 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | 22 | addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full) 23 | 24 | 25 | scalaVersion := "2.12.10" 26 | 27 | -------------------------------------------------------------------------------- /examples/C8-SparkJob/src/main/scala/spark.scala: -------------------------------------------------------------------------------- 1 | /* 2 | spark.scala 3 | 4 | Build with: 5 | 
sbt package 6 | 7 | Then submit with: 8 | spark-submit --class "SparkApp" \ 9 | --master local[4] \ 10 | target/scala-2.11/spark-template_2.11-0.1.jar 11 | 12 | */ 13 | 14 | import org.apache.spark.SparkContext 15 | import org.apache.spark.SparkContext._ 16 | import org.apache.spark.SparkConf 17 | 18 | object SparkApp { 19 | 20 | def main(args: Array[String]): Unit = { 21 | 22 | val spark = new SparkConf(). 23 | setAppName("Spark Application") 24 | val sc = new SparkContext(spark) 25 | 26 | sc.textFile("/usr/share/dict/words"). 27 | map(_.trim). 28 | map(_.toLowerCase). 29 | flatMap(_.toCharArray). 30 | filter(_ > '/'). 31 | filter(_ < '}'). 32 | map{(_,1)}. 33 | reduceByKey(_+_). 34 | sortBy(_._2,false). 35 | collect. 36 | foreach(println) 37 | 38 | } 39 | 40 | } 41 | 42 | // eof 43 | 44 | 45 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/README.md: -------------------------------------------------------------------------------- 1 | # Scala data tables and frames 2 | 3 | Code samples associated with my blog post "Scala data frames and tables" which can be found at: 4 | 5 | https://darrenjw.wordpress.com/2015/08/21/data-frames-and-tables-in-scala/ 6 | 7 | See the post for explanation of the examples. 8 | 9 | Note, however, the addition here of the [Smile](http://haifengl.github.io/) `DataFrame` example, which is a welcome development. 10 | 11 | Note that you must run the script r/gen-csv.R in an R session FIRST, in order to generate the CSV file required for the Scala examples. 12 | 13 | If you have R installed, then: 14 | ```bash 15 | cd r 16 | R CMD BATCH gen-csv.R 17 | ``` 18 | should generate the file `cars93.csv` required by all of the scripts. 19 | 20 | The other directories contain Scala examples. Each can be run by going in to the relevant directory and doing `sbt run`, except for the Spark example, which needs to be run in a Spark shell (to be covered later). 
21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/C7-MetropAssembly/Readme.md: -------------------------------------------------------------------------------- 1 | # Creating an assembly JAR (for calling from R) 2 | 3 | This directory shows how to create an assembly JAR for a Scala project, bundling all dependencies. This can be convenient for deployment reasons, generally, but also for calling from R. 4 | 5 | Just typing 6 | ```bash 7 | sbt assembly 8 | ``` 9 | should build the fat JAR. This will be placed in `target/scala-2.12`, along with the regular artefact. Comparing the file sizes is enlightening! The `assembly` SBT task is provided by the relevant line in `project/plugins.sbt`. 10 | 11 | To run the code directly in the JVM, use something like 12 | ```bash 13 | java -jar target/scala-2.12/metropolis-assembly-assembly-0.1.jar 14 | ``` 15 | or 16 | ```bash 17 | java -jar target/scala-2.12/metropolis-assembly-assembly-0.1.jar 20 18 | ``` 19 | The file `rscala.R` shows how to call this from R using the `rscala` library. You can run this with 20 | ```bash 21 | R CMD BATCH rscala.R 22 | ``` 23 | if you have R installed. 24 | 25 | -------------------------------------------------------------------------------- /exercises/Basics.md: -------------------------------------------------------------------------------- 1 | # Scala and FP basics 2 | 3 | ## Practical exercises 4 | 5 | Exercises following the material presented in Chapter 2 6 | 7 | ### 1. LogFactorial example 8 | 9 | Find and run the complete log-factorial example using SBT, using `run` from the SBT prompt. Pass in an argument using, eg. `run 1000`. Next, drop into the REPL with `console`. Then enter 10 | ```scala 11 | import LogFact._ 12 | logfact(100) 13 | ``` 14 | So we can use the SBT REPL to run functions within our project. This is extremely useful for debugging. 15 | 16 | ### 2. 
Interval bisection 17 | 18 | Implement and test a recursive function for root-finding using interval bisection. See the [detailed instructions](bisection/Readme.md) for further details. 19 | 20 | ### 3. Scala exercises 21 | 22 | Start to work through the interactive [Scala tutorial](https://www.scala-exercises.org/scala_tutorial/) from [Scala exercises](https://www.scala-exercises.org/). Do the first couple of sections. Try to work through everything up to and including "Syntactic Conveniences" in any spare time you have over the next couple of days. 23 | 24 | 25 | #### eof 26 | 27 | -------------------------------------------------------------------------------- /examples/C6-Smile/docs/smile-example.md: -------------------------------------------------------------------------------- 1 | # Smile example 2 | 3 | ## Some mdoc documentation 4 | 5 | This is some documentation prepared using `mdoc`. The original file is in `docs`, but the `sbt` task `mdoc` will typecheck and execute the code blocks, and put the compiled markdown document in `target/mdoc`. 6 | 7 | We begin by reading the data (we assume that the file "yacht.csv" already exists). 8 | ```scala mdoc 9 | val df = smile.read.csv("yacht.csv") 10 | df 11 | ``` 12 | We can get a quick summary of the data as follows. 13 | ```scala mdoc 14 | df.summary 15 | ``` 16 | We can now carry out OLS regression after a couple of imports 17 | ```scala mdoc 18 | import smile.data.formula._ 19 | import scala.language.postfixOps 20 | val mod = smile.regression.ols("Resist" ~, df) 21 | mod 22 | ``` 23 | If we don't want to regress on everything, we can just choose what we'd like to regress on. 24 | ```scala mdoc 25 | smile.regression.ols("Resist" ~ "Froude", df) 26 | smile.regression.ols("Resist" ~ "Froude" + "LongPos", df) 27 | ``` 28 | 29 | ### Summary 30 | 31 | This brief document has illustrated how easy and convenient it is to produce executable documentation and reports for Scala. 
32 | 33 | -------------------------------------------------------------------------------- /examples/C4-GammaTest/src/main/scala/gamma-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | gamma-test.scala 3 | 4 | Test the gamma random number generator in Breeze 5 | 6 | */ 7 | 8 | object GammaTest { 9 | 10 | import math.{abs,sqrt} 11 | import breeze.stats.meanAndVariance 12 | import breeze.stats.distributions.Gamma 13 | 14 | def gammaTest(N: Int, a: Double, b: Double): Unit = { 15 | println(s"Testing Gamma($a,$b) with $N trials") 16 | val mean = a*b 17 | val variance = a*b*b 18 | val gammas = Gamma(a,b).sample(N) 19 | val stats = meanAndVariance(gammas) 20 | val xbar = stats.mean 21 | val s2 = stats.variance 22 | println(s"True mean: $mean Sample mean: $xbar") 23 | val zscore = (xbar - mean)/sqrt(variance/N) 24 | println(s"z-score is $zscore") 25 | assert(abs(zscore) < 3.0) 26 | println(s"True variance: $variance Sample variance: $s2") 27 | } 28 | 29 | def main(args: Array[String]): Unit = { 30 | println("Testing Breeze's Gamma generator") 31 | val N = 10000000 32 | gammaTest(N,2.0,3.0) 33 | gammaTest(N,1.0,2.0) 34 | gammaTest(N,5.0,1.0) 35 | gammaTest(N,5.0,0.1) 36 | gammaTest(N,1.0,5.0) 37 | gammaTest(N,0.5,3.0) 38 | gammaTest(N,0.2,1.0) 39 | gammaTest(N,0.2,0.1) 40 | gammaTest(N,0.2,4.0) 41 | println("Test complete") 42 | } 43 | 44 | } 45 | 46 | // eof 47 | 48 | -------------------------------------------------------------------------------- /sbt/Readme.md: -------------------------------------------------------------------------------- 1 | # sbt installation and testing 2 | 3 | `sbt` is the simple/scala build tool. It is the standard build tool for Scala. Other than a recent Java installation, `sbt` is all you need for building Scala projects. 
4 | 5 | Some useful links relating to `sbt` are given below: 6 | 7 | * [sbt](http://www.scala-sbt.org/) 8 | * [Documentation](http://www.scala-sbt.org/documentation.html) 9 | * [Download](https://www.scala-sbt.org/download.html) 10 | 11 | Please follow the relevant instructions for installing `sbt` on your OS. 12 | 13 | If at all possible, please install `sbt` on your system in advance of the start of the course, and test that it works by typing `sbt run` from the `sbt-test` directory of this code repository. You will need an Internet connection the first time that you run this, and it will take some time to run while it downloads Scala, the Scala compiler, the Scala standard library and all of the libraries that we will be using in the course. If the test runs correctly, it should finish by printing the message "SBT IS INSTALLED AND WORKING" to the console. Once these libraries are downloaded and cached on your system, subsequent builds should be much faster, and should not require an Internet connection. 14 | 15 | Once you have `sbt` installed and working, see the [Readme](../sbt-test/Readme.md) in the `sbt-test` directory for further information. 
16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /examples/C6-Rainier/build.sbt: -------------------------------------------------------------------------------- 1 | // build.sbt 2 | 3 | name := "rainier" 4 | 5 | version := "0.1-SNAPSHOT" 6 | 7 | scalacOptions ++= Seq( 8 | "-unchecked", "-deprecation", "-feature", "-language:higherKinds", 9 | "-language:implicitConversions", "-Ypartial-unification" 10 | ) 11 | 12 | enablePlugins(MdocPlugin) 13 | 14 | addCompilerPlugin("org.typelevel" %% "kind-projector" % "0.11.0" cross CrossVersion.full) 15 | addCompilerPlugin("org.scalamacros" %% "paradise" % "2.1.1" cross CrossVersion.full) 16 | 17 | libraryDependencies ++= Seq( 18 | "org.scalatest" %% "scalatest" % "3.1.1" % "test", 19 | "org.scalactic" %% "scalactic" % "3.0.8", 20 | "org.typelevel" %% "cats-core" % "2.0.0", 21 | "org.typelevel" %% "simulacrum" % "1.0.0", 22 | "com.cibo" %% "evilplot" % "0.6.3", // 0.7.0 23 | "com.cibo" %% "evilplot-repl" % "0.6.3", // 0.7.0 24 | "com.stripe" %% "rainier-core" % "0.3.0", 25 | "com.stripe" %% "rainier-notebook" % "0.3.0" 26 | 27 | ) 28 | 29 | resolvers += Resolver.bintrayRepo("cibotech", "public") // for EvilPlot 30 | 31 | resolvers ++= Seq( 32 | "Sonatype Snapshots" at 33 | "https://oss.sonatype.org/content/repositories/snapshots/", 34 | "Sonatype Releases" at 35 | "https://oss.sonatype.org/content/repositories/releases/", 36 | "jitpack" at "https://jitpack.io" // for Jupiter/notebook 37 | 38 | ) 39 | 40 | scalaVersion := "2.12.10" 41 | 42 | 43 | // eof 44 | 45 | -------------------------------------------------------------------------------- /sbt-test/Readme.md: -------------------------------------------------------------------------------- 1 | # SBT test 2 | 3 | This directory contains a Scala SBT project with numerous dependencies. Assuming that `sbt` is installed, you should be able to compile and run the project by typing `sbt run` from this directory. 
After downloading and caching any required libraries, it will compile the code (in `src/main`) and run it. If the program runs successfully, it will print the message "SBT IS INSTALLED AND WORKING" to the console. 4 | 5 | For reference, the file [build.sbt](build.sbt) shows how to include a dependency on many of the libraries most commonly required for statistical computing applications. 6 | 7 | For good measure, you might also want to run `sbt test` from this directory. This should compile and run a few tests (in `src/test`). Note that there are some simple example tests using both ScalaTest (in different styles) and ScalaCheck (for property-based testing) in this directory, so these provide useful templates for test code. 8 | 9 | This directory is also useful for starting a REPL including commonly used dependencies. Just running `sbt console` from this directory will give a Scala console including dependencies on libraries such as Breeze, Breeze-viz and Cats, which can be very useful for interactive experiments. 10 | 11 | For further information about sbt, read through the [sbt getting started guide](https://www.scala-sbt.org/1.x/docs/Getting-Started.html). 12 | 13 | #### eof 14 | 15 | 16 | -------------------------------------------------------------------------------- /IntelliJ.md: -------------------------------------------------------------------------------- 1 | # IntelliJ installation and setup 2 | 3 | IntelliJ now seems to be the most popular IDE for Scala. 
4 | 5 | ## Installation 6 | 7 | * To get started with IntelliJ, Scala and SBT, follow the official Scala [getting started guide](http://docs.scala-lang.org/getting-started.html) 8 | - [Get started with IntelliJ and Scala](https://www.scala-lang.org/documentation/getting-started-intellij-track/getting-started-with-scala-in-intellij.html) 9 | - [Get started with IntelliJ and Sbt](https://www.scala-lang.org/documentation/getting-started-intellij-track/building-a-scala-project-with-intellij-and-sbt.html) 10 | - [Using ScalaTest with IntelliJ](http://docs.scala-lang.org/getting-started-intellij-track/testing-scala-in-intellij-with-scalatest.html) 11 | 12 | ## Tips 13 | 14 | * Always import SBT project into IntelliJ as SBT projects - IntelliJ will then examine the SBT build file to figure out all appropriate dependencies 15 | - do **Import Project** and select the `build.sbt` file within the project directory 16 | - the default import options are mostly fine, though you probably want to build with the *SBT shell* 17 | * IntelliJ can get confused if you try and import two different SBT projects with the same name 18 | - So, if you copy the `app-template` directory, you should edit the project name in `build.sbt` *before* trying to import it into IntelliJ 19 | - **Note** that you may prefer to use `sbt new darrenjw/breeze.g8` to directly create an app template with an *appropriate name* and then import that 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /sbt-test/src/test/scala/sbt-test-scalacheck.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.matchers.should.Matchers 2 | 3 | import org.scalacheck._ 4 | import org.scalacheck.Prop.{forAll,propBoolean} 5 | 6 | class SqrtSpecification extends Properties("Sqrt") with Matchers { 7 | 8 | property("math.sqrt should square to give original") = 9 | forAll { a: Double => 10 | (a >= 0.0) ==> { 11 | val s = 
math.sqrt(a) 12 | val tol = 1e-8 * a 13 | s*s === a +- tol 14 | } 15 | } 16 | 17 | } 18 | 19 | class GammaSpec extends Properties("Gamma") with Matchers { 20 | 21 | import breeze.stats.distributions.Gamma 22 | 23 | val tol = 1e-8 24 | val big = 1e100 25 | 26 | property("mean") = 27 | forAll { (a: Double, b: Double) => 28 | ((a > tol) && (a < big) && (b > tol) && (b < big)) ==> { 29 | Gamma(a,b).mean === a*b +- tol 30 | } 31 | } 32 | 33 | } 34 | 35 | class StringSpecification extends Properties("String") with Matchers { 36 | 37 | property("startwith first string") = 38 | forAll { (a: String, b: String) => 39 | (a+b).startsWith(a) 40 | } 41 | 42 | property("concatenate bound") = 43 | forAll { (a: String, b: String) => 44 | (a+b).length >= a.length && (a+b).length >= b.length 45 | } 46 | 47 | property("concatenate length") = 48 | forAll { (a: String, b: String) => 49 | (a+b).length == a.length + b.length 50 | } 51 | 52 | property("substring") = 53 | forAll { (a: String, b: String, c: String) => 54 | (a+b+c).substring(a.length, a.length+b.length) == b 55 | } 56 | 57 | } 58 | 59 | // eof 60 | 61 | -------------------------------------------------------------------------------- /exercises/bisection/src/test/scala/bisect-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | bisect-test.scala 3 | 4 | Tests for bisection exercise 5 | 6 | */ 7 | 8 | import org.scalatest._ 9 | import org.scalatest.Matchers._ 10 | 11 | class MyTestSuite extends FlatSpec { 12 | 13 | "1+2" should "=3" in { 14 | assert(1 + 2 === 3) 15 | } 16 | 17 | val tol = 1.0e-8 18 | 19 | def approxEq(test: Double, should: Double): Boolean = { 20 | if (math.abs(test - should) < tol) true else { 21 | println("approxEq test failed: found " + test + " but expected " + should + " with tolerance " + tol) 22 | false 23 | } 24 | } 25 | 26 | "1.0" should "approxEq 1.0" in { 27 | assert(approxEq(1.0, 1.0)) 28 | } 29 | 30 | import Bisect._ 31 | 32 | 
"findRoot(-10.0,10.0)(x => x+1.0)" should "=-1.0" in { 33 | assert(approxEq(findRoot(-10.0, 10.0)(x => x + 1.0), -1.0)) 34 | } 35 | 36 | "findRoot(-5.0, 10.0)(x => 2.0 - x)" should "=2.0" in { 37 | assert(approxEq(findRoot(-5.0, 10.0)(x => 2.0 - x), 2.0)) 38 | } 39 | 40 | "findRoot(0.0, 5.0)(x => x - 1.0)" should "= 1.0" in { 41 | assert(approxEq(findRoot(0.0, 5.0)(x => x - 1.0), 1.0)) 42 | 43 | } 44 | 45 | "findRoot(0.0, 2.0)(x => (x + 1.0) * (x - 1.0))" should "= 1.0" in { 46 | assert(approxEq(findRoot(0.0, 2.0)(x => (x + 1.0) * (x - 1.0)), 1.0)) 47 | } 48 | 49 | "findRoot(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0))" should "= -1.0" in { 50 | assert(approxEq(findRoot(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0)), -1.0)) 51 | } 52 | 53 | "findRoot(0.0, 2.0)(x => x * x - 2.0)" should "= math.sqrt(2.0)" in { 54 | assert(approxEq(findRoot(0.0, 2.0)(x => x * x - 2.0), math.sqrt(2.0))) 55 | } 56 | 57 | } 58 | 59 | /* eof */ 60 | 61 | -------------------------------------------------------------------------------- /exercises/bisection/Readme.md: -------------------------------------------------------------------------------- 1 | # FP Basics 2 | 3 | ## Exercise: Interval bisection 4 | 5 | Implement a function to find the (approximate) *root* of a simple function of type `Double => Double` using interval bisection - that is, find the input value `x` which makes the output of the function equal zero. 6 | 7 | Your function should have the type signature: 8 | 9 | ```scala 10 | findRoot(low: Double, high: Double)(f: Double => Double): Double 11 | ``` 12 | 13 | You may *assume* that the sign of `f(low)` is different to the sign of `f(high)` - do not test or check for this in your code (yet) - we will come back to this later. Similarly, just *assume* that `low < high`. 14 | 15 | The function should be recursive, evaluating the function at the given end points and mid-point, and then calling itself on a smaller interval where the function changes sign. 
16 | 17 | You will obviously need some kind of termination criterion for the recursion. 18 | 19 | Just find a single root. *Do not* worry about identifying or tracking multiple roots. 20 | 21 | Some test cases are given below. Note that since your method is approximate, you can only expect approximate equality. 22 | 23 | ```scala 24 | findRoot(-10.0,10.0)(x => x+1.0) == -1.0 25 | 26 | findRoot(-5.0,10.0)(x => 2.0-x) == 2.0 27 | 28 | findRoot(0.0,5.0)(x => x-1.0) == 1.0 29 | 30 | findRoot(0.0,2.0)(x => (x+1.0)*(x-1.0)) == 1.0 31 | 32 | findRoot(-2.0,0.0)(x => (x+1.0)*(x-1.0)) == -1.0 33 | 34 | findRoot(0.0,2.0)(x => x*x-2.0) == math.sqrt(2.0) 35 | ``` 36 | 37 | This directory contains a template for a Scala implementation, including the above test cases. Run the `~test` task from `sbt` to check your implementation. 38 | 39 | **Optional:** experts may want to consider using continuation passing to avoid repeated evaluation of the function at the same values. 40 | 41 | 42 | -------------------------------------------------------------------------------- /examples/C6-Smile/src/main/scala/smile.scala: -------------------------------------------------------------------------------- 1 | /* 2 | smile.scala 3 | 4 | Testing the use of Smile as a Scala library for data analysis 5 | 6 | */ 7 | 8 | object SmileApp { 9 | 10 | 11 | def main(args: Array[String]): Unit = { 12 | println("Hi") 13 | val url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data" 14 | val fileName = "yacht.csv" 15 | 16 | // download the file to disk if it hasn't been already 17 | val file = new java.io.File(fileName) 18 | if (!file.exists) { 19 | println("Downloading file...") 20 | val s = new java.io.PrintWriter(file) 21 | s.write("LongPos,PrisCoef,LDR,BDR,LBR,Froude,Resist\n") 22 | val data = scala.io.Source.fromURL(url).getLines 23 | data.foreach(l => s.write(l.trim.split(' ').filter(_ != "").mkString("",",","\n"))) 24 | s.close 25 | println("File downloaded.") 26 
| } 27 | 28 | println("Read the data from CSV into a DataFrame") 29 | val df = smile.read.csv(fileName) 30 | println(df) 31 | println(df.summary) 32 | 33 | println("Simple OLS regression") 34 | import smile.data.formula._ 35 | import scala.language.postfixOps 36 | val mod = smile.regression.ols("Resist" ~, df) 37 | println(mod) 38 | println(smile.regression.ols("Resist" ~ "Froude", df)) 39 | println(smile.regression.ols("Resist" ~ "Froude" + "LongPos", df)) 40 | 41 | println("Understand formula parsing...") 42 | println(buildFormula("Resist" ~).y(df)) 43 | println(buildFormula("Resist" ~).y(df).toDoubleArray) 44 | println(buildFormula("Resist" ~).matrix(df, true)) 45 | println(buildFormula("Resist" ~).matrix(df, true).toArray) 46 | println(buildFormula("Resist" ~).x(df)) 47 | println(buildFormula("Resist" ~).x(df).summary) 48 | 49 | 50 | } 51 | 52 | } 53 | 54 | // eof 55 | 56 | -------------------------------------------------------------------------------- /exercises/Spark.md: -------------------------------------------------------------------------------- 1 | # Spark 2 | 3 | ## Practical exercises 4 | 5 | ### 1. Unpack, test and configure Spark 6 | 7 | * Carefully work through the Spark chapter in the notes, unpacking, testing and configuring Spark as you go. Make sure that you can reproduce the first few examples before proceeding further. 8 | 9 | ### 2. Review some Spark documentation 10 | 11 | * The official [Spark Documentation](http://spark.apache.org/docs/2.4.5/) is pretty good. Read through the [Quick start guide](http://spark.apache.org/docs/2.4.5/quick-start.html), then quickly skim the [Programming guide](http://spark.apache.org/docs/2.4.5/rdd-programming-guide.html), then the [ML guide](http://spark.apache.org/docs/2.4.5/ml-guide.html), especially the section on [Classification and regression](http://spark.apache.org/docs/2.4.5/ml-classification-regression.html). 
Briefly familiarise yourself with the [API docs](http://spark.apache.org/docs/2.4.5/api/scala/index.html#org.apache.spark.package). 12 | 13 | ### 3. Logistic regression for the SpamBase dataset 14 | 15 | * This exercise will be concerned with analysis of the old SpamBase dataset. After skimming the documentation: 16 | * ftp://ftp.ics.uci.edu/pub/machine-learning-databases/spambase/ download the dataset: 17 | * ftp://ftp.ics.uci.edu/pub/machine-learning-databases/spambase/spambase.data 18 | to your machine and move it somewhere sensible for subsequent analysis. It actually isn't very big, so don't worry about size/memory issues. 19 | * The data is a simple CSV file, so can be parsed easily with Spark's built-in CSV parser. Write a Spark shell script to read the data and fit a simple logistic regression model for the final column (Spam or not) given the other variables. 20 | * Use Lasso regression to shrink out some of the variables. Choose your Lasso regularisation parameter by cross-validation. How many of the 57 predictor variables drop out of the regression in this case? 21 | * Create a Spark application for this analysis, package it, and submit it to Spark using `spark-submit`. 
22 | 23 | 24 | #### eof 25 | -------------------------------------------------------------------------------- /examples/C6-Rainier/src/main/scala/rainier.scala: -------------------------------------------------------------------------------- 1 | /* 2 | rainier.scala 3 | 4 | Simple Rainier logistic regression example 5 | 6 | */ 7 | 8 | object RainierLogRegApp { 9 | 10 | import com.stripe.rainier.core._ 11 | import com.stripe.rainier.compute._ 12 | import com.stripe.rainier.sampler._ 13 | import com.stripe.rainier.notebook._ 14 | import com.cibo.evilplot._ 15 | import com.cibo.evilplot.plot._ 16 | 17 | def main(args: Array[String]): Unit = { 18 | 19 | // first simulate some data from a logistic regression model 20 | implicit val rng = ScalaRNG(3) 21 | val N = 1000 22 | val beta0 = 0.1 23 | val beta1 = 0.3 24 | val x = (1 to N) map { _ => 25 | 3.0 * rng.standardNormal 26 | } 27 | val theta = x map { xi => 28 | beta0 + beta1 * xi 29 | } 30 | def expit(x: Double): Double = 1.0 / (1.0 + math.exp(-x)) 31 | val p = theta map expit 32 | val yb = p map (pi => (rng.standardUniform < pi)) 33 | val y = yb map (b => if (b) 1L else 0L) 34 | println(y.take(10)) 35 | println(x.take(10)) 36 | 37 | // now build Rainier model 38 | val b0 = Normal(0, 5).latent 39 | val b1 = Normal(0, 5).latent 40 | val model = Model.observe(y, Vec.from(x).map{xi => 41 | val theta = b0 + b1*xi 42 | val p = 1.0 / (1.0 + (-theta).exp) 43 | Bernoulli(p) 44 | }) 45 | 46 | // now sample from the model 47 | val sampler = EHMC(warmupIterations = 2000, iterations = 1000) 48 | println("Sampling...\nthis can take a while...") 49 | val bt = model.sample(sampler) 50 | println("Finished sampling.") 51 | val b0t = bt.predict(b0) 52 | println(b0t.sum/b0t.length) 53 | show("b0", density(b0t)) // only works in Jupyter and mdoc 54 | val b1t = bt.predict(b1) 55 | println(b1t.sum/b1t.length) 56 | show("b1", density(b1t)) // only works in Jupyter and mdoc 57 | displayPlot(density(b0t).render()) // hack for app/repl 58 | 
displayPlot(density(b1t).render()) // hack for app/repl 59 | 60 | } 61 | 62 | } 63 | 64 | // eof 65 | 66 | -------------------------------------------------------------------------------- /exercises/Collections.md: -------------------------------------------------------------------------------- 1 | # Collections 2 | 3 | ## Practical exercises 4 | 5 | Exercises following the material presented in Chapter 3 6 | 7 | ### 1. Review official documentation 8 | 9 | Briefly review the official [collections overview](http://docs.scala-lang.org/overviews/collections/overview.html), concentrating in particular on [immutable collection classes](http://docs.scala-lang.org/overviews/collections/concrete-immutable-collection-classes.html), and also the [parallel collections overview](http://docs.scala-lang.org/overviews/parallel-collections/overview.html). Try some code examples in a REPL. 10 | 11 | ### 2. Computing the sample mean and standard deviation 12 | 13 | a. By copying the `app-template` directory (or otherwise), create a new Scala SBT project. Write a function with signature 14 | ```scala 15 | meanAndSD(x: Vector[Double]): (Double, Double) 16 | ``` 17 | which returns a tuple containing the [sample mean](http://mathworld.wolfram.com/SampleMean.html) and [sample standard deviation](https://en.wikipedia.org/wiki/Standard_deviation) of the collection of numbers. 18 | 19 | b. When you get it working, write some tests to check it works on a few trivial examples. 20 | 21 | c. Generalise it so that it works for any collection of `Doubles`, and check that it works for parallel as well as serial collections. 22 | 23 | d. Test your function on huge collections of random *U(0,1)* quantities. What should the true mean and standard deviation be? Can you detect a difference in speed between the serial and parallel versions? 24 | 25 | e. (optional) You have probably written this code so that it computes the mean and SD using two passes over the data. 
Can you figure out a way to implement it using just a single pass? 26 | 27 | f. (optional) You have probably completed task e. using a sequential fold which can not easily be parallelised. Can you make it parallelisable by replacing your `fold` with `aggregate`. You will have to look up how `aggregate` works. 28 | 29 | 30 | ### 3. Wrap interval bisection code in an Option 31 | 32 | Starting from the code you wrote for [interval bisection](./bisection/Readme.md) previously, make it safe by wrapping it in an Option. See the [detailed instructions](option/Readme.md) for further information. 33 | 34 | 35 | #### eof 36 | 37 | -------------------------------------------------------------------------------- /exercises/Monte.md: -------------------------------------------------------------------------------- 1 | # Monte Carlo methods 2 | 3 | ## Practical exercises 4 | 5 | ### 1. Simple Monte Carlo 6 | 7 | A mixture random variable is constructed as a `Binomial` random quantity with sample size taken from a `Poisson` distribution with mean 20 and success probability drawn independently from a `Beta(4,4)` distribution. 8 | 9 | * Monadically, or otherwise, construct a function for drawing samples from this random variable. Note that fresh `Poisson` and `Beta` draws are required for each `Binomial` draw. 10 | * Take 10,000 draws and plot the distribution. 11 | * Average the draws to get an empirical estimate of the mean of this random variable. 12 | * What is the theoretical mean? 13 | 14 | ### 2. Bayesian inference for a normal random sample 15 | 16 | Consider a vector of iid sample observations `x` from a Gaussian distribution with unknown mean and variance. 
We can define a log-likelihood function with 17 | ```scala 18 | import breeze.stats.distributions.Gaussian 19 | import scala.collection.GenSeq 20 | def ll(x: GenSeq[Double])(mean: Double,stdev: Double): Double = { 21 | val gau = Gaussian(mean,stdev) 22 | x map (gau.logPdf) reduce (_+_) 23 | } 24 | ``` 25 | 26 | * Assuming a flat prior the log-posterior is the log-likelihood. In this case, write a Metropolis sampler to sample from the posterior distribution by using the log-posterior as the log-target. For a proposal kernel, use a bivariate normal distribution, constructed using the `MultivariateGaussian` distribution in Breeze. Centre the proposal on the current value, and use a proposal variance matrix which is a scaled version of the 2x2 identity matrix. Start off with a scaling of 1. 27 | * Test your implementation on simulated data by conditioning on a large `x` sampled with a mean and variance you know. Manually tune the scaling factor of your Metropolis algorithm to get reasonable mixing. Check that the posterior mean and standard deviation are close to the true values. 28 | * I deliberately parameterised the log likelihood with a `GenSeq`. Run your MCMC algorithm in parallel by passing in `x.par` instead of `x`. Time the runs to see what speed-up (if any) you get. You will probably only get significant speed-up for large `x` (for me, the parallel version is significantly quicker for a sample size of 10k). 29 | 30 | 31 | 32 | 33 | #### eof 34 | 35 | -------------------------------------------------------------------------------- /examples/C6-Rainier/docs/LogisticRegression.md: -------------------------------------------------------------------------------- 1 | # Logistic regression 2 | 3 | We will walk through a logistic regression example in Rainier. First some imports. 
4 | 5 | ```scala mdoc 6 | import com.stripe.rainier.core._ 7 | import com.stripe.rainier.compute._ 8 | import com.stripe.rainier.sampler._ 9 | import com.stripe.rainier.notebook._ 10 | import com.cibo.evilplot._ 11 | import com.cibo.evilplot.plot._ 12 | ``` 13 | 14 | Now simulate some synthetic data from a logistic regression model that we can used to test our inference algorithm. 15 | ```scala mdoc 16 | implicit val rng = ScalaRNG(3) 17 | val N = 1000 18 | val beta0 = 0.1 19 | val beta1 = 0.3 20 | val x = (1 to N) map { _ => 21 | 3.0 * rng.standardNormal 22 | } 23 | val theta = x map { xi => 24 | beta0 + beta1 * xi 25 | } 26 | def expit(x: Double): Double = 1.0 / (1.0 + math.exp(-x)) 27 | val p = theta map expit 28 | val yb = p map (pi => (rng.standardUniform < pi)) 29 | val y = yb map (b => if (b) 1L else 0L) 30 | println(y.take(10)) 31 | println(x.take(10)) 32 | ``` 33 | Now we have some data, we can build a Rainier model. 34 | ```scala mdoc 35 | val b0 = Normal(0, 5).latent 36 | val b1 = Normal(0, 5).latent 37 | val model = Model.observe(y, Vec.from(x).map{xi => 38 | val theta = b0 + b1*xi 39 | val p = 1.0 / (1.0 + (-theta).exp) 40 | Bernoulli(p) 41 | }) 42 | ``` 43 | This completes specification of the Bayesian model. We now need to sample from the implied posterior distribution. 44 | ```scala mdoc 45 | val sampler = EHMC(warmupIterations = 2000, iterations = 1000) 46 | println("Sampling...\nthis can take a while...") 47 | val bt = model.sample(sampler) 48 | println("Finished sampling.") 49 | val b0t = bt.predict(b0) 50 | println(b0t.sum/b0t.length) 51 | ``` 52 | We can plot the marginal posteriors using `show`, which works in both mdoc and Jupyter notebooks, but doesn't currently work from the Scala REPL. 
53 | ```scala mdoc:image:b0.png 54 | show("b0", density(b0t)) // only works in Jupyter and mdoc 55 | ``` 56 | 57 | ```scala mdoc 58 | val b1t = bt.predict(b1) 59 | println(b1t.sum/b1t.length) 60 | ``` 61 | 62 | ```scala mdoc:image:b1.png 63 | show("b1", density(b1t)) // only works in Jupyter and mdoc 64 | ``` 65 | So we see that mdoc documents provide a nice way to document Rainier modelling workflows, similar to the way people often document R workflows using R Markdown. 66 | -------------------------------------------------------------------------------- /SelfStudyGuide.md: -------------------------------------------------------------------------------- 1 | # Self Study Guide 2 | 3 | This course is currently configured to be delivered as a (very!) intensive three-day short course, covering three chapters of notes each day. If you like what you see here, please consider signing up for the next iteration - see the [front page](README.md) for details. 4 | 5 | However, since all essential materials are now freely available on-line, it is perfectly possible to self-study this course. Use the hashtag `#scscala` when discussing this course on-line to allow others to engage with you. 6 | 7 | Although I deliver this material in three days, it is not realistic to cover this material in three days of self-study. Even if you have the luxury of being able to study this course full-time, you should allow one full day per chapter. In other words, you should allow roughly two weeks to cover the full course, based on more-or-less full-time study. 8 | 9 | In the more typical case where you are studying this course on top of full-time study or employment, covering one chapter per week is probably more realistic. This will make for a nine-week course, covering material at roughly the same rate as a MOOC such as Coursera. 10 | 11 | However you study the course, the plan of study should be roughly the same. For each Chapter: 12 | 13 | 1. 
Read the [course notes](https://github.com/darrenjw/scala-course/raw/master/scscala.pdf) for the Chapter (one Chapter only) 14 | 2. Run the code examples from the Chapter. If you don't like typing, copy-and-paste code examples from the [fragments](fragments/Readme.md) directory. Note that copying-and-pasting from the PDF of the course notes doesn't work well. 15 | 3. Inspect and run all of the [examples](examples/) associated with the Chapter. 16 | 4. Work through all of the [exercises](exercises/Readme.md) associated with the Chapter. 17 | 5. Don't move on to the next Chapter until you have had a *serious* attempt at all of the end-of-Chapter exercises. 18 | 19 | You learn programming by *programming* and not by reading. Although it is tempting to just read through the notes and skip everything else, if you do this you are likely to get to the end feeling like you've sort-of understood everything but not actually be able to sit down and write code. 20 | 21 | Further information, including [laptop setup instructions](Setup.md), can be obtained from the [start here](StartHere.md) page. 22 | 23 | #### eof 24 | 25 | -------------------------------------------------------------------------------- /examples/C5-MonteCarlo/src/main/scala/monte-carlo.scala: -------------------------------------------------------------------------------- 1 | /* 2 | monte-carlo.scala 3 | Integration via rejection sampling 4 | Integrate the standard normal PDF from -5 to 5 to get an estimate close to 1... 
5 | Simulate points uniformly over a bounding box and look at fraction of points 6 | falling under the PDF 7 | */ 8 | 9 | import scala.math._ 10 | import breeze.stats.distributions.Uniform 11 | import breeze.linalg._ 12 | import scala.annotation.tailrec 13 | 14 | object MonteCarlo { 15 | 16 | def f(x: Double): Double = math.exp(-x * x / 2) / math.sqrt(2 * Pi) 17 | 18 | // Idiomatic Breeze solution 19 | def mc1(its: Int): Int = { 20 | val x = runif(its, -5.0, 5.0) 21 | val y = runif(its, 0.0, 0.5) 22 | val fx = x map { f(_) } 23 | sum((y <:< fx) map { xi => if (xi == true) 1 else 0 }) 24 | } 25 | 26 | // Fast, memory-efficient tail call 27 | def mc2(its: Long): Long = { 28 | @tailrec def mc(its: Long, acc: Long): Long = { 29 | if (its == 0) acc else { 30 | val x = runif(-5.0, 5.0) 31 | val y = runif(0.0, 0.5) 32 | if (y < f(x)) mc(its - 1, acc + 1) else mc(its - 1, acc) 33 | } 34 | } 35 | mc(its, 0) 36 | } 37 | 38 | // Parallel version 39 | def mc3(its: Long,NP: Int = 8): Long = { 40 | val N = its / NP // assuming NP | its 41 | (1 to NP).par.map { x => mc2(N) }.sum 42 | } 43 | 44 | // R-like functions for Uniform random numbers 45 | def runif(n: Int, l: Double, u: Double) = DenseVector[Double](Uniform(l, u).sample(n).toArray) 46 | def runif(l: Double, u: Double) = Uniform(l, u).draw 47 | 48 | // Function for timing 49 | def time[A](f: => A) = { 50 | val s = System.nanoTime 51 | val ret = f 52 | println("time: " + (System.nanoTime - s) / 1e6 + "ms") 53 | ret 54 | } 55 | 56 | // Main method for running the code 57 | def main(args: Array[String]) = { 58 | val N = 10000000 // 10^7 is as big as mc1() can really cope with 59 | println("Running with " + N + " iterations") 60 | println("Idiomatic vectorised solution") 61 | time { println(5.0 * mc1(N) / N) } 62 | println("Fast efficient (serial) tail call") 63 | time { println(5.0 * mc2(N) / N) } 64 | println("Parallelised version") 65 | time { println(5.0 * mc3(N) / N) } 66 | println("Vary size of parallel collection") 67 | 
(1 to 12).foreach{ i => 68 | println("NP = "+i) 69 | time(mc3(N,i)) 70 | } 71 | println("Done") 72 | } 73 | 74 | 75 | 76 | } 77 | 78 | -------------------------------------------------------------------------------- /StartHere.md: -------------------------------------------------------------------------------- 1 | # Start Here 2 | 3 | ## Main jump-off page for the Scala for Statistical Computing and Data Science Short Course 4 | 5 | Course participants should bookmark this page: https://github.com/darrenjw/scala-course/blob/master/StartHere.md 6 | 7 | * [Course outline](README.md) - front page of the repo, with brief summary overview 8 | * [Setup instructions](Setup.md) - details of how to set up your laptop for programming in Scala. Please follow these instructions carefully *in advance of the start of the course*. 9 | 10 | Registered course participants should not print the [**course notes**](https://github.com/darrenjw/scala-course/raw/master/scscala.pdf), as a printed copy of the latest version will be given to participants at the start of the course. Others are welcome to self-study this course - please see the [self-study guide](SelfStudyGuide.md). Use the hashtag `#scscala` for discussing the course and the course notes on Twitter and other social media platforms. 11 | 12 | ### Rough Schedule 13 | 14 | * 9.15: Setup 15 | * 9.30 Chapter 16 | * 10.30 Exercises 17 | * 11.30 Chapter 18 | * 12.30 Lunch 19 | * 1.30 Exercises 20 | * 2.30 Chapter 21 | * 3.30 Exercises 22 | 23 | ### Resources 24 | 25 | 26 | * [Useful links](UsefulLinks.md) - selective and curated collection of some important additional on-line resources 27 | * [app-template](app-template/) - Scala sbt "seed" project, for copying and editing to create a new Scala sbt project. Minimal dependencies in the sbt build file (just Breeze). However, if you have an internet connection, it is typically better to use `sbt new darrenjw/breeze.g8` to create a new project, as described in the notes. 
28 | * [sbt-test](sbt-test/) - simple Scala sbt project with lots of dependencies. See the [build.sbt](sbt-test/build.sbt) for list of dependencies. Primarily for testing correct installation of sbt and caching of commonly required dependencies. Also useful for spinning up a REPL (`sbt console`) with lots of dependencies for interactive experiments. The [src/test](sbt-test/src/test/scala/) subdirectory tree contains some basic examples of how to write test code. 29 | 30 | * [Fragments](fragments/Readme.md) - raw fragments of code from the course notes, auto-extracted by chapter 31 | * [Examples](examples/) - complete runnable code examples, split corresponding to each chapter of the course notes 32 | * [Exercises](exercises/Readme.md) - simple programming exercises, to be tackled following the presentation of each chapter of the notes. 33 | 34 | 35 | 36 | #### eof 37 | 38 | 39 | -------------------------------------------------------------------------------- /exercises/Breeze.md: -------------------------------------------------------------------------------- 1 | # Breeze 2 | 3 | ## Practical exercises 4 | 5 | #### Useful links: 6 | 7 | * [Breeze](https://github.com/scalanlp/breeze/) 8 | * [Wiki](https://github.com/scalanlp/breeze/wiki) 9 | * [API Docs](http://www.scalanlp.org/api/breeze/) 10 | 11 | ### 1. Review the on-line documentation 12 | 13 | Begin by reading through the [quickstart guide](https://github.com/scalanlp/breeze/wiki/Quickstart) and then read through the [linear algebra cheat sheet](https://github.com/scalanlp/breeze/wiki/Linear-Algebra-Cheat-Sheet). Then quickly check a few other pages on the [Breeze wiki](https://github.com/scalanlp/breeze/wiki). Finally, have a quick look at the [API docs](http://www.scalanlp.org/api/breeze/) - for example, search the docs for `Gamma` and see how Breeze parameterises the gamma distribution. 
Note that the docs are often very terse, so sometimes there's no alternative than to browse the [source code](https://github.com/scalanlp/breeze/tree/master/math/src/main/scala/breeze). Also, the [test code](https://github.com/scalanlp/breeze/tree/master/math/src/test/scala/breeze) can sometimes be useful for figuring out how to use a Breeze function. 14 | 15 | ### 2. Multivariate normal 16 | 17 | * Write a function with type signature 18 | ```scala 19 | rmvn(n: Int, mean: DenseVector[Double], cov: DenseMatrix[Double]): DenseMatrix[Double] 20 | ``` 21 | which returns a matrix with `n` rows, each row representing an iid draw from a multivariate normal with the given mean and variance matrix. Note that this can be accomplished by *post*-multiplying a matrix of iid *N(0,1)* random quantities by the *upper* Cholesky factor of the variance matrix (on the right), and then adding the mean to each row of the result (don't use the built-in Breeze function for simulating multivariate Gaussians unless you're stuck). Study my [PCA example](../examples/C4-PCA/src/main/scala/pca.scala) for ideas. 22 | * How can you test your code to ensure that you have implemented it correctly? See the [gamma testing](../examples/C4-GammaTest/src/main/scala/gamma-test.scala) example for clues. Also, `breeze.stats.covmat` may be of use. 23 | 24 | ### 3. Scatter-plot 25 | 26 | Write a function with type signature 27 | ```scala 28 | pairs(mat: DenseMatrix[Double]): Figure 29 | ``` 30 | which produces a scatterplot matrix similar to that produced by the `pairs()` function in R. eg. for a matrix with `k` columns, the function should plot a `k * k` array of scatter plots showing each variable against each other. Test your code on some simulated data generated using your `rmvn` function. 
31 | 32 | 33 | #### eof 34 | -------------------------------------------------------------------------------- /exercises/Intro.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | ## Practical exercises 4 | 5 | These exercises are to be undertaken following presentation of the material from Chapter 1 of the course notes. 6 | 7 | 8 | ### 1. SBT and editor setup 9 | 10 | Make sure that you have this repo cloned/downloaded on your system, that SBT is installed and working, and that you have a usable editor/IDE. Check through the [laptop set-up instructions](../Setup.md) and make sure you have done everything required. In particular, make sure that typing `sbt run` from the `sbt-test` directory correctly runs the test script. 11 | 12 | ### 2. Explore the repo 13 | 14 | Explore some of the directories in this repo. In particular, find the code fragments automatically extracted from each chapter, and the complete runnable examples. Examine the complete runnable `HelloWorld` example from Chapter 1. Type `sbt run` from the relevant directory to compile and run it. Note that no `build.sbt` file or fancy directory structure is required for a simple single-file project with no dependencies. 15 | 16 | ### 3. Create your own Scala SBT project 17 | 18 | Try *not* to run SBT from the `app-template` directory in order to keep it clean. Copy that directory (and its contents) somewhere on your system and create your own SBT project. Just copy the source code file for the `HelloWorld` example into the correct source code sub-directory and check that it works by first running `sbt` and then `run` from the SBT prompt. Open up the source code in your editor/IDE and edit the message that is printed, save, then `run` again from SBT. Then type `~run` in SBT, go back to your editor and change the message again. 
As you save the buffer, note that SBT detects that the source file has changed and automatically re-compiles and re-runs the project. 19 | 20 | ### 4. Use the REPL 21 | 22 | From the SBT prompt, type `console` to get a REPL. Enter `1+2` to check it works. 23 | 24 | ### 5. Scala basics tour 25 | 26 | Start working through the [basic tour](https://docs.scala-lang.org/tour/tour-of-scala.html) from the official [Scala documentation](http://docs.scala-lang.org/). When you get to the appropriate point in the tour, open Scala Fiddle in another browser tab and interactively explore Scala in the browser. Try to understand as much as possible as you go along. You should only attempt the first two or three sections for now. If you get these finished, browse some of the other official Scala documentation. You will want to bookmark this material to return to and work through some additional sections later. 27 | 28 | 29 | #### eof 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/C4-PCA/src/main/scala/pca.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pca.scala 3 | 4 | PCA for the dataset: 5 | 6 | http://archive.ics.uci.edu/ml/datasets/Iris 7 | 8 | from the Machine learning repository: 9 | 10 | http://archive.ics.uci.edu/ml/datasets.html 11 | 12 | */ 13 | 14 | import breeze.linalg._ 15 | import breeze.stats._ 16 | 17 | object PCA { 18 | 19 | case class Pca(mat: DenseMatrix[Double]) { 20 | // via SVD of the centred data matrix 21 | val xBar = mean(mat(::,*)).t 22 | val x = mat(*,::) - xBar 23 | val SVD = svd.reduced(x) 24 | val loadings = SVD.Vt.t 25 | val sdev = SVD.S / math.sqrt(x.rows - 1) 26 | lazy val scores = x * loadings 27 | } 28 | 29 | // Main runner method 30 | def main(args: Array[String]): Unit = { 31 | 32 | val url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" 33 | val fileName = "iris.csv" 34 | val imap = Map( 35 | "Iris-setosa" 
-> 0, 36 | "Iris-versicolor" -> 1, 37 | "Iris-virginica" -> 2 38 | ) 39 | 40 | // download the file to disk if it hasn't been already 41 | val file = new java.io.File(fileName) 42 | if (!file.exists) { 43 | val s = new java.io.PrintWriter(file) 44 | val data = scala.io.Source.fromURL(url).getLines 45 | data.foreach(l => s.write(l.trim.split(','). 46 | map(x=>imap.getOrElse(x,x)).mkString("",",","\n"))) 47 | s.close 48 | } 49 | 50 | // read the file from disk 51 | val mat = csvread(new java.io.File(fileName)) 52 | println("Mat Dim: " + mat.rows + " " + mat.cols) 53 | val x = mat(::,0 to 3) 54 | println("X Dim: " + x.rows + " " + x.cols) 55 | val clas = mat(::,4).toDenseVector 56 | 57 | println("PCA with built-in Breeze version (like R princomp):") 58 | val pca = new PCA(x,covmat(x)) 59 | println("Loadings:") 60 | println(pca.loadings) 61 | println("Stdev:") 62 | println(pca.sdev) 63 | println(pca.scores(0 to 5,::)) 64 | 65 | println("Now my version (like R prcomp):") 66 | val myPca = Pca(x) 67 | println(myPca.loadings) // loadings transposed 68 | println(myPca.sdev) 69 | println(myPca.scores(0 to 5,::)) 70 | 71 | // scatter plot first 2 principal components 72 | import breeze.plot._ 73 | val fig = Figure("PCA") 74 | val p = fig.subplot(0) 75 | val ind0 = (0 until x.rows) filter (i => clas(i) == 0) 76 | p += plot(myPca.scores(ind0,0).toDenseVector, 77 | myPca.scores(ind0,1).toDenseVector,'.',colorcode="blue") 78 | val ind1 = (0 until x.rows) filter (i => clas(i) == 1) 79 | p += plot(myPca.scores(ind1,0).toDenseVector, 80 | myPca.scores(ind1,1).toDenseVector,'.',colorcode="red") 81 | val ind2 = (0 until x.rows) filter (i => clas(i) == 2) 82 | p += plot(myPca.scores(ind2,0).toDenseVector, 83 | myPca.scores(ind2,1).toDenseVector,'.',colorcode="green") 84 | } 85 | 86 | } 87 | 88 | // eof 89 | 90 | -------------------------------------------------------------------------------- /exercises/option/Readme.md: 
-------------------------------------------------------------------------------- 1 | # Collections 2 | 3 | ## Exercise: Wrapping a root-finder in an Option 4 | 5 | ### Part A 6 | 7 | Copy your previous `findRoot` function from the [previous exercise](../bisection/Readme.md), and add a new function `findRootOpt` which wraps it, so that instead of returning a `Double` it returns `Option[Double]`. The new signature is: 8 | 9 | ```scala 10 | findRootOpt(low: Double, high: Double)(f: Double => Double): Option[Double] 11 | ``` 12 | 13 | Add checks that `low < high` and that the sign of `f(low)` is different from the sign of `f(high)` and return `None` if either check fails. Otherwise your function should behave as previously, returning the root in a `Some`. 14 | 15 | All of the previous test cases translate obviously as follows: 16 | 17 | ```scala 18 | findRootOpt(-10.0,10.0)(x => x+1.0) == Some(-1.0) 19 | 20 | findRootOpt(-5.0,10.0)(x => 2.0-x) == Some(2.0) 21 | 22 | findRootOpt(0.0,5.0)(x => x-1.0) == Some(1.0) 23 | 24 | findRootOpt(0.0,2.0)(x => (x+1.0)*(x-1.0)) == Some(1.0) 25 | 26 | findRootOpt(-2.0,0.0)(x => (x+1.0)*(x-1.0)) == Some(-1.0) 27 | 28 | findRootOpt(0.0,2.0)(x => x*x-2.0) == Some(math.sqrt(2.0)) 29 | ``` 30 | 31 | In addition, we can add some new test cases which test the initial assumptions: 32 | 33 | ```scala 34 | findRootOpt(2.0,0.0)(x => x-1.0) == None 35 | 36 | findRootOpt(-1.0,-3.0)(x => x+2.0) == None 37 | 38 | findRootOpt(0.0,2.0)(x => x+1.0) == None 39 | 40 | findRootOpt(0.0,2.0)(x => x-5.0) == None 41 | 42 | ``` 43 | 44 | Again, these test cases are all included in the associated Scala template in this directory, and can be run with the `~testOnly PartA` task in `sbt`. 45 | 46 | 47 | ### Part B (if time permits) 48 | 49 | The quadratic curve `y = a*x*x` for any fixed `a > 0` intersects the unit circle `x*x + y*y = 1` exactly once for `0 <= x <= 1`. Our task is to use our function `findRootOpt` to find this `x`. 
50 | 51 | Using just a tiny bit of maths, we can write the solution to this problem as the solution to the triangular system: 52 | 53 | ```scala 54 | y - a*(1-y*y) = 0 55 | 56 | x*x + y*y -1 = 0 57 | 58 | ``` 59 | 60 | The left hand side of first equation will clearly be negative at `y=0` and positive at `y=1`. Then for `0 <= y <= 1`, the left hand side of the second equation will be negative at `x=0` and positive at `x=1`. 61 | 62 | Write a function, `solveQuad`, which accepts a value `a`, and uses a for-expression with `findRootOpt` to obtain the solution for `x`. It should have signature: 63 | 64 | ```scala 65 | solveQuad(a: Double): Option[Double] 66 | ``` 67 | 68 | We can test this function by picking an `a`, solving for `x`, computing `y = a*x*x`, then checking whether `x*x + y*y = 1`. Some example tests are included in the Scala template in this directory. 69 | 70 | You can run all tests for Part A and Part B with the `~test` task in `sbt`, or just the specific tests for Part B with `~testOnly PartB`. 
71 | 72 | 73 | -------------------------------------------------------------------------------- /UsefulLinks.md: -------------------------------------------------------------------------------- 1 | # Useful Links 2 | 3 | ## A curated set of links to useful additional on-line resources 4 | 5 | * [Official Scala website](http://www.scala-lang.org/) 6 | * [Documentation](http://docs.scala-lang.org/) 7 | * [Getting started](https://docs.scala-lang.org/getting-started/) 8 | * [IntelliJ](https://docs.scala-lang.org/getting-started/intellij-track/getting-started-with-scala-in-intellij.html) 9 | * [Scaladoc](http://docs.scala-lang.org/overviews/scaladoc/overview.html) 10 | * [API Docs (2.12.10)](https://www.scala-lang.org/api/2.12.10/) 11 | * [Scala Exercises](https://www.scala-exercises.org/) 12 | * [Scala tutorial](https://www.scala-exercises.org/scala_tutorial/) 13 | * [Standard library](https://www.scala-exercises.org/std_lib/) 14 | * [sbt](http://www.scala-sbt.org/) - build tool 15 | * [giter8 templates](https://github.com/foundweekends/giter8/wiki/giter8-templates) 16 | * [Breeze](https://github.com/scalanlp/breeze/) - numerical computing library 17 | * [Wiki](https://github.com/scalanlp/breeze/wiki) 18 | * [Quickstart](https://github.com/scalanlp/breeze/wiki/Quickstart) 19 | * [Linear algebra cheat sheet](https://github.com/scalanlp/breeze/wiki/Linear-Algebra-Cheat-Sheet) 20 | * [API Docs](http://www.scalanlp.org/api/breeze/) 21 | * [Spire](https://typelevel.org/spire) - numeric types library 22 | * [Smile](http://haifengl.github.io/) - basic stats and ML 23 | * [Rainier](https://rainier.fit/) - Bayesian modelling and probabilistic programming 24 | * [EvilPlot](https://cibotech.github.io/evilplot/) - plotting library 25 | * [Mdoc](https://scalameta.org/mdoc/) - typechecked Markdown for Scala 26 | * [ScalaTest](http://www.scalatest.org/) - popular unit testing library 27 | * [ScalaCheck](https://www.scalacheck.org/) - property-based testing 28 | * [Apache 
Spark](http://spark.apache.org/) - big data framework 29 | * [Downloads](http://spark.apache.org/downloads.html) 30 | * [Documentation](http://spark.apache.org/docs/latest/) 31 | * [Quick start](http://spark.apache.org/docs/latest/quick-start.html) 32 | * [RDD Programming guide](http://spark.apache.org/docs/latest/rdd-programming-guide.html) 33 | * [SQL, DataFrames and Datasets](http://spark.apache.org/docs/latest/sql-programming-guide.html) 34 | * [MLlib](http://spark.apache.org/docs/latest/ml-guide.html) 35 | * [ML Pipelines](http://spark.apache.org/docs/latest/ml-pipeline.html) 36 | * [Classification and regression](http://spark.apache.org/docs/latest/ml-classification-regression.html) 37 | * [API Docs](http://spark.apache.org/docs/latest/api/scala/) 38 | * [Cats](http://typelevel.org/cats/) 39 | * [Type classes](http://typelevel.org/cats/typeclasses.html) 40 | * [Data types](http://typelevel.org/cats/datatypes.html) 41 | * [API Docs](http://typelevel.org/cats/api/cats/) 42 | * [Simulacrum](https://github.com/typelevel/simulacrum) - type class support 43 | 44 | * [Darren's Scala links](https://github.com/darrenjw/djwhacks/blob/master/scala/ScalaLinks.md) - a much less selective and less well curated set of Scala links 45 | 46 | #### eof 47 | 48 | 49 | -------------------------------------------------------------------------------- /ScalaIDE.md: -------------------------------------------------------------------------------- 1 | # Installing the Scala IDE 2 | 3 | ### N.B. 
I'm leaving this page for historical reasons, but the Scala IDE is now considered obsolete 4 | 5 | 6 | ## Useful links 7 | 8 | * [ScalaIDE](http://scala-ide.org/) - based on Eclipse 9 | * [Download](http://scala-ide.org/download/sdk.html) 10 | * [Documentation](http://scala-ide.org/documentation.html) 11 | * [sbteclipse](https://github.com/typesafehub/sbteclipse) - sbt plugin for eclipse 12 | * [Documentation](https://github.com/typesafehub/sbteclipse/wiki) 13 | * [Installation](https://github.com/typesafehub/sbteclipse/wiki/Installing-sbteclipse) 14 | * [User guide](https://github.com/typesafehub/sbteclipse/wiki/Using-sbteclipse) 15 | 16 | ## Installation 17 | 18 | The ScalaIDE is based on Eclipse, which is a JVM application, and is therefore easy to install as a user without admin/root privileges. 19 | 20 | **IMPORTANT** *As we are using Scala 2.12.1 for this course, it is necessary to use a Scala IDE from the 4.6.x series. The 4.5.x series does not have proper support for Scala 2.12.* 21 | 22 | From the [download site](http://scala-ide.org/download/sdk.html), select the version of the IDE for your OS. Unpack this in a convenient place on your system and follow any installation instructions. Running it should be a simple matter of running the `eclipse` executable in the top-level directory. See the [Documentation](http://scala-ide.org/documentation.html) for further details. 23 | 24 | To use the ScalaIDE with sbt projects, you must also install the eclipse plugin for sbt, `sbteclipse`. This should be as simple as adding the line: 25 | ```scala 26 | addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.1.0") 27 | ``` 28 | to your `~/.sbt/0.13/plugins/plugins.sbt` file. Create this file if you don't already have it. See the [sbteclipse](https://github.com/typesafehub/sbteclipse) page for further details. 
29 | 30 | ## Using the ScalaIDE with sbt projects 31 | 32 | The main thing to understand is that the ScalaIDE needs to know about the structure of your sbt project. This information is encoded in Eclipse project files in the top-level directory of your sbt project (where the file `build.sbt` will often be present). An initial set of project files for an sbt project can be generated using the `eclipse` sbt task provided by the `sbteclipse` plugin. 33 | 34 | So, before using the ScalaIDE with a particular sbt project for the first time, first run 35 | ```bash 36 | sbt eclipse 37 | ``` 38 | to analyse the project and create eclipse project files for it. Then start the ScalaIDE. If it asks about a workspace, make sure you select something *different to* the sbt project directory. Then import the project using the *Import Wizard* (under the File menu) to import *Existing Projects into Workspace*. You may need to repeat this process if you make significant changes to the `build.sbt` file. 39 | 40 | Once you are up-and-running, Eclipse provides fairly sophisticated IDE functionality. Some commonly used commands include: 41 | 42 | * Shift-Ctrl-F - Reformat source file 43 | * Shift-Ctrl-W - Close all windows (from package explorer) 44 | * Shift-Ctrl-P - Go to matching bracket 45 | * Ctrl-Space - Content assist 46 | 47 | ### Scala worksheet 48 | 49 | * Shift-Ctrl-B - Re-run all code 50 | 51 | See the [ScalaIDE Documentation](http://scala-ide.org/documentation.html) for further information. 
52 | 53 | 54 | 55 | 56 | #### eof 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /examples/C6-DataFrames/datatable/datatable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | datatable.scala 3 | 4 | Test of "scala-datatable" and "scala-csv" 5 | 6 | */ 7 | 8 | import java.io.{File,FileReader} 9 | import com.github.tototoshi.csv._ 10 | import com.github.martincooper.datatable._ 11 | import scala.annotation.tailrec 12 | import scala.util.Try 13 | 14 | object StringCol 15 | 16 | object DatatableTest { 17 | 18 | def readCsv(name: String, file: FileReader, colTypes: Map[String,Object]): DataTable = { 19 | val reader=CSVReader.open(file) 20 | val all=reader.allWithHeaders() 21 | reader.close() 22 | val ks=colTypes.keys 23 | val colSet=ks map {key => (key,all map {row => row(key)}) } 24 | val dataCols=colSet map {pair => colTypes(pair._1) match { 25 | case StringCol => new DataColumn[String](pair._1,pair._2) 26 | case Int => new DataColumn[Int](pair._1,pair._2 map {x=> 27 | Try(x.toInt).toOption.getOrElse(-99)}) 28 | case Double => new DataColumn[Double](pair._1,pair._2 map {x=> 29 | Try(x.toDouble).toOption.getOrElse(-99.0)}) 30 | } 31 | } 32 | DataTable(name,dataCols).get 33 | } 34 | 35 | def writeCsv(df: DataTable,out: File): Unit = { 36 | val writer = CSVWriter.open(out) 37 | writer.writeRow(df.columns.map{_.name}) 38 | df.foreach{r=>writer.writeRow(r.values)} 39 | writer.close() 40 | } 41 | 42 | 43 | def main(args: Array[String]) = { 44 | 45 | val colTypes=Map("DriveTrain" -> StringCol, 46 | "Min.Price" -> Double, 47 | "Cylinders" -> Int, 48 | "Horsepower" -> Int, 49 | "Length" -> Int, 50 | "Make" -> StringCol, 51 | "Passengers" -> Int, 52 | "Width" -> Int, 53 | "Fuel.tank.capacity" -> Double, 54 | "Origin" -> StringCol, 55 | "Wheelbase" -> Int, 56 | "Price" -> Double, 57 | "Luggage.room" -> Double, 58 | "Weight" -> Int, 59 | "Model" -> StringCol, 60 | "Max.Price" -> 
Double, 61 | "Manufacturer" -> StringCol, 62 | "EngineSize" -> Double, 63 | "AirBags" -> StringCol, 64 | "Man.trans.avail" -> StringCol, 65 | "Rear.seat.room" -> Double, 66 | "RPM" -> Int, 67 | "Turn.circle" -> Double, 68 | "MPG.highway" -> Int, 69 | "MPG.city" -> Int, 70 | "Rev.per.mile" -> Int, 71 | "Type" -> StringCol) 72 | val df=readCsv("Cars93",new FileReader("../r/cars93.csv"),colTypes) 73 | println(df.length,df.columns.length) 74 | 75 | val df2=df.filter(row=>row.as[Double]("EngineSize")<=4.0).toDataTable 76 | println(df2.length,df2.columns.length) 77 | 78 | val oldCol=df2.columns("Weight").as[Int] 79 | val newCol=new DataColumn[Double]("WeightKG",oldCol.data.map{_.toDouble*0.453592}) 80 | val df3=df2.columns.add(newCol).get 81 | println(df3.length,df3.columns.length) 82 | 83 | writeCsv(df3,new File("out.csv")) 84 | 85 | //println("Done") 86 | } 87 | 88 | 89 | 90 | } 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /exercises/option/src/test/scala/option-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | ex2-test.scala 3 | 4 | Tests for Exercise 2 5 | 6 | */ 7 | 8 | import org.scalatest._ 9 | import org.scalatest.Matchers._ 10 | 11 | class PartA extends FlatSpec { 12 | 13 | "1+2" should "=3" in { 14 | assert(1 + 2 === 3) 15 | } 16 | 17 | val tol = 1.0e-8 18 | 19 | def approxEq(test: Double, should: Double): Boolean = { 20 | if (math.abs(test - should) < tol) true else { 21 | println("approxEq test failed: found " + test + " but expected " + should + " with tolerance " + tol) 22 | false 23 | } 24 | } 25 | 26 | "1.0 " should "approxEq 1.0" in { 27 | assert(approxEq(1.0, 1.0)) 28 | } 29 | 30 | import OptionBisect._ 31 | 32 | "findRootOpt(-10.0,10.0)(x => x+1.0)" should "= Some(-1.0)" in { 33 | assert(approxEq(findRootOpt(-10.0, 10.0)(x => x + 1.0).getOrElse(0.0), -1.0)) 34 | } 35 | 36 | "findRootOpt(-5.0, 10.0)(x => 2.0 - x)" should "= Some(2.0)" in { 
37 | assert(approxEq(findRootOpt(-5.0, 10.0)(x => 2.0 - x).getOrElse(0.0), 2.0)) 38 | } 39 | 40 | "findRootOpt(0.0, 5.0)(x => x - 1.0)" should "= Some(1.0)" in { 41 | assert(approxEq(findRootOpt(0.0, 5.0)(x => x - 1.0).getOrElse(0.0), 1.0)) 42 | } 43 | 44 | "findRootOpt(0.0, 2.0)(x => (x + 1.0) * (x - 1.0))" should "= Some(1.0)" in { 45 | assert(approxEq(findRootOpt(0.0, 2.0)(x => (x + 1.0) * (x - 1.0)).getOrElse(0.0), 1.0)) 46 | } 47 | 48 | "findRootOpt(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0))" should "= Some(-1.0)" in { 49 | assert(approxEq(findRootOpt(-2.0, 0.0)(x => (x + 1.0) * (x - 1.0)).getOrElse(0.0), -1.0)) 50 | } 51 | 52 | "findRootOpt(0.0, 2.0)(x => x * x - 2.0)" should "= Some(math.sqrt(2.0))" in { 53 | assert(approxEq(findRootOpt(0.0, 2.0)(x => x * x - 2.0).getOrElse(0.0), math.sqrt(2.0))) 54 | } 55 | 56 | "findRootOpt(2.0,0.0)(x => x-1.0)" should "= None" in { 57 | assert(findRootOpt(2.0, 0.0)(x => x - 1.0) == None) 58 | } 59 | 60 | "findRootOpt(-1.0,-3.0)(x => x+2.0)" should "= None" in { 61 | assert(findRootOpt(-1.0, -3.0)(x => x + 2.0) == None) 62 | } 63 | 64 | "findRootOpt(0.0,2.0)(x => x+1.0)" should "= None" in { 65 | assert(findRootOpt(0.0, 2.0)(x => x + 1.0) == None) 66 | } 67 | 68 | "findRootOpt(0.0,2.0)(x => x-5.0)" should "= None" in { 69 | assert(findRootOpt(0.0, 2.0)(x => x - 5.0) == None) 70 | } 71 | 72 | } 73 | 74 | class PartB extends FlatSpec { 75 | 76 | "1+2" should "=3" in { 77 | assert(1 + 2 === 3) 78 | } 79 | 80 | val tol = 1.0e-8 81 | 82 | def approxEq(test: Double, should: Double): Boolean = { 83 | if (math.abs(test - should) < tol) true else { 84 | println("approxEq test failed: found " + test + " but expected " + should + " with tolerance " + tol) 85 | false 86 | } 87 | } 88 | 89 | "1.0 " should "approxEq 1.0" in { 90 | assert(approxEq(1.0, 1.0)) 91 | } 92 | 93 | import OptionBisect._ 94 | 95 | def testX(a: Double, x: Double): Boolean = { 96 | val y = a * x * x 97 | approxEq(x * x + y * y, 1.0) 98 | } 99 | 100 | "solveQuad(0.1)" 
should "work" in { 101 | assert(testX(0.1, solveQuad(0.1).getOrElse(0.0))) 102 | } 103 | 104 | "solveQuad(1.0)" should "work" in { 105 | assert(testX(1.0, solveQuad(1.0).getOrElse(0.0))) 106 | } 107 | 108 | "solveQuad(10.0)" should "work" in { 109 | assert(testX(10.0, solveQuad(10.0).getOrElse(0.0))) 110 | } 111 | 112 | "solveQuad(0.01)" should "work" in { 113 | assert(testX(0.01, solveQuad(0.01).getOrElse(0.0))) 114 | } 115 | 116 | 117 | 118 | } 119 | 120 | /* eof */ 121 | 122 | -------------------------------------------------------------------------------- /exercises/Stats.md: -------------------------------------------------------------------------------- 1 | # Statistical modelling 2 | 3 | ## Practical exercises 4 | 5 | You should selectively choose from this collection of exercises according to your personal interests. 6 | 7 | ### 1. Linear regression modelling 8 | 9 | * Run the [regression example](../examples/C6-Regression/) for the [yacht hydrodynamics dataset](http://archive.ics.uci.edu/ml/datasets/Yacht+Hydrodynamics), and go through the code carefully to understand exactly how it works. 10 | * When you are happy with it, make a copy and edit it to do a regression analysis for the [airfoil self-noise dataset](http://archive.ics.uci.edu/ml/datasets/Airfoil+Self-Noise). Which variables are significant for predicting scaled sound pressure? 11 | 12 | ### 2. IRLS code optimisation 13 | 14 | * Make sure you can run the logistic regression example from the notes using the simple IRLS function that was provided. 15 | * The IRLS function is illustrative rather than efficient. There are many ways in which the code could be made more efficient. We will start with the weight matrix, `W`. This is an `n` x `n` matrix, which is bad-news if `n` is large. But it's diagonal, so it could easily be represented by an `n`-vector. Modify the code to make `W` a vector rather than a matrix, and check it gives the same results as the previous version. 
Time it on some big problems to see if it's perceptibly faster. 16 | * (optional) Google the efficient implementation of IRLS (using QR decomposition), and implement it. Check it works and that it's faster. 17 | 18 | ### 3. Scala-Glm library 19 | 20 | I've created a small library for fitting linear and generalised linear models, based on the code examples from this course. See the [scala-glm](https://github.com/darrenjw/scala-glm) repo for further details. 21 | 22 | * Try it out and make sure you know how to use it. 23 | * Once you have figured out how it works, take some time to browse the source code. This is a small library with a relatively simple structure. It serves as an example of how to create a small library with a few source files and a few test files. It is a little bit bigger than the very small examples we have been focussing on in this course, but a lot smaller than a large library like Breeze, which can be a bit daunting at first. 24 | * Look at how I've (re-)structured the GLM code, and how I've implemented the IRLS algorithm. 25 | 26 | ### 4. Smile 27 | 28 | [Smile](http://haifengl.github.io/) has lots of functionality relating to EDA, statistical modelling and machine learning, and can be used as a library from Scala. It's probably worth figuring out how to use it. I have an [example project](../examples/C6-Smile/) to show how to use it for a the yacht hydrodynamics linear regression example. 29 | 30 | * Run this example, and study the code to make sure you know how it works. 31 | * Adapt the code to analyse the airfoil self-noise data (from Exercise 1). 32 | * Write code to fit a logistic regression model to some simulated/synthetic data, and make sure that your Smile model recovers the true values used to simulate the data to a reasonable degree. 33 | * Try downloading and running Smile as a standalone piece of software, using the Smile shell/REPL. 34 | 35 | ### 5. 
Rainier 36 | 37 | If you want to go beyond simple statistical models, then a library for MCMC-based Bayesian hierarchical modelling is highly desirable. [Rainier](https://rainier.fit/) is an HMC-based Scala library, that is very useful for fitting random and mixed effects models in Scala. I have an [example project](../examples/C6-Rainier/) to show how to use it for a simple Bayesian logistic regression model. 38 | 39 | * Run this example, and study the code to make sure you know how it works. 40 | * Work through the Rainier docs tutorial, replicating the examples in the REPL. The `sbt console` associated with the above example project should be suitable for this. 41 | 42 | 43 | 44 | #### eof 45 | 46 | -------------------------------------------------------------------------------- /exercises/Tools.md: -------------------------------------------------------------------------------- 1 | # Tools 2 | 3 | ## Practical exercises 4 | 5 | Again, choose selectively from these exercises according to interests and your previous selections. 6 | 7 | ### 1. ScalaDoc 8 | 9 | * Go back to your linear regression example from the Chapter 6 exercises, and add ScalaDoc documentation to the `backSolve` method and `Lm` case class. Generate HTML documentation and check it with your web browser. 10 | 11 | ### 2. Testing 12 | 13 | * Continuing with the same example, add some ScalaTest unit tests. For testing `backsolve`, just add a couple of tests using some simple 2x2 examples picked by hand. 14 | * For testing `Lm`, start by testing it with two or three points on a known straight line. 15 | * Try adding some property-based tests to your code, using ScalaCheck. 16 | 17 | ### 3. Interfacing with R 18 | 19 | * One way we could check that our logistic regression code is working as it should would be to read in or simulate a fairly small dataset and fit it with our code, then send the dataset to R and re-fit it with the `glm` function in R. 
Then bring the fitted coefficients back to Scala for comparison. Take a look at the tests for the [scala-glm](https://github.com/darrenjw/scala-glm/) library, which uses exactly this strategy. 20 | * If you are using simulated data, you could easily loop this to check for agreement on a range of small simulated datasets (ideally using ScalaCheck). 21 | 22 | ### 4. Interfacing with Python 23 | 24 | Not covered in the course, but it seems that calling Python from Scala is covered by the [ScalaPy](https://github.com/shadaj/scalapy) library. Calling Python machine learning libraries from Scala seems to be a standard use-case. Calling Scala from Python is less obvious. [pySpark](https://spark.apache.org/docs/latest/api/python/) uses [py4j](https://www.py4j.org/), which is a library for calling Java from Python, so that is probably as good a solution as any. 25 | 26 | * See if you can figure out how to call Python from Scala. 27 | * If you get it working, see if you can call figure out how to call a [scikit-learn](https://scikit-learn.org/) function from Scala. 28 | 29 | ### 5. EvilPlot 30 | 31 | [EvilPlot](https://cibotech.github.io/evilplot/) is a nice library for generating high-quality plots and charts using Scala. I have an [example project](../examples/C7-EvilPlot/) which shows how to use it to generate a range of plots and charts, based mainly on examples from the EvilPlot documentation. 32 | 33 | * Run the example project, and inspect the code to see how it works 34 | * Read through some of the EvilPlot documentation 35 | * Produce some nice charts and plots for one or more of the examples you have previously considered, such as a regression model, but previously charted using breeze-viz. 36 | 37 | ### 6. Mdoc 38 | 39 | [Mdoc](https://scalameta.org/mdoc/) is a great framework for documenting libraries and workflows using executable Scala code blocks within Markdown documents. 
A couple of the examples we have already seen had some mdoc documentation associated with them. 40 | 41 | * The [Smile example](../examples/C6-Smile/) has an mdoc document in `docs`, and the `mdoc` sbt task compiles this, and puts generated Markdown in `target/mdoc`. Make sure you know how it works. 42 | * The [Rainier example](../examples/C6-Rainier/) has an mdoc document as well. Note that Rainier has built-in support for generating EvilPlot figures, and hooks for including these in mdoc documents and Jupyter notebooks. The mdoc document associated with this example illustrates how to use this functionality to embed Rainier EvilPlot figures into a mdoc document. Study it to see how it works. Note that Rainier acheives this by making use of mdoc PostModifier hooks - you can read more about those [here](https://scalameta.org/mdoc/docs/modifiers.html#postmodifier). 43 | * Add some Mdoc tutorial documentation to one of the examples you have developed during this course, in order to document your workflow. 44 | 45 | 46 | 47 | 48 | #### eof 49 | -------------------------------------------------------------------------------- /Ensime.md: -------------------------------------------------------------------------------- 1 | # Installing Ensime 2 | 3 | ## Useful links 4 | 5 | Some useful links for using Emacs and Ensime with sbt: 6 | 7 | * [Ensime](http://ensime.org/) 8 | * [Learning Emacs](http://ensime.org/editors/emacs/learning) 9 | * [Installing with Emacs](http://ensime.org/editors/emacs/install/) 10 | * [Sbt plugin for Ensime](http://ensime.org/build_tools/sbt/) 11 | * [Emacs Ensime User Guide](http://ensime.org/editors/emacs/userguide/) 12 | 13 | ## Installation 14 | 15 | I am assuming that you are already familar with Emacs and have it installed on your system. If this is not the case, I recommend using the [Scala IDE](ScalaIDE.md) for the short course, as Emacs has a fairly steep learning curve. 
You can always investigate Emacs and Ensime later once you are more familiar with Scala. 16 | 17 | Ensime is installed using [MELPA](http://melpa.org/) - the Emacs package archive. If you don't currently use MELPA, you must first enable it by copying a snippet of code like: 18 | ```lisp 19 | ;; MELPA package manager 20 | (require 'package) 21 | (setq 22 | package-archives '(("gnu" . "http://elpa.gnu.org/packages/") 23 | ("org" . "http://orgmode.org/elpa/") 24 | ("melpa" . "http://melpa.org/packages/") 25 | ("melpa-stable" . "http://stable.melpa.org/packages/")) 26 | package-archive-priorities '(("melpa-stable" . 1))) 27 | 28 | (package-initialize) 29 | (when (not package-archive-contents) 30 | (package-refresh-contents) 31 | (package-install 'use-package)) 32 | (require 'use-package) 33 | ``` 34 | into your `.emacs` or `.emacs.d/init.el` file. Try restarting Emacs and check there are no errors. If for some reason this doesn't work, you could try adding the snippet: 35 | ```lisp 36 | (unless (package-installed-p 'use-package) 37 | (package-refresh-contents) 38 | (package-install 'use-package)) 39 | ``` 40 | immediately before the final line. See the [Learning Emacs](http://ensime.org/editors/emacs/learning) page for further details. 41 | 42 | 43 | 44 | Once you have MELPA set up, installing Ensime should be as simple as copying the snippet: 45 | ```lisp 46 | (use-package ensime 47 | :ensure t 48 | :pin melpa) 49 | ``` 50 | to the end of your init file and restarting Emacs, but see the [Installing with Emacs](http://ensime.org/editors/emacs/install/) page for further details. 51 | 52 | To use Ensime with sbt, you also need to install the Ensime plugin for sbt. This should be as simple as adding the line: 53 | ```scala 54 | addSbtPlugin("org.ensime" % "sbt-ensime" % "1.12.6") 55 | ``` 56 | to your `~/.sbt/0.13/plugins/plugins.sbt` file. Create this file if you don't already have it. 
It's also a good idea to add the lines: 57 | ```scala 58 | import org.ensime.EnsimeCoursierKeys._ 59 | ensimeServerVersion in ThisBuild := "2.0.0-SNAPSHOT" 60 | ``` 61 | to your `~/sbt/0.13/global.sbt` file (again, create it if you don't have it). See the [Sbt plugin for Ensime](http://ensime.org/build_tools/sbt/) page for further details. 62 | 63 | ## Using Ensime 64 | 65 | The main thing to understand is that Ensime needs to know about the structure of your sbt project. This information is encoded in a file `.ensime` in the top-level directory of your sbt project (where the file `build.sbt` will often be present). An initial `.ensime` file for an sbt project can be generated using the `ensimeConfig` sbt task provided by the `sbt-ensime` plugin. 66 | 67 | So, before using Emacs/Ensime with a particular sbt project for the first time, first run 68 | ```bash 69 | sbt ensimeConfig 70 | ``` 71 | to analyse the project and create a `.ensime` file for it. You should probably re-run this after editing `build.sbt` or other build configuration files. Then start emacs with a command like `emacs src/main/scala/blah/*.scala &`. This will start up emacs and some basic syntax highlighting will be provided by `scala-mode`. However, you still need to start up Ensime with `M-x ensime`. Once you are up-and-running, Ensime provides fairly sophisticated IDE functionality. Some commonly used commands include: 72 | 73 | * M-x ensime - Start up Ensime 74 | * C-c C-v d - Scaladoc for symbol at cursor 75 | * C-c C-v f - Reformat source code in this buffer 76 | * C-c C-b c - sbt compile 77 | * C-c C-b r - sbt run 78 | 79 | See the [Emacs Ensime User Guide](http://ensime.org/editors/emacs/userguide/) for further details. 
80 | 81 | 82 | 83 | #### eof 84 | 85 | -------------------------------------------------------------------------------- /examples/C6-Regression/src/main/scala/regression.scala: -------------------------------------------------------------------------------- 1 | /* 2 | regression.scala 3 | 4 | Linear regression for the dataset: 5 | 6 | http://archive.ics.uci.edu/ml/datasets/Yacht+Hydrodynamics 7 | 8 | from the Machine learning repository: 9 | 10 | http://archive.ics.uci.edu/ml/datasets.html 11 | 12 | */ 13 | 14 | import breeze.linalg._ 15 | import com.github.fommil.netlib.BLAS.{ getInstance => blas } 16 | 17 | object Regression { 18 | 19 | def backSolve(A: DenseMatrix[Double], 20 | y: DenseVector[Double]): DenseVector[Double] = { 21 | val yc = y.copy 22 | blas.dtrsv("U", "N", "N", A.cols, A.toArray, 23 | A.rows, yc.data, 1) 24 | yc 25 | } 26 | 27 | case class Lm(y: DenseVector[Double], 28 | X: DenseMatrix[Double], names: List[String]) { 29 | require(y.size == X.rows) 30 | require(names.length == X.cols) 31 | require(X.rows >= X.cols) 32 | val QR = qr.reduced(X) 33 | val q = QR.q 34 | val r = QR.r 35 | val qty = q.t * y 36 | val coefficients = backSolve(r, qty) 37 | import breeze.stats._ 38 | import org.apache.commons.math3.special.Beta 39 | def tCDF(t: Double, df: Double): Double = { 40 | val xt = df / (t * t + df) 41 | 1.0 - 0.5 * Beta.regularizedBeta(xt, 0.5 * df, 0.5) 42 | } 43 | def fCDF(x: Double, d1: Double, d2: Double) = { 44 | val xt = x * d1 / (x * d1 + d2) 45 | Beta.regularizedBeta(xt, 0.5 * d1, 0.5 * d2) 46 | } 47 | lazy val fitted = q * qty 48 | lazy val residuals = y - fitted 49 | lazy val n = X.rows 50 | lazy val pp = X.cols 51 | lazy val df = n - pp 52 | lazy val rss = sum(residuals ^:^ 2.0) 53 | lazy val rse = math.sqrt(rss / df) 54 | lazy val ri = inv(r) 55 | lazy val xtxi = ri * (ri.t) 56 | lazy val se = breeze.numerics.sqrt(diag(xtxi)) * rse 57 | lazy val t = coefficients / se 58 | lazy val p = t.map { 1.0 - tCDF(_, df) }.map { _ * 2 } 59 | lazy 
val ybar = mean(y) 60 | lazy val ymyb = y - ybar 61 | lazy val ssy = sum(ymyb ^:^ 2.0) 62 | lazy val rSquared = (ssy - rss) / ssy 63 | lazy val adjRs = 1.0 - ((n - 1.0) / (n - pp)) * (1 - rSquared) 64 | lazy val k = pp - 1 65 | lazy val f = (ssy - rss) / k / (rss / df) 66 | lazy val pf = 1.0 - fCDF(f, k, df) 67 | def summary: Unit = { 68 | println( 69 | "Estimate\t S.E.\t t-stat\tp-value\t\tVariable") 70 | println( 71 | "---------------------------------------------------------") 72 | (0 until pp).foreach(i => printf( 73 | "%8.4f\t%6.3f\t%6.3f\t%6.4f %s\t%s\n", 74 | coefficients(i), se(i), t(i), p(i), 75 | if (p(i) < 0.05) "*" else " ", 76 | names(i))) 77 | printf( 78 | "\nResidual standard error: %8.4f on %d degrees of freedom\n", 79 | rse, df) 80 | printf( 81 | "Multiple R-squared: %6.4f, Adjusted R-squared: %6.4f\n", 82 | rSquared, adjRs) 83 | printf( 84 | "F-statistic: %6.4f on %d and %d DF, p-value: %6.5f\n\n", 85 | f, k, df, pf) 86 | } 87 | } 88 | 89 | // Main runner method 90 | def main(args: Array[String]): Unit = { 91 | 92 | val url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data" 93 | val fileName = "yacht.csv" 94 | 95 | // download the file to disk if it hasn't been already 96 | val file = new java.io.File(fileName) 97 | if (!file.exists) { 98 | val s = new java.io.PrintWriter(file) 99 | val data = scala.io.Source.fromURL(url).getLines 100 | data.foreach(l => s.write(l.trim.split(' ').filter(_ != "").mkString("",",","\n"))) 101 | s.close 102 | } 103 | 104 | // read the file from disk 105 | val mat = csvread(new java.io.File(fileName)) 106 | println("Dim: " + mat.rows + " " + mat.cols) 107 | val y = mat(::, 6) // response is the final column 108 | val x = mat(::, 0 to 5) 109 | // first fit without an intercept 110 | Lm(y,x,List("LongPos","PrisCoef","LDR","BDR","LBR","Froude")).summary 111 | // add an intercept and re-fit 112 | val X = DenseMatrix.horzcat( 113 | DenseVector.ones[Double](x.rows).toDenseMatrix.t,x) 
114 | val mod = Lm(y,X,List("(Intercept)","LongPos","PrisCoef","LDR","BDR","LBR","Froude")) 115 | mod.summary 116 | 117 | } // main 118 | 119 | 120 | } 121 | 122 | // eof 123 | 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scala for Statistical Computing and Data Science Short Course 2 | 3 | **I occasionally run this course in-house for companies - [email me](mailto:darrenjwilkinson@btinternet.com) if your company is interested in this. Also note that I run an advanced course on [Category theory for pure FP in Scala](https://github.com/darrenjw/fps-course)** 4 | 5 | *Registered course participants should bookmark the [Start Here](StartHere.md) page. Please carefully follow the [laptop setup instructions](Setup.md) in advance of the start of the course.* 6 | 7 | ## Outline course description 8 | 9 | This course is aimed at statisticians and data scientists already familiar with a dynamic programming language (such as R, Python or Octave) who would like to learn how to use [Scala](http://www.scala-lang.org/). Scala is a free modern, powerful, strongly-typed, functional programming language, well-suited to statistical computing and data science applications. In particular, it is fast and efficient, runs on the Java virtual machine (JVM), and is designed to easily exploit modern multi-core and distributed computing architectures. 10 | 11 | The course will begin with an introduction to the Scala language and basic concepts of [functional programming](https://en.wikipedia.org/wiki/Functional_programming) (FP), as well as essential Scala tools such as [SBT](http://www.scala-sbt.org/) for managing builds and library dependencies. 
The course will continue with an overview of the [Scala collections library](http://docs.scala-lang.org/overviews/collections/overview.html), including [parallel collections](http://docs.scala-lang.org/overviews/parallel-collections/overview.html), and we will see how parallel collections enable trivial parallelisation of many statistical computing algorithms on multi-core hardware. We will next survey the wider Scala library ecosystem, paying particular attention to [Breeze](https://github.com/scalanlp/breeze), the Scala library for scientific computing and numerical linear algebra. We will see how to exploit non-uniform random number generation and matrix computations in Breeze for statistical applications. Both maximum-likelihood and simulation-based Bayesian statistical inference algorithms will be considered. Much of the final day will be dedicated to understanding [Apache Spark](http://spark.apache.org/), the distributed Big Data analytics platform for Scala. We will understand how Spark relates to the parallel collections we have already examined, and see how it can be used not only for the processing of very large data sets, but also for the parallel and distributed analysis of large or otherwise computationally-intensive models. As time permits, we will discuss more [advanced FP concepts](https://typelevel.org/cats/), such as typeclasses, higher-kinded types, monoids, functors, monads, applicatives, streams and streaming data, and see how these enable the development of flexible, scalable, generic code in strongly-typed functional languages. 12 | 13 | #### Prerequisite 14 | 15 | The course assumes a basic familiarity with essential concepts in statistical computing, as well as some basic programming experience. It is assumed that participants will be familiar with writing their own functions in a language such as R, including essential control structures such as "for-loops" and "if-statements". 
The course is not suitable for people completely new to programming. However, no prior knowledge of Scala or functional programming is assumed. All participants will be expected to bring their own (multi-core) laptop and to have a recent version of Java pre-installed. Other set-up instructions will be provided in advance to registered participants. 16 | 17 | #### Course structure 18 | 19 | The course will be delivered through a combination of lectures, live demos and hands-on practical sessions. For the practical sessions, participants will be expected to actively engage with the material, run demos, follow examples, and write code to solve simple problems. 20 | 21 | #### Presenters 22 | 23 | The course will be delivered by [Prof Darren Wilkinson](https://darrenjw.github.io/) (Newcastle University, U.K.). Prof Wilkinson is co-Director of Newcastle's [EPSRC Centre for Doctoral Training in Cloud Computing for Big Data](http://www.bigdata-cdt.ac.uk/), and a [Turing Fellow](https://www.turing.ac.uk/people/researchers/darren-wilkinson). He is a well-known expert in computational Bayesian statistics and a leading proponent of the use of strongly-typed FP languages (such as Scala) for scalable statistical computing. 24 | 25 | 26 | -------------------------------------------------------------------------------- /exercises/Advanced.md: -------------------------------------------------------------------------------- 1 | # Advanced topics 2 | 3 | ## Practical exercises 4 | 5 | Start off with exercise 1 (Cats), then pick and choose according to your interests. 6 | 7 | ### 1. Playing with Cats 8 | 9 | * [Cats](http://typelevel.org/cats/) is one of many useful libraries that we haven't had time to explore properly in this short course. [Scala exercises](https://www.scala-exercises.org/) has some [Cats exercises](https://www.scala-exercises.org/cats) which are worth working through to learn a little about how it works. 10 | 11 | ### 2. 
Simulacrum for typeclass programming 12 | 13 | * [Simulacrum](https://github.com/typelevel/simulacrum) is another useful library for FP in Scala. Read about how it works and then re-do the `CsvRow` and `Thinnable` typeclass examples from the notes using Simulacrum. Note how much cleaner they are. Note that Cats has a dependence on Simulacrum, so if you have a project or REPL with a Cats dependency you *may* not need to add an additional dependence on Simulacrum. However, you *do* need to enable the "macro paradise" compiler plugin, by adding the line 14 | ```scala 15 | addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full) 16 | ``` 17 | to your `build.sbt` file. The `sbt-test` example project is set up to allow experimenting with both Simulacrum and Cats from the REPL. 18 | 19 | ### 3. Monocle 20 | 21 | [Monocle](https://julien-truffaut.github.io/Monocle/) is an *optics* library for Scala, intended to make it easier to work with immutable data structures based on (nested) algebraic data types (ADTs). 22 | 23 | * Work through the Getting started guide to get a bit of a feel for the problem that the library solves. If it seems interesting, continue to work through the rest of the documentation. 24 | 25 | ### 4. Monix 26 | 27 | [Monix](https://monix.io/) is a library for asyncronous and concurrent programming in Scala, including stream-based functional reactive programming (FRP), using a datatype known as `Observable`. It is one of many options for working with (real time) data streams in Scala. It also contains `Task`, which is a much better version of Scala's `Future` monad. 28 | 29 | * Start working through the documentation for [Observable](https://monix.io/docs/3x/reactive/observable.html), and then investigate further if the library seems interesting. 30 | 31 | ### 5. Frameless 32 | 33 | [Frameless](https://typelevel.org/frameless/) is a library which provides a safter, more idiomatic, Scala interface to Spark. 
If you intend to work a lot with Spark, it is worth trying to understand this library and the potential benefits it can bring. 34 | * Start by reading through the Introduction, then continue with the library documentation, learning first about `TypedDataset`. 35 | 36 | ### 6. Probabilistic programming with Figaro 37 | 38 | * [Figaro](https://github.com/p2t2/figaro) is a library for probabilistic programming in Scala. Use the remaining time to read through the [Quick start guide](https://github.com/p2t2/figaro/raw/master/doc/Figaro%20Quick%20Start%20Guide.pdf) and then skim the [Tutorial](https://www.cra.com/sites/default/files/pdf/Figaro_Tutorial.pdf). Try to build and run the example from the quick start guide, noting that the examples can be found [here](https://github.com/p2t2/figaro/tree/master/FigaroExamples/src/main/scala/com/cra/figaro/example). 39 | * Note that from a clean SBT session (say, run from an empty/temp directory), a REPL with a Figaro dependency can be started with: 40 | ```scala 41 | set libraryDependencies += "com.cra.figaro" %% "figaro" % "5.0.0.0" 42 | set scalaVersion := "2.12.10" 43 | console 44 | ``` 45 | 46 | ### 7. Scala.js 47 | 48 | [Scala.js](https://www.scala-js.org/) is a framework for compiling Scala code to Javascript for client-side execution in web applications. If you do any front-end work, Scala.ja is one of the nicest ways to develop web applications. Many of the libraries we have considered, including EvilPlot, Cats, Simulacrum, Monocle, and Monix, are available for Scala.js as well as the usual JVM version of Scala. This makes it very easy to develop web applications which share code between the front and the back-end. Some of the web sites we have used in this course, such as [scala-fiddle](https://scalafiddle.io/) and [scala-exercises](https://www.scala-exercises.org/) are powered by Scala.js. It is particularly useful for developing web-based interactive dashboards for data science applications. 
49 | * Try one of the [tutorials](https://www.scala-js.org/doc/tutorial/) to get started 50 | 51 | ### 8. Scala-native 52 | 53 | [Scala-native](https://github.com/scala-native/scala-native) is a framework for compiling Scala to native code, rather than JVM bytecode. This is useful for systems programming, for interfacing with C libraries, and for developing lightweight command-line tools. Although many people imagine that Scala code compiled to native code will execute much faster than JVM bytecode, this is typically not the case, and is certainly not the main intended use of Scala-native. See the [documentation](https://www.scala-native.org/) for further details. 54 | 55 | 56 | #### eof 57 | 58 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/src/main/scala/pfilter/pfilter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pfilter.scala 3 | 4 | Top level code for pfilter blog post 5 | 6 | */ 7 | 8 | package pfilter 9 | 10 | object PFilter { 11 | 12 | import scala.language.higherKinds 13 | import scala.collection.parallel.immutable.ParVector 14 | import scala.collection.GenTraversable 15 | 16 | // Hardcode LogLik type 17 | type LogLik = Double 18 | // Use blank typeclasses for State, Observation, and Parameter 19 | trait State[T] 20 | trait Observation[T] 21 | trait Parameter[T] 22 | 23 | // My generic collection typeclass 24 | trait GenericColl[C[_]] { 25 | def map[A, B](ca: C[A])(f: A => B): C[B] 26 | def reduce[A](ca: C[A])(f: (A, A) => A): A 27 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: C[A])(f: A => D[B]): C[B] 28 | def zip[A, B](ca: C[A])(cb: C[B]): C[(A, B)] 29 | def length[A](ca: C[A]): Int 30 | } 31 | // Syntax for the typeclass 32 | implicit class GenericCollSyntax[A, C[A]](value: C[A]) { 33 | def map[B](f: A => B)(implicit inst: GenericColl[C]): C[B] = inst.map(value)(f) 34 | def reduce(f: (A, A) => A)(implicit inst: GenericColl[C]): A = 
inst.reduce(value)(f) 35 | def flatMap[B, D[B] <: GenTraversable[B]](f: A => D[B])(implicit inst: GenericColl[C]): C[B] = inst.flatMap(value)(f) 36 | def zip[B](cb: C[B])(implicit inst: GenericColl[C]): C[(A, B)] = inst.zip(value)(cb) 37 | def length(implicit inst: GenericColl[C]): Int = inst.length(value) 38 | } 39 | 40 | // Implementation for Vector 41 | implicit val vGC: GenericColl[Vector] = new GenericColl[Vector] { 42 | def map[A, B](ca: Vector[A])(f: A => B): Vector[B] = ca map f 43 | def reduce[A](ca: Vector[A])(f: (A, A) => A): A = ca reduce f 44 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: Vector[A])(f: A => D[B]): Vector[B] = ca flatMap f 45 | def zip[A, B](ca: Vector[A])(cb: Vector[B]): Vector[(A, B)] = ca zip cb 46 | def length[A](ca: Vector[A]) = ca.length 47 | } 48 | 49 | // Implementation for ParVector 50 | implicit val pvGC: GenericColl[ParVector] = new GenericColl[ParVector] { 51 | def map[A, B](ca: ParVector[A])(f: A => B): ParVector[B] = ca map f 52 | def reduce[A](ca: ParVector[A])(f: (A, A) => A): A = ca reduce f 53 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: ParVector[A])(f: A => D[B]): ParVector[B] = ca flatMap f 54 | def zip[A, B](ca: ParVector[A])(cb: ParVector[B]): ParVector[(A, B)] = ca zip cb 55 | def length[A](ca: ParVector[A]) = ca.length 56 | } 57 | 58 | // TODO: Implementation for Spark RDDs 59 | 60 | // Single step of a bootstrap particle filter 61 | def update[S: State, O: Observation, C[_]: GenericColl]( 62 | dataLik: (S, O) => LogLik, stepFun: S => S 63 | )(x: C[S], o: O): (LogLik, C[S]) = { 64 | import breeze.stats.distributions.Poisson 65 | val xp = x map (stepFun(_)) 66 | val lw = xp map (dataLik(_, o)) 67 | val max = lw reduce (math.max(_, _)) 68 | val rw = lw map (lwi => math.exp(lwi - max)) 69 | val srw = rw reduce (_ + _) 70 | val l = rw.length 71 | val z = rw zip xp 72 | val rx = z flatMap { case (rwi, xpi) => 73 | Vector.fill(Poisson(rwi * l / srw).draw)(xpi) } 74 | (max + math.log(srw / l), rx) 75 | } 76 | 
77 | // Run a bootstrap particle filter over a collection of observations 78 | def pFilter[S: State, O: Observation, C[_]: GenericColl, D[O] <: GenTraversable[O]]( 79 | x0: C[S], data: D[O], dataLik: (S, O) => LogLik, stepFun: S => S 80 | ): (LogLik, C[S]) = { 81 | val updater = update[S, O, C](dataLik, stepFun) _ 82 | data.foldLeft((0.0, x0))((prev, o) => { 83 | val (oll, ox) = prev 84 | val (ll, x) = updater(ox, o) 85 | (oll + ll, x) 86 | }) 87 | } 88 | 89 | // Marginal log likelihood estimation 90 | def pfMll[S: State, P: Parameter, O: Observation, C[_]: GenericColl, D[O] <: GenTraversable[O]]( 91 | simX0: P => C[S], stepFun: P => S => S, dataLik: P => (S, O) => LogLik, data: D[O] 92 | ): (P => LogLik) = (th: P) => pFilter(simX0(th), data, dataLik(th), stepFun(th))._1 93 | 94 | // Main method 95 | def main(args: Array[String]): Unit = { 96 | println("Hi") 97 | import Examples._ 98 | arTest 99 | println("Bye") 100 | } 101 | 102 | } 103 | 104 | object Examples { 105 | 106 | import PFilter._ 107 | 108 | // Simple test for an AR(1) model 109 | def arTest: Unit = { 110 | import breeze.linalg._ 111 | import breeze.stats.distributions._ 112 | println("AR(1) test start") 113 | // simulate some data from an AR(1) model with noise 114 | val inNoise = Gaussian(0.0, 1.0).sample(99) 115 | val state = DenseVector(inNoise.scanLeft(0.0)((s, i) => 0.8 * s + i).toArray) 116 | val noise = DenseVector(Gaussian(0.0, 2.0).sample(100).toArray) 117 | val data = (state + noise).toArray.toList 118 | import breeze.plot._ 119 | val f = Figure() 120 | val p0 = f.subplot(0) 121 | val idx = linspace(1, 100, 100) 122 | p0 += plot(idx, state) 123 | p0 += plot(idx, data, '.') 124 | p0.xlabel = "Time" 125 | p0.ylabel = "Value" 126 | // now try to recover autoregression coefficient 127 | implicit val dState = new State[Double] {} 128 | implicit val dObs = new Observation[Double] {} 129 | implicit val dPar = new Parameter[Double] {} 130 | val mll = pfMll( 131 | (th: Double) => Gaussian(0.0, 
10.0).sample(10000).toVector.par, 132 | (th: Double) => (s: Double) => Gaussian(th * s, 1.0).draw, 133 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 134 | data 135 | ) 136 | val x = linspace(0.0, 0.99, 100) 137 | val y = x map (mll(_)) 138 | //println(y) 139 | val p1 = f.subplot(2, 1, 1) 140 | p1 += plot(x, y) 141 | p1.xlabel = "theta" 142 | p1.ylabel = "mll" 143 | f.saveas("plot.png") 144 | println("AR(1) test finish") 145 | } 146 | 147 | } 148 | 149 | // eof 150 | 151 | -------------------------------------------------------------------------------- /examples/C9-ScalablePF/src/test/scala/pfilter-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pfilter-test.scala 3 | 4 | Test code for pfilter 5 | 6 | */ 7 | 8 | package pfilter 9 | 10 | import org.scalatest._ 11 | import org.scalatest.junit._ 12 | import org.junit.runner.RunWith 13 | 14 | import scala.language.higherKinds 15 | import PFilter._ 16 | 17 | @RunWith(classOf[JUnitRunner]) 18 | class MyTestSuite extends FunSuite { 19 | 20 | test("1+2=3") { 21 | assert(1 + 2 === 3) 22 | } 23 | 24 | // test generic functions to check that the typeclass works as intended 25 | def doubleIt[C[_]: GenericColl](ca: C[Int]): C[Int] = ca map (_ * 2) 26 | def addThem[C[_]: GenericColl](ca: C[Int]): Int = ca reduce (_ + _) 27 | def repeatThem[C[_]: GenericColl](ca: C[Int]): C[Int] = ca flatMap (x => List(x, x, x)) 28 | def zipThem[C[_]: GenericColl](ci: C[Int], cd: C[Double]): C[(Int, Double)] = ci zip cd 29 | def getLength[C[_]: GenericColl](ci: C[Int]): Int = ci.length 30 | 31 | test("Vector in generic function including map") { 32 | val v = Vector(5, 10, 15, 20) 33 | val v2 = v map (_ * 2) 34 | val v3 = doubleIt(v) 35 | assert(v2 === v3) 36 | } 37 | 38 | test("Vector in generic function including flatMap") { 39 | val v = Vector(5, 10, 15) 40 | val v2 = v flatMap (x => Array(x, x, x)) 41 | //println(v2) 42 | val v3 = repeatThem(v) 43 | assert(v2 === 
v3) 44 | } 45 | 46 | test("Vector in generic function including reduce") { 47 | val v = Vector(5, 10, 15) 48 | val s = addThem(v) 49 | assert(s === 30) 50 | } 51 | 52 | test("Vector in generic zipping function") { 53 | val v1 = Vector(1, 2, 3) 54 | val v2 = Vector(2.0, 4.0, 6.0) 55 | val v3 = v1 zip v2 56 | val v4 = zipThem(v1, v2) 57 | assert(v4 === v3) 58 | } 59 | 60 | test("Vector in generic length function") { 61 | val v1 = Vector(1, 2, 3, 4) 62 | val l = getLength(v1) 63 | assert(l === 4) 64 | } 65 | 66 | test("ParVector in generic function including map") { 67 | val v = Vector(5, 10, 15, 30).par 68 | val v2 = v map (_ * 2) 69 | //println(v2) 70 | val v3 = doubleIt(v) 71 | assert(v2 === v3) 72 | } 73 | 74 | test("ParVector in generic function including flatMap") { 75 | val v = Vector(5, 10, 15, 10).par 76 | val v2 = v flatMap (x => Vector(x, x, x)) 77 | //println(v2) 78 | val v3 = repeatThem(v) 79 | assert(v2 === v3) 80 | } 81 | 82 | test("ParVector in generic function including reduce") { 83 | val v = Vector(5, 10, 15).par 84 | val s = addThem(v) 85 | assert(s === 30) 86 | } 87 | 88 | test("ParVector in generic zipping function") { 89 | val v1 = Vector(1, 2, 3).par 90 | val v2 = Vector(2.0, 4.0, 6.0).par 91 | val v3 = v1 zip v2 92 | //println(v3) 93 | val v4 = zipThem(v1, v2) 94 | assert(v4 === v3) 95 | } 96 | 97 | test("ParVector in generic length function") { 98 | val v1 = Vector(1, 2, 3, 4).par 99 | val l = getLength(v1) 100 | assert(l === 4) 101 | } 102 | 103 | test("Vector update test") { 104 | import breeze.stats.distributions.Gaussian 105 | implicit val dState = new State[Double] {} 106 | implicit val dObs = new Observation[Double] {} 107 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector 108 | val p2 = update((s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw)(p1, 5.0) 109 | assert(p2._2.length > 90000) 110 | } 111 | 112 | test("ParVector update test") { 113 | import breeze.stats.distributions.Gaussian 114 | 
implicit val dState = new State[Double] {} 115 | implicit val dObs = new Observation[Double] {} 116 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector.par 117 | val p2 = update((s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw)(p1, 5.0) 118 | assert(p2._2.length > 90000) 119 | } 120 | 121 | test("Vector pFilter test") { 122 | import breeze.stats.distributions.Gaussian 123 | implicit val dState = new State[Double] {} 124 | implicit val dObs = new Observation[Double] {} 125 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector 126 | val pn = pFilter(p1, List(2.0, 2.0, 3.0, 4.0), (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw) 127 | assert(pn._2.length > 90000) 128 | } 129 | 130 | test("ParVector pFilter test") { 131 | import breeze.stats.distributions.Gaussian 132 | implicit val dState = new State[Double] {} 133 | implicit val dObs = new Observation[Double] {} 134 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector.par 135 | val pn = pFilter(p1, List(2.0, 2.0, 3.0, 4.0), (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw) 136 | assert(pn._2.length > 90000) 137 | } 138 | 139 | test("Vector pfMll test") { 140 | import breeze.stats.distributions.Gaussian 141 | implicit val dState = new State[Double] {} 142 | implicit val dObs = new Observation[Double] {} 143 | implicit val dPar = new Parameter[Double] {} 144 | val mll = pfMll( 145 | (th: Double) => Gaussian(0.0, 10.0).sample(100000).toVector, 146 | (th: Double) => (s: Double) => Gaussian(s, 1.0).draw, 147 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 148 | List(2.0, 2.0, 3.0, 4.0) 149 | ) 150 | val ll1 = mll(1.0) 151 | val ll2 = mll(2.0) 152 | assert(math.abs(ll1 - ll2) < 0.1) 153 | } 154 | 155 | test("ParVector pfMll test") { 156 | import breeze.stats.distributions.Gaussian 157 | implicit val dState = new State[Double] {} 158 | implicit val dObs = new 
Observation[Double] {} 159 | implicit val dPar = new Parameter[Double] {} 160 | val mll = pfMll( 161 | (th: Double) => Gaussian(0.0, 10.0).sample(100000).toVector.par, 162 | (th: Double) => (s: Double) => Gaussian(s, 1.0).draw, 163 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 164 | List(2.0, 2.0, 3.0, 4.0) 165 | ) 166 | val ll1 = mll(1.0) 167 | val ll2 = mll(2.0) 168 | assert(math.abs(ll1 - ll2) < 0.1) 169 | } 170 | 171 | } 172 | 173 | // eof 174 | -------------------------------------------------------------------------------- /Setup.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | ## Setting up your laptop for the course 4 | 5 | It will save time during the course if everyone sets up their laptop with some essential required software in advance. Since Scala runs on the JVM, and the JVM is platform independent, it doesn't really matter what OS is used - in particular, Linux, Windows and Mac should all be fine. The basic requirements are, *in order*: 6 | 7 | * Download and install Java 8 (OpenJDK is fine) - *requires root/administrator access* 8 | * Download (or clone) this course code repository 9 | * Download, install and test `sbt` 10 | * Install a Scala-aware editor or IDE 11 | * Download (but don't install) Apache Spark 12 | 13 | Further information is given below. For avoidance of doubt, I am *not* assuming that you will have done a system-wide installation of Scala or the Scala compiler, and I don't particularly recommend doing so. It is not necessary if you are using `sbt`. 14 | 15 | ### Download and install Java 16 | 17 | *This step requires root/administrator access to your laptop, so if you don't have this, you will need help from your system administrator* 18 | 19 | Versions of Scala prior to 2.12.x worked with Java 6 and Java 7, but the 2.12.x Scala releases require Java 8, as Java 8 introduced a number of features which make it a better target for Scala compilation. 
I will be using Scala 2.12.10 in the course, so Java 8 is required. A more recent version of Java (e.g. Java 11) should also be fine.
Linux users can download from a terminal with a command like: 39 | ```bash 40 | wget https://github.com/darrenjw/scala-course/archive/master.zip 41 | ``` 42 | 43 | ### Download and install sbt 44 | 45 | `sbt` is the Scala build tool. You should download, install and test this before the course starts. The *testing* part is particularly important, as it will download and cache a lot of Scala libraries on your system ready for use during the course. See my [sbt installation page](sbt/Readme.md) for further details. 46 | 47 | ### Install a Scala IDE 48 | 49 | People starting out with programming in Scala are likely to benefit from writing code using an editor which can provide instant feedback and assistance. There are many possible options here, but it is not possible to provide support for every Scala-aware editor in existence. The course presenter uses [Emacs](https://www.gnu.org/software/emacs/) together with [Ensime](http://ensime.org/editors/emacs/install/), and considers this to be a good option for people already comfortable with the Emacs text editor. However, this is probably not a good option for people unfamiliar with Emacs. For everyone else, [IntelliJ](IntelliJ.md) is probably a safer bet, and the course presenter has some familiarity with it, so should be able to provide basic support. The course presenter(s) will not be able to provide support for any other editor or IDE. It is therefore strongly recommended that participants comfortable with Emacs set up Emacs with Ensime, and that everyone else installs IntelliJ. Switching to another editor/IDE in the future will be quite straightforward, but it will save a lot of time during the course if everyone uses one of the two recommended IDEs. 
See one of the following pages for further details: 50 | 51 | * [Installing IntelliJ](IntelliJ.md) 52 | * [Installing Ensime](Ensime.md) (for Emacs users only) 53 | * [Installing the ScalaIDE](ScalaIDE.md) (obsolete) 54 | 55 | ### Download Apache Spark 56 | 57 | In case of a poor Internet connection during the course, it will be helpful if everyone could download this [Apache Spark 2.4.5](https://downloads.apache.org/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz) package to their system in advance. Linux users can download from a terminal with a command like: 58 | ```bash 59 | wget https://downloads.apache.org/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz 60 | ``` 61 | You should make sure that you have a tool on your system which can unpack a "tgz" file (no issue for Linux users), but there is no need to "install" Spark - we will walk through installation/setup as part of the course. 62 | 63 | ## Further information 64 | 65 | Daniel Spiewak has a nice guide to [getting started in Scala](https://gist.github.com/djspiewak/cb72c41ac335a3a9b28b3307be04aa43) that could be a useful source of additional information. However, I will not be assuming that you have followed all of the advice in his guide. In particular, although the Ammonite REPL is very nice, I understand that there are issues with it on Windows. We will therefore not be using it in this course. 66 | 67 | 68 | -------------------------------------------------------------------------------- /fragments/advanced.scala: -------------------------------------------------------------------------------- 1 | 2 | Vector(1,2,3).sum 3 | // res0: Int = 6 4 | List(1.0,5.0).sum 5 | // res1: Double = 6.0 6 | 7 | 8 | Vector(1,2,3).mean 9 | // :8: error: value mean is not a member of 10 | // Vector[Int] 11 | // Vector(1,2,3).mean 12 | // ^ 13 | 14 | 15 | object Meanable { 16 | def mean[T: Numeric](it: Iterable[T]): Double = 17 | it.map(implicitly[Numeric[T]].toDouble(_)). 
18 | sum / it.size 19 | } 20 | 21 | 22 | object Meanable { 23 | def mean[T](it: Iterable[T])( 24 | implicit num: Numeric[T]): Double = 25 | it.map(num.toDouble(_)).sum / it.size 26 | } 27 | 28 | 29 | import Meanable._ 30 | // import Meanable._ 31 | mean(Vector(1,2,3)) 32 | // res3: Double = 2.0 33 | mean(List(1.0,5.0)) 34 | // res4: Double = 3.0 35 | 36 | 37 | implicit class MeanableInstance[T: Numeric]( 38 | it: Iterable[T]) { 39 | def mean[T] = Meanable.mean(it) 40 | } 41 | 42 | 43 | Vector(1,2,3).mean 44 | // res5: Double = 2.0 45 | List(1.0,3.0,5.0,7.0).mean 46 | // res6: Double = 4.0 47 | 48 | 49 | trait CsvRow[T] { 50 | def toCsv(row: T): String 51 | } 52 | 53 | 54 | implicit class CsvRowSyntax[T](row: T) { 55 | def toCsv(implicit inst: CsvRow[T]) = inst.toCsv(row) 56 | } 57 | 58 | 59 | def printRows[T: CsvRow](it: Iterable[T]): Unit = 60 | it.foreach(row => println(row.toCsv)) 61 | 62 | 63 | case class MyState(x: Int, y: Double) 64 | 65 | 66 | implicit val myStateCsvRow = new CsvRow[MyState] { 67 | def toCsv(row: MyState) = row.x.toString+","+row.y 68 | } 69 | 70 | 71 | MyState(1,2.0).toCsv 72 | // res7: String = 1,2.0 73 | printRows(List(MyState(1,2.0),MyState(2,3.0))) 74 | // 1,2.0 75 | // 2,3.0 76 | 77 | 78 | implicit val vectorDoubleCsvRow = 79 | new CsvRow[Vector[Double]] { 80 | def toCsv(row: Vector[Double]) = row.mkString(",") 81 | } 82 | // vectorDoubleCsvRow: CsvRow[Vector[Double]] = 83 | // $anon$1@4604e051 84 | 85 | Vector(1.0,2.0,3.0).toCsv 86 | // res9: String = 1.0,2.0,3.0 87 | printRows(List(Vector(1.0,2.0),Vector(4.0,5.0), 88 | Vector(3.0,3.0))) 89 | // 1.0,2.0 90 | // 4.0,5.0 91 | // 3.0,3.0 92 | 93 | 94 | import scala.language.higherKinds 95 | trait Thinnable[F[_]] { 96 | def thin[T](f: F[T], th: Int): F[T] 97 | } 98 | 99 | 100 | implicit class ThinnableSyntax[T,F[T]](value: F[T]) { 101 | def thin(th: Int)(implicit inst: Thinnable[F]): F[T] = 102 | inst.thin(value,th) 103 | } 104 | 105 | 106 | implicit val streamThinnable: Thinnable[Stream] 
= 107 | new Thinnable[Stream] { 108 | def thin[T](s: Stream[T],th: Int): Stream[T] = { 109 | val ss = s.drop(th-1) 110 | if (ss.isEmpty) Stream.empty else 111 | ss.head #:: thin(ss.tail, th) 112 | } 113 | } 114 | 115 | 116 | Stream.iterate(0)(_ + 1). 117 | drop(10). 118 | thin(2). 119 | take(5). 120 | toArray 121 | // res11: Array[Int] = Array(11, 13, 15, 17, 19) 122 | 123 | 124 | trait GenericColl[C[_]] { 125 | def map[A, B](ca: C[A])(f: A => B): C[B] 126 | def reduce[A](ca: C[A])(f: (A, A) => A): A 127 | def flatMap[A, B, D[B] <: GenTraversable[B]]( 128 | ca: C[A])(f: A => D[B]): C[B] 129 | def zip[A, B](ca: C[A])(cb: C[B]): C[(A, B)] 130 | def length[A](ca: C[A]): Int 131 | } 132 | 133 | 134 | def update[S: State, O: Observation, C[_]: GenericColl]( 135 | dataLik: (S, O) => LogLik, stepFun: S => S 136 | )(x: C[S], o: O): (LogLik, C[S]) = { 137 | import breeze.stats.distributions.Poisson 138 | val xp = x map (stepFun(_)) 139 | val lw = xp map (dataLik(_, o)) 140 | val max = lw reduce (math.max(_, _)) 141 | val rw = lw map (lwi => math.exp(lwi - max)) 142 | val srw = rw reduce (_ + _) 143 | val l = rw.length 144 | val z = rw zip xp 145 | val rx = z flatMap { case (rwi, xpi) => 146 | Vector.fill(Poisson(rwi * l / srw).draw)(xpi) } 147 | (max + math.log(srw / l), rx) 148 | } 149 | 150 | 151 | def pFilter[S: State, O: Observation, 152 | C[_]: GenericColl, D[O] <: GenTraversable[O]]( 153 | x0: C[S], data: D[O], 154 | dataLik: (S, O) => LogLik, stepFun: S => S 155 | ): (LogLik, C[S]) = { 156 | val updater = update[S, O, C](dataLik, stepFun) _ 157 | data.foldLeft((0.0, x0))((prev, o) => { 158 | val (oll, ox) = prev 159 | val (ll, x) = updater(ox, o) 160 | (oll + ll, x) 161 | }) 162 | } 163 | 164 | 165 | def pfMll[S: State, P: Parameter, O: Observation, 166 | C[_]: GenericColl, D[O] <: GenTraversable[O]]( 167 | simX0: P => C[S], stepFun: P => S => S, 168 | dataLik: P => (S, O) => LogLik, data: D[O] 169 | ): (P => LogLik) = (th: P) => 170 | pFilter(simX0(th), data, 
dataLik(th), stepFun(th))._1 171 | 172 | 173 | val inNoise = Gaussian(0.0, 1.0).sample(99) 174 | val state = DenseVector(inNoise.scanLeft(0.0)( 175 | (s, i) => 0.8 * s + i).toArray) 176 | val noise = DenseVector( 177 | Gaussian(0.0, 2.0).sample(100).toArray) 178 | val data = (state + noise).toArray.toList 179 | 180 | 181 | val mll = pfMll( 182 | (th: Double) => Gaussian(0.0, 10.0). 183 | sample(10000).toVector.par, 184 | (th: Double) => (s: Double) => 185 | Gaussian(th * s, 1.0).draw, 186 | (th: Double) => (s: Double, o: Double) => 187 | Gaussian(s, 2.0).logPdf(o), 188 | data 189 | ) 190 | 191 | 192 | libraryDependencies += "org.typelevel" %% "cats-core" % "1.0.0" 193 | 194 | 195 | import cats.Monoid 196 | // import cats.Monoid 197 | import cats.syntax.semigroup._ 198 | // import cats.syntax.semigroup._ 199 | import cats.instances.all._ 200 | // import cats.instances.all._ 201 | 202 | 203 | 1 |+| 3 204 | // res0: Int = 4 205 | 1.0 |+| 2.0 206 | // res1: Double = 3.0 207 | "Hi" |+| "There" 208 | // res2: String = HiThere 209 | List(1,2,3) |+| List(4,5) 210 | // res3: List[Int] = List(1, 2, 3, 4, 5) 211 | 212 | 213 | val m1 = Map("a" -> 2, "b" -> 3) 214 | // m1: Map[String,Int] = Map(a -> 2, b -> 3) 215 | val m2 = Map("b" -> 4, "c" -> 5) 216 | // m2: Map[String,Int] = Map(b -> 4, c -> 5) 217 | m1 |+| m2 218 | // res3: Map[String,Int] = Map(b -> 7, c -> 5, a -> 2) 219 | 220 | 221 | scala.io.Source. 222 | fromFile("/usr/share/dict/words"). 223 | getLines. 224 | map(_.trim). 225 | map(_.toLowerCase). 226 | flatMap(_.toCharArray). 227 | filter(_ > '/'). 228 | filter(_ < '}'). 229 | map(ch => Map(ch -> 1)). 230 | reduce(_ |+| _) 231 | // res4: Map[Char,Int] = Map(e -> 88833, s -> 90113, 232 | // x -> 2124, n -> 57144, j -> 1948, y -> 12652, 233 | // t -> 53006, u -> 26118, f -> 10675, a -> 64439, ... 
234 | 235 | -------------------------------------------------------------------------------- /examples/C5-Metropolis/src/main/scala/metropolis.scala: -------------------------------------------------------------------------------- 1 | /* 2 | mcmc-stream.scala 3 | 4 | 5 | */ 6 | 7 | import breeze.linalg._ 8 | import breeze.plot._ 9 | import breeze.stats.distributions._ 10 | import breeze.stats.meanAndVariance 11 | import annotation.tailrec 12 | 13 | object MCMC { 14 | 15 | def mcmcSummary(dv: DenseVector[Double]): Figure = { 16 | val len = dv.length 17 | val mav = meanAndVariance(dv) 18 | val mean = mav.mean 19 | val variance = mav.variance 20 | println(s"Iters=$len, Mean=$mean, variance=$variance") 21 | val f = Figure("MCMC Summary") 22 | f.height = 1000 23 | f.width = 1200 24 | val p0 = f.subplot(1, 2, 0) 25 | p0 += plot(linspace(1, len, len), dv) 26 | p0.xlabel = "Iteration" 27 | p0.ylabel = "Value" 28 | p0.title = "Trace plot" 29 | val p1 = f.subplot(1, 2, 1) 30 | p1 += hist(dv, 100) 31 | p1.xlabel = "Value" 32 | p1.title = "Marginal density" 33 | f 34 | } 35 | 36 | def time[A](f: => A) = { 37 | val s = System.nanoTime 38 | val ret = f 39 | println("time: " + (System.nanoTime - s) / 1e6 + "ms") 40 | ret 41 | } 42 | 43 | def metrop1(n: Int = 1000, eps: Double = 0.5): DenseVector[Double] = { 44 | val vec = DenseVector.fill(n)(0.0) 45 | var x = 0.0 46 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 47 | vec(0) = x 48 | (1 until n).foreach { i => 49 | val can = x + Uniform(-eps, eps).draw 50 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 51 | val loga = loglik - oldll 52 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 53 | x = can 54 | oldll = loglik 55 | } 56 | vec(i) = x 57 | } 58 | vec 59 | } 60 | 61 | def metrop2(n: Int = 1000, eps: Double = 0.5): Unit = { 62 | var x = 0.0 63 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 64 | (1 to n).foreach { i => 65 | val can = x + Uniform(-eps, eps).draw 66 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 67 | val loga = loglik - oldll 
68 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 69 | x = can 70 | oldll = loglik 71 | } 72 | println(x) 73 | } 74 | } 75 | 76 | @tailrec 77 | def metrop3(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Unit = { 78 | if (n > 0) { 79 | println(x) 80 | val can = x + Uniform(-eps, eps).draw 81 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 82 | val loga = loglik - oldll 83 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 84 | metrop3(n - 1, eps, can, loglik) 85 | else 86 | metrop3(n - 1, eps, x, oldll) 87 | } 88 | } 89 | 90 | @tailrec 91 | def metrop4(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue, acc: List[Double] = Nil): DenseVector[Double] = { 92 | if (n == 0) 93 | DenseVector(acc.reverse.toArray) 94 | else { 95 | val can = x + Uniform(-eps, eps).draw 96 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 97 | val loga = loglik - oldll 98 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 99 | metrop4(n - 1, eps, can, loglik, can :: acc) 100 | else 101 | metrop4(n - 1, eps, x, oldll, x :: acc) 102 | } 103 | } 104 | 105 | def newState(x: Double, oldll: Double, eps: Double): (Double, Double) = { 106 | val can = x + Uniform(-eps, eps).draw 107 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 108 | val loga = loglik - oldll 109 | if (math.log(Uniform(0.0, 1.0).draw) < loga) (can, loglik) else (x, oldll) 110 | } 111 | 112 | @tailrec 113 | def metrop5(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Unit = { 114 | if (n > 0) { 115 | println(x) 116 | val ns = newState(x, oldll, eps) 117 | metrop5(n - 1, eps, ns._1, ns._2) 118 | } 119 | } 120 | 121 | @tailrec 122 | def metrop5b(n: Int = 1000, eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Unit = { 123 | if (n > 0) { 124 | println(x) 125 | val (nx, ll) = newState(x, oldll, eps) 126 | metrop5b(n - 1, eps, nx, ll) 127 | } 128 | } 129 | 130 | @tailrec 131 | def metrop6(n: Int = 1000, eps: Double = 0.5, x: 
Double = 0.0, oldll: Double = Double.MinValue, acc: List[Double] = Nil): DenseVector[Double] = { 132 | if (n == 0) DenseVector(acc.reverse.toArray) else { 133 | val (nx, ll) = newState(x, oldll, eps) 134 | metrop6(n - 1, eps, nx, ll, nx :: acc) 135 | } 136 | } 137 | 138 | def nextState(eps: Double)(state: (Double, Double)): (Double, Double) = { 139 | val (x, oldll) = state 140 | val can = x + Uniform(-eps, eps).draw 141 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 142 | val loga = loglik - oldll 143 | if (math.log(Uniform(0.0, 1.0).draw) < loga) (can, loglik) else (x, oldll) 144 | } 145 | 146 | def metrop7(eps: Double = 0.5, x: Double = 0.0, oldll: Double = Double.MinValue): Stream[Double] = 147 | Stream.iterate((x, oldll))(nextState(eps)) map (_._1) 148 | 149 | def thin[T](s: Stream[T], th: Int): Stream[T] = { 150 | val ss = s.drop(th - 1) 151 | if (ss.isEmpty) Stream.empty else 152 | ss.head #:: thin(ss.tail, th) 153 | } 154 | 155 | def kernel(x: Double): Rand[Double] = for { 156 | innov <- Uniform(-0.5, 0.5) 157 | can = x + innov 158 | oldll = Gaussian(0.0, 1.0).logPdf(x) 159 | loglik = Gaussian(0.0, 1.0).logPdf(can) 160 | loga = loglik - oldll 161 | u <- Uniform(0.0, 1.0) 162 | } yield if (math.log(u) < loga) can else x 163 | 164 | def main(arg: Array[String]): Unit = { 165 | println("Hi") 166 | metrop1(10).foreach(println) 167 | metrop2(10) 168 | metrop3(10) 169 | metrop4(10).foreach(println) 170 | metrop5(10) 171 | metrop6(10).foreach(println) 172 | metrop7().take(10).foreach(println) 173 | val ms = Stream.iterate(0.0)(kernel(_).draw) 174 | ms.take(10).foreach(println) 175 | // plot output to check it looks OK 176 | mcmcSummary(DenseVector(ms.take(100000).toArray)) 177 | // timings... 
178 | val N=1000000 179 | println("metrop1:") 180 | time(metrop1(N)) 181 | println("metrop4:") 182 | time(metrop4(N)) 183 | println("metrop6:") 184 | time(metrop6(N)) 185 | println("metrop7:") 186 | time(metrop7().take(N).toArray) 187 | println("MarkovChain with custom kernel") 188 | time(Stream.iterate(0.0)(kernel(_).draw).take(N).toArray) 189 | 190 | println("Bye") 191 | } 192 | 193 | } 194 | 195 | // eof 196 | 197 | -------------------------------------------------------------------------------- /fragments/tools.scala: -------------------------------------------------------------------------------- 1 | 2 | set scalaVersion := "2.12.10" 3 | 4 | 5 | set libraryDependencies+="org.scalanlp"%%"breeze"%"1.0" 6 | set libraryDependencies+="org.scalanlp"%%"breeze-natives"%"1.0" 7 | 8 | 9 | object Metropolis { 10 | 11 | import breeze.stats.distributions._ 12 | 13 | def kernel(x: Double): Rand[Double] = for { 14 | innov <- Uniform(-0.5, 0.5) 15 | can = x + innov 16 | oldll = Gaussian(0.0, 1.0).logPdf(x) 17 | loglik = Gaussian(0.0, 1.0).logPdf(can) 18 | loga = loglik - oldll 19 | u <- Uniform(0.0, 1.0) 20 | } yield if (math.log(u) < loga) can else x 21 | 22 | val chain = Stream.iterate(0.0)(kernel(_).draw) 23 | 24 | def main(args: Array[String]): Unit = { 25 | val n = if (args.size == 0) 10 else args(0).toInt 26 | chain.take(n).toArray.foreach(println) 27 | } 28 | 29 | } 30 | 31 | 32 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10") 33 | 34 | 35 | set scalaVersion := "2.12.10" 36 | set libraryDependencies+="org.ddahl"%%"rscala"%"3.2.18" 37 | console 38 | 39 | 40 | val R = org.ddahl.rscala.RClient() 41 | // R: org.ddahl.rscala.RClient = RClient@9fc5dc1 42 | 43 | 44 | org.ddahl.rscala.RClient.defaultRCmd 45 | // res0: String = R 46 | 47 | 48 | val d0 = R.evalD0("rnorm(1)") 49 | // d0: Double = 0.945922465932532 50 | 51 | 52 | val d1 = R.evalD1("rnorm(5)") 53 | // d1: Array[Double] = Array(-0.8272179841496433, ... 
54 | 55 | 56 | val d2 = R.evalD2("matrix(rnorm(6),nrow=2)") 57 | // d2: Array[Array[Double]] = Array(Array( 58 | // -0.7545734628207127, ... 59 | 60 | 61 | 62 | R.eval("vec = %-", (1 to 10).toArray) // send data to R 63 | R.evalI1("vec") 64 | // res9: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 65 | 66 | 67 | R eval """ 68 | vec2 = rep(vec,3) 69 | vec3 = vec2 + 1 70 | mat1 = matrix(vec3,ncol=5) 71 | """ 72 | 73 | 74 | R.evalI2("mat1") // get data back from R 75 | // res3: Array[Array[Int]] = Array(Array(2, 8, 4, ... 76 | 77 | 78 | import breeze.stats.distributions._ 79 | import breeze.linalg._ 80 | import org.ddahl.rscala.RClient 81 | val x = Uniform(50,60).sample(1000) 82 | // x: IndexedSeq[Double] = Vector(50.54008541753607, ... 83 | val eta = x map (xi => (xi * 0.1) - 3) 84 | // eta: IndexedSeq[Double] = Vector(2.054008541753607, ... 85 | val mu = eta map math.exp 86 | // mu: IndexedSeq[Double] = Vector(7.799101554600703, ... 87 | val y = mu map (Poisson(_).draw) 88 | // y: IndexedSeq[Int] = Vector(8, 15, 12, ... 
89 | 90 | 91 | val R = RClient() // initialise an R interpreter 92 | // R: RClient = RClient@661e0a99 93 | R.eval("x = %-", x.toArray) // send x to R 94 | R.eval("y = %-", y.toArray) // send y to R 95 | R.eval("mod = glm(y~x,family=poisson())") // fit in R 96 | // pull the fitted coefficents back into scala 97 | DenseVector[Double](R.evalD1("mod$coefficients")) 98 | // res9: DenseVector[Double] = DenseVector( 99 | // -2.93361267743947, 0.09875286320703261) 100 | 101 | 102 | require(1 == 1) // satisfied 103 | // require(1 == 2) // throws exception 104 | assert (1 == 1) // satisfied 105 | // assert (1 == 2) // throws exception 106 | 107 | 108 | def sqrt(x: Double): Double = { 109 | require(x >= 0.0) // pre-condition 110 | val ans = math.sqrt(x) 111 | assert(math.abs(x-ans*ans) < 0.00001) // post-condition 112 | ans 113 | } 114 | 115 | sqrt(2.0) // works as expected 116 | // sqrt(-2.0) // throws exception 117 | 118 | 119 | scalacOptions += "-Xdisable-assertions" 120 | 121 | 122 | class SetSpec extends AnyFlatSpec { 123 | 124 | "An empty Set" should "have size 0" in { 125 | assert(Set.empty.size == 0) 126 | } 127 | 128 | it should "produce NoSuchElementException when head is invoked" in { 129 | assertThrows[NoSuchElementException] { 130 | Set.empty.head 131 | } 132 | } 133 | 134 | } 135 | 136 | 137 | "A Gamma(3.0,4.0)" should "have mean 12.0" in { 138 | import breeze.stats.distributions.Gamma 139 | val g = Gamma(3.0,4.0) 140 | val m = g.mean 141 | assert(math.abs(m - 12.0) < 0.000001) 142 | } 143 | 144 | 145 | "org.scalacheck" %% "scalacheck" % "1.14.1" % "test" 146 | 147 | 148 | import org.scalatest.matchers.should.Matchers 149 | 150 | import org.scalacheck._ 151 | import org.scalacheck.Prop.{forAll, propBoolean} 152 | 153 | class StringSpec extends Properties("String") with Matchers { 154 | 155 | property("startwith first string") = 156 | forAll { (a: String, b: String) => 157 | (a+b).startsWith(a) 158 | } 159 | 160 | property("concatenate length") = 161 | forAll { 
(a: String, b: String) => 162 | (a+b).length == a.length + b.length 163 | } 164 | 165 | property("substring") = 166 | forAll { (a: String, b: String, c: String) => 167 | (a+b+c).substring(a.length, a.length+b.length) == b 168 | } 169 | 170 | } 171 | 172 | 173 | class SqrtSpecification extends Properties("Sqrt") with Matchers { 174 | 175 | property("math.sqrt should square to give original") = 176 | forAll { a: Double => 177 | (a >= 0.0) ==> { 178 | val s = math.sqrt(a) 179 | val tol = 1e-8 * a 180 | s*s === a +- tol 181 | } 182 | } 183 | 184 | } 185 | 186 | 187 | /** 188 | * Take every th value from the stream s of type T 189 | * 190 | * @param s A Stream to be thinned 191 | * @param th Thinning interval 192 | * 193 | * @return The thinned stream, with values of 194 | * the same type as the input stream 195 | */ 196 | def thinStream[T](s: Stream[T],th: Int): Stream[T] = { 197 | val ss = s.drop(th-1) 198 | if (ss.isEmpty) Stream.empty else 199 | ss.head #:: thinStream(ss.tail, th) 200 | } 201 | 202 | 203 | val x = 3 + 2 204 | // x: Int = 5 205 | 206 | 207 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "1.3.6") 208 | 209 | 210 | enablePlugins(MdocPlugin) 211 | 212 | 213 | resolvers += Resolver.bintrayRepo("cibotech", "public") 214 | libraryDependencies += "com.cibo" %% "evilplot" % "0.6.3" 215 | libraryDependencies += "com.cibo" %% "evilplot-repl" % "0.6.3" 216 | 217 | 218 | import scala.util.Random 219 | import com.cibo.evilplot._ 220 | import com.cibo.evilplot.plot._ 221 | import com.cibo.evilplot.numeric._ 222 | import com.cibo.evilplot.plot.renderers.PointRenderer 223 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 224 | 225 | val points = Seq.fill(150) { 226 | Point(Random.nextDouble(), Random.nextDouble()) 227 | } :+ Point(0.0, 0.0) 228 | val years = Seq.fill(150)(Random.nextDouble()) :+ 1.0 229 | val yearMap = (points zip years).toMap.withDefaultValue(0.0) 230 | val plot = ScatterPlot( 231 | points, 232 | pointRenderer = 
Some(PointRenderer.depthColor((p: Point) => 233 | p.x, 0.0, 500.0, None, None)) 234 | ).standard() 235 | .xLabel("x") 236 | .yLabel("y") 237 | .trend(1, 0) 238 | .rightLegend() 239 | .render() 240 | displayPlot(plot) 241 | 242 | 243 | val im = plot.asBufferedImage 244 | 245 | -------------------------------------------------------------------------------- /examples/C7-EvilPlot/src/main/scala/evilplot-examples.scala: -------------------------------------------------------------------------------- 1 | /* 2 | evilplot-examples.scala 3 | 4 | EvilPlot examples 5 | 6 | */ 7 | 8 | object EvilPlotExamples { 9 | 10 | import scala.util.Random 11 | 12 | import com.cibo.evilplot._ 13 | import com.cibo.evilplot.plot._ 14 | import com.cibo.evilplot.numeric._ 15 | import com.cibo.evilplot.plot.renderers.PointRenderer 16 | 17 | def scatterExample() = { 18 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 19 | val points = Seq.fill(150) { 20 | Point(Random.nextDouble(), Random.nextDouble()) 21 | } :+ Point(0.0, 0.0) 22 | val years = Seq.fill(150)(Random.nextDouble()) :+ 1.0 23 | val yearMap = (points zip years).toMap.withDefaultValue(0.0) 24 | ScatterPlot( 25 | points, 26 | //pointRenderer = Some(PointRenderer.depthColor(p => yearMap(p), 0.0, 1.0, None, None)) 27 | pointRenderer = Some(PointRenderer.depthColor((p: Point) => p.x, 0.0, 500.0, None, None)) 28 | ) 29 | .standard() 30 | .xLabel("x") 31 | .yLabel("y") 32 | .trend(1, 0) 33 | .rightLegend() 34 | .render() 35 | } 36 | 37 | def scatterHist() = { 38 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 39 | import com.cibo.evilplot.colors.RGB 40 | import com.cibo.evilplot.geometry.Extent 41 | import com.cibo.evilplot.geometry.LineStyle.DashDot 42 | 43 | val allYears = (2007 to 2013).toVector 44 | val data = Seq.fill(150)(Point(Random.nextDouble(), Random.nextDouble())) 45 | val years = Seq.fill(150)(allYears(Random.nextInt(allYears.length))) 46 | val yearMap = (data zip years).toMap 47 | 48 | val xhist = 
Histogram(data.map(_.x), bins = 50) 49 | val yhist = Histogram(data.map(_.y), bins = 40) 50 | ScatterPlot( 51 | data = data, 52 | //pointRenderer = Some(PointRenderer.colorByCategory(data, p => yearMap(p))) 53 | ).topPlot(xhist) 54 | .rightPlot(yhist) 55 | .standard() 56 | .title("Measured vs Actual") 57 | .xLabel("measured") 58 | .yLabel("actual") 59 | .trend(1, 0, color = RGB(45, 45, 45), lineStyle = DashDot) 60 | .overlayLegend(x = 0.95, y = 0.8) 61 | .render(Extent(600, 400)) 62 | } 63 | 64 | def functionPlot() = { 65 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 66 | import com.cibo.evilplot.colors.HTMLNamedColors 67 | import com.cibo.evilplot.numeric.Bounds 68 | Overlay( 69 | FunctionPlot.series(x => x * x, "y = x^2", 70 | HTMLNamedColors.dodgerBlue, xbounds = Some(Bounds(-1, 1))), 71 | FunctionPlot.series(x => math.pow(x, 3), "y = x^3", 72 | HTMLNamedColors.crimson, xbounds = Some(Bounds(-1, 1))), 73 | FunctionPlot.series(x => math.pow(x, 4), "y = x^4", 74 | HTMLNamedColors.green, xbounds = Some(Bounds(-1, 1))) 75 | ).title("A bunch of polynomials.") 76 | .overlayLegend() 77 | .standard() 78 | .render() 79 | } 80 | 81 | def barChart() = { 82 | import com.cibo.evilplot.colors.RGB 83 | import com.cibo.evilplot.geometry.{Align, Drawable, Extent, Rect, Text} 84 | import com.cibo.evilplot.plot._ 85 | import com.cibo.evilplot.plot.aesthetics 86 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme.{DefaultFonts} 87 | import com.cibo.evilplot.plot.renderers.BarRenderer 88 | implicit val theme = aesthetics.DefaultTheme.DefaultTheme.copy( 89 | fonts = DefaultFonts. 
90 | copy(tickLabelSize = 14, legendLabelSize = 14, fontFace = "'Lato', sans-serif") 91 | ) 92 | val percentChange = Seq[Double](-10, 5, 12, 68, -22) 93 | val labels = Seq("one", "two", "three", "four", "five") 94 | val labeledByColor = new BarRenderer { 95 | val positive = RGB(241, 121, 6) 96 | val negative = RGB(226, 56, 140) 97 | def render(plot: Plot, extent: Extent, category: Bar): Drawable = { 98 | val rect = Rect(extent) 99 | val value = category.values.head 100 | val color = if (value >= 0) positive else negative 101 | Align.center(rect filled color, Text(s"$value%", size = 20) 102 | .filled(theme.colors.label) 103 | ).group 104 | } 105 | } 106 | BarChart 107 | .custom(percentChange.map(Bar.apply), spacing = Some(20), 108 | barRenderer = Some(labeledByColor) 109 | ) 110 | .standard(xLabels = labels) 111 | .hline(0) 112 | .render() 113 | } 114 | 115 | def clusteredBar() = { 116 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 117 | val data = Seq[Seq[Double]]( 118 | Seq(1, 2, 3), 119 | Seq(4, 5, 6), 120 | Seq(3, 4, 1), 121 | Seq(2, 3, 4) 122 | ) 123 | BarChart 124 | .clustered( 125 | data, 126 | labels = Seq("one", "two", "three") 127 | ) 128 | .title("Clustered Bar Chart Demo") 129 | .xAxis(Seq("a", "b", "c", "d")) 130 | .yAxis() 131 | .frame() 132 | .bottomLegend() 133 | .render() 134 | } 135 | 136 | def boxPlot() = { 137 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 138 | val data = Seq.fill(10)(Seq.fill(Random.nextInt(30))(Random.nextDouble())) 139 | BoxPlot(data) 140 | .standard(xLabels = (1 to 10).map(_.toString)) 141 | .render() 142 | } 143 | 144 | def pairsPlot() = { 145 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 146 | val labels = Vector("a", "b", "c", "d") 147 | val data = for (i <- 1 to 4) yield { 148 | (labels(i - 1), Seq.fill(10) { Random.nextDouble() * 10 }) 149 | } 150 | val plots = for ((xlabel, xdata) <- data) yield { 151 | for ((ylabel, ydata) <- data) yield { 152 | val points = (xdata, ydata).zipped.map 
{ (a, b) => Point(a, b) } 153 | if (ylabel == xlabel) { 154 | Histogram(xdata, bins = 4) 155 | } else { 156 | ScatterPlot(points) 157 | } 158 | } 159 | } 160 | Facets(plots) 161 | .standard() 162 | .title("Pairs Plot with Histograms") 163 | .topLabels(data.map { _._1 }) 164 | .rightLabels(data.map { _._1 }) 165 | .render() 166 | } 167 | 168 | def contourPlot() = { 169 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 170 | val data = Seq.fill(100) { 171 | Point(Random.nextDouble() * 20, Random.nextDouble() * 20) 172 | } 173 | ContourPlot(data) 174 | .standard() 175 | .xbounds(0, 20) 176 | .ybounds(0, 20) 177 | .render() 178 | } 179 | 180 | def heatMap() = { 181 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 182 | val x = 100 ; val y = 50 183 | //val x = 500 ; val y = 500 184 | val data = Vector.fill(y)(Vector.fill(x)(Random.nextDouble())) 185 | Heatmap(data,256) 186 | .standard() 187 | .render() 188 | } 189 | 190 | 191 | def main(args: Array[String]): Unit = { 192 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 193 | // some plots 194 | displayPlot(scatterExample()) 195 | displayPlot(scatterHist()) 196 | displayPlot(functionPlot()) 197 | displayPlot(barChart()) 198 | displayPlot(clusteredBar()) 199 | displayPlot(boxPlot()) 200 | displayPlot(pairsPlot()) 201 | displayPlot(heatMap()) 202 | displayPlot(contourPlot()) 203 | // writing a plot to a bitmap image 204 | val bitmap = contourPlot().asBufferedImage 205 | javax.imageio.ImageIO.write(bitmap, "png", new java.io.File("image.png")) 206 | } 207 | 208 | 209 | } 210 | 211 | // eof 212 | -------------------------------------------------------------------------------- /examples/C6-Smile/target/mdoc/smile-example.md: -------------------------------------------------------------------------------- 1 | # Smile example 2 | 3 | ## Some mdoc documentation 4 | 5 | This is some documentation prepared using `mdoc`. 
The original file is in `docs`, but the `sbt` task `mdoc` will typecheck and execute the code blocks, and put the compiled markdown document in `target/mdoc`. 6 | 7 | We begin by reading the data (we assume that the file "yacht.csv" already exists). 8 | 9 | ```scala 10 | val df = smile.read.csv("yacht.csv") 11 | // df: smile.data.DataFrame = [LongPos: double, PrisCoef: double, LDR: double, BDR: double, LBR: double, Froude: double, Resist: double] 12 | // +-------+--------+----+----+----+------+------+ 13 | // |LongPos|PrisCoef| LDR| BDR| LBR|Froude|Resist| 14 | // +-------+--------+----+----+----+------+------+ 15 | // | -2.3| 0.568|4.78|3.99|3.17| 0.125| 0.11| 16 | // | -2.3| 0.568|4.78|3.99|3.17| 0.15| 0.27| 17 | // | -2.3| 0.568|4.78|3.99|3.17| 0.175| 0.47| 18 | // | -2.3| 0.568|4.78|3.99|3.17| 0.2| 0.78| 19 | // | -2.3| 0.568|4.78|3.99|3.17| 0.225| 1.18| 20 | // | -2.3| 0.568|4.78|3.99|3.17| 0.25| 1.82| 21 | // | -2.3| 0.568|4.78|3.99|3.17| 0.275| 2.61| 22 | // | -2.3| 0.568|4.78|3.99|3.17| 0.3| 3.76| 23 | // | -2.3| 0.568|4.78|3.99|3.17| 0.325| 4.99| 24 | // | -2.3| 0.568|4.78|3.99|3.17| 0.35| 7.16| 25 | // +-------+--------+----+----+----+------+------+ 26 | // 298 more rows... 
27 | // 28 | df 29 | // res0: smile.data.DataFrame = [LongPos: double, PrisCoef: double, LDR: double, BDR: double, LBR: double, Froude: double, Resist: double] 30 | // +-------+--------+----+----+----+------+------+ 31 | // |LongPos|PrisCoef| LDR| BDR| LBR|Froude|Resist| 32 | // +-------+--------+----+----+----+------+------+ 33 | // | -2.3| 0.568|4.78|3.99|3.17| 0.125| 0.11| 34 | // | -2.3| 0.568|4.78|3.99|3.17| 0.15| 0.27| 35 | // | -2.3| 0.568|4.78|3.99|3.17| 0.175| 0.47| 36 | // | -2.3| 0.568|4.78|3.99|3.17| 0.2| 0.78| 37 | // | -2.3| 0.568|4.78|3.99|3.17| 0.225| 1.18| 38 | // | -2.3| 0.568|4.78|3.99|3.17| 0.25| 1.82| 39 | // | -2.3| 0.568|4.78|3.99|3.17| 0.275| 2.61| 40 | // | -2.3| 0.568|4.78|3.99|3.17| 0.3| 3.76| 41 | // | -2.3| 0.568|4.78|3.99|3.17| 0.325| 4.99| 42 | // | -2.3| 0.568|4.78|3.99|3.17| 0.35| 7.16| 43 | // +-------+--------+----+----+----+------+------+ 44 | // 298 more rows... 45 | // 46 | ``` 47 | 48 | We can get a quick summary of the data as follows. 49 | 50 | ```scala 51 | df.summary 52 | // res1: smile.data.DataFrame = [column: String, count: long, min: double, avg: double, max: double] 53 | // +--------+-----+-----+---------+-----+ 54 | // | column|count| min| avg| max| 55 | // +--------+-----+-----+---------+-----+ 56 | // | LongPos| 308| -5|-2.381818| 0| 57 | // |PrisCoef| 308| 0.53| 0.564136| 0.6| 58 | // | LDR| 308| 4.34| 4.788636| 5.14| 59 | // | BDR| 308| 2.81| 3.936818| 5.35| 60 | // | LBR| 308| 2.73| 3.206818| 3.64| 61 | // | Froude| 308|0.125| 0.2875| 0.45| 62 | // | Resist| 308| 0.01|10.495357|62.42| 63 | // +--------+-----+-----+---------+-----+ 64 | // 65 | ``` 66 | 67 | We can now carry out OLS regression after a couple of imports 68 | 69 | ```scala 70 | import smile.data.formula._ 71 | import scala.language.postfixOps 72 | val mod = smile.regression.ols("Resist" ~, df) 73 | // mod: smile.regression.LinearModel = Linear Model: 74 | // 75 | // Residuals: 76 | // Min 1Q Median 3Q Max 77 | // -11.7700 -7.5578 -1.8198 6.1620 
31.5715 78 | // 79 | // Coefficients: 80 | // Estimate Std. Error t value Pr(>|t|) 81 | // Intercept -19.2367 27.1133 -0.7095 0.4786 82 | // LongPos 0.1938 0.3381 0.5734 0.5668 83 | // PrisCoef -6.4194 44.1590 -0.1454 0.8845 84 | // LDR 4.2330 14.1651 0.2988 0.7653 85 | // BDR -1.7657 5.5212 -0.3198 0.7493 86 | // LBR -4.5164 14.2000 -0.3181 0.7507 87 | // Froude 121.6676 5.0658 24.0175 0.0000 *** 88 | // --------------------------------------------------------------------- 89 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 90 | // 91 | // Residual standard error: 8.9596 on 301 degrees of freedom 92 | // Multiple R-squared: 0.6576, Adjusted R-squared: 0.6507 93 | // F-statistic: 96.3327 on 6 and 301 DF, p-value: 4.526e-67 94 | // 95 | mod 96 | // res2: smile.regression.LinearModel = Linear Model: 97 | // 98 | // Residuals: 99 | // Min 1Q Median 3Q Max 100 | // -11.7700 -7.5578 -1.8198 6.1620 31.5715 101 | // 102 | // Coefficients: 103 | // Estimate Std. Error t value Pr(>|t|) 104 | // Intercept -19.2367 27.1133 -0.7095 0.4786 105 | // LongPos 0.1938 0.3381 0.5734 0.5668 106 | // PrisCoef -6.4194 44.1590 -0.1454 0.8845 107 | // LDR 4.2330 14.1651 0.2988 0.7653 108 | // BDR -1.7657 5.5212 -0.3198 0.7493 109 | // LBR -4.5164 14.2000 -0.3181 0.7507 110 | // Froude 121.6676 5.0658 24.0175 0.0000 *** 111 | // --------------------------------------------------------------------- 112 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 113 | // 114 | // Residual standard error: 8.9596 on 301 degrees of freedom 115 | // Multiple R-squared: 0.6576, Adjusted R-squared: 0.6507 116 | // F-statistic: 96.3327 on 6 and 301 DF, p-value: 4.526e-67 117 | // 118 | ``` 119 | 120 | If we don't want to regress on everything, we can just choose what we'd like to regress on. 
121 | 122 | ```scala 123 | smile.regression.ols("Resist" ~ "Froude", df) 124 | // res3: smile.regression.LinearModel = Linear Model: 125 | // 126 | // Residuals: 127 | // Min 1Q Median 3Q Max 128 | // -11.2396 -7.6662 -1.7111 6.4039 32.1537 129 | // 130 | // Coefficients: 131 | // Estimate Std. Error t value Pr(>|t|) 132 | // Intercept -24.4841 1.5336 -15.9654 0.0000 *** 133 | // Froude 121.6676 5.0339 24.1698 0.0000 *** 134 | // --------------------------------------------------------------------- 135 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 136 | // 137 | // Residual standard error: 8.9031 on 306 degrees of freedom 138 | // Multiple R-squared: 0.6562, Adjusted R-squared: 0.6551 139 | // F-statistic: 584.1803 on 1 and 306 DF, p-value: 6.233e-73 140 | // 141 | smile.regression.ols("Resist" ~ "Froude" + "LongPos", df) 142 | // res4: smile.regression.LinearModel = Linear Model: 143 | // 144 | // Residuals: 145 | // Min 1Q Median 3Q Max 146 | // -11.2361 -7.4169 -1.7970 6.3781 32.1378 147 | // 148 | // Coefficients: 149 | // Estimate Std. Error t value Pr(>|t|) 150 | // Intercept -24.0234 1.7315 -13.8743 0.0000 *** 151 | // Froude 121.6676 5.0394 24.1434 0.0000 *** 152 | // LongPos 0.1934 0.3362 0.5754 0.5655 153 | // --------------------------------------------------------------------- 154 | // Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 155 | // 156 | // Residual standard error: 8.9129 on 305 degrees of freedom 157 | // Multiple R-squared: 0.6566, Adjusted R-squared: 0.6544 158 | // F-statistic: 291.6172 on 2 and 305 DF, p-value: 1.604e-71 159 | // 160 | ``` 161 | 162 | ### Summary 163 | 164 | This brief document has illustrated how easy and convenient it is to produce executable documentation and reports for Scala. 
165 | 166 | -------------------------------------------------------------------------------- /fragments/basics.scala: -------------------------------------------------------------------------------- 1 | 2 | Welcome to Scala 2.12.10 (OpenJDK 64-Bit Server VM, 3 | Java 1.8.0_121). 4 | Type in expressions for evaluation. Or try :help. 5 | 6 | scala> val a = 5 7 | a: Int = 5 8 | 9 | scala> a 10 | res0: Int = 5 11 | 12 | 13 | scala> a = 6 14 | :8: error: reassignment to val 15 | a = 6 16 | ^ 17 | scala> a 18 | res1: Int = 5 19 | 20 | 21 | scala> var b = 7 22 | b: Int = 7 23 | 24 | scala> b 25 | res2: Int = 7 26 | 27 | scala> b = 8 28 | b: Int = 8 29 | 30 | scala> b 31 | res3: Int = 8 32 | 33 | 34 | scala> val c = List(3,4,5,6) 35 | c: List[Int] = List(3, 4, 5, 6) 36 | 37 | scala> c(1) 38 | res4: Int = 4 39 | 40 | scala> c.sum 41 | res5: Int = 18 42 | 43 | scala> c.length 44 | res6: Int = 4 45 | 46 | scala> c.product 47 | res7: Int = 360 48 | 49 | 50 | scala> c.foldLeft(0)((x,y) => x+y) 51 | res8: Int = 18 52 | 53 | 54 | scala> c.foldLeft(0)(_+_) 55 | res9: Int = 18 56 | 57 | scala> c.foldLeft(1)(_*_) 58 | res10: Int = 360 59 | 60 | 61 | scala> c.reduce(_*_) 62 | res11: Int = 360 63 | 64 | 65 | scala> val d = Vector(2,3,4,5,6,7,8,9) 66 | d: Vector[Int] = Vector(2, 3, 4, 5, 6, 7, 8, 9) 67 | 68 | scala> d 69 | res11: Vector[Int] = Vector(2, 3, 4, 5, 6, 7, 8, 9) 70 | 71 | scala> d.slice(3,6) 72 | res12: Vector[Int] = Vector(5, 6, 7) 73 | 74 | scala> val e = d.updated(3,0) 75 | e: Vector[Int] = Vector(2, 3, 4, 0, 6, 7, 8, 9) 76 | 77 | scala> d 78 | res13: Vector[Int] = Vector(2, 3, 4, 5, 6, 7, 8, 9) 79 | 80 | scala> e 81 | res14: Vector[Int] = Vector(2, 3, 4, 0, 6, 7, 8, 9) 82 | 83 | 84 | scala> val f=(1 to 10).toList 85 | f: List[Int] = List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 86 | 87 | scala> f 88 | res15: List[Int] = List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 89 | 90 | scala> f.map(x => x*x) 91 | res16: List[Int] = List(1, 4, 9, 16, 25, 36, 49, 64, 92 | 81, 100) 93 | 94 | scala> f 
map {x => x*x} 95 | res17: List[Int] = List(1, 4, 9, 16, 25, 36, 49, 64, 96 | 81, 100) 97 | 98 | scala> f filter {_ > 4} 99 | res18: List[Int] = List(5, 6, 7, 8, 9, 10) 100 | 101 | 102 | math.log(2.0) 103 | // res0: Double = 0.6931471805599453 104 | math.sin(1.0) 105 | // res1: Double = 0.8414709848078965 106 | log(2.0) 107 | // :8: error: not found: value log 108 | // log(2.0) 109 | // ^ 110 | import math.log 111 | // import math.log 112 | log(2.0) 113 | // res3: Double = 0.6931471805599453 114 | import math._ 115 | // import math._ 116 | sin(Pi/2) 117 | // res4: Double = 1.0 118 | exp(log(sin(Pi/2))) 119 | // res5: Double = 1.0 120 | sin(asin(0.1)) 121 | // res6: Double = 0.1 122 | atan(1)*4 123 | // res7: Double = 3.141592653589793 124 | log(sqrt(exp(1))) 125 | // res8: Double = 0.5 126 | abs(min(-1,2)) 127 | // res9: Int = 1 128 | pow(2,8) 129 | // res10: Double = 256.0 130 | random 131 | // res11: Double = 0.0954535018607291 132 | random 133 | // res12: Double = 0.5669552981874513 134 | random 135 | // res13: Double = 0.9598287994663521 136 | floor(random*3) 137 | // res14: Double = 2.0 138 | floor(random*3) 139 | // res15: Double = 1.0 140 | floor(random*3) 141 | // res16: Double = 1.0 142 | floor(random*3) 143 | // res17: Double = 1.0 144 | 145 | 146 | val a1 = 1 147 | // a1: Int = 1 148 | val a2: Int = 1 149 | // a2: Int = 1 150 | val l1 = List(1, 2, 3) 151 | // l1: List[Int] = List(1, 2, 3) 152 | val l2: List[Int] = List(2, 3, 4) 153 | // l2: List[Int] = List(2, 3, 4) 154 | 155 | 156 | val a3: Double = 1 157 | // a3: Double = 1.0 158 | val a4: Int = 1.0 159 | // :7: error: type mismatch; 160 | // found : Double(1.0) 161 | // required: Int 162 | // val a4: Int = 1.0 163 | // ^ 164 | 165 | 166 | def fact1(n: Int): Int = (1 to n).product 167 | // fact1: (n: Int)Int 168 | fact1(5) 169 | // res0: Int = 120 170 | 171 | 172 | def fact2(n: Int): Int = { 173 | var acc = 1 174 | var i = 2 175 | while (i <= n) { 176 | acc *= i 177 | i += 1 178 | } 179 | acc 180 | } 
181 | // fact2: (n: Int)Int 182 | fact2(5) 183 | // res1: Int = 120 184 | 185 | 186 | def fact3(n: Int): Int = { 187 | if (n == 1) 1 else 188 | n * fact3(n-1) 189 | } 190 | // fact3: (n: Int)Int 191 | fact3(5) 192 | // res2: Int = 120 193 | 194 | 195 | @annotation.tailrec 196 | def fact4(n: Int, acc: Int = 1): Int = { 197 | if (n == 1) acc else 198 | fact4(n-1, acc*n) 199 | } 200 | // fact4: (n: Int, acc: Int)Int 201 | fact4(5) 202 | // res3: Int = 120 203 | 204 | 205 | math.log(fact4(5)) 206 | // res4: Double = 4.787491742782046 207 | 208 | def lfact(n: Int): Double = { 209 | if (n == 1) 0.0 else 210 | math.log(n) + lfact(n-1) 211 | } 212 | // lfact: (n: Int)Double 213 | lfact(5) 214 | // res5: Double = 4.787491742782046 215 | // lfact(10000) // will cause a stack overflow 216 | 217 | 218 | @annotation.tailrec 219 | def lfacttr(n: Int, acc: Double = 0.0): Double = { 220 | if (n == 1) acc else 221 | lfacttr(n-1, acc + math.log(n)) 222 | } 223 | // lfacttr: (n: Int, acc: Double)Double 224 | lfacttr(5) 225 | // res6: Double = 4.787491742782046 226 | lfacttr(10000) 227 | // res7: Double = 82108.92783681446 228 | 229 | 230 | @annotation.tailrec 231 | def factbi(n: BigInt, acc: BigInt = 1): BigInt = { 232 | if (n == 1) acc else 233 | factbi(n-1, acc*n) 234 | } 235 | // factbi: (n: BigInt, acc: BigInt)BigInt 236 | factbi(5) 237 | // res8: BigInt = 120 238 | factbi(10000) 239 | // res9: BigInt = 2846259680917054518906413212119... 
240 | 241 | 242 | /* 243 | log-fact.scala 244 | Program to compute the log-factorial function 245 | */ 246 | 247 | object LogFact { 248 | 249 | import annotation.tailrec 250 | import math.log 251 | 252 | @tailrec 253 | def logfact(n: Int, acc: Double = 0.0): Double = 254 | if (n == 1) acc else 255 | logfact(n-1, acc + log(n)) 256 | 257 | def main(args: Array[String]): Unit = { 258 | val n = if (args.length == 1) args(0).toInt else 5 259 | val lfn = logfact(n) 260 | println(s"logfact($n) = $lfn") 261 | } 262 | 263 | } 264 | 265 | // eof 266 | 267 | 268 | val l1 = List(1,2,3) 269 | 270 | 271 | val l2 = 4 :: l1 272 | // List(4, 1, 2, 3) 273 | 274 | 275 | val l3 = l2 map { x => x*x } 276 | // List(16, 1, 4, 9) 277 | 278 | 279 | val l4 = l2.map(x => x*x) 280 | 281 | 282 | import breeze.plot._ 283 | def plotFun(fun: Double => Double, xmin: Double = 284 | -3.0, xmax: Double = 3.0): Figure = { 285 | val f = Figure() 286 | val p = f.subplot(0) 287 | import breeze.linalg._ 288 | val x = linspace(xmin, xmax) 289 | p += plot(x, x map fun) 290 | p.xlabel = "x" 291 | p.ylabel = "f(x)" 292 | f 293 | } 294 | 295 | 296 | plotFun(x => x*x) 297 | 298 | 299 | def myQuad1(x: Double): Double = x*x - 2*x + 1 300 | plotFun(myQuad1) 301 | def myQuad2(x: Double): Double = x*x - 3*x - 1 302 | plotFun(myQuad2) 303 | 304 | 305 | val myQuad3: (Double => Double) = x => -x*x + 2 306 | plotFun(myQuad3) 307 | 308 | 309 | def quadratic(a: Double, b: Double, c: Double, 310 | x: Double): Double = 311 | a*x*x + b*x + c 312 | 313 | 314 | plotFun(x => quadratic(3,2,1,x)) 315 | 316 | 317 | def quadFun(a: Double, b: Double, c: Double): 318 | Double => Double = x => quadratic(a,b,c,x) 319 | val myQuad4 = quadFun(2,1,3) 320 | plotFun(myQuad4) 321 | plotFun(quadFun(1,2,3)) 322 | 323 | 324 | val quadFunF: (Double,Double,Double) => Double => 325 | Double = (a,b,c) => x => quadratic(a,b,c,x) 326 | val myQuad5 = quadFunF(-1,1,2) 327 | plotFun(myQuad5) 328 | plotFun(quadFunF(1,-2,3)) 329 | 330 | 331 | val myQuad6 
= quadratic(1,2,3,_: Double) 332 | plotFun(myQuad6) 333 | 334 | 335 | def quad(a: Double, b: Double, c: Double)(x: Double): 336 | Double = a*x*x + b*x + c 337 | plotFun(quad(1,2,-3)) 338 | val myQuad7 = quad(1,0,1) _ 339 | plotFun(myQuad7) 340 | 341 | 342 | def quadCurried = (quadratic _).curried 343 | plotFun(quadCurried(1)(2)(3)) 344 | 345 | 346 | val quadraticF: (Double,Double,Double,Double) => Double = 347 | (a,b,c,x) => a*x*x + b*x + c 348 | def quadCurried2 = quadraticF.curried 349 | plotFun(quadCurried2(-1)(2)(3)) 350 | 351 | 352 | val aLongString = (1 to 10000).map(_.toString). 353 | reduce(_+_) 354 | // aLongString: String = 1234567891011121314151617... 355 | 356 | val stringLength: String => Int = s => s.length 357 | // stringLength: String => Int = 358 | 359 | stringLength(aLongString) 360 | // res0: Int = 38894 361 | 362 | 363 | def convertToK: Int => Double = i => i.toDouble/1024 364 | // convertToK: Int => Double 365 | 366 | def stringLengthInK1(s: String): Double = { 367 | val l = stringLength(s) 368 | val lk = convertToK(l) 369 | lk 370 | } 371 | // stringLengthInK1: (s: String)Double 372 | 373 | stringLengthInK1(aLongString) 374 | // res1: Double = 37.982421875 375 | 376 | 377 | val stringLengthInK2: String => Double = 378 | s => convertToK(stringLength(s)) 379 | // stringLengthInK2: String => Double = 380 | 381 | stringLengthInK2(aLongString) 382 | // res2: Double = 37.982421875 383 | 384 | 385 | val stringLengthInK3: String => Double = 386 | s => (convertToK compose stringLength)(s) 387 | // stringLengthInK3: String => Double = 388 | 389 | stringLengthInK3(aLongString) 390 | // res3: Double = 37.982421875 391 | 392 | 393 | val stringLengthInK4: String => Double = 394 | convertToK compose stringLength 395 | // stringLengthInK4: String => Double = 396 | 397 | stringLengthInK4(aLongString) 398 | // res4: Double = 37.982421875 399 | 400 | -------------------------------------------------------------------------------- /fragments/monte.scala: 
-------------------------------------------------------------------------------- 1 | 2 | import java.util.concurrent.ThreadLocalRandom 3 | import scala.math.exp 4 | import scala.annotation.tailrec 5 | 6 | val N = 1000000L 7 | def rng = ThreadLocalRandom.current() 8 | 9 | def mc(its: Long): Double = { 10 | @tailrec def sum(its: Long, acc: Double): Double = { 11 | if (its == 0) acc else { 12 | val u = rng.nextDouble() 13 | sum(its-1, acc + exp(-u*u)) 14 | } 15 | } 16 | sum(its,0.0)/its 17 | } 18 | 19 | mc(N) 20 | // res0: Double = 0.7469182341226777 21 | 22 | 23 | def mcp(its: Long,np: Int = 4): Double = 24 | (1 to np).par.map(i => mc(its/np)).sum/np 25 | 26 | mcp(N) 27 | // res1: Double = 0.7468289488326496 28 | 29 | 30 | def time[A](f: => A) = { 31 | val s = System.nanoTime 32 | val ret = f 33 | println("time: "+(System.nanoTime-s)/1e6+"ms") 34 | ret 35 | } 36 | 37 | 38 | val bigN = 100000000L 39 | // bigN: Long = 100000000 40 | 41 | time(mc(bigN)) 42 | // time: 6225.859951ms 43 | // res2: Double = 0.7468159872240743 44 | time(mcp(bigN)) 45 | // time: 2197.872294ms 46 | // res3: Double = 0.7468246533834739 47 | 48 | 49 | (1 to 12).foreach{i => 50 | println("np = "+i) 51 | (1 to 3).foreach(j => time(mcp(bigN,i))) 52 | } 53 | // np = 1 54 | // time: 6201.480532ms 55 | // time: 6186.176627ms 56 | // time: 6198.14735ms 57 | // np = 2 58 | // time: 3127.512337ms 59 | // time: 3122.648652ms 60 | // time: 3148.509354ms 61 | // np = 3 62 | // time: 2488.273962ms 63 | // time: 2402.957878ms 64 | // time: 2555.286948ms 65 | // np = 4 66 | // time: 2133.996ms 67 | // time: 2238.847511ms 68 | // time: 2177.260599ms 69 | // np = 5 70 | // time: 2867.889727ms 71 | // time: 2890.128312ms 72 | // time: 2784.020295ms 73 | // np = 6 74 | // time: 3358.373499ms 75 | // time: 2600.759805ms 76 | // time: 2559.704485ms 77 | // np = 7 78 | // time: 3248.162029ms 79 | // time: 3359.006061ms 80 | // time: 2882.463352ms 81 | // np = 8 82 | // time: 1847.027762ms 83 | // time: 2545.40533ms 
84 | // time: 2556.063328ms 85 | // np = 9 86 | // time: 2344.998373ms 87 | // time: 2253.718886ms 88 | // time: 2260.407902ms 89 | // np = 10 90 | // time: 2158.32923ms 91 | // time: 2125.176623ms 92 | // time: 2049.69822ms 93 | // np = 11 94 | // time: 1945.826366ms 95 | // time: 1945.175903ms 96 | // time: 1952.519595ms 97 | // np = 12 98 | // time: 1822.598809ms 99 | // time: 1827.48165ms 100 | // time: 2722.349404ms 101 | 102 | 103 | def metrop1(n: Int = 1000, eps: Double = 0.5): 104 | DenseVector[Double] = { 105 | val vec = DenseVector.fill(n)(0.0) 106 | var x = 0.0 107 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 108 | vec(0) = x 109 | (1 until n).foreach { i => 110 | val can = x + Uniform(-eps, eps).draw 111 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 112 | val loga = loglik - oldll 113 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 114 | x = can 115 | oldll = loglik 116 | } 117 | vec(i) = x 118 | } 119 | vec 120 | } 121 | 122 | 123 | def metrop2(n: Int = 1000, eps: Double = 0.5): Unit = 124 | { 125 | var x = 0.0 126 | var oldll = Gaussian(0.0, 1.0).logPdf(x) 127 | (1 to n).foreach { i => 128 | val can = x + Uniform(-eps, eps).draw 129 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 130 | val loga = loglik - oldll 131 | if (math.log(Uniform(0.0, 1.0).draw) < loga) { 132 | x = can 133 | oldll = loglik 134 | } 135 | println(x) 136 | } 137 | } 138 | 139 | 140 | @tailrec 141 | def metrop3(n: Int = 1000, eps: Double = 0.5, 142 | x: Double = 0.0, oldll: Double = Double.MinValue): 143 | Unit = { 144 | if (n > 0) { 145 | println(x) 146 | val can = x + Uniform(-eps, eps).draw 147 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 148 | val loga = loglik - oldll 149 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 150 | metrop3(n - 1, eps, can, loglik) 151 | else 152 | metrop3(n - 1, eps, x, oldll) 153 | } 154 | } 155 | 156 | 157 | @tailrec 158 | def metrop4(n: Int = 1000, eps: Double = 0.5, 159 | x: Double = 0.0, oldll: Double = Double.MinValue, 160 | acc: List[Double] = 
Nil): DenseVector[Double] = { 161 | if (n == 0) 162 | DenseVector(acc.reverse.toArray) 163 | else { 164 | val can = x + Uniform(-eps, eps).draw 165 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 166 | val loga = loglik - oldll 167 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 168 | metrop4(n - 1, eps, can, loglik, can :: acc) 169 | else 170 | metrop4(n - 1, eps, x, oldll, x :: acc) 171 | } 172 | } 173 | 174 | 175 | def newState(x: Double, oldll: Double, eps: Double): 176 | (Double, Double) = { 177 | val can = x + Uniform(-eps, eps).draw 178 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 179 | val loga = loglik - oldll 180 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 181 | (can, loglik) else (x, oldll) 182 | } 183 | 184 | 185 | @tailrec 186 | def metrop5(n: Int = 1000, eps: Double = 0.5, 187 | x: Double = 0.0, 188 | oldll: Double = Double.MinValue): Unit = { 189 | if (n > 0) { 190 | println(x) 191 | val ns = newState(x, oldll, eps) 192 | metrop5(n - 1, eps, ns._1, ns._2) 193 | } 194 | } 195 | 196 | 197 | @tailrec 198 | def metrop5b(n: Int = 1000, eps: Double = 0.5, 199 | x: Double = 0.0, 200 | oldll: Double = Double.MinValue): Unit = { 201 | if (n > 0) { 202 | println(x) 203 | val (nx, ll) = newState(x, oldll, eps) 204 | metrop5b(n - 1, eps, nx, ll) 205 | } 206 | } 207 | 208 | 209 | @tailrec 210 | def metrop6(n: Int = 1000, eps: Double = 0.5, 211 | x: Double = 0.0, oldll: Double = Double.MinValue, 212 | acc: List[Double] = Nil): DenseVector[Double] = { 213 | if (n == 0) DenseVector(acc.reverse.toArray) else { 214 | val (nx, ll) = newState(x, oldll, eps) 215 | metrop6(n - 1, eps, nx, ll, nx :: acc) 216 | } 217 | } 218 | 219 | 220 | def nextState(eps: Double)(state: (Double, Double)): 221 | (Double, Double) = { 222 | val (x, oldll) = state 223 | val can = x + Uniform(-eps, eps).draw 224 | val loglik = Gaussian(0.0, 1.0).logPdf(can) 225 | val loga = loglik - oldll 226 | if (math.log(Uniform(0.0, 1.0).draw) < loga) 227 | (can, loglik) else (x, oldll) 228 | } 229 | 230 
| 231 | def metrop7(eps: Double = 0.5, x: Double = 0.0, 232 | oldll: Double = Double.MinValue): Stream[Double] = 233 | Stream.iterate((x,oldll))(nextState(eps)) map (_._1) 234 | 235 | 236 | def kernel(x: Double): Rand[Double] = for { 237 | innov <- Uniform(-0.5, 0.5) 238 | can = x + innov 239 | oldll = Gaussian(0.0, 1.0).logPdf(x) 240 | loglik = Gaussian(0.0, 1.0).logPdf(can) 241 | loga = loglik - oldll 242 | u <- Uniform(0.0, 1.0) 243 | } yield if (math.log(u) < loga) can else x 244 | 245 | 246 | val ms = Stream.iterate(0.0)(kernel(_).draw) 247 | ms. 248 | drop(1000). 249 | take(10000). 250 | foreach(println) 251 | 252 | 253 | case class State(x: Double, y: Double) 254 | // defined class State 255 | 256 | 257 | val s = State(1.0,2.0) 258 | // s: State = State(1.0,2.0) 259 | s.x 260 | // res0: Double = 1.0 261 | s.y 262 | // res1: Double = 2.0 263 | s.copy() 264 | // res2: State = State(1.0,2.0) 265 | s.copy(y=3) 266 | // res3: State = State(1.0,3.0) 267 | 268 | 269 | import breeze.stats.distributions._ 270 | // import breeze.stats.distributions._ 271 | 272 | def nextState(state: State): State = { 273 | val sy = state.y 274 | val x = Gamma(3.0,1.0/(sy*sy+4)).draw 275 | val y = Gaussian(1.0/(x+1),1.0/math.sqrt(2*x+2)).draw 276 | State(x,y) 277 | } 278 | 279 | 280 | val gs = Stream.iterate(State(1.0,1.0))(nextState) 281 | // gs: scala.collection.immutable.Stream[State] = 282 | // Stream(State(1.0,1.0), ?) 283 | val output = gs.drop(1000).take(100000).toArray 284 | // output: Array[State] = Array( 285 | // State(0.20703194113971382,0.874650780098001), 286 | // State(0.5813103371812548,0.4780234809903935), ... 
287 | 288 | 289 | import breeze.linalg._ 290 | val xv = DenseVector(output map (_.x)) 291 | val yv = DenseVector(output map (_.y)) 292 | 293 | import breeze.plot._ 294 | val fig = Figure("Bivariate Gibbs sampler") 295 | fig.subplot(2,2,0)+=hist(xv,50) 296 | fig.subplot(2,2,1)+=hist(yv,50) 297 | fig.subplot(2,2,2)+=plot(xv,yv,'.') 298 | 299 | 300 | def thin[T](s: Stream[T], th: Int): Stream[T] = { 301 | val ss = s.drop(th - 1) 302 | if (ss.isEmpty) Stream.empty else 303 | ss.head #:: thin(ss.tail, th) 304 | } 305 | 306 | 307 | thin(gs.drop(1000),10).take(10000).toArray 308 | 309 | 310 | // gs.drop(1000).thin(10).take(10000) 311 | 312 | 313 | def kernel(state: State): Rand[State] = for { 314 | x <- Gamma(3.0,1.0/(state.y*state.y+4)) 315 | y <- Gaussian(1.0/(x+1),1.0/math.sqrt(2*x+2)) 316 | ns = State(x,y) 317 | } yield ns 318 | 319 | val out3 = Stream.iterate(State(1.0,1.0))(kernel(_).draw). 320 | drop(1000). 321 | take(10000). 322 | toArray 323 | 324 | 325 | (th: P) => { 326 | val x0 = simx0(n, t0, th).par 327 | @tailrec def pf(ll: LogLik, x: ParVector[S], t: Time, 328 | deltas: List[Time], obs: List[O]): LogLik = 329 | obs match { 330 | case Nil => ll 331 | case head :: tail => { 332 | val xp = if (deltas.head == 0) x else 333 | (x map { stepFun(_, t, deltas.head, th) }) 334 | val w = xp map { dataLik(_, head, th) } 335 | val rows = sample(n, DenseVector(w.toArray)).par 336 | val xpp = rows map { xp(_) } 337 | pf(ll + math.log(mean(w)), xpp, t + deltas.head, 338 | deltas.tail, tail) 339 | } 340 | } 341 | pf(0, x0, t0, deltas, obs) 342 | } 343 | 344 | --------------------------------------------------------------------------------