├── .gitignore ├── LICENSE ├── README.md ├── comonads ├── .gitignore ├── DraftPost.md ├── README.md ├── build.sbt ├── heat.png ├── logmap.png ├── make-movie.sh ├── project │ └── build.properties └── src │ ├── main │ └── scala │ │ └── comonads.scala │ └── test │ └── scala │ └── comonads-test.scala ├── curry ├── README.md ├── build.sbt ├── currying.R └── currying.scala ├── docs ├── about.html ├── index.html ├── index.xml ├── listings.json ├── posts │ ├── draft │ │ └── index.html │ ├── post-with-code │ │ ├── image.jpg │ │ └── index.html │ ├── py-test │ │ ├── index.html │ │ └── index_files │ │ │ └── figure-html │ │ │ └── cell-2-output-2.png │ ├── r-test │ │ ├── index.html │ │ └── index_files │ │ │ └── figure-html │ │ │ └── unnamed-chunk-1-1.png │ └── welcome │ │ ├── index.html │ │ └── thumbnail.jpg ├── profile.jpg ├── robots.txt ├── search.json ├── site_libs │ ├── bootstrap │ │ ├── bootstrap-icons.css │ │ ├── bootstrap-icons.woff │ │ ├── bootstrap.min.css │ │ └── bootstrap.min.js │ ├── clipboard │ │ └── clipboard.min.js │ ├── quarto-html │ │ ├── anchor.min.js │ │ ├── popper.min.js │ │ ├── quarto-syntax-highlighting.css │ │ ├── quarto.js │ │ ├── tippy.css │ │ └── tippy.umd.min.js │ ├── quarto-listing │ │ ├── list.min.js │ │ └── quarto-listing.js │ ├── quarto-nav │ │ ├── headroom.min.js │ │ └── quarto-nav.js │ └── quarto-search │ │ ├── autocomplete.umd.js │ │ ├── fuse.min.js │ │ └── quarto-search.js ├── sitemap.xml └── styles.css ├── drafts ├── LmDiag.png ├── LmPairs.png ├── SfDSBook.md ├── SfDSBooks.md ├── evil.md ├── fp-ssc.md ├── index75.md ├── md2wp ├── pairs.png ├── r2pp.png ├── r2tp.png ├── rainier-intro.md ├── rainier2.md ├── rjb.md ├── scala-glm.md ├── scala-view.md ├── smfsb3e.md └── traceplots.png ├── first-monads ├── README.md ├── build.sbt ├── monads.scala └── project │ └── build.properties ├── gibbs-java-r-mvn ├── README.md ├── compile-and-run.sh ├── pom.xml ├── run-gibbs.R └── src │ └── main │ └── java │ └── darrenjw │ └── GibbsR.java ├── mcmc-stream ├── 
.gitignore ├── Draft.md ├── README.md ├── build.sbt ├── project │ └── build.properties └── src │ ├── main │ └── scala │ │ └── mcmc-stream │ │ └── mcmc-stream.scala │ └── test │ └── scala │ └── mcmc-stream-test.scala ├── min-ppl ├── .gitignore ├── Makefile ├── Readme.md ├── build.sbt ├── md2wp ├── project │ ├── build.properties │ └── plugins.sbt └── src │ ├── main │ ├── scala │ │ ├── min-ppl-examples.scala │ │ └── min-ppl.scala │ └── tut │ │ └── DraftPost.md │ └── test │ └── scala │ └── min-ppl-test.scala ├── min-ppl2 ├── .gitignore ├── Makefile ├── Readme.md ├── build.sbt ├── md2wp ├── project │ ├── build.properties │ └── plugins.sbt └── src │ ├── main │ ├── scala │ │ ├── min-ppl-examples.scala │ │ └── min-ppl.scala │ └── tut │ │ └── DraftPost.md │ └── test │ └── scala │ └── min-ppl-test.scala ├── pfilter ├── .gitignore ├── README.md ├── build.sbt ├── project │ └── build.properties └── src │ ├── main │ └── scala │ │ └── pfilter │ │ └── pfilter.scala │ └── test │ └── scala │ └── pfilter-test.scala ├── pi-cam ├── DraftPost.md ├── README.md ├── camera-script.sh ├── camera-setup.sh ├── hourly.sh ├── index.html └── web-setup.sh ├── pi-cluster ├── README.md ├── copy-keys ├── dhcpd.conf ├── install-packages ├── interfaces ├── iptables ├── map-network ├── setup-cluster ├── setup-network ├── shutdown-workers └── upgrade-workers ├── qblog ├── .gitignore ├── Makefile ├── _quarto.yml ├── about.qmd ├── index.qmd ├── posts │ ├── _metadata.yml │ ├── draft │ │ └── index.qmd │ ├── py-test │ │ └── index.qmd │ └── r-test │ │ └── index.qmd ├── profile.jpg └── styles.css ├── rainier ├── .gitignore ├── Makefile ├── Readme.md ├── build.sbt ├── docs │ ├── DraftPost.md │ └── Tutorial.md ├── project │ ├── build.properties │ └── plugins.sbt └── src │ ├── main │ └── scala │ │ └── rainier.scala │ └── test │ └── scala │ └── rainier-test.scala ├── reaction-diffusion ├── .gitignore ├── DraftPost.md ├── Makefile ├── Readme.md ├── build.sbt ├── lv-cle.mp4 ├── lv-cle.png ├── lv-cle2.mp4 ├── 
lv-cle2.png ├── lv-cle3.mp4 ├── lv-cle3.png ├── lv-exact.mp4 ├── lv-exact.png ├── lv-rre.mp4 ├── lv-rre.png ├── lv-rre2.mp4 ├── lv-rre2.png ├── lv-rre3.mp4 ├── lv-rre3.png ├── make-movie.sh ├── project │ └── build.properties ├── sir-cle.mp4 ├── sir-cle.png ├── sir-rre.mp4 ├── sir-rre.png └── src │ ├── main │ └── scala │ │ └── rd │ │ ├── LvCle.scala │ │ ├── LvCle2.scala │ │ ├── LvCle3.scala │ │ ├── LvExact.scala │ │ ├── LvRre.scala │ │ ├── LvRre2.scala │ │ ├── LvRre3.scala │ │ ├── SirCle.scala │ │ ├── SirRre.scala │ │ └── package.scala │ └── test │ └── scala │ └── reaction-diffusion-test.scala ├── sbml-scala ├── DraftPost.md ├── README.md ├── build.sbt ├── ch07-mm-stoch.xml ├── project │ └── build.properties └── src │ └── main │ └── scala │ └── jsbml.scala ├── sbt ├── sbt-launch.jar ├── sbt.bat ├── scala-dataframes ├── README.md ├── datatable │ ├── build.sbt │ └── datatable.scala ├── framian │ ├── build.sbt │ └── framian.scala ├── r │ ├── df.R │ └── gen-csv.R ├── saddle │ ├── CsvDf.scala │ └── build.sbt └── sparkdf │ └── spark.scala ├── scala-smfsb ├── .gitignore ├── build.sbt ├── project │ ├── build.properties │ └── plugins.sbt └── src │ ├── main │ ├── scala │ │ └── scala-smfsb.scala │ └── tut │ │ └── DraftPost.md │ └── test │ └── scala │ └── scala-smfsb-test.scala ├── smfsb ├── Makefile ├── md2wp └── smfsb.Rmd └── spark-intro ├── DraftPost.md ├── Makefile └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .history 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # blog 2 | 3 | Code samples associated with my blog posts 4 | 5 | My 
blogs can be found at: 6 | 7 | https://darrenjw.wordpress.com/ 8 | 9 | and 10 | 11 | http://darrenjw2.wordpress.com/ 12 | 13 | 14 | -------------------------------------------------------------------------------- /comonads/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /comonads/README.md: -------------------------------------------------------------------------------- 1 | # Comonads for scientific and statistical computing in Scala 2 | 3 | Code examples for the blog post: 4 | 5 | [Comonads for scientific and statistical computing in Scala](https://darrenjw.wordpress.com/2018/01/22/comonads-for-scientific-and-statistical-computing-in-scala/) 6 | 7 | 8 | Note that this repo contains everything that is needed to build and run the Scala code examples on any system that has Java installed. Any recent version of Java is fine. You do not need to "install" Scala or any Scala "packages" in order to run the code. If you have Java and a decent internet connection, you are good to go. This is one of the benefits of Scala - you can run it anywhere, on any system with a Java installation. 9 | 10 | To check if you have Java installed, just run: 11 | 12 | ```bash 13 | java -version 14 | ``` 15 | 16 | at your system command prompt. If you get an error, Java is absent or incorrectly installed. 
Installing Java is very easy on any platform, but the best way to install it depends on exactly what OS you are running, so search the internet for advice on the best way to install Java on your OS. 17 | 18 | The code uses `sbt` (the simple build tool) as the build tool. The sbt launcher has been included in the repo for the benefit of those new to Scala. It should be possible to run sbt from this directory by typing: 19 | 20 | ```bash 21 | ..\sbt 22 | ``` 23 | 24 | on Windows (which should run `..\sbt.bat`), or 25 | 26 | ```bash 27 | ../sbt 28 | ``` 29 | 30 | on Linux and similar systems (including Macs). If you want to be able to experiment with Scala yourself, you should copy the script and the file `sbt-launch.jar` to the same directory somewhere in your path, but this isn't necessary to run these examples. 31 | 32 | The sbt launcher script will download and run sbt, which will then download scala, the scala compiler, scala standard libraries and all dependencies needed to compile and run the code. All the downloaded files will be cached on your system for future use. Therefore, make sure you have a good internet connection and a bit of free disk space before running sbt for the first time. 33 | 34 | Assuming you can run sbt, just typing `run` at the sbt prompt will compile and run the example code. Typing `console` will give a Scala REPL with a properly configured classpath including all dependencies. You can type scala expressions directly into the REPL just as you would in your favourite dynamic math/stat language. Type `help` at the sbt prompt for help on sbt. Type `:help` at the Scala REPL for help on the REPL. 35 | 36 | The best way to follow the blog post is to copy-and-paste lines of code directly from the blog post to the sbt `console`. 
37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /comonads/build.sbt: -------------------------------------------------------------------------------- 1 | name := "comonads" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature", 7 | "-Ypartial-unification", "-language:higherKinds" 8 | ) 9 | 10 | libraryDependencies ++= Seq( 11 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 12 | "org.scalanlp" %% "breeze" % "0.13", 13 | "org.scalanlp" %% "breeze-viz" % "0.13", 14 | "org.scalanlp" %% "breeze-natives" % "0.13", 15 | "org.typelevel" %% "cats-jvm" % "1.0.1" 16 | ) 17 | 18 | addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full) 19 | 20 | scalaVersion := "2.12.1" 21 | 22 | -------------------------------------------------------------------------------- /comonads/heat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/comonads/heat.png -------------------------------------------------------------------------------- /comonads/logmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/comonads/logmap.png -------------------------------------------------------------------------------- /comonads/make-movie.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # make-movie.sh 3 | 4 | rm -f ising????-s.jpg 5 | 6 | for name in ising????.png 7 | do 8 | short="${name%.*}" 9 | echo $short 10 | #pngtopnm "$name" | pnmscale 20 | pnmtopng > "${short}-s.png" 11 | convert -flatten "$name" "${short}-s.jpg" 12 | done 13 | 14 | rm -f movie.mp4 15 | 16 | avconv -r 20 -i ising%04d-s.jpg movie.mp4 17 | 18 | # eof 19 | 
-------------------------------------------------------------------------------- /comonads/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.13 2 | -------------------------------------------------------------------------------- /comonads/src/test/scala/comonads-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | "A Poisson(10.0)" should "have mean 10.0" in { 6 | import breeze.stats.distributions.Poisson 7 | val p = Poisson(10.0) 8 | val m = p.mean 9 | assert(math.abs(m - 10.0) < 0.000001) 10 | } 11 | 12 | } 13 | 14 | -------------------------------------------------------------------------------- /curry/README.md: -------------------------------------------------------------------------------- 1 | # HOFs, closures, partial application and Currying to solve the function environment problem in Scala 2 | 3 | 4 | Code examples for the blog post: 5 | 6 | https://darrenjw.wordpress.com/2015/11/16/hofs-closures-partial-application-and-currying-to-solve-the-function-environment-problem-in-scala/ 7 | 8 | The main example is in Scala, though there is also an R script which illustrates how similar concepts apply to the R language. 9 | 10 | Note that this repo contains everything that is needed to build and run the Scala code examples on any system that has Java installed. Any recent version of Java is fine. You do not need to "install" Scala or any Scala "packages" in order to run the code. If you have Java and a decent internet connection, you are good to go. This is one of the benefits of Scala - you can run it anywhere, on any system with a Java installation. 11 | 12 | To check if you have Java installed, just run: 13 | 14 | ```bash 15 | java -version 16 | ``` 17 | 18 | at your system command prompt. If you get an error, Java is absent or incorrectly installed. 
Installing Java is very easy on any platform, but the best way to install it depends on exactly what OS you are running, so search the internet for advice on the best way to install Java on your OS. 19 | 20 | The code uses `sbt` (the simple build tool) as the build tool. The sbt launcher has been included in the repo for the benefit of those new to Scala. It should be possible to run sbt from this directory by typing: 21 | 22 | ```bash 23 | ..\sbt 24 | ``` 25 | 26 | on Windows (which should run `..\sbt.bat`), or 27 | 28 | ```bash 29 | ../sbt 30 | ``` 31 | 32 | on Linux and similar systems (including Macs). If you want to be able to experiment with Scala yourself, you should copy the script and the file `sbt-launch.jar` to the same directory somewhere in your path, but this isn't necessary to run these examples. 33 | 34 | The sbt launcher script will download and run sbt, which will then download scala, the scala compiler, scala standard libraries and all dependencies needed to compile and run the code. All the downloaded files will be cached on your system for future use. Therefore, make sure you have a good internet connection and a bit of free disk space before running sbt for the first time. 35 | 36 | Assuming you can run sbt, just typing `run` at the sbt prompt will compile and run the example code. Typing `console` will give a Scala REPL with a properly configured classpath including all dependencies. You can type scala expressions directly into the REPL just as you would in your favourite dynamic math/stat language. Type `help` at the sbt prompt for help on sbt. Type `:help` at the Scala REPL for help on the REPL. 
37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /curry/build.sbt: -------------------------------------------------------------------------------- 1 | name := "currying" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "org.scalanlp" %% "breeze" % "0.11.2", 11 | "org.scalanlp" %% "breeze-natives" % "0.11.2", 12 | "org.scalanlp" %% "breeze-viz" % "0.11.2" 13 | ) 14 | 15 | resolvers ++= Seq( 16 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 17 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 18 | ) 19 | 20 | scalaVersion := "2.11.6" 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /curry/currying.R: -------------------------------------------------------------------------------- 1 | # R language script for illustrating HOF concepts in R 2 | 3 | curve(x*x,-3,3) 4 | 5 | myQuad1 = function(x) x*x + 2*x + 3 6 | curve(myQuad1,-3,3) 7 | 8 | quadratic = function(a,b,c,x) a*x*x + b*x + c 9 | myQuad2 = function(x) quadratic(-1,2,3,x) 10 | curve(myQuad2,-3,3) 11 | 12 | quadFun = function(a,b,c) function(x) quadratic(a,b,c,x) 13 | myQuad3 = quadFun(1,2,3) 14 | curve(myQuad3,-3,3) 15 | 16 | # eof 17 | 18 | 19 | -------------------------------------------------------------------------------- /curry/currying.scala: -------------------------------------------------------------------------------- 1 | /* 2 | currying.scala 3 | */ 4 | 5 | import breeze.plot._ 6 | 7 | object Currying 8 | { 9 | 10 | def plotFun(fun: Double => Double, xmin: Double = -3.0, xmax: Double = 3.0): Figure = { 11 | val f = Figure() 12 | val p = f.subplot(0) 13 | import breeze.linalg._ 14 | val x = 
linspace(xmin,xmax) 15 | p += plot(x, x map fun) 16 | p.xlabel = "x" 17 | p.ylabel = "f(x)" 18 | f 19 | } 20 | 21 | def main(args: Array[String]): Unit = { 22 | println("Hello") 23 | 24 | 25 | val l1 = List(1,2,3) 26 | val l2 = 4 :: l1 27 | println(l2) 28 | val l3 = l2 map { x => x*x } 29 | println(l3) 30 | val l4 = l2.map(x => x*x) 31 | println(l4) 32 | 33 | plotFun(x => x*x) 34 | 35 | def myQuad1(x: Double): Double = x*x - 2*x + 1 36 | plotFun(myQuad1) 37 | def myQuad2(x: Double): Double = x*x - 3*x - 1 38 | plotFun(myQuad2) 39 | 40 | val myQuad3: (Double => Double) = x => -x*x + 2 41 | plotFun(myQuad3) 42 | 43 | def quadratic(a: Double, b: Double, c: Double, x: Double): Double = a*x*x + b*x + c 44 | plotFun(x => quadratic(3,2,1,x)) 45 | 46 | def quadFun(a: Double, b: Double, c: Double): Double => Double = 47 | x => quadratic(a,b,c,x) 48 | val myQuad4 = quadFun(2,1,3) 49 | plotFun(myQuad4) 50 | plotFun(quadFun(1,2,3)) 51 | 52 | val quadFunF: (Double,Double,Double) => Double => Double = 53 | (a,b,c) => x => quadratic(a,b,c,x) 54 | val myQuad5 = quadFunF(-1,1,2) 55 | plotFun(myQuad5) 56 | plotFun(quadFunF(1,-2,3)) 57 | 58 | val myQuad6 = quadratic(1,2,3,_: Double) 59 | plotFun(myQuad6) 60 | 61 | def quad(a: Double, b: Double, c: Double)(x: Double): Double = a*x*x + b*x + c 62 | plotFun(quad(1,2,-3)) 63 | val myQuad7 = quad(1,0,1) _ 64 | plotFun(myQuad7) 65 | 66 | def quadCurried = (quadratic _).curried 67 | plotFun(quadCurried(1)(2)(3)) 68 | 69 | val quadraticF: (Double,Double,Double,Double) => Double = (a,b,c,x) => a*x*x + b*x + c 70 | def quadCurried2 = quadraticF.curried 71 | plotFun(quadCurried2(-1)(2)(3)) 72 | 73 | println("Goodbye") 74 | } 75 | 76 | } 77 | 78 | // eof 79 | 80 | -------------------------------------------------------------------------------- /docs/index.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | DJW's blog 9 | https://darrenjw.github.io/blog/index.html 10 | 11 | Darren Wilkinson's blog 
12 | quarto-1.2.335 13 | Mon, 04 Mar 2024 00:00:00 GMT 14 | 15 | R test 16 | Darren Wilkinson 17 | https://darrenjw.github.io/blog/posts/r-test/index.html 18 | 24 |

R test

25 |

A test post including some R code.

26 |
27 |
hist(rnorm(1000))
28 |
29 |

30 |
31 |
32 |

Blah.

33 | 34 | 35 | 36 | 37 | ]]>
38 | R 39 | code 40 | https://darrenjw.github.io/blog/posts/r-test/index.html 41 | Mon, 04 Mar 2024 00:00:00 GMT 42 |
43 | 44 | Python test 45 | Darren Wilkinson 46 | https://darrenjw.github.io/blog/posts/py-test/index.html 47 | 53 |

Python test

54 |

A test post including some python code

55 |
56 |
x = [1,2,3]
57 | print(x[1])
58 | 
59 | import matplotlib.pyplot as plt
60 | fig, axis = plt.subplots()
61 | axis.plot([0,1,2,3,4,5], [3,4,6,5,2,4])
62 |
63 |
2
64 |
65 |
66 |

67 |
68 |
69 |

Blah.

70 | 71 | 72 | 73 | 74 | ]]>
75 | python 76 | code 77 | https://darrenjw.github.io/blog/posts/py-test/index.html 78 | Mon, 04 Mar 2024 00:00:00 GMT 79 |
80 |
81 |
82 | -------------------------------------------------------------------------------- /docs/listings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "listing": "/index.html", 4 | "items": [ 5 | "/posts/r-test/index.html", 6 | "/posts/py-test/index.html" 7 | ] 8 | } 9 | ] -------------------------------------------------------------------------------- /docs/posts/post-with-code/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/docs/posts/post-with-code/image.jpg -------------------------------------------------------------------------------- /docs/posts/py-test/index_files/figure-html/cell-2-output-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/docs/posts/py-test/index_files/figure-html/cell-2-output-2.png -------------------------------------------------------------------------------- /docs/posts/r-test/index_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/docs/posts/r-test/index_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/posts/welcome/thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/docs/posts/welcome/thumbnail.jpg -------------------------------------------------------------------------------- /docs/profile.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/docs/profile.jpg -------------------------------------------------------------------------------- /docs/robots.txt: -------------------------------------------------------------------------------- 1 | Sitemap: https://darrenjw.github.io/blog/sitemap.xml 2 | -------------------------------------------------------------------------------- /docs/search.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "objectID": "about.html", 4 | "href": "about.html", 5 | "title": "About", 6 | "section": "", 7 | "text": "This is the blog of Darren Wilkinson.\nThis blog is intended to cover my reflections on mathematics, statistics, machine learning, AI, computing and biology, and especially their interactions, and relationship with “big data” and data science. This blog replaces my two wordpress blogs: my main blog and my personal blog. Wordpress served me well for more than a decade, but these days I dislike having to write anything other than Quarto, so I’ve taken the plunge and moved to a Quarto based blog." 8 | }, 9 | { 10 | "objectID": "posts/r-test/index.html", 11 | "href": "posts/r-test/index.html", 12 | "title": "R test", 13 | "section": "", 14 | "text": "R test\nA test post including some R code.\n\nhist(rnorm(1000))\n\n\n\n\nBlah." 15 | }, 16 | { 17 | "objectID": "posts/py-test/index.html", 18 | "href": "posts/py-test/index.html", 19 | "title": "Python test", 20 | "section": "", 21 | "text": "Python test\nA test post including some python code\n\nx = [1,2,3]\nprint(x[1])\n\nimport matplotlib.pyplot as plt\nfig, axis = plt.subplots()\naxis.plot([0,1,2,3,4,5], [3,4,6,5,2,4])\n\n2\n\n\n\n\n\nBlah." 
22 | }, 23 | { 24 | "objectID": "index.html", 25 | "href": "index.html", 26 | "title": "Darren Wilkinson’s blog", 27 | "section": "", 28 | "text": "R test\n\n\n\n\n\n\n\nR\n\n\ncode\n\n\n\n\n\n\n\n\n\n\n\nMar 4, 2024\n\n\nDarren Wilkinson\n\n\n\n\n\n\n\n\nPython test\n\n\n\n\n\n\n\npython\n\n\ncode\n\n\n\n\n\n\n\n\n\n\n\nMar 4, 2024\n\n\nDarren Wilkinson\n\n\n\n\n\n\nNo matching items" 29 | } 30 | ] -------------------------------------------------------------------------------- /docs/site_libs/bootstrap/bootstrap-icons.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/docs/site_libs/bootstrap/bootstrap-icons.woff -------------------------------------------------------------------------------- /docs/site_libs/quarto-html/anchor.min.js: -------------------------------------------------------------------------------- 1 | // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat 2 | // 3 | // AnchorJS - v4.3.1 - 2021-04-17 4 | // https://www.bryanbraun.com/anchorjs/ 5 | // Copyright (c) 2021 Bryan Braun; Licensed MIT 6 | // 7 | // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat 8 | !function(A,e){"use strict";"function"==typeof define&&define.amd?define([],e):"object"==typeof module&&module.exports?module.exports=e():(A.AnchorJS=e(),A.anchors=new A.AnchorJS)}(this,function(){"use strict";return function(A){function 
d(A){A.icon=Object.prototype.hasOwnProperty.call(A,"icon")?A.icon:"",A.visible=Object.prototype.hasOwnProperty.call(A,"visible")?A.visible:"hover",A.placement=Object.prototype.hasOwnProperty.call(A,"placement")?A.placement:"right",A.ariaLabel=Object.prototype.hasOwnProperty.call(A,"ariaLabel")?A.ariaLabel:"Anchor",A.class=Object.prototype.hasOwnProperty.call(A,"class")?A.class:"",A.base=Object.prototype.hasOwnProperty.call(A,"base")?A.base:"",A.truncate=Object.prototype.hasOwnProperty.call(A,"truncate")?Math.floor(A.truncate):64,A.titleText=Object.prototype.hasOwnProperty.call(A,"titleText")?A.titleText:""}function w(A){var e;if("string"==typeof A||A instanceof String)e=[].slice.call(document.querySelectorAll(A));else{if(!(Array.isArray(A)||A instanceof NodeList))throw new TypeError("The selector provided to AnchorJS was invalid.");e=[].slice.call(A)}return e}this.options=A||{},this.elements=[],d(this.options),this.isTouchDevice=function(){return Boolean("ontouchstart"in window||window.TouchEvent||window.DocumentTouch&&document instanceof DocumentTouch)},this.add=function(A){var e,t,o,i,n,s,a,c,r,l,h,u,p=[];if(d(this.options),"touch"===(l=this.options.visible)&&(l=this.isTouchDevice()?"always":"hover"),0===(e=w(A=A||"h2, h3, h4, h5, h6")).length)return this;for(null===document.head.querySelector("style.anchorjs")&&((u=document.createElement("style")).className="anchorjs",u.appendChild(document.createTextNode("")),void 
0===(A=document.head.querySelector('[rel="stylesheet"],style'))?document.head.appendChild(u):document.head.insertBefore(u,A),u.sheet.insertRule(".anchorjs-link{opacity:0;text-decoration:none;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}",u.sheet.cssRules.length),u.sheet.insertRule(":hover>.anchorjs-link,.anchorjs-link:focus{opacity:1}",u.sheet.cssRules.length),u.sheet.insertRule("[data-anchorjs-icon]::after{content:attr(data-anchorjs-icon)}",u.sheet.cssRules.length),u.sheet.insertRule('@font-face{font-family:anchorjs-icons;src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) 
format("truetype")}',u.sheet.cssRules.length)),u=document.querySelectorAll("[id]"),t=[].map.call(u,function(A){return A.id}),i=0;i\]./()*\\\n\t\b\v\u00A0]/g,"-").replace(/-{2,}/g,"-").substring(0,this.options.truncate).replace(/^-+|-+$/gm,"").toLowerCase()},this.hasAnchorJSLink=function(A){var e=A.firstChild&&-1<(" "+A.firstChild.className+" ").indexOf(" anchorjs-link "),A=A.lastChild&&-1<(" "+A.lastChild.className+" ").indexOf(" anchorjs-link ");return e||A||!1}}}); 9 | // @license-end -------------------------------------------------------------------------------- /docs/site_libs/quarto-html/quarto-syntax-highlighting.css: -------------------------------------------------------------------------------- 1 | /* quarto syntax highlight colors */ 2 | :root { 3 | --quarto-hl-ot-color: #003B4F; 4 | --quarto-hl-at-color: #657422; 5 | --quarto-hl-ss-color: #20794D; 6 | --quarto-hl-an-color: #5E5E5E; 7 | --quarto-hl-fu-color: #4758AB; 8 | --quarto-hl-st-color: #20794D; 9 | --quarto-hl-cf-color: #003B4F; 10 | --quarto-hl-op-color: #5E5E5E; 11 | --quarto-hl-er-color: #AD0000; 12 | --quarto-hl-bn-color: #AD0000; 13 | --quarto-hl-al-color: #AD0000; 14 | --quarto-hl-va-color: #111111; 15 | --quarto-hl-bu-color: inherit; 16 | --quarto-hl-ex-color: inherit; 17 | --quarto-hl-pp-color: #AD0000; 18 | --quarto-hl-in-color: #5E5E5E; 19 | --quarto-hl-vs-color: #20794D; 20 | --quarto-hl-wa-color: #5E5E5E; 21 | --quarto-hl-do-color: #5E5E5E; 22 | --quarto-hl-im-color: #00769E; 23 | --quarto-hl-ch-color: #20794D; 24 | --quarto-hl-dt-color: #AD0000; 25 | --quarto-hl-fl-color: #AD0000; 26 | --quarto-hl-co-color: #5E5E5E; 27 | --quarto-hl-cv-color: #5E5E5E; 28 | --quarto-hl-cn-color: #8f5902; 29 | --quarto-hl-sc-color: #5E5E5E; 30 | --quarto-hl-dv-color: #AD0000; 31 | --quarto-hl-kw-color: #003B4F; 32 | } 33 | 34 | /* other quarto variables */ 35 | :root { 36 | --quarto-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; 37 | } 38 | 39 | pre 
> code.sourceCode > span { 40 | color: #003B4F; 41 | } 42 | 43 | code span { 44 | color: #003B4F; 45 | } 46 | 47 | code.sourceCode > span { 48 | color: #003B4F; 49 | } 50 | 51 | div.sourceCode, 52 | div.sourceCode pre.sourceCode { 53 | color: #003B4F; 54 | } 55 | 56 | code span.ot { 57 | color: #003B4F; 58 | } 59 | 60 | code span.at { 61 | color: #657422; 62 | } 63 | 64 | code span.ss { 65 | color: #20794D; 66 | } 67 | 68 | code span.an { 69 | color: #5E5E5E; 70 | } 71 | 72 | code span.fu { 73 | color: #4758AB; 74 | } 75 | 76 | code span.st { 77 | color: #20794D; 78 | } 79 | 80 | code span.cf { 81 | color: #003B4F; 82 | } 83 | 84 | code span.op { 85 | color: #5E5E5E; 86 | } 87 | 88 | code span.er { 89 | color: #AD0000; 90 | } 91 | 92 | code span.bn { 93 | color: #AD0000; 94 | } 95 | 96 | code span.al { 97 | color: #AD0000; 98 | } 99 | 100 | code span.va { 101 | color: #111111; 102 | } 103 | 104 | code span.pp { 105 | color: #AD0000; 106 | } 107 | 108 | code span.in { 109 | color: #5E5E5E; 110 | } 111 | 112 | code span.vs { 113 | color: #20794D; 114 | } 115 | 116 | code span.wa { 117 | color: #5E5E5E; 118 | font-style: italic; 119 | } 120 | 121 | code span.do { 122 | color: #5E5E5E; 123 | font-style: italic; 124 | } 125 | 126 | code span.im { 127 | color: #00769E; 128 | } 129 | 130 | code span.ch { 131 | color: #20794D; 132 | } 133 | 134 | code span.dt { 135 | color: #AD0000; 136 | } 137 | 138 | code span.fl { 139 | color: #AD0000; 140 | } 141 | 142 | code span.co { 143 | color: #5E5E5E; 144 | } 145 | 146 | code span.cv { 147 | color: #5E5E5E; 148 | font-style: italic; 149 | } 150 | 151 | code span.cn { 152 | color: #8f5902; 153 | } 154 | 155 | code span.sc { 156 | color: #5E5E5E; 157 | } 158 | 159 | code span.dv { 160 | color: #AD0000; 161 | } 162 | 163 | code span.kw { 164 | color: #003B4F; 165 | } 166 | 167 | .prevent-inlining { 168 | content: ".tippy-arrow{bottom:0}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-7px;left:0;border-width:8px 8px 
0;border-top-color:initial;transform-origin:center top}.tippy-box[data-placement^=bottom]>.tippy-arrow{top:0}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-7px;left:0;border-width:0 8px 8px;border-bottom-color:initial;transform-origin:center bottom}.tippy-box[data-placement^=left]>.tippy-arrow{right:0}.tippy-box[data-placement^=left]>.tippy-arrow:before{border-width:8px 0 8px 8px;border-left-color:initial;right:-7px;transform-origin:center left}.tippy-box[data-placement^=right]>.tippy-arrow{left:0}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-7px;border-width:8px 8px 8px 0;border-right-color:initial;transform-origin:center right}.tippy-box[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{width:16px;height:16px;color:#333}.tippy-arrow:before{content:"";position:absolute;border-color:transparent;border-style:solid}.tippy-content{position:relative;padding:5px 9px;z-index:1} -------------------------------------------------------------------------------- /docs/site_libs/quarto-nav/headroom.min.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * headroom.js v0.12.0 - Give your page some headroom. 
Hide your header until you need it 3 | * Copyright (c) 2020 Nick Williams - http://wicky.nillia.ms/headroom.js 4 | * License: MIT 5 | */ 6 | 7 | !function(t,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(t=t||self).Headroom=n()}(this,function(){"use strict";function t(){return"undefined"!=typeof window}function d(t){return function(t){return t&&t.document&&function(t){return 9===t.nodeType}(t.document)}(t)?function(t){var n=t.document,o=n.body,s=n.documentElement;return{scrollHeight:function(){return Math.max(o.scrollHeight,s.scrollHeight,o.offsetHeight,s.offsetHeight,o.clientHeight,s.clientHeight)},height:function(){return t.innerHeight||s.clientHeight||o.clientHeight},scrollY:function(){return void 0!==t.pageYOffset?t.pageYOffset:(s||o.parentNode||o).scrollTop}}}(t):function(t){return{scrollHeight:function(){return Math.max(t.scrollHeight,t.offsetHeight,t.clientHeight)},height:function(){return Math.max(t.offsetHeight,t.clientHeight)},scrollY:function(){return t.scrollTop}}}(t)}function n(t,s,e){var n,o=function(){var n=!1;try{var t={get passive(){n=!0}};window.addEventListener("test",t,t),window.removeEventListener("test",t,t)}catch(t){n=!1}return n}(),i=!1,r=d(t),l=r.scrollY(),a={};function c(){var t=Math.round(r.scrollY()),n=r.height(),o=r.scrollHeight();a.scrollY=t,a.lastScrollY=l,a.direction=ls.tolerance[a.direction],e(a),l=t,i=!1}function h(){i||(i=!0,n=requestAnimationFrame(c))}var u=!!o&&{passive:!0,capture:!1};return t.addEventListener("scroll",h,u),c(),{destroy:function(){cancelAnimationFrame(n),t.removeEventListener("scroll",h,u)}}}function o(t){return t===Object(t)?t:{down:t,up:t}}function s(t,n){n=n||{},Object.assign(this,s.options,n),this.classes=Object.assign({},s.options.classes,n.classes),this.elem=t,this.tolerance=o(this.tolerance),this.offset=o(this.offset),this.initialised=!1,this.frozen=!1}return s.prototype={constructor:s,init:function(){return 
s.cutsTheMustard&&!this.initialised&&(this.addClass("initial"),this.initialised=!0,setTimeout(function(t){t.scrollTracker=n(t.scroller,{offset:t.offset,tolerance:t.tolerance},t.update.bind(t))},100,this)),this},destroy:function(){this.initialised=!1,Object.keys(this.classes).forEach(this.removeClass,this),this.scrollTracker.destroy()},unpin:function(){!this.hasClass("pinned")&&this.hasClass("unpinned")||(this.addClass("unpinned"),this.removeClass("pinned"),this.onUnpin&&this.onUnpin.call(this))},pin:function(){this.hasClass("unpinned")&&(this.addClass("pinned"),this.removeClass("unpinned"),this.onPin&&this.onPin.call(this))},freeze:function(){this.frozen=!0,this.addClass("frozen")},unfreeze:function(){this.frozen=!1,this.removeClass("frozen")},top:function(){this.hasClass("top")||(this.addClass("top"),this.removeClass("notTop"),this.onTop&&this.onTop.call(this))},notTop:function(){this.hasClass("notTop")||(this.addClass("notTop"),this.removeClass("top"),this.onNotTop&&this.onNotTop.call(this))},bottom:function(){this.hasClass("bottom")||(this.addClass("bottom"),this.removeClass("notBottom"),this.onBottom&&this.onBottom.call(this))},notBottom:function(){this.hasClass("notBottom")||(this.addClass("notBottom"),this.removeClass("bottom"),this.onNotBottom&&this.onNotBottom.call(this))},shouldUnpin:function(t){return"down"===t.direction&&!t.top&&t.toleranceExceeded},shouldPin:function(t){return"up"===t.direction&&t.toleranceExceeded||t.top},addClass:function(t){this.elem.classList.add.apply(this.elem.classList,this.classes[t].split(" "))},removeClass:function(t){this.elem.classList.remove.apply(this.elem.classList,this.classes[t].split(" "))},hasClass:function(t){return this.classes[t].split(" ").every(function(t){return 
this.classList.contains(t)},this.elem)},update:function(t){t.isOutOfBounds||!0!==this.frozen&&(t.top?this.top():this.notTop(),t.bottom?this.bottom():this.notBottom(),this.shouldUnpin(t)?this.unpin():this.shouldPin(t)&&this.pin())}},s.options={tolerance:{up:0,down:0},offset:0,scroller:t()?window:null,classes:{frozen:"headroom--frozen",pinned:"headroom--pinned",unpinned:"headroom--unpinned",top:"headroom--top",notTop:"headroom--not-top",bottom:"headroom--bottom",notBottom:"headroom--not-bottom",initial:"headroom"}},s.cutsTheMustard=!!(t()&&function(){}.bind&&"classList"in document.documentElement&&Object.assign&&Object.keys&&requestAnimationFrame),s}); 8 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | https://darrenjw.github.io/blog/about.html 5 | 2024-03-10T18:35:37.374Z 6 | 7 | 8 | https://darrenjw.github.io/blog/posts/r-test/index.html 9 | 2024-03-10T18:35:37.750Z 10 | 11 | 12 | https://darrenjw.github.io/blog/posts/py-test/index.html 13 | 2024-03-10T18:35:38.182Z 14 | 15 | 16 | https://darrenjw.github.io/blog/index.html 17 | 2024-03-10T18:35:38.334Z 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/styles.css: -------------------------------------------------------------------------------- 1 | /* css styles */ 2 | -------------------------------------------------------------------------------- /drafts/LmDiag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/drafts/LmDiag.png -------------------------------------------------------------------------------- /drafts/LmPairs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/drafts/LmPairs.png -------------------------------------------------------------------------------- /drafts/SfDSBook.md: -------------------------------------------------------------------------------- 1 | # Scala for Data Science [book review] 2 | 3 | *This is the draft of my blog post: https://darrenjw.wordpress.com/2016/12/22/scala-for-data-science-book-review/ - This is not the definitive version.* 4 | 5 | This post will review the book: 6 | 7 | * [Scala for Data Science](http://amzn.to/2hKGIz2), Bugnion, Packt, 2016. 8 | 9 | *Disclaimer: This book review has not been solicited by the publisher (or anyone else) in any way. I purchased the review copy of this book myself. I have not received any benefit from the writing of this review.* 10 | 11 | ## Introduction 12 | 13 | On this blog I previously reviewed the (terrible) book, [Scala for machine learning](https://darrenjw.wordpress.com/2015/04/09/scala-for-machine-learning-book-review/) by the same publisher. I was therefore rather wary of buying this book. But the topic coverage looked good, so I decided to buy it, and wasn't disappointed. Scala for Data Science is my top recommendation for getting started with statistical computing and data science applications using Scala. 14 | 15 | ## Overview 16 | 17 | The book assumes a basic familiarity with programming in Scala, at around the level of someone who has completed the [Functional Programming Principles in Scala](https://www.coursera.org/learn/progfun1) Coursera course. That is, it (quite sensibly) doesn't attempt to teach the reader how to program in Scala, but rather how to approach the development of data science applications using Scala. It introduces more advanced Scala idioms gradually (eg. typeclasses don't appear until Chapter 5), so it is relatively approachable for those who aren't yet Scala experts. 
The book does cover [Apache Spark](http://spark.apache.org/), but Spark isn't introduced until Chapter 10, so it isn't "just another Spark book". Most of the book is about developing data science applications in Scala, completely independently of Spark. That said, it also provides one of the better introductions to Spark, so doubles up as a pretty good introductory Spark book, in addition to being a good introduction to the development of data science applications with Scala. It should probably be emphasised that the book is very much focused on data science, rather than statistical computing, but there is plenty of material of relevance to those who are more interested in statistical computing than applied data science. 18 | 19 | 20 | ## Chapter by chapter 21 | 22 | 1. *Scala and Data Science* - motivation for using Scala in preference to certain other languages I could mention... 23 | 2. *Manipulating data with Breeze* - [Breeze](https://github.com/scalanlp/breeze) is the standard Scala library for scientific and statistical computing. It's pretty good, but documentation is rather lacking. This Chapter provides a good tutorial introduction to Breeze, which should be enough to get people going sufficiently to be able to make some sense of the available on-line documentation. 24 | 3. *Plotting with breeze-viz* - Breeze has some support for plotting and visualisation of data. It's somewhat limited when compared to what is available in R, but is fine for interactive exploratory analysis. However, the available on-line documentation for breeze-viz is almost non-existent. This Chapter is the best introduction to breeze-viz that I have seen. 25 | 4. *Parallel collections and futures* - the Scala standard library has built-in support for parallel and concurrent programming based on functional programming concepts such as parallel (monadic) collections and Futures. 
Again, this Chapter provides an excellent introduction to these powerful concepts, allowing the reader to start developing parallel algorithms for multi-core hardware with minimal fuss. 26 | 5. *Scala and SQL through JDBC* - this Chapter looks at connecting to databases using standard JVM mechanisms such as JDBC. However, it gradually introduces more functional ways of interfacing with databases using typeclasses, motivating: 27 | 6. *Slick - a functional interface for SQL* - an introduction to the Slick library for a more Scala-esque way of database interfacing. 28 | 7. *Web APIs* - the practicalities of talking to web APIs. eg. authenticated HTTP requests and parsing of JSON responses. 29 | 8. *Scala and MongoDB* - working with a NoSQL database from Scala 30 | 9. *Concurrency with Akka* - Akka is the canonical implementation of the actor model in Scala, for building large concurrent applications. It is the foundation on which Spark is built. 31 | 10. *Distributed batch processing with Spark* - a tutorial introduction to Apache Spark. Spark is a big data analytics framework built on top of Scala and Akka. It is arguably the best available framework for big data analytics on computing clusters in the cloud, and hence there is a lot of interest in it. Indeed, Spark is driving some of the interest in Scala. 32 | 11. *Spark SQL and DataFrames* - interfacing with databases using Spark, and more importantly, an introduction to Spark's DataFrame abstraction, which is now fundamental to developing machine learning pipelines in Spark. 33 | 12. *Distributed machine learning with MLLib* - MLLib is the machine learning library for Spark. It is worth emphasising that unlike many early books on Spark, this chapter covers the newer DataFrame-based pipeline API, in addition to the original RDD-based API. Together, Chapters 11 and 12 provide a pretty good tutorial introduction to Spark. 
After working through these, it should be easy to engage with the official on-line Spark documentation. 34 | 13. *Web APIs with Play* - is concerned with developing a web API at the end of a data science pipeline. 35 | 14. *Visualisation with D3 and the Play framework* - is concerned with integrating visualisation into a data science web application. 36 | 37 | ## Summary 38 | 39 | This book provides a good tutorial introduction to a large number of topics relevant to statisticians and data scientists interested in developing data science applications using Scala. After working through this book, readers should be well-placed to augment their knowledge with readily searchable on-line documentation. 40 | 41 | In a follow-up post I will give a quick overview of some other books relevant to getting started with Scala for statistical computing and data science. 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /drafts/evil.md: -------------------------------------------------------------------------------- 1 | # Using EvilPlot with scala-view 2 | 3 | ## EvilPlot 4 | 5 | [EvilPlot](https://cibotech.github.io/evilplot/) is a new functional data visualisation library for Scala. Although there are several data viz options for Scala, this new library has a nice functional API for producing attractive, flexible, compositional plots which can be rendered in JVM applications and in web applications (via Scala.js). For a quick introduction, see this [blog post](https://medium.com/@CiboEng/evilplot-a-combinator-based-plotting-library-for-scala-4531f00208) from one of the library's creators. For further information, see the [official documentation](https://cibotech.github.io/evilplot/) and the [github repo](https://github.com/cibotech/evilplot). For a quick overview of the kinds of plots that the library is capable of generating, see the [plot catalog](https://cibotech.github.io/evilplot/plot-catalog.html). 
6 | 7 | The library is designed to produce plots which can be rendered into applications. However, when doing data analysis in the REPL on the JVM, it is often convenient to be able to just pop up a plot in a window on the desktop. EvilPlot doesn't seem to contain code for on-screen rendering, but the plots can be rendered to a bitmap image. In the [previous post](https://darrenjw.wordpress.com/2018/03/01/scala-view-animate-streams-of-images/) I described a small library, [scala-view](https://github.com/darrenjw/scala-view/), which renders such images, and image sequences on the desktop. In this post I'll walk through using scala-view to render EvilPlot plots on-screen. 8 | 9 | ## An interactive session 10 | 11 | To follow this session, you just need to run [SBT](https://www.scala-sbt.org/) from an empty directory. Just run `sbt` and paste the following at the SBT prompt: 12 | ```scala 13 | set libraryDependencies += "com.cibo" %% "evilplot" % "0.2.0" 14 | set libraryDependencies += "com.github.darrenjw" %% "scala-view" % "0.6-SNAPSHOT" 15 | set resolvers += Resolver.bintrayRepo("cibotech", "public") 16 | set resolvers += "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/" 17 | set scalaVersion := "2.12.4" 18 | set fork := true 19 | console 20 | ``` 21 | 22 | ### Displaying a single plot 23 | 24 | This will give a Scala REPL prompt. First we need some imports: 25 | ```scala 26 | import com.cibo.evilplot.plot._ 27 | import com.cibo.evilplot.colors._ 28 | import com.cibo.evilplot.plot.aesthetics.DefaultTheme._ 29 | import com.cibo.evilplot.numeric.Point 30 | import java.awt.Image.SCALE_SMOOTH 31 | import scalaview.Utils._ 32 | ``` 33 | 34 | We can simulate some data an produce a simple line chart: 35 | ```scala 36 | val data = Seq.tabulate(100) { i => 37 | Point(i.toDouble, scala.util.Random.nextDouble()) 38 | } 39 | val plot = LinePlot.series(data, "Line graph", HSL(210, 100, 56)). 40 | xAxis().yAxis().frame(). 
41 | xLabel("x").yLabel("y").render() 42 | ``` 43 | 44 | This `plot` object contains the rendering instructions, but doesn't actually produce a plot. We can use scala-view to display it as follows: 45 | ```scala 46 | scalaview.SfxImageViewer(biResize(plot.asBufferedImage,1000,800,SCALE_SMOOTH)) 47 | ``` 48 | This will produce a window on screen something like the following: 49 | 50 | PLOT HERE 51 | 52 | Don't close this plot yet, as this will confuse the REPL. Just switch back to the REPL and continue. 53 | 54 | ### Displaying a sequence of plots 55 | 56 | Sometimes we want to produce a sequence of plots. Let's now suppose that the data above arises sequentially as a stream, and that we want to produce a sequence of plots with each observation as it arrives. First create a stream of partial datasets and map a function which turns a dataset into a plot to get a stream of images representing the plots. Then pass the stream of images into the viewer to get an animated sequence of plots on-screen 57 | 58 | ```scala 59 | val dataStream = data.toStream 60 | val cumulStream = dataStream.scanLeft(Nil: List[Point])((l,p) => p :: l).drop(1) 61 | def dataToImage(data: List[Point]) = LinePlot. 62 | series(data, "Line graph", HSL(210, 100, 56)). 63 | xAxis().yAxis().frame(). 64 | xLabel("x").yLabel("y").render().asBufferedImage 65 | val plotStream = cumulStream map (d => biResize(dataToImage(d),1000,800,SCALE_SMOOTH)) 66 | scalaview.SfxImageViewer.bi(plotStream, 100000, autoStart=true) 67 | ``` 68 | 69 | 70 | 71 | #### eof 72 | -------------------------------------------------------------------------------- /drafts/fp-ssc.md: -------------------------------------------------------------------------------- 1 | ## An introduction to functional programming for scalable statistical computing and machine learning 2 | 3 | Functional programming (FP) languages are great for statistical computing, computational statistics, and machine learning. 
They are particularly well-suited to scalable computation, where this could either mean scaling up to distributed algorithms for very big data, or running algorithms for more moderately sized data sets very fast in parallel on GPUs. However, people unfamiliar with FP often find FP languages quite intimidating, due to the fairly steep initial learning curve. This issue is exacerbated by the fact that there is very little introductory documentation available for people new to FP who are interested in applications to statistical computing and machine learning (ML). 4 | 5 | So for some time I've been meaning to put together materials for a short course (suitable for self-study) that will get people started with FP in few different languages, with a very basic problem from statistical computing used as the running example, together with a catalogue of resources for further learning, in order to provide people with the information they need to keep going once they have got over the initial hurdle. But as with many things, it never got high enough up my priority list to actually sit down and do it. Fortunately, [StatML](https://statml.io/) invited me to deliver some training in advanced statistical computing, so this gave me the perfect motivation to actually assemble something. The in-person training has been delayed (due to the [UCU](https://www.ucu.org.uk/) strike), but the materials are all prepared and publicly available, and suitable for self-study now. 6 | 7 | The course gives a very quick introduction to the ideas of FP, followed by very quick hands-on introductions to my favourite FP languages/libraries: Scala, Haskell, JAX and Dex. There is also a brief introduction to *splittable random number generators* which are becoming increasingly popular for the development of functional parallel Monte Carlo algorithms. 8 | 9 | If you've been vaguely interested in FP for statistical computing and ML but not sure how to get started, hopefully this solves the problem. 
10 | 11 | [An introduction to functional programming for scalable statistical computing and machine learning](https://github.com/darrenjw/fp-ssc-course) (short course) 12 | 13 | -------------------------------------------------------------------------------- /drafts/index75.md: -------------------------------------------------------------------------------- 1 | # Index to first 75 posts 2 | 3 | This is the 75th post to this blog. Every 25 posts I produce an index of posts so far for easy reference. If I make it to post 100 I'll do something similar. 4 | 5 | * 25. [Catalogue of my first 25 blog posts](https://darrenjw.wordpress.com/2011/12/30/catalogue-of-my-first-25-blog-posts/) 6 | 7 | 8 | * 50. [Index to first 50 posts](https://darrenjw.wordpress.com/2015/04/10/index-to-first-50-posts/) 9 | 10 | 11 | * 51. [Calling Scala code from R using rscala](https://darrenjw.wordpress.com/2015/08/15/calling-scala-code-from-r-using-rscala/) 12 | * 52. [Calling R from Scala sbt projects using rscala](https://darrenjw.wordpress.com/2015/08/15/calling-r-from-scala-sbt-projects-using-rscala/) 13 | * 53. [Data frames and tables in Scala](https://darrenjw.wordpress.com/2015/08/21/data-frames-and-tables-in-scala/) 14 | * 54. [HOFs, closures, partial application and currying to solve the function environment problem in Scala](https://darrenjw.wordpress.com/2015/11/16/hofs-closures-partial-application-and-currying-to-solve-the-function-environment-problem-in-scala/) 15 | * 55. [First steps with monads in Scala](https://darrenjw.wordpress.com/2016/04/15/first-steps-with-monads-in-scala/) 16 | * 56. [A scalable particle filter in Scala](https://darrenjw.wordpress.com/2016/07/22/a-scalable-particle-filter-in-scala/) 17 | * 57. [Working with SBML using Scala](https://darrenjw.wordpress.com/2016/12/17/working-with-sbml-using-scala/) 18 | * 58. [Scala for Data Science [book review]](https://darrenjw.wordpress.com/2016/12/22/scala-for-data-science-book-review/) 19 | * 59. 
[Books on Scala for statistical computing and data science](https://darrenjw.wordpress.com/2016/12/22/books-on-scala-for-statistical-computing-and-data-science/) 20 | * 60. [A quick introduction to Apache Spark for statisticians](https://darrenjw.wordpress.com/2017/02/08/a-quick-introduction-to-apache-spark-for-statisticians/) 21 | * 61. [MCMC as a Stream](https://darrenjw.wordpress.com/2017/04/01/mcmc-as-a-stream/) 22 | * 62. [Statistical computing with Scala free on-line course](https://darrenjw.wordpress.com/2017/05/31/statistical-computing-with-scala-free-on-line-course/) 23 | * 63. [scala-glm: Regression modelling in Scala](https://darrenjw.wordpress.com/2017/06/21/scala-glm-regression-modelling-in-scala/) 24 | * 64. [Comonads for scientific and statistical computing in Scala](https://darrenjw.wordpress.com/2018/01/22/comonads-for-scientific-and-statistical-computing-in-scala/) 25 | * 65. [Scala-view: Animate streams of images](https://darrenjw.wordpress.com/2018/03/01/scala-view-animate-streams-of-images/) 26 | * 66. [Using EvilPlot with scala-view](https://darrenjw.wordpress.com/2018/05/11/using-evilplot-with-scala-view/) 27 | * 67. [Monadic probabilistic programming in Scala with Rainier](https://darrenjw.wordpress.com/2018/06/01/monadic-probabilistic-programming-in-scala-with-rainier/) 28 | * 68. [Bayesian hierarchical modelling with Rainier](https://darrenjw.wordpress.com/2018/06/10/bayesian-hierarchical-modelling-with-rainier/) 29 | * 69. [Stochastic Modelling for Systems Biology, third edition](https://darrenjw.wordpress.com/2018/12/19/stochastic-modelling-for-systems-biology-third-edition/) 30 | * 70. [The smfsb R package](https://darrenjw.wordpress.com/2019/01/01/the-smfsb-r-package/) 31 | * 71. [The scala-smfsb library](https://darrenjw.wordpress.com/2019/01/04/the-scala-smfsb-library/) 32 | * 72. [Stochastic reaction-diffusion modelling](https://darrenjw.wordpress.com/2019/01/22/stochastic-reaction-diffusion-modelling/) 33 | * 73. 
[Write your own general-purpose monadic probabilistic programming language from scratch in 50 lines of (Scala) code](https://darrenjw.wordpress.com/2019/08/07/write-your-own-general-purpose-monadic-probabilistic-programming-language-from-scratch-in-50-lines-of-scala-code/) 34 | * 74. [A probability monad for the bootstrap particle filter](https://darrenjw.wordpress.com/2019/08/10/a-probability-monad-for-the-bootstrap-particle-filter/) 35 | 36 | 37 | * 75. Index to first 75 posts (this post) 38 | 39 | 40 | -------------------------------------------------------------------------------- /drafts/md2wp: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # md2wp 3 | # convert github flavoured markdown to wordpress html 4 | 5 | cat $1 | \ 6 | sed 's/```scala/```/g' | \ 7 | sed 's/```R/```/g' | \ 8 | sed 's/```bash/```/g' | \ 9 | pandoc -f markdown_github -t html5 | \ 10 | sed 's/
/[sourcecode language="scala" light="true"]\n/g' | \
11 |   sed 's|
|\n[/sourcecode]|g' | \ 12 | sed 's/"/"/g' | \ 13 | sed 's/>/>/g' | \ 14 | sed 's/</ mu + (x - mu) * a + sig * rng.standardNormal). 32 | take(n).toVector 33 | val obs = state.map(_ + sigD * rng.standardNormal) 34 | ``` 35 | 36 | Now we have some synthetic data, let's think about building a probabilistic program for this model. Start with a prior. 37 | 38 | ```scala 39 | case class Static(mu: Real, a: Real, sig: Real, sigD: Real) 40 | val prior = for { 41 | mu <- Normal(0, 10).param 42 | a <- Normal(1, 0.1).param 43 | sig <- Gamma(2,1).param 44 | sigD <- Gamma(2,2).param 45 | sp <- Normal(0, 50).param 46 | } yield (Static(mu, a, sig, sigD), List(sp)) 47 | ``` 48 | 49 | Note the use of a case class for wrapping the static parameters. Next, let's define a function to add a state and associated observation to an existing model. 50 | 51 | ```scala 52 | def addTimePoint(current: RandomVariable[(Static, List[Real])], 53 | datum: Double) = for { 54 | tup <- current 55 | static = tup._1 56 | states = tup._2 57 | os = states.head 58 | ns <- Normal(((Real.one - static.a) * static.mu) + (static.a * os), 59 | static.sig).param 60 | _ <- Normal(ns, static.sigD).fit(datum) 61 | } yield (static, ns :: states) 62 | ``` 63 | 64 | Given this, we can generate the probabilistic program for our model as a *fold* over the data initialised with the prior. 65 | 66 | ```scala 67 | val fullModel = obs.foldLeft(prior)(addTimePoint(_, _)) 68 | ``` 69 | 70 | If we don't want to keep samples for all of the variables, we can focus on the parameters of interest, wrapping the results in a `Map` for convenient sampling and plotting. 
71 | 72 | ```scala 73 | val model = for { 74 | tup <- fullModel 75 | static = tup._1 76 | states = tup._2 77 | } yield 78 | Map("mu" -> static.mu, 79 | "a" -> static.a, 80 | "sig" -> static.sig, 81 | "sigD" -> static.sigD, 82 | "SP" -> states.reverse.head) 83 | ``` 84 | 85 | We can sample with 86 | 87 | ```scala 88 | val out = model.sample(HMC(3), 100000, 10000 * 500, 500) 89 | ``` 90 | 91 | (this will take several minutes) and plot some diagnostics with 92 | 93 | ```scala 94 | import com.cibo.evilplot.geometry.Extent 95 | import com.stripe.rainier.plot.EvilTracePlot._ 96 | 97 | val truth = Map("mu" -> mu, "a" -> a, "sigD" -> sigD, 98 | "sig" -> sig, "SP" -> state(0)) 99 | render(traces(out, truth), "traceplots.png", 100 | Extent(1200, 1400)) 101 | render(pairs(out, truth), "pairs.png") 102 | ``` 103 | 104 | This generates the following diagnostic plots: 105 | 106 | ![Trace plots](r2tp.png) 107 | 108 | ![Pairs plots](r2pp.png) 109 | 110 | ## Summary 111 | 112 | Rainier is a monadic embedded DSL for probabilistic programming in Scala. We can use standard functional combinators and for-expressions for building models to sample, and then run an efficient HMC algorithm on the resulting probability monad in order to obtain samples from the posterior distribution of the model. 113 | 114 | See the [Rainier](https://github.com/stripe/rainier) repo for further details. 
115 | 116 | 117 | #### eof 118 | 119 | -------------------------------------------------------------------------------- /drafts/rjb.md: -------------------------------------------------------------------------------- 1 | # MCMC code for Bayesian inference for a discretely observed stochastic kinetic model 2 | 3 | In June this year the (twice COVID-delayed) [Richard J Boys Memorial Workshop](https://conferences.ncl.ac.uk/rjbmemorialmeeting/) finally took place, celebrating the life and work of my former colleague and collaborator, who died suddenly in 2019 ([obituary](http://www.bernoulli-society.org/files/BernoulliNews2019-1.pdf#page=13)). I completed the programme of talks by delivering the inaugural RSS North East Richard Boys lecture. For this, I decided that it would be most appropriate to talk about the paper [Bayesian inference for a discretely observed stochastic kinetic model](http://dx.doi.org/10.1007/s11222-007-9043-x), published in Statistics and Computing in 2008. The paper is concerned with (exact and approximate) MCMC-based fully Bayesian inference for continuous time Markov jump processes observed partially and discretely in time. Although the ideas are generally applicable to a broad class of "stochastic kinetic models", the running example throughout the paper is a discrete stochastic Lotka Volterra model. 4 | 5 | In preparation for the talk, I managed to track down most of the MCMC codes used for the examples presented in the paper. This included C code I wrote for exact and approximate block-updating algorithms, and Fortran code written by Richard using an exact reversible jump MCMC approach. I've fixed up all of the codes so that they are now easy to build and run on a modern Linux (or other Unix-like) system, and provided some minimal documentation. It is all available in a [public github repo](https://github.com/darrenjw/BWK). Hopefully this will be of some interest or use to a few people. 
6 | -------------------------------------------------------------------------------- /drafts/scala-glm.md: -------------------------------------------------------------------------------- 1 | # scala-glm: Regression modelling in Scala 2 | 3 | ## Introduction 4 | 5 | As discussed in the [previous post](https://darrenjw.wordpress.com/2017/05/31/statistical-computing-with-scala-free-on-line-course/), I've recently constructed and delivered a short course on statistical computing with Scala. Much of the course is concerned with writing statistical algorithms in Scala, typically making use of the scientific and numerical computing library, [Breeze](https://github.com/scalanlp/breeze). Breeze has all of the essential tools necessary for building statistical algorithms, but doesn't contain any higher level modelling functionality. As part of the course, I walked through how to build a small library for regression modelling on top of Breeze, including all of the usual regression diagnostics (such as standard errors, t-statistics, p-values, F-statistics, etc.). While preparing the course materials it occurred to me that it would be useful to package and document this code properly for general use. In advance of the course I packaged the code up into a bare-bones library, but since then I've fleshed it out, tidied it up and documented it properly, so it's now ready for people to use. 6 | 7 | The library covers PCA, linear regression modelling and simple one-parameter GLMs (including logistic and Poisson regression). The underlying algorithms are fairly efficient and numerically stable (eg. linear regression uses the [QR decomposition](https://en.wikipedia.org/wiki/QR_decomposition) of the model matrix, and the GLM fitting uses QR within each [IRLS](https://en.wikipedia.org/wiki/Iteratively_reweighted_least_squares) step), though they are optimised more for clarity than speed. 
The library also includes a few utility functions and procedures, including a pairs plot (scatter-plot matrix). 8 | 9 | ## A linear regression example 10 | 11 | Plenty of [documentation](https://github.com/darrenjw/scala-glm/blob/master/README.md) is available from the [scala-glm github repo](https://github.com/darrenjw/scala-glm) which I won't repeat here. But to give a rough idea of how things work, I'll run through an interactive session for the linear regression example. 12 | 13 | First, download a [dataset](https://archive.ics.uci.edu/ml/datasets/airfoil+self-noise) from the [UCI ML Repository](http://archive.ics.uci.edu/ml/) to disk for subsequent analysis (caching the file on disk is good practice, as it avoids unnecessary load on the UCI server, and allows running the code off-line): 14 | 15 | ```scala 16 | import scalaglm._ 17 | import breeze.linalg._ 18 | 19 | val url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00291/airfoil_self_noise.dat" 20 | val fileName = "self-noise.csv" 21 | 22 | // download the file to disk if it hasn't been already 23 | val file = new java.io.File(fileName) 24 | if (!file.exists) { 25 | val s = new java.io.PrintWriter(file) 26 | val data = scala.io.Source.fromURL(url).getLines 27 | data.foreach(l => s.write(l.trim. 28 | split('\t').filter(_ != ""). 29 | mkString("", ",", "\n"))) 30 | s.close 31 | } 32 | ``` 33 | 34 | Once we have a CSV file on disk, we can load it up and look at it. 35 | ```scala 36 | val mat = csvread(new java.io.File(fileName)) 37 | // mat: breeze.linalg.DenseMatrix[Double] = 38 | // 800.0 0.0 0.3048 71.3 0.00266337 126.201 39 | // 1000.0 0.0 0.3048 71.3 0.00266337 125.201 40 | // 1250.0 0.0 0.3048 71.3 0.00266337 125.951 41 | // ... 
42 | println("Dim: " + mat.rows + " " + mat.cols) 43 | // Dim: 1503 6 44 | val figp = Utils.pairs(mat, List("Freq", "Angle", "Chord", "Velo", "Thick", "Sound")) 45 | // figp: breeze.plot.Figure = breeze.plot.Figure@37718125 46 | ``` 47 | 48 | We can then regress the response in the final column on the other variables. 49 | 50 | ```scala 51 | val y = mat(::, 5) // response is the final column 52 | // y: DenseVector[Double] = DenseVector(126.201, 125.201, ... 53 | val X = mat(::, 0 to 4) 54 | // X: breeze.linalg.DenseMatrix[Double] = 55 | // 800.0 0.0 0.3048 71.3 0.00266337 56 | // 1000.0 0.0 0.3048 71.3 0.00266337 57 | // 1250.0 0.0 0.3048 71.3 0.00266337 58 | // ... 59 | val mod = Lm(y, X, List("Freq", "Angle", "Chord", "Velo", "Thick")) 60 | // mod: scalaglm.Lm = 61 | // Lm(DenseVector(126.201, 125.201, ... 62 | mod.summary 63 | // Estimate S.E. t-stat p-value Variable 64 | // --------------------------------------------------------- 65 | // 132.8338 0.545 243.866 0.0000 * (Intercept) 66 | // -0.0013 0.000 -30.452 0.0000 * Freq 67 | // -0.4219 0.039 -10.847 0.0000 * Angle 68 | // -35.6880 1.630 -21.889 0.0000 * Chord 69 | // 0.0999 0.008 12.279 0.0000 * Velo 70 | // -147.3005 15.015 -9.810 0.0000 * Thick 71 | // Residual standard error: 4.8089 on 1497 degrees of freedom 72 | // Multiple R-squared: 0.5157, Adjusted R-squared: 0.5141 73 | // F-statistic: 318.8243 on 5 and 1497 DF, p-value: 0.00000 74 | val fig = mod.plots 75 | // fig: breeze.plot.Figure = breeze.plot.Figure@60d7ebb0 76 | ``` 77 | 78 | There is a `.predict` method for generating point predictions (and standard errors) given a new model matrix, and fitting GLMs is very similar - these things are covered in the [quickstart guide](https://darrenjw.github.io/scala-glm/QuickStart.html) for the library. 
79 | 80 | ## Summary 81 | 82 | [scala-glm](https://github.com/darrenjw/scala-glm/) is a small Scala library built on top of the [Breeze](https://github.com/scalanlp/breeze) numerical library which enables simple and convenient regression modelling in Scala. It is reasonably well documented and usable in its current form, but I intend to gradually add additional features according to demand as time permits. 83 | 84 | #### eof 85 | 86 | -------------------------------------------------------------------------------- /drafts/smfsb3e.md: -------------------------------------------------------------------------------- 1 | # Stochastic Modelling for Systems Biology, third edition 2 | 3 | The third edition of my textbook, [Stochastic Modelling for Systems Biology](https://github.com/darrenjw/smfsb) has recently been published by Chapman & Hall/CRC Press. The book has ISBN-10 **113854928-2** and ISBN-13 **978-113854928-9**. It can be ordered from [CRC Press](https://www.crcpress.com/Stochastic-Modelling-for-Systems-Biology-Third-Edition/Wilkinson/p/book/9781138549289), [Amazon.com](https://amzn.to/2LAVSSN), [Amazon.co.uk](https://amzn.to/2PeDIZt) and similar book sellers. 4 | 5 | I was fairly happy with the way that the second edition, published in 2011, turned out, and so I haven't substantially re-written any of the text for the third edition. Instead, I've concentrated on adding in new material and improving the associated on-line resources. Those on-line resources are all free and open source, and hence available to everyone, irrespective of whether you have a copy of the new edition. I'll give an introduction to those resources below (and in subsequent posts). The new material can be briefly summarised as follows: 6 | 7 | * New chapter on spatially extended systems, covering the spatial Gillespie algorithm for reaction diffusion master equation (RDME) models in 1- and 2-d, the next subvolume method, spatial CLE, scaling issues, etc. 
8 | * Significantly expanded chapter on inference for stochastic kinetic models from data, covering approximate methods of inference (ABC), including ABC-SMC. The material relating to particle MCMC has also been improved and extended. 9 | * Updated R package, including code relating to all of the new material 10 | * New R package for parsing SBML models into simulatable stochastic Petri net models 11 | * New software library, written in Scala, replicating most of the functionality of the R packages in a fast, compiled, strongly typed, functional language 12 | 13 | ## New content 14 | 15 | Although some minor edits and improvements have been made throughout the text, there are two substantial new additions to the text in this new edition. The first is an entirely new chapter on spatially extended systems. The first two editions of the text focused on the implications of discreteness and stochasticity in chemical reaction systems, but maintained the well-mixed assumption throughout. This is a reasonable first approach, since discreteness and stochasticity are most pronounced in very small volumes where diffusion should be rapid. In any case, even these non-spatial models have very interesting behaviour, and become computationally challenging very quickly for non-trivial reaction networks. However, we know that, in fact, the cell is a very crowded environment, and so even at small spatial scales, many interesting processes are diffusion limited. It therefore seems appropriate to dedicate one chapter (the new Chapter 9) to studying some of the implications of relaxing the well-mixed assumption. Entire books can be written on stochastic reaction-diffusion systems, so here only a brief introduction is provided, based mainly around models in the reaction-diffusion master equation (RDME) style. Exact stochastic simulation algorithms are discussed, and implementations provided in the 1- and 2-d cases, and an appropriate Langevin approximation is examined, the spatial CLE. 
16 | 17 | The second major addition is to the chapter on inference for stochastic kinetic models from data (now Chapter 11). The second edition of the book included a discussion of "likelihood free" Bayesian MCMC methods for inference, and provided a working implementation of likelihood free particle marginal Metropolis-Hastings (PMMH) for stochastic kinetic models. The third edition improves on that implementation, and discusses approximate Bayesian computation (ABC) as an alternative to MCMC for likelihood free inference. Implementation issues are discussed, and sequential ABC approaches are examined, concentrating in particular on the method known as ABC-SMC. 18 | 19 | ## New software and on-line resources 20 | 21 | Accompanying the text are new and improved on-line resources, all well-documented, free, and open source. 22 | 23 | ### New website/GitHub repo 24 | 25 | Information and materials relating to the previous editions were kept on my University website. All materials relating to this new edition are kept in a public GitHub repo: [darrenjw/smfsb](https://github.com/darrenjw/smfsb). This will be simpler to maintain, and will make it much easier for people to make copies of the material for use and studying off-line. 26 | 27 | ### Updated R package(s) 28 | 29 | Along with the second edition of the book I released an accompanying R package, "smfsb", published on CRAN. This was a very popular feature, allowing anyone with R to trivially experiment with all of the models and algorithms discussed in the text. This R package has been updated, and a new version has been published to CRAN. The updates are all backwards-compatible with the version associated with the second edition of the text, so owners of that edition can still upgrade safely. 
I'll give a proper introduction to the package, including the new features, in a subsequent post, but in the meantime, you can install/upgrade the package from a running R session with 30 | ```R 31 | install.packages("smfsb") 32 | ``` 33 | and then pop up a tutorial vignette with: 34 | ```R 35 | vignette("smfsb") 36 | ``` 37 | This should be enough to get you started. 38 | 39 | In addition to the main R package, there is an additional R package for parsing SBML models into models that can be simulated within R. This package is not on CRAN, due to its dependency on a non-CRAN package. See the [repo](https://github.com/darrenjw/smfsb) for further details. 40 | 41 | There are also Python scripts available for converting SBML models to and from the shorthand SBML notation used in the text. 42 | 43 | ### New Scala library 44 | 45 | Another major new resource associated with the third edition of the text is a software library written in the Scala programming language. This library provides Scala implementations of all of the algorithms discussed in the book and implemented in the associated R packages. This then provides example implementations in a fast, efficient, compiled language, and is likely to be most useful for people wanting to use the methods in the book for research. Again, I'll provide a tutorial introduction to this library in a subsequent post, but it is well-documented, with all necessary information needed to get started available at the [scala-smfsb](https://github.com/darrenjw/scala-smfsb) repo/website, including a step-by-step [tutorial](https://github.com/darrenjw/scala-smfsb/blob/master/docs/Tutorial.md) and some additional [examples](https://github.com/darrenjw/scala-smfsb/tree/master/examples). 
46 | 47 | 48 | #### eof 49 | -------------------------------------------------------------------------------- /drafts/traceplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/drafts/traceplots.png -------------------------------------------------------------------------------- /first-monads/README.md: -------------------------------------------------------------------------------- 1 | # First steps with monads in Scala 2 | 3 | Code examples for the blog post: 4 | 5 | https://darrenjw.wordpress.com/2016/04/15/first-steps-with-monads-in-scala/ 6 | 7 | 8 | Note that this repo contains everything that is needed to build and run the Scala code examples on any system that has Java installed. Any recent version of Java is fine. You do not need to "install" Scala or any Scala "packages" in order to run the code. If you have Java and a decent internet connection, you are good to go. This is one of the benefits of Scala - you can run it anywhere, on any system with a Java installation. 9 | 10 | To check if you have Java installed, just run: 11 | 12 | ```bash 13 | java -version 14 | ``` 15 | 16 | at your system command prompt. If you get an error, Java is absent or incorrectly installed. Installing Java is very easy on any platform, but the best way to install it depends on exactly what OS you are running, so search the internet for advice on the best way to install Java on your OS. 17 | 18 | The code uses `sbt` (the simple build tool) as the build tool. The sbt launcher has been included in the repo for the benefit of those new to Scala. It should be possible to run sbt from this directory by typing: 19 | 20 | ```bash 21 | ..\sbt 22 | ``` 23 | 24 | on Windows (which should run `..\sbt.bat`), or 25 | 26 | ```bash 27 | ../sbt 28 | ``` 29 | 30 | on Linux and similar systems (including Macs). 
If you want to be able to experiment with Scala yourself, you should copy the script and the file `sbt-launch.jar` to the same directory somewhere in your path, but this isn't necessary to run these examples. 31 | 32 | The sbt launcher script will download and run sbt, which will then download scala, the scala compiler, scala standard libraries and all dependencies needed to compile and run the code. All the downloaded files will be cached on your system for future use. Therefore, make sure you have a good internet connection and a bit of free disk space before running sbt for the first time. 33 | 34 | Assuming you can run sbt, just typing `run` at the sbt prompt will compile and run the example code. Typing `console` will give a Scala REPL with a properly configured classpath including all dependencies. You can type scala expressions directly into the REPL just as you would in your favourite dynamic math/stat language. Type `help` at the sbt prompt for help on sbt. Type `:help` at the Scala REPL for help on the REPL. 35 | 36 | The best way to follow the blog post is to copy-and-paste lines of code directly from the blog post to the sbt `console`. 
37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /first-monads/build.sbt: -------------------------------------------------------------------------------- 1 | name := "first-monads" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "org.scalanlp" %% "breeze" % "0.11.2", 11 | "org.scalanlp" %% "breeze-natives" % "0.11.2" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.11.6" 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /first-monads/monads.scala: -------------------------------------------------------------------------------- 1 | /* 2 | monads.scala 3 | 4 | First steps with monads in scala 5 | 6 | */ 7 | 8 | object FirstMonads { 9 | 10 | def main(args: Array[String]): Unit = { 11 | 12 | println("Hello") 13 | 14 | println("map and Functors") 15 | val x = (0 to 4).toList 16 | println(x) 17 | val x2 = x map { x => x * 3 } 18 | println(x2) 19 | val x3 = x map { _ * 3 } 20 | println(x3) 21 | val x4 = x map { _ * 0.1 } 22 | println(x4) 23 | val xv = x.toVector 24 | println(xv) 25 | val xv2 = xv map { _ * 0.2 } 26 | println(xv2) 27 | val xv3 = for (xi <- xv) yield (xi * 0.2) 28 | println(xv3) 29 | 30 | println("flatMap and Monads") 31 | val x5 = x map { x => List(x - 0.1, x + 0.1) } 32 | println(x5) 33 | val x6 = x flatMap { x => List(x - 0.1, x + 0.1) } 34 | println(x6) 35 | 36 | val y = (0 to 12 by 2).toList 37 | println(y) 38 | val xy = x flatMap { xi => y map { yi => xi * yi } } 39 | println(xy) 40 | val xy2 = for { 41 | xi <- x 42 | yi 
<- y 43 | } yield (xi * yi) 44 | println(xy2) 45 | 46 | println("Option monad") 47 | val three = Option(3) 48 | val twelve = three map (_ * 4) 49 | println(twelve) 50 | val four = Option(4) 51 | val twelveB = three map (i => four map (i * _)) 52 | println(twelveB) 53 | val twelveC = three flatMap (i => four map (i * _)) 54 | println(twelveC) 55 | val twelveD = for { 56 | i <- three 57 | j <- four 58 | } yield (i * j) 59 | println(twelveD) 60 | val oops: Option[Int] = None 61 | val oopsB = for { 62 | i <- three 63 | j <- oops 64 | } yield (i * j) 65 | println(oopsB) 66 | val oopsC = for { 67 | i <- oops 68 | j <- four 69 | } yield (i * j) 70 | println(oopsC) 71 | 72 | println("IEEE floating point and NaN") 73 | val nan = Double.NaN 74 | println(3.0 * 4.0) 75 | println(3.0 * nan) 76 | println(nan * 4.0) 77 | val nanB = 0.0 / 0.0 78 | println(nanB) 79 | // val nanC=0/0 80 | // println(nanC) 81 | 82 | println("Option for matrix computations") 83 | import breeze.linalg._ 84 | def safeChol(m: DenseMatrix[Double]): Option[DenseMatrix[Double]] = scala.util.Try(cholesky(m)).toOption 85 | val m = DenseMatrix((2.0, 1.0), (1.0, 3.0)) 86 | val c = safeChol(m) 87 | println(c) 88 | val m2 = DenseMatrix((1.0, 2.0), (2.0, 3.0)) 89 | val c2 = safeChol(m2) 90 | println(c2) 91 | 92 | import com.github.fommil.netlib.BLAS.{getInstance => blas} 93 | def dangerousForwardSolve(A: DenseMatrix[Double], y: DenseVector[Double]): DenseVector[Double] = { 94 | val yc = y.copy 95 | blas.dtrsv("L", "N", "N", A.cols, A.toArray, A.rows, yc.data, 1) 96 | yc 97 | } 98 | def safeForwardSolve(A: DenseMatrix[Double], y: DenseVector[Double]): Option[DenseVector[Double]] = scala.util.Try(dangerousForwardSolve(A, y)).toOption 99 | 100 | def safeStd(A: DenseMatrix[Double], y: DenseVector[Double]): Option[DenseVector[Double]] = for { 101 | L <- safeChol(A) 102 | z <- safeForwardSolve(L, y) 103 | } yield z 104 | 105 | println(safeStd(m,DenseVector(1.0,2.0))) 106 | 107 | println("Future monad") 108 | import 
scala.concurrent.duration._ 109 | import scala.concurrent.{Future,ExecutionContext,Await} 110 | import ExecutionContext.Implicits.global 111 | val f1=Future{ 112 | Thread.sleep(10000) 113 | 1 } 114 | val f2=Future{ 115 | Thread.sleep(10000) 116 | 2 } 117 | val f3=for { 118 | v1<-f1 119 | v2<-f2 120 | } yield (v1+v2) 121 | println(Await.result(f3,30.second)) 122 | 123 | println("Goodbye") 124 | } 125 | 126 | } 127 | 128 | // eof 129 | 130 | -------------------------------------------------------------------------------- /first-monads/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.7 2 | -------------------------------------------------------------------------------- /gibbs-java-r-mvn/README.md: -------------------------------------------------------------------------------- 1 | # R and Java interfacing with rJava and Maven 2 | 3 | ## Calling Java code from R 4 | 5 | This page and the code in this subdirectory serve as an addendum to an old blog post of mine (from 2011) on [Calling Java code from R](https://darrenjw.wordpress.com/2011/01/01/calling-java-code-from-r/). In the post I show how to use the [rJava](https://cran.r-project.org/web/packages/rJava/) package in R to call a Gibbs sampler written in Java. The example Gibbs sampler in Java had an external dependency on the Java-based scientific library [parallel COLT](https://sites.google.com/site/piotrwendykier/software/parallelcolt). In the post I just assumed that parallel COLT was in the user's Java classpath, and then everything should have worked fine. However, reading through the comments on the post, it is clear that many people have had trouble in getting the example to work due to Java classpath issues. 6 | 7 | The example in the post was illustrative, and isn't really a good solution for larger, more complex projects with many dependencies. 
The "correct" solution to managing Java dependencies is to use a good build tool which understands the dependencies and can manage them for you. In the Java world, the most widely used decent build tool is [Maven](https://maven.apache.org/), so here I show how to use Maven to solve the problem properly. 8 | 9 | For this example to work, you must first install Java, Maven, R, and the rJava package. So, `java -version` and `mvn --version` should return something sensible, and entering `library(rJava)` at your R command prompt should return silently (without error). If you have done that, then simply running the script [`./compile-and-run.sh`](compile-and-run.sh) in this directory should compile the Java code and run an R script which calls it. That's it! Look at the scripts and the code to figure out how it all works. 10 | 11 | In summary, Maven builds a "fat jar" containing the compiled classes including all dependencies. The magic is all in the Maven config file, [`pom.xml`](pom.xml). It lists the parallel COLT dependency, and uses the "shade plugin" to build the fat jar. So `mvn clean compile package` will build the fat jar without any additional fuss. Then from the R script [`run-gibbs.R`](run-gibbs.R) you just pass a link to the jar when you call `.jinit()` and that's it. 
12 | 13 | 14 | -------------------------------------------------------------------------------- /gibbs-java-r-mvn/compile-and-run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | mvn clean compile package 4 | Rscript run-gibbs.R 5 | 6 | 7 | -------------------------------------------------------------------------------- /gibbs-java-r-mvn/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | darrenjw 5 | gibbs-mvn 6 | jar 7 | 1.0-SNAPSHOT 8 | gibbs-mvn 9 | http://maven.apache.org 10 | 11 | 12 | junit 13 | junit 14 | 4.13.1 15 | test 16 | 17 | 18 | net.sourceforge.parallelcolt 19 | parallelcolt 20 | 0.10.1 21 | 22 | 23 | 24 | 25 | 26 | org.apache.maven.plugins 27 | maven-shade-plugin 28 | 2.3 29 | 30 | 31 | 32 | package 33 | 34 | shade 35 | 36 | 37 | 38 | 39 | path.to.MainClass 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /gibbs-java-r-mvn/run-gibbs.R: -------------------------------------------------------------------------------- 1 | library(rJava) 2 | .jinit("./target/gibbs-mvn-1.0-SNAPSHOT.jar") 3 | obj=.jnew("GibbsR") 4 | 5 | jgibbs<-function(N=10000,thin=500,seed=trunc(runif(1)*1e6)) 6 | { 7 | result=.jcall(obj,"[[D","gibbs",as.integer(N),as.integer(thin),as.integer(seed)) 8 | mat=sapply(result,.jevalArray) 9 | mat=cbind(1:N,mat) 10 | colnames(mat)=c("Iter","x","y") 11 | mat 12 | } 13 | 14 | jgibbs() 15 | 16 | 17 | -------------------------------------------------------------------------------- /gibbs-java-r-mvn/src/main/java/darrenjw/GibbsR.java: -------------------------------------------------------------------------------- 1 | import java.util.*; 2 | import cern.jet.random.tdouble.*; 3 | import cern.jet.random.tdouble.engine.*; 4 | 5 | class GibbsR 6 | { 7 | 8 | public static void main(String[] arg) 9 | { 10 | if (arg.length != 3) { 11 | 
System.err.println("Usage: java GibbsR "); 12 | System.exit(1); 13 | } 14 | int N=Integer.parseInt(arg[0]); 15 | int thin=Integer.parseInt(arg[1]); 16 | int seed=Integer.parseInt(arg[2]); 17 | double[][] mat=gibbs(N,thin,seed); 18 | System.out.println("Iter x y"); 19 | for (int i=0;i/[sourcecode language="scala" light="true"]\n/g' | \ 10 | sed 's||\n[/sourcecode]|g' | \ 11 | sed 's/"/"/g' | \ 12 | sed 's/>/>/g' | \ 13 | sed 's/</ meanVar} 10 | import breeze.linalg.DenseVector 11 | 12 | // Normal random sample 13 | def example1 = { 14 | val mod = for { 15 | mu <- Normal(0,100) 16 | v <- Gamma(1,0.1) 17 | _ <- Normal(mu,v).fitQ(List(8.0,9,7,7,8,10)) 18 | } yield (mu,v) 19 | val modEmp = mod.empirical 20 | print("mu : ") 21 | println(meanVar(modEmp map (_._1))) 22 | print("v : ") 23 | println(meanVar(modEmp map (_._2))) 24 | } 25 | 26 | // Normal random sample - IG on v 27 | def example2 = { 28 | val mod = for { 29 | mu <- Normal(0, 100) 30 | tau <- Gamma(1, 0.1) 31 | _ <- Normal(mu, 1.0/tau).fitQ(List(8.0,9,7,7,8,10)) 32 | } yield (mu,tau) 33 | val modEmp = mod.empirical 34 | print("mu : ") 35 | println(meanVar(modEmp map (_._1))) 36 | print("tau : ") 37 | println(meanVar(modEmp map (_._2))) 38 | } 39 | 40 | // Poisson DGLM 41 | def example3 = { 42 | 43 | val data = List(2,1,0,2,3,4,5,4,3,2,1) 44 | 45 | val prior = for { 46 | w <- Gamma(1, 1) 47 | state0 <- Normal(0.0, 2.0) 48 | } yield (w, List(state0)) 49 | 50 | def addTimePoint(current: Prob[(Double, List[Double])], 51 | obs: Int): Prob[(Double, List[Double])] = { 52 | println(s"Conditioning on observation: $obs") 53 | for { 54 | tup <- current 55 | (w, states) = tup 56 | os = states.head 57 | ns <- Normal(os, w) 58 | _ <- Poisson(math.exp(ns)).fitQ(obs) 59 | } yield (w, ns :: states) 60 | } 61 | 62 | val mod = data.foldLeft(prior)(addTimePoint(_,_)).empirical 63 | print("w : ") 64 | println(meanVar(mod map (_._1))) 65 | print("s0 : ") 66 | println(meanVar(mod map (_._2.reverse.head))) 67 | print("sN : ") 68 | 
println(meanVar(mod map (_._2.head))) 69 | 70 | } 71 | 72 | // Linear model 73 | def example4 = { 74 | val x = List(1.0,2,3,4,5,6) 75 | val y = List(3.0,2,4,5,5,6) 76 | val xy = x zip y 77 | case class Param(alpha: Double, beta: Double, v: Double) 78 | println("Forming prior distribution") 79 | val prior = for { 80 | alpha <- Normal(0,10) 81 | beta <- Normal(0,4) 82 | v <- Gamma(1,0.1) 83 | } yield Param(alpha, beta, v) 84 | def addPoint(current: Prob[Param], obs: (Double, Double)): Prob[Param] = { 85 | println(s"Conditioning on $obs") 86 | for { 87 | p <- current 88 | (x, y) = obs 89 | _ <- Normal(p.alpha + p.beta * x, p.v).fitQ(y) 90 | } yield p 91 | } 92 | val mod = xy.foldLeft(prior)(addPoint(_,_)).empirical 93 | print("a : ") 94 | println(meanVar(mod map (_.alpha))) 95 | print("b : ") 96 | println(meanVar(mod map (_.beta))) 97 | print("v : ") 98 | println(meanVar(mod map (_.v))) 99 | } 100 | 101 | // Noisy observations of a count 102 | def example5 = { 103 | val mod = for { 104 | count <- Poisson(10) 105 | tau <- Gamma(1,0.1) 106 | _ <- Normal(count,1.0/tau).fitQ(List(4.2,5.1,4.6,3.3,4.7,5.3)) 107 | } yield (count,tau) 108 | val modEmp = mod.empirical 109 | print("count : ") 110 | println(meanVar(modEmp map (_._1.toDouble))) 111 | print("tau : ") 112 | println(meanVar(modEmp map (_._2))) 113 | } 114 | 115 | 116 | // Main entry point 117 | 118 | def main(args: Array[String]): Unit = { 119 | println("Hi") 120 | example1 121 | example2 122 | example3 123 | example4 124 | example5 125 | println("Bye") 126 | } 127 | 128 | } 129 | 130 | // eof 131 | 132 | -------------------------------------------------------------------------------- /min-ppl/src/main/scala/min-ppl.scala: -------------------------------------------------------------------------------- 1 | object MinPpl { 2 | 3 | import breeze.stats.{distributions => bdist} 4 | import breeze.linalg.DenseVector 5 | 6 | implicit val numParticles = 300 7 | 8 | case class Particle[T](v: T, lw: Double) { // value and 
log-weight 9 | def map[S](f: T => S): Particle[S] = Particle(f(v), lw) 10 | } 11 | 12 | trait Prob[T] { 13 | val particles: Vector[Particle[T]] 14 | def map[S](f: T => S): Prob[S] = Empirical(particles map (_ map f)) 15 | def flatMap[S](f: T => Prob[S]): Prob[S] = { 16 | Empirical((particles map (p => { 17 | f(p.v).particles.map(psi => Particle(psi.v, p.lw + psi.lw)) 18 | })).flatten).resample 19 | } 20 | def resample(implicit N: Int): Prob[T] = { 21 | val lw = particles map (_.lw) 22 | val mx = lw reduce (math.max(_,_)) 23 | val rw = lw map (lwi => math.exp(lwi - mx)) 24 | val law = mx + math.log(rw.sum/(rw.length)) 25 | val ind = bdist.Multinomial(DenseVector(rw.toArray)).sample(N) 26 | val newParticles = ind map (i => particles(i)) 27 | Empirical(newParticles.toVector map (pi => Particle(pi.v, law))) 28 | } 29 | def cond(ll: T => Double): Prob[T] = 30 | Empirical(particles map (p => Particle(p.v, p.lw + ll(p.v)))) 31 | def empirical: Vector[T] = resample.particles.map(_.v) 32 | } 33 | 34 | case class Empirical[T](particles: Vector[Particle[T]]) extends Prob[T] 35 | 36 | def unweighted[T](ts: Vector[T], lw: Double = 0.0): Prob[T] = 37 | Empirical(ts map (Particle(_, lw))) 38 | 39 | trait Dist[T] extends Prob[T] { 40 | def ll(obs: T): Double 41 | def ll(obs: Seq[T]): Double = obs map (ll) reduce (_+_) 42 | def fit(obs: Seq[T]): Prob[T] = 43 | Empirical(particles map (p => Particle(p.v, p.lw + ll(obs)))) 44 | def fitQ(obs: Seq[T]): Prob[T] = Empirical(Vector(Particle(obs.head, ll(obs)))) 45 | def fit(obs: T): Prob[T] = fit(List(obs)) 46 | def fitQ(obs: T): Prob[T] = fitQ(List(obs)) 47 | } 48 | 49 | case class Normal(mu: Double, v: Double)(implicit N: Int) extends Dist[Double] { 50 | lazy val particles = unweighted(bdist.Gaussian(mu, math.sqrt(v)).sample(N).toVector).particles 51 | def ll(obs: Double) = bdist.Gaussian(mu, math.sqrt(v)).logPdf(obs) 52 | } 53 | 54 | case class Gamma(a: Double, b: Double)(implicit N: Int) extends Dist[Double] { 55 | lazy val particles 
= unweighted(bdist.Gamma(a, 1.0/b).sample(N).toVector).particles 56 | def ll(obs: Double) = bdist.Gamma(a, 1.0/b).logPdf(obs) 57 | } 58 | 59 | case class Poisson(mu: Double)(implicit N: Int) extends Dist[Int] { 60 | lazy val particles = unweighted(bdist.Poisson(mu).sample(N).toVector).particles 61 | def ll(obs: Int) = bdist.Poisson(mu).logProbabilityOf(obs) 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /min-ppl/src/test/scala/min-ppl-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | min-ppl-test.scala 3 | 4 | Some basic sanity checks on the language 5 | 6 | */ 7 | 8 | import org.scalatest.flatspec.AnyFlatSpec 9 | import org.scalactic._ 10 | import MinPpl._ 11 | import breeze.stats.{meanAndVariance => meanVar} 12 | 13 | class PplSpec extends AnyFlatSpec with Tolerance { 14 | 15 | "A linear Gaussian" should "flatMap correctly" in { 16 | System.err.println("**These tests take a LONG time, and it is normal for a couple to fail**") 17 | val xy = for { 18 | x <- Normal(5,4) 19 | y <- Normal(x,1) 20 | } yield (x,y) 21 | val y = xy.map(_._2).empirical 22 | val mv = meanVar(y) 23 | assert(mv.mean === 5.0 +- 0.5) 24 | assert(mv.variance === 5.0 +- 1.0) 25 | } 26 | 27 | it should "cond correctly" in { 28 | val xy = for { 29 | x <- Normal(5,4) 30 | y <- Normal(x,1) 31 | } yield (x,y) 32 | val y = xy.map(_._2) 33 | val yGz = y.cond(yi => Normal(yi, 9).ll(8.0)).empirical 34 | val mv = meanVar(yGz) 35 | assert(mv.mean === 5.857 +- 0.5) 36 | assert(mv.variance === 2.867 +- 1.0) 37 | val xyGz = xy.cond{case (x,y) => Normal(y,9).ll(8.0)}.empirical 38 | val mvx = meanVar(xyGz.map(_._1)) 39 | assert(mvx.mean === 5.857 +- 0.5) 40 | assert(mvx.variance === 2.867 +- 1.0) 41 | val mvy = meanVar(xyGz.map(_._2)) 42 | assert(mvy.mean === 6.071 +- 0.5) 43 | assert(mvy.variance === 3.214 +- 1.0) 44 | } 45 | 46 | it should "cond correctly in a for" in { 47 | val wxyz = for { 48 | w 
<- Normal(5,2) 49 | x <- Normal(w,2) 50 | y <- Normal(x,1).cond(y => Normal(y,9).ll(8.0)) 51 | } yield (w,x,y) 52 | val wxyze = wxyz.empirical 53 | val mvw = meanVar(wxyze.map(_._1)) 54 | assert(mvw.mean === 5.429 +- 0.5) 55 | assert(mvw.variance === 1.714 +- 1.0) 56 | val mvx = meanVar(wxyze.map(_._2)) 57 | assert(mvx.mean === 5.857 +- 0.5) 58 | assert(mvx.variance === 2.867 +- 1.0) 59 | val mvy = meanVar(wxyze.map(_._3)) 60 | assert(mvy.mean === 6.071 +- 0.5) 61 | assert(mvy.variance === 3.214 +- 1.0) 62 | } 63 | 64 | it should "fit correctly" in { 65 | val xyzf = for { 66 | x <- Normal(5,4) 67 | y <- Normal(x,1) 68 | z <- Normal(y,9).fit(8.0) 69 | } yield (x,y,z) 70 | val xyzfe = xyzf.empirical 71 | val mvx = meanVar(xyzfe.map(_._1)) 72 | assert(mvx.mean === 5.857 +- 0.5) 73 | assert(mvx.variance === 2.867 +- 1.0) 74 | val mvy = meanVar(xyzfe.map(_._2)) 75 | assert(mvy.mean === 6.071 +- 0.5) 76 | assert(mvy.variance === 3.214 +- 1.0) 77 | val mvz = meanVar(xyzfe.map(_._3)) 78 | assert(mvz.mean === 6.071 +- 0.5) 79 | assert(mvz.variance === 12.214 +- 2.0) 80 | } 81 | 82 | it should "fitQ correctly" in { 83 | val xyzfq = for { 84 | x <- Normal(5,4) 85 | y <- Normal(x,1) 86 | z <- Normal(y,9).fitQ(8.0) 87 | } yield (x,y,z) 88 | val xyzfqe = xyzfq.empirical 89 | val mvx = meanVar(xyzfqe.map(_._1)) 90 | assert(mvx.mean === 5.857 +- 0.5) 91 | assert(mvx.variance === 2.867 +- 1.0) 92 | val mvy = meanVar(xyzfqe.map(_._2)) 93 | assert(mvy.mean === 6.071 +- 0.5) 94 | assert(mvy.variance === 3.214 +- 1.0) 95 | val mvz = meanVar(xyzfqe.map(_._3)) 96 | assert(mvz.mean === 8.000 +- 0.001) 97 | assert(mvz.variance === 0.000 +- 0.001) 98 | } 99 | 100 | it should "fit marginalised correctly" in { 101 | val yzf = for { 102 | y <- Normal(5,5) 103 | z <- Normal(y,9).fit(8.0) 104 | } yield (y,z) 105 | val yzfe = yzf.empirical 106 | val mvy = meanVar(yzfe.map(_._1)) 107 | assert(mvy.mean === 6.071 +- 0.5) 108 | assert(mvy.variance === 3.213 +- 1.0) 109 | val mvz = 
meanVar(yzfe.map(_._2)) 110 | assert(mvz.mean === 6.071 +- 0.5) 111 | assert(mvz.variance === 12.214 +- 2.0) 112 | } 113 | 114 | it should "fit multiple iid observations correctly" in { 115 | val yzf2 = for { 116 | y <- Normal(5,5) 117 | z <- Normal(y,18).fit(List(6.0,10.0)) 118 | } yield (y,z) 119 | val yzfe2 = yzf2.empirical 120 | val mvy = meanVar(yzfe2.map(_._1)) 121 | assert(mvy.mean === 6.071 +- 0.5) 122 | assert(mvy.variance === 3.214 +- 1.0) 123 | val mvz = meanVar(yzfe2.map(_._2)) 124 | assert(mvz.mean === 6.071 +- 0.5) 125 | assert(mvz.variance === 21.214 +- 2.5) 126 | } 127 | 128 | it should "deep chain correctly" in { 129 | val deep = for { 130 | w <- Normal(2.0,1.0) 131 | x <- Normal(w,1) 132 | y <- Normal(x,2) 133 | z <- Normal(y,1) 134 | } yield z 135 | val mvz = meanVar(deep.empirical) 136 | assert(mvz.mean === 2.0 +- 0.5) 137 | assert(mvz.variance === 5.0 +- 1.0) 138 | } 139 | 140 | } 141 | 142 | -------------------------------------------------------------------------------- /min-ppl2/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /min-ppl2/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | 4 | DraftPost.html: src/main/tut/DraftPost.md 5 | make tut 6 | pandoc -t html5 target/scala-2.13/tut/DraftPost.md -o DraftPost.html 7 | 8 | tut: 9 | sbt tut 10 | 11 | edit: 12 | emacs Makefile build.sbt *.md src/test/scala/*.scala 
src/main/scala/*.scala src/main/tut/*.md & 13 | 14 | 15 | # eof 16 | -------------------------------------------------------------------------------- /min-ppl2/Readme.md: -------------------------------------------------------------------------------- 1 | # min-ppl2 2 | 3 | ## [A probability monad for the bootstrap particle filter](https://darrenjw.wordpress.com/2019/08/10/a-probability-monad-for-the-bootstrap-particle-filter/) 4 | 5 | If you have (a recent JDK, and) [sbt](https://www.scala-sbt.org/) installed, you can compile and run the examples with `sbt run`, or run some tests with `sbt test` (slow), or compile the [tut](http://tpolecat.github.io/tut/) document that formed the draft of the post with `sbt tut`. 6 | 7 | If you are a statistician or data scientist interested to learn more about Scala, note that I have a free on-line course available: [Scala for statistical computing and data science](https://github.com/darrenjw/scala-course/blob/master/StartHere.md) 8 | 9 | 10 | Copyright (C) 2019 [Darren J Wilkinson](https://darrenjw.github.io/) 11 | 12 | -------------------------------------------------------------------------------- /min-ppl2/build.sbt: -------------------------------------------------------------------------------- 1 | name := "min-ppl2" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.1.0-SNAP13" % "test", 11 | "org.scalactic" %% "scalactic" % "3.0.8" % "test", 12 | "org.typelevel" %% "cats-core" % "2.0.0-RC1", 13 | "org.scalanlp" %% "breeze" % "1.0-RC4", 14 | //"org.scalanlp" %% "breeze-viz" % "1.0-RC4", 15 | "org.scalanlp" %% "breeze-natives" % "1.0-RC4" 16 | ) 17 | 18 | resolvers ++= Seq( 19 | "Sonatype Snapshots" at 20 | "https://oss.sonatype.org/content/repositories/snapshots/", 21 | "Sonatype Releases" at 22 | "https://oss.sonatype.org/content/repositories/releases/" 23 | ) 24 | 25 | 
enablePlugins(TutPlugin) 26 | 27 | scalaVersion := "2.13.0" 28 | 29 | -------------------------------------------------------------------------------- /min-ppl2/md2wp: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # md2wp 3 | # convert github flavoured markdown to wordpress html 4 | 5 | cat $1 | \ 6 | sed 's/```scala/```/g' | \ 7 | sed 's/```bash/```/g' | \ 8 | pandoc -f markdown_github -t html5 | \ 9 | sed 's/
<pre><code>/[sourcecode language="scala" light="true"]\n/g' | \
 10 |   sed 's|</code></pre>
|\n[/sourcecode]|g' | \ 11 | sed 's/"/"/g' | \ 12 | sed 's/>/>/g' | \ 13 | sed 's/</ meanVar} 12 | import breeze.linalg.DenseVector 13 | import cats._ 14 | import cats.implicits._ 15 | import cats.syntax._ 16 | 17 | // Zip vs flatMap 18 | def example1 = { 19 | println("binding with for") 20 | val prior1 = for { 21 | x <- Normal(0,1) 22 | y <- Gamma(1,1) 23 | z <- Poisson(10) 24 | } yield (x,y,z) 25 | println(meanVar(prior1.empirical.map(_._2))) 26 | println("binding with flatMap") 27 | val prior2 = 28 | Normal(0,1) flatMap {x => 29 | Gamma(1,1) flatMap {y => 30 | Poisson(10) map {z => 31 | (x,y,z)}}} 32 | println(meanVar(prior2.empirical.map(_._2))) 33 | println("tupling") 34 | val prior3 = Applicative[Prob].tuple3(Normal(0,1), Gamma(1,1), Poisson(10)) 35 | println(meanVar(prior3.empirical.map(_._2))) 36 | print("done") 37 | } 38 | 39 | // Poisson DGLM 40 | def example2 = { 41 | 42 | val data = List(2,1,0,2,3,4,5,4,3,2,1) 43 | 44 | val prior = for { 45 | w <- Gamma(1, 1) 46 | state0 <- Normal(0.0, 2.0) 47 | } yield (w, List(state0)) 48 | 49 | def addTimePointSimple(current: Prob[(Double, List[Double])], 50 | obs: Int): Prob[(Double, List[Double])] = { 51 | println(s"Conditioning on observation: $obs") 52 | val updated = for { 53 | tup <- current 54 | (w, states) = tup 55 | os = states.head 56 | ns <- Normal(os, w) 57 | _ <- Poisson(math.exp(ns)).fitQ(obs) 58 | } yield (w, ns :: states) 59 | updated.resample 60 | } 61 | 62 | def addTimePoint(current: Prob[(Double, List[Double])], 63 | obs: Int): Prob[(Double, List[Double])] = { 64 | println(s"Conditioning on observation: $obs") 65 | val predict = for { 66 | tup <- current 67 | (w, states) = tup 68 | os = states.head 69 | ns <- Normal(os, w) 70 | } 71 | yield (w, ns :: states) 72 | val updated = for { 73 | tup <- predict 74 | (w, states) = tup 75 | st = states.head 76 | _ <- Poisson(math.exp(st)).fitQ(obs) 77 | } yield (w, states) 78 | updated.resample 79 | } 80 | 81 | val mod = 
data.foldLeft(prior)(addTimePoint(_,_)).empirical 82 | print("w : ") 83 | println(meanVar(mod map (_._1))) 84 | print("s0 : ") 85 | println(meanVar(mod map (_._2.reverse.head))) 86 | print("sN : ") 87 | println(meanVar(mod map (_._2.head))) 88 | 89 | } 90 | 91 | 92 | 93 | // Main entry point 94 | 95 | def main(args: Array[String]): Unit = { 96 | println("Hi") 97 | //example1 98 | example2 99 | println("Bye") 100 | } 101 | 102 | } 103 | 104 | // eof 105 | 106 | -------------------------------------------------------------------------------- /min-ppl2/src/main/scala/min-ppl.scala: -------------------------------------------------------------------------------- 1 | /* 2 | min-ppl.scala 3 | 4 | SMC-based probability monad for bootstrap particle filter inference 5 | 6 | */ 7 | 8 | object MinPpl2 { 9 | 10 | import breeze.stats.{distributions => bdist} 11 | import breeze.linalg.DenseVector 12 | import cats._ 13 | import cats.implicits._ 14 | 15 | implicit val numParticles = 2000 16 | 17 | case class Particle[T](v: T, lw: Double) { // value and log-weight 18 | def map[S](f: T => S): Particle[S] = Particle(f(v), lw) 19 | def flatMap[S](f: T => Particle[S]): Particle[S] = { 20 | val ps = f(v) 21 | Particle(ps.v, lw + ps.lw) 22 | } 23 | } 24 | 25 | implicit val particleMonad = new Monad[Particle] { 26 | def pure[T](t: T): Particle[T] = Particle(t, 0.0) 27 | def flatMap[T,S](pt: Particle[T])(f: T => Particle[S]): Particle[S] = pt.flatMap(f) 28 | def tailRecM[T,S](t: T)(f: T => Particle[Either[T,S]]): Particle[S] = ??? 
29 | } 30 | 31 | trait Prob[T] { 32 | val particles: Vector[Particle[T]] 33 | def draw: Particle[T] 34 | def mapP[S](f: T => Particle[S]): Prob[S] = Empirical(particles map (_ flatMap f)) 35 | def map[S](f: T => S): Prob[S] = mapP(v => Particle(f(v), 0.0)) 36 | def flatMap[S](f: T => Prob[S]): Prob[S] = mapP(f(_).draw) 37 | def resample(implicit N: Int): Prob[T] = { 38 | val lw = particles map (_.lw) 39 | val mx = lw reduce (math.max(_,_)) 40 | val rw = lw map (lwi => math.exp(lwi - mx)) 41 | val law = mx + math.log(rw.sum/(rw.length)) 42 | val ind = bdist.Multinomial(DenseVector(rw.toArray)).sample(N) 43 | val newParticles = ind map (i => particles(i)) 44 | Empirical(newParticles.toVector map (pi => Particle(pi.v, law))) 45 | } 46 | def cond(ll: T => Double): Prob[T] = mapP(v => Particle(v, ll(v))) 47 | def empirical: Vector[T] = resample.particles.map(_.v) 48 | } 49 | 50 | implicit val probMonad = new Monad[Prob] { 51 | def pure[T](t: T): Prob[T] = Empirical(Vector(Particle(t, 0.0))) 52 | def flatMap[T,S](pt: Prob[T])(f: T => Prob[S]): Prob[S] = pt.flatMap(f) 53 | def tailRecM[T,S](t: T)(f: T => Prob[Either[T,S]]): Prob[S] = ??? 
54 | } 55 | 56 | case class Empirical[T](particles: Vector[Particle[T]]) extends Prob[T] { 57 | def draw: Particle[T] = { 58 | val lw = particles map (_.lw) 59 | val mx = lw reduce (math.max(_,_)) 60 | val rw = lw map (lwi => math.exp(lwi - mx)) 61 | val law = mx + math.log(rw.sum/(rw.length)) 62 | val idx = bdist.Multinomial(DenseVector(rw.toArray)).draw 63 | Particle(particles(idx).v, law) 64 | } 65 | } 66 | 67 | def unweighted[T](ts: Vector[T], lw: Double = 0.0): Prob[T] = 68 | Empirical(ts map (Particle(_, lw))) 69 | 70 | trait Dist[T] extends Prob[T] { 71 | def ll(obs: T): Double 72 | def ll(obs: Seq[T]): Double = obs map (ll) reduce (_+_) 73 | def fit(obs: Seq[T]): Prob[T] = mapP(v => Particle(v, ll(obs))) 74 | def fitQ(obs: Seq[T]): Prob[T] = Empirical(Vector(Particle(obs.head, ll(obs)))) 75 | def fit(obs: T): Prob[T] = fit(List(obs)) 76 | def fitQ(obs: T): Prob[T] = fitQ(List(obs)) 77 | } 78 | 79 | case class Normal(mu: Double, v: Double)(implicit N: Int) extends Dist[Double] { 80 | lazy val particles = unweighted(bdist.Gaussian(mu, math.sqrt(v)). 81 | sample(N).toVector).particles 82 | def draw = Particle(bdist.Gaussian(mu, math.sqrt(v)).draw, 0.0) 83 | def ll(obs: Double) = bdist.Gaussian(mu, math.sqrt(v)).logPdf(obs) 84 | } 85 | 86 | case class Gamma(a: Double, b: Double)(implicit N: Int) extends Dist[Double] { 87 | lazy val particles = unweighted(bdist.Gamma(a, 1.0/b). 88 | sample(N).toVector).particles 89 | def draw = Particle(bdist.Gamma(a, 1.0/b).draw, 0.0) 90 | def ll(obs: Double) = bdist.Gamma(a, 1.0/b).logPdf(obs) 91 | } 92 | 93 | case class Poisson(mu: Double)(implicit N: Int) extends Dist[Int] { 94 | lazy val particles = unweighted(bdist.Poisson(mu). 
95 | sample(N).toVector).particles 96 | def draw = Particle(bdist.Poisson(mu).draw, 0.0) 97 | def ll(obs: Int) = bdist.Poisson(mu).logProbabilityOf(obs) 98 | } 99 | 100 | } 101 | 102 | 103 | // eof 104 | 105 | -------------------------------------------------------------------------------- /min-ppl2/src/test/scala/min-ppl-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | min-ppl-test.scala 3 | 4 | Some basic sanity checks on the language 5 | 6 | */ 7 | 8 | import org.scalatest.flatspec.AnyFlatSpec 9 | import org.scalactic._ 10 | import MinPpl2._ 11 | import breeze.stats.{meanAndVariance => meanVar} 12 | 13 | class PplSpec extends AnyFlatSpec with Tolerance { 14 | 15 | "A linear Gaussian" should "flatMap correctly" in { 16 | System.err.println("**These tests take a LONG time, and it is normal for a couple to fail**") 17 | val xy = for { 18 | x <- Normal(5,4) 19 | y <- Normal(x,1) 20 | } yield (x,y) 21 | val y = xy.map(_._2).empirical 22 | val mv = meanVar(y) 23 | assert(mv.mean === 5.0 +- 0.2) 24 | assert(mv.variance === 5.0 +- 0.5) 25 | } 26 | 27 | it should "cond correctly" in { 28 | val xy = for { 29 | x <- Normal(5,4) 30 | y <- Normal(x,1) 31 | } yield (x,y) 32 | val y = xy.map(_._2) 33 | val yGz = y.cond(yi => Normal(yi, 9).ll(8.0)).empirical 34 | val mv = meanVar(yGz) 35 | assert(mv.mean === 5.857 +- 0.2) 36 | assert(mv.variance === 2.867 +- 0.5) 37 | val xyGz = xy.cond{case (x,y) => Normal(y,9).ll(8.0)}.empirical 38 | val mvx = meanVar(xyGz.map(_._1)) 39 | assert(mvx.mean === 5.857 +- 0.2) 40 | assert(mvx.variance === 2.867 +- 0.5) 41 | val mvy = meanVar(xyGz.map(_._2)) 42 | assert(mvy.mean === 6.071 +- 0.2) 43 | assert(mvy.variance === 3.214 +- 0.5) 44 | } 45 | 46 | it should "cond correctly in a for" in { 47 | val wxyz = for { 48 | w <- Normal(5,2) 49 | x <- Normal(w,2) 50 | y <- Normal(x,1).cond(y => Normal(y,9).ll(8.0)) 51 | } yield (w,x,y) 52 | val wxyze = wxyz.empirical 53 | val mvw = 
meanVar(wxyze.map(_._1)) 54 | assert(mvw.mean === 5.429 +- 0.2) 55 | assert(mvw.variance === 1.714 +- 0.5) 56 | val mvx = meanVar(wxyze.map(_._2)) 57 | assert(mvx.mean === 5.857 +- 0.2) 58 | assert(mvx.variance === 2.867 +- 0.5) 59 | val mvy = meanVar(wxyze.map(_._3)) 60 | assert(mvy.mean === 6.071 +- 0.2) 61 | assert(mvy.variance === 3.214 +- 0.5) 62 | } 63 | 64 | it should "fit correctly" in { 65 | val xyzf = for { 66 | x <- Normal(5,4) 67 | y <- Normal(x,1) 68 | z <- Normal(y,9).fit(8.0) 69 | } yield (x,y,z) 70 | val xyzfe = xyzf.empirical 71 | val mvx = meanVar(xyzfe.map(_._1)) 72 | assert(mvx.mean === 5.857 +- 0.2) 73 | assert(mvx.variance === 2.867 +- 0.5) 74 | val mvy = meanVar(xyzfe.map(_._2)) 75 | assert(mvy.mean === 6.071 +- 0.2) 76 | assert(mvy.variance === 3.214 +- 0.5) 77 | val mvz = meanVar(xyzfe.map(_._3)) 78 | assert(mvz.mean === 6.071 +- 0.2) 79 | assert(mvz.variance === 12.214 +- 1.0) 80 | } 81 | 82 | it should "fitQ correctly" in { 83 | val xyzfq = for { 84 | x <- Normal(5,4) 85 | y <- Normal(x,1) 86 | z <- Normal(y,9).fitQ(8.0) 87 | } yield (x,y,z) 88 | val xyzfqe = xyzfq.empirical 89 | val mvx = meanVar(xyzfqe.map(_._1)) 90 | assert(mvx.mean === 5.857 +- 0.2) 91 | assert(mvx.variance === 2.867 +- 0.5) 92 | val mvy = meanVar(xyzfqe.map(_._2)) 93 | assert(mvy.mean === 6.071 +- 0.2) 94 | assert(mvy.variance === 3.214 +- 0.5) 95 | val mvz = meanVar(xyzfqe.map(_._3)) 96 | assert(mvz.mean === 8.000 +- 0.001) 97 | assert(mvz.variance === 0.000 +- 0.001) 98 | } 99 | 100 | it should "fit marginalised correctly" in { 101 | val yzf = for { 102 | y <- Normal(5,5) 103 | z <- Normal(y,9).fit(8.0) 104 | } yield (y,z) 105 | val yzfe = yzf.empirical 106 | val mvy = meanVar(yzfe.map(_._1)) 107 | assert(mvy.mean === 6.071 +- 0.2) 108 | assert(mvy.variance === 3.213 +- 0.5) 109 | val mvz = meanVar(yzfe.map(_._2)) 110 | assert(mvz.mean === 6.071 +- 0.2) 111 | assert(mvz.variance === 12.214 +- 1.0) 112 | } 113 | 114 | it should "fit multiple iid observations 
correctly" in { 115 | val yzf2 = for { 116 | y <- Normal(5,5) 117 | z <- Normal(y,18).fit(List(6.0,10.0)) 118 | } yield (y,z) 119 | val yzfe2 = yzf2.empirical 120 | val mvy = meanVar(yzfe2.map(_._1)) 121 | assert(mvy.mean === 6.071 +- 0.2) 122 | assert(mvy.variance === 3.214 +- 0.5) 123 | val mvz = meanVar(yzfe2.map(_._2)) 124 | assert(mvz.mean === 6.071 +- 0.2) 125 | assert(mvz.variance === 21.214 +- 1.5) 126 | } 127 | 128 | } 129 | 130 | -------------------------------------------------------------------------------- /pfilter/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /pfilter/README.md: -------------------------------------------------------------------------------- 1 | # A scalable particle filter in Scala 2 | 3 | Code examples for the blog post: 4 | 5 | https://darrenjw.wordpress.com/2016/07/22/a-scalable-particle-filter-in-scala/ 6 | 7 | 8 | Note that this repo contains everything that is needed to build and run the Scala code examples on any system that has Java installed. Any recent version of Java is fine. You do not need to "install" Scala or any Scala "packages" in order to run the code. If you have Java and a decent internet connection, you are good to go. This is one of the benefits of Scala - you can run it anywhere, on any system with a Java installation. 
9 | 10 | To check if you have Java installed, just run: 11 | 12 | ```bash 13 | java -version 14 | ``` 15 | 16 | at your system command prompt. If you get an error, Java is absent or incorrectly installed. Installing Java is very easy on any platform, but the best way to install it depends on exactly what OS you are running, so search the internet for advice on the best way to install Java on your OS. 17 | 18 | The code uses `sbt` (the simple build tool) as the build tool. The sbt launcher has been included in the repo for the benefit of those new to Scala. It should be possible to run sbt from this directory by typing: 19 | 20 | ```bash 21 | ..\sbt 22 | ``` 23 | 24 | on Windows (which should run `..\sbt.bat`), or 25 | 26 | ```bash 27 | ../sbt 28 | ``` 29 | 30 | on Linux and similar systems (including Macs). If you want to be able to experiment with Scala yourself, you should copy the script and the file `sbt-launch.jar` to the same directory somewhere in your path, but this isn't necessary to run these examples. 31 | 32 | The sbt launcher script will download and run sbt, which will then download scala, the scala compiler, scala standard libraries and all dependencies needed to compile and run the code. All the downloaded files will be cached on your system for future use. Therefore, make sure you have a good internet connection and a bit of free disk space before running sbt for the first time. 33 | 34 | Assuming you can run sbt, just typing `run` at the sbt prompt will compile and run the example code. Typing `test` will run some tests. Typing `console` will give a Scala REPL with a properly configured classpath including all dependencies. You can type scala expressions directly into the REPL just as you would in your favourite dynamic math/stat language. Type `help` at the sbt prompt for help on sbt. Type `:help` at the Scala REPL for help on the REPL. 
35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /pfilter/build.sbt: -------------------------------------------------------------------------------- 1 | name := "pfilter" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "org.scalanlp" %% "breeze" % "0.12", 11 | "org.scalanlp" %% "breeze-natives" % "0.12", 12 | "org.scalanlp" %% "breeze-viz" % "0.12" 13 | 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 18 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.11.7" 22 | 23 | -------------------------------------------------------------------------------- /pfilter/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.8 2 | -------------------------------------------------------------------------------- /pfilter/src/main/scala/pfilter/pfilter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pfilter.scala 3 | 4 | Top level code for pfilter blog post 5 | 6 | */ 7 | 8 | package pfilter 9 | 10 | object PFilter { 11 | 12 | import scala.language.higherKinds 13 | import scala.collection.parallel.immutable.ParVector 14 | import scala.collection.GenTraversable 15 | 16 | // Hardcode LogLik type 17 | type LogLik = Double 18 | // Use blank typeclasses for State, Observation, and Parameter 19 | trait State[T] 20 | trait Observation[T] 21 | trait Parameter[T] 22 | 23 | // My generic collection typeclass 24 | trait GenericColl[C[_]] { 25 | def map[A, B](ca: C[A])(f: A => B): C[B] 26 | def reduce[A](ca: C[A])(f: (A, A) => A): A 27 | def flatMap[A, 
B, D[B] <: GenTraversable[B]](ca: C[A])(f: A => D[B]): C[B] 28 | def zip[A, B](ca: C[A])(cb: C[B]): C[(A, B)] 29 | def length[A](ca: C[A]): Int 30 | } 31 | // Syntax for the typeclass 32 | implicit class GenericCollSyntax[A, C[A]](value: C[A]) { 33 | def map[B](f: A => B)(implicit inst: GenericColl[C]): C[B] = inst.map(value)(f) 34 | def reduce(f: (A, A) => A)(implicit inst: GenericColl[C]): A = inst.reduce(value)(f) 35 | def flatMap[B, D[B] <: GenTraversable[B]](f: A => D[B])(implicit inst: GenericColl[C]): C[B] = inst.flatMap(value)(f) 36 | def zip[B](cb: C[B])(implicit inst: GenericColl[C]): C[(A, B)] = inst.zip(value)(cb) 37 | def length(implicit inst: GenericColl[C]): Int = inst.length(value) 38 | } 39 | 40 | // Implementation for Vector 41 | implicit val vGC: GenericColl[Vector] = new GenericColl[Vector] { 42 | def map[A, B](ca: Vector[A])(f: A => B): Vector[B] = ca map f 43 | def reduce[A](ca: Vector[A])(f: (A, A) => A): A = ca reduce f 44 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: Vector[A])(f: A => D[B]): Vector[B] = ca flatMap f 45 | def zip[A, B](ca: Vector[A])(cb: Vector[B]): Vector[(A, B)] = ca zip cb 46 | def length[A](ca: Vector[A]) = ca.length 47 | } 48 | 49 | // Implementation for ParVector 50 | implicit val pvGC: GenericColl[ParVector] = new GenericColl[ParVector] { 51 | def map[A, B](ca: ParVector[A])(f: A => B): ParVector[B] = ca map f 52 | def reduce[A](ca: ParVector[A])(f: (A, A) => A): A = ca reduce f 53 | def flatMap[A, B, D[B] <: GenTraversable[B]](ca: ParVector[A])(f: A => D[B]): ParVector[B] = ca flatMap f 54 | def zip[A, B](ca: ParVector[A])(cb: ParVector[B]): ParVector[(A, B)] = ca zip cb 55 | def length[A](ca: ParVector[A]) = ca.length 56 | } 57 | 58 | // TODO: Implementation for Spark RDDs 59 | 60 | // Single step of a bootstrap particle filter 61 | def update[S: State, O: Observation, C[_]: GenericColl]( 62 | dataLik: (S, O) => LogLik, stepFun: S => S 63 | )(x: C[S], o: O): (LogLik, C[S]) = { 64 | import 
breeze.stats.distributions.Poisson 65 | val xp = x map (stepFun(_)) 66 | val lw = xp map (dataLik(_, o)) 67 | val max = lw reduce (math.max(_, _)) 68 | val rw = lw map (lwi => math.exp(lwi - max)) 69 | val srw = rw reduce (_ + _) 70 | val l = rw.length 71 | val z = rw zip xp 72 | val rx = z flatMap (p => Vector.fill(Poisson(p._1 * l / srw).draw)(p._2)) 73 | (max + math.log(srw / l), rx) 74 | } 75 | 76 | // Run a bootstrap particle filter over a collection of observations 77 | def pFilter[S: State, O: Observation, C[_]: GenericColl, D[O] <: GenTraversable[O]]( 78 | x0: C[S], data: D[O], dataLik: (S, O) => LogLik, stepFun: S => S 79 | ): (LogLik, C[S]) = { 80 | val updater = update[S, O, C](dataLik, stepFun) _ 81 | data.foldLeft((0.0, x0))((prev, o) => { 82 | val next = updater(prev._2, o) 83 | (prev._1 + next._1, next._2) 84 | }) 85 | } 86 | 87 | // Marginal log likelihood estimation 88 | def pfMll[S: State, P: Parameter, O: Observation, C[_]: GenericColl, D[O] <: GenTraversable[O]]( 89 | simX0: P => C[S], stepFun: P => S => S, dataLik: P => (S, O) => LogLik, data: D[O] 90 | ): (P => LogLik) = (th: P) => pFilter(simX0(th), data, dataLik(th), stepFun(th))._1 91 | 92 | // Main method 93 | def main(args: Array[String]): Unit = { 94 | println("Hi") 95 | import Examples._ 96 | arTest 97 | println("Bye") 98 | } 99 | 100 | } 101 | 102 | object Examples { 103 | 104 | import PFilter._ 105 | 106 | // Simple test for an AR(1) model 107 | def arTest: Unit = { 108 | import breeze.linalg._ 109 | import breeze.stats.distributions._ 110 | println("AR(1) test start") 111 | // simulate some data from an AR(1) model with noise 112 | val inNoise = Gaussian(0.0, 1.0).sample(99) 113 | val state = DenseVector(inNoise.scanLeft(0.0)((s, i) => 0.8 * s + i).toArray) 114 | val noise = DenseVector(Gaussian(0.0, 2.0).sample(100).toArray) 115 | val data = (state + noise).toArray.toList 116 | import breeze.plot._ 117 | val f = Figure() 118 | val p0 = f.subplot(0) 119 | val idx = linspace(1, 100, 
100) 120 | p0 += plot(idx, state) 121 | p0 += plot(idx, data, '.') 122 | p0.xlabel = "Time" 123 | p0.ylabel = "Value" 124 | // now try to recover autoregression coefficient 125 | implicit val dState = new State[Double] {} 126 | implicit val dObs = new Observation[Double] {} 127 | implicit val dPar = new Parameter[Double] {} 128 | val mll = pfMll( 129 | (th: Double) => Gaussian(0.0, 10.0).sample(10000).toVector.par, 130 | (th: Double) => (s: Double) => Gaussian(th * s, 1.0).draw, 131 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 132 | data 133 | ) 134 | val x = linspace(0.0, 0.99, 100) 135 | val y = x map (mll(_)) 136 | //println(y) 137 | val p1 = f.subplot(2, 1, 1) 138 | p1 += plot(x, y) 139 | p1.xlabel = "theta" 140 | p1.ylabel = "mll" 141 | f.saveas("plot.png") 142 | println("AR(1) test finish") 143 | } 144 | 145 | } 146 | 147 | // eof 148 | 149 | -------------------------------------------------------------------------------- /pfilter/src/test/scala/pfilter-test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | pfilter-test.scala 3 | 4 | Test code for pfilter 5 | 6 | */ 7 | 8 | package pfilter 9 | 10 | import org.scalatest._ 11 | import org.scalatest.junit._ 12 | import org.junit.runner.RunWith 13 | 14 | import scala.language.higherKinds 15 | import PFilter._ 16 | 17 | @RunWith(classOf[JUnitRunner]) 18 | class MyTestSuite extends FunSuite { 19 | 20 | test("1+2=3") { 21 | assert(1 + 2 === 3) 22 | } 23 | 24 | // test generic functions to check that the typeclass works as intended 25 | def doubleIt[C[_]: GenericColl](ca: C[Int]): C[Int] = ca map (_ * 2) 26 | def addThem[C[_]: GenericColl](ca: C[Int]): Int = ca reduce (_ + _) 27 | def repeatThem[C[_]: GenericColl](ca: C[Int]): C[Int] = ca flatMap (x => List(x, x, x)) 28 | def zipThem[C[_]: GenericColl](ci: C[Int], cd: C[Double]): C[(Int, Double)] = ci zip cd 29 | def getLength[C[_]: GenericColl](ci: C[Int]): Int = ci.length 30 | 31 | 
test("Vector in generic function including map") { 32 | val v = Vector(5, 10, 15, 20) 33 | val v2 = v map (_ * 2) 34 | val v3 = doubleIt(v) 35 | assert(v2 === v3) 36 | } 37 | 38 | test("Vector in generic function including flatMap") { 39 | val v = Vector(5, 10, 15) 40 | val v2 = v flatMap (x => Array(x, x, x)) 41 | //println(v2) 42 | val v3 = repeatThem(v) 43 | assert(v2 === v3) 44 | } 45 | 46 | test("Vector in generic function including reduce") { 47 | val v = Vector(5, 10, 15) 48 | val s = addThem(v) 49 | assert(s === 30) 50 | } 51 | 52 | test("Vector in generic zipping function") { 53 | val v1 = Vector(1, 2, 3) 54 | val v2 = Vector(2.0, 4.0, 6.0) 55 | val v3 = v1 zip v2 56 | val v4 = zipThem(v1, v2) 57 | assert(v4 === v3) 58 | } 59 | 60 | test("Vector in generic length function") { 61 | val v1 = Vector(1, 2, 3, 4) 62 | val l = getLength(v1) 63 | assert(l === 4) 64 | } 65 | 66 | test("ParVector in generic function including map") { 67 | val v = Vector(5, 10, 15, 30).par 68 | val v2 = v map (_ * 2) 69 | //println(v2) 70 | val v3 = doubleIt(v) 71 | assert(v2 === v3) 72 | } 73 | 74 | test("ParVector in generic function including flatMap") { 75 | val v = Vector(5, 10, 15, 10).par 76 | val v2 = v flatMap (x => Vector(x, x, x)) 77 | //println(v2) 78 | val v3 = repeatThem(v) 79 | assert(v2 === v3) 80 | } 81 | 82 | test("ParVector in generic function including reduce") { 83 | val v = Vector(5, 10, 15).par 84 | val s = addThem(v) 85 | assert(s === 30) 86 | } 87 | 88 | test("ParVector in generic zipping function") { 89 | val v1 = Vector(1, 2, 3).par 90 | val v2 = Vector(2.0, 4.0, 6.0).par 91 | val v3 = v1 zip v2 92 | //println(v3) 93 | val v4 = zipThem(v1, v2) 94 | assert(v4 === v3) 95 | } 96 | 97 | test("ParVector in generic length function") { 98 | val v1 = Vector(1, 2, 3, 4).par 99 | val l = getLength(v1) 100 | assert(l === 4) 101 | } 102 | 103 | test("Vector update test") { 104 | import breeze.stats.distributions.Gaussian 105 | implicit val dState = new State[Double] 
{} 106 | implicit val dObs = new Observation[Double] {} 107 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector 108 | val p2 = update((s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw)(p1, 5.0) 109 | assert(p2._2.length > 90000) 110 | } 111 | 112 | test("ParVector update test") { 113 | import breeze.stats.distributions.Gaussian 114 | implicit val dState = new State[Double] {} 115 | implicit val dObs = new Observation[Double] {} 116 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector.par 117 | val p2 = update((s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw)(p1, 5.0) 118 | assert(p2._2.length > 90000) 119 | } 120 | 121 | test("Vector pFilter test") { 122 | import breeze.stats.distributions.Gaussian 123 | implicit val dState = new State[Double] {} 124 | implicit val dObs = new Observation[Double] {} 125 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector 126 | val pn = pFilter(p1, List(2.0, 2.0, 3.0, 4.0), (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw) 127 | assert(pn._2.length > 90000) 128 | } 129 | 130 | test("ParVector pFilter test") { 131 | import breeze.stats.distributions.Gaussian 132 | implicit val dState = new State[Double] {} 133 | implicit val dObs = new Observation[Double] {} 134 | val p1 = Gaussian(0.0, 10.0).sample(100000).toVector.par 135 | val pn = pFilter(p1, List(2.0, 2.0, 3.0, 4.0), (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), (s: Double) => Gaussian(s, 1.0).draw) 136 | assert(pn._2.length > 90000) 137 | } 138 | 139 | test("Vector pfMll test") { 140 | import breeze.stats.distributions.Gaussian 141 | implicit val dState = new State[Double] {} 142 | implicit val dObs = new Observation[Double] {} 143 | implicit val dPar = new Parameter[Double] {} 144 | val mll = pfMll( 145 | (th: Double) => Gaussian(0.0, 10.0).sample(100000).toVector, 146 | (th: Double) => (s: Double) => Gaussian(s, 1.0).draw, 147 | (th: 
Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 148 | List(2.0, 2.0, 3.0, 4.0) 149 | ) 150 | val ll1 = mll(1.0) 151 | val ll2 = mll(2.0) 152 | assert(math.abs(ll1 - ll2) < 0.1) 153 | } 154 | 155 | test("ParVector pfMll test") { 156 | import breeze.stats.distributions.Gaussian 157 | implicit val dState = new State[Double] {} 158 | implicit val dObs = new Observation[Double] {} 159 | implicit val dPar = new Parameter[Double] {} 160 | val mll = pfMll( 161 | (th: Double) => Gaussian(0.0, 10.0).sample(100000).toVector.par, 162 | (th: Double) => (s: Double) => Gaussian(s, 1.0).draw, 163 | (th: Double) => (s: Double, o: Double) => Gaussian(s, 2.0).logPdf(o), 164 | List(2.0, 2.0, 3.0, 4.0) 165 | ) 166 | val ll1 = mll(1.0) 167 | val ll2 = mll(2.0) 168 | assert(math.abs(ll1 - ll2) < 0.1) 169 | } 170 | 171 | } 172 | 173 | // eof 174 | -------------------------------------------------------------------------------- /pi-cam/DraftPost.md: -------------------------------------------------------------------------------- 1 | # Raspberry Pi Rabbit Hutch-cam 2 | 3 | **This is the draft of a post [now published here](https://darrenjw2.wordpress.com/2016/12/21/raspberry-pi-rabbit-hutch-cam/) - this is not the definitive version.** 4 | 5 | *This post describes how to setup a Rasberry Pi with a Pi camera board and a (wireless) internet connection as a webcam serving a latest image (updated every 15 seconds) and a short timelapse containing the most recent hour of images in a 20 second movie (updated once per hour). The website has basic password protection. I've set it up to monitor a rabbit hutch, but obviously there are other potential applications. There isn't anything very novel here - this post serves mainly to document my setup in case I ever need to set it up again, which seems likely, as we've just ordered a Pi Noir camera for additional night-time monitoring...* 6 | 7 | ## Introduction 8 | 9 | My kids got a rabbit this summer. 
It lives in a hutch in the back garden, placed so that we can see into the hutch easily from the kitchen window. So, the kids can easily check on the rabbit when in the kitchen, but not when they are in the lounge watching TV, and certainly not when they are out-and-about. So my Xmas Pi project was to set up Joey-cam (the rabbit is called "Joey", because kid-reasons), so that the kids can check on the rabbit from their smartphones wherever they are.... 10 | 11 | However, the hutch has closed (opaque) compartments, so in addition to a live image, the kids also wanted to be able to look at a timelapse of recent images, to be able to quickly and easily check for any sign of movement in the last hour. 12 | 13 | The final requirement was that Joey-cam should be accessible from anywhere over the internet, but to have some very basic password protection, so that it wouldn't be completely public. 14 | 15 | ## Pre-requisites 16 | 17 | I'm assuming a Pi (doesn't really matter which), with a Pi camera board attached, a clean Raspbian install, and some kind of internet connection. Mine has wifi (via a small USB wifi dongle), which is very convenient given its location. It also needs to have an SSH server enabled, so that you can log into it from another machine. I'm assuming that the reader understands how to do all this already. This post is about the camera and web server setup. 18 | 19 | ## Set up 20 | 21 | First log in to the Pi from another machine (eg. `ssh pi@ip-address`, replacing *ip-address* appropriately) and then download this repo with: 22 | ```bash 23 | cd 24 | wget https://github.com/darrenjw/blog/archive/master.zip 25 | unzip master.zip 26 | cd blog-master/pi-cam/ 27 | ``` 28 | Run the camera setup with: 29 | ```bash 30 | ./camera-setup.sh 31 | ``` 32 | The script finishes by running raspi-config. If you already have the camera enabled, just exit. If not, enable it, but don't yet reboot - you will need to reboot soon anyway. 
But if you haven't set a sensible hostname yet, it's probably worth doing that, too. 33 | 34 | The camera scripts can be enabled by running: 35 | ```bash 36 | crontab -e 37 | ``` 38 | and add the following lines to the end of the file: 39 | ``` 40 | @reboot /home/pi/blog-master/pi-cam/camera-script.sh 2>&1 41 | 20 * * * * /home/pi/blog-master/pi-cam/hourly.sh 2>&1 42 | ``` 43 | Save and exit and check with `crontab -l`. 44 | 45 | Next set up the web-site by running: 46 | ```bash 47 | ./web-setup.sh 48 | ``` 49 | 50 | Reboot with `sudo reboot` and log back in again after 30 seconds. Check that photos start to appear every 15 seconds or so: 51 | ```bash 52 | ls -l ~/timelapse/ 53 | ``` 54 | Assuming so, try pointing a web-browser at your Pi (`http://ip-address/` - replace *ip-address* appropriately). You should get a basic page containing the latest image, and the page should update every 20 seconds. There will also be a link to the "latest movie", but it won't work straight away - wait a couple of hours before trying that. 55 | 56 | If you aren't going to open up your Pi to the world, then you should be done. 57 | 58 | ## Adding password protection 59 | 60 | If you are intending to open up your cam to the internet, it's probably worth adding some basic password protection. The very basic protection I describe here is probably OK for something as mundane as a rabbit hutch, but if you are monitoring anything more sensitive, then you should google how to lock down your site properly. 
61 | 62 | You can find more detailed instructions [here](https://www.cyberciti.biz/tips/lighttpd-setup-a-password-protected-directory-directories.html), but the tl;dr is to 63 | ```bash 64 | sudo nano /etc/lighttpd/lighttpd.conf 65 | ``` 66 | and paste the following at the end of the file: 67 | ``` 68 | 69 | server.modules += ( "mod_auth" ) 70 | 71 | auth.debug = 2 72 | auth.backend = "plain" 73 | auth.backend.plain.userfile = "/home/pi/lighttpdpwd" 74 | 75 | auth.require = ( "/" => 76 | ( 77 | "method" => "basic", 78 | "realm" => "Hutch-cam", 79 | "require" => "user=joey" 80 | ) 81 | ) 82 | ``` 83 | Replace the required username (here, "joey"), with something appropriate, and create the file `/home/pi/lighttpdpwd` containing a single line in the form `username:password`. Then restart the server with 84 | ```bash 85 | /etc/init.d/lighttpd restart 86 | ``` 87 | Test it out and find diagnostic information in `/var/log/lighttpd/`. 88 | 89 | 90 | ## Opening up 91 | 92 | You need to open up ports on your router to allow access to the Pi from outside your home network. This is router-specific, so you may want to just google for instructions for your router. Note that I have another public-facing web server on my home network, so I map port 81 on my router to port 80 on the Pi to have a public facing webcam on port 81 of my home network address (`http://my.home.address:81`, where *my.home.address* is replaced appropriately). 93 | 94 | ## Customisation 95 | 96 | You will almost certainly want to customise the very basic web page in `/var/www/index.html` appropriately for your application. You can also tweak various things in `camera-script.sh` and `hourly.sh`, particularly settings such as resolutions, qualities, frame-rates, etc. 
97 | 98 | 99 | -------------------------------------------------------------------------------- /pi-cam/README.md: -------------------------------------------------------------------------------- 1 | # Raspberry Pi Rabbit Hutch-cam 2 | 3 | This is the repo associated with blog post: 4 | 5 | https://darrenjw2.wordpress.com/2016/12/21/raspberry-pi-rabbit-hutch-cam/ 6 | 7 | It sets up a Raspberry pi with wireless and a pi camera as a webcam for monitoring a rabbit hutch. 8 | 9 | 10 | -------------------------------------------------------------------------------- /pi-cam/camera-script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # camera-script.sh 3 | 4 | while [ "true" != "false" ] 5 | do 6 | DATE=$(date +"%Y-%m-%d_%H:%M:%S") 7 | raspistill -o now.jpg # default settings 8 | # raspistill -ex night -ss 3500000 -o now.jpg # night settings 9 | avconv -i now.jpg -vf scale=640:-1 now-small.jpg 10 | mv now-small.jpg now.jpg 11 | cp now.jpg ~/timelapse/tl-$DATE.jpg # time-stamped photo 12 | cp now.jpg /var/www/html/latest.jpg # copy latest image for serving via web 13 | # raspistill takes around 8 seconds 14 | # add any extra delay (in seconds) below: 15 | sleep 7 16 | done 17 | 18 | exit 0 19 | 20 | #eof 21 | 22 | -------------------------------------------------------------------------------- /pi-cam/camera-setup.sh: -------------------------------------------------------------------------------- 1 | # camera-setup.sh 2 | # script to run once to set up camera 3 | 4 | sudo apt-get -y update 5 | sudo apt-get -y upgrade 6 | sudo apt-get -y install libav-tools 7 | 8 | mkdir ~/timelapse 9 | 10 | sudo raspi-config 11 | 12 | # eof 13 | -------------------------------------------------------------------------------- /pi-cam/hourly.sh: -------------------------------------------------------------------------------- 1 | # hourly.sh 2 | # hourly cron 3 | # NB. 
Can take over half an hour to run, so shouldn't run more than once per hour 4 | 5 | cd ~/timelapse 6 | 7 | # Delete images older than an hour (don't actually need) 8 | # find . -name tl-\*.jpg -type f -mmin +60 -delete 9 | 10 | # Make stills into a movie: 11 | ls *.jpg | awk 'BEGIN{ a=0 }{ printf "mv %s tlsn-%04d.jpg\n", $0, a++ }' | bash 12 | avconv -y -r 10 -i tlsn-%4d.jpg -r 10 -vcodec libx264 -q:v 3 -vf scale=640:480 timelapse.mp4; 13 | rm -f tlsn-*.jpg 14 | 15 | # move to web for serving: 16 | mv timelapse.mp4 /var/www/html/ 17 | 18 | # eof 19 | 20 | 21 | -------------------------------------------------------------------------------- /pi-cam/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Hutch-cam 4 | 5 | 6 | 7 |

Hutch-cam

8 |

Welcome to Hutch-cam!

9 | 10 |

11 | 12 |

13 | 14 |

15 | Latest movie 16 |

17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /pi-cam/web-setup.sh: -------------------------------------------------------------------------------- 1 | # web-setup.sh 2 | # run-once web setup script 3 | 4 | sudo apt-get -y update 5 | sudo apt-get -y install lighttpd 6 | 7 | sudo chown www-data:www-data /var/www/html 8 | sudo chmod 775 /var/www/html 9 | sudo usermod -a -G www-data pi 10 | 11 | sudo cp index.html /var/www/html/ 12 | sudo chown www-data:www-data /var/www/html/index.html 13 | sudo chmod g+w /var/www/html/index.html 14 | 15 | # eof 16 | 17 | 18 | -------------------------------------------------------------------------------- /pi-cluster/README.md: -------------------------------------------------------------------------------- 1 | # Raspberry Pi 2 cluster with NAT routing head node 2 | 3 | Scripts and config files associated with my blog post "Raspberry Pi 2 cluster with NAT routing": 4 | 5 | https://darrenjw2.wordpress.com/2015/09/07/raspberry-pi-2-cluster-with-nat-routing/ 6 | 7 | The brief summary is as follows: 8 | 9 | Create a cluster by connecting a bunch of Pis to a switch via the Pis ethernet port. Pick one of the Pis to be a head node and NAT router. Connect a USB ethernet dongle to this Pi, and use the dongle port as the internet uplink. 10 | 11 | Stick Raspbian on each node, with SSH server enabled. 12 | 13 | Boot up the head node. 14 | 15 | ```bash 16 | wget https://github.com/darrenjw/blog/archive/master.zip 17 | unzip master.zip 18 | cd blog-master/pi-cluster 19 | sudo sh install-packages 20 | ``` 21 | 22 | will reboot when done. On reboot, re-enter same directory, and then do: 23 | 24 | ```sudo sh setup-network``` 25 | 26 | when done, will reboot. 27 | 28 | On reboot, re-enter same directory. Boot up the other nodes and then run 29 | 30 | ```sh setup-cluster``` 31 | 32 | on the head node. 
33 | 34 | 35 | -------------------------------------------------------------------------------- /pi-cluster/copy-keys: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | awk '{print "ssh-copy-id " $1}' < workers.txt > /tmp/copy-keys 4 | sh /tmp/copy-keys 5 | 6 | # eof 7 | 8 | -------------------------------------------------------------------------------- /pi-cluster/dhcpd.conf: -------------------------------------------------------------------------------- 1 | # /etc/dhcp/dhcpd.conf 2 | 3 | authoritative; 4 | 5 | subnet 192.168.0.0 netmask 255.255.255.0 { 6 | range 192.168.0.10 192.168.0.250; 7 | option broadcast-address 192.168.0.255; 8 | option routers 192.168.0.1; 9 | default-lease-time 600; 10 | max-lease-time 7200; 11 | option domain-name "local"; 12 | option domain-name-servers 8.8.8.8, 8.8.4.4; 13 | } 14 | 15 | 16 | # eof 17 | 18 | 19 | -------------------------------------------------------------------------------- /pi-cluster/install-packages: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | apt-get update && sudo apt-get -y upgrade 4 | apt-get -y install nmap isc-dhcp-server pssh 5 | reboot 6 | 7 | # eof 8 | 9 | -------------------------------------------------------------------------------- /pi-cluster/interfaces: -------------------------------------------------------------------------------- 1 | # /etc/network/interfaces 2 | 3 | auto lo 4 | iface lo inet loopback 5 | 6 | # dongle uplink 7 | auto eth1 8 | iface eth1 inet dhcp 9 | 10 | # internal gateway 11 | auto eth0 12 | iface eth0 inet static 13 | address 192.168.0.1 14 | netmask 255.255.255.0 15 | network 192.168.0.0 16 | broadcast 192.168.0.255 17 | 18 | # eof 19 | 20 | -------------------------------------------------------------------------------- /pi-cluster/iptables: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | iptables-restore < 
/etc/iptables.up.rules 4 | 5 | exit 0 6 | 7 | # eof 8 | 9 | -------------------------------------------------------------------------------- /pi-cluster/map-network: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | nmap -sn 192.168.0.0/24 -oG - | grep -v "^#" | cut -d " " -f 2 > all-hosts.txt 4 | grep -v "^192.168.0.1$" < all-hosts.txt > workers.txt 5 | cp workers.txt ~/ 6 | 7 | # eof 8 | 9 | -------------------------------------------------------------------------------- /pi-cluster/setup-cluster: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ssh-keygen 4 | 5 | sh map-network 6 | sh copy-keys 7 | sh upgrade-workers 8 | 9 | # eof 10 | 11 | 12 | -------------------------------------------------------------------------------- /pi-cluster/setup-network: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # basic interfaces 4 | cp interfaces /etc/network/ 5 | /etc/init.d/networking restart 6 | 7 | # DHCP server 8 | cp /etc/dhcp/dhcpd.conf /etc/dhcp/dhcpd.conf.old 9 | cp dhcpd.conf /etc/dhcp/ 10 | echo 'INTERFACES="eth0"' >> /etc/default/isc-dhcp-server 11 | /etc/init.d/isc-dhcp-server restart 12 | 13 | # NAT routing 14 | echo 1 > /proc/sys/net/ipv4/ip_forward 15 | echo "net.ipv4.ip_forward=1" >> /etc/sysctl.conf 16 | iptables -t nat -A POSTROUTING -o eth1 -j MASQUERADE 17 | # TODO: add more rules here for extra security... 
18 | 19 | ifdown eth1 && ifup eth1 20 | iptables-save > /etc/iptables.up.rules 21 | 22 | cp iptables /etc/network/if-pre-up.d/ 23 | chown root:root /etc/network/if-pre-up.d/iptables 24 | chmod 755 /etc/network/if-pre-up.d/iptables 25 | 26 | reboot 27 | 28 | # eof 29 | 30 | -------------------------------------------------------------------------------- /pi-cluster/shutdown-workers: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | parallel-ssh -h workers.txt -t 0 -p 100 -P sudo shutdown -h now 4 | 5 | # eof 6 | 7 | 8 | -------------------------------------------------------------------------------- /pi-cluster/upgrade-workers: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | parallel-ssh -h workers.txt -t 0 -p 100 -P sudo apt-get update 4 | parallel-ssh -h workers.txt -t 0 -p 100 -P sudo apt-get -y upgrade 5 | parallel-ssh -h workers.txt -t 0 -p 100 -P sudo apt-get clean 6 | parallel-ssh -h workers.txt -t 0 -p 100 -P sudo reboot 7 | 8 | # eof 9 | 10 | 11 | -------------------------------------------------------------------------------- /qblog/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /qblog/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | CURRENT=draft 4 | 5 | FORCE: 6 | make render 7 | 8 | preview: 9 | quarto preview 10 | 11 | render: 12 | quarto render 13 | 14 | view: 15 | make render 16 | xdg-open _site/index.html 17 | 18 | publish: 19 | make render 20 | cp -r _site/* ../docs/ 21 | git add ../docs/* 22 | git commit -a && git push 23 | 24 | edit: 25 | emacs Makefile *.yml *.qmd posts/$(CURRENT)/*.qmd & 26 | 27 | 28 | # eof 29 | 30 | -------------------------------------------------------------------------------- /qblog/_quarto.yml: 
-------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | 4 | website: 5 | title: "DJW's blog" 6 | site-url: https://darrenjw.github.io/blog/ 7 | description: "Darren Wilkinson's blog" 8 | navbar: 9 | right: 10 | - about.qmd 11 | - icon: rss 12 | href: index.xml 13 | - icon: github 14 | href: https://github.com/darrenjw/blog/ 15 | - icon: twitter 16 | href: https://twitter.com/darrenjw 17 | format: 18 | html: 19 | theme: united 20 | css: styles.css 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /qblog/about.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "About" 3 | image: profile.jpg 4 | about: 5 | template: jolla 6 | links: 7 | - icon: twitter 8 | text: Twitter 9 | href: https://twitter.com/darrenjw 10 | - icon: linkedin 11 | text: LinkedIn 12 | href: https://linkedin.com/in/darrenjwilkinson/ 13 | - icon: github 14 | text: Github 15 | href: https://github.com/darrenjw 16 | 17 | --- 18 | 19 | This is the blog of [Darren Wilkinson](https://darrenjw.github.io/). 20 | 21 | This blog is intended to cover my reflections on mathematics, statistics, machine learning, AI, computing and biology, and especially their interactions, and relationship with “big data” and data science. This blog replaces my two wordpress blogs: my [main blog](https://darrenjw.wordpress.com/) and my [personal blog](https://darrenjw2.wordpress.com). [Wordpress](https://wordpress.com/) served me well for more than a decade, but these days I dislike having to write anything other than [Quarto](http://quarto.org/), so I've taken the plunge and moved to a Quarto based blog. 
22 | 23 | -------------------------------------------------------------------------------- /qblog/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Darren Wilkinson's blog" 3 | listing: 4 | contents: posts 5 | sort: "date desc" 6 | type: default 7 | categories: true 8 | sort-ui: false 9 | filter-ui: false 10 | feed: true 11 | page-layout: full 12 | title-block-banner: true 13 | --- 14 | 15 | 16 | -------------------------------------------------------------------------------- /qblog/posts/_metadata.yml: -------------------------------------------------------------------------------- 1 | # options specified here will apply to all posts in this folder 2 | 3 | # freeze computational output 4 | # (see https://quarto.org/docs/projects/code-execution.html#freeze) 5 | freeze: true 6 | 7 | # Enable banner style title blocks 8 | title-block-banner: true 9 | -------------------------------------------------------------------------------- /qblog/posts/draft/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Draft post" 3 | author: "Darren Wilkinson" 4 | date: "2024-03-05" 5 | categories: [stats] 6 | draft: true 7 | --- 8 | 9 | # Draft post 10 | 11 | This shouldn't appear in the published site. Blah. A draft post. 12 | 13 | -------------------------------------------------------------------------------- /qblog/posts/py-test/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Python test" 3 | author: "Darren Wilkinson" 4 | date: "2024-03-04" 5 | categories: [python, code] 6 | --- 7 | 8 | # Python test 9 | 10 | A test post including some python code 11 | ```{python} 12 | x = [1,2,3] 13 | print(x[1]) 14 | 15 | import matplotlib.pyplot as plt 16 | fig, axis = plt.subplots() 17 | axis.plot([0,1,2,3,4,5], [3,4,6,5,2,4]) 18 | ``` 19 | Blah. 
20 | -------------------------------------------------------------------------------- /qblog/posts/r-test/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R test" 3 | author: "Darren Wilkinson" 4 | date: "2024-03-04" 5 | categories: [R, code] 6 | --- 7 | 8 | # R test 9 | 10 | A test post including some R code. 11 | ```{r} 12 | hist(rnorm(1000)) 13 | ``` 14 | Blah. 15 | 16 | -------------------------------------------------------------------------------- /qblog/profile.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/qblog/profile.jpg -------------------------------------------------------------------------------- /qblog/styles.css: -------------------------------------------------------------------------------- 1 | /* css styles */ 2 | -------------------------------------------------------------------------------- /rainier/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /rainier/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | 4 | FORCE: 5 | make doc 6 | 7 | doc: docs/DraftPost.md docs/Tutorial.md 8 | sbt mdoc 9 | cd target/mdoc ; pandoc DraftPost.md -o DraftPost.html 10 | cd target/mdoc ; pandoc Tutorial.md -o Tutorial.html 11 | 12 | edit: 13 | emacs *.md 
Makefile build.sbt src/test/scala/*.scala docs/*.md src/main/scala/*.scala & 14 | 15 | commit: 16 | git commit -a && git push 17 | 18 | update: 19 | git pull 20 | git log|less 21 | 22 | 23 | # eof 24 | 25 | 26 | -------------------------------------------------------------------------------- /rainier/Readme.md: -------------------------------------------------------------------------------- 1 | # Rainier 0.3.0 2 | 3 | Materials supporting a blog post about Rainier 0.3.0, which is a major update of the functional probabilistic programming language, [Rainier](https://rainier.fit/). 4 | 5 | A draft post, using **mdoc**, will gradually appear in [docs](docs/). 6 | 7 | 8 | 9 | #### eof 10 | 11 | 12 | -------------------------------------------------------------------------------- /rainier/build.sbt: -------------------------------------------------------------------------------- 1 | // build.sbt 2 | 3 | name := "rainier" 4 | 5 | version := "0.1-SNAPSHOT" 6 | 7 | scalacOptions ++= Seq( 8 | "-unchecked", "-deprecation", "-feature", "-language:higherKinds", 9 | "-language:implicitConversions", "-Ypartial-unification" 10 | ) 11 | 12 | addCompilerPlugin("org.typelevel" %% "kind-projector" % "0.11.0" cross CrossVersion.full) 13 | addCompilerPlugin("org.scalamacros" %% "paradise" % "2.1.1" cross CrossVersion.full) 14 | 15 | enablePlugins(MdocPlugin) 16 | 17 | libraryDependencies ++= Seq( 18 | "org.scalatest" %% "scalatest" % "3.0.8" % "test", 19 | "org.scalactic" %% "scalactic" % "3.0.8" % "test", 20 | "org.typelevel" %% "cats-core" % "2.0.0", 21 | "org.typelevel" %% "discipline-core" % "1.0.0", 22 | "org.typelevel" %% "discipline-scalatest" % "1.0.0", 23 | "org.typelevel" %% "simulacrum" % "1.0.0", 24 | "com.cibo" %% "evilplot" % "0.6.3", // 0.7.0 25 | "com.cibo" %% "evilplot-repl" % "0.6.3", // 0.7.0 26 | // "com.stripe" %% "rainier-core" % "0.3.0", 27 | // "com.stripe" %% "rainier-notebook" % "0.3.0" 28 | "com.stripe" %% "rainier-core" % "0.3.2+2-8a01736f", 29 | 
"com.stripe" %% "rainier-notebook" % "0.3.2+2-8a01736f" 30 | ) 31 | 32 | 33 | resolvers += Resolver.bintrayRepo("cibotech", "public") // for EvilPlot 34 | 35 | resolvers ++= Seq( 36 | "Sonatype Snapshots" at 37 | "https://oss.sonatype.org/content/repositories/snapshots/", 38 | "Sonatype Releases" at 39 | "https://oss.sonatype.org/content/repositories/releases/", 40 | "jitpack" at "https://jitpack.io" // for Jupiter/notebook 41 | ) 42 | 43 | scalaVersion := "2.12.10" 44 | 45 | 46 | // eof 47 | 48 | -------------------------------------------------------------------------------- /rainier/docs/DraftPost.md: -------------------------------------------------------------------------------- 1 | # Probabilistic programming with Rainier 0.3.0 2 | 3 | 4 | ## Setup 5 | 6 | 7 | **Start with setting up an SBT console from scratch? Bit messy due to non-standard resolvers...** 8 | 9 | 10 | ```scala mdoc 11 | import com.stripe.rainier.core._ 12 | import com.stripe.rainier.compute._ 13 | import com.stripe.rainier.notebook._ 14 | import com.stripe.rainier.sampler._ 15 | 16 | implicit val rng = ScalaRNG(3) 17 | val sampler = EHMC(warmupIterations = 5000, iterations = 5000) 18 | ``` 19 | 20 | # Normal random sample 21 | 22 | Let's start by looking at inferring the mean and standard deviation of a normal random sample. 
23 | 24 | 25 | ```scala mdoc 26 | // first simulate some data 27 | val n = 1000 28 | val mu = 3.0 29 | val sig = 5.0 30 | val x = Vector.fill(n)(mu + sig*rng.standardNormal) 31 | // now build Rainier model 32 | val m = Normal(0,100).latent 33 | val s = Gamma(1,10).latent 34 | val nrs = Model.observe(x, Normal(m,s)) 35 | // now sample from the model 36 | val out = nrs.sample(sampler) 37 | ``` 38 | 39 | ```scala mdoc:image:nrs-mu.png 40 | val mut = out.predict(m) 41 | show("mu", density(mut)) 42 | ``` 43 | 44 | ```scala mdoc:image:nrs-sig.png 45 | val sigt = out.predict(s) 46 | show("sig", density(sigt)) 47 | ``` 48 | 49 | # Logistic regression 50 | 51 | Now let's fit a basic logistic regression model. 52 | 53 | ```scala mdoc:silent:reset 54 | import com.stripe.rainier.core._ 55 | import com.stripe.rainier.compute._ 56 | import com.stripe.rainier.notebook._ 57 | import com.stripe.rainier.sampler._ 58 | 59 | implicit val rng = ScalaRNG(3) 60 | val sampler = EHMC(warmupIterations = 5000, iterations = 5000) 61 | ``` 62 | 63 | ```scala mdoc 64 | val N = 1000 65 | val beta0 = 0.1 66 | val beta1 = 0.3 67 | val x = (1 to N) map { _ => 2.0 * rng.standardNormal } 68 | val theta = x map { xi => beta0 + beta1 * xi } 69 | def expit(x: Double): Double = 1.0 / (1.0 + math.exp(-x)) 70 | val p = theta map expit 71 | val yb = p map (pi => (rng.standardUniform < pi)) 72 | val y = yb map (b => if (b) 1L else 0L) 73 | println("Proportion of successes: " + (y.filter(_ > 0L).length.toDouble/N)) 74 | // now build Rainier model 75 | val b0 = Normal(0, 2).latent 76 | val b1 = Normal(0, 2).latent 77 | val model = Model.observe(y, Vec.from(x).map(xi => { 78 | val theta = b0 + b1*xi 79 | val p = 1.0 / (1.0 + (-theta).exp) 80 | Bernoulli(p) 81 | })) 82 | // now sample from the model 83 | val bt = model.sample(sampler) 84 | ``` 85 | 86 | ```scala mdoc:image:lr-b0.png 87 | val b0t = bt.predict(b0) 88 | show("b0", density(b0t)) 89 | ``` 90 | 91 | ```scala mdoc:image:lr-b1.png 92 | val b1t = 
bt.predict(b1) 93 | show("b1", density(b1t)) 94 | ``` 95 | 96 | 97 | # ANOVA model 98 | 99 | Let's now turn attention to a very basic normal random effects model. 100 | 101 | ```scala mdoc:silent:reset 102 | import com.stripe.rainier.core._ 103 | import com.stripe.rainier.compute._ 104 | import com.stripe.rainier.notebook._ 105 | import com.stripe.rainier.sampler._ 106 | 107 | implicit val rng = ScalaRNG(3) 108 | val sampler = EHMC(warmupIterations = 5000, iterations = 5000) 109 | ``` 110 | 111 | ```scala mdoc 112 | // simulate synthetic data 113 | //val n = 50 // groups 114 | //val N = 150 // obs per group 115 | val n = 15 // groups 116 | val N = 50 // obs per group 117 | val mu = 5.0 // overall mean 118 | val sigE = 2.0 // random effect SD 119 | val sigD = 3.0 // obs SD 120 | val effects = Vector.fill(n)(sigE * rng.standardNormal) 121 | val data = effects map (e => 122 | Vector.fill(N)(mu + e + sigD * rng.standardNormal)) 123 | // build model 124 | val m = Normal(0, 100).latent 125 | val sD = LogNormal(0, 10).latent 126 | val sE = LogNormal(1, 5).latent 127 | val eff = Vector.fill(n)(Normal(m, sE).latent) 128 | val models = (0 until n).map(i => 129 | Model.observe(data(i), Normal(eff(i), sD))) 130 | val anova = models.reduce{(m1, m2) => m1.merge(m2)} 131 | // now sample the model 132 | val trace = anova.sample(sampler) 133 | val mt = trace.predict(m) 134 | ``` 135 | 136 | ```scala mdoc:image:anova-mu.png 137 | show("mu", density(mt)) 138 | ``` 139 | 140 | ```scala mdoc:image:anova-sd.png 141 | val sDt = trace.predict(sD) 142 | show("sigD", density(sDt)) 143 | ``` 144 | 145 | ```scala mdoc:image:anova-se.png 146 | val sEt = trace.predict(sE) 147 | show("sigE", density(sEt)) 148 | ``` 149 | 150 | 151 | 152 | #### eof 153 | 154 | -------------------------------------------------------------------------------- /rainier/docs/Tutorial.md: -------------------------------------------------------------------------------- 1 | # Tutorial for Rainier 0.3 2 | 3 | 4 | ```scala 
mdoc 5 | import com.stripe.rainier.core._ 6 | import com.stripe.rainier.compute._ 7 | 8 | val a = Uniform(0,1).latent 9 | val b = a + 1 10 | 11 | val c = Normal(b, a).latent 12 | Model.sample((a,c)).take(10) 13 | ``` 14 | 15 | ```scala mdoc:image:scatter.png 16 | import com.stripe.rainier.notebook._ 17 | val ac = Model.sample((a,c)) 18 | show("a", "c", scatter(ac)) 19 | ``` 20 | 21 | ```scala mdoc 22 | val eggs = List[Long](45, 52, 45, 47, 41, 42, 44, 42, 46, 38, 36, 35, 41, 48, 42, 29, 45, 43, 45, 40, 42, 53, 31, 48, 40, 45, 39, 29, 45, 42) 23 | val lambda = Gamma(0.5, 100).latent 24 | ``` 25 | 26 | ```scala mdoc:image:lambda.png 27 | show("lambda", density(Model.sample(lambda))) 28 | ``` 29 | 30 | ```scala mdoc 31 | val eggModel = Model.observe(eggs, Poisson(lambda)) 32 | eggModel.optimize(lambda) 33 | val dozens = eggModel.optimize(lambda / 12) 34 | import com.stripe.rainier.sampler._ 35 | 36 | val sampler = EHMC(warmupIterations = 5000, iterations = 500) 37 | val eggTrace = eggModel.sample(sampler) 38 | eggTrace.diagnostics 39 | val thinTrace = eggTrace.thin(2) 40 | thinTrace.diagnostics 41 | val posterior = eggTrace.predict(lambda) 42 | ``` 43 | 44 | ```scala mdoc:image:lambdap.png 45 | show("lambda", density(posterior)) 46 | ``` 47 | 48 | 49 | #### eof 50 | 51 | -------------------------------------------------------------------------------- /rainier/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.3.2 2 | -------------------------------------------------------------------------------- /rainier/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "1.3.6") 2 | 3 | 4 | -------------------------------------------------------------------------------- /rainier/src/main/scala/rainier.scala: -------------------------------------------------------------------------------- 1 | /* 2 | rainier.scala 
Simple example rainier app
*/

object RainierApp {

  import cats._
  import cats.implicits._
  import com.stripe.rainier.core._
  import com.stripe.rainier.compute._
  import com.stripe.rainier.sampler._
  import com.stripe.rainier.notebook._
  import com.cibo.evilplot._
  import com.cibo.evilplot.plot._

  /**
    * Walk-through of the Rainier tutorial: two simple latent variables,
    * then a Poisson model for a vector of egg counts.
    * Side effects only: prints summaries and displays plots on the console.
    */
  def tutorial: Unit = {
    println("Tutorial")
    val a = Uniform(0, 1).latent
    val b = a + 1

    val c = Normal(b, a).latent
    // print a few prior predictive samples
    // (this result was previously computed and then discarded)
    println(Model.sample((a, c)).take(10))

    val ac = Model.sample((a, c))
    show("a", "c", scatter(ac)) // produces an almond Image, but then what?
    displayPlot(scatter(ac).render()) // use Evilplot to display on console

    val eggs = List[Long](45, 52, 45, 47, 41, 42, 44, 42, 46, 38, 36, 35, 41, 48, 42, 29, 45, 43,
      45, 40, 42, 53, 31, 48, 40, 45, 39, 29, 45, 42)
    val lambda = Gamma(0.5, 100).latent

    show("lambda", density(Model.sample(lambda))) // prior density
    mcmcSummary("lambda", Model.sample(lambda))

    val eggModel = Model.observe(eggs, Poisson(lambda))
    // report the MAP estimates
    // (both optimize results were previously discarded/unused)
    val mapLambda = eggModel.optimize(lambda)
    println(s"MAP lambda: $mapLambda")
    val dozens = eggModel.optimize(lambda / 12)
    println(s"MAP dozens: $dozens")

    val sampler = EHMC(5000, 500)
    val eggTrace = eggModel.sample(sampler)
    println(eggTrace.diagnostics) // previously computed and discarded
    val thinTrace = eggTrace.thin(2)
    println(thinTrace.diagnostics) // previously computed and discarded
    val posterior = eggTrace.predict(lambda)

    show("lambda", density(posterior)) // posterior density
    mcmcSummary("lambda", posterior)
  }

  /**
    * Display simple MCMC diagnostics for one named chain: a trace plot and a
    * marginal density plot, faceted side by side in a single window.
    *
    * @param name  label used for the relevant plot axes
    * @param chain sampled values, in iteration order
    */
  def mcmcSummary(name: String, chain: Seq[Double]): Unit = {
    println(name)
    val dens = density(chain).
      standard().
      xLabel(name).
      yLabel("Density")
    // (index, value) pairs -> (iteration on x-axis, value on y-axis)
    val trace = line(chain.zipWithIndex map (_.swap)).
      standard().
      xLabel("Iteration").
      yLabel(name)
    //displayPlot(dens.render())
    //displayPlot(trace.render())
    displayPlot(Facets(Vector(Vector(trace, dens))).render())
  }

  /** Diagnostic plots for an unnamed chain, labelled "Variable". */
  def mcmcSummary(chain: Seq[Double]): Unit = mcmcSummary("Variable", chain)


  /**
    * Normal random sample: simulate iid N(mu, sig^2) data, then infer the
    * mean and standard deviation with vague Normal/Gamma priors via EHMC.
    */
  def nrs: Unit = {
    // first simulate some synthetic data
    val n = 1000
    val mu = 3.0
    val sig = 5.0
    implicit val rng = ScalaRNG(3) // fixed seed for reproducibility
    val x = Vector.fill(n)(mu + sig * rng.standardNormal)
    // now build Rainier model
    val m = Normal(0, 100).latent
    val s = Gamma(1, 10).latent
    val model = Model.observe(x, Normal(m, s))
    // now sample from the model
    val sampler = EHMC(5000, 5000)
    println("sampling...")
    val out = model.sample(sampler)
    println("finished sampling.")
    println(out.diagnostics)
    val mut = out.predict(m)
    show("mu", density(mut))
    val sigt = out.predict(s)
    show("sig", density(sigt))
    // try some diagnostic plots
    mcmcSummary("mu", mut)
    mcmcSummary("sig", sigt)
  }

  /**
    * Logistic regression: simulate binary responses from known coefficients
    * (beta0, beta1), then recover them by HMC with Normal(0, 5) priors.
    */
  def logReg: Unit = {
    println("logReg")
    // first simulate some data from a logistic regression model
    implicit val rng = ScalaRNG(3) // fixed seed for reproducibility
    val N = 1000
    val beta0 = 0.1
    val beta1 = 0.3
    val x = (1 to N) map { _ =>
      3.0 * rng.standardNormal
    }
    val theta = x map { xi =>
      beta0 + beta1 * xi
    }
    // inverse-logit, used to turn the linear predictor into a probability
    def expit(x: Double): Double = 1.0 / (1.0 + math.exp(-x))
    val p = theta map expit
    val yb = p map (pi => (rng.standardUniform < pi))
    val y = yb map (b => if (b) 1L else 0L)
    println(y.take(10))
    println(x.take(10))
    // now build Rainier model
    val b0 = Normal(0, 5).latent
    val b1 = Normal(0, 5).latent
    val model = Model.observe(y, Vec.from(x).map { xi =>
      val theta = b0 + b1 * xi
      // expit again, but on rainier Real values rather than Doubles
      val p = 1.0 / (1.0 + (-theta).exp)
      Bernoulli(p)
    })
    // now sample from the model
    //val sampler = EHMC(10000, 1000)
    val sampler = HMC(5000, 1000, 50)
    println("sampling...")
    val bt = model.sample(sampler)
    println("finished sampling.")
    println(bt.diagnostics)
    val b0t = bt.predict(b0)
    show("b0", density(b0t))
    val b1t = bt.predict(b1)
    show("b1", density(b1t))
    mcmcSummary("b0", b0t)
    mcmcSummary("b1", b1t)
  }

  /**
    * One-way anova model: n groups of N observations, with a random effect
    * per group; the per-group models are combined with Model.merge.
    */
  def anova: Unit = {
    println("anova")
    // simulate synthetic data
    implicit val rng = ScalaRNG(3) // fixed seed for reproducibility
    //val n = 50 // groups
    //val N = 150 // obs per group
    val n = 10 // groups
    val N = 20 // obs per group
    val mu = 5.0 // overall mean
    val sigE = 2.0 // random effect SD
    val sigD = 3.0 // obs SD
    val effects = Vector.fill(n)(sigE * rng.standardNormal)
    val data = effects map (e =>
      Vector.fill(N)(mu + e + sigD * rng.standardNormal))
    // build model
    val m = Normal(0, 100).latent
    val sD = LogNormal(0, 10).latent
    val sE = LogNormal(1, 5).latent
    val eff = Vector.fill(n)(Normal(m, sE).latent)
    // one observation model per group, merged into a single joint model
    val models = (0 until n).map(i =>
      Model.observe(data(i), Normal(eff(i), sD)))
    val model = models.reduce { (m1, m2) => m1.merge(m2) }
    // now sample the model
    val sampler = EHMC(5000, 5000)
    println("sampling...")
    val trace = model.sample(sampler)
    println("finished sampling.")
    println(trace.diagnostics)
    val mt = trace.predict(m)
    show("mu", density(mt))
    mcmcSummary("mu", mt)
    mcmcSummary("sigE", trace.predict(sE))
    mcmcSummary("sigD", trace.predict(sD))
  }

  // repeat one-way anova, but without merging
  // TODO(review): not yet implemented — intentionally a no-op for now
  def anova2: Unit = {

  }


  /** Entry point: uncomment the example(s) to run. */
  def main(args: Array[String]): Unit = {
    println("main starting")

    //tutorial
    //nrs
    logReg
    //anova


    println("main finishing")
  }

}
-------------------------------------------------------------------------------- /rainier/src/test/scala/rainier-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.flatspec.AnyFlatSpec 2 | import org.scalatest.matchers.should.Matchers 3 | 4 | 5 | // Example unit tests 6 | class CatsSpec extends AnyFlatSpec with Matchers { 7 | 8 | import cats._ 9 | import cats.implicits._ 10 | 11 | "A List" should "combine" in { 12 | val l = List(1,2) |+| List(3,4) 13 | l should be (List(1,2,3,4)) 14 | } 15 | 16 | } 17 | 18 | 19 | // Example property-based tests 20 | import org.scalatestplus.scalacheck._ 21 | class MyPropertyTests extends AnyFlatSpec with Matchers with ScalaCheckPropertyChecks { 22 | 23 | import cats._ 24 | import cats.implicits._ 25 | 26 | "An Int" should "combine commutatively" in { 27 | forAll { (a: Int, b: Int) => 28 | (a |+| b) should be (b |+| a) 29 | } 30 | } 31 | 32 | it should "invert" in { 33 | forAll { (a: Int) => 34 | (a |+| a.inverse) shouldBe Monoid[Int].empty 35 | } 36 | } 37 | 38 | } 39 | 40 | // eof 41 | 42 | 43 | -------------------------------------------------------------------------------- /reaction-diffusion/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /reaction-diffusion/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | 4 | 5 | 6 | view: DraftPost.html 7 | 
xdg-open DraftPost.html 8 | 9 | DraftPost.html: DraftPost.md 10 | pandoc DraftPost.md -o DraftPost.html 11 | 12 | clean: 13 | sbt clean 14 | rm -f siv-*.png 15 | 16 | 17 | 18 | # eof 19 | 20 | -------------------------------------------------------------------------------- /reaction-diffusion/Readme.md: -------------------------------------------------------------------------------- 1 | # Stochastic reaction-diffusion modelling (in Scala) 2 | 3 | This directory contains the source code associated with the blog post: 4 | 5 | [Stochastic reaction-diffusion modelling](https://darrenjw.wordpress.com/2019/01/22/stochastic-reaction-diffusion-modelling/) 6 | 7 | This code should run on any system with a recent [Java JDK](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) installation, and [sbt](https://www.scala-sbt.org/). Note that this code relies on JavaFx libraries, so if you are running OpenJdk, you will also need to install OpenJfx. On most Linux (and similar) systems, this should be as easy as installing the `openjfx` package in addition to (say) `openjdk-8-jdk` using your OS package manager. 8 | 9 | Once you have Java and sbt installed, you should be able to compile and run the examples by typing: 10 | ```bash 11 | sbt run 12 | ``` 13 | at your OS prompt from *this* directory (that is, the directory containing the file `build.sbt`). Then just select the number of the example you want to run. The animation should auto-start. 
14 | 15 | 16 | 17 | #### eof 18 | 19 | -------------------------------------------------------------------------------- /reaction-diffusion/build.sbt: -------------------------------------------------------------------------------- 1 | name := "reaction-diffusion" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature" 7 | ) 8 | 9 | libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 11 | "org.scalanlp" %% "breeze" % "0.13.2", 12 | "org.scalanlp" %% "breeze-viz" % "0.13.2", 13 | "org.scalanlp" %% "breeze-natives" % "0.13.2", 14 | "com.github.darrenjw" %% "scala-view" % "0.5", 15 | "com.github.darrenjw" %% "scala-smfsb" % "0.6" 16 | ) 17 | 18 | resolvers ++= Seq( 19 | "Sonatype Snapshots" at 20 | "https://oss.sonatype.org/content/repositories/snapshots/", 21 | "Sonatype Releases" at 22 | "https://oss.sonatype.org/content/repositories/releases/" 23 | ) 24 | 25 | scalaVersion := "2.12.8" 26 | 27 | scalaVersion in ThisBuild := "2.12.8" // for ensime 28 | 29 | 30 | -------------------------------------------------------------------------------- /reaction-diffusion/lv-cle.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-cle.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/lv-cle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-cle.png -------------------------------------------------------------------------------- /reaction-diffusion/lv-cle2.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-cle2.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/lv-cle2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-cle2.png -------------------------------------------------------------------------------- /reaction-diffusion/lv-cle3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-cle3.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/lv-cle3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-cle3.png -------------------------------------------------------------------------------- /reaction-diffusion/lv-exact.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-exact.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/lv-exact.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-exact.png -------------------------------------------------------------------------------- /reaction-diffusion/lv-rre.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-rre.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/lv-rre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-rre.png -------------------------------------------------------------------------------- /reaction-diffusion/lv-rre2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-rre2.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/lv-rre2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-rre2.png -------------------------------------------------------------------------------- /reaction-diffusion/lv-rre3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-rre3.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/lv-rre3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/lv-rre3.png -------------------------------------------------------------------------------- /reaction-diffusion/make-movie.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # make-movie.sh 3 | 4 | rm -f siv-??????-s.png 5 | 6 | for name in siv-??????.png 7 | do 8 | 
short="${name%.*}" 9 | echo $short 10 | #pngtopnm "$name" | pnmscale 20 | pnmtopng > "${short}-s.png" 11 | convert "$name" -scale 1200x600 -define png:color-type=2 "${short}-s.png" 12 | done 13 | 14 | rm -f movie.mp4 15 | 16 | #avconv -r 20 -i siv-%06d-s.png movie.mp4 17 | ffmpeg -f image2 -r 10 -pattern_type glob -i 'siv-*-s.png' movie.mp4 18 | 19 | # make a version that should play on Android devices... 20 | ffmpeg -i movie.mp4 -codec:v libx264 -profile:v main -preset slow -b:v 400k -maxrate 400k -bufsize 800k -vf scale=-1:480 -threads 0 -codec:a libfdk_aac -b:a 128k -pix_fmt yuv420p movie-a.mp4 21 | 22 | # Animated GIF... 23 | # ffmpeg -i movie.mp4 -s 200x100 movie.gif 24 | 25 | 26 | # eof 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /reaction-diffusion/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.2.8 2 | 3 | -------------------------------------------------------------------------------- /reaction-diffusion/sir-cle.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/sir-cle.mp4 -------------------------------------------------------------------------------- /reaction-diffusion/sir-cle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/sir-cle.png -------------------------------------------------------------------------------- /reaction-diffusion/sir-rre.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/sir-rre.mp4 -------------------------------------------------------------------------------- 
/reaction-diffusion/sir-rre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/reaction-diffusion/sir-rre.png -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/LvCle.scala: -------------------------------------------------------------------------------- 1 | /* 2 | LvCle.scala 3 | 4 | Chemical Langevin approximation for a Lotka-Volterra system 5 | 6 | Numerical solution of an SPDE 7 | 8 | */ 9 | 10 | package rd 11 | 12 | object LvCle { 13 | 14 | import smfsb._ 15 | import breeze.linalg.{Vector => BVec, _} 16 | import breeze.numerics._ 17 | 18 | def main(args: Array[String]): Unit = { 19 | val r = 250; val c = 300 20 | val model = SpnModels.lv[DoubleState]() 21 | val step = Spatial.cle2d(model, DenseVector(0.6, 0.6), 0.05) 22 | val x00 = DenseVector(0.0, 0.0) 23 | val x0 = DenseVector(50.0, 100.0) 24 | val xx00 = PMatrix(r, c, Vector.fill(r*c)(x00)) 25 | val xx0 = xx00.updated(c/2, r/2, x0) 26 | val s = Stream.iterate(xx0)(step(_,0.0,0.1)) 27 | val si = s map (toSfxI(_)) 28 | scalaview.SfxImageViewer(si, 1, autoStart=true) 29 | } 30 | 31 | } 32 | 33 | // eof 34 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/LvCle2.scala: -------------------------------------------------------------------------------- 1 | /* 2 | LvCle2.scala 3 | 4 | Chemical Langevin approximation for a Lotka-Volterra system 5 | 6 | Numerical solution of an SPDE 7 | 8 | */ 9 | 10 | package rd 11 | 12 | object LvCle2 { 13 | 14 | import smfsb._ 15 | import breeze.linalg.{Vector => BVec, _} 16 | import breeze.numerics._ 17 | 18 | def main(args: Array[String]): Unit = { 19 | val r = 300; val c = 600 20 | val model = SpnModels.lv[DoubleState]() 21 | val step = Spatial.cle2d(model, DenseVector(0.6, 0.6), 0.05) 22 | val x00 = DenseVector(0.0, 
0.0) 23 | val x0 = DenseVector(50.0, 100.0) 24 | val xx00 = PMatrix(r, c, Vector.fill(r*c)(x00)) 25 | val xx0 = xx00. 26 | updated(c/3, r/2, x0). 27 | updated(2*c/3,r/2,x0). 28 | updated(c/2,2*r/3,x0) 29 | val s = Stream.iterate(xx0)(step(_,0.0,0.1)) 30 | val si = s map (toSfxI(_)) 31 | scalaview.SfxImageViewer(si, 1, autoStart=true) 32 | } 33 | 34 | } 35 | 36 | // eof 37 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/LvCle3.scala: -------------------------------------------------------------------------------- 1 | /* 2 | LvCle3.scala 3 | 4 | Chemical Langevin approximation for a Lotka-Volterra system 5 | 6 | Numerical solution of an SPDE 7 | 8 | */ 9 | 10 | package rd 11 | 12 | object LvCle3 { 13 | 14 | import smfsb._ 15 | import breeze.linalg.{Vector => BVec, _} 16 | import breeze.numerics._ 17 | import breeze.stats.distributions.Uniform 18 | 19 | def main(args: Array[String]): Unit = { 20 | val r = 300; val c = 400 21 | val model = SpnModels.lv[DoubleState]() 22 | val step = Spatial.cle2d(model, DenseVector(0.6, 0.6), 0.05) 23 | val xx0 = PMatrix(r, c, Vector.fill(r*c)(DenseVector( 24 | Uniform(100,300).draw, 25 | Uniform(100,300).draw))) 26 | val s = Stream.iterate(xx0)(step(_,0.0,0.1)) 27 | val si = s map (toSfxI(_)) 28 | scalaview.SfxImageViewer(si, 1, autoStart=true) 29 | } 30 | 31 | } 32 | 33 | // eof 34 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/LvExact.scala: -------------------------------------------------------------------------------- 1 | /* 2 | LvExact.scala 3 | 4 | Exact Gillespie simulation of the RDME for a Lotka-Volterra system 5 | 6 | */ 7 | 8 | package rd 9 | 10 | object LvExact { 11 | 12 | import smfsb._ 13 | import breeze.linalg.{Vector => BVec, _} 14 | import breeze.numerics._ 15 | 16 | def main(args: Array[String]): Unit = { 17 | val r = 100; val c = 120 18 | val model = 
SpnModels.lv[IntState]() 19 | val step = Spatial.gillespie2d(model, DenseVector(0.6, 0.6), maxH=1e12) 20 | val x00 = DenseVector(0, 0) 21 | val x0 = DenseVector(50, 100) 22 | val xx00 = PMatrix(r, c, Vector.fill(r*c)(x00)) 23 | val xx0 = xx00.updated(c/2, r/2, x0) 24 | val s = Stream.iterate(xx0)(step(_,0.0,0.1)) 25 | val si = s map (toSfxIi(_)) 26 | scalaview.SfxImageViewer(si, 1, autoStart=true) 27 | } 28 | 29 | } 30 | 31 | // eof 32 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/LvRre.scala: -------------------------------------------------------------------------------- 1 | /* 2 | LvRre.scala 3 | 4 | Reaction rate equations for the Lotka-Volterra model 5 | Numerical solution of a PDE 6 | 7 | */ 8 | 9 | package rd 10 | 11 | object LvRre { 12 | 13 | import smfsb._ 14 | import breeze.linalg.{Vector => BVec, _} 15 | import breeze.numerics._ 16 | 17 | def main(args: Array[String]): Unit = { 18 | val r = 250; val c = 300 19 | val model = SpnModels.lv[DoubleState]() 20 | val step = Spatial.euler2d(model, DenseVector(0.6, 0.6), 0.02) 21 | val x00 = DenseVector(0.0, 0.0) 22 | val x0 = DenseVector(50.0, 100.0) 23 | val xx00 = PMatrix(r, c, Vector.fill(r*c)(x00)) 24 | val xx0 = xx00.updated(c/2, r/2, x0) 25 | val s = Stream.iterate(xx0)(step(_,0.0,0.1)) 26 | val si = s map (toSfxI(_)) 27 | scalaview.SfxImageViewer(si, 1, autoStart=true) 28 | } 29 | 30 | } 31 | 32 | // eof 33 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/LvRre2.scala: -------------------------------------------------------------------------------- 1 | /* 2 | LvRre2.scala 3 | 4 | PDE approximation for a Lotka-Volterra system 5 | 6 | Numerical solution of an SPDE 7 | 8 | */ 9 | 10 | package rd 11 | 12 | object LvRre2 { 13 | 14 | import smfsb._ 15 | import breeze.linalg.{Vector => BVec, _} 16 | import breeze.numerics._ 17 | 18 | def main(args: Array[String]): 
Unit = { 19 | val r = 300; val c = 600 20 | val model = SpnModels.lv[DoubleState]() 21 | val step = Spatial.euler2d(model, DenseVector(0.6, 0.6), 0.05) 22 | val x00 = DenseVector(0.0, 0.0) 23 | val x0 = DenseVector(50.0, 100.0) 24 | val xx00 = PMatrix(r, c, Vector.fill(r*c)(x00)) 25 | val xx0 = xx00. 26 | updated(c/3, r/2, x0). 27 | updated(2*c/3,r/2,x0). 28 | updated(c/2,2*r/3,x0) 29 | val s = Stream.iterate(xx0)(step(_,0.0,0.1)) 30 | val si = s map (toSfxI(_)) 31 | scalaview.SfxImageViewer(si, 1, autoStart=true) 32 | } 33 | 34 | } 35 | 36 | // eof 37 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/LvRre3.scala: -------------------------------------------------------------------------------- 1 | /* 2 | LvRre3.scala 3 | 4 | PDE approximation for a Lotka-Volterra system 5 | 6 | Numerical solution of a PDE 7 | 8 | */ 9 | 10 | package rd 11 | 12 | object LvRre3 { 13 | 14 | import smfsb._ 15 | import breeze.linalg.{Vector => BVec, _} 16 | import breeze.numerics._ 17 | import breeze.stats.distributions.Uniform 18 | 19 | def main(args: Array[String]): Unit = { 20 | val r = 300; val c = 400 21 | val model = SpnModels.lv[DoubleState]() 22 | val step = Spatial.euler2d(model, DenseVector(0.6, 0.6), 0.05) 23 | val xx0 = PMatrix(r, c, Vector.fill(r*c)(DenseVector( 24 | Uniform(100,300).draw, 25 | Uniform(100,300).draw))) 26 | val s = Stream.iterate(xx0)(step(_,0.0,0.1)) 27 | val si = s map (toSfxI(_)) 28 | scalaview.SfxImageViewer(si, 1, autoStart=true) 29 | } 30 | 31 | } 32 | 33 | // eof 34 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/SirCle.scala: -------------------------------------------------------------------------------- 1 | /* 2 | SirCle.scala 3 | 4 | Chemical Langevin approximation for a SIR epidemic model 5 | 6 | Numerical solution of an SPDE 7 | 8 | */ 9 | 10 | package rd 11 | 12 | object SirCle { 13 | 14 | import 
smfsb._ 15 | import breeze.linalg.{Vector => BVec, _} 16 | import breeze.numerics._ 17 | 18 | 19 | def main(args: Array[String]): Unit = { 20 | val r = 250; val c = 300 21 | val model = sir[DoubleState]() 22 | val step = Spatial.cle2d(model, DenseVector(3.0, 2.0, 0.0), 0.005) 23 | val x00 = DenseVector(100.0, 0.0, 0.0) 24 | val x0 = DenseVector(50.0, 50.0, 0.0) 25 | val xx00 = PMatrix(r, c, Vector.fill(r*c)(x00)) 26 | val xx0 = xx00.updated(c/2, r/2, x0) 27 | val s = Stream.iterate(xx0)(step(_,0.0,0.05)) 28 | val si = s map (toSfxI3(_)) 29 | scalaview.SfxImageViewer(si, 1, autoStart=true) 30 | } 31 | 32 | } 33 | 34 | // eof 35 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/SirRre.scala: -------------------------------------------------------------------------------- 1 | /* 2 | SirRre.scala 3 | 4 | PDE approximation for a SIR epidemic model 5 | 6 | Numerical solution of an SPDE 7 | 8 | */ 9 | 10 | package rd 11 | 12 | object SirRre { 13 | 14 | import smfsb._ 15 | import breeze.linalg.{Vector => BVec, _} 16 | import breeze.numerics._ 17 | 18 | 19 | def main(args: Array[String]): Unit = { 20 | val r = 250; val c = 300 21 | val model = sir[DoubleState]() 22 | val step = Spatial.euler2d(model, DenseVector(3.0, 2.0, 0.0), 0.005) 23 | val x00 = DenseVector(100.0, 0.0, 0.0) 24 | val x0 = DenseVector(50.0, 50.0, 0.0) 25 | val xx00 = PMatrix(r, c, Vector.fill(r*c)(x00)) 26 | val xx0 = xx00.updated(c/2, r/2, x0) 27 | val s = Stream.iterate(xx0)(step(_,0.0,0.05)) 28 | val si = s map (toSfxI3(_)) 29 | scalaview.SfxImageViewer(si, 1, autoStart=true) 30 | } 31 | 32 | } 33 | 34 | // eof 35 | -------------------------------------------------------------------------------- /reaction-diffusion/src/main/scala/rd/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | package.scala 3 | 4 | Shared code 5 | 6 | */ 7 | 8 | package object rd { 9 | 10 | import smfsb._ 
11 | import breeze.linalg.{Vector => BVec, _} 12 | import breeze.numerics._ 13 | import scalafx.scene.image.WritableImage 14 | import scalafx.scene.paint._ 15 | 16 | def toSfxI(im: PMatrix[DenseVector[Double]]): WritableImage = { 17 | val wi = new WritableImage(im.c, im.r) 18 | val pw = wi.pixelWriter 19 | val m = im.data.aggregate(0.0)((acc,v) => math.max(acc,max(v)), math.max(_,_)) 20 | val rsi = im map (_ / m) 21 | (0 until im.c).par foreach (i => 22 | (0 until im.r).par foreach (j => 23 | pw.setColor(i, j, Color.rgb((rsi(i,j)(1)*255).toInt, 0, (rsi(i,j)(0)*255).toInt)) 24 | )) 25 | wi 26 | } 27 | 28 | def toSfxIi(im: PMatrix[DenseVector[Int]]): WritableImage = 29 | toSfxI(im map (v => v map (_.toDouble))) 30 | 31 | def sir[S: State](p: DenseVector[Double] = DenseVector(0.1, 0.5)): Spn[S] = 32 | UnmarkedSpn[S]( 33 | List("S", "I", "R"), 34 | DenseMatrix((1, 1, 0), (0, 1, 0)), 35 | DenseMatrix((0, 2, 0), (0, 0, 1)), 36 | (x, t) => { 37 | val xd = x.toDvd 38 | DenseVector( 39 | xd(0) * xd(1) * p(0), xd(1) * p(1) 40 | )} 41 | ) 42 | 43 | def toSfxI3(im: PMatrix[DenseVector[Double]]): WritableImage = { 44 | val wi = new WritableImage(im.c, im.r) 45 | val pw = wi.pixelWriter 46 | val m = im.data.aggregate(0.0)((acc,v) => math.max(acc,max(v)), math.max(_,_)) 47 | val rsi = im map (_ / m) 48 | (0 until im.c).par foreach (i => 49 | (0 until im.r).par foreach (j => 50 | pw.setColor(i, j, Color.rgb((rsi(i,j)(1)*255).toInt, (rsi(i,j)(0)*255).toInt, (rsi(i,j)(2)*255).toInt)) 51 | )) 52 | wi 53 | } 54 | 55 | 56 | 57 | } 58 | 59 | // eof 60 | 61 | -------------------------------------------------------------------------------- /reaction-diffusion/src/test/scala/reaction-diffusion-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | import smfsb._ 4 | import breeze.linalg._ 5 | 6 | class SmfsbSpec extends FlatSpec { 7 | 8 | "Step.gillespie" should "create and step LV model" in { 9 | val model = 
SpnModels.lv[IntState]() 10 | val step = Step.gillespie(model) 11 | val output = step(DenseVector(50, 100), 0.0, 1.0) 12 | assert(output.length === 2) 13 | } 14 | 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /sbml-scala/README.md: -------------------------------------------------------------------------------- 1 | # Working with SBML using Scala 2 | 3 | Code example for the blog post: 4 | 5 | https://darrenjw.wordpress.com/2016/12/17/working-with-sbml-using-scala/ 6 | 7 | Note that this repo contains everything that is needed to build and run the Scala code examples on any system that has Java installed. Any recent version of Java is fine. You do not need to "install" Scala or any Scala "packages" in order to run the code. If you have Java and a decent internet connection, you are good to go. This is one of the benefits of Scala - you can run it anywhere, on any system with a Java installation. 8 | 9 | To check if you have Java installed, just run: 10 | 11 | ```bash 12 | java -version 13 | ``` 14 | 15 | at your system command prompt. If you get an error, Java is absent or incorrectly installed. Installing Java is very easy on any platform, but the best way to install it depends on exactly what OS you are running, so search the internet for advice on the best way to install Java on your OS. 16 | 17 | The code uses `sbt` (the simple build tool) as the build tool. The sbt launcher has been included in the repo for the benefit of those new to Scala. It should be possible to run sbt from this directory by typing: 18 | 19 | ```bash 20 | ..\sbt 21 | ``` 22 | 23 | on Windows (which should run `..\sbt.bat`), or 24 | 25 | ```bash 26 | ../sbt 27 | ``` 28 | 29 | on Linux and similar systems (including Macs). If you want to be able to experiment with Scala yourself, you should copy the script and the file `sbt-launch.jar` to the same directory somewhere in your path, but this isn't necessary to run these examples. 
30 | 31 | The sbt launcher script will download and run sbt, which will then download scala, the scala compiler, scala standard libraries and all dependencies needed to compile and run the code. All the downloaded files will be cached on your system for future use. Therefore, make sure you have a good internet connection and a bit of free disk space before running sbt for the first time. 32 | 33 | Assuming you can run sbt, just typing `run` at the sbt prompt will compile and run the example code. Typing `console` will give a Scala REPL with a properly configured classpath including all dependencies. You can type scala expressions directly into the REPL just as you would in your favourite dynamic math/stat language. Type `help` at the sbt prompt for help on sbt. Type `:help` at the Scala REPL for help on the REPL. 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /sbml-scala/build.sbt: -------------------------------------------------------------------------------- 1 | name := "jsbml" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.sbml.jsbml" % "jsbml" % "1.2", 9 | "org.apache.logging.log4j" % "log4j-1.2-api" % "2.3", 10 | "org.apache.logging.log4j" % "log4j-api" % "2.3", 11 | "org.apache.logging.log4j" % "log4j-core" % "2.3" 12 | ) 13 | 14 | scalaVersion := "2.11.7" 15 | 16 | 17 | -------------------------------------------------------------------------------- /sbml-scala/ch07-mm-stoch.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | c1 27 | S 28 | E 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | c2 49 | SE 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 
| 66 | 67 | 68 | 69 | c3 70 | SE 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /sbml-scala/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.8 2 | -------------------------------------------------------------------------------- /sbml-scala/src/main/scala/jsbml.scala: -------------------------------------------------------------------------------- 1 | /* 2 | DemoApp.java 3 | Simple demo of jSBML 4 | */ 5 | 6 | object JsbmlApp { 7 | import org.sbml.jsbml.SBMLReader 8 | // import org.sbml.jsbml.{ Unit => JsbmlUnit } 9 | import scala.collection.JavaConversions._ 10 | 11 | def main(args: Array[String]): Unit = { 12 | val filename = args.headOption.getOrElse("ch07-mm-stoch.xml") 13 | val reader = new SBMLReader 14 | val document = reader.readSBML(filename) 15 | val model = document.getModel 16 | println(model.getId + "\n" + model.getName) 17 | val listOfSpecies = model.getListOfSpecies 18 | val ns = model.getNumSpecies 19 | println(s"$ns Species:") 20 | listOfSpecies.iterator.foreach(species => { 21 | println(" " + 22 | species.getId + "\t" + 23 | species.getName + "\t" + 24 | species.getCompartment + "\t" + 25 | species.getInitialAmount) 26 | }) 27 | val nr = model.getNumReactions 28 | println(s"$nr Reactions.") 29 | } 30 | 31 | } 32 | 33 | /* eof */ 34 | 35 | -------------------------------------------------------------------------------- /sbt: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SBT_OPTS="-Xms512M -Xmx1536M -Xss1M -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=256M" 3 | java $SBT_OPTS -jar `dirname $0`/sbt-launch.jar "$@" 4 | 5 | -------------------------------------------------------------------------------- /sbt-launch.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darrenjw/blog/be123278cc20048ab8458b895ccb5ccca798ea59/sbt-launch.jar -------------------------------------------------------------------------------- /sbt.bat: -------------------------------------------------------------------------------- 1 | set SCRIPT_DIR=%~dp0 2 | java -Xms512M -Xmx1024M -Xss1M -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=256M -jar "%SCRIPT_DIR%sbt-launch.jar" %* 3 | 4 | -------------------------------------------------------------------------------- /scala-dataframes/README.md: -------------------------------------------------------------------------------- 1 | # Scala data tables and frames 2 | 3 | Code samples associated with my blog post "Scala data frames and tables" which can be found at: 4 | 5 | https://darrenjw.wordpress.com/2015/08/21/data-frames-and-tables-in-scala/ 6 | 7 | See the post for explanation of the examples. 8 | 9 | Note that you must run the script r/gen-csv.R in an R session FIRST, in order to generate the CSV file required for the Scala examples. 
10 | 11 | 12 | -------------------------------------------------------------------------------- /scala-dataframes/datatable/build.sbt: -------------------------------------------------------------------------------- 1 | name := "datatable-test" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "com.github.tototoshi" %% "scala-csv" % "1.1.2", 11 | "com.github.martincooper" %% "scala-datatable" % "0.7.0" 12 | ) 13 | 14 | resolvers ++= Seq( 15 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 16 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | scalaVersion := "2.11.7" 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /scala-dataframes/datatable/datatable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | datatable.scala 3 | 4 | Test of "scala-datatable" and "scala-csv" 5 | 6 | */ 7 | 8 | import java.io.{File,FileReader} 9 | import com.github.tototoshi.csv._ 10 | import com.github.martincooper.datatable._ 11 | import scala.annotation.tailrec 12 | import scala.util.Try 13 | 14 | object StringCol 15 | 16 | object DatatableTest { 17 | 18 | def readCsv(name: String, file: FileReader, colTypes: Map[String,Object]): DataTable = { 19 | val reader=CSVReader.open(file) 20 | val all=reader.allWithHeaders() 21 | reader.close() 22 | val ks=colTypes.keys 23 | val colSet=ks map {key => (key,all map {row => row(key)}) } 24 | val dataCols=colSet map {pair => colTypes(pair._1) match { 25 | case StringCol => new DataColumn[String](pair._1,pair._2) 26 | case Int => new DataColumn[Int](pair._1,pair._2 map {x=> 27 | Try(x.toInt).toOption.getOrElse(-99)}) 28 | case Double => new 
DataColumn[Double](pair._1,pair._2 map {x=> 29 | Try(x.toDouble).toOption.getOrElse(-99.0)}) 30 | } 31 | } 32 | DataTable(name,dataCols).get 33 | } 34 | 35 | def writeCsv(df: DataTable,out: File): Unit = { 36 | val writer = CSVWriter.open(out) 37 | writer.writeRow(df.columns.map{_.name}) 38 | df.foreach{r=>writer.writeRow(r.values)} 39 | writer.close() 40 | } 41 | 42 | 43 | def main(args: Array[String]) = { 44 | 45 | val colTypes=Map("DriveTrain" -> StringCol, 46 | "Min.Price" -> Double, 47 | "Cylinders" -> Int, 48 | "Horsepower" -> Int, 49 | "Length" -> Int, 50 | "Make" -> StringCol, 51 | "Passengers" -> Int, 52 | "Width" -> Int, 53 | "Fuel.tank.capacity" -> Double, 54 | "Origin" -> StringCol, 55 | "Wheelbase" -> Int, 56 | "Price" -> Double, 57 | "Luggage.room" -> Double, 58 | "Weight" -> Int, 59 | "Model" -> StringCol, 60 | "Max.Price" -> Double, 61 | "Manufacturer" -> StringCol, 62 | "EngineSize" -> Double, 63 | "AirBags" -> StringCol, 64 | "Man.trans.avail" -> StringCol, 65 | "Rear.seat.room" -> Double, 66 | "RPM" -> Int, 67 | "Turn.circle" -> Double, 68 | "MPG.highway" -> Int, 69 | "MPG.city" -> Int, 70 | "Rev.per.mile" -> Int, 71 | "Type" -> StringCol) 72 | val df=readCsv("Cars93",new FileReader("../r/cars93.csv"),colTypes) 73 | println(df.length,df.columns.length) 74 | 75 | val df2=df.filter(row=>row.as[Double]("EngineSize")<=4.0).toDataTable 76 | println(df2.length,df2.columns.length) 77 | 78 | val oldCol=df2.columns("Weight").as[Int] 79 | val newCol=new DataColumn[Double]("WeightKG",oldCol.data.map{_.toDouble*0.453592}) 80 | val df3=df2.columns.add(newCol).get 81 | println(df3.length,df3.columns.length) 82 | 83 | writeCsv(df3,new File("out.csv")) 84 | 85 | //println("Done") 86 | } 87 | 88 | 89 | 90 | } 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /scala-dataframes/framian/build.sbt: -------------------------------------------------------------------------------- 1 | name := "framian-test" 2 | 3 | 
version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "com.pellucid" %% "framian" % "0.3.3" 11 | ) 12 | 13 | resolvers ++= Seq( 14 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 15 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/", 16 | "Pellucid Bintray" at "http://dl.bintray.com/pellucid/maven" 17 | ) 18 | 19 | scalaVersion := "2.11.2" 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /scala-dataframes/framian/framian.scala: -------------------------------------------------------------------------------- 1 | /* 2 | framian.scala 3 | 4 | Test of "framian" 5 | 6 | */ 7 | 8 | import java.io.{File,PrintWriter} 9 | import framian.{Index,Cols} 10 | import framian.csv.{Csv,CsvFormat} 11 | 12 | object FramianTest { 13 | 14 | def main(args: Array[String]) = { 15 | println("Hello") 16 | val df=Csv.parseFile(new File("../r/cars93.csv")).labeled.toFrame 17 | println(""+df.rows+" "+df.cols) 18 | val df2=df.filter(Cols("EngineSize").as[Double])( _ <= 4.0 ) 19 | println(""+df2.rows+" "+df2.cols) 20 | val df3=df2.map(Cols("Weight").as[Int],"WeightKG")(r=>r.toDouble*0.453592) 21 | println(""+df3.rows+" "+df3.cols) 22 | println(df3.colIndex) 23 | val csv = Csv.fromFrame(new CsvFormat(",", header = true))(df3) 24 | new PrintWriter("out.csv") { write(csv.toString); close } 25 | println("Done") 26 | } 27 | 28 | } 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /scala-dataframes/r/df.R: -------------------------------------------------------------------------------- 1 | # df.R 2 | # Example of processing a CSV-derived data frame using R 3 | 4 | df=read.csv("cars93.csv") 5 | print(dim(df)) 6 | 7 | 
df=df[df$EngineSize<=4.0,] 8 | print(dim(df)) 9 | 10 | df$WeightKG=df$Weight*0.453592 11 | print(dim(df)) 12 | 13 | write.csv(df,"cars93m.csv",row.names=FALSE) 14 | 15 | # eof 16 | 17 | -------------------------------------------------------------------------------- /scala-dataframes/r/gen-csv.R: -------------------------------------------------------------------------------- 1 | # gen-csv.R 2 | # Generate a CSV file for subsequent analysis 3 | 4 | package=function(somepackage) 5 | { 6 | cpackage <- as.character(substitute(somepackage)) 7 | if(!require(cpackage,character.only=TRUE)){ 8 | install.packages(cpackage) 9 | library(cpackage,character.only=TRUE) 10 | } 11 | } 12 | 13 | package(MASS) 14 | 15 | write.csv(Cars93,"cars93.csv",row.names=FALSE) 16 | 17 | 18 | 19 | # eof 20 | 21 | -------------------------------------------------------------------------------- /scala-dataframes/saddle/CsvDf.scala: -------------------------------------------------------------------------------- 1 | 2 | object CsvDf { 3 | 4 | def main(args: Array[String]): Unit = { 5 | 6 | import org.saddle.Index 7 | import org.saddle.io._ 8 | 9 | val file = CsvFile("../r/cars93.csv") 10 | val df = CsvParser.parse(file).withColIndex(0) 11 | println(df) 12 | val df2 = df.rfilter(_("EngineSize").mapValues(CsvParser.parseDouble).at(0)<=4.0) 13 | println(df2) 14 | val wkg=df2.col("Weight").mapValues(CsvParser.parseDouble).mapValues(_*0.453592).setColIndex(Index("WeightKG")) 15 | val df3=df2.joinPreserveColIx(wkg.mapValues(_.toString)) 16 | println(df3) 17 | 18 | import CsvImplicits._ 19 | import scala.language.reflectiveCalls 20 | df3.writeCsvFile("saddle-out.csv") 21 | 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /scala-dataframes/saddle/build.sbt: -------------------------------------------------------------------------------- 1 | name := "csv-manipulation" 2 | 3 | version := "0.1" 4 | 5 | scalacOptions ++= Seq("-unchecked", "-deprecation", 
"-feature") 6 | 7 | libraryDependencies ++= Seq( 8 | "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", 9 | "org.scalatest" %% "scalatest" % "2.1.7" % "test", 10 | "org.scalanlp" %% "breeze" % "0.11.2", 11 | "org.scalanlp" %% "breeze-natives" % "0.11.2", 12 | "org.scalanlp" %% "breeze-viz" % "0.11.2", 13 | "org.scala-saddle" %% "saddle-core" % "1.3.+" 14 | ) 15 | 16 | resolvers ++= Seq( 17 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/", 18 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/" 19 | ) 20 | 21 | scalaVersion := "2.11.6" 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /scala-dataframes/sparkdf/spark.scala: -------------------------------------------------------------------------------- 1 | /* 2 | spark.scala 3 | 4 | code for a "spark-shell" session 5 | 6 | spark-shell --master local[4] 7 | 8 | */ 9 | 10 | val df = spark.read. 11 | option("header", "true"). 12 | option("inferSchema","true"). 13 | csv("../r/cars93.csv") 14 | val df2=df.filter("EngineSize <= 4.0") 15 | val col=df2.col("Weight")*0.453592 16 | val df3=df2.withColumn("WeightKG",col) 17 | df3.write.format("com.databricks.spark.csv"). 18 | option("header","true"). 
19 | save("out-csv") 20 | 21 | 22 | // eof 23 | 24 | 25 | -------------------------------------------------------------------------------- /scala-smfsb/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for scala projects 2 | 3 | # Classes and logs 4 | *.class 5 | *.log 6 | *~ 7 | 8 | # SBT-specific 9 | .cache 10 | .history 11 | .classpath 12 | .project 13 | .settings 14 | 15 | .lib/ 16 | dist/* 17 | target/ 18 | lib_managed/ 19 | src_managed/ 20 | project/boot/ 21 | project/plugins/project/ 22 | 23 | # Ensime specific 24 | .ensime 25 | 26 | # Scala-IDE specific 27 | .scala_dependencies 28 | .worksheet 29 | 30 | 31 | -------------------------------------------------------------------------------- /scala-smfsb/build.sbt: -------------------------------------------------------------------------------- 1 | name := "scala-smfsb" 2 | 3 | version := "0.1-SNAPSHOT" 4 | 5 | scalacOptions ++= Seq( 6 | "-unchecked", "-deprecation", "-feature", "-Yrepl-class-based" 7 | ) 8 | 9 | enablePlugins(TutPlugin) 10 | 11 | libraryDependencies ++= Seq( 12 | "org.scalatest" %% "scalatest" % "3.0.1" % "test", 13 | //"org.scalanlp" %% "breeze" % "0.13.2", 14 | "org.scalanlp" %% "breeze-viz" % "0.13.2", 15 | //"org.scalanlp" %% "breeze-natives" % "0.13.2", 16 | "com.github.darrenjw" %% "scala-smfsb" % "0.5" 17 | ) 18 | 19 | resolvers ++= Seq( 20 | "Sonatype Snapshots" at 21 | "https://oss.sonatype.org/content/repositories/snapshots/", 22 | "Sonatype Releases" at 23 | "https://oss.sonatype.org/content/repositories/releases/" 24 | ) 25 | 26 | scalaVersion := "2.12.8" 27 | 28 | -------------------------------------------------------------------------------- /scala-smfsb/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.13 2 | -------------------------------------------------------------------------------- /scala-smfsb/project/plugins.sbt: 
-------------------------------------------------------------------------------- 1 | addSbtPlugin("org.tpolecat" % "tut-plugin" % "0.5.5") 2 | 3 | -------------------------------------------------------------------------------- /scala-smfsb/src/main/scala/scala-smfsb.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Stub for scala-smfsb code 3 | */ 4 | 5 | object Stub { 6 | 7 | import smfsb._ 8 | import breeze.linalg._ 9 | import breeze.numerics._ 10 | 11 | def main(args: Array[String]): Unit = { 12 | val model = SpnModels.lv[IntState]() 13 | val step = Step.gillespie(model) 14 | val ts = Sim.ts(DenseVector(50, 100), 0.0, 20.0, 0.05, step) 15 | Sim.plotTs(ts, "Gillespie simulation of LV model") 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /scala-smfsb/src/test/scala/scala-smfsb-test.scala: -------------------------------------------------------------------------------- 1 | import org.scalatest.FlatSpec 2 | 3 | class SetSpec extends FlatSpec { 4 | 5 | import scalaglm.Utils.backSolve 6 | import breeze.linalg._ 7 | 8 | "backSolve" should "invert correctly" in { 9 | val A = DenseMatrix((4,1),(0,2)) map (_.toDouble) 10 | val x = DenseVector(3.0,-2.0) 11 | val y = A * x 12 | val xx = backSolve(A,y) 13 | assert (norm(x-xx) < 0.00001) 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /smfsb/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | # On a system with make, latex, pdflatex, R and knitr, just typing "make" should build the PDF 3 | 4 | FILE=smfsb 5 | 6 | FORCE: 7 | make $(FILE).pdf 8 | 9 | $(FILE).md: $(FILE).Rmd 10 | Rscript -e "library(knitr); knit('$(FILE).Rmd')" 11 | 12 | $(FILE).html: $(FILE).md 13 | pandoc $(FILE).md -o $(FILE).html 14 | 15 | $(FILE).wp: $(FILE).md 16 | ./md2wp $(FILE).md > $(FILE).wp 17 | 18 | $(FILE).pdf: $(FILE).md 19 
| pandoc $(FILE).md -o $(FILE).pdf 20 | 21 | view: $(FILE).pdf 22 | xdg-open $(FILE).pdf & 23 | 24 | edit: 25 | emacs Makefile *.Rmd & 26 | 27 | update: 28 | git pull 29 | git log | less 30 | 31 | commit: 32 | git commit -a 33 | git push 34 | make update 35 | 36 | clean: 37 | rm -f *~ core $(FILE).md $(FILE).tex $(FILE).pdf *.ps *.eps *.dvi *.log *.aux *.out *.flc *.idx 38 | 39 | # eof 40 | -------------------------------------------------------------------------------- /smfsb/md2wp: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # md2wp 3 | # convert github flavoured markdown to wordpress html 4 | 5 | cat $1 | \ 6 | sed 's/```scala/```/g' | \ 7 | sed 's/```r/```/g' | \ 8 | sed 's/```bash/```/g' | \ 9 | pandoc -f markdown_github -t html5 | \ 10 | sed 's/
/[sourcecode language="r" light="true"]\n/g' | \
11 |   sed 's|
|\n[/sourcecode]|g' | \ 12 | sed 's/"/"/g' | \ 13 | sed 's/>/>/g' | \ 14 | sed 's/</ $@ 17 | 18 | %.scala: %.md 19 | sed -n '/^```scala/,/^```/ p' < $< | sed 's/^```.*//g' > $@ 20 | 21 | view: DraftPost.pdf 22 | xdg-open DraftPost.pdf 23 | 24 | view-html: DraftPost.html 25 | xdg-open DraftPost.html 26 | 27 | clean: 28 | rm -rf *~ $(TARGETS) 29 | 30 | 31 | edit: 32 | emacs Makefile *.md & 33 | 34 | 35 | 36 | 37 | # eof 38 | 39 | 40 | -------------------------------------------------------------------------------- /spark-intro/README.md: -------------------------------------------------------------------------------- 1 | # A quick introduction to Apache Spark for statisticians 2 | 3 | This directory contains code for a draft blog post - the post is now published at: https://darrenjw.wordpress.com/2017/02/08/a-quick-introduction-to-apache-spark-for-statisticians/ 4 | 5 | 6 | 7 | 8 | 9 | ### (C) 2017 Darren J Wilkinson 10 | 11 | 12 | --------------------------------------------------------------------------------