├── Scala ├── project │ ├── build.properties │ └── plugins.sbt ├── Makefile ├── Readme.md ├── md │ ├── Makefile │ ├── ScalaHO.md │ ├── ScalaCC.md │ ├── Parallel.md │ └── Example.md ├── src │ ├── test │ │ └── scala │ │ │ ├── ML-GA-unit.scala │ │ │ ├── ML-GA-pbt.scala │ │ │ └── ML-GA-laws.scala │ └── main │ │ └── scala │ │ └── ML-GA.scala ├── cli │ ├── Readme.md │ ├── logFact.scala │ ├── parallel.scala │ ├── futures.scala │ └── ML-GA.scala ├── build.sbt └── docs │ ├── ScalaCC.md │ ├── Example.md │ ├── ScalaHO.md │ └── Parallel.md ├── Haskell ├── Setup.hs ├── test │ └── Spec.hs ├── src │ └── Lib.hs ├── Makefile ├── CHANGELOG.md ├── Example.md ├── LICENSE ├── package.yaml ├── ml-ga.cabal ├── README.md ├── app │ └── Main.hs └── stack.yaml ├── Dex ├── Readme.md ├── Makefile ├── Example.md ├── ml-ga.dx ├── DexRandom.dx ├── djwutils.dx └── DexCC.dx ├── Makefile ├── JAX ├── Makefile ├── Example.qmd ├── Example.md ├── ml-ga.py ├── Random.qmd ├── Readme.qmd ├── Random.md └── Readme.md ├── Intro ├── Makefile ├── RandomHO.md ├── Example.md ├── Resources.md ├── Random.md └── Readme.md ├── Outline.md ├── Setup.md ├── README.md ├── pima.data └── docs ├── DexRandom.html ├── djwutils.html └── DexCC.html /Scala/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.8.1 2 | -------------------------------------------------------------------------------- /Haskell/Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /Scala/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-mdoc" % "2.3.3") 2 | -------------------------------------------------------------------------------- /Haskell/test/Spec.hs: -------------------------------------------------------------------------------- 1 | main :: IO () 2 | main = putStrLn "Test suite not yet implemented" 3 | -------------------------------------------------------------------------------- /Haskell/src/Lib.hs: -------------------------------------------------------------------------------- 1 | module Lib 2 | ( someFunc 3 | ) where 4 | 5 | someFunc :: IO () 6 | someFunc = putStrLn "someFunc" 7 | -------------------------------------------------------------------------------- /Dex/Readme.md: -------------------------------------------------------------------------------- 1 | # Dex 2 | 3 | The Dex examples are in this directory. See the [Makefile](Makefile) for 4 | clues on how to run stuff. 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /Scala/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | FORCE: 4 | make doc 5 | 6 | doc: 7 | sbt mdoc 8 | cd md ; make 9 | 10 | edit: 11 | emacs Makefile build.sbt *.md docs/*.md src/main/scala/*.scala & 12 | 13 | 14 | 15 | # eof 16 | 17 | 18 | -------------------------------------------------------------------------------- /Scala/Readme.md: -------------------------------------------------------------------------------- 1 | # Scala 2 | 3 | This is the Scala project directory. Type `sbt run` from this directory to build and run the example application, or `sbt console` to get a REPL with relevant dependencies for interactive exploration. 
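For example (assuming `sbt` is installed, as described in [Setup.md](../Setup.md)):

```bash
sbt run       # compile and run the gradient ascent example
sbt test      # run the munit/ScalaCheck/discipline test suites
sbt console   # start a REPL with Breeze, Cats, etc. on the classpath
```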
4 | 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | FORCE: 4 | cd Intro; make 5 | cd Scala; make 6 | cd Haskell; make 7 | #cd JAX; make 8 | cd Dex; make 9 | 10 | edit: 11 | emacs Makefile *.md Intro/*.md & 12 | 13 | 14 | 15 | # eof 16 | 17 | 18 | -------------------------------------------------------------------------------- /Haskell/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | 4 | FORCE: 5 | stack clean 6 | make build 7 | make run 8 | 9 | 10 | build: 11 | stack build 12 | 13 | run: 14 | stack exec ml-ga-exe 15 | 16 | edit: 17 | emacs Makefile *.yaml *.md lib/*.hs app/*.hs & 18 | 19 | 20 | # eof 21 | 22 | 23 | -------------------------------------------------------------------------------- /JAX/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | TARGETS=Readme.md Random.md Example.md 4 | 5 | FORCE: 6 | make $(TARGETS) 7 | 8 | %.md: %.qmd Makefile 9 | quarto render $< --to gfm 10 | 11 | edit: 12 | emacs Makefile *.qmd *.py & 13 | 14 | 15 | clean: 16 | rm -f *~ $(TARGETS) 17 | 18 | 19 | # eof 20 | 21 | 22 | -------------------------------------------------------------------------------- /Scala/md/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | 4 | TARGETS=ScalaCC.pdf 5 | 6 | FORCE: 7 | make $(TARGETS) 8 | 9 | 10 | %.pdf: %.md 11 | pandoc $< -t beamer -o $@ 12 | 13 | 14 | view: $(TARGETS) 15 | xdg-open $(TARGETS) & 16 | 17 | edit: 18 | emacs Makefile *.md & 19 | 20 | clean: 21 | rm -f *~ *.pdf 22 | 23 | 24 | # eof 25 | 26 | -------------------------------------------------------------------------------- /Scala/src/test/scala/ML-GA-unit.scala: -------------------------------------------------------------------------------- 1 | import cats.* 2 | import cats.implicits.* 3 | 4 | import munit.* 5 | 6 | // Example unit tests 7 | class MyUnitTests extends FunSuite: 8 | 9 | test("A List should combine") { 10 | val l = List(1,2) |+| List(3,4) 11 | assert(l === List(1,2,3,4)) 12 | } 13 | 14 | 15 | // eof 16 | 17 | 18 | -------------------------------------------------------------------------------- /Haskell/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog for `ml-ga` 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to the 7 | [Haskell Package Versioning Policy](https://pvp.haskell.org/). 
8 | 9 | ## Unreleased 10 | 11 | ## 0.1.0.0 - YYYY-MM-DD 12 | -------------------------------------------------------------------------------- /Intro/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | 4 | TARGETS=Readme.pdf Example.pdf 5 | 6 | FORCE: 7 | make $(TARGETS) 8 | 9 | 10 | %.pdf: %.md 11 | pandoc $< -t beamer -o $@ 12 | 13 | %.html: %.md 14 | pandoc $< -o $@ 15 | 16 | 17 | view: $(TARGETS) 18 | xdg-open $(TARGETS) & 19 | 20 | edit: 21 | emacs Makefile *.md & 22 | 23 | clean: 24 | rm -f *~ *.pdf 25 | 26 | 27 | # eof 28 | 29 | -------------------------------------------------------------------------------- /Intro/RandomHO.md: -------------------------------------------------------------------------------- 1 | # Splittable random numbers hands-on 2 | 3 | Choose *either* JAX or Dex (according to your interests) for a quick hands-on with splittable random numbers. Both JAX and Dex use splittable generators by default. 4 | 5 | * [Splittable random numbers in JAX](../JAX/Random.md) 6 | * [Splittable random numbers in Dex](https://darrenjw.github.io/fp-ssc-course/DexRandom.html) 7 | 8 | 9 | -------------------------------------------------------------------------------- /Scala/cli/Readme.md: -------------------------------------------------------------------------------- 1 | # CLI 2 | 3 | ## scala-cli examples 4 | 5 | This directory contains stand-alone `scala-cli` scripts. It is not part of the `sbt` project. 6 | 7 | Individual `scala-cli` scripts can typically be run by typing `scala-cli scriptname.scala` at your OS command prompt. Note that required dependencies are specified in the script headers. Command-line arguments can be provided following a `--` separator. 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /Scala/cli/logFact.scala: -------------------------------------------------------------------------------- 1 | //> using scala 3.3.0 2 | 3 | /* 4 | 5 | logFact.scala 6 | 7 | Simple stand-alone application script with no external dependencies 8 | 9 | Run with: 10 | scala-cli logFact.scala -- 11 | where is the number you want the log-factorial of. 12 | 13 | */ 14 | 15 | object LogFactApp: 16 | 17 | @annotation.tailrec 18 | def logFact(n: Int, acc: Double = 0.0): Double = 19 | if (n <= 1) acc else 20 | logFact(n - 1, math.log(n) + acc) 21 | 22 | @main 23 | def run(n: Int) = 24 | println(logFact(n)) 25 | 26 | // eof 27 | 28 | 29 | -------------------------------------------------------------------------------- /Scala/src/test/scala/ML-GA-pbt.scala: -------------------------------------------------------------------------------- 1 | import munit.ScalaCheckSuite 2 | import org.scalacheck.Prop.* 3 | 4 | import cats.* 5 | import cats.implicits.* 6 | 7 | // Example property-based tests 8 | 9 | class MyPropertyTests extends ScalaCheckSuite: 10 | 11 | property("An Int should combine commutatively") { 12 | forAll { (a: Int, b: Int) => 13 | assertEquals(a |+| b, b |+| a) 14 | } 15 | } 16 | 17 | property("An Int should invert") { 18 | forAll { (a: Int) => 19 | assertEquals(a |+| a.inverse(), Monoid[Int].empty) 20 | } 21 | } 22 | 23 | 24 | // eof 25 | 26 | 27 | -------------------------------------------------------------------------------- /Dex/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | LIBPATH=BUILTIN_LIBRARIES:. 
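# "." is included in the library path so that local modules such as djwutils.dx can be imported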
4 | 5 | HTML=djwutils.html DexCC.html ml-ga.html DexRandom.html 6 | 7 | FORCE: 8 | make $(HTML) 9 | cp *.html ../docs/ 10 | make ml-ga 11 | 12 | %: %.dx djwutils.dx 13 | time dex --lib-path $(LIBPATH) -O script $< 14 | 15 | %.html: %.dx djwutils.dx 16 | dex --lib-path $(LIBPATH) -O script $< --outfmt html > $@ 17 | 18 | edit: 19 | emacs Makefile *.md *.dx & 20 | 21 | clean: 22 | rm -f *.out *.tsv *~ *.html *.pdf 23 | 24 | repl: 25 | dex --lib-path $(LIBPATH) repl 26 | 27 | .PRECIOUS: %.html 28 | %: %.html 29 | make $< 30 | xdg-open $< 31 | 32 | 33 | 34 | # eof 35 | 36 | -------------------------------------------------------------------------------- /Scala/src/test/scala/ML-GA-laws.scala: -------------------------------------------------------------------------------- 1 | import cats.* 2 | import cats.implicits.* 3 | 4 | import munit.DisciplineSuite 5 | 6 | // Example Cats laws tests 7 | 8 | class MyLawTests extends DisciplineSuite: 9 | 10 | import cats.kernel.laws.discipline.SemigroupTests 11 | 12 | checkAll("List[Int].SemigroupLaws", SemigroupTests[List[Int]].semigroup) 13 | 14 | import cats.laws.discipline.FunctorTests 15 | 16 | checkAll("List.FunctorLaws", FunctorTests[List].functor[Int, Int, String]) 17 | 18 | import cats.laws.discipline.MonadTests 19 | // checking monad laws is quite slow 20 | //checkAll("List.MonadLaws", MonadTests[List].monad[Int, Int, String]) 21 | 22 | 23 | 24 | // eof 25 | 26 | 27 | -------------------------------------------------------------------------------- /Scala/cli/parallel.scala: -------------------------------------------------------------------------------- 1 | //> using scala 3.3.0 2 | //> using dep org.scala-lang.modules::scala-parallel-collections:1.0.4 3 | 4 | /* 5 | parallel.scala 6 | 7 | Simple app illustrating the use of parallel collections 8 | 9 | */ 10 | 11 | import scala.collection.parallel.CollectionConverters._ 12 | 13 | object ParallelApp: 14 | 15 | val rng = scala.util.Random(42) 16 | 17 | def ll0(mu: Double)(x: Double): Double = -(x - mu)*(x - mu)/2.0 18 | 19 | def ll(mu: Double)(x: Double): Double = 20 | Thread.sleep(500) 21 | ll0(mu)(x) 22 | 23 | @main 24 | def run() = 25 | val v = Vector.fill(10)(rng.nextGaussian) 26 | println("Computing ll sequentially") 27 | println((v map ll(0.0)) reduce (_+_)) 28 | val vp = v.par // convert v to a ParVector 29 | println("Computing ll in parallel") 30 | println((vp map ll(0.0)) reduce (_+_)) 31 | println("Done") 32 | -------------------------------------------------------------------------------- /Scala/cli/futures.scala: -------------------------------------------------------------------------------- 1 | //> using scala 3.3.0 2 | //> using dep org.typelevel::cats-core:2.10.0 3 | 4 | /* 5 | futures.scala 6 | 7 | Simple app illustrating the use of Futures 8 | 9 | */ 10 | 11 | import cats.* 12 | import cats.syntax.all.* 13 | import scala.concurrent.* 14 | import scala.util.Success 15 | import ExecutionContext.Implicits.global 16 | import scala.concurrent.duration.* 17 | 18 | object FuturesApp: 19 | 20 | val rng = scala.util.Random(42) 21 | 22 | def ll0(mu: Double)(x: Double): Double = -(x - mu)*(x - mu)/2.0 23 | 24 | def ll(mu: Double)(x: Double): Double = 25 | Thread.sleep(500) 26 | ll0(mu)(x) 27 | 28 | @main 29 | def run() = 30 | val v = Vector.fill(10)(rng.nextGaussian) 31 | println("Evalulating ll using Futures") 32 | val vf = v map (x => Future(ll(0.0)(x))) 33 | val lf = vf.sequence map (_ reduce (_+_)) 34 | val l = Await.result(lf, 2.seconds) 35 | println(l) 36 | 37 | 
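// Note: each Future is scheduled on the global ExecutionContext, so the ten
// half-second sleeps overlap (up to the available parallelism) rather than
// running one after another.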
-------------------------------------------------------------------------------- /Outline.md: -------------------------------------------------------------------------------- 1 | # FP for scalable statistical computing 2 | 3 | ## Course outline 4 | 5 | * Introduction: problems with existing languages and benefits of FP (20 mins) 6 | * Quick introduction to FP with Scala - Scala crash course (10) 7 | * Hands on with Scala - Scastie and sbt (25) 8 | * Running example - gradient-based opt of a logreg likelihood (10) 9 | * Example in Scala then Run example in Scala (15) 10 | * Scala parallel programming (10) 11 | * (EVENING BREAK?) 12 | * Quick intro to Haskell (10) 13 | * Example in Haskell then Hands-on with Haskell (15) 14 | * JAX for Python (10) 15 | * Example in JAX then JAX hands-on (15) 16 | * (BREAK?) 17 | * Introduction to Dex (10) 18 | * Example in Dex then Dex hands-on (15) 19 | * Functional and parallel random numbers (10) 20 | * Hands-on with splittable random numbers (choice of language) (15) 21 | * Wrap-up and next steps (logreg repo and learning resources) (5) 22 | * (195 minutes, excluding Breaks) 23 | 24 | -------------------------------------------------------------------------------- /JAX/Example.qmd: -------------------------------------------------------------------------------- 1 | # JAX implementation of the running example 2 | 3 | ## ML for a logistic regression model using gradient ascent 4 | 5 | ### The JAX application 6 | 7 | A JAX implementation of our running example can be found in the file [ml-ga.py](ml-ga.py). Study this, and compare and contrast with the Scala and Haskell implementations. 8 | 9 | If you have installed JAX the script should just run, with something like `./ml-ga.py` (or perhaps `python3 ml-ga.py`, or just `python ml-ga.py`). Make sure that you can build and run the application before proceeding. 10 | 11 | ### Hands-on exercise 12 | 13 | Do either of both of these exercises (or go back to previous exercises) as your interests dictate and time permits. 14 | 15 | * Try manually tweaking the initial guess, the learning rate, the convergence tolerance and the maximum number of iterations to see how robust (or otherwise) this naive gradient ascent algorithm is to these tuning parameters. 16 | * Improve on the naive descent algorithm somewhow, perhaps by implementing line search for choosing the step size. 
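For the first exercise, one possible starting point is sketched below. It assumes the definitions from [ml-ga.py](ml-ga.py) (`ll`, `one_step`, `ascend` and `init`) are already in scope, and the particular values are arbitrary illustrations rather than recommendations.

```python
from functools import partial

# sketch: try a larger learning rate, a tighter tolerance and more iterations
step = partial(one_step, learning_rate=1e-5)   # the script's default is 1e-6
opt = ascend(step, init, max_its=20000, tol=1e-6)
print(ll(opt))
```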
17 | 18 | 19 | -------------------------------------------------------------------------------- /Scala/build.sbt: -------------------------------------------------------------------------------- 1 | // build.sbt 2 | 3 | name := "ML-GA" 4 | 5 | version := "0.1-SNAPSHOT" 6 | 7 | scalacOptions ++= Seq( 8 | "-unchecked", "-deprecation", "-feature", "-language:higherKinds", 9 | "-language:implicitConversions", "-Ykind-projector:underscores" 10 | ) 11 | 12 | enablePlugins(MdocPlugin) 13 | 14 | mdocOut := file("md/") 15 | 16 | libraryDependencies ++= Seq( 17 | "org.scalameta" %% "munit" % "0.7.29" % Test, 18 | "org.scalameta" %% "munit-scalacheck" % "0.7.29" % Test, 19 | "org.typelevel" %% "discipline-munit" % "1.0.9" % Test, 20 | "org.scalanlp" %% "breeze" % "2.1.0", 21 | "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4", 22 | ("com.github.haifengl" %% "smile-scala" % "3.0.0").cross(CrossVersion.for3Use2_13), 23 | "org.typelevel" %% "cats-core" % "2.8.0", 24 | "org.typelevel" %% "cats-free" % "2.8.0", 25 | "org.typelevel" %% "cats-laws" % "2.8.0", 26 | "org.typelevel" %% "cats-effect" % "3.2.2", 27 | "org.typelevel" %% "discipline-core" % "1.1.5" 28 | ) 29 | 30 | scalaVersion := "3.3.0" 31 | 32 | 33 | // eof 34 | 35 | -------------------------------------------------------------------------------- /JAX/Example.md: -------------------------------------------------------------------------------- 1 | 2 | # JAX implementation of the running example 3 | 4 | ## ML for a logistic regression model using gradient ascent 5 | 6 | ### The JAX application 7 | 8 | A JAX implementation of our running example can be found in the file 9 | [ml-ga.py](ml-ga.py). Study this, and compare and contrast with the 10 | Scala and Haskell implementations. 11 | 12 | If you have installed JAX the script should just run, with something 13 | like `./ml-ga.py` (or perhaps `python3 ml-ga.py`, or just 14 | `python ml-ga.py`). Make sure that you can build and run the application 15 | before proceeding. 16 | 17 | ### Hands-on exercise 18 | 19 | Do either of both of these exercises (or go back to previous exercises) 20 | as your interests dictate and time permits. 21 | 22 | - Try manually tweaking the initial guess, the learning rate, the 23 | convergence tolerance and the maximum number of iterations to see how 24 | robust (or otherwise) this naive gradient ascent algorithm is to these 25 | tuning parameters. 26 | - Improve on the naive descent algorithm somewhow, perhaps by 27 | implementing line search for choosing the step size. 28 | -------------------------------------------------------------------------------- /Haskell/Example.md: -------------------------------------------------------------------------------- 1 | # Haskell implementation of the running example 2 | 3 | ## ML for a logistic regression model using gradient ascent 4 | 5 | ### The Haskell application 6 | 7 | A Haskell implementation of our running example can be found in the file [Main.hs](app/Main.hs). Study this, and compare and contrast with the Scala implementation. 8 | 9 | The application is a [stack](https://docs.haskellstack.org/en/stable/) project. So, from the [Haskell directory](./) you can build the application with `stack build` and run it with `stack exec ml-ga-exe`. 10 | 11 | Make sure that you can build and run the application before proceeding. 12 | 13 | ### Hands-on exercise 14 | 15 | Do either of both of these exercises (or go back to previous exercises) as your interests dictate and time permits. 
16 | 17 | * Try manually tweaking the initial guess, the learning rate, the convergence tolerance and the maximum number of iterations to see how robust (or otherwise) this naive gradient ascent algorithm is to these tuning parameters. 18 | * Improve on the naive descent algorithm somewhow, perhaps by implementing line search for choosing the step size. 19 | 20 | 21 | -------------------------------------------------------------------------------- /Dex/Example.md: -------------------------------------------------------------------------------- 1 | # Dex implementation of the running example 2 | 3 | ## ML for a logistic regression model using gradient ascent 4 | 5 | ### The Dex application 6 | 7 | A Dex implementation of our running example can be found in the file [ml-ga.dx](ml-ga.dx) ([HTML rendering](https://darrenjw.github.io/fp-ssc-course/ml-ga.html)). Study this, and compare and contrast with the Scala, Haskell and JAX implementations. 8 | 9 | If you have installed Dex the script should run with something like 10 | ```bash 11 | dex --lib-path BUILTIN_LIBRARIES:. script ml-ga.dx 12 | ``` 13 | Setting the library path will allow the inclusion of some utility functions, [djwutils.dx](https://darrenjw.github.io/fp-ssc-course/djwutils.html). Note that there is a [Makefile](Makefile) in the directory that you might want to take a quick look at. Make sure that you can build and run the application before proceeding. 14 | 15 | ### Hands-on exercise 16 | 17 | Do either or both of these exercises (or go back to previous exercises) as your interests dictate and time permits. 18 | 19 | * Try manually tweaking the initial guess, the learning rate, the convergence tolerance and the maximum number of iterations to see how robust (or otherwise) this naive gradient ascent algorithm is to these tuning parameters. 20 | * Improve on the naive descent algorithm somewhow, perhaps by implementing line search for choosing the step size. 21 | 22 | 23 | -------------------------------------------------------------------------------- /Haskell/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Author name here (c) 2023 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Author name here nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /Dex/ml-ga.dx: -------------------------------------------------------------------------------- 1 | '# ML via gradient ascent using Dex 2 | 3 | -- load some generic utility functions (need a TSV parser) 4 | import djwutils 5 | 6 | '## Read and process the data 7 | 8 | dat = unsafe_io \. read_file "../pima.data" 9 | AsList(_, tab) = parse_tsv ' ' dat 10 | atab = map (\l. cons "1.0" l) tab 11 | att = map (\r. list2tab r :: (Fin 9)=>String) atab 12 | xStr = map (\r. slice r 0 (Fin 8)) att 13 | xmb = map (\r. map parseString r) xStr :: _=>(Fin 8)=>(Maybe Float) 14 | x = map (\r. map from_just r) xmb :: _=>(Fin 8)=>Float 15 | yStrM = map (\r. slice r 8 (Fin 1)) att 16 | yStr = (transpose yStrM)[0@_] 17 | y = map (\s. select (s == "Yes") 1.0 0.0) yStr 18 | 19 | x 20 | y 21 | 22 | '## Gradient ascent 23 | 24 | def ll(b: (Fin 8)=>Float) -> Float = 25 | neg $ sum (log (map (\ x. (exp x) + 1) ((map (\ yi. 1 - 2*yi) y)*(x **. b)))) 26 | 27 | gll = \x. grad ll x -- use auto-diff for the gradient 28 | 29 | def one_step(learning_rate: Float) -> (Fin 8=>Float) -> (Fin 8)=>Float = 30 | \b0. b0 + learning_rate .* gll b0 31 | 32 | def ascend(step: (Fin 8=>Float) -> (Fin 8)=>Float, init: (Fin 8)=>Float, max_its: Float) -> 33 | (Fin 8)=>Float = 34 | (b_opt, its_left) = yield_state (init, max_its) \state. 35 | while \. 36 | (b0, its) = get state 37 | b1 = step b0 38 | diff = b1-b0 39 | sz = sqrt $ sum $ diff*diff 40 | if ((its > 0) && (sz > 1.0e-8)) 41 | then 42 | state := (b1, its - 1) 43 | True 44 | else False 45 | b_opt 46 | 47 | init = [-9.8, 0.1, 0, 0, 0, 0, 1.8, 0] 48 | ll init 49 | 50 | opt = ascend (one_step 1.0e-6) init 10000 51 | opt 52 | ll opt 53 | 54 | -- eof 55 | -------------------------------------------------------------------------------- /Haskell/package.yaml: -------------------------------------------------------------------------------- 1 | name: ml-ga 2 | version: 0.1.0.0 3 | github: "githubuser/ml-ga" 4 | license: BSD3 5 | author: "Author name here" 6 | maintainer: "example@example.com" 7 | copyright: "2023 Author name here" 8 | 9 | extra-source-files: 10 | - README.md 11 | - CHANGELOG.md 12 | 13 | # Metadata used when publishing your package 14 | # synopsis: Short description of your package 15 | # category: Web 16 | 17 | # To avoid duplicated efforts in documentation and dealing with the 18 | # complications of embedding Haddock markup inside cabal files, it is 19 | # common to point users to the README.md file. 
20 | description: Please see the README on GitHub at 21 | 22 | dependencies: 23 | - base >= 4.7 && < 5 24 | - Frames >= 0.7 && < 1 25 | - pipes >= 4.3 && < 5 26 | - microlens >= 0.4 && < 1 27 | - hmatrix >= 0.20 && < 1 28 | 29 | 30 | ghc-options: 31 | - -Wall 32 | - -Wcompat 33 | - -Widentities 34 | - -Wincomplete-record-updates 35 | - -Wincomplete-uni-patterns 36 | - -Wmissing-export-lists 37 | - -Wmissing-home-modules 38 | - -Wpartial-fields 39 | - -Wredundant-constraints 40 | 41 | library: 42 | source-dirs: src 43 | 44 | executables: 45 | ml-ga-exe: 46 | main: Main.hs 47 | source-dirs: app 48 | ghc-options: 49 | - -threaded 50 | - -rtsopts 51 | - -with-rtsopts=-N 52 | dependencies: 53 | - ml-ga 54 | 55 | tests: 56 | ml-ga-test: 57 | main: Spec.hs 58 | source-dirs: test 59 | ghc-options: 60 | - -threaded 61 | - -rtsopts 62 | - -with-rtsopts=-N 63 | dependencies: 64 | - ml-ga 65 | -------------------------------------------------------------------------------- /JAX/ml-ga.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # ml-ga.py 3 | # Maximum likelihood via gradient ascent 4 | 5 | import os 6 | import pandas as pd 7 | import numpy as np 8 | import scipy as sp 9 | 10 | import jax 11 | from jax import grad, jit 12 | import jax.numpy as jnp 13 | import jax.scipy as jsp 14 | 15 | print("Maximum likelihood by gradient ascent using JAX") 16 | 17 | print("First read and process the data (using regular Python)") 18 | df = pd.read_csv(os.path.join("..", "pima.data"), sep=" ", header=None) 19 | print(df) 20 | n, p = df.shape 21 | print(n, p) 22 | 23 | y = pd.get_dummies(df[7])["Yes"].to_numpy(dtype='float32') 24 | X = df.drop(columns=7).to_numpy() 25 | X = np.hstack((np.ones((n,1)), X)) 26 | print(X) 27 | print(y) 28 | 29 | print("Now gradient ascent using JAX") 30 | X = X.astype(jnp.float32) 31 | y = y.astype(jnp.float32) 32 | 33 | @jit 34 | def ll(beta): 35 | return jnp.sum(-jnp.log(1 + jnp.exp(-(2*y - 1)*jnp.dot(X, beta)))) 36 | 37 | gll = jit(grad(ll)) # use auto-diff for the gradient 38 | 39 | @jit 40 | def one_step(b0, learning_rate=1e-6): 41 | return b0 + learning_rate*gll(b0) 42 | 43 | def ascend(step, init, max_its=10000, tol=1e-5, verb=True): 44 | def term(state): 45 | x1, x0, its = state 46 | return ((its > 0) & jnp.logical_not(jnp.allclose(x1, x0, tol))) 47 | def step_state(state): 48 | x1, x0, its = state 49 | x2 = step(x1) 50 | return [x2, x1, its - 1] 51 | b_opt, _, its_remaining = jax.lax.while_loop( 52 | term, step_state, [init, -init, max_its]) 53 | if (verb): 54 | print(str(its_remaining) + " iterations remaining") 55 | return(b_opt) 56 | 57 | init = jnp.array([-9.8, 0.1, 0, 0, 0, 0, 1.8, 0]).astype(jnp.float32) 58 | print(init) 59 | print(ll(init)) 60 | opt = ascend(one_step, init) 61 | print(opt) 62 | print(ll(opt)) 63 | print("Goodbye.") 64 | 65 | 66 | # eof 67 | 68 | -------------------------------------------------------------------------------- /Intro/Example.md: -------------------------------------------------------------------------------- 1 | # Running example 2 | 3 | ## Intro 4 | 5 | It will be useful to have a running example for the course. 6 | 7 | ## The model and likelihood 8 | 9 | Here we will conduct inference for a [logistic regression](https://en.wikipedia.org/wiki/Logistic_regression) model for a binary outcome based on some covariates. Observation $i$ will be 1 with probability $p_i$, and the [logit](https://en.wikipedia.org/wiki/Logit) of $p_i$ will depend linearly on predictors. 
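That is, $\text{logit}(p_i) \equiv \log[p_i/(1-p_i)] = x_i'b$, where $x_i'$ is the $i$th row of the covariate matrix, $X$.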
This leads to a log-likelihood function 10 | 11 | $$\ell(b; y) = -\mathbf{1}'[\log(\mathbf{1} + \exp[-(2y - \mathbf{1})\circ(Xb)])]$$ 12 | 13 | where $y$ is a binary vector of responses, $X$ is an $n\times p$ matrix of covariates, $b$ is the $p$-vector of parameters of inferential interest, and $\circ$ denotes the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_(matrices)). 14 | 15 | Note that I discuss the derivation of this likelihood in detail in a [series of blog posts](https://darrenjw.wordpress.com/2022/08/07/bayesian-inference-for-a-logistic-regression-model-part-1/). 16 | 17 | ## Gradient of the likelihood 18 | 19 | Some languages and frameworks can auto-diff likelihoods like this, but we can also differentiate by hand: 20 | 21 | $$\nabla \ell(b) = X'(y-p), \quad \text{where}\quad p = (\mathbf{1} + \exp[-Xb])^{-1}.$$ 22 | 23 | For our running example we will use a very simple gradient ascent algorithm in order to try and maximise the likelihood, $\ell$, wrt to the covariate parameter weights, $b$. 24 | 25 | ## The data 26 | 27 | We will be analysing the ["Pima" training dataset](../pima.data), with 200 observations and 7 predictors. Including an intercept as the first covariate gives a parameter vector of length $p=8$. 28 | 29 | For a small dataset like this, there is no problem using the gradient of the full likelihood in a simple [**steepest ascent**](https://en.wikipedia.org/wiki/Gradient_descent) algorithm, so that's what we'll start with. But if you are interested in optimisation, you can then go on to experiement with adapting the learning rate, accelerated learning algorithms, using [**stochastic gradient ascent**](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), etc., according to your interests. 30 | -------------------------------------------------------------------------------- /Scala/src/main/scala/ML-GA.scala: -------------------------------------------------------------------------------- 1 | /* 2 | ML-GA.scala 3 | 4 | Simple gradient ascent algorithm for maximum likelihood estimation 5 | of a logistic regression model, applied to the Pima data 6 | 7 | */ 8 | 9 | import breeze.linalg.* 10 | import breeze.numerics.* 11 | import smile.data.pimpDataFrame 12 | import annotation.tailrec 13 | 14 | type DVD = DenseVector[Double] 15 | 16 | object GradientAscent: 17 | 18 | 19 | @main def run() = 20 | println("First read and process the data") 21 | val df = smile.read.csv("../pima.data", delimiter=" ", header=false) 22 | print(df) 23 | val y = DenseVector(df.select("V8"). 24 | map(_(0).asInstanceOf[String]). 
25 | map(s => if (s == "Yes") 1.0 else 0.0).toArray) 26 | println(y) 27 | val x = DenseMatrix(df.drop("V8").toMatrix.toArray:_*) 28 | println(x) 29 | val ones = DenseVector.ones[Double](x.rows) 30 | val X = DenseMatrix.horzcat(ones.toDenseMatrix.t, x) 31 | println(X) 32 | val p = X.cols 33 | println(p) 34 | 35 | println("Now define log likelihood and gradient") 36 | def ll(beta: DVD): Double = 37 | sum(-log(ones + exp(-1.0*(2.0*y - ones)*:*(X * beta)))) 38 | def gll(beta: DVD): DVD = 39 | (X.t)*(y - ones/:/(ones + exp(-X*beta))) 40 | 41 | println("Now define functions for gradient ascent") 42 | def oneStep(learningRate: Double)(b0: DVD): DVD = 43 | b0 + learningRate*gll(b0) 44 | def ascend(step: DVD => DVD, init: DVD, maxIts: Int = 10000, 45 | tol: Double = 1e-8, verb: Boolean = true): DVD = 46 | @tailrec def go(b0: DVD, ll0: Double, itsLeft: Int): DVD = 47 | if (verb) 48 | println(s"$itsLeft : $ll0") 49 | val b1 = step(b0) 50 | val ll1 = ll(b1) 51 | if ((math.abs(ll0 - ll1) < tol)|(itsLeft < 1)) 52 | b1 53 | else 54 | go(b1, ll1, itsLeft - 1) 55 | go(init, ll(init), maxIts) 56 | 57 | println("Now run a simple gradient ascent algorithm") 58 | // Better choose a reasonable init as gradient ascent is terrible... 59 | val init = DenseVector(-9.8, 0.1, 0, 0, 0, 0, 1.8, 0) 60 | val opt = ascend(oneStep(1e-6), init) 61 | println("Inits: " + init) 62 | println("Init ll: " + ll(init)) 63 | println("Opt: " + opt) 64 | println("Opt ll: " + ll(opt)) 65 | println("Goodbye.") 66 | 67 | -------------------------------------------------------------------------------- /Dex/DexRandom.dx: -------------------------------------------------------------------------------- 1 | '# Quick hands-on with splittable random numbers in Dex 2 | 3 | '## Basics 4 | Call up a Dex REPL (`dex repl`) and start messing around with splittable random numbers in Dex. 5 | 6 | k0 = new_key 42 7 | [k1, k2] = split_key k0 8 | 9 | k0 10 | k1 11 | k2 12 | 13 | rand k0 14 | rand k0 15 | rand k0 16 | rand k1 17 | rand k2 18 | 19 | 'Note that we don't just have to split the key into two. 20 | 21 | [k3, k4, k5] = split_key k2 22 | 23 | k3 24 | k4 25 | k5 26 | 27 | split_key k5 :: Fin 10=>Key 28 | 29 | keys = for i:(Fin 8). ixkey k5 i 30 | 31 | keys 32 | 33 | 'It can sometimes be convenient to split a key into a *table* of keys and then *map* or *fold* a random function over the table. 34 | 35 | sum $ map rand keys 36 | 37 | 'If we really just want to "advance" the key, we can do that too. 38 | 39 | split_key k5 :: Fin 1=>Key 40 | 41 | [k6] = split_key k5 42 | 43 | k6 44 | 45 | '## Probability distributions 46 | `rand` is used to generate a $U(0,1)$, and `randn` generates from a standard normal, $N(0,1)$. 47 | 48 | map randn keys 49 | 50 | 'For more interesting probability distributions, you will want to use the [stats library](https://google-research.github.io/dex-lang/lib/stats.html). 51 | 52 | import stats 53 | 54 | draw(Exponential(2.0), k5) :: Float 55 | 56 | pd = Poisson(10.0) 57 | 58 | draw(pd, k6) :: Nat 59 | 60 | map (\k. draw(pd, k)) keys :: _=>Nat 61 | 62 | '## Random functions 63 | Suppose that you want to define your own random function. Here we will define our own function for sampling exponentials. 64 | 65 | def rexp(rate: Float) -> (Key) -> Float = \k. log1p (-rand k) / -rate 66 | 67 | 'Notice how the non-determinism is clearly signalled by the presence of the key in the type signature. Also notice how we have made the key the final input parameter. This is because of currying. 
We can call the function directly: 68 | 69 | rexp(10.0)(k5) 70 | 71 | 'But we can also create a particular random variable: 72 | 73 | my_rexp = rexp 20.0 74 | 75 | 'and then use this partially applied function with multiple keys. 76 | 77 | map my_rexp keys 78 | 79 | 'This is why you nearly always want to make the key the final input parameter, and why you might want to curry it separately. 80 | 81 | '## Exercise 82 | Write a function to simulate a 1d [random walk](https://en.wikipedia.org/wiki/Random_walk). 83 | 84 | -------------------------------------------------------------------------------- /Scala/cli/ML-GA.scala: -------------------------------------------------------------------------------- 1 | //> using scala 3.1.2 2 | //> using dep org.scalanlp::breeze:2.1.0 3 | //> using dep com.github.haifengl:smile-scala_2.13:3.0.2 4 | 5 | /* 6 | ML-GA.scala 7 | 8 | Simple gradient ascent algorithm for maximum likelihood estimation 9 | of a logistic regression model, applied to the Pima data 10 | 11 | */ 12 | 13 | import breeze.linalg.* 14 | import breeze.numerics.* 15 | import smile.data.pimpDataFrame 16 | import annotation.tailrec 17 | 18 | type DVD = DenseVector[Double] 19 | 20 | object GradientAscent: 21 | 22 | 23 | @main def run() = 24 | println("First read and process the data") 25 | val df = smile.read.csv("../../pima.data", delimiter=" ", header=false) 26 | print(df) 27 | val y = DenseVector(df.select("V8"). 28 | map(_(0).asInstanceOf[String]). 29 | map(s => if (s == "Yes") 1.0 else 0.0).toArray) 30 | println(y) 31 | val x = DenseMatrix(df.drop("V8").toMatrix.toArray:_*) 32 | println(x) 33 | val ones = DenseVector.ones[Double](x.rows) 34 | val X = DenseMatrix.horzcat(ones.toDenseMatrix.t, x) 35 | println(X) 36 | val p = X.cols 37 | println(p) 38 | 39 | println("Now define log likelihood and gradient") 40 | def ll(beta: DVD): Double = 41 | sum(-log(ones + exp(-1.0*(2.0*y - ones)*:*(X * beta)))) 42 | def gll(beta: DVD): DVD = 43 | (X.t)*(y - ones/:/(ones + exp(-X*beta))) 44 | 45 | println("Now define functions for gradient ascent") 46 | def oneStep(learningRate: Double)(b0: DVD): DVD = 47 | b0 + learningRate*gll(b0) 48 | def ascend(step: DVD => DVD, init: DVD, maxIts: Int = 10000, 49 | tol: Double = 1e-8, verb: Boolean = true): DVD = 50 | @tailrec def go(b0: DVD, ll0: Double, itsLeft: Int): DVD = 51 | if (verb) 52 | println(s"$itsLeft : $ll0") 53 | val b1 = step(b0) 54 | val ll1 = ll(b1) 55 | if ((math.abs(ll0 - ll1) < tol)|(itsLeft < 1)) 56 | b1 57 | else 58 | go(b1, ll1, itsLeft - 1) 59 | go(init, ll(init), maxIts) 60 | 61 | println("Now run a simple gradient ascent algorithm") 62 | // Better choose a reasonable init as gradient ascent is terrible... 63 | val init = DenseVector(-9.8, 0.1, 0, 0, 0, 0, 1.8, 0) 64 | val opt = ascend(oneStep(1e-6), init) 65 | println("Inits: " + init) 66 | println("Init ll: " + ll(init)) 67 | println("Opt: " + opt) 68 | println("Opt ll: " + ll(opt)) 69 | println("Goodbye.") 70 | 71 | -------------------------------------------------------------------------------- /Haskell/ml-ga.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.12 2 | 3 | -- This file has been generated from package.yaml by hpack version 0.34.2. 
4 | -- 5 | -- see: https://github.com/sol/hpack 6 | 7 | name: ml-ga 8 | version: 0.1.0.0 9 | description: Please see the README on GitHub at 10 | homepage: https://github.com/githubuser/ml-ga#readme 11 | bug-reports: https://github.com/githubuser/ml-ga/issues 12 | author: Author name here 13 | maintainer: example@example.com 14 | copyright: 2023 Author name here 15 | license: BSD3 16 | license-file: LICENSE 17 | build-type: Simple 18 | extra-source-files: 19 | README.md 20 | CHANGELOG.md 21 | 22 | source-repository head 23 | type: git 24 | location: https://github.com/githubuser/ml-ga 25 | 26 | library 27 | exposed-modules: 28 | Lib 29 | other-modules: 30 | Paths_ml_ga 31 | hs-source-dirs: 32 | src 33 | ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints 34 | build-depends: 35 | Frames >=0.7 && <1 36 | , base >=4.7 && <5 37 | , hmatrix >=0.20 && <1 38 | , microlens >=0.4 && <1 39 | , pipes >=4.3 && <5 40 | default-language: Haskell2010 41 | 42 | executable ml-ga-exe 43 | main-is: Main.hs 44 | other-modules: 45 | Paths_ml_ga 46 | hs-source-dirs: 47 | app 48 | ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N 49 | build-depends: 50 | Frames >=0.7 && <1 51 | , base >=4.7 && <5 52 | , hmatrix >=0.20 && <1 53 | , microlens >=0.4 && <1 54 | , ml-ga 55 | , pipes >=4.3 && <5 56 | default-language: Haskell2010 57 | 58 | test-suite ml-ga-test 59 | type: exitcode-stdio-1.0 60 | main-is: Spec.hs 61 | other-modules: 62 | Paths_ml_ga 63 | hs-source-dirs: 64 | test 65 | ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N 66 | build-depends: 67 | Frames >=0.7 && <1 68 | , base >=4.7 && <5 69 | , hmatrix >=0.20 && <1 70 | , microlens >=0.4 && <1 71 | , ml-ga 72 | , pipes >=4.3 && <5 73 | default-language: Haskell2010 74 | -------------------------------------------------------------------------------- /Dex/djwutils.dx: -------------------------------------------------------------------------------- 1 | -- djwutils.dx 2 | -- Some generic utility functions 3 | -- some of which should probably be added to the standard Dex prelude 4 | 5 | def iterate(n: Nat, step: (a) -> a, init: a) -> Fin n => a given (a|Data) = 6 | with_state init \st. 7 | for i:(Fin n). 8 | old = get st 9 | next = step old 10 | st := next 11 | old 12 | 13 | def unfold(n: Nat, advance: (a) -> (b, a), init: a) -> Fin n => b given (a|Data, b) = 14 | with_state init \st. 15 | for i:(Fin n). 16 | (b, a) = advance (get st) 17 | st := a 18 | b 19 | 20 | def length(arr: n=>a) -> Nat given (a, n|Ix) = 21 | size n 22 | 23 | def last_dumb(arr: n=>a) -> a given (a, n|Ix) = 24 | head $ reverse arr 25 | 26 | def last(arr: n=>a) -> a given (a, n|Ix) = 27 | nind = unsafe_nat_diff (size n) 1 28 | arr[asidx nind] 29 | 30 | -- Not ideal instance, since likely loss of precision... 31 | instance Parse(Float64) 32 | def parseString(str) = 33 | mf : Maybe Float = parseString str 34 | case mf of 35 | Nothing -> Nothing 36 | Just f -> Just $ f_to_f64 f 37 | 38 | def clipv(x: a=>Float, c: Float) -> (a)=>Float given (a|Ix) = 39 | map (\xi. 
clip (-c, c) xi) x 40 | 41 | def nanclip(x: a=>Float) -> (a)=>Float given (a|Ix) = 42 | map (\xi. if (isnan xi) then 0.0 else xi) x 43 | 44 | def to_tsv(mat: (n)=>(p)=>Float) -> String given (n|Ix, p|Ix) = 45 | ms = for i j. show mat[i,j] <> "\t" 46 | concat (map (\l. l <> "\n") (map concat ms)) 47 | 48 | -- based on "lines" from the prelude... 49 | def words(sep: Word8, source: String) -> List String = 50 | AsList(_, s) = source <> (to_list [sep]) 51 | AsList(num_words, space_ixs) = cat_maybes for i_char. 52 | if (s[i_char] == sep) 53 | then Just i_char 54 | else Nothing 55 | to_list for i_word:(Fin num_words). 56 | start = case prev_ix i_word of 57 | Nothing -> first_ix 58 | Just i -> right_post space_ixs[i] 59 | end = left_post space_ixs[i_word] 60 | post_slice s start end 61 | 62 | def cons(x: a, xs: List a) -> List a given (a) = 63 | AsList(on, xt) = xs 64 | n = on + 1 65 | nxt = for i:(Fin n). case (ordinal i == 0) of 66 | True -> x 67 | False -> xt[asidx (unsafe_nat_diff (ordinal i) 1)] 68 | to_list nxt 69 | 70 | def list2tab(l: List a) -> (n)=>a given (a, n|Ix) = 71 | AsList(ll, t) = l 72 | unsafe_cast_table t 73 | 74 | def parse_tsv(sep: Word8, input: String) -> List (List String) = 75 | AsList(_, lines) = lines input 76 | to_list $ map (\l. words sep l) lines 77 | 78 | 79 | 80 | 81 | -- eof 82 | -------------------------------------------------------------------------------- /JAX/Random.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "JAX splittable random numbers hands on" 3 | author: "Darren Wilkinson" 4 | jupyter: python3 5 | --- 6 | 7 | ## Basics 8 | Call Python REPL (`python` or `python3`) and start messing around with splittable random numbers in JAX. 9 | 10 | ```{python} 11 | import numpy as np 12 | import scipy as sp 13 | import scipy.stats 14 | 15 | import jax 16 | from jax import grad, jit 17 | import jax.numpy as jnp 18 | import jax.scipy as jsp 19 | import jax.lax as jl 20 | import jax.random as jr 21 | 22 | k0 = jr.PRNGKey(42) 23 | 24 | [k1, k2] = jr.split(k0) 25 | 26 | k0 27 | ``` 28 | ```{python} 29 | k1 30 | ``` 31 | ```{python} 32 | k2 33 | ``` 34 | ```{python} 35 | jr.uniform(k0) 36 | ``` 37 | ```{python} 38 | jr.uniform(k0) 39 | ``` 40 | ```{python} 41 | jr.uniform(k0) 42 | ``` 43 | ```{python} 44 | jr.uniform(k1) 45 | ``` 46 | ```{python} 47 | jr.uniform(k2) 48 | ``` 49 | 50 | Note that we don't just have to split the key into two. 51 | 52 | ```{python} 53 | [k3, k4, k5] = jr.split(k2, 3) 54 | 55 | k3 56 | ``` 57 | ```{python} 58 | k4 59 | ``` 60 | ```{python} 61 | k5 62 | ``` 63 | ```{python} 64 | 65 | keys = jr.split(k5, 8) 66 | 67 | keys 68 | ``` 69 | 70 | It can sometimes be convenient to split a key into an *array* of keys and then *map* or *fold* a random function over the array. 71 | 72 | ```{python} 73 | jnp.sum(jl.map(jr.uniform, keys)) 74 | ``` 75 | 76 | If we really just want to "advance" the key, we can do that too. 77 | 78 | ```{python} 79 | jr.split(k5, 1) 80 | ``` 81 | 82 | ## Probability distributions 83 | `jr.uniform` is used to generate a $U(0,1)$, and `jr.normal` generates from a standard normal, $N(0,1)$. 84 | 85 | ```{python} 86 | jl.map(jr.normal, keys) 87 | ``` 88 | 89 | There are other commonly encountered random variables available for sampling. 
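For example, `jax.random` provides `exponential`, `poisson`, `gamma`, `bernoulli` and many more.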
90 | 91 | ```{python} 92 | jr.exponential(k5) 93 | ``` 94 | ```{python} 95 | jr.poisson(k5, 10.0) 96 | ``` 97 | ```{python} 98 | jr.poisson(k5, 10.0, [10]) 99 | ``` 100 | 101 | ## Random functions 102 | Suppose that you want to define your own random function. Here we will define our own function for sampling exponentials. 103 | 104 | ```{python} 105 | def rexp(rate, key): 106 | return jnp.log1p(-jr.uniform(key)) / (-rate) 107 | ``` 108 | 109 | Notice how we have made the key the final input parameter. This is because of currying. We can call the function directly: 110 | 111 | ```{python} 112 | rexp(10.0, k5) 113 | ``` 114 | 115 | But we can also create a particular random variable: 116 | 117 | ```{python} 118 | from functools import partial 119 | 120 | my_rexp = partial(rexp, 20.0) 121 | ``` 122 | 123 | and then use this partially applied function with multiple keys. 124 | 125 | ```{python} 126 | jl.map(my_rexp, keys) 127 | ``` 128 | 129 | This is why you might want to make the key the final input parameter. 130 | 131 | ## Exercise 132 | 133 | Write a function to simulate a 1d [random walk](https://en.wikipedia.org/wiki/Random_walk). 134 | 135 | 136 | -------------------------------------------------------------------------------- /Setup.md: -------------------------------------------------------------------------------- 1 | ## FP for scalable statistical computing 2 | 3 | # Setup 4 | 5 | **In order to get the most benefit from the course, it is necessary to install some software in advance** 6 | 7 | The languages that will be covered in the course are Scala, Haskell, JAX and Dex. Ideally, you will install all of these on your laptop in advance of the course. However, it is *not necessary* to install/use *all* of these languages for the course to be useful. Installing, say, Scala and at least one other (depending on your interests), will be fine. 8 | 9 | In addition to installing some software, you should also download a copy of [this repo](https://github.com/darrenjw/fp-ssc-course/archive/refs/heads/main.zip) the day before the course. 10 | 11 | ### Scala 12 | 13 | The easiest way to install all necessary Scala tools on your system is by using a tool called Coursier. See the [getting started](https://docs.scala-lang.org/getting-started/) instructions on the [Scala](https://www.scala-lang.org/) website for how to install this. Once you have Coursier installed, doing `cs setup` should install everything else that you need, including [sbt](https://www.scala-sbt.org/), which is the main tool we will be relying on for this course. 14 | 15 | Scala editing modes are available for most programmer code editing tools (including Emacs and Vim). If you want a full-featured IDE, then [IntelliJ Idea](https://www.jetbrains.com/idea/) is typically recommended. The free community edition is fine. Be sure to install the Scala plugin for IntelliJ during the installation process. 16 | 17 | ### Haskell 18 | 19 | The [Haskell](https://www.haskell.org/) website has information on installing various components of the Haskell toolchain. But if you are running Linux, you can easily install everything you need via your package manager. eg. on Ubuntu, just installing the packages `haskell-platform` and `haskell-stack` will provide everything you need for this course. The packages have similar names on other distros. 20 | 21 | ### JAX (and Python) 22 | 23 | [JAX](https://jax.readthedocs.io/en/latest/) is a library for [Python](https://www.python.org/) which can be installed with `pip`. 
You first need to make sure that you have python and pip installed on your system. Then see the [installation instructions](https://github.com/google/jax#installation) which are a bit system dependent. The CPU-only version of JAX is fine for this course. Note that if you know about Python "virtual environments", it might be better to install JAX in a new environment. 24 | 25 | ### Dex 26 | 27 | [Dex](https://github.com/google-research/dex-lang) is an experimental new language (written in Haskell) which currently needs to be built from source. This is relatively straightforward on Macs and most Linux distros, but is likely to be impractical on Windows. 28 | 29 | If you know `git`, clone the Dex repo, and if not, click on the green "Code" button, and download the repo as a zip file, and uncompress on your system. Then follow the [Installing](https://github.com/google-research/dex-lang#Installing) instructions. Note that I have some additional [installation](https://github.com/darrenjw/djwhacks/blob/master/dex/Reminders.md#Installation) instructions for Ubuntu and Fedora which will probably be useful for anyone using those distros (and the Ubuntu instructions are probably useful for any Debian-derived distro). 30 | -------------------------------------------------------------------------------- /Haskell/README.md: -------------------------------------------------------------------------------- 1 | # Haskell crash course 2 | 3 | Haskell is similar to Scala in the sense that it is a strongly typed functional programming language with a sophisticated type system. However, it is much purer than Scala, and unlike Scala, evaluation is *lazy*, rather than *strict* by default. There are pros and cons with this, which we don't have time to explore today. It also uses a different syntax. The syntax is whitespace sensitive, somewhat similar to Python and the braceless syntax of Scala 3. 4 | 5 | ## Types and assignments 6 | 7 | A REPL can be called up by running `ghci` from the command line. GHC is the Glasgow Haskell Compiler (the "standard" Haskell compiler), and the "i" is for "interactive". This crash course can be followed in the REPL. Note that `:t` is a REPL command that will return the type of an object (and note that `:t` also works similarly in the Scala REPL). 8 | ```haskell 9 | x = 5 :: Int 10 | x 11 | :t x 12 | 13 | y = 6 14 | y 15 | :t y 16 | ``` 17 | Note that unlike Scala, Haskell will defer inferring a specific numeric type from a numeric literal as long as possible. 18 | 19 | ## Immutable collections 20 | 21 | Linked lists are very (too) fundamental in Haskell. They are denoted with square brackets. 22 | ```haskell 23 | l = [1, 3, 4, 5, 8] :: [Int] 24 | l 25 | :t l 26 | ``` 27 | 28 | But there are many other interesting collections in Haskell. For example, immutable arrays. 29 | ```haskell 30 | import Data.Array.IArray 31 | import Data.Array.Unboxed 32 | la = listArray (0, 3) l :: UArray Int Int 33 | la 34 | :t la 35 | la ! 1 36 | la ! 2 37 | la2 = la // [(1, 7)] 38 | la2 39 | la 40 | ``` 41 | 42 | ## Manipulating collections 43 | 44 | In Haskell, the map operation associated with the functor type class is called `fmap`, and has infix notation `<$>`. 45 | ```haskell 46 | fmap (\x -> x*2) l 47 | (\x -> x*2) <$> l 48 | ``` 49 | 50 | ```haskell 51 | foldl (+) 0 l -- left fold 52 | foldr (+) 0 l -- right fold 53 | ``` 54 | 55 | ## Writing functions 56 | 57 | We will again use the log-factorial function as our illustrative example. 
Note that for declaring functions, you probably want to turn on multi-line mode in the GHCi REPL with `:set +m`. A simple example can be written as follows. 58 | ```haskell 59 | logFact :: Int -> Double 60 | logFact n = sum (log <$> [1..n]) 61 | 62 | logFact 5 63 | logFact 100000 64 | ``` 65 | A recursive version can also be implemented. 66 | ```haskell 67 | logFact2 :: Int -> Double 68 | logFact2 n = if (n == 0) 69 | then 0.0 70 | else (log (fromIntegral n)) + logFact2 (n-1) 71 | 72 | logFact2 5 73 | logFact2 100000 74 | ``` 75 | Note that due to Haskell's evaluation model, stack overflow on recursion is very often avoided. However, we can write the function in tail-recursive form if we choose. 76 | ```haskell 77 | logFact3 :: Int -> Double 78 | logFact3 n = go (n :: Int) (0.0 :: Double) where 79 | go 0 acc = acc 80 | go n acc = go (n-1) (acc + log (fromIntegral n)) 81 | 82 | logFact3 5 83 | logFact3 100000 84 | ``` 85 | 86 | ## Curried functions 87 | 88 | In Haskell, functions are typically written in fully curried form by default. This is often convenient, but can sometimes make type signatures difficult to interpret. 89 | 90 | ```haskell 91 | linFun :: Double -> Double -> Double -> Double 92 | linFun m c x = m*x + c 93 | 94 | f = linFun 2 3 95 | f 0 96 | f 1 97 | f 2 98 | ``` 99 | 100 | -------------------------------------------------------------------------------- /Haskell/app/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ConstraintKinds, DataKinds, FlexibleContexts, GADTs, 2 | OverloadedStrings, PatternSynonyms, QuasiQuotes, 3 | ScopedTypeVariables, TemplateHaskell, TypeOperators, TypeApplications, 4 | ViewPatterns #-} 5 | 6 | module Main (main) where 7 | 8 | import Frames 9 | import Frames.TH (rowGen, RowGen(..)) 10 | import Pipes hiding (Proxy) 11 | import Numeric.LinearAlgebra 12 | import qualified Data.Foldable as F 13 | import Lens.Micro.Extras 14 | 15 | -- template Haskell to create the Person type, and personParser 16 | tableTypes' (rowGen "../pima.data") 17 | { rowTypeName = "Person" 18 | , columnNames = [ "npreg", "glu", "bp" 19 | , "skin", "bmi", "ped", "age", "yy" ] 20 | , separator = " " } 21 | 22 | -- create a data stream 23 | dataStream :: MonadSafe m => Producer Person m () 24 | dataStream = readTableOpt personParser "../pima.data" 25 | 26 | -- load full dataset 27 | loadData :: IO (Frame Person) 28 | loadData = inCoreAoS dataStream 29 | 30 | -- create rows of covariate matrix 31 | rec2l :: Person -> [Double] 32 | rec2l r = [1.0, fromIntegral $ rgetField @Npreg r, fromIntegral $ rgetField @Glu r, 33 | fromIntegral $ rgetField @Bp r, fromIntegral $ rgetField @Skin r, 34 | rgetField @Bmi r, rgetField @Ped r, fromIntegral $ rgetField @Age r] 35 | 36 | -- sum an hmatrix Vector 37 | vsum :: Vector Double -> Double 38 | vsum v = (konst 1 (size v) :: Vector Double) <.> v 39 | 40 | -- log-likelihood 41 | ll :: Matrix Double -> Vector Double -> Vector Double -> Double 42 | ll x y b = (negate) (vsum (cmap log ( 43 | (scalar 1) + (cmap exp (cmap (negate) ( 44 | (((scalar 2) * y) - (scalar 1)) * (x #> b) 45 | ) 46 | ))))) 47 | 48 | -- gradient 49 | gll :: Matrix Double -> Vector Double -> Vector Double -> Vector Double 50 | gll x y b = (tr x) #> (y - (scalar 1)/((scalar 1) + (cmap exp (-x #> b)))) 51 | 52 | -- one step of gradient ascent 53 | oneStep :: Matrix Double -> Vector Double -> Double -> Vector Double -> Vector Double 54 | oneStep x y lrate b0 = b0 + (scalar lrate) * (gll x y b0) 55 | 56 | -- function for 
ascent 57 | ascend :: (Vector Double -> Vector Double) -> 58 | Vector Double -> Int -> Double -> Vector Double 59 | ascend astep init0 maxIts tol = 60 | go init0 maxIts 61 | where 62 | go b0 itsLeft = 63 | let b1 = astep b0 64 | d = norm_2 $ b1 - b0 65 | in if ((d < tol)||(itsLeft < 1)) then b1 66 | else go b1 (itsLeft - 1) 67 | 68 | main :: IO () 69 | main = do 70 | putStrLn "Gradient ascent for a log likelihood" 71 | putStrLn "First load and process the data..." 72 | dat <- loadData 73 | let yl = (\x -> if x then 1.0 else 0.0) <$> F.toList (view yy <$> dat) 74 | let xl = rec2l <$> F.toList dat 75 | print $ head xl 76 | let y = vector yl 77 | print y 78 | let x = fromLists xl 79 | disp 2 x 80 | putStrLn "Now run the gradient ascent" 81 | -- choose reasonable init, since gradient ascent is terrible... 82 | let init0 = fromList [-9.8, 0.1, 0, 0, 0, 0, 1.8, 0] :: Vector Double 83 | print init0 84 | print $ ll x y init0 85 | let opt = ascend (oneStep x y 1.0e-6) init0 10000 1.0e-5 86 | print opt 87 | print $ ll x y opt 88 | putStrLn "Goodbye." 89 | 90 | 91 | -------------------------------------------------------------------------------- /Haskell/stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # https://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | # resolver: lts-3.5 12 | # resolver: nightly-2015-09-21 13 | resolver: ghc-8.10.7 14 | #resolver: ghc-9.2.5 15 | # 16 | # The location of a snapshot can be provided as a file or url. Stack assumes 17 | # a snapshot provided as a file might change, whereas a url resource does not. 18 | # 19 | # resolver: ./custom-snapshot.yaml 20 | # resolver: https://example.com/snapshots/2018-01-01.yaml 21 | #resolver: lts-20.6 22 | 23 | # User packages to be built. 24 | # Various formats can be used as shown in the example below. 25 | # 26 | # packages: 27 | # - some-directory 28 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 29 | # subdirs: 30 | # - auto-update 31 | # - wai 32 | packages: 33 | - . 34 | # Dependency packages to be pulled from upstream that are not in the resolver. 35 | # These entries can reference officially published versions as well as 36 | # forks / in-progress versions pinned to a git hash. 
For example: 37 | # 38 | extra-deps: 39 | - Frames-0.7.3 40 | - hmatrix-0.20.2 41 | - microlens-0.4.13.1 42 | - pipes-4.3.16 43 | - contravariant-1.5.5 44 | - discrimination-0.5 45 | - hashable-1.4.2.0 46 | - mmorph-1.2.0 47 | - pipes-bytestring-2.1.7 48 | - pipes-group-1.0.12 49 | - pipes-parse-3.0.9 50 | - pipes-safe-2.3.4 51 | - primitive-0.7.4.0 52 | - random-1.2.1.1 53 | - readable-0.3.1 54 | - semigroups-0.20 55 | - split-0.2.3.5 56 | - storable-complex-0.2.3.0 57 | - vector-0.13.0.0 58 | - vector-th-unbox-0.2.2 59 | - vinyl-0.14.3 60 | - void-0.7.3 61 | - StateVar-1.2.2 62 | - base-orphans-0.8.7 63 | - data-array-byte-0.1.0.1 64 | - free-5.1.10 65 | - monad-control-1.0.3.1 66 | - promises-0.3 67 | - splitmix-0.1.0.4 68 | - stringsearch-0.3.6.6 69 | - transformers-base-0.4.6 70 | - transformers-compat-0.7.2 71 | - vector-stream-0.1.0.0 72 | - comonad-5.0.8 73 | - distributive-0.6.2.1 74 | - indexed-traversable-0.1.2 75 | - profunctors-5.6.2 76 | - semigroupoids-5.3.7 77 | - th-abstraction-0.4.5.0 78 | - bifunctors-5.5.14 79 | - tagged-0.8.6.1 80 | - unordered-containers-0.2.19.1 81 | 82 | 83 | # - acme-missiles-0.3 84 | # - git: https://github.com/commercialhaskell/stack.git 85 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 86 | # 87 | # extra-deps: [] 88 | 89 | # Override default flag values for local packages and extra-deps 90 | # flags: {} 91 | 92 | # Extra package databases containing global packages 93 | # extra-package-dbs: [] 94 | 95 | # Control whether we use the GHC we find on the path 96 | system-ghc: false 97 | # 98 | # Require a specific version of stack, using version ranges 99 | # require-stack-version: -any # Default 100 | # require-stack-version: ">=2.3" 101 | # 102 | # Override the architecture used by stack, especially useful on Windows 103 | # arch: i386 104 | # arch: x86_64 105 | # 106 | # Extra directories used by stack for building 107 | # extra-include-dirs: [/path/to/dir] 108 | # extra-lib-dirs: [/path/to/dir] 109 | # 110 | # Allow a newer minor version of GHC than the snapshot specifies 111 | # compiler-check: newer-minor 112 | -------------------------------------------------------------------------------- /Scala/docs/ScalaCC.md: -------------------------------------------------------------------------------- 1 | # Scala crash course 2 | 3 | ## Immutability 4 | 5 | Scala is not a *pure* functional language, so it is *possible* to use *mutable variables*. 6 | 7 | ```scala mdoc 8 | var v = 5 9 | v = v + 1 10 | v 11 | ``` 12 | But use of these is strongly discouraged in favour of *immutable values*. 13 | ```scala mdoc:fail 14 | val x = 5 15 | x = x + 1 16 | ``` 17 | 18 | ## Immutable collections 19 | 20 | We often want to work with collections of values of given type, and these are also immutable. 21 | ```scala mdoc 22 | val vi = Vector(2, 4, 6, 3) 23 | // now update value at position 2 to be 7 24 | val viu = vi.updated(2, 7) 25 | // original vector unchanged 26 | vi 27 | viu 28 | ``` 29 | The new vector is *effectively* an updated copy, but that doesn't mean that all of the data has been copied. We can point to data in the original vector safely, since it is immutable. 30 | 31 | ## Manipulating collections 32 | 33 | Since collections are *functors*, we can *map* them. 34 | ```scala mdoc 35 | vi.map(x => x*2) 36 | vi map (x => x*2) 37 | vi map (_*2) 38 | ``` 39 | We can also reduce them. 
40 | ```scala mdoc 41 | vi.foldLeft(0)(_+_) 42 | vi.reduce(_+_) 43 | vi.sum 44 | ``` 45 | Note that `map` and `reduce` are higher-order functions (HoFs), since they accept a function as an argument. 46 | 47 | ## Writing functions 48 | 49 | Here's a simple definition of a log-factorial function. 50 | ```scala mdoc 51 | def logFact(n: Int): Double = 52 | (1 to n).map(_.toDouble).map(math.log).sum 53 | 54 | logFact(3) 55 | logFact(10) 56 | logFact(100000) 57 | ``` 58 | This requires creating a collection of size `n`, which might not be desirable. 59 | 60 | We will use the log-factorial function to explore the use of recursion instead of more imperative looping constructs. 61 | 62 | ## Recursive functions 63 | 64 | ```scala mdoc 65 | def logFactR(n: Int): Double = 66 | if (n <= 1) 0.0 else 67 | math.log(n) + logFactR(n - 1) 68 | 69 | logFactR(3) 70 | logFactR(10) 71 | ``` 72 | This function is recursive, but not tail-recursive since the result of the recursive call (`logFactR(n - 1)`) is modified before the correct value is returned. So, although it doesn't consume heap space, it consumes stack space, which is worse. That is, this function will stack-overflow if evaluated at a large enough input value. 73 | ```scala 74 | logFactR(100000) 75 | // java.lang.StackOverflowError 76 | ``` 77 | 78 | ## Tail-recursive functions 79 | 80 | ```scala mdoc 81 | @annotation.tailrec 82 | final def logFactTR(n: Int, acc: Double = 0.0): Double = 83 | if (n <= 1) acc else 84 | logFactTR(n - 1, math.log(n) + acc) 85 | 86 | logFactTR(3) 87 | logFactTR(10) 88 | logFactTR(100000) 89 | ``` 90 | This version consumes neither heap nor stack space. The `tailrec` annotation is optional, but is useful, since it forces the compiler to flag an error if there is some reason why the tail call elimination can not be performed (eg. here, the method needed to be decalared `final` so it could not be over-ridden). 91 | 92 | ## Helper functions 93 | 94 | The previous example made use of the fact that Scala has optional arguments with default values. Even if this wasn't the case, we could acheive the same thing by embedding the two-argument version as a private function inside the one-argument version. 95 | ```scala mdoc 96 | def logFactTRH(n: Int): Double = 97 | def go(n: Int, acc: Double): Double = 98 | if (n <= 1) acc else 99 | go(n - 1, math.log(n) + acc) 100 | go(n, 0.0) 101 | 102 | logFactTRH(3) 103 | logFactTRH(10) 104 | logFactTRH(100000) 105 | ``` 106 | 107 | ## Curried functions 108 | 109 | Sometimes we want to partially apply a function by providing some of the arguments. We can flag this by grouping them. 110 | ```scala mdoc 111 | def linFun(m: Double, c: Double)(x: Double): Double = 112 | m*x + c 113 | 114 | val f = linFun(2, 3) 115 | 116 | f(0) 117 | f(1) 118 | f(2) 119 | ``` 120 | Since the output of the partial call is a function, this is another example of a HoF. 121 | -------------------------------------------------------------------------------- /Dex/DexCC.dx: -------------------------------------------------------------------------------- 1 | '# Dex crash course 2 | Dex is a strongly typed pure functional differentiable array processing language, designed with scientific computing and machine learning applications in mind. It is well-suited to statistical computing applications, and like JAX, can exploit a GPU if available. 3 | 4 | 'Start a Dex REPL by entering `dex repl` at your command prompt. 5 | 6 | '## Immutability 7 | Dex objects are immutable. 
8 | 9 | x = 5 10 | :t x 11 | 12 | x = x + 1 13 | 14 | '## Immutable collections 15 | Dex, like JAX, has arrays/tensors as its main data structure, which are referred to as *tables*, and these are immutable. 16 | 17 | v = [1.0, 2, 4, 5, 7] 18 | v 19 | :t v 20 | 21 | 'Dex has a strong static type system, including elements of dependent typing. Note how the length of an array (and in general, the dimensions of a tensor) is part of its type. This allows the detection of all kinds of dimension mismatch errors at compile time rather than runtime, and this is a very good thing! Notice that the type reflects the idea that conceptually, an array is essentially a function mapping from an index to a value. 22 | 23 | 'We can't just directly index into a table with an integer, since this isn't safe - we might violate the table index bounds. We need to cast our integer to a typed index using the `@` operator. 24 | 25 | v[2@Fin 5] 26 | 27 | 'However, where things are unambiguous, we can use type inference. 28 | 29 | v[2@_] 30 | 31 | 'It is relatively unusual to want to update a single element of a Dex table, but we can certaintly do it (immutably). Below we update the element with index 2 to be 9.0. 32 | 33 | vu = for i. case (i == (2@_)) of 34 | True -> 9.0 35 | False -> v[i] 36 | 37 | vu 38 | v 39 | 40 | 'This syntax will gradually become clear. 41 | 42 | '## Manipulating collections 43 | We can map and reduce. 44 | 45 | map (\x. 2*x) v 46 | 47 | 2.0 .* v 48 | 49 | sum v 50 | 51 | sum(v) 52 | 53 | reduce 0.0 (\x y. x+y) v 54 | 55 | reduce(0.0, \x y. x+y, v) 56 | 57 | fold 0.0 (\i acc. acc + v[i]) 58 | 59 | 'The main way of creating and transforming tables is using `for`, which in Dex is more like a *for-comprehension* or *for-expression* in some languages than a traditional imperative for-loop. However, it is designed to allow the writing of index-based algorithms in a safe, pure functional way. For example, as an alternative to using `map` we could write. 60 | 61 | for i. 2*v[i] 62 | 63 | 'We can create a table of given length filled with the same element 64 | 65 | for i:(Fin 8). 2.0 66 | 67 | 'or different elements 68 | 69 | for i:(Fin 6). n_to_f $ ordinal i 70 | 71 | 'We can create 2d tables similarly. 72 | 73 | Height=Fin 3 74 | Width=Fin 4 75 | m = for i:Height j:Width. n_to_f $ ordinal i + ordinal j 76 | m 77 | :t m 78 | 79 | 80 | '## Writing functions 81 | We can write a log-factorial function as follows. 82 | 83 | def log_fact(n: Nat) -> Float = 84 | sum $ for i:(Fin n). log $ n_to_f (ordinal i + 1) 85 | 86 | :t log_fact 87 | 88 | log_fact 3 89 | log_fact(10) 90 | log_fact 100000 91 | 92 | 'But this consumes heap. Dex, like JAX, is differentiable, so prohibits explicit recursion. However, it allows the creation of a mutable state variable that can be get and set via its algebraic effects system. 93 | 94 | def log_fact_s(n: Nat) -> Float = 95 | (lf, _) = yield_state (0.0, n_to_i n) \state. 96 | while \. 97 | (acc, i) = get state 98 | if (i > 0) 99 | then 100 | state := (acc + log (i_to_f i), i - 1) 101 | True 102 | else False 103 | lf 104 | 105 | log_fact_s 3 106 | log_fact_s 10 107 | log_fact_s 100000 108 | 109 | 'Note that for the final example, significant numerical error has accumulated in this naive sequential sum of 32 bit floats. 110 | 111 | '## Curried functions 112 | Note that we can curry functions as appropriate, using lambdas. 113 | 114 | def lin_fun(m: Float, c: Float) -> (Float) -> Float = 115 | \x. 
m*x + c 116 | 117 | :t lin_fun 118 | 119 | f = lin_fun 2 3 120 | 121 | :t f 122 | 123 | f 0 124 | f(1) 125 | f 2 126 | 127 | 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fp-ssc-course 2 | 3 | ### An introduction to functional programming for scalable statistical computing and machine learning 4 | 5 | ## A half-day short-course 6 | 7 | A brief introduction to ideas of functional programming in the context 8 | of scalable statistical computing, illustrated with hands-on examples 9 | in Scala, Haskell, (Python+)JAX and 10 | Dex. [Scala](https://www.scala-lang.org/) and 11 | [Haskell](https://www.haskell.org/) are general purpose strongly typed 12 | functional programming 13 | languages. [JAX](https://jax.readthedocs.io/en/latest/) is a 14 | functional language for differentiable array programming embedded in 15 | Python, and [Dex](https://github.com/google-research/dex-lang) is a 16 | new experimental strongly typed functional language for differentiable 17 | array processing. 18 | 19 | This course is still subject to ongoing minor updates and revisions, but is now complete. The first iteration of the course was delivered to [StatML](https://statml.io/) PhD students on 2023-05-18. Note that all materials are freely available, and the course is quite suitable for self-study. If you are self-studying, you should probably allow a full day (including laptop setup). 20 | 21 | *Please note that you need to install some software on your system **in advance** of the course.* See the [Setup](Setup.md) guide for details. 22 | 23 | You will also need a copy of this repo. If you know git, clone it ASAP, and then do a pull the day before the course. If you don't know git, click on the green "Code" button and download a zip file, but do (or re-do) this the day before the course to make sure you have an up-to-date version. 24 | 25 | It would also be useful to have a copy of my [logreg](https://github.com/darrenjw/logreg) repo, which explores MCMC algorithms in a variety of (functional) languages. 26 | 27 | ### Abstract 28 | 29 | Non-trivial research problems in statistical computing and machine 30 | learning are often complex and computationally intensive, requiring a 31 | custom implementation in some programming language. All of the 32 | languages commonly used for this purpose are very old, dating back to 33 | the dawn of the computing age, and are quite unsuitable for scalable 34 | and efficient statistical computation. There have been huge advances in 35 | computing science in the decades since these languages were created, 36 | and many new, different and better programming languages have been 37 | created. Although functional programming languages are not new, there 38 | has been a large resurgence of interest in functional languages in the 39 | last decade or two, as people have begun to appreciate the advantages 40 | of the functional approach, especially in the context of developing 41 | large, scalable software systems, and the ability to take advantage of 42 | modern computing hardware. 43 | 44 | This short course will provide a brief introduction to ideas of 45 | functional programming in the context of scalable statistical 46 | computing, illustrated with hands-on examples in Scala, Haskell, 47 | (Python+)JAX and Dex. Scala and Haskell are general purpose strongly 48 | typed functional programming languages. 
JAX is a functional language 49 | for differentiable array programming embedded in Python, and Dex is a 50 | new experimental strongly typed functional language for differentiable 51 | array processing. 52 | 53 | ### Materials 54 | 55 | * [Laptop setup](Setup.md) - to be completed *in advance* of the course 56 | * [*Introduction to FP*](Intro/Readme.md) 57 | * [*Scala crash-course*](Scala/md/ScalaCC.md) 58 | * [Scala hands-on](Scala/md/ScalaHO.md) 59 | * [*Running example*](Intro/Example.md) 60 | * [Scala example and hands-on](Scala/md/Example.md) 61 | * [Scala parallel programming crash course](Scala/md/Parallel.md) 62 | * [Haskell crash-course](Haskell/README.md) 63 | * [Haskell example and hands-on](Haskell/Example.md) 64 | * [JAX crash-course](JAX/Readme.md) 65 | * [JAX example and hands-on](JAX/Example.md) 66 | * [Dex crash-course](https://darrenjw.github.io/fp-ssc-course/DexCC.html) 67 | * [Dex example and hands-on](Dex/Example.md) 68 | * [Functional and parallel random numbers](Intro/Random.md) 69 | * [Splittable random numbers hands-on](Intro/RandomHO.md) 70 | * [Wrap-up and next steps](Intro/Resources.md) (including additional learning resources) 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /JAX/Readme.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "JAX for Python crash course" 3 | author: "Darren Wilkinson" 4 | jupyter: python3 5 | --- 6 | 7 | JAX is a pure functional language embedded in python, but designed to feel as much like python as practical. From a `python` prompt, first do some imports. 8 | ```{python} 9 | import os 10 | import pandas as pd 11 | import numpy as np 12 | import scipy as sp 13 | 14 | import jax 15 | from jax import grad, jit 16 | import jax.numpy as jnp 17 | import jax.scipy as jsp 18 | import jax.lax as jl 19 | ``` 20 | If any of these imports fail, you probably don't have JAX installed correctly (in your current environment). For most numpy and scipy functions, there is a JAX equivalent, so with the above imports, translating from regular python to JAX often involves replacing a call to `np.X` (for some `X`) with a call to `jnp.X`, and `sp.X` with `jsp.X`. But there are other issues to confront, due to the fact that JAX is a pure functional language and python most definitely isn't! 21 | 22 | ## Immutable collections 23 | 24 | ```{python} 25 | v = jnp.array([2, 4, 6, 3]).astype(jnp.float32) 26 | ``` 27 | Note that the type of the array has been set to `float32`, since these are fast and efficient, especially on GPUs. JAX arrays are immutable. 28 | ```{python} 29 | v[2] 30 | vu = v.at[2].set(7) 31 | vu 32 | ``` 33 | 34 | ```{python} 35 | v 36 | ``` 37 | 38 | We can map JAX arrays. 39 | ```{python} 40 | jl.map(lambda x: 2*x, v) 41 | ``` 42 | Mapping can be parallelised, and JAX will do this automatically. We can also reduce them. 43 | ```{python} 44 | jl.reduce(v, 0.0, lambda x,y: x+y, [0]) 45 | ``` 46 | ```{python} 47 | jnp.sum(v) 48 | ``` 49 | The reduction must be *monoidal* (the operation must be associative, and the initial value must be an identity wrt that operation), or the result is undefined. Since the reduction is monoidal, it can be parallised via tree reduction, and JAX will do this automatically. 50 | 51 | ## Functions 52 | 53 | Functions are written like regular python functions. But if they are to be part of a hot loop, they can be JIT-compiled. 
54 | ```{python} 55 | @jit 56 | def sumArray1d(v): 57 | return jl.reduce(v, 0.0, lambda x,y: x+y, [0]) 58 | 59 | float(sumArray1d(v)) 60 | ``` 61 | Note that you can't use `float` inside a JIT'd JAX function, since `float` is a python function, not a JAX function. 62 | 63 | We have seen that functional languages often exploit recursion, either explicitly or implicitly, for the implementation of "looping" constructs. However, allowing general recursion turns out to be problematic for reverse-mode automatic differentiation. Consequently, some differentiable functional languages (such as JAX and Dex) disallow recursive functions. But without any mutable variables or recursion, how can we loop?! In this case the language must provide us with some built-in constructs. In JAX, the two most commonly used constructs (in addition to [`map`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.map.html), [`reduce`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.reduce.html) and [`scan`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.scan.html)) are [`jax.lax.while_loop`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.while_loop.html) and [`jax.lax.fori_loop`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.fori_loop.html). Note that you cannot reverse-mode differentiate through a `while_loop` (this is problematic for the same reason that recursive functions are problematic - you cannot know statically what the memory requirements will be). A for loop is relatively straightforward. 64 | 65 | ```{python} 66 | def logFactF(n): 67 | return float(jl.fori_loop(1, n+1, 68 | lambda i,acc: acc + jnp.log(i), 0.0)) 69 | 70 | logFactF(3) 71 | ``` 72 | ```{python} 73 | logFactF(100000) 74 | ``` 75 | Note that the upper bound on the loop is *exclusive*. A while loop is slightly more involved, due to the need to propagate two items of state (the counter and the accumulator). However, the while loop can be used when the number of iterations is not known statically. 76 | ```{python} 77 | def logFactW(n): 78 | def cont(state): 79 | [i, acc] = state 80 | return i <= n 81 | def advance(state): 82 | [i, acc] = state 83 | return [i + 1, acc + jnp.log(i)] 84 | return float(jl.while_loop(cont, advance, [1, 0.0])[1]) 85 | 86 | logFactW(3) 87 | ``` 88 | ```{python} 89 | logFactW(100000) 90 | ``` 91 | -------------------------------------------------------------------------------- /Scala/docs/Example.md: -------------------------------------------------------------------------------- 1 | # Running example 2 | 3 | ## ML for a logistic regression model using gradient ascent 4 | 5 | ### An interactive session 6 | 7 | Here we will present an interactive Scala session for conducting maximum likelihood inference for our simple logistic regression model using a very naive gradient ascent algorithm. We will need to use the [Breeze](https://github.com/scalanlp/breeze/) library for numerical linear algebra, and we will also use [Smile](https://haifengl.github.io/) for a data frame object and CSV parser. The [sbt](https://www.scala-sbt.org/) project in the [Scala](../) directory has these dependencies (and a few others) preconfigured, so running `sbt console` from the Scala directory will give a REPL into which the following commands can be pasted. 8 | 9 | We start with a few imports and a shorthand type declaration. 
10 | ```scala mdoc 11 | import breeze.linalg.* 12 | import breeze.numerics.* 13 | import smile.data.pimpDataFrame 14 | import annotation.tailrec 15 | 16 | type DVD = DenseVector[Double] 17 | ``` 18 | 19 | Next we use Smile to read and process the data. 20 | ```scala mdoc 21 | val df = smile.read.csv("../pima.data", delimiter=" ", header=false) 22 | val y = DenseVector(df.select("V8"). 23 | map(_(0).asInstanceOf[String]). 24 | map(s => if (s == "Yes") 1.0 else 0.0).toArray) 25 | val x = DenseMatrix(df.drop("V8").toMatrix.toArray:_*) 26 | val ones = DenseVector.ones[Double](x.rows) 27 | val X = DenseMatrix.horzcat(ones.toDenseMatrix.t, x) 28 | val p = X.cols 29 | ``` 30 | 31 | Now `y` is our response variable and `X` is our covariate matrix, including an intercept column. Now we define the likelihood and some functions for gradient ascent. Note that the `ascend` function contains a tail-recursive function `go` that avoids the need for mutable variables and a "while loop", but is effectively equivalent. 32 | ```scala mdoc 33 | def ll(beta: DVD): Double = 34 | sum(-log(ones + exp(-1.0*(2.0*y - ones)*:*(X * beta)))) 35 | 36 | def gll(beta: DVD): DVD = 37 | (X.t)*(y - ones/:/(ones + exp(-X*beta))) 38 | 39 | def oneStep(learningRate: Double)(b0: DVD): DVD = 40 | b0 + learningRate*gll(b0) 41 | 42 | def ascend(step: DVD => DVD, init: DVD, maxIts: Int = 10000, 43 | tol: Double = 1e-8, verb: Boolean = true): DVD = 44 | @tailrec def go(b0: DVD, ll0: Double, itsLeft: Int): DVD = { 45 | if (verb) 46 | println(s"$itsLeft : $ll0") 47 | val b1 = step(b0) 48 | val ll1 = ll(b1) 49 | if ((math.abs(ll0 - ll1) < tol)|(itsLeft < 1)) 50 | b1 51 | else 52 | go(b1, ll1, itsLeft - 1) 53 | } 54 | go(init, ll(init), maxIts) 55 | ``` 56 | 57 | Now let's run the gradient ascent algorithm, starting from a reasonable initial guess, since naive gradient ascent is terrible. 58 | ```scala mdoc 59 | val init = DenseVector(-9.8, 0.1, 0, 0, 0, 0, 1.8, 0) 60 | ll(init) 61 | val opt = ascend(oneStep(1e-6), init, verb=false) 62 | ll(opt) 63 | ``` 64 | Note how much the likelihood has improved relative to our initial guess. 65 | 66 | 67 | ### A standalone application 68 | 69 | We can package the code above into a standalone Scala application, and this is available in the file [ML-GA.scala](../src/main/scala/ML-GA.scala). We can compile and run this application by typing `sbt run` from the Scala directory. Note that you must run `sbt` from the directory containing the [build.sbt](../build.sbt) file, not from the subdirectory containing the actual source code files. Make sure that you can run the application before proceding to the exercises. 70 | 71 | ### Hands-on exercise 72 | 73 | Do some or all these exercises (or go back to previous exercises) as your interests dictate and time permits. 74 | 75 | * Try manually tweaking the initial guess, the learning rate, the convergence tolerance and the maximum number of iterations to see how robust (or otherwise) this naive gradient ascent algorithm is to these tuning parameters. 76 | * Improve on the naive ascent algorithm somewhow, perhaps by implementing [line search](https://en.wikipedia.org/wiki/Line_search) for choosing the step size. 77 | * Note that Breeze has a bunch of utilities for optimisation, in the [breeze.optimise](https://github.com/scalanlp/breeze/wiki/Quickstart#breezeoptimize) package. See if you can figure out how to use them by messing around in the REPL. Then see if you can adapt the running example to use one of the methods. 
The [ScalaDoc](http://www.scalanlp.org/api/breeze/#breeze.optimize.package) may be useful. 78 | 79 | -------------------------------------------------------------------------------- /Scala/docs/ScalaHO.md: -------------------------------------------------------------------------------- 1 | # Scala hands-on 2 | 3 | ## Scastie 4 | 5 | You can start experimenting with Scala in the browser without installing any software. Go to [Scastie](https://scastie.scala-lang.org/), and start entering code snippets. Make sure that you have "Worksheet" mode selected (green dot next to "Worksheet"). Note that you can configure Scastie to use different versions of Scala, and to add dependencies on third party libraries. Figuring out how to do this is left as an exercise. 6 | 7 | Paste the following into the Scastie worksheet: 8 | ```scala 9 | def logFactTR(n: Int, acc: Double = 0.0): Double = 10 | if (n <= 1) acc else 11 | logFactTR(n - 1, math.log(n) + acc) 12 | 13 | logFactTR(100000) 14 | ``` 15 | and then click on "Run". 16 | 17 | In the crash course we glossed over the different styles of Scala syntax. Prior to Scala 3, Scala required braces to delimit blocks, like C, Java, R, etc. In Scala 3, braceless syntax (with significant whitespace) is optionally allowed (like Python and Haskell). So we can write functions like: 18 | ```scala 19 | def logFactTRB(n: Int, acc: Double = 0.0): Double = { 20 | if (n <= 1) acc else 21 | logFactTRB(n - 1, math.log(n) + acc) 22 | } 23 | 24 | logFactTRB(100000) 25 | ``` 26 | I prefer the new braceless syntax, but a lot of existing tooling works better with the old braced syntax. It's really just a matter of preference, but it's best to stick with one style or the other as far as possible. 27 | 28 | ## scala-cli 29 | 30 | If you have `scala-cli` installed, you can use this to build and run small (typically one file) Scala applications. Required dependencies can be specified in the header of the script. 31 | 32 | [logFact.scala](../cli/logFact.scala) is a simple stand-alone application to compute a log-factorial. It can be run with 33 | ```bash 34 | scala-cli logFact.scala -- 10000 35 | ``` 36 | 37 | [ML-GA.scala](../cli/ML-GA.scala) is a gradient-ascent example (to be discussed imminently). Don't worry about the details of this program yet, but note the external dependencies specified at the start of the script. It can be run with 38 | ```bash 39 | scala-cli ML-GA.scala 40 | ``` 41 | 42 | Make sure that you are able to run these scripts before proceeding. 43 | 44 | `scala-cli` can also be used to build multi-file applications, but for any non-trivial project, it is probably better to use `sbt`. 45 | 46 | ## sbt 47 | 48 | If you have `sbt` installed, you can use this to create, build and run Scala projects. 49 | 50 | ### Using an existing project 51 | 52 | There is an `sbt` project in the [Scala](../) directory of this code repository. This is indicated by the presence of a [build.sbt](../build.sbt) file, which contains most of the configuration info related to the project. 53 | 54 | From the directory containing the build file, running `sbt console` will give a Scala REPL. 55 | 56 | Try pasting one of the above snippets into the REPL and make sure it works. 57 | 58 | Exit the console (Ctrl-D), and type `sbt run` to build and run the application assocaited with the project. We will study this application later. 59 | 60 | Note that `sbt` is really designed to be run interactively. 
So, just typing `sbt` will lead to an `sbt` prompt (which is different from a Scala REPL prompt). Then, typing `console` from the `sbt` prompt will give a Scala REPL, `compile` will compile the project, `run` will compile and run the project, `test` will run any test suites, etc. Also note that `sbt` can "watch" the project directory for file changes. Use a `~` as a prefix to watch for changes. eg. typing `~compile` will watch for file changes and recompile the application (incrementally) whenever a file is saved. 61 | 62 | ### Creating a new project 63 | 64 | `sbt` expects projects to be configured in a particular way, with certain files in certain directories. `sbt` can itself be used to construct project templates with the correct structure. Different templates are suitable for different kinds of projects. eg. 65 | ```bash 66 | sbt new darrenjw/breeze.g8 67 | ``` 68 | will create a new project in a new directory inside the current directory, using the template provided in the `breeze.g8` repo in the GitHub account of `darrenjw`. This is a template for a project with fairly minimal dependencies on the [Breeze](https://github.com/scalanlp/breeze) library for numerical computing. I have some other templates. eg. `fps.g8` contains dependencies on libraries commonly used for pure functional programming in Scala, `scala-glm.g8` has a dependency on my regression modelling library, etc. 69 | 70 | Create a new `sbt` project using one of the templates mentioned, and explore it. 71 | 72 | Create an `sbt` application that prints to the console the log-factorial of 10000. 73 | 74 | -------------------------------------------------------------------------------- /Scala/md/ScalaHO.md: -------------------------------------------------------------------------------- 1 | # Scala hands-on 2 | 3 | ## Scastie 4 | 5 | You can start experimenting with Scala in the browser without installing any software. Go to [Scastie](https://scastie.scala-lang.org/), and start entering code snippets. Make sure that you have "Worksheet" mode selected (green dot next to "Worksheet"). Note that you can configure Scastie to use different versions of Scala, and to add dependencies on third party libraries. Figuring out how to do this is left as an exercise. 6 | 7 | Paste the following into the Scastie worksheet: 8 | ```scala 9 | def logFactTR(n: Int, acc: Double = 0.0): Double = 10 | if (n <= 1) acc else 11 | logFactTR(n - 1, math.log(n) + acc) 12 | 13 | logFactTR(100000) 14 | ``` 15 | and then click on "Run". 16 | 17 | In the crash course we glossed over the different styles of Scala syntax. Prior to Scala 3, Scala required braces to delimit blocks, like C, Java, R, etc. In Scala 3, braceless syntax (with significant whitespace) is optionally allowed (like Python and Haskell). So we can write functions like: 18 | ```scala 19 | def logFactTRB(n: Int, acc: Double = 0.0): Double = { 20 | if (n <= 1) acc else 21 | logFactTRB(n - 1, math.log(n) + acc) 22 | } 23 | 24 | logFactTRB(100000) 25 | ``` 26 | I prefer the new braceless syntax, but a lot of existing tooling works better with the old braced syntax. It's really just a matter of preference, but it's best to stick with one style or the other as far as possible. 27 | 28 | ## scala-cli 29 | 30 | If you have `scala-cli` installed, you can use this to build and run small (typically one file) Scala applications. Required dependencies can be specified in the header of the script. 
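For illustration, a minimal script header might declare the Scala version and a Breeze dependency with `//> using` directives, roughly as in the sketch below (the version numbers are placeholders, and the directives in the actual scripts in [cli](../cli) may differ slightly).
```scala
//> using scala "3.2.2"
//> using lib "org.scalanlp::breeze:2.1.0"

// Tiny illustrative script: build a Breeze vector and print its sum,
// just to confirm that the declared dependency resolves.
@main def demo(): Unit =
  val v = breeze.linalg.DenseVector(1.0, 2.0, 3.0)
  println(breeze.linalg.sum(v))
```
Saving this as, say, `demo.scala` (a hypothetical file name) and running `scala-cli demo.scala` should fetch the declared dependencies and then run the program.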
31 | 32 | [logFact.scala](../cli/logFact.scala) is a simple stand-alone application to compute a log-factorial. It can be run with 33 | ```bash 34 | scala-cli logFact.scala -- 10000 35 | ``` 36 | 37 | [ML-GA.scala](../cli/ML-GA.scala) is a gradient-ascent example (to be discussed imminently). Don't worry about the details of this program yet, but note the external dependencies specified at the start of the script. It can be run with 38 | ```bash 39 | scala-cli ML-GA.scala 40 | ``` 41 | 42 | Make sure that you are able to run these scripts before proceeding. 43 | 44 | `scala-cli` can also be used to build multi-file applications, but for any non-trivial project, it is probably better to use `sbt`. 45 | 46 | ## sbt 47 | 48 | If you have `sbt` installed, you can use this to create, build and run Scala projects. 49 | 50 | ### Using an existing project 51 | 52 | There is an `sbt` project in the [Scala](../) directory of this code repository. This is indicated by the presence of a [build.sbt](../build.sbt) file, which contains most of the configuration info related to the project. 53 | 54 | From the directory containing the build file, running `sbt console` will give a Scala REPL. 55 | 56 | Try pasting one of the above snippets into the REPL and make sure it works. 57 | 58 | Exit the console (Ctrl-D), and type `sbt run` to build and run the application assocaited with the project. We will study this application later. 59 | 60 | Note that `sbt` is really designed to be run interactively. So, just typing `sbt` will lead to an `sbt` prompt (which is different from a Scala REPL prompt). Then, typing `console` from the `sbt` prompt will give a Scala REPL, `compile` will compile the project, `run` will compile and run the project, `test` will run any test suites, etc. Also note that `sbt` can "watch" the project directory for file changes. Use a `~` as a prefix to watch for changes. eg. typing `~compile` will watch for file changes and recompile the application (incrementally) whenever a file is saved. 61 | 62 | ### Creating a new project 63 | 64 | `sbt` expects projects to be configured in a particular way, with certain files in certain directories. `sbt` can itself be used to construct project templates with the correct structure. Different templates are suitable for different kinds of projects. eg. 65 | ```bash 66 | sbt new darrenjw/breeze.g8 67 | ``` 68 | will create a new project in a new directory inside the current directory, using the template provided in the `breeze.g8` repo in the GitHub account of `darrenjw`. This is a template for a project with fairly minimal dependencies on the [Breeze](https://github.com/scalanlp/breeze) library for numerical computing. I have some other templates. eg. `fps.g8` contains dependencies on libraries commonly used for pure functional programming in Scala, `scala-glm.g8` has a dependency on my regression modelling library, etc. 69 | 70 | Create a new `sbt` project using one of the templates mentioned, and explore it. 71 | 72 | Create an `sbt` application that prints to the console the log-factorial of 10000. 73 | 74 | -------------------------------------------------------------------------------- /JAX/Random.md: -------------------------------------------------------------------------------- 1 | JAX splittable random numbers hands on 2 | ================ 3 | Darren Wilkinson 4 | 5 | ## Basics 6 | 7 | Call Python REPL (`python` or `python3`) and start messing around with 8 | splittable random numbers in JAX. 
9 | 10 | ``` python 11 | import numpy as np 12 | import scipy as sp 13 | import scipy.stats 14 | 15 | import jax 16 | from jax import grad, jit 17 | import jax.numpy as jnp 18 | import jax.scipy as jsp 19 | import jax.lax as jl 20 | import jax.random as jr 21 | 22 | k0 = jr.PRNGKey(42) 23 | 24 | [k1, k2] = jr.split(k0) 25 | 26 | k0 27 | ``` 28 | 29 | No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.) 30 | 31 | Array([ 0, 42], dtype=uint32) 32 | 33 | ``` python 34 | k1 35 | ``` 36 | 37 | Array([2465931498, 3679230171], dtype=uint32) 38 | 39 | ``` python 40 | k2 41 | ``` 42 | 43 | Array([255383827, 267815257], dtype=uint32) 44 | 45 | ``` python 46 | jr.uniform(k0) 47 | ``` 48 | 49 | Array(0.42672753, dtype=float32) 50 | 51 | ``` python 52 | jr.uniform(k0) 53 | ``` 54 | 55 | Array(0.42672753, dtype=float32) 56 | 57 | ``` python 58 | jr.uniform(k0) 59 | ``` 60 | 61 | Array(0.42672753, dtype=float32) 62 | 63 | ``` python 64 | jr.uniform(k1) 65 | ``` 66 | 67 | Array(0.5548415, dtype=float32) 68 | 69 | ``` python 70 | jr.uniform(k2) 71 | ``` 72 | 73 | Array(0.91457367, dtype=float32) 74 | 75 | Note that we don’t just have to split the key into two. 76 | 77 | ``` python 78 | [k3, k4, k5] = jr.split(k2, 3) 79 | 80 | k3 81 | ``` 82 | 83 | Array([1245536116, 3554031723], dtype=uint32) 84 | 85 | ``` python 86 | k4 87 | ``` 88 | 89 | Array([2395711293, 1211085833], dtype=uint32) 90 | 91 | ``` python 92 | k5 93 | ``` 94 | 95 | Array([3735526216, 3692299482], dtype=uint32) 96 | 97 | ``` python 98 | keys = jr.split(k5, 8) 99 | 100 | keys 101 | ``` 102 | 103 | Array([[ 467138136, 99877137], 104 | [ 899471984, 3583342607], 105 | [3016824453, 9292390], 106 | [4251814516, 3897073703], 107 | [2111337182, 1349560346], 108 | [ 969359427, 2464285400], 109 | [1422039866, 4206098692], 110 | [1762018044, 1626056740]], dtype=uint32) 111 | 112 | It can sometimes be convenient to split a key into an *array* of keys 113 | and then *map* or *fold* a random function over the array. 114 | 115 | ``` python 116 | jnp.sum(jl.map(jr.uniform, keys)) 117 | ``` 118 | 119 | Array(2.272056, dtype=float32) 120 | 121 | If we really just want to “advance” the key, we can do that too. 122 | 123 | ``` python 124 | jr.split(k5, 1) 125 | ``` 126 | 127 | Array([[4045048393, 3663185921]], dtype=uint32) 128 | 129 | ## Probability distributions 130 | 131 | `jr.uniform` is used to generate a $U(0,1)$, and `jr.normal` generates 132 | from a standard normal, $N(0,1)$. 133 | 134 | ``` python 135 | jl.map(jr.normal, keys) 136 | ``` 137 | 138 | Array([ 0.4662092 , -1.665825 , -0.6956026 , -1.4537567 , -0.16085608, 139 | 0.43099454, -1.9535689 , -1.2808337 ], dtype=float32) 140 | 141 | There are other commonly encountered random variables available for 142 | sampling. 143 | 144 | ``` python 145 | jr.exponential(k5) 146 | ``` 147 | 148 | Array(0.4373902, dtype=float32) 149 | 150 | ``` python 151 | jr.poisson(k5, 10.0) 152 | ``` 153 | 154 | Array(13, dtype=int32) 155 | 156 | ``` python 157 | jr.poisson(k5, 10.0, [10]) 158 | ``` 159 | 160 | Array([ 8, 8, 10, 8, 12, 11, 17, 10, 11, 8], dtype=int32) 161 | 162 | ## Random functions 163 | 164 | Suppose that you want to define your own random function. Here we will 165 | define our own function for sampling exponentials. 166 | 167 | ``` python 168 | def rexp(rate, key): 169 | return jnp.log1p(-jr.uniform(key)) / (-rate) 170 | ``` 171 | 172 | Notice how we have made the key the final input parameter. This is 173 | because of currying. 
We can call the function directly: 174 | 175 | ``` python 176 | rexp(10.0, k5) 177 | ``` 178 | 179 | Array(0.04373902, dtype=float32) 180 | 181 | But we can also create a particular random variable: 182 | 183 | ``` python 184 | from functools import partial 185 | 186 | my_rexp = partial(rexp, 20.0) 187 | ``` 188 | 189 | and then use this partially applied function with multiple keys. 190 | 191 | ``` python 192 | jl.map(my_rexp, keys) 193 | ``` 194 | 195 | Array([0.05688852, 0.0024529 , 0.01394199, 0.00379046, 0.02864421, 196 | 0.05494518, 0.00128518, 0.00527503], dtype=float32) 197 | 198 | This is why you might want to make the key the final input parameter. 199 | 200 | ## Exercise 201 | 202 | Write a function to simulate a 1d [random 203 | walk](https://en.wikipedia.org/wiki/Random_walk). 204 | -------------------------------------------------------------------------------- /Scala/docs/Parallel.md: -------------------------------------------------------------------------------- 1 | # Scala parallel programming crash course 2 | 3 | ## Parallel collections 4 | 5 | This simplest (but by no means the only) way to get started with parallel programming in Scala is using [parallel collections](https://docs.scala-lang.org/overviews/parallel-collections/overview.html). 6 | 7 | Let's create some random data: 8 | ```scala mdoc 9 | val rng = scala.util.Random(42) 10 | val v = Vector.fill(10)(rng.nextGaussian) 11 | ``` 12 | and pretend that we don't know the true mean. We can define a simple log likelihood function as 13 | ```scala mdoc 14 | def ll0(mu: Double)(x: Double): Double = -(x - mu)*(x - mu)/2.0 15 | ``` 16 | But now to mimic the computation of a very expensive likelihood, we can artificially slow it down. 17 | ```scala mdoc 18 | def ll(mu: Double)(x: Double): Double = 19 | Thread.sleep(500) 20 | ll0(mu)(x) 21 | ``` 22 | So now the likelihood evaluation associated with every observation will take at least half a second. 23 | 24 | We can evaluate the likelihood for our vector of observations (at 0) as follows: 25 | ```scala mdoc 26 | (v map ll(0.0)) reduce (_+_) 27 | ``` 28 | and this will take at least 5 seconds. However, if we convert the vector to a parallel collection 29 | ```scala mdoc 30 | import scala.collection.parallel.CollectionConverters.* 31 | val vp = v.par // convert v to a ParVector 32 | (vp map ll(0.0)) reduce (_+_) 33 | ``` 34 | the likelihood evaluation will be much quicker, since the `map` operation parallelises "perfectly", and the `reduce` operation can be evaluated in parallel with tree reduction. Scala automatically implements these parallelisations on parallel collections. Note that no change is required to the code - you just switch a regular (serial) collection to a parallel collection, and the library takes care of the rest. 35 | 36 | 37 | ## Futures 38 | 39 | Many, if not most, parallel computation operations that arise in statistical computing and machine learning can be formulated in terms of operations on parallel collections. However, we sometimes need more control over the way that computations are evaluated and combined in parallel. A standard approach to this problem in many functional programming languages is to use some kind of `Future` monad. Here we will illustrate how to use Scala's built-in `Future` to do the same parallel computation as above, but without relying on parallel collections. 40 | 41 | We start with a few imports. 
42 | ```scala mdoc 43 | import cats.* 44 | import cats.syntax.all.* 45 | import scala.concurrent.* 46 | import scala.util.Success 47 | import ExecutionContext.Implicits.global 48 | import scala.concurrent.duration.* 49 | ``` 50 | A `Future` evaluates a computation on another thread while returning immediately with a "wrapper" that will eventually contain the desired value (when the computation is finished). So while 51 | ```scala mdoc 52 | ll(0.0)(1.0) 53 | ``` 54 | will take at least half a second to return a value, 55 | ```scala mdoc 56 | Future(ll(0.0)(1.0)) 57 | ``` 58 | will return immediately, but the return type will be `Future[Double]`, not `Double`. The `Future` object has many methods, including those to map another computation over the result, and to ask whether the computation is completed. `Futures` make it easy to run many computations concurrently. For example 59 | ```scala mdoc 60 | val vf = v map (x => Future(ll(0.0)(x))) 61 | ``` 62 | will return immediately, with type `Vector[Future[Double]]`. Each of the `Futures` inside the vector will run concurrently. We can use `sequence` to change the `Vector[Future[Double]]` into a `Future[Vector[Double]]` and then `map` a `reduce` operation to get a `Future[Double]`. We can then extract the value we want from this. 63 | ```scala mdoc 64 | val lf = vf.sequence map (_ reduce (_+_)) 65 | val l = Await.result(lf, 2.seconds) 66 | println(l) 67 | ``` 68 | Crucially, this runs much faster than the corresponding sequential code. However, it's still not as good as using parallel collections, since `map` and `reduce` are still the standard sequential versions, which are still $\mathcal{O}(n)$ operations. We could write our own `parMap` and `parReduce` methods, which use binary splitting to evaluate in parallel, but this is a bit beyond the scope of this very short course. 69 | 70 | ## Effects 71 | 72 | Futures are a powerful and flexible way to construct parallel and concurrent applications. However, they aren't a perfect fit to a pure functional approach to programming. The fact that futures "fire" as soon as they are created means that they have a *side-effect* (such as creating a thread), and that is potentially problematic. People have developed more principled functional effects systems for Scala, such as [Cats effect](https://typelevel.org/cats-effect/) with its `IO` monad. These provide better mechanisms for parallel and concurrent programming in Scala. They are, however, (well) beyond the scope of this course. 73 | 74 | -------------------------------------------------------------------------------- /Scala/md/ScalaCC.md: -------------------------------------------------------------------------------- 1 | # Scala crash course 2 | 3 | ## Immutability 4 | 5 | Scala is not a *pure* functional language, so it is *possible* to use *mutable variables*. 6 | 7 | ```scala 8 | var v = 5 9 | // v: Int = 5 10 | v = v + 1 11 | v 12 | // res1: Int = 6 13 | ``` 14 | But use of these is strongly discouraged in favour of *immutable values*. 15 | ```scala 16 | val x = 5 17 | x = x + 1 18 | // error: 19 | // Reassignment to val x 20 | // def linFun(m: Double, c: Double)(x: Double): Double = 21 | // ^ 22 | ``` 23 | 24 | ## Immutable collections 25 | 26 | We often want to work with collections of values of given type, and these are also immutable. 
27 | ```scala 28 | val vi = Vector(2, 4, 6, 3) 29 | // vi: Vector[Int] = Vector(2, 4, 6, 3) 30 | // now update value at position 2 to be 7 31 | val viu = vi.updated(2, 7) 32 | // viu: Vector[Int] = Vector(2, 4, 7, 3) 33 | // original vector unchanged 34 | vi 35 | // res3: Vector[Int] = Vector(2, 4, 6, 3) 36 | viu 37 | // res4: Vector[Int] = Vector(2, 4, 7, 3) 38 | ``` 39 | The new vector is *effectively* an updated copy, but that doesn't mean that all of the data has been copied. We can point to data in the original vector safely, since it is immutable. 40 | 41 | ## Manipulating collections 42 | 43 | Since collections are *functors*, we can *map* them. 44 | ```scala 45 | vi.map(x => x*2) 46 | // res5: Vector[Int] = Vector(4, 8, 12, 6) 47 | vi map (x => x*2) 48 | // res6: Vector[Int] = Vector(4, 8, 12, 6) 49 | vi map (_*2) 50 | // res7: Vector[Int] = Vector(4, 8, 12, 6) 51 | ``` 52 | We can also reduce them. 53 | ```scala 54 | vi.foldLeft(0)(_+_) 55 | // res8: Int = 15 56 | vi.reduce(_+_) 57 | // res9: Int = 15 58 | vi.sum 59 | // res10: Int = 15 60 | ``` 61 | Note that `map` and `reduce` are higher-order functions (HoFs), since they accept a function as an argument. 62 | 63 | ## Writing functions 64 | 65 | Here's a simple definition of a log-factorial function. 66 | ```scala 67 | def logFact(n: Int): Double = 68 | (1 to n).map(_.toDouble).map(math.log).sum 69 | 70 | logFact(3) 71 | // res11: Double = 1.791759469228055 72 | logFact(10) 73 | // res12: Double = 15.104412573075518 74 | logFact(100000) 75 | // res13: Double = 1051299.2218991187 76 | ``` 77 | This requires creating a collection of size `n`, which might not be desirable. 78 | 79 | We will use the log-factorial function to explore the use of recursion instead of more imperative looping constructs. 80 | 81 | ## Recursive functions 82 | 83 | ```scala 84 | def logFactR(n: Int): Double = 85 | if (n <= 1) 0.0 else 86 | math.log(n) + logFactR(n - 1) 87 | 88 | logFactR(3) 89 | // res14: Double = 1.791759469228055 90 | logFactR(10) 91 | // res15: Double = 15.104412573075518 92 | ``` 93 | This function is recursive, but not tail-recursive since the result of the recursive call (`logFactR(n - 1)`) is modified before the correct value is returned. So, although it doesn't consume heap space, it consumes stack space, which is worse. That is, this function will stack-overflow if evaluated at a large enough input value. 94 | ```scala 95 | logFactR(100000) 96 | // java.lang.StackOverflowError 97 | ``` 98 | 99 | ## Tail-recursive functions 100 | 101 | ```scala 102 | @annotation.tailrec 103 | final def logFactTR(n: Int, acc: Double = 0.0): Double = 104 | if (n <= 1) acc else 105 | logFactTR(n - 1, math.log(n) + acc) 106 | 107 | logFactTR(3) 108 | // res16: Double = 1.791759469228055 109 | logFactTR(10) 110 | // res17: Double = 15.104412573075514 111 | logFactTR(100000) 112 | // res18: Double = 1051299.221899134 113 | ``` 114 | This version consumes neither heap nor stack space. The `tailrec` annotation is optional, but is useful, since it forces the compiler to flag an error if there is some reason why the tail call elimination can not be performed (eg. here, the method needed to be decalared `final` so it could not be over-ridden). 115 | 116 | ## Helper functions 117 | 118 | The previous example made use of the fact that Scala has optional arguments with default values. Even if this wasn't the case, we could acheive the same thing by embedding the two-argument version as a private function inside the one-argument version. 
119 | ```scala 120 | def logFactTRH(n: Int): Double = 121 | def go(n: Int, acc: Double): Double = 122 | if (n <= 1) acc else 123 | go(n - 1, math.log(n) + acc) 124 | go(n, 0.0) 125 | 126 | logFactTRH(3) 127 | // res19: Double = 1.791759469228055 128 | logFactTRH(10) 129 | // res20: Double = 15.104412573075514 130 | logFactTRH(100000) 131 | // res21: Double = 1051299.221899134 132 | ``` 133 | 134 | ## Curried functions 135 | 136 | Sometimes we want to partially apply a function by providing some of the arguments. We can flag this by grouping them. 137 | ```scala 138 | def linFun(m: Double, c: Double)(x: Double): Double = 139 | m*x + c 140 | 141 | val f = linFun(2, 3) 142 | // f: Function1[Double, Double] = repl.MdocSession$MdocApp$$Lambda$8192/0x0000000802165e08@36ff03be 143 | 144 | f(0) 145 | // res22: Double = 3.0 146 | f(1) 147 | // res23: Double = 5.0 148 | f(2) 149 | // res24: Double = 7.0 150 | ``` 151 | Since the output of the partial call is a function, this is another example of a HoF. 152 | -------------------------------------------------------------------------------- /JAX/Readme.md: -------------------------------------------------------------------------------- 1 | JAX for Python crash course 2 | ================ 3 | Darren Wilkinson 4 | 5 | JAX is a pure functional language embedded in python, but designed to 6 | feel as much like python as practical. From a `python` prompt, first do 7 | some imports. 8 | 9 | ``` python 10 | import os 11 | import pandas as pd 12 | import numpy as np 13 | import scipy as sp 14 | 15 | import jax 16 | from jax import grad, jit 17 | import jax.numpy as jnp 18 | import jax.scipy as jsp 19 | import jax.lax as jl 20 | ``` 21 | 22 | If any of these imports fail, you probably don’t have JAX installed 23 | correctly (in your current environment). For most numpy and scipy 24 | functions, there is a JAX equivalent, so with the above imports, 25 | translating from regular python to JAX often involves replacing a call 26 | to `np.X` (for some `X`) with a call to `jnp.X`, and `sp.X` with 27 | `jsp.X`. But there are other issues to confront, due to the fact that 28 | JAX is a pure functional language and python most definitely isn’t! 29 | 30 | ## Immutable collections 31 | 32 | ``` python 33 | v = jnp.array([2, 4, 6, 3]).astype(jnp.float32) 34 | ``` 35 | 36 | No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.) 37 | 38 | Note that the type of the array has been set to `float32`, since these 39 | are fast and efficient, especially on GPUs. JAX arrays are immutable. 40 | 41 | ``` python 42 | v[2] 43 | vu = v.at[2].set(7) 44 | vu 45 | ``` 46 | 47 | Array([2., 4., 7., 3.], dtype=float32) 48 | 49 | ``` python 50 | v 51 | ``` 52 | 53 | Array([2., 4., 6., 3.], dtype=float32) 54 | 55 | We can map JAX arrays. 56 | 57 | ``` python 58 | jl.map(lambda x: 2*x, v) 59 | ``` 60 | 61 | Array([ 4., 8., 12., 6.], dtype=float32) 62 | 63 | Mapping can be parallelised, and JAX will do this automatically. We can 64 | also reduce them. 65 | 66 | ``` python 67 | jl.reduce(v, 0.0, lambda x,y: x+y, [0]) 68 | ``` 69 | 70 | Array(15., dtype=float32) 71 | 72 | ``` python 73 | jnp.sum(v) 74 | ``` 75 | 76 | Array(15., dtype=float32) 77 | 78 | The reduction must be *monoidal* (the operation must be associative, and 79 | the initial value must be an identity wrt that operation), or the result 80 | is undefined. Since the reduction is monoidal, it can be parallised via 81 | tree reduction, and JAX will do this automatically. 
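Sum is not the only monoidal reduction: any associative operation with an
identity element will do. For example, here is a sketch of a maximum
reduction, whose identity element is `-inf` (illustrative only; the call
simply mirrors the `jl.reduce` usage above).

``` python
# maximum is associative and -inf is its identity, so this
# reduction can also be parallelised safely
jl.reduce(v, -jnp.inf, lambda x, y: jnp.maximum(x, y), [0])
```

This should return the largest element of `v` (here, 6.0).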
82 | 83 | ## Functions 84 | 85 | Functions are written like regular python functions. But if they are to 86 | be part of a hot loop, they can be JIT-compiled. 87 | 88 | ``` python 89 | @jit 90 | def sumArray1d(v): 91 | return jl.reduce(v, 0.0, lambda x,y: x+y, [0]) 92 | 93 | float(sumArray1d(v)) 94 | ``` 95 | 96 | 15.0 97 | 98 | Note that you can’t use `float` inside a JIT’d JAX function, since 99 | `float` is a python function, not a JAX function. 100 | 101 | We have seen that functional languages often exploit recursion, either 102 | explicitly or implicitly, for the implementation of “looping” 103 | constructs. However, allowing general recursion turns out to be 104 | problematic for reverse-mode automatic differentiation. Consequently, 105 | some differentiable functional languages (such as JAX and Dex) disallow 106 | recursive functions. But without any mutable variables or recursion, how 107 | can we loop?! In this case the language must provide us with some 108 | built-in constructs. In JAX, the two most commonly used constructs (in 109 | addition to 110 | [`map`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.map.html), 111 | [`reduce`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.reduce.html) 112 | and 113 | [`scan`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.scan.html)) 114 | are 115 | [`jax.lax.while_loop`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.while_loop.html) 116 | and 117 | [`jax.lax.fori_loop`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.fori_loop.html). 118 | Note that you cannot reverse-mode differentiate through a `while_loop` 119 | (this is problematic for the same reason that recursive functions are 120 | problematic - you cannot know statically what the memory requirements 121 | will be). A for loop is relatively straightforward. 122 | 123 | ``` python 124 | def logFactF(n): 125 | return float(jl.fori_loop(1, n+1, 126 | lambda i,acc: acc + jnp.log(i), 0.0)) 127 | 128 | logFactF(3) 129 | ``` 130 | 131 | 1.7917594909667969 132 | 133 | ``` python 134 | logFactF(100000) 135 | ``` 136 | 137 | 1051299.625 138 | 139 | Note that the upper bound on the loop is *exclusive*. A while loop is 140 | slightly more involved, due to the need to propagate two items of state 141 | (the counter and the accumulator). However, the while loop can be used 142 | when the number of iterations is not known statically. 143 | 144 | ``` python 145 | def logFactW(n): 146 | def cont(state): 147 | [i, acc] = state 148 | return i <= n 149 | def advance(state): 150 | [i, acc] = state 151 | return [i + 1, acc + jnp.log(i)] 152 | return float(jl.while_loop(cont, advance, [1, 0.0])[1]) 153 | 154 | logFactW(3) 155 | ``` 156 | 157 | 1.7917594909667969 158 | 159 | ``` python 160 | logFactW(100000) 161 | ``` 162 | 163 | 1051299.625 164 | -------------------------------------------------------------------------------- /Scala/md/Parallel.md: -------------------------------------------------------------------------------- 1 | # Scala parallel programming crash course 2 | 3 | ## Parallel collections 4 | 5 | This simplest (but by no means the only) way to get started with parallel programming in Scala is using [parallel collections](https://docs.scala-lang.org/overviews/parallel-collections/overview.html). 
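Note that since Scala 2.13 the parallel collections live in a separate module, so a build needs a dependency along the following lines (the version shown is illustrative; check for the current one). The sbt project in this directory should already be configured appropriately, since the examples below compile against it.
```scala
// illustrative build.sbt line for the parallel collections module
libraryDependencies += "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4"
```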
6 | 7 | Let's create some random data: 8 | ```scala 9 | val rng = scala.util.Random(42) 10 | // rng: Random = scala.util.Random@4fbdbb7 11 | val v = Vector.fill(10)(rng.nextGaussian) 12 | // v: Vector[Double] = Vector( 13 | // 1.1419053154730547, 14 | // 0.9194079489827879, 15 | // -0.9498666368908959, 16 | // -1.1069902863993377, 17 | // 0.2809776380727795, 18 | // 0.6846227956326554, 19 | // -0.8172214073987268, 20 | // -1.3966434026780434, 21 | // -0.19094451307087512, 22 | // 1.4862133923906502 23 | // ) 24 | ``` 25 | and pretend that we don't know the true mean. We can define a simple log likelihood function as 26 | ```scala 27 | def ll0(mu: Double)(x: Double): Double = -(x - mu)*(x - mu)/2.0 28 | ``` 29 | But now to mimic the computation of a very expensive likelihood, we can artificially slow it down. 30 | ```scala 31 | def ll(mu: Double)(x: Double): Double = 32 | Thread.sleep(500) 33 | ll0(mu)(x) 34 | ``` 35 | So now the likelihood evaluation associated with every observation will take at least half a second. 36 | 37 | We can evaluate the likelihood for our vector of observations (at 0) as follows: 38 | ```scala 39 | (v map ll(0.0)) reduce (_+_) 40 | // res0: Double = -4.844171665682075 41 | ``` 42 | and this will take at least 5 seconds. However, if we convert the vector to a parallel collection 43 | ```scala 44 | import scala.collection.parallel.CollectionConverters.* 45 | val vp = v.par // convert v to a ParVector 46 | // vp: ParVector[Double] = ParVector(1.1419053154730547, 0.9194079489827879, -0.9498666368908959, -1.1069902863993377, 0.2809776380727795, 0.6846227956326554, -0.8172214073987268, -1.3966434026780434, -0.19094451307087512, 1.4862133923906502) // convert v to a ParVector 47 | (vp map ll(0.0)) reduce (_+_) 48 | // res1: Double = -4.844171665682075 49 | ``` 50 | the likelihood evaluation will be much quicker, since the `map` operation parallelises "perfectly", and the `reduce` operation can be evaluated in parallel with tree reduction. Scala automatically implements these parallelisations on parallel collections. Note that no change is required to the code - you just switch a regular (serial) collection to a parallel collection, and the library takes care of the rest. 51 | 52 | 53 | ## Futures 54 | 55 | Many, if not most, parallel computation operations that arise in statistical computing and machine learning can be formulated in terms of operations on parallel collections. However, we sometimes need more control over the way that computations are evaluated and combined in parallel. A standard approach to this problem in many functional programming languages is to use some kind of `Future` monad. Here we will illustrate how to use Scala's built-in `Future` to do the same parallel computation as above, but without relying on parallel collections. 56 | 57 | We start with a few imports. 58 | ```scala 59 | import cats.* 60 | import cats.syntax.all.* 61 | import scala.concurrent.* 62 | import scala.util.Success 63 | import ExecutionContext.Implicits.global 64 | import scala.concurrent.duration.* 65 | ``` 66 | A `Future` evaluates a computation on another thread while returning immediately with a "wrapper" that will eventually contain the desired value (when the computation is finished). 
So while 67 | ```scala 68 | ll(0.0)(1.0) 69 | // res2: Double = -0.5 70 | ``` 71 | will take at least half a second to return a value, 72 | ```scala 73 | Future(ll(0.0)(1.0)) 74 | // res3: Future[Double] = Future(Success(-0.5)) 75 | ``` 76 | will return immediately, but the return type will be `Future[Double]`, not `Double`. The `Future` object has many methods, including those to map another computation over the result, and to ask whether the computation is completed. `Futures` make it easy to run many computations concurrently. For example 77 | ```scala 78 | val vf = v map (x => Future(ll(0.0)(x))) 79 | // vf: Vector[Future[Double]] = Vector( 80 | // Future(Success(-0.6519738747528083)), 81 | // Future(Success(-0.42265548832636834)), 82 | // Future(Success(-0.4511233139392105)), 83 | // Future(Success(-0.6127137470912438)), 84 | // Future(Success(-0.03947421654847893)), 85 | // Future(Success(-0.2343541861499363)), 86 | // Future(Success(-0.3339254143553779)), 87 | // Future(Success(-0.9753063971220517)), 88 | // Future(Success(-0.0182299035359368)), 89 | // Future(Success(-1.1044151238606623)) 90 | // ) 91 | ``` 92 | will return immediately, with type `Vector[Future[Double]]`. Each of the `Futures` inside the vector will run concurrently. We can use `sequence` to change the `Vector[Future[Double]]` into a `Future[Vector[Double]]` and then `map` a `reduce` operation to get a `Future[Double]`. We can then extract the value we want from this. 93 | ```scala 94 | val lf = vf.sequence map (_ reduce (_+_)) 95 | // lf: Future[Double] = Future(Success(-4.844171665682075)) 96 | val l = Await.result(lf, 2.seconds) 97 | // l: Double = -4.844171665682075 98 | println(l) 99 | // -4.844171665682075 100 | ``` 101 | Crucially, this runs much faster than the corresponding sequential code. However, it's still not as good as using parallel collections, since `map` and `reduce` are still the standard sequential versions, which are still $\mathcal{O}(n)$ operations. We could write our own `parMap` and `parReduce` methods, which use binary splitting to evaluate in parallel, but this is a bit beyond the scope of this very short course. 102 | 103 | ## Effects 104 | 105 | Futures are a powerful and flexible way to construct parallel and concurrent applications. However, they aren't a perfect fit to a pure functional approach to programming. The fact that futures "fire" as soon as they are created means that they have a *side-effect* (such as creating a thread), and that is potentially problematic. People have developed more principled functional effects systems for Scala, such as [Cats effect](https://typelevel.org/cats-effect/) with its `IO` monad. These provide better mechanisms for parallel and concurrent programming in Scala. They are, however, (well) beyond the scope of this course. 
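To give a flavour of what this looks like in practice, below is a minimal illustrative sketch of the same parallel likelihood evaluation using Cats Effect's `IO`. It assumes a cats-effect 3 dependency is available (it is not necessarily part of this project's build), and it reuses the `v` and `ll` definitions from above.
```scala
import cats.effect.*
import cats.effect.unsafe.implicits.global
import cats.syntax.all.*

// Describe (but do not yet run) the slow likelihood evaluation for each observation.
// parTraverse runs the IO actions in parallel; IO.blocking marks them as thread-blocking.
val lio: IO[Double] =
  v.toList.parTraverse(x => IO.blocking(ll(0.0)(x))).map(_.sum)

// Nothing actually happens until the description is run at "the end of the world".
val l: Double = lio.unsafeRunSync()
```
Because an `IO` value is only a *description* of a computation, the side-effect of spawning work is deferred until the program is finally run, which is what makes this style a better fit for pure functional programming than `Future`.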
106 | 107 | -------------------------------------------------------------------------------- /pima.data: -------------------------------------------------------------------------------- 1 | 5 86 68 28 30.2 0.364 24 No 2 | 7 195 70 33 25.1 0.163 55 Yes 3 | 5 77 82 41 35.8 0.156 35 No 4 | 0 165 76 43 47.9 0.259 26 No 5 | 0 107 60 25 26.4 0.133 23 No 6 | 5 97 76 27 35.6 0.378 52 Yes 7 | 3 83 58 31 34.3 0.336 25 No 8 | 1 193 50 16 25.9 0.655 24 No 9 | 3 142 80 15 32.4 0.2 63 No 10 | 2 128 78 37 43.3 1.224 31 Yes 11 | 0 137 40 35 43.1 2.288 33 Yes 12 | 9 154 78 30 30.9 0.164 45 No 13 | 1 189 60 23 30.1 0.398 59 Yes 14 | 12 92 62 7 27.6 0.926 44 Yes 15 | 1 86 66 52 41.3 0.917 29 No 16 | 4 99 76 15 23.2 0.223 21 No 17 | 1 109 60 8 25.4 0.947 21 No 18 | 11 143 94 33 36.6 0.254 51 Yes 19 | 1 149 68 29 29.3 0.349 42 Yes 20 | 0 139 62 17 22.1 0.207 21 No 21 | 2 99 70 16 20.4 0.235 27 No 22 | 1 100 66 29 32 0.444 42 No 23 | 4 83 86 19 29.3 0.317 34 No 24 | 0 101 64 17 21 0.252 21 No 25 | 1 87 68 34 37.6 0.401 24 No 26 | 9 164 84 21 30.8 0.831 32 Yes 27 | 1 99 58 10 25.4 0.551 21 No 28 | 0 140 65 26 42.6 0.431 24 Yes 29 | 5 108 72 43 36.1 0.263 33 No 30 | 2 110 74 29 32.4 0.698 27 No 31 | 1 79 60 42 43.5 0.678 23 No 32 | 3 148 66 25 32.5 0.256 22 No 33 | 0 121 66 30 34.3 0.203 33 Yes 34 | 3 158 64 13 31.2 0.295 24 No 35 | 2 105 80 45 33.7 0.711 29 Yes 36 | 13 145 82 19 22.2 0.245 57 No 37 | 1 79 80 25 25.4 0.583 22 No 38 | 1 71 48 18 20.4 0.323 22 No 39 | 0 102 86 17 29.3 0.695 27 No 40 | 0 119 66 27 38.8 0.259 22 No 41 | 8 176 90 34 33.7 0.467 58 Yes 42 | 1 97 68 21 27.2 1.095 22 No 43 | 4 129 60 12 27.5 0.527 31 No 44 | 1 97 64 19 18.2 0.299 21 No 45 | 0 86 68 32 35.8 0.238 25 No 46 | 2 125 60 20 33.8 0.088 31 No 47 | 5 123 74 40 34.1 0.269 28 No 48 | 2 92 76 20 24.2 1.698 28 No 49 | 3 171 72 33 33.3 0.199 24 Yes 50 | 1 199 76 43 42.9 1.394 22 Yes 51 | 3 116 74 15 26.3 0.107 24 No 52 | 2 83 66 23 32.2 0.497 22 No 53 | 8 154 78 32 32.4 0.443 45 Yes 54 | 1 114 66 36 38.1 0.289 21 No 55 | 1 106 70 28 34.2 0.142 22 No 56 | 4 127 88 11 34.5 0.598 28 No 57 | 1 124 74 36 27.8 0.1 30 No 58 | 1 109 38 18 23.1 0.407 26 No 59 | 2 123 48 32 42.1 0.52 26 No 60 | 8 167 106 46 37.6 0.165 43 Yes 61 | 7 184 84 33 35.5 0.355 41 Yes 62 | 1 96 64 27 33.2 0.289 21 No 63 | 10 129 76 28 35.9 0.28 39 No 64 | 6 92 62 32 32 0.085 46 No 65 | 6 109 60 27 25 0.206 27 No 66 | 5 139 80 35 31.6 0.361 25 Yes 67 | 6 134 70 23 35.4 0.542 29 Yes 68 | 3 106 54 21 30.9 0.292 24 No 69 | 0 131 66 40 34.3 0.196 22 Yes 70 | 0 135 94 46 40.6 0.284 26 No 71 | 5 158 84 41 39.4 0.395 29 Yes 72 | 3 112 74 30 31.6 0.197 25 Yes 73 | 8 181 68 36 30.1 0.615 60 Yes 74 | 2 121 70 32 39.1 0.886 23 No 75 | 1 168 88 29 35 0.905 52 Yes 76 | 1 144 82 46 46.1 0.335 46 Yes 77 | 2 101 58 17 24.2 0.614 23 No 78 | 2 96 68 13 21.1 0.647 26 No 79 | 3 107 62 13 22.9 0.678 23 Yes 80 | 12 121 78 17 26.5 0.259 62 No 81 | 2 100 64 23 29.7 0.368 21 No 82 | 4 154 72 29 31.3 0.338 37 No 83 | 6 125 78 31 27.6 0.565 49 Yes 84 | 10 125 70 26 31.1 0.205 41 Yes 85 | 2 122 76 27 35.9 0.483 26 No 86 | 2 114 68 22 28.7 0.092 25 No 87 | 1 115 70 30 34.6 0.529 32 Yes 88 | 7 114 76 17 23.8 0.466 31 No 89 | 2 115 64 22 30.8 0.421 21 No 90 | 1 130 60 23 28.6 0.692 21 No 91 | 1 79 75 30 32 0.396 22 No 92 | 4 112 78 40 39.4 0.236 38 No 93 | 7 150 78 29 35.2 0.692 54 Yes 94 | 1 91 54 25 25.2 0.234 23 No 95 | 1 100 72 12 25.3 0.658 28 No 96 | 12 140 82 43 39.2 0.528 58 Yes 97 | 4 110 76 20 28.4 0.118 27 No 98 | 2 94 76 18 31.6 0.649 23 No 99 | 2 84 50 23 30.4 0.968 21 No 100 | 10 148 84 
48 37.6 1.001 51 Yes 101 | 3 61 82 28 34.4 0.243 46 No 102 | 4 117 62 12 29.7 0.38 30 Yes 103 | 3 99 80 11 19.3 0.284 30 No 104 | 3 80 82 31 34.2 1.292 27 Yes 105 | 4 154 62 31 32.8 0.237 23 No 106 | 6 103 72 32 37.7 0.324 55 No 107 | 6 111 64 39 34.2 0.26 24 No 108 | 0 124 70 20 27.4 0.254 36 Yes 109 | 1 143 74 22 26.2 0.256 21 No 110 | 1 81 74 41 46.3 1.096 32 No 111 | 4 189 110 31 28.5 0.68 37 No 112 | 4 116 72 12 22.1 0.463 37 No 113 | 7 103 66 32 39.1 0.344 31 Yes 114 | 8 124 76 24 28.7 0.687 52 Yes 115 | 1 71 78 50 33.2 0.422 21 No 116 | 0 137 84 27 27.3 0.231 59 No 117 | 9 112 82 32 34.2 0.26 36 Yes 118 | 4 148 60 27 30.9 0.15 29 Yes 119 | 1 136 74 50 37.4 0.399 24 No 120 | 9 145 80 46 37.9 0.637 40 Yes 121 | 1 93 56 11 22.5 0.417 22 No 122 | 1 107 72 30 30.8 0.821 24 No 123 | 12 151 70 40 41.8 0.742 38 Yes 124 | 1 97 70 40 38.1 0.218 30 No 125 | 5 144 82 26 32 0.452 58 Yes 126 | 2 112 86 42 38.4 0.246 28 No 127 | 2 99 52 15 24.6 0.637 21 No 128 | 1 109 56 21 25.2 0.833 23 No 129 | 1 120 80 48 38.9 1.162 41 No 130 | 7 187 68 39 37.7 0.254 41 Yes 131 | 3 129 92 49 36.4 0.968 32 Yes 132 | 7 179 95 31 34.2 0.164 60 No 133 | 6 80 66 30 26.2 0.313 41 No 134 | 2 105 58 40 34.9 0.225 25 No 135 | 3 191 68 15 30.9 0.299 34 No 136 | 0 95 80 45 36.5 0.33 26 No 137 | 4 99 72 17 25.6 0.294 28 No 138 | 0 137 68 14 24.8 0.143 21 No 139 | 1 97 70 15 18.2 0.147 21 No 140 | 0 100 88 60 46.8 0.962 31 No 141 | 1 167 74 17 23.4 0.447 33 Yes 142 | 0 180 90 26 36.5 0.314 35 Yes 143 | 2 122 70 27 36.8 0.34 27 No 144 | 1 90 62 12 27.2 0.58 24 No 145 | 3 120 70 30 42.9 0.452 30 No 146 | 6 154 78 41 46.1 0.571 27 No 147 | 2 56 56 28 24.2 0.332 22 No 148 | 0 177 60 29 34.6 1.072 21 Yes 149 | 3 124 80 33 33.2 0.305 26 No 150 | 8 85 55 20 24.4 0.136 42 No 151 | 12 88 74 40 35.3 0.378 48 No 152 | 9 152 78 34 34.2 0.893 33 Yes 153 | 0 198 66 32 41.3 0.502 28 Yes 154 | 0 188 82 14 32 0.682 22 Yes 155 | 5 139 64 35 28.6 0.411 26 No 156 | 7 168 88 42 38.2 0.787 40 Yes 157 | 2 197 70 99 34.7 0.575 62 Yes 158 | 2 142 82 18 24.7 0.761 21 No 159 | 8 126 74 38 25.9 0.162 39 No 160 | 3 158 76 36 31.6 0.851 28 Yes 161 | 3 130 78 23 28.4 0.323 34 Yes 162 | 2 100 54 28 37.8 0.498 24 No 163 | 1 164 82 43 32.8 0.341 50 No 164 | 4 95 60 32 35.4 0.284 28 No 165 | 2 122 52 43 36.2 0.816 28 No 166 | 4 85 58 22 27.8 0.306 28 No 167 | 0 151 90 46 42.1 0.371 21 Yes 168 | 6 144 72 27 33.9 0.255 40 No 169 | 3 111 90 12 28.4 0.495 29 No 170 | 1 107 68 19 26.5 0.165 24 No 171 | 6 115 60 39 33.7 0.245 40 Yes 172 | 5 105 72 29 36.9 0.159 28 No 173 | 7 194 68 28 35.9 0.745 41 Yes 174 | 4 184 78 39 37 0.264 31 Yes 175 | 0 95 85 25 37.4 0.247 24 Yes 176 | 7 124 70 33 25.5 0.161 37 No 177 | 1 111 62 13 24 0.138 23 No 178 | 7 137 90 41 32 0.391 39 No 179 | 9 57 80 37 32.8 0.096 41 No 180 | 2 157 74 35 39.4 0.134 30 No 181 | 2 95 54 14 26.1 0.748 22 No 182 | 12 140 85 33 37.4 0.244 41 No 183 | 0 117 66 31 30.8 0.493 22 No 184 | 8 100 74 40 39.4 0.661 43 Yes 185 | 9 123 70 44 33.1 0.374 40 No 186 | 0 138 60 35 34.6 0.534 21 Yes 187 | 14 100 78 25 36.6 0.412 46 Yes 188 | 14 175 62 30 33.6 0.212 38 Yes 189 | 0 74 52 10 27.8 0.269 22 No 190 | 1 133 102 28 32.8 0.234 45 Yes 191 | 0 119 64 18 34.9 0.725 23 No 192 | 5 155 84 44 38.7 0.619 34 No 193 | 1 128 48 45 40.5 0.613 24 Yes 194 | 2 112 68 22 34.1 0.315 26 No 195 | 1 140 74 26 24.1 0.828 23 No 196 | 2 141 58 34 25.4 0.699 24 No 197 | 7 129 68 49 38.5 0.439 43 Yes 198 | 0 106 70 37 39.4 0.605 22 No 199 | 1 118 58 36 33.3 0.261 23 No 200 | 8 155 62 26 34 0.543 46 Yes 201 | 
-------------------------------------------------------------------------------- /Intro/Resources.md: -------------------------------------------------------------------------------- 1 | # Wrap-up 2 | 3 | We have just begun to cover the bare essentials of each of the languages/libraries considered. Hopefully it has been just enough to get a feel for why they exist and are interesting. But there is a lot more to know about all of this stuff. To start with I recommend picking just one of the languages considered here and spending some time diving a little deeper into the language and library ecosystem associated with it. Suggested further information is detailed below. Don't be in too much of a rush to understand everything. Learning (functional) programming is a life-long journey. 4 | 5 | 6 | # Learning resources 7 | 8 | Below are some links for further information about the languages and libraries briefly introduced in this course. See my [logreg repo](https://github.com/darrenjw/logreg) for more example (MCMC) code using these languages and libraries. 9 | 10 | ## Scala 11 | 12 | * [Scala programming language](https://www.scala-lang.org/) - main web site 13 | * [Learning resources](https://docs.scala-lang.org/) 14 | * [Scastie](https://scastie.scala-lang.org/) - try Scala in the browser 15 | * [Scala exercises](https://www.scala-exercises.org/) - learn Scala in the browswer 16 | * [Coursier](https://get-coursier.io/) - Scala tool and artifact manager 17 | * [scala-cli](https://scala-cli.virtuslab.org/) - Scala command-line tool 18 | * [sbt](https://www.scala-sbt.org/) - the Scala build tool 19 | * [An introduction to sbt](https://blog.rockthejvm.com/sbt-tutorial/) - a nice, recent tutorial 20 | * [Scala examples](https://github.com/darrenjw/code-examples/blob/main/Scala/Readme.md) - some Scala 3 examples I've written, with a STEM focus 21 | * [Functional Programming in Scala, second edition](https://www.manning.com/books/functional-programming-in-scala-second-edition) - unfortunately this book is not free, but remains one of the best introductions to functional programming (in any language) 22 | * [Chapter notes](https://github.com/fpinscala/fpinscala) 23 | * [The Science of Functional Programming](https://github.com/winitzki/sofp) - huge, free book, covering functional programming and its relationship to category theory, using Scala 2 to illustrate the ideas 24 | * [Typelevel](https://typelevel.org/) - libraries for functional programming in Scala 25 | * [Spire](https://typelevel.org/spire/) - numerics library 26 | * [Cats](https://typelevel.org/cats/) - category theory abstractions for pure functional programming 27 | * [Cats effect](https://typelevel.org/cats-effect/) - functional parallel and concurrent effects system 28 | * [Monocle](https://www.optics.dev/Monocle/) - lens/optics library 29 | * [FS2](https://fs2.io/) - pure functional streaming data library 30 | * [Breeze](https://github.com/scalanlp/breeze/) - numerical linear algebra and scientific computing (like numpy/scipy for Scala) 31 | * [Smile](https://haifengl.github.io/) - Statistical machine learning library (like scikit-learn for Scala) 32 | * [Apache Spark](https://spark.apache.org/) - library for parallel and distributed big data processing 33 | * Bindings for (python) ML libraries: 34 | * [lamp](https://pityka.github.io/lamp/) - Scala tensor library built on Torch 35 | * [storch](https://storch.dev/) - Scala 3 bindings for Torch 36 | * [scala_torch](https://github.com/microsoft/scala_torch) - Microsoft's Torch bindings 
37 | * [tensorflow_scala](http://platanios.org/tensorflow_scala/) - TensorFlow bindings for Scala (possibly abandoned?) 38 | * [GeoTrellis](https://geotrellis.io/) - Geographic data processing library for Scala 39 | * [Scala for statistical computing and data science](https://github.com/darrenjw/scala-course/blob/master/StartHere.md) - materials for my short course (not yet updated for Scala 3) 40 | * [scala-glm](https://github.com/darrenjw/scala-glm) - my library for regression modelling 41 | * [scala-smfsb](https://github.com/darrenjw/scala-smfsb) - my library for simulation and inference for stochastic biochemical network models 42 | 43 | ## Haskell 44 | 45 | * [Haskell](https://www.haskell.org/) - main web site 46 | * [Documentation](https://www.haskell.org/documentation/) 47 | * [Cabal](https://www.haskell.org/cabal/) - a build tool 48 | * [Hackage](https://hackage.haskell.org/) - a package repository 49 | * [Stack](https://docs.haskellstack.org/en/stable/) - a newer build tool 50 | * [Stackage](https://www.stackage.org/) - alternative repository 51 | * [Hoogle](https://hoogle.haskell.org/) - Haskell search engine 52 | * [lyahfgg](http://learnyouahaskell.com/) - good introductory book 53 | * [Haskell wiki](https://wiki.haskell.org/Haskell) 54 | * [Typeclassopedia](https://wiki.haskell.org/Typeclassopedia) 55 | * [dataHaskell](https://www.datahaskell.org/) - a project for data science with Haskell 56 | * [Deep Learning From The First Principles](https://penkovsky.com/neural-networks/) - series of blog posts on neural networks with Haskell 57 | * [backprop](https://backprop.jle.im/) - backprop/autodiff library 58 | * [hmatrix-backprop](https://hackage.haskell.org/package/hmatrix-backprop) - autodiff with linear algebra 59 | * [massiv](https://hackage.haskell.org/package/massiv) - (parallel) multi-dimensional arrays 60 | * [Hasktorch](http://hasktorch.org/) - Torch bindings for Haskell 61 | 62 | 63 | ## JAX 64 | 65 | * [Python](https://www.python.org/) - JAX is embedded in Python 66 | * [JAX](https://jax.readthedocs.io/en/latest/) 67 | * [github](https://github.com/google/jax) 68 | * [Quickstart](https://jax.readthedocs.io/en/latest/notebooks/quickstart.html) 69 | * [Tutorial: JAX 101](https://jax.readthedocs.io/en/latest/jax-101/) 70 | * [Random numbers](https://jax.readthedocs.io/en/latest/jax-101/05-random-numbers.html) 71 | * [Autodiff cookbook](https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html) 72 | * [Flax](https://flax.readthedocs.io/en/latest/guides/flax_basics.html) - neural networks with JAX 73 | * [BlackJAX](https://blackjax-devs.github.io/blackjax/) - MCMC sampling with JAX 74 | * [Deepmind JAX ecosystem](https://www.deepmind.com/blog/using-jax-to-accelerate-our-research) 75 | 76 | ## Dex 77 | 78 | * [Dex](https://github.com/google-research/dex-lang) 79 | * [Tutorial](https://google-research.github.io/dex-lang/examples/tutorial.html) 80 | * [Paper](https://arxiv.org/abs/2104.05372) 81 | * [Prelude](https://google-research.github.io/dex-lang/prelude.html) 82 | * [InDex](https://google-research.github.io/dex-lang/index.html) 83 | * [Stats library](https://google-research.github.io/dex-lang/lib/stats.html) 84 | * [My Dex reminders](https://github.com/darrenjw/djwhacks/blob/master/dex/Reminders.md) 85 | 86 | 87 | ## General 88 | 89 | ### Category theory for FP 90 | 91 | These resources are of generic interest for any serious functional programmer. Haskell is often used to illustrate the ideas. 
92 | 93 | * Category theory for programmers, by [Bartosz Milewski](https://bartoszmilewski.com/) 94 | * [Blog post series](https://bartoszmilewski.com/2014/10/28/category-theory-for-programmers-the-preface/) 95 | * [Book](https://github.com/hmemcpy/milewski-ctfp-pdf/) - the blog posts were turned into a book 96 | * [Scala edition](https://github.com/hmemcpy/milewski-ctfp-pdf/releases/tag/v1.3.0) - a Scala edition of the book was produced 97 | * [YouTube](https://www.youtube.com/channel/UC8BtBl8PNgd3vWKtm2yJ7aA) - Video lectures accompany the blog posts (and book) 98 | * [Series 1](https://www.youtube.com/playlist?list=PLbgaMIhjbmEnaH_LTkxLI7FMa2HsnawM_), [Series 2](https://www.youtube.com/playlist?list=PLbgaMIhjbmElia1eCEZNvsVscFef9m0dm), [Series 3](https://www.youtube.com/playlist?list=PLbgaMIhjbmEn64WVX4B08B4h2rOtueWIL) 99 | * [The Dao of FP](https://github.com/BartoszMilewski/Publications/raw/master/TheDaoOfFP/DaoFP.pdf) - Bartosz decided to write a proper book - this is a more comprehensive introduction to category theory in the context of functional programming 100 | 101 | ### FP and CT for ML 102 | 103 | * [Category theory for ML papers](https://github.com/bgavran/Category_Theory_Machine_Learning) - includes a [section on AD](https://github.com/bgavran/Category_Theory_Machine_Learning#differentiable-programming--automatic-differentiation) 104 | * [You only linearize once](https://arxiv.org/abs/2204.10923) - the functional AD approach used by JAX and Dex 105 | 106 | ### Data structures 107 | 108 | * [Purely functional data structures](https://books.google.co.uk/books?id=IV8hAwAAQBAJ) - book on immutable data structures, with examples in SML (and Haskell) ([PDF](https://doc.lagout.org/programmation/Functional%20Programming/Chris_Okasaki-Purely_Functional_Data_Structures-Cambridge_University_Press%281998%29.pdf)) 109 | 110 | ### Other interesting languages 111 | 112 | * [OCaml](https://ocaml.org/) - nice strongly typed and fairly functional language 113 | * [Flix](https://flix.dev/) - a new strongly typed and fairly functional language 114 | * [Futhark](https://futhark-lang.org/) - a language for GPU programming 115 | * [Rust](https://www.rust-lang.org/) - not really functional, but a good, safe, modern typed language, increasingly popular for systems programming 116 | 117 | -------------------------------------------------------------------------------- /Intro/Random.md: -------------------------------------------------------------------------------- 1 | # Functional and parallel random number generation 2 | 3 | ## Introduction 4 | 5 | ### Background 6 | 7 | Since current computers are essentially deterministic, random number generators used for stochastic simulation aren't truly random at all, and so are more correctly referred to as *pseudo-random number generators*. 8 | 9 | Random number generators typically consist of three main components: 10 | 11 | * An internal state, $s\in S$ 12 | * A deterministic function for transforming the internal state, $f: S\rightarrow S$ 13 | * A deterministic function for turning the current internal state into a pseudo-random number (say, a realisation of a $U(0,1)$ random variable), $g: S\rightarrow [0,1)$ 14 | 15 | The state space, $S$, must be sufficiently large that applying $f$ a large number of times is unlikely to get you back to a previously visited state. This is essential for large Monte Carlo simulations. There are typically other functions associated with random number generators. eg. 
there is very often a function that will turn a given numeric "seed" into an internal state, and there are often functions for turning the internal state into pseudo-random numbers other than $U(0,1)$, but these are not essential to our discussion. 16 | 17 | Typically, $f$ is a very complex non-linear function, leading to an apparently random sequence of internal states, and $g$ is a very simple function mapping the internal state to $[0,1)$. But note that this is not the only possibility. $f$ could be a very simple function (say, a function that increments the internal state as a "counter"), and then $g$ could be a very complex function that maps the internal state to an apparently random $[0,1)$. This is the idea behind counter-based pseudo-random number generators, but again, it's not essential to the current discussion. 18 | 19 | How these functions are used and accessed depends on the language. In imperative languages, many of the details are often hidden. eg. The internal state might be hidden in a *global* *mutable* variable. Then a function, say, `random`, could first update the internal state by calling $f$, and then over-write the old internal state with the new internal state, and it can then compute a value to return to the user by applying $g$ to the global internal state. 20 | 21 | In some languages/libraries the internal state is stored in a *mutable* variable, but it is not *global*. Then you use a seeding function to create a pointer/reference to an internal state that you then need to pass in to any random functions. But again, the random function updates the mutable internal state at the given reference and then turns the new state into a random value to be returned to the user. 22 | 23 | ### Functional random number generation 24 | 25 | In pure functional languages we don't have mutable variables, so we must be more explicit in our handling of the state. The standard way to do this is to have a generator function `random` : $S\rightarrow S\times [0,1)$, which could be implemented (in Scala) as 26 | ```scala 27 | def random(s0: S): (S, Double) = 28 | val s1 = f(s0) 29 | (s1, g(s1)) 30 | ``` 31 | We then need to unpack the result of this function, and be sure to pass the updated state into subsequent calls to the generator. There are ways to make this process more elegant and convenient by using the *state monad*, but this is again tangential to the current discussion. 32 | 33 | ### Parallel random number generation 34 | 35 | A major problem with all of the approaches discussed so far is that they are fundamentally *sequential*, and don't adapt trivially to a *parallel* context. Since the state is necessarily finite, repeated application of $f$ must eventually result in a previously visited state, and then due to the determinism of $f$, the states must then all repeat. So the state can be considered to live on a circular lattice. In a parallel context, you want different processors to have different internal states, corresponding to different points on the lattice. You need a reliable way to start different processors off at different points so that the probability of getting overlapping streams on different processors is very small. There are certain kinds of generators that allow efficient *leap-frogging*, ahead a large number of steps, but these are not so commonly used. *Parallel pseudo-random number generators* (PPRNGs) have states living on a toroidal lattice, so that different parallel processors can each traverse a different circular lattice. 
Both approaches can work for a very particular kind of parallel program on particular kinds of parallel hardware. If you are interested to learn more about these, I give an overview in a [very old paper](https://darrenjw.github.io/work/docs/pbc.pdf) of mine. However, I now consider them to have been a bit of a dead end. The problem is that they are not very flexible, and not well-suited to dynamic contexts and modern hardware consisting of multi-core CPUs, GPUs, etc. 36 | 37 | ### Splittable generators 38 | 39 | An arguably much better approach to parallel random number generation than old-school PPRNGs is to use *splittable* generators. There are many variations on this approach, but the main idea is to replace the *advancement* function 40 | 41 | $f: S \rightarrow S$ 42 | 43 | with a *split* function 44 | 45 | $f: S \rightarrow S\times S$ 46 | 47 | so that instead of just getting one new state, the *split* function returns two (or more) states. These states are different from each other and from the input state, and the split function is carefully chosen so that if either of the returned states is subsequently split, and so on, the *tree* of states generated is unlikely to contain any repeats. For historical reasons, the states of a splittable generator are often referred to as "keys". 48 | 49 | In this case, the `random` function is simply the deterministic function $g$, which takes as input a state (or key), and outputs a random number. The programmer is responsible for ensuring that the same key is never passed in to the random function, typically by splitting before use. Splittable generators solve many issues around the use of random number generation in parallel and concurrent contexts, and are also a perfect fit for pure functional programming languages. 50 | 51 | To understand why splittable generators solve the concurrency and parallelism problem, consider two functions, `my_random1` and `my_random2`, that each use the splittable generator functions $f$ and $g$ internally. Both need to be called, and ideally they would be executed in parallel. Since these are intended to be used in a random context, they must take a state (or key) as input. The user is responsible for ensuring the keys provided to the two functions are different, so the code to call these two functions might look something like (in Dex): 52 | ```haskell 53 | k0 = new_key 42 -- initial key from a seed 54 | [k1, k2] = split_key k0 55 | res1 = my_random1 k1 56 | res2 = my_random2 k2 57 | ... 58 | ``` 59 | Since the functions are pure and there is no dependence between them, there is no reason why `my_random1` and `my_random2` cannot be executed in parallel. But this argument works at every level of the program. Arbitrary concurrency and parallelism can be employed throughout. From a type safety viewpoint, there is also the added bonus that the intended randomness of any function is clearly indicated in its type signature, since it must take a state/key as input. In languages with support for currying, it is most convenient to always have the random key as the *final* input parameter. 60 | 61 | The downside of splittable generators is that the programmer is responsible for ensuring that no key is ever reused. But it is quite easy to accidentally re-use a key, leading to incorrect behaviour, and bugs in Monte Carlo codes are hard to track down. Smart people are thinking about ways to address this drawback, but for now you just have to be careful!
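As a concrete (if impure) illustration of splitting, the JVM ships with a splittable generator, `java.util.SplittableRandom`, based on the Steele et al. paper referenced in the further reading below. The following Scala snippet is just a sketch of the splitting pattern; note that, unlike the purely functional designs described above, `split()` here advances the parent generator's state in place.
```scala
// Splittable random numbers on the JVM (illustrative sketch)
val r0 = new java.util.SplittableRandom(42) // initial generator from a seed
val r1 = r0.split() // an independent generator, safe to hand to another task or thread
val r2 = r0.split() // another independent generator
println(r1.nextDouble()) // the streams from r1 and r2 are (with very high probability) non-overlapping
println(r2.nextDouble())
```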
62 | 63 | ## Further reading on splittable random numbers 64 | 65 | ### Random numbers in JAX 66 | 67 | * [Pseudo Random Numbers in JAX](https://jax.readthedocs.io/en/latest/jax-101/05-random-numbers.html) 68 | * [JAX Random numbers](https://jax.readthedocs.io/en/latest/notebooks/Common_Gotchas_in_JAX.html#random-numbers) 69 | * [JAX PRNG Design](https://github.com/google/jax/blob/main/docs/jep/263-prng.md) 70 | * [jax.random](https://jax.readthedocs.io/en/latest/jax.random.html) 71 | * JAX uses a threefry counter PRNG (described in Salmon et al, 2011)) with a functional array-oriented splitting model (described in Claessen and Palka, 2013) 72 | * Salmon et al (2011) [Parallel Random Numbers: As Easy as 1, 2, 3](https://doi.org/10.1145/2063384.2063405), *SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis*, 16. [PDF](http://www.thesalmons.org/john/random123/papers/random123sc11.pdf) 73 | * Claessen and Palka (2013) [Splittable pseudorandom number generators using cryptographic hashing](https://doi.org/10.1145/2578854.2503784), *Haskell '13: Proceedings of the 2013 ACM SIGPLAN symposium on Haskell*, 47-58. [PDF](https://publications.lib.chalmers.se/records/fulltext/183348/local_183348.pdf) 74 | 75 | 76 | ### Splittable random numbers on the JVM 77 | 78 | * Steele at al (2014) [Fast splittable pseudorandom number generators](https://doi.org/10.1145/2660193.2660195), *ACM SIGPLAN Notices*, **49**(10):453-472. [PDF](https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf) 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /Intro/Readme.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | ## Functional programming for scalable statistical computing and ML 4 | 5 | ### Background 6 | 7 | - Non-trivial research problems in statistical computing and machine learning are often complex and computationally intensive, requiring a custom implementation in some programming 8 | language 9 | 10 | - All of the languages commonly used for this purpose are very old, dating back to the 11 | dawn of the computing age, and are quite unsuitable for scalable and 12 | efficient statistical computation 13 | 14 | - Interpreted dynamic languages such as R, Python and Matlab are far 15 | too slow, among many other things 16 | 17 | - Languages such as C, C++, Java and Fortran are much faster, but are 18 | also very poorly suited to the development of efficient, 19 | well-tested, compositional, scalable code, able to take advantage of 20 | modern computing hardware 21 | 22 | ### Alternative languages and approaches 23 | 24 | - All of the languages on the previous slide are fundamentally 25 | **imperative** programming languages, mimicking closely the 26 | way computer processors actually operate 27 | 28 | - There have been huge advances in computing science in the decades 29 | since these languages were created, and many new, different and 30 | better programming languages have been created 31 | 32 | - Although **functional** programming (FP) languages are not 33 | new, there has been a large resurgence of interest in functional 34 | languages in the last decade or two, as people have begun to 35 | appreciate the advantages of the functional approach, especially in 36 | the context of developing large, scalable software systems, and the 37 | ability to take advantage of modern computing hardware 38 | 39 | - There has also been a swing 
away from **dynamically typed** 40 | programming languages back to **statically typed** languages 41 | 42 | ### Functional programming 43 | 44 | - FP languages emphasise the use of *immutable* data, *pure, 45 | referentially transparent functions*, and *higher order 46 | functions* 47 | 48 | - FP languages more naturally support composition of models, data and 49 | computation than imperative languages, leading to more scalable and 50 | testable software 51 | 52 | - Statically typed FP languages (such as Haskell and Scala) correspond 53 | closely to the *simply-typed lambda calculus* which is one 54 | of the canonical examples of a *Cartesian closed 55 | category* (CCC) 56 | 57 | - This connection between typed FP languages and CCCs enables the 58 | borrowing of ideas from category theory into FP 59 | 60 | - Category theory concepts such as *functors*, 61 | *monads* and *comonads* are useful for simplifying 62 | code that would otherwise be somewhat cumbersome to express in pure 63 | FP languages 64 | 65 | ### Concurrency, parallelism, distribution, state 66 | 67 | - Modern computing platforms feature processors with many cores, and 68 | possibly many such processors - parallel programming is required 69 | to properly exploit these 70 | 71 | - Most of the notorious difficulties associated with parallel 72 | programming revolve around *shared mutable state* 73 | 74 | - In pure FP, state is not mutable, so there is no mutable state, 75 | shared or otherwise 76 | 77 | - Consequently, most of the difficulties typically associated with 78 | parallel, distributed, and concurrent programming simply don't exist 79 | in FP - parallelism in FP is so easy and natural that it is 80 | sometimes completely automatic 81 | 82 | - This natural scalability of FP languages is one reason for their 83 | recent resurgence 84 | 85 | ### Compositionality 86 | 87 | - Not all issues relating to scalability of models and algorithms 88 | relate to parallelism 89 | 90 | - A good way to build a large model is to construct it from smaller 91 | models 92 | 93 | - A good way to develop a complex computation is to construct it from 94 | simpler computations 95 | 96 | - This (recursive) decomposition-composition approach is at the heart 97 | of the so-called "divide and conquer" approach to problem solution, 98 | and is very natural in FP (eg. 
FFT and BP for PGMs) 99 | 100 | - It also makes code much easier to *test* for correct 101 | behaviour 102 | 103 | - Category theory is in many ways the mathematical study of 104 | (associative) composition, and this leads to useful insights 105 | 106 | ### Statistical computing 107 | 108 | - *map-reduce* operations on *functorial* data 109 | collections can trivially parallelise (and distribute): 110 | 111 | - Likelihood evaluations for big data 112 | 113 | - ABC algorithms 114 | 115 | - SMC re-weighting and re-sampling 116 | 117 | - Gibbs sampling algorithms can be implemented as *cobind* 118 | operations on an appropriately coloured (parallel) 119 | *comonadic* conditional independence graph 120 | 121 | - *Probabilistic programming languages* (PPLs) can be 122 | implemented as embedded domain specific languages (DSLs) trivially 123 | using *for/do* syntax for *monadic composition* in 124 | conjunction with *probability monads* 125 | 126 | - *Automatic differentiation* (AD) is natural and convenient 127 | in functional languages, facilitating gradient-based algorithms 128 | 129 | ### Monoids and parallel "map-reduce" 130 | 131 | - A *monoid* is a very important concept in FP 132 | 133 | - For now we will think of a monoid as a *set* of elements 134 | with a *binary relation* $\star$ which is *closed* 135 | and *associative*, and having an *identity* element 136 | wrt the binary relation 137 | 138 | - You can think of it as a *semi-group* with an identity or a 139 | *group* without an inverse 140 | 141 | - `fold`s, `scan`s and `reduce` operations can be computed in 142 | parallel using *tree reduction*, reducing time from $O(n)$ 143 | to $O(\log n)$ (on infinite parallel hardware) 144 | 145 | - "map-reduce" is just the pattern of processing large 146 | amounts of data in an immutable collection by first 147 | *map*ping the data (in parallel) into a monoid and then 148 | tree-*reduc*ing the result (in parallel), sometimes called 149 | `foldMap` 150 | 151 | ### Distributed parallel collections with Apache Spark 152 | 153 | - *Apache Spark* ([spark.apache.org](spark.apache.org)) is a 154 | Scala library for distributed Big Data processing on (large) 155 | clusters of machines 156 | 157 | - The basic datatype provided by Spark is an *RDD* --- a 158 | resilient distributed dataset 159 | 160 | - An RDD is just a *lazy*, *distributed*, parallel 161 | monadic collection, supporting methods such as `map`, `flatMap`, 162 | `reduce`, etc., which can be used in exactly the same way as any 163 | other Scala collection 164 | 165 | - Code looks exactly the same whether the RDD is a small dataset on a 166 | laptop or terabytes in size, distributed over a large Spark cluster 167 | 168 | - It is a powerful framework for the development of scalable 169 | algorithms for statistical computing and machine learning 170 | 171 | ### JAX 172 | 173 | - *JAX* ([jax.readthedocs.io](jax.readthedocs.io)) is a Python 174 | library embedding a DSL for automatic differentiation and 175 | JIT-compiling (array) functions to run very fast on (multiple) CPU 176 | or GPU 177 | 178 | - It is especially good at speeding up likelihood evaluations and 179 | (MCMC-based) sampling algorithms for complex models 180 | 181 | - It is not unheard of for MCMC algorithms to run 100 times faster 182 | than regular Python code, even on a single CPU (multiple cores and 183 | GPUs will speed things up further) 184 | 185 | - The JAX eDSL is *pure functional array language* 186 | 187 | - Despite targeting a completely different kind of 
scalability to 188 | Spark, and being embedded in a very different language, the 189 | fundamental computational model is very similar: *express algorithms 190 | in terms of lazy transformations of immutable data structures using 191 | pure functions* 192 | 193 | ### Functional algorithms 194 | 195 | - By expressing algorithms in a functional style (eg. lazy 196 | transformations of immutable data structures with pure functions), 197 | we allow many code optimisations to be automatically applied 198 | 199 | - *Pure functional* algorithms are relatively easy to 200 | *analyse*, *optimise*, *transform*, *compile*, 201 | *parallelise*, *distribute*, 202 | *differentiate*, push to GPU, etc. 203 | 204 | - These transformations can typically be performed *automatically* by 205 | the library, compiler, framework, etc., without significant user 206 | intervention 207 | 208 | - It is very difficult (often impossible) to analyse and reason about 209 | imperative code in a similar way 210 | 211 | ### JAX ecosystem for Bayesian computation 212 | 213 | - (Unnormalised) log-posteriors expressed in JAX can be sampled using 214 | a variety of different algorithms (including HMC and NUTS, via 215 | auto-diff) using *BlackJAX* ([blackjax-devs.github.io/blackjax/](blackjax-devs.github.io/blackjax/)) 216 | 217 | - The original Pyro uses PyTorch as a back-end, but the popular 218 | *NumPyro* () fork uses 219 | JAX for a back-end, leading to significant performance improvements 220 | 221 | - The PyMC4 project intended to use TensorFlow for a back-end, but 222 | this project was abandoned, and *PyMC(3)* 223 | ([docs.pymc.io](docs.pymc.io)) has switched from Theano to Aesara 224 | with JAX as a back-end 225 | 226 | ### Dex 227 | 228 | - Although the conceptual computational model of JAX has a number of 229 | good features, the embedding of such a language in a dynamic, 230 | interpreted, imperative language such as Python has a number of 231 | limitations and drawbacks 232 | 233 | - A similar issue arises with Spark - although it is possible to 234 | develop Spark applications in Python using PySpark, in practice most 235 | non-trivial applications are developed in Scala, for good reason 236 | 237 | - This motivates the development of a JAX-like array processing DSL in 238 | a strongly typed functional programming language 239 | 240 | - [Dex]([github.com/google-research/dex-lang](github.com/google-research/dex-lang)) 241 | is a new experimental (Haskell-like) language in this space with a 242 | number of interesting and desirable features... 243 | 244 | 245 | 246 | 247 | 248 | 249 | -------------------------------------------------------------------------------- /Scala/md/Example.md: -------------------------------------------------------------------------------- 1 | # Running example 2 | 3 | ## ML for a logistic regression model using gradient ascent 4 | 5 | ### An interactive session 6 | 7 | Here we will present an interactive Scala session for conducting maximum likelihood inference for our simple logistic regression model using a very naive gradient ascent algorithm. We will need to use the [Breeze](https://github.com/scalanlp/breeze/) library for numerical linear algebra, and we will also use [Smile](https://haifengl.github.io/) for a data frame object and CSV parser. 
The [sbt](https://www.scala-sbt.org/) project in the [Scala](../) directory has these dependencies (and a few others) preconfigured, so running `sbt console` from the Scala directory will give a REPL into which the following commands can be pasted. 8 | 9 | We start with a few imports and a shorthand type declaration. 10 | ```scala 11 | import breeze.linalg.* 12 | import breeze.numerics.* 13 | import smile.data.pimpDataFrame 14 | import annotation.tailrec 15 | 16 | type DVD = DenseVector[Double] 17 | ``` 18 | 19 | Next we use Smile to read and process the data. 20 | ```scala 21 | val df = smile.read.csv("../pima.data", delimiter=" ", header=false) 22 | // df: DataFrame = [V1: int, V2: int, V3: int, V4: int, V5: double, V6: double, V7: int, V8: String] 23 | // +---+---+---+---+----+-----+---+---+ 24 | // | V1| V2| V3| V4| V5| V6| V7| V8| 25 | // +---+---+---+---+----+-----+---+---+ 26 | // | 5| 86| 68| 28|30.2|0.364| 24| No| 27 | // | 7|195| 70| 33|25.1|0.163| 55|Yes| 28 | // | 5| 77| 82| 41|35.8|0.156| 35| No| 29 | // | 0|165| 76| 43|47.9|0.259| 26| No| 30 | // | 0|107| 60| 25|26.4|0.133| 23| No| 31 | // | 5| 97| 76| 27|35.6|0.378| 52|Yes| 32 | // | 3| 83| 58| 31|34.3|0.336| 25| No| 33 | // | 1|193| 50| 16|25.9|0.655| 24| No| 34 | // | 3|142| 80| 15|32.4| 0.2| 63| No| 35 | // | 2|128| 78| 37|43.3|1.224| 31|Yes| 36 | // +---+---+---+---+----+-----+---+---+ 37 | // 190 more rows... 38 | // 39 | val y = DenseVector(df.select("V8"). 40 | map(_(0).asInstanceOf[String]). 41 | map(s => if (s == "Yes") 1.0 else 0.0).toArray) 42 | // y: DenseVector[Double] = DenseVector(0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0) 43 | val x = DenseMatrix(df.drop("V8").toMatrix.toArray:_*) 44 | // x: DenseMatrix[Double] = 5.0 86.0 68.0 28.0 30.2 0.364 24.0 45 | // 7.0 195.0 70.0 33.0 25.1 0.163 55.0 46 | // 5.0 77.0 82.0 41.0 35.8 0.156 35.0 47 | // 0.0 165.0 76.0 43.0 47.9 0.259 26.0 48 | // 0.0 107.0 60.0 25.0 26.4 0.133 23.0 49 | // 5.0 97.0 76.0 27.0 35.6 0.378 52.0 50 | // 3.0 83.0 58.0 31.0 34.3 0.336 25.0 51 | // 1.0 193.0 50.0 16.0 25.9 0.655 24.0 52 | // 3.0 142.0 80.0 15.0 32.4 0.2 63.0 53 | // 2.0 128.0 78.0 37.0 43.3 1.224 31.0 54 | // 0.0 137.0 40.0 35.0 43.1 2.288 33.0 55 | // 9.0 154.0 78.0 30.0 30.9 0.164 45.0 56 | // 1.0 189.0 60.0 23.0 30.1 0.398 59.0 57 | // 12.0 92.0 62.0 7.0 27.6 0.926 44.0 58 | // 1.0 86.0 66.0 52.0 41.3 0.917 29.0 59 | // 4.0 99.0 76.0 15.0 23.2 0.223 21.0 60 | // 1.0 109.0 60.0 8.0 25.4 0.947 21.0 61 | // 11.0 143.0 94.0 33.0 36.6 0.254 51.0 62 | // 1.0 149.0 68.0 29.0 29.3 0.349 42.0 63 | // 0.0 139.0 62.0 17.0 22.1 0.207 21.0 64 | // 2.0 
99.0 70.0 16.0 20.4 0.235 27.0 65 | // 1.0 100.0 66.0 29.0 32.0 0.444 42.0 66 | // 4.0 83.0 86.0 19.0 29.3 0.317 34.0 67 | // 0.0 101.0 64.0 17.0 21.0 0.252 21.0 68 | // 1.0 87.0 68.0 34.0 37.6 0.401 24.0 69 | // 9.0 164.0 84.0 21.0 30.8 0.831 32.0 70 | // 1.0 99.0 58.0 10.0 25.4 0.551 21.0 71 | // 0.0 140.0 65.0 26.0 42.6 0.431 24.0 72 | // 5.0 108.0 72.0 43.0 36.1 0.263 33.0 73 | // 2.0 110.0 74.0 29.0 32.4 0.698 27.0 74 | // 1.0 79.0 60.0 42.0 43.5 0.678 23.0 75 | // 3.0 148.0 66.0 25.0 32.5 0.256 22.0 76 | // 0.0 121.0 66.0 30.0 34.3 0.203 33.0 77 | // 3.0 158.0 64.0 13.0 31.2 0.295 24.0 78 | // 2.0 105.0 80.0 45.0 33.7 0.711 29.0 79 | // 13.0 145.0 82.0 19.0 22.2 0.245 57.0 80 | // 1.0 79.0 80.0 25.0 25.4 0.583 22.0 81 | // 1.0 71.0 48.0 18.0 20.4 0.323 22.0 82 | // 0.0 102.0 86.0 17.0 29.3 0.695 27.0 83 | // 0.0 119.0 66.0 27.0 38.8 0.259 22.0 84 | // 8.0 176.0 90.0 34.0 33.7 0.467 58.0 85 | // 1.0 97.0 68.0 21.0 27.2 1.095 22.0 86 | // 4.0 129.0 60.0 12.0 27.5 0.527 31.0 87 | // 1.0 97.0 64.0 19.0 18.2 0.299 21.0 88 | // 0.0 86.0 68.0 32.0 35.8 0.238 25.0 89 | // 2.0 125.0 60.0 20.0 33.8 0.088 31.0 90 | // 5.0 123.0 74.0 40.0 34.1 0.269 28.0 91 | // 2.0 92.0 76.0 20.0 24.2 1.698 28.0 92 | // 3.0 171.0 72.0 33.0 33.3 0.199 24.0 93 | // ... 94 | val ones = DenseVector.ones[Double](x.rows) 95 | // ones: DenseVector[Double] = DenseVector(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0) 96 | val X = DenseMatrix.horzcat(ones.toDenseMatrix.t, x) 97 | // X: DenseMatrix[Double] = 1.0 5.0 86.0 68.0 28.0 30.2 0.364 24.0 98 | // 1.0 7.0 195.0 70.0 33.0 25.1 0.163 55.0 99 | // 1.0 5.0 77.0 82.0 41.0 35.8 0.156 35.0 100 | // 1.0 0.0 165.0 76.0 43.0 47.9 0.259 26.0 101 | // 1.0 0.0 107.0 60.0 25.0 26.4 0.133 23.0 102 | // 1.0 5.0 97.0 76.0 27.0 35.6 0.378 52.0 103 | // 1.0 3.0 83.0 58.0 31.0 34.3 0.336 25.0 104 | // 1.0 1.0 193.0 50.0 16.0 25.9 0.655 24.0 105 | // 1.0 3.0 142.0 80.0 15.0 32.4 0.2 63.0 106 | // 1.0 2.0 128.0 78.0 37.0 43.3 1.224 31.0 107 | // 1.0 0.0 137.0 40.0 35.0 43.1 2.288 33.0 108 | // 1.0 9.0 154.0 78.0 30.0 30.9 0.164 45.0 109 | // 1.0 1.0 189.0 60.0 23.0 30.1 0.398 59.0 110 | // 1.0 12.0 92.0 62.0 7.0 27.6 0.926 44.0 111 | // 1.0 1.0 86.0 66.0 52.0 41.3 0.917 29.0 112 | // 1.0 4.0 99.0 76.0 15.0 23.2 0.223 21.0 113 | // 1.0 1.0 109.0 60.0 8.0 25.4 0.947 21.0 114 | // 1.0 11.0 143.0 94.0 33.0 36.6 0.254 51.0 115 | // 1.0 1.0 149.0 68.0 29.0 29.3 0.349 42.0 116 | // 1.0 0.0 139.0 62.0 17.0 22.1 0.207 21.0 117 | // 1.0 2.0 99.0 70.0 16.0 20.4 0.235 27.0 118 | // 1.0 1.0 100.0 66.0 29.0 32.0 0.444 42.0 119 | // 1.0 4.0 83.0 86.0 19.0 29.3 0.317 
34.0 120 | // 1.0 0.0 101.0 64.0 17.0 21.0 0.252 21.0 121 | // 1.0 1.0 87.0 68.0 34.0 37.6 0.401 24.0 122 | // 1.0 9.0 164.0 84.0 21.0 30.8 0.831 32.0 123 | // 1.0 1.0 99.0 58.0 10.0 25.4 0.551 21.0 124 | // 1.0 0.0 140.0 65.0 26.0 42.6 0.431 24.0 125 | // 1.0 5.0 108.0 72.0 43.0 36.1 0.263 33.0 126 | // 1.0 2.0 110.0 74.0 29.0 32.4 0.698 27.0 127 | // 1.0 1.0 79.0 60.0 42.0 43.5 0.678 23.0 128 | // 1.0 3.0 148.0 66.0 25.0 32.5 0.256 22.0 129 | // 1.0 0.0 121.0 66.0 30.0 34.3 0.203 33.0 130 | // 1.0 3.0 158.0 64.0 13.0 31.2 0.295 24.0 131 | // 1.0 2.0 105.0 80.0 45.0 33.7 0.711 29.0 132 | // 1.0 13.0 145.0 82.0 19.0 22.2 0.245 57.0 133 | // 1.0 1.0 79.0 80.0 25.0 25.4 0.583 22.0 134 | // 1.0 1.0 71.0 48.0 18.0 20.4 0.323 22.0 135 | // 1.0 0.0 102.0 86.0 17.0 29.3 0.695 27.0 136 | // 1.0 0.0 119.0 66.0 27.0 38.8 0.259 22.0 137 | // 1.0 8.0 176.0 90.0 34.0 33.7 0.467 58.0 138 | // 1.0 1.0 97.0 68.0 21.0 27.2 1.095 22.0 139 | // 1.0 4.0 129.0 60.0 12.0 27.5 0.527 31.0 140 | // 1.0 1.0 97.0 64.0 19.0 18.2 0.299 21.0 141 | // 1.0 0.0 86.0 68.0 32.0 35.8 0.238 25.0 142 | // 1.0 2.0 125.0 60.0 20.0 33.8 0.088 31.0 143 | // 1.0 5.0 123.0 74.0 40.0 34.1 0.269 28.0 144 | // 1.0 2.0 92.0 76.0 20.0 24.2 1.698 28.0 145 | // 1.0 3.0 171.0 72.0 33.0 33.3 0.199 24.0 146 | // ... 147 | val p = X.cols 148 | // p: Int = 8 149 | ``` 150 | 151 | Now `y` is our response variable and `X` is our covariate matrix, including an intercept column. Now we define the likelihood and some functions for gradient ascent. Note that the `ascend` function contains a tail-recursive function `go` that avoids the need for mutable variables and a "while loop", but is effectively equivalent. 152 | ```scala 153 | def ll(beta: DVD): Double = 154 | sum(-log(ones + exp(-1.0*(2.0*y - ones)*:*(X * beta)))) 155 | 156 | def gll(beta: DVD): DVD = 157 | (X.t)*(y - ones/:/(ones + exp(-X*beta))) 158 | 159 | def oneStep(learningRate: Double)(b0: DVD): DVD = 160 | b0 + learningRate*gll(b0) 161 | 162 | def ascend(step: DVD => DVD, init: DVD, maxIts: Int = 10000, 163 | tol: Double = 1e-8, verb: Boolean = true): DVD = 164 | @tailrec def go(b0: DVD, ll0: Double, itsLeft: Int): DVD = { 165 | if (verb) 166 | println(s"$itsLeft : $ll0") 167 | val b1 = step(b0) 168 | val ll1 = ll(b1) 169 | if ((math.abs(ll0 - ll1) < tol)|(itsLeft < 1)) 170 | b1 171 | else 172 | go(b1, ll1, itsLeft - 1) 173 | } 174 | go(init, ll(init), maxIts) 175 | ``` 176 | 177 | Now let's run the gradient ascent algorithm, starting from a reasonable initial guess, since naive gradient ascent is terrible. 178 | ```scala 179 | val init = DenseVector(-9.8, 0.1, 0, 0, 0, 0, 1.8, 0) 180 | // init: DenseVector[Double] = DenseVector(-9.8, 0.1, 0.0, 0.0, 0.0, 0.0, 1.8, 0.0) 181 | ll(init) 182 | // res0: Double = -566.3903911564223 183 | val opt = ascend(oneStep(1e-6), init, verb=false) 184 | // opt: DenseVector[Double] = DenseVector(-9.798616371360632, 0.10314432881260363, 0.032145673085756866, -0.00452855938919666, -0.001984121863541414, 0.08411858929117885, 1.801384805815113, 0.04114190402348266) 185 | ll(opt) 186 | // res1: Double = -89.19598966159712 187 | ``` 188 | Note how much the likelihood has improved relative to our initial guess. 189 | 190 | 191 | ### A standalone application 192 | 193 | We can package the code above into a standalone Scala application, and this is available in the file [ML-GA.scala](../src/main/scala/ML-GA.scala). We can compile and run this application by typing `sbt run` from the Scala directory. 
Note that you must run `sbt` from the directory containing the [build.sbt](../build.sbt) file, not from the subdirectory containing the actual source code files. Make sure that you can run the application before proceeding to the exercises. 194 | 195 | ### Hands-on exercise 196 | 197 | Do some or all of these exercises (or go back to previous exercises) as your interests dictate and time permits. 198 | 199 | * Try manually tweaking the initial guess, the learning rate, the convergence tolerance and the maximum number of iterations to see how robust (or otherwise) this naive gradient ascent algorithm is to these tuning parameters. 200 | * Improve on the naive ascent algorithm somehow, perhaps by implementing [line search](https://en.wikipedia.org/wiki/Line_search) for choosing the step size. 201 | * Note that Breeze has a bunch of utilities for optimisation, in the [breeze.optimize](https://github.com/scalanlp/breeze/wiki/Quickstart#breezeoptimize) package. See if you can figure out how to use them by messing around in the REPL. Then see if you can adapt the running example to use one of the methods. The [ScalaDoc](http://www.scalanlp.org/api/breeze/#breeze.optimize.package) may be useful. 202 | 203 | -------------------------------------------------------------------------------- /docs/DexRandom.html: -------------------------------------------------------------------------------- 1 | 2 |

Quick hands-on with splittable random numbers in Dex

309 |

Basics

310 |

Call up a Dex REPL (dex repl) and start messing around with splittable random numbers in Dex.

311 |
k0 = new_key 42 312 |
[k1, k2] = split_key k0 313 |
314 |
k0 315 |
0xa7c2d0d406a4b7ed
k1 316 |
0xfcd0d5c71aa2378
k2 317 |
0xaa7d4e4cc9c535ff
318 |
rand k0 319 |
0.286863
rand k0 320 |
0.286863
rand k0 321 |
0.286863
rand k1 322 |
0.3292074
rand k2 323 |
0.5407103
324 |

Note that we don't have to split the key into just two.

325 |
[k3, k4, k5] = split_key k2 326 |
327 |
k3 328 |
0x672ae31f2a29e791
k4 329 |
0xd4d38cabd70ddf36
k5 330 |
0xd5ea3af45c4e3336
331 |
split_key k5 :: Fin 10=>Key 332 |
[0x3756f7d71ec688b1, 0x3424338e5f824c0d, 0xb91c292bccac2b94, 0xa97c48b43810c84b, 0xe692ecd5716afebc, 0xe8e48f98eec6e2e9, 0xfe0889dd3b046666, 0x318bdcd2d4005d37, 0x93c8c08271e014c9, 0x9a5111caec8ab195]
333 |
keys = for i:(Fin 8). ixkey k5 i 334 |
335 |
keys 336 |
[0x3756f7d71ec688b1, 0x3424338e5f824c0d, 0xb91c292bccac2b94, 0xa97c48b43810c84b, 0xe692ecd5716afebc, 0xe8e48f98eec6e2e9, 0xfe0889dd3b046666, 0x318bdcd2d4005d37]
337 |

It can sometimes be convenient to split a key into a table of keys and then map or fold a random function over the table.

338 |
sum $ map rand keys 339 |
2.472103
340 |

If we really just want to "advance" the key, we can do that too.

341 |
split_key k5 :: Fin 1=>Key 342 |
[0x3756f7d71ec688b1]
343 |
[k6] = split_key k5 344 |
345 |
k6 346 |
0x3756f7d71ec688b1
347 |

Probability distributions

348 |

rand is used to generate a $U(0,1)$, and randn generates from a standard normal, $N(0,1)$.

349 |
map randn keys 350 |
[0.151103, 0.4355534, -0.4552877, -2.912428, 0.9843643, -0.7889465, 1.424858, -0.5015252]
351 |

For more interesting probability distributions, you will want to use the stats library.

352 |
import stats 353 |
354 |
draw(Exponential(2.0), k5) :: Float 355 |
0.472008
356 |
pd = Poisson(10.0) 357 |
358 |
draw(pd, k6) :: Nat 359 |
6
360 |
map (\k. draw(pd, k)) keys :: _=>Nat 361 |
[6, 7, 11, 13, 15, 8, 13, 11]
362 |

Random functions

363 |

Suppose that you want to define your own random function. Here we will define our own function for sampling exponentials.

364 |
def rexp(rate: Float) -> (Key) -> Float = \k. log1p (-rand k) / -rate 365 |
366 |

Notice how the non-determinism is clearly signalled by the presence of the key in the type signature. Also notice how we have made the key the final input parameter. This is because of currying. We can call the function directly:

367 |
rexp(10.0)(k5) 368 |
0.09440161
369 |

But we can also create a particular random variable:

370 |
my_rexp = rexp 20.0 371 |
372 |

and then use this partially applied function with multiple keys.

373 |
map my_rexp keys 374 |
[0.0400418, 0.0009054433, 0.0211621, 0.007027078, 0.09036362, 0.04034937, 0.001748984, 0.0001424373]
375 |

This is why you nearly always want to make the key the final input parameter, and why you might want to curry it separately.

376 |

Exercise

377 |

Write a function to simulate a 1d random walk.

378 |
-------------------------------------------------------------------------------- /docs/djwutils.html: -------------------------------------------------------------------------------- 1 | 2 |
-- djwutils.dx
-- Some generic utility functions
-- some of which should probably be added to the standard Dex prelude

def iterate(n: Nat, step: (a) -> a, init: a) -> Fin n => a given (a|Data) =
  with_state init \st.
    for i:(Fin n).
      old = get st
      next = step old
      st := next
      old

def unfold(n: Nat, advance: (a) -> (b, a), init: a) -> Fin n => b given (a|Data, b) =
  with_state init \st.
    for i:(Fin n).
      (b, a) = advance (get st)
      st := a
      b

def length(arr: n=>a) -> Nat given (a, n|Ix) =
  size n

def last_dumb(arr: n=>a) -> a given (a, n|Ix) =
  head $ reverse arr

def last(arr: n=>a) -> a given (a, n|Ix) =
  nind = unsafe_nat_diff (size n) 1
  arr[asidx nind]

-- Not ideal instance, since likely loss of precision...
instance Parse(Float64)
  def parseString(str) =
    mf : Maybe Float = parseString str
    case mf of
      Nothing -> Nothing
      Just f -> Just $ f_to_f64 f

def clipv(x: a=>Float, c: Float) -> (a)=>Float given (a|Ix) =
  map (\xi. clip (-c, c) xi) x

def nanclip(x: a=>Float) -> (a)=>Float given (a|Ix) =
  map (\xi. if (isnan xi) then 0.0 else xi) x

def to_tsv(mat: (n)=>(p)=>Float) -> String given (n|Ix, p|Ix) =
  ms = for i j. show mat[i,j] <> "\t"
  concat (map (\l. l <> "\n") (map concat ms))

-- based on "lines" from the prelude...
def words(sep: Word8, source: String) -> List String =
  AsList(_, s) = source <> (to_list [sep])
  AsList(num_words, space_ixs) = cat_maybes for i_char.
    if (s[i_char] == sep)
      then Just i_char
      else Nothing
  to_list for i_word:(Fin num_words).
    start = case prev_ix i_word of
      Nothing -> first_ix
      Just i -> right_post space_ixs[i]
    end = left_post space_ixs[i_word]
    post_slice s start end

def cons(x: a, xs: List a) -> List a given (a) =
  AsList(on, xt) = xs
  n = on + 1
  nxt = for i:(Fin n). case (ordinal i == 0) of
    True -> x
    False -> xt[asidx (unsafe_nat_diff (ordinal i) 1)]
  to_list nxt

def list2tab(l: List a) -> (n)=>a given (a, n|Ix) =
  AsList(ll, t) = l
  unsafe_cast_table t

def parse_tsv(sep: Word8, input: String) -> List (List String) =
  AsList(_, lines) = lines input
  to_list $ map (\l. words sep l) lines

-- eof
-------------------------------------------------------------------------------- /docs/DexCC.html: --------------------------------------------------------------------------------

Dex crash course


Dex is a strongly typed pure functional differentiable array processing language, designed with scientific computing and machine learning applications in mind. It is well-suited to statistical computing applications, and like JAX, can exploit a GPU if available.


Start a Dex REPL by entering dex repl at your command prompt.


Immutability


Dex objects are immutable.

x = 5
:t x
Nat

x = x + 1
Error: variable already defined: x

x = x + 1
^^

Immutable collections


Dex, like JAX, has arrays/tensors as its main data structure; in Dex these are referred to as tables, and they are immutable.

v = [1.0, 2, 4, 5, 7]
v
[1., 2., 4., 5., 7.]
:t v
((Fin 5) => Float32)

Dex has a strong static type system, including elements of dependent typing. Note how the length of an array (and in general, the dimensions of a tensor) is part of its type. This allows the detection of all kinds of dimension mismatch errors at compile time rather than runtime, and this is a very good thing! Notice that the type reflects the idea that conceptually, an array is essentially a function mapping from an index to a value.
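
For example (a sketch of the kind of mistake that is caught; the exact error message may vary), iterating over the wrong index set, as in

for i:(Fin 3). v[i]

is rejected by the type checker, since v expects an index of type Fin 5.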


We can't just directly index into a table with an integer, since this isn't safe - we might violate the table index bounds. We need to cast our integer to a typed index using the @ operator.

v[2@Fin 5]
4.

However, where things are unambiguous, we can use type inference.

v[2@_]
4.

It is relatively unusual to want to update a single element of a Dex table, but we can certainly do it (immutably). Below we update the element with index 2 to be 9.0.

vu = for i. case (i == (2@_)) of
  True -> 9.0
  False -> v[i]

vu
[1., 2., 9., 5., 7.]
v
[1., 2., 4., 5., 7.]

This syntax will gradually become clear.
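
If this is something you need often, the pattern can be wrapped in a small helper. The following is just a sketch (upd is a hypothetical name, not a prelude function); it compares ordinals so that no equality constraint is needed on the index type, and calling upd(v, 2@_, 9.0) should reproduce vu above.

def upd(t: n=>a, ix: n, new: a) -> n=>a given (a, n|Ix) =
  for i. if (ordinal i == ordinal ix) then new else t[i]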


Manipulating collections


We can map and reduce.

map (\x. 2*x) v
[2., 4., 8., 10., 14.]

2.0 .* v
[2., 4., 8., 10., 14.]

sum v
19.

sum(v)
19.

reduce 0.0 (\x y. x+y) v
19.

reduce(0.0, \x y. x+y, v)
19.

fold 0.0 (\i acc. acc + v[i])
19.

The main way of creating and transforming tables is using for, which in Dex is more like a for-comprehension or for-expression in some languages than a traditional imperative for-loop. However, it is designed to allow the writing of index-based algorithms in a safe, pure functional way. For example, as an alternative to using map we could write.

for i. 2*v[i]
[2., 4., 8., 10., 14.]

We can create a table of given length filled with the same element

for i:(Fin 8). 2.0
[2., 2., 2., 2., 2., 2., 2., 2.]

or different elements

for i:(Fin 6). n_to_f $ ordinal i
[0., 1., 2., 3., 4., 5.]

We can create 2d tables similarly.

Height=Fin 3
Width=Fin 4
m = for i:Height j:Width. n_to_f $ ordinal i + ordinal j
m
[[0., 1., 2., 3.], [1., 2., 3., 4.], [2., 3., 4., 5.]]
:t m
((Fin 3) => (Fin 4) => Float32)
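
As a small worked sketch (not part of the original document) of how for and sum combine for index-based linear algebra, here is a matrix-vector product using m; given the values above it should evaluate to [20., 30., 40.].

x = [1.0, 2, 3, 4]
for i. sum $ for j. m[i,j] * x[j]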

Writing functions


We can write a log-factorial function as follows.

def log_fact(n: Nat) -> Float =
  sum $ for i:(Fin n). log $ n_to_f (ordinal i + 1)

:t log_fact
((n:Nat) -> Float32)

log_fact 3
1.791759
log_fact(10)
15.10441
log_fact 100000
1051300.

But this consumes heap, since it materialises a table of length n before summing it. Dex, like JAX, is differentiable, and so prohibits explicit recursion. However, it allows the creation of a mutable state variable that can be read (get) and updated (:=) via its algebraic effects system.

def log_fact_s(n: Nat) -> Float =
  (lf, _) = yield_state (0.0, n_to_i n) \state.
    while \.
      (acc, i) = get state
      if (i > 0)
        then
          state := (acc + log (i_to_f i), i - 1)
          True
        else False
  lf

log_fact_s 3
1.791759
log_fact_s 10
15.10441
log_fact_s 100000
1051310.

Note that for the final example, significant numerical error has accumulated in this naive sequential sum of 32-bit floats (compare with the result from log_fact above).


Curried functions


Note that we can curry functions as appropriate, using lambdas.

def lin_fun(m: Float, c: Float) -> (Float) -> Float =
  \x. m*x + c

:t lin_fun
((m:Float32,c:Float32) -> ((x:Float32) -> Float32))

f = lin_fun 2 3

:t f
((x:Float32) -> Float32)

f 0
3.
f(1)
5.
f 2
7.
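
A payoff of currying is that the partially applied function can be used anywhere a unary function is expected; for example (a sketch, with the values computed by hand), mapping f over the table v from earlier should give:

map f v
[5., 7., 11., 13., 17.]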
--------------------------------------------------------------------------------