├── hello
    ├── hello.sml
    ├── hello-again.sml
    ├── hello-twice.mlb
    └── README.md
├── mcss
    ├── mcss.mlb
    ├── mcss
    ├── README.md
    └── mcss.sml
├── msort
    ├── msort.mlb
    ├── msort
    ├── README.md
    └── msort.sml
├── sequences
    ├── fast-par
    │   ├── mcss.mlb
    │   ├── main.mlb
    │   ├── mcss.sml
    │   ├── main.sml
    │   └── ArraySequence.sml
    ├── bad-par
    │   ├── main.mlb
    │   ├── #examples.mlb#
    │   ├── main.sml
    │   ├── README.md
    │   └── ArraySequence.sml
    └── README.md
├── bfs
    ├── graphs
    ├── bfs.mlb
    ├── inputs
    │   └── sample-mesh
    ├── README.md
    ├── seq-bfs.sml
    ├── par-cas-bfs.sml
    ├── par-bfs.sml
    ├── offset-search.sml
    ├── bfs.sml
    └── dopt-bfs.sml
├── trees
    ├── test-scan
    │   ├── main.mlb
    │   └── main.sml
    ├── old
    │   ├── sequential
    │   │   ├── main.mlb
    │   │   ├── mapreduce.mlb
    │   │   ├── main.sml
    │   │   ├── mapreduce.sml
    │   │   └── Tree.sml
    │   ├── bad-par
    │   │   ├── main.mlb
    │   │   ├── examples.mlb
    │   │   ├── mapreduce.mlb
    │   │   ├── main.sml
    │   │   ├── mapreduce.sml
    │   │   ├── examples.sml
    │   │   └── Tree.sml
    │   └── fast-par
    │   │   ├── main.mlb
    │   │   ├── examples.mlb
    │   │   ├── mapreduce.mlb
    │   │   ├── main-unbalanced.mlb
    │   │   ├── main-unbalanced.sml
    │   │   ├── main.sml
    │   │   ├── mapreduce.sml
    │   │   ├── examples.sml
    │   │   └── Tree.sml
    ├── test-balanced
    │   ├── main.mlb
    │   └── main.sml
    ├── test-unbalanced
    │   ├── main.mlb
    │   └── main.sml
    ├── Tree.sml
    └── README.md
├── local
    ├── sources.mlb
    └── TutorialBenchmark.sml
├── TODO.md
├── sml.pkg
├── setup
    ├── mac-docker-mem-fix.jpg
    └── README.md
├── how-to-par
    ├── sequential
    │   ├── main.mlb
    │   ├── fib.sml
    │   └── main.sml
    ├── bad-par
    │   ├── main.mlb
    │   ├── bad-par-fib.sml
    │   └── main.sml
    ├── tuning
    │   ├── main.mlb
    │   └── main.sml
    ├── fast-par
    │   ├── main.mlb
    │   ├── fast-par-fib.sml
    │   └── main.sml
    └── README.md
├── .gitignore
├── Dockerfile
├── start-container.sh
├── LICENSE
└── README.md


/hello/hello.sml:
--------------------------------------------------------------------------------
1 | val _ = print "hello world\n"
2 | 


--------------------------------------------------------------------------------
/mcss/mcss.mlb:
--------------------------------------------------------------------------------
1 | ../local/sources.mlb
2 | mcss.sml
3 | 


--------------------------------------------------------------------------------
/hello/hello-again.sml:
--------------------------------------------------------------------------------
1 | val _ = print "hello again\n"
2 | 


--------------------------------------------------------------------------------
/msort/msort.mlb:
--------------------------------------------------------------------------------
1 | ../local/sources.mlb
2 | msort.sml
3 | 


--------------------------------------------------------------------------------
/sequences/fast-par/mcss.mlb:
--------------------------------------------------------------------------------
1 | ../../local/sources.mlb
2 | mcss.sml
3 | 


--------------------------------------------------------------------------------
/bfs/graphs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MPLLang/mpl-tutorial/HEAD/bfs/graphs


--------------------------------------------------------------------------------
/mcss/mcss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MPLLang/mpl-tutorial/HEAD/mcss/mcss


--------------------------------------------------------------------------------
/msort/msort:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MPLLang/mpl-tutorial/HEAD/msort/msort


--------------------------------------------------------------------------------
/trees/test-scan/main.mlb:
--------------------------------------------------------------------------------
1 | ../../local/sources.mlb
2 | ../Tree.sml
3 | main.sml
4 | 


--------------------------------------------------------------------------------
/hello/hello-twice.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | hello.sml
3 | hello-again.sml
4 | 


--------------------------------------------------------------------------------
/trees/old/sequential/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | Tree.sml
3 | main.sml
4 | 


--------------------------------------------------------------------------------
/trees/test-balanced/main.mlb:
--------------------------------------------------------------------------------
1 | ../../local/sources.mlb
2 | ../Tree.sml
3 | main.sml
4 | 


--------------------------------------------------------------------------------
/trees/test-unbalanced/main.mlb:
--------------------------------------------------------------------------------
1 | ../../local/sources.mlb
2 | ../Tree.sml
3 | main.sml
4 | 


--------------------------------------------------------------------------------
/sequences/fast-par/main.mlb:
--------------------------------------------------------------------------------
1 | ../../local/sources.mlb
2 | ArraySequence.sml
3 | main.sml
4 | 


--------------------------------------------------------------------------------
/local/sources.mlb:
--------------------------------------------------------------------------------
1 | ../lib/github.com/mpllang/mpllib/sources.mpl.mlb
2 | TutorialBenchmark.sml
3 | 


--------------------------------------------------------------------------------
/trees/old/sequential/mapreduce.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | Tree.sml
3 | mapreduce.sml
4 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | * Create a sequential implementian for sequences (use lists)
2 | * Update the ML primer notes


--------------------------------------------------------------------------------
/sml.pkg:
--------------------------------------------------------------------------------
1 | require {
2 |   github.com/mpllang/mpllib 0.1.0 #d79054f3e561a7a10db692cac6af91927520d52b
3 | }
4 | 


--------------------------------------------------------------------------------
/setup/mac-docker-mem-fix.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MPLLang/mpl-tutorial/HEAD/setup/mac-docker-mem-fix.jpg


--------------------------------------------------------------------------------
/how-to-par/sequential/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | ../../local/sources.mlb
3 | fib.sml
4 | main.sml
5 | 


--------------------------------------------------------------------------------
/trees/old/bad-par/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | Tree.sml
4 | main.sml
5 | 


--------------------------------------------------------------------------------
/sequences/bad-par/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | ../../local/sources.mlb
3 | ArraySequence.sml
4 | main.sml
5 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | Tree.sml
4 | main.sml
5 | 


--------------------------------------------------------------------------------
/how-to-par/bad-par/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | ../../local/sources.mlb
3 | bad-par-fib.sml
4 | main.sml
5 | 
6 | 


--------------------------------------------------------------------------------
/trees/old/bad-par/examples.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | Tree.sml
4 | examples.sml
5 | 


--------------------------------------------------------------------------------
/trees/old/bad-par/mapreduce.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | Tree.sml
4 | mapreduce.sml
5 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/examples.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | Tree.sml
4 | examples.sml
5 | 


--------------------------------------------------------------------------------
/how-to-par/tuning/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | ../sequential/fib.sml
4 | main.sml
5 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/mapreduce.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | Tree.sml
4 | mapreduce.sml
5 | 


--------------------------------------------------------------------------------
/sequences/bad-par/#examples.mlb#:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | ArraySequence.sml
4 | examples.sml
5 | 


--------------------------------------------------------------------------------
/bfs/bfs.mlb:
--------------------------------------------------------------------------------
1 | ../local/sources.mlb
2 | offset-search.sml
3 | dopt-bfs.sml
4 | par-bfs.sml
5 | par-cas-bfs.sml
6 | seq-bfs.sml
7 | bfs.sml
8 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/main-unbalanced.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | $(SML_LIB)/basis/fork-join.mlb
3 | Tree.sml
4 | main-unbalanced.sml
5 | 


--------------------------------------------------------------------------------
/how-to-par/fast-par/main.mlb:
--------------------------------------------------------------------------------
1 | $(SML_LIB)/basis/basis.mlb
2 | ../../local/sources.mlb
3 | ../sequential/fib.sml
4 | fast-par-fib.sml
5 | main.sml
6 | 


--------------------------------------------------------------------------------
/how-to-par/sequential/fib.sml:
--------------------------------------------------------------------------------
1 | fun fib n =
2 |   if n = 0 then
3 |     0
4 |   else if n = 1 then
5 |     1
6 |   else
7 |     fib (n-1) + fib (n-2)
8 | 


--------------------------------------------------------------------------------
/msort/README.md:
--------------------------------------------------------------------------------
 1 | # Parallel Merge Sort
 2 | 
 3 | ## Compilation
 4 | To compile run
 5 | ```
 6 | $ mpl main.mlb
 7 | ```
 8 | 
 9 | This will generate the executable `main`
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/main-unbalanced.sml:
--------------------------------------------------------------------------------
1 | val million = 1000000
2 | val n = 10 * million
3 | val tree = Tree.mk_unbalanced n
4 | val result = Tree.height tree
5 | val _ = print (Int.toString result ^ "\n")
6 | 


--------------------------------------------------------------------------------
/how-to-par/fast-par/fast-par-fib.sml:
--------------------------------------------------------------------------------
 1 | fun fastParFib n =
 2 |   if n < 20 then
 3 |     fib n
 4 |   else
 5 |     let
 6 |       val (a, b) =
 7 |         ForkJoin.par (fn () => fastParFib (n-1),
 8 |                       fn () => fastParFib (n-2))
 9 |     in
10 |       a + b
11 |     end
12 | 


--------------------------------------------------------------------------------
/mcss/README.md:
--------------------------------------------------------------------------------
1 | [(← sequences)](../sequences/README.md)
2 | [(Graphs →)](../graphs/README.md)
3 | 
4 | # Maximum Contiguous Subsequence Sums
5 | 
6 | The implementation follows the description presented 
7 | [in this chapter](https://diderot.one/courses/121/books/492/chapter/6838)
8 | of the Acar-Blelloch algorithms book.


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | #*
 2 | *~
 3 | 
 4 | lib/
 5 | 
 6 | hello/hello
 7 | hello/hello-twice
 8 | hello/foobar
 9 | 
10 | how-to-par/bad-par/main
11 | how-to-par/fast-par/main
12 | how-to-par/sequential/main
13 | how-to-par/tuning/main
14 | 
15 | trees/test-balanced/main
16 | trees/test-unbalanced/main
17 | trees/test-scan/main
18 | 


--------------------------------------------------------------------------------
/how-to-par/bad-par/bad-par-fib.sml:
--------------------------------------------------------------------------------
 1 | fun badParFib n =
 2 |   if n = 0 then
 3 |     0
 4 |   else if n = 1 then
 5 |     1
 6 |   else
 7 |     let
 8 |       val (a, b) =
 9 |         ForkJoin.par (fn () => badParFib (n-1),
10 |                       fn () => badParFib (n-2))
11 |     in
12 |       a + b
13 |     end
14 | 


--------------------------------------------------------------------------------
/how-to-par/fast-par/main.sml:
--------------------------------------------------------------------------------
 1 | (* Usage: main -n <sequence length> *)
 2 | 
 3 | structure CLA = CommandLineArgs
 4 | 
 5 | val defaultInput = 35
 6 | val n = CLA.parseInt "n" defaultInput
 7 | val _ = print ("Computing fib(" ^ Int.toString n ^ ")\n")
 8 | 
 9 | val result = fastParFib n
10 | val _ = print ("fib(" ^ Int.toString n ^ ") = " ^ Int.toString result ^ "\n")
11 | 


--------------------------------------------------------------------------------
/how-to-par/bad-par/main.sml:
--------------------------------------------------------------------------------
 1 | (* Usage: main -n <sequence length> *)
 2 | 
 3 | structure CLA = CommandLineArgs
 4 | 
 5 | val defaultInput = 35
 6 | val n = CLA.parseInt "n" defaultInput
 7 | val _ = print ("Computing fib(" ^ Int.toString n ^ ")\n")
 8 | 
 9 | val result = badParFib 35
10 | val _ = print ("fib(" ^ Int.toString n ^ ") = " ^ Int.toString result ^ "\n")
11 | 
12 | 


--------------------------------------------------------------------------------
/how-to-par/sequential/main.sml:
--------------------------------------------------------------------------------
 1 | (* Usage: main -n <sequence length> *)
 2 | 
 3 | structure CLA = CommandLineArgs
 4 | 
 5 | val defaultInput = 35
 6 | val n = CLA.parseInt "n" defaultInput
 7 | val _ = print ("Computing Sequentially: fib(" ^ Int.toString n ^ ")\n")
 8 | 
 9 | val result = fib n
10 | val _ = print ("fib(" ^ Int.toString n ^ ") = " ^ Int.toString result ^ "\n")
11 | 
12 | 


--------------------------------------------------------------------------------
/bfs/inputs/sample-mesh:
--------------------------------------------------------------------------------
 1 | Mesh
 2 | 11
 3 | 12
 4 | 31 86
 5 | 27 84
 6 | 41 82
 7 | 9 53
 8 | 77 10
 9 | 93 40
10 | 60 66
11 | 25 20
12 | 60 8
13 | 50 40
14 | 90 90
15 | 7 8 9 1 5
16 | 8 4 9 0 2
17 | 9 4 5 1 3
18 | 9 5 6 2 4 11
19 | 9 6 2 3 8 10
20 | 3 7 9 0 7
21 | 0 1 2 8 9
22 | 3 9 1 5 8
23 | 1 9 2 4 6 7
24 | 0 2 10 6 10
25 | 2 6 10 9 4 11
26 | 6 5 10 10 3
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM shwestrick/mpl:latest
 2 | 
 3 | # install smlpkg
 4 | RUN git clone https://github.com/diku-dk/smlpkg.git /root/smlpkg \
 5 |     && cd /root/smlpkg \
 6 |     && git fetch --all \
 7 |     && git checkout v0.1.5 \
 8 |     && MLCOMP=mlton make clean all \
 9 |     && apt-get update -qq \
10 |     && apt-get install -qq curl unzip
11 | ENV PATH /root/smlpkg/src/:$PATH
12 | 
13 | RUN mkdir /root/mpl-tutorial
14 | WORKDIR /root/mpl-tutorial
15 | 


--------------------------------------------------------------------------------
/trees/old/bad-par/main.sml:
--------------------------------------------------------------------------------
 1 | val _ = print "# Large example:\n"
 2 | val million = 1000000
 3 | val n = 10 * million
 4 | val tree = Tree.mkBalanced n
 5 | val _ = print "."
 6 | val h = Tree.height tree
 7 | val _ = print "."
 8 | val tree_f_opt = Tree.filter (fn x => Int.mod(x, 2) = 0) tree
 9 | val _ = print "."
10 | val (prefixSums, _) = Tree.scan 0 (fn (x, y) => Int.mod(x + y, million)) tree 
11 | val _ = print ".\n"
12 | val _ = print ("Height of filtered unbalanced tree is: " ^ Int.toString h ^ "\n")
13 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/main.sml:
--------------------------------------------------------------------------------
 1 | val _ = print "# Large example:\n"
 2 | val million = 1000000
 3 | val n = 10 * million
 4 | val tree = Tree.mkBalanced n
 5 | val _ = print "."
 6 | val h = Tree.height tree
 7 | val _ = print "."
 8 | val tree_f_opt = Tree.filter (fn x => Int.mod(x, 2) = 0) tree
 9 | val _ = print "."
10 | val (prefixSums, _) = Tree.scan 0 (fn (x, y) => Int.mod(x + y, million)) tree 
11 | val _ = print ".\n"
12 | val _ = print ("Height of filtered unbalanced tree is: " ^ Int.toString h ^ "\n")
13 | 


--------------------------------------------------------------------------------
/bfs/README.md:
--------------------------------------------------------------------------------
 1 | [(← mcss)](../mcss/README.md)
 2 | 
 3 | # BFS
 4 | 
 5 | This directory contains several implementations of the BFS, including
 6 | sequential and several parallel implementations.
 7 | 
 8 | The sequential implementation is standard.
 9 | The parallel implementations follow the presentation
10 | [in this chapter](https://diderot.one/courses/121/books/494/chapter/6873)
11 | of the Acar-Blelloch algorithms book.
12 | 
13 | The graph representation is a sequence-based representation 
14 | [as described here](https://diderot.one/courses/121/books/494/chapter/6871)
15 | 
16 | 
17 | The implementation follows the description presented 
18 | 


--------------------------------------------------------------------------------
/trees/old/sequential/main.sml:
--------------------------------------------------------------------------------
 1 | val _ = print "# Small example:\n"
 2 | val tree = Tree.mkBalanced 10
 3 | val _ = print ("Balanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
 4 | val _ = print ("Height of balanced tree is: " ^ Int.toString (Tree.height tree) ^ "\n")
 5 | 
 6 | val tree = Tree.mkUnbalanced 10
 7 | val _ = print ("Unbalanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
 8 | val _ = print ("Height of unbalanced tree is: " ^ Int.toString (Tree.height tree) ^ "\n")
 9 | 
10 | val _ = print "# Big example:\n"
11 | val million = 1000000
12 | val n = 10 * million
13 | val tree = Tree.mkBalanced n
14 | val _ = Tree.mkBalanced n
15 | val _ = print ("Height of balanced tree is: " ^ Int.toString (Tree.height tree) ^ "\n")
16 | 


--------------------------------------------------------------------------------
/start-container.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR=$(git rev-parse --show-toplevel)
 4 | cd $DIR
 5 | 
 6 | echo "building container at $DIR"
 7 | 
 8 | cmd="docker build . -t mpl-tutorial"
 9 | echo -e "\n$cmd"
10 | eval $cmd || \
11 | ( echo -e "\ndocker failed; retrying with sudo"; \
12 |   echo -e "\nsudo $cmd"; \
13 |   eval "sudo $cmd" \
14 | )
15 | if [[ $? -ne 0 ]]; then
16 |   echo "aborting...";
17 |   exit 1
18 | fi
19 | 
20 | echo -e "\nstarting container at $DIR"
21 | 
22 | cmd="docker run --rm -v $DIR:/root/mpl-tutorial -it mpl-tutorial /bin/bash"
23 | echo -e "\n$cmd"
24 | eval $cmd || \
25 | ( echo -e "\ndocker failed; retrying with sudo"; \
26 |   echo -e "\nsudo $cmd"; \
27 |   eval "sudo $cmd" \
28 | )
29 | if [[ $? -ne 0 ]]; then
30 |   echo "aborting...";
31 |   exit 1
32 | fi
33 | 


--------------------------------------------------------------------------------
/trees/test-balanced/main.sml:
--------------------------------------------------------------------------------
 1 | val size = CommandLineArgs.parseInt "size" 100000
 2 | 
 3 | val _ = print ("size " ^ Int.toString size ^ "\n")
 4 | 
 5 | fun sumSeq tree = Tree.reduceSeq (fn (a, b) => a+b) 0 tree
 6 | fun sum tree = Tree.reduce (fn (a, b) => a+b) 0 tree
 7 | 
 8 | val balancedTree = Tree.makeBalanced Int64.fromInt 0 size
 9 | val _ =
10 |   print ("built balancedTree: height "
11 |          ^ Int.toString (Tree.height balancedTree) ^ "\n")
12 | 
13 | val benchParams = {warmup = 1.0, repeat = 20}
14 | fun run msg f = TutorialBenchmark.run benchParams msg f
15 | 
16 | (* sequential performance *)
17 | val _ = print ("============ sequential ============\n")
18 | val result1 = run "sumSeq(balancedTree)" (fn () => sumSeq balancedTree)
19 | 
20 | (* parallel performance *)
21 | val _ = print ("============= parallel =============\n")
22 | val result2 = run "sum(balancedTree)" (fn () => sum balancedTree)
23 | 
24 | val _ =
25 |   if result1 = result2 then ()
26 |   else Util.die ("whoops: results differ (bug??)")
27 | 


--------------------------------------------------------------------------------
/trees/test-unbalanced/main.sml:
--------------------------------------------------------------------------------
 1 | val size = CommandLineArgs.parseInt "size" 100000
 2 | 
 3 | val _ = print ("size " ^ Int.toString size ^ "\n")
 4 | 
 5 | fun sumSeq tree = Tree.reduceSeq (fn (a, b) => a+b) 0 tree
 6 | fun sum tree = Tree.reduce (fn (a, b) => a+b) 0 tree
 7 | 
 8 | val unbalancedTree = Tree.makeUnbalanced Int64.fromInt 0 size
 9 | val _ =
10 |   print ("built unbalancedTree: height "
11 |          ^ Int.toString (Tree.height unbalancedTree) ^ "\n")
12 | 
13 | val benchParams = {warmup = 1.0, repeat = 20}
14 | fun run msg f = TutorialBenchmark.run benchParams msg f
15 | 
16 | (* sequential performance *)
17 | val _ = print ("============ sequential ============\n")
18 | val result1 = run "sumSeq(unbalancedTree)" (fn () => sumSeq unbalancedTree)
19 | 
20 | (* parallel performance *)
21 | val _ = print ("============= parallel =============\n")
22 | val result2 = run "sum(unbalancedTree)" (fn () => sum unbalancedTree)
23 | 
24 | val _ =
25 |   if result1 = result2 then ()
26 |   else Util.die ("whoops: results differ (bug??)")
27 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/mapreduce.sml:
--------------------------------------------------------------------------------
 1 | fun pairToString (i, j) = 
 2 |   "(" ^ (Int.toString i) ^ ", " ^ (Int.toString j) ^ ")"
 3 | 
 4 | val _ = print "# Small example:\n"
 5 | val tree = Tree.mkBalanced 10
 6 | val tree2 = Tree.map (fn i => (i, i)) tree
 7 | val max = Tree.reduce (fn ((x, y), (a, b)) => (Int.max (x, a), Int.max (y, b))) 
 8 |                       (0, 0)
 9 |                       tree2
10 | val _ = print ("Balanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
11 | val _ = print ("Balanced tree2 is: " ^ Tree.toString pairToString tree2 ^ "\n")
12 | val _ = print ("Max is: " ^ pairToString max ^ "\n")
13 | 
14 | val _ = print "# Big example:\n"
15 | val million = 1000000
16 | val n = million
17 | val tree = Tree.mkBalanced n
18 | val tree2 = Tree.map (fn i => (i, i)) tree
19 | val max = Tree.reduce (fn ((x, y), (a, b)) => (Int.max (x, a), Int.max (y, b))) 
20 |                       (0, 0)
21 |                       tree2
22 | val _ = Tree.mkBalanced n
23 | val _ = print ("Max of balanced tree is: " ^ pairToString max ^ "\n")
24 | 


--------------------------------------------------------------------------------
/how-to-par/tuning/main.sml:
--------------------------------------------------------------------------------
 1 | fun parFibWithGrain (g, n) =
 2 |   if n < g then
 3 |     fib n
 4 |   else
 5 |     let
 6 |       val (a, b) =
 7 |         ForkJoin.par (fn () => parFibWithGrain (g, n-1),
 8 |                       fn () => parFibWithGrain (g, n-2))
 9 |     in
10 |       a + b
11 |     end
12 | 
13 | fun timeFibWithGrain g =
14 |   let
15 |     val n = 35
16 | 
17 |     val t0 = Time.now ()
18 |     val result = parFibWithGrain (g, n)
19 |     val t1 = Time.now ()
20 | 
21 |     val elapsed = Time.- (t1, t0)
22 |   in
23 |     print ("grain " ^ Int.toString g ^ ": " ^ Time.toString elapsed ^ "\n")
24 |   end
25 | 
26 | (* run f(i), f(i+1), ..., f(j-1) *)
27 | fun forloop (i, j, f) =
28 |   if i >= j then () else (f i; forloop (i+1, j, f))
29 | 
30 | (** this is the same as
31 |   *   (timeFibWithGrain 5;
32 |   *    timeFibWithGrain 10;
33 |   *    timeFibWithGrain 15;
34 |   *    timeFibWithGrain 20;
35 |   *    timeFibWithGrain 25;
36 |   *    timeFibWithGrain 30)
37 |   *)
38 | val _ = forloop (1, 7, fn i => timeFibWithGrain (5*i))
39 | 


--------------------------------------------------------------------------------
/trees/old/sequential/mapreduce.sml:
--------------------------------------------------------------------------------
 1 | fun pairToString (i, j) = 
 2 |   "(" ^ (Int.toString i) ^ ", " ^ (Int.toString j) ^ ")"
 3 | 
 4 | val _ = print "# Small example:\n"
 5 | val tree = Tree.mkBalanced 10
 6 | val tree2 = Tree.map (fn i => (i, i)) tree
 7 | val max = Tree.reduce (fn ((x, y), (a, b)) => (Int.max (x, a), Int.max (y, b))) 
 8 |                       (0, 0)
 9 |                       tree2
10 | val _ = print ("Balanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
11 | val _ = print ("Balanced tree2 is: " ^ Tree.toString pairToString tree2 ^ "\n")
12 | val _ = print ("Max is: " ^ pairToString max ^ "\n")
13 | 
14 | val _ = print "# Big example:\n"
15 | val million = 1000000
16 | val n =  million
17 | val tree = Tree.mkBalanced n
18 | val tree2 = Tree.map (fn i => (i, i)) tree
19 | val max = Tree.reduce (fn ((x, y), (a, b)) => (Int.max (x, a), Int.max (y, b))) 
20 |                       (0, 0)
21 |                       tree2
22 | val _ = Tree.mkBalanced n
23 | val _ = print ("Max of balanced tree is: " ^ pairToString max ^ "\n")
24 | 


--------------------------------------------------------------------------------
/trees/old/bad-par/mapreduce.sml:
--------------------------------------------------------------------------------
 1 | fun pairToString (i, j) = 
 2 |   "(" ^ (Int.toString i) ^ ", " ^ (Int.toString j) ^ ")"
 3 | 
 4 | val _ = print "# Small example:\n"
 5 | val tree = Tree.mkBalanced 10
 6 | val tree2 = Tree.map (fn i => (i, i)) tree
 7 | val max = Tree.reduce (fn ((x, y), (a, b)) => (Int.max (x, a), Int.max (y, b))) 
 8 |                       (fn x => x)
 9 |                       tree2
10 | val _ = print ("Balanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
11 | val _ = print ("Balanced tree2 is: " ^ Tree.toString pairToString tree2 ^ "\n")
12 | val _ = print ("Max is: " ^ pairToString max ^ "\n")
13 | 
14 | val _ = print "# Big example:\n"
15 | val million = 1000000
16 | val n = million
17 | val tree = Tree.mkBalanced n
18 | val tree2 = Tree.map (fn i => (i, i)) tree
19 | val max = Tree.reduce (fn ((x, y), (a, b)) => (Int.max (x, a), Int.max (y, b))) 
20 |                       (fn x => x)
21 |                       tree2
22 | val _ = Tree.mkBalanced n
23 | val _ = print ("Max of balanced tree is: " ^ pairToString max ^ "\n")
24 | 


--------------------------------------------------------------------------------
/bfs/seq-bfs.sml:
--------------------------------------------------------------------------------
 1 | structure SequentialBFS =
 2 | struct
 3 | 
 4 |   structure G = AdjacencyGraph(Int)
 5 | 
 6 |   fun bfs g s =
 7 |     let
 8 |       fun neighbors v = G.neighbors g v
 9 |       fun degree v = G.degree g v
10 | 
11 |       val n = G.numVertices g
12 |       val m = G.numEdges g
13 | 
14 |       val queue = ForkJoin.alloc (m+1)
15 |       val parents = Array.array (n, ~1)
16 | 
17 |       fun search (lo, hi) =
18 |         if lo >= hi then lo else
19 |         let
20 |           val v = Array.sub (queue, lo)
21 |           fun visit (hi', u) =
22 |             if Array.sub (parents, u) >= 0 then hi'
23 |             else ( Array.update (parents, u, v)
24 |                  ; Array.update (queue, hi', u)
25 |                  ; hi'+1
26 |                  )
27 |         in
28 |           search (lo+1, Seq.iterate visit hi (neighbors v))
29 |         end
30 | 
31 |       val _ = Array.update (parents, s, s)
32 |       val _ = Array.update (queue, 0, s)
33 |       val numVisited = search (0, 1)
34 |     in
35 |       ArraySlice.full parents
36 |     end
37 | 
38 | end
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020-2022 MPL developers
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/trees/test-scan/main.sml:
--------------------------------------------------------------------------------
 1 | val size = CommandLineArgs.parseInt "size" 100000
 2 | 
 3 | val _ = print ("size " ^ Int.toString size ^ "\n")
 4 | 
 5 | fun scanSeq tree = Tree.scanSeq (fn (a, b) => a+b) 0 tree
 6 | fun scan tree = Tree.scan (fn (a, b) => a+b) 0 tree
 7 | 
 8 | val input = Tree.makeBalanced Int64.fromInt 1 (size+1)
 9 | val _ =
10 |   print ("built input: height "
11 |          ^ Int.toString (Tree.height input) ^ "\n")
12 | 
13 | val benchParams = {warmup = 5.0, repeat = 20}
14 | fun run msg f = TutorialBenchmark.run benchParams msg f
15 | 
16 | (* sequential performance *)
17 | val _ = print ("============ sequential ============\n")
18 | val (sums1, total1) = run "scanSeq" (fn () => scanSeq input)
19 | 
20 | val _ =
21 |   if size > 20 then ()
22 |   else print ("result " ^ Tree.toString Int64.toString sums1 ^ "\n")
23 | 
24 | (* parallel performance *)
25 | val _ = print ("============= parallel =============\n")
26 | val (sums2, total2) = run "scan" (fn () => scan input)
27 | 
28 | val _ =
29 |   if size > 20 then ()
30 |   else print ("result " ^ Tree.toString Int64.toString sums2 ^ "\n")
31 | 
32 | val _ =
33 |   if total1 = total2 then ()
34 |   else Util.die ("whoops: results differ (bug??)")
35 | 


--------------------------------------------------------------------------------
/bfs/par-cas-bfs.sml:
--------------------------------------------------------------------------------
 1 | (* bfs.sml
 2 |  * A parallel implementation of BFS based on sequence primitives
 3 |  * This implementation does not use concurrency primitives such as cas
 4 |  * It relies on sequence inject instead 
 5 |  *)
 6 | structure ParCASBFS =
 7 | struct
 8 |   exception InternalError
 9 | 
10 |   structure G = AdjacencyGraph(Int)
11 |   structure S = ArraySequence 
12 | 
13 |   fun bfs g s: int S.seq =
14 |     let
15 |       fun neighbors v = G.neighbors g v
16 |       fun degree v = G.degree g v
17 | 
18 |       val n = G.numVertices g
19 |       val m = G.numEdges g
20 | 
21 |       fun search (visited, frontier: int S.seq) =
22 |         if Seq.length(frontier) = 0 then
23 |           visited
24 |         else
25 |           let 
26 | 
27 |             fun claim (u, v) = 
28 |                 Array.sub (visited, u) = ~1
29 |                 andalso
30 |                 ~1 = Concurrency.casArray (visited, u) (~1, v) 
31 | 
32 |             fun visit(v) = 
33 |               S.filterSafe (fn u => claim (u, v)) (neighbors v)
34 | 
35 |             val frontier' = S.flatten (S.map visit frontier)
36 |           in
37 |             search (visited, frontier')
38 |           end 
39 |            
40 | 
41 |       val visited: int Array.array = Array.tabulate (n, fn i => if i = s then s else ~1)
42 |       val frontier = S.singleton s
43 |       val visited' = search (visited, frontier)
44 |     in
45 |       S.fromArray visited'
46 |     end
47 | 
48 | end
49 | 


--------------------------------------------------------------------------------
/bfs/par-bfs.sml:
--------------------------------------------------------------------------------
 1 | (* bfs.sml
 2 |  * A parallel implementation of BFS based on sequence primitives
 3 |  * This implementation does not use concurrency primitives such as cas
 4 |  * It relies on sequence inject instead 
 5 |  *)
 6 | structure ParBFS =
 7 | struct
 8 |   exception InternalError
 9 | 
10 |   structure G = AdjacencyGraph(Int)
11 |   structure S = ArraySequence 
12 | 
13 |   fun bfs g s: int S.seq =
14 |     let
15 |       fun neighbors v = G.neighbors g v
16 |       fun degree v = G.degree g v
17 | 
18 |       val n = G.numVertices g
19 |       val m = G.numEdges g
20 | 
21 |       fun search (visited: int S.seq, frontier: int S.seq) =
22 |         if Seq.length(frontier) = 0 then
23 |           visited
24 |         else
25 |           let 
26 |             fun f(v) = 
27 |               S.filtermap 
28 |                 (fn u => S.nth visited u = ~1) 
29 |                 (fn u => (u, v)) 
30 |                 (neighbors v)
31 | 
32 |             val edges = S.flatten (S.map f frontier)
33 |             val visited' = S.inject (visited, edges) 
34 |             val frontier' = S.filtermap
35 |                             (fn (u, v) => S.nth visited' u = v) 
36 |                             (fn (u, v) => u)
37 |                             edges
38 |           in
39 |             search (visited', frontier')
40 |           end 
41 | 
42 |       val visited = S.tabulate (fn i => if i = s then s else ~1) n
43 |       val frontier = S.singleton s
44 |       val visited' = search (visited, frontier)
45 |     in
46 |       visited'
47 |     end
48 | 
49 | end
50 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/examples.sml:
--------------------------------------------------------------------------------
 1 | val _ = print "# Small example:\n"
 2 | val tree = Tree.mkBalanced 10
 3 | val SOME tree_f = Tree.filter (fn x => Int.mod(x, 2) = 0) tree
 4 | val (prefixSums, _) = Tree.scan 0 (fn (x, y) => x + y) tree 
 5 | val iprefixSums = Tree.iscan 0 (fn (x, y) => x + y) tree 
 6 | val _ = print ("Balanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
 7 | val _ = print ("Balanced tree (filtered) is: " ^ Tree.toString Int.toString tree_f ^ "\n")
 8 | val _ = print ("Balanced tree (scan) is: " ^ Tree.toString Int.toString prefixSums ^ "\n")
 9 | val _ = print ("Balanced tree (iscan) is: " ^ Tree.toString Int.toString iprefixSums ^ "\n")
10 | val _ = print ("Height of balanced tree is: " ^ Int.toString (Tree.height tree) ^ "\n")
11 | val _ = print ("Height of filtered balanced tree is: " ^ Int.toString (Tree.height tree_f) ^ "\n")
12 | 
13 | 
14 | val tree = Tree.mkUnbalanced 10
15 | val SOME tree_f = Tree.filter (fn x => Int.mod(x, 2) = 0) tree
16 | val (prefixSums, _) = Tree.scan 0 (fn (x, y) => x + y) tree 
17 | val iprefixSums = Tree.iscan 0 (fn (x, y) => x + y) tree 
18 | val _ = print ("Unbalanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
19 | val _ = print ("Unbalanced tree (filtered) is: " ^ Tree.toString Int.toString tree_f ^ "\n")
20 | val _ = print ("Unbalanced tree (scan) is: " ^ Tree.toString Int.toString prefixSums ^ "\n")
21 | val _ = print ("Unbalanced tree (iscan) is: " ^ Tree.toString Int.toString iprefixSums ^ "\n")
22 | val _ = print ("Height of unbalanced tree is: " ^ Int.toString (Tree.height tree) ^ "\n")
23 | val _ = print ("Height of filtered unbalanced tree is: " ^ Int.toString (Tree.height tree_f) ^ "\n")
24 | 
25 | 


--------------------------------------------------------------------------------
/trees/old/bad-par/examples.sml:
--------------------------------------------------------------------------------
 1 | val _ = print "# Small example:\n"
 2 | val tree = Tree.mkBalanced 10
 3 | val SOME tree_f = Tree.filter (fn x => Int.mod(x, 2) = 1) tree
 4 | (* val (prefixSums, _) = Tree.scan 0 (fn (x, y) => x + y) tree  *)
 5 | val iprefixSums = Tree.iscan 0 (fn (x, y) => x + y) tree  
 6 | val _ = print ("Balanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
 7 | val _ = print ("Balanced tree (filtered) is: " ^ Tree.toString Int.toString tree_f ^ "\n")
 8 | (* val _ = print ("Balanced tree (scan) is: " ^ Tree.toString Int.toString prefixSums ^ "\n") *)
 9 | val _ = print ("Balanced tree (iscan) is: " ^ Tree.toString Int.toString iprefixSums ^ "\n")
10 | val _ = print ("Height of balanced tree is: " ^ Int.toString (Tree.height tree) ^ "\n")
11 | val _ = print ("Height of filtered balanced tree is: " ^ Int.toString (Tree.height tree_f) ^ "\n")
12 | 
13 | 
14 | val tree = Tree.mkUnbalanced 10
15 | val SOME tree_f = Tree.filter (fn x => Int.mod(x, 2) = 1) tree
16 | (* val (prefixSums, _) = Tree.scan 0 (fn (x, y) => x + y) tree  *)
17 | val iprefixSums = Tree.iscan 0 (fn (x, y) => x + y) tree 
18 | val _ = print ("Unbalanced tree is: " ^ Tree.toString Int.toString tree ^ "\n")
19 | val _ = print ("Unbalanced tree (filtered) is: " ^ Tree.toString Int.toString tree_f ^ "\n")
20 | (* val _ = print ("Unbalanced tree (scan) is: " ^ Tree.toString Int.toString prefixSums ^ "\n") *)
21 | val _ = print ("Unbalanced tree (iscan) is: " ^ Tree.toString Int.toString iprefixSums ^ "\n") 
22 | val _ = print ("Height of unbalanced tree is: " ^ Int.toString (Tree.height tree) ^ "\n")
23 | val _ = print ("Height of filtered unbalanced tree is: " ^ Int.toString (Tree.height tree_f) ^ "\n")
24 | 
25 | 


--------------------------------------------------------------------------------
/bfs/offset-search.sml:
--------------------------------------------------------------------------------
 1 | structure OffsetSearch :>
 2 | sig
 3 |   (* `search x xs (lo, hi)` searches the sorted array `xs` between indices `lo`
 4 |    * and `hi`, returning `(i, j)` where `i-lo` is the number of elements that
 5 |    * are strictly less than `x`, and `j-i` is the number of elements which are
 6 |    * equal to `x`. *)
 7 |   val search : int -> int array -> int * int -> int * int
 8 | end =
 9 | struct
10 | 
11 |   val sub = Array.sub
12 |   val upd = Array.update
13 | 
14 |   fun lowSearch x xs (lo, hi) =
15 |     case hi - lo of
16 |       0 => lo
17 |     | n => let val mid = lo + n div 2
18 |            in case Int.compare (x, sub (xs, mid)) of
19 |                  LESS => lowSearch x xs (lo, mid)
20 |                | GREATER => lowSearch x xs (mid + 1, hi)
21 |                | EQUAL => lowSearchEq x xs (lo, mid)
22 |            end
23 | 
24 |   and lowSearchEq x xs (lo, mid) =
25 |     if (mid = 0) orelse (x > sub (xs, mid-1))
26 |     then mid
27 |     else lowSearch x xs (lo, mid)
28 | 
29 |   and highSearch x xs (lo, hi) =
30 |     case hi - lo of
31 |       0 => lo
32 |     | n => let val mid = lo + n div 2
33 |            in case Int.compare (x, sub (xs, mid)) of
34 |                  LESS => highSearch x xs (lo, mid)
35 |                | GREATER => highSearch x xs (mid + 1, hi)
36 |                | EQUAL => highSearchEq x xs (mid, hi)
37 |            end
38 | 
39 |   and highSearchEq x xs (mid, hi) =
40 |     if (mid = Array.length xs - 1) orelse (x < sub (xs, mid + 1))
41 |     then mid + 1
42 |     else highSearch x xs (mid + 1, hi)
43 | 
44 |   and search (x : int) (xs : int array) (lo, hi) : int * int =
45 |     case hi - lo of
46 |       0 => (lo, lo)
47 |     | n => let val mid = lo + n div 2
48 |            in case Int.compare (x, sub (xs, mid)) of
49 |                 LESS    => search x xs (lo, mid)
50 |               | GREATER => search x xs (mid + 1, hi)
51 |               | EQUAL   => (lowSearchEq x xs (lo, mid), highSearchEq x xs (mid, hi))
52 |            end
53 | 
54 | end
55 | 


--------------------------------------------------------------------------------
/trees/old/sequential/Tree.sml:
--------------------------------------------------------------------------------
 1 | structure Tree = 
 2 | struct
 3 | 
 4 | (* A binary ("apple") tree of type 'a elements ("apples")" *) 
 5 | datatype 'a t = Leaf of 'a | Node of 'a t * 'a t
 6 |   
 7 | (* Define "size of a tree" as the number of internal nodes *)
 8 | 
 9 | (* Create a balanced integer tree of the given size n *)
10 | fun mkBalanced n = 
11 |   let
12 |     fun mk i n = 
13 |       if n = 0 then
14 |         Leaf i 
15 |       else    
16 |         let 
17 |           val nn = n - 1 
18 |           val m = Int.div (nn, 2) 
19 |           val left = mk i m
20 |           val right = mk (i + m + 1) (nn - m) 
21 |         in
22 |           Node (left, right) 
23 |         end
24 |   in
25 |     mk 0 n
26 |   end
27 | 
28 | (* Create an un balanced tree of the given size n *)
29 | fun mkUnbalanced n = 
30 |   let
31 |     fun mk i n = 
32 |       if n = 0 then
33 |         Leaf i
34 |       else    
35 |         let 
36 |           val left = mk i (n - 1) 
37 |         in
38 |           Node (left, Leaf n) 
39 |         end
40 |   in
41 |     mk 0 n
42 |   end
43 | 
44 | (* Take eToString which makes a string out of an element return a string rep of the tree. *)
45 | fun toString eToString t = 
46 |   case t of 
47 |     Leaf x => eToString x
48 |   | Node (l, r) => 
49 |     let val ls = toString eToString l 
50 |         val rs = toString eToString r 
51 |     in
52 |       ls ^ " " ^ rs
53 |     end
54 | 
55 | fun height t =
56 |   case t of 
57 |     Leaf _ => 0
58 |   | Node (l, r) =>
59 |     let val (hl, hr) = (height l, height r) in
60 |       if hl > hr then 1 + hl else 1 + hr
61 |     end 
62 | 
63 | (* Map f over tree t *)
64 | fun map f t =
65 |   case t of 
66 |     Leaf x => Leaf (f x)
67 |   | Node (l, r) =>
68 |     let val (ll, rr) = (map f l, map f r) in
69 |       Node (ll, rr)
70 |     end 
71 | 
72 | (* Reduce tree t with f identity id *)
73 | fun reduce f id t =
74 |   case t of 
75 |     Leaf x => f (id, x)
76 |   | Node (l, r) =>
77 |     let val (ls, rs) = (reduce f id l, reduce f id r) in
78 |       f (ls, rs)
79 |     end 
80 | 
81 | end
82 | 


--------------------------------------------------------------------------------
/sequences/bad-par/main.sml:
--------------------------------------------------------------------------------
 1 | (* Usage: examples -n <sequence length> *)
 2 | 
 3 | structure S = ArraySequence
 4 | structure CLA = CommandLineArgs
 5 | 
 6 | val defaultInput = 9
 7 | val n = CLA.parseInt "n" defaultInput
 8 | 
 9 | 
10 | fun printWork s nested updates evens max sUpdated t scanMax it tTree scanMaxTree itTree flat = 
11 |   if n > defaultInput then
12 |     ()
13 |   else
14 |     let 
15 |       fun pairToString (i,j) =
16 |         "(" ^ Int.toString i ^ ", " ^ Int.toString j ^ ")"
17 |       val ss = S.toString Int.toString s
18 |       val sevens = S.toString Int.toString evens
19 |       val supdates = S.toString pairToString updates
20 |       val ssUpdated = S.toString Int.toString sUpdated
21 |       val ts = S.toString Int.toString t
22 |       val its = S.toString Int.toString it			
23 |       val tsTree = S.toString Int.toString tTree
24 |       val itsTree = S.toString Int.toString itTree
25 |       val snested = S.toString (S.toString Int.toString) nested
26 |       val sflat = S.toString Int.toString flat
27 | 
28 |       val _ = print ("Sequence is: " ^ ss ^ "\n")
29 |       val _ = print ("Filtered   : " ^ sevens ^ "\n")
30 |       val _ = print ("Max        : " ^ Int.toString max ^ "\n")
31 |       val _ = print ("Scan       : " ^ ts ^ " and scanMax = " ^ Int.toString scanMax ^ "\n")
32 |       val _ = print ("IScan      : " ^ its ^ "\n")
33 |       val _ = print ("ScanTree   : " ^ tsTree ^ " and scanMaxTree = " ^ Int.toString scanMaxTree ^ "\n")
34 |       val _ = print ("IScanTree  : " ^ itsTree ^ "\n")			
35 |       val _ = print ("Updates   : " ^ supdates ^ "\n")
36 |       val _ = print ("Updated   : " ^ ssUpdated ^ "\n")
37 |       val _ = print ("Nested sequence is: " ^ snested ^ "\n")
38 |       val _ = print ("Flat sequence is  : " ^ sflat ^ "\n")
39 |     in
40 |       ()
41 |     end
42 | 
43 | val _ = print ("# Begin: Array Sequences, n =" ^ Int.toString n ^ "\n")
44 | val s = S.tabulate (fn i => i) n
45 | val nested = S.tabulate (fn i => s) (Int.min (1 + Int.div (n, 1000), 10))
46 | val flat = S.flatten nested
47 | 
48 | val updates = S.tabulate (fn i => (2*Int.div(i, 2), 10*i)) (Int.div (n, 2))
49 | val evens = S.filter (fn i => Int.mod(i,2)=0) s
50 | val max = S.reduce Int.max ~1 s
51 | 
52 | val sUpdated = S.inject s updates
53 | val (t, scanMax) = S.scan Int.max 0 s
54 | val it = S.iscan Int.max 0 s
55 | 
56 | val (tTree, scanMaxTree) = S.scanTree Int.max 0 s
57 | val itTree = S.iscanTree Int.max 0 s
58 | 
59 | 
60 | 
61 | val () = printWork s  nested updates evens max sUpdated t scanMax it tTree scanMaxTree itTree flat 
62 | val _ = print ("# End: Array sequences\n")
63 | 


--------------------------------------------------------------------------------
/msort/msort.sml:
--------------------------------------------------------------------------------
 1 | (* Usage:
 2 |  * msort [-n <input size> [--check] [--seq | --par] 
 3 |  *)
 4 | 
 5 | structure S = ArraySequence
 6 | structure CLA = CommandLineArgs
 7 | 
 8 | val par = ForkJoin.par
 9 | 
10 | val usage = "Please supply one of '--seq', '--par'"
11 | val doCheck = CLA.parseFlag "check"
12 | val doSeq = CLA.parseFlag "seq"
13 | val doPar = CLA.parseFlag "par"
14 | 
15 | val defaultInput = 9
16 | val n = CLA.parseInt "n" defaultInput
17 | 
18 | 
19 | fun intSeqToString s =
20 | 	let
21 | 		val prefix = S.take s (Int.min (10, S.length s))
22 |   in											
23 |     (S.toString Int.toString prefix) ^ "..."
24 |   end
25 | 		
26 | fun intSeqCmp (s, t) =
27 | 	let
28 | 	  fun zip i = (S.nth s i, S.nth t i)
29 |     val zipped = S.tabulate zip (S.length s)
30 |     val eqs = S.map (fn (i, j) => i = j) zipped								 
31 |   in								
32 |     S.reduce (fn (x, y) => x andalso y) true eqs 
33 | 	end
34 | 	
35 | 
36 | fun msortSeq (cmp: 'a * 'a -> order) (a: 'a Seq.t): 'a Seq.t = 
37 |   (* Sequential merge sort *)
38 |   let
39 |   in
40 |     (a)
41 |   end 
42 | 
43 | fun msort (cmp: 'a * 'a -> order) (a: 'a Seq.t): 'a Seq.t = 
44 |   (* Parallel merge sort *)	
45 |   let 
46 |   in
47 | 		a
48 |   end
49 | 
50 | 
51 | val _ = print ("# Begin: msort n =" ^ Int.toString n ^ "\n")
52 | val s = S.tabulate (fn i => Int.mod (Util.hash i, 9999)) n 
53 | val _ = print ("# Merge sorting\n")
54 | val _ = if n <= defaultInput then
55 |           print ("Input = " ^ intSeqToString s ^ "\n")  
56 |         else 
57 |           ()
58 | val result = 
59 |   if doSeq then
60 |     let 
61 |       val _ = print ("# sorting sequentially\n")
62 |       val result = Benchmark.run "running msort" (fn _ => msortSeq Int.compare s)
63 |       val _ = print ("# sequential sort = " ^ intSeqToString result ^ "\n")
64 |     in
65 |       result
66 |     end
67 |   else if doPar then
68 |     let 
69 |       val _ = print ("# Sorting in parallel\n")
70 |       val result = Benchmark.run "running msort" (fn _ => msort Int.compare s)
71 |       val _ = print ("# parallel msort = " ^ intSeqToString result ^ "\n")
72 |     in
73 |       result
74 |     end
75 |   else 
76 |     Util.die usage
77 | 
78 | val _ = 
79 |   if doCheck then
80 |     let
81 |       val resultSeq = msortSeq Int.compare s
82 |     in
83 |       if intSeqCmp(result, resultSeq) then
84 |         print ("Correct? YES\n")
85 |       else
86 |         (print ("Correct? NO! Got: " ^ intSeqToString result ^ " expected: " ^ intSeqToString resultSeq ^ "\n");
87 |          print ("Input: " ^ intSeqToString s  ^ "\n"))
88 |     end
89 |   else 
90 |     ()
91 |  
92 | val _ = print ("# End: msort\n")
93 | 


--------------------------------------------------------------------------------
/sequences/fast-par/mcss.sml:
--------------------------------------------------------------------------------
 1 | (* Usage: examples -n <sequence length> *)
 2 | 
 3 | structure S = ArraySequence
 4 | structure CLA = CommandLineArgs
 5 | 
 6 | val doCheck = CLA.parseFlag "check"
 7 | val doSerial = CLA.parseFlag "seq"
 8 | val doParallel = CLA.parseFlag "par"
 9 | 
10 | val defaultInput = 9
11 | val n = CLA.parseInt "n" defaultInput
12 | 
13 | fun iteratePrefixes f b s =
14 |   let 
15 |     fun g ((l, b), a) = (b::l, f(b, a))
16 |     val (l, r) = S.iterate g ([], b) s
17 |   in 
18 |     (S.fromList (List.rev l), r)
19 |   end
20 | 
21 | fun iteratePrefixesUnordered f b s =
22 |   let 
23 |     fun g ((l, b), a) = (b::l, f(b, a))
24 |     val (l, r) = S.iterate g ([], b) s
25 |   in 
26 |     (S.fromList l, r)
27 |   end
28 | 
29 | 
30 | fun mcssSeq a = 
31 |   (* Kanade's linear-time sequential algorithm *)
32 |   let
33 |     fun f(sum, x) = 
34 |         if sum + x >= x then
35 |           sum + x
36 |         else
37 |           x
38 |     val (b, total) = iteratePrefixesUnordered f 0 a
39 |     val m = S.reduce Int.max (S.nth b 0) b
40 |   in
41 |     Int.max (m, total)
42 |   end 
43 | 
44 | fun mcss a = 
45 |   let 
46 |     val b = S.scanWithTotal Int.+ 0 a
47 |     (* use first element of the sequence for min identity *)
48 |     val (c, _) = S.scan Int.min (S.nth b 0) b    
49 |     val d = S.tabulate 
50 |               (fn i => (S.nth b (i+1)) - (S.nth c (i+1)))
51 |               (S.length a)
52 |     (*
53 |     val _ = print ("b = " ^ S.toString Int.toString b ^ "\n")
54 |     val _ = print ("c = " ^ S.toString Int.toString c ^ "\n")
55 |     val _ = print ("d = " ^ S.toString Int.toString d ^ "\n")
56 |     *)
57 |    in
58 |      (* use first element of the sequence for max identity *)
59 |      S.reduce Int.max (S.nth d 0) d
60 |    end
61 | 
62 | val _ = print ("# Begin: MCSS n =" ^ Int.toString n ^ "\n")
63 | val m = Int.max (n div 100, 10)
64 | val s = S.tabulate (fn i => (Util.hash i) mod m - (Util.hash (2*n+i+1))  mod m) n
65 | val _ = print ("# Calculating mcss\n")
66 | val _ = if n <= defaultInput then
67 |           print ("Input = " ^ S.toString Int.toString s ^ "\n")  
68 |         else 
69 |           ()
70 | val _ = 
71 |   if doSerial then
72 |     let 
73 |       val _ = print ("# Calculating sequentially\n")
74 |       val rseq = mcssSeq s
75 |     in
76 |       print ("# sequential mcss = " ^ Int.toString rseq ^ "\n")
77 |     end
78 |   else if doParallel then
79 |     let 
80 |       val _ = print ("# Calculating in parallel\n")
81 |       val rpar = mcss s 
82 |     in
83 |       print ("# parallel mcss = " ^ Int.toString rpar ^ "\n")
84 |     end
85 |   else 
86 |     print ("Please supply one of '--seq' ar '--par' flag\n")
87 |  
88 | val _ = print ("# End: MCSS\n")
89 | 


--------------------------------------------------------------------------------
/local/TutorialBenchmark.sml:
--------------------------------------------------------------------------------
 1 | structure TutorialBenchmark =
 2 | struct
 3 | 
 4 |   fun getTimes msg n f =
 5 |     let
 6 |       fun loop tms n lastDot =
 7 |         let
 8 |           val (result, tm) = Util.getTime f
 9 |           val t = Time.now ()
10 |         in
11 |           (* print (msg ^ " " ^ Time.fmt 4 tm ^ "s\n"); *)
12 | 
13 |           if n <= 1 then
14 |             (result, List.rev (tm :: tms))
15 |           else if Time.toReal (Time.- (t, lastDot)) >= 0.25 then
16 |             (print "."; loop (tm :: tms) (n-1) t)
17 |           else
18 |             loop (tm :: tms) (n-1) lastDot
19 |         end
20 | 
21 |       val result = loop [] n (Time.now ())
22 |     in
23 |       print "\n";
24 |       result
25 |     end
26 | 
27 |   fun run {warmup, repeat=rep} msg f =
28 |     let
29 |       val warmup = Time.fromReal warmup
30 |       val _ =
31 |         if rep >= 1 then ()
32 |         else Util.die "Benchmark.run: repeat must be at least 1"
33 | 
34 |       (* val _ = print (msg ^ "\n") *)
35 | 
36 |       fun warmupLoop startTime lastDot =
37 |         if Time.>= (Time.- (Time.now (), startTime), warmup) then
38 |           () (* warmup done! *)
39 |         else
40 |           let
41 |             val (_, tm) = Util.getTime f
42 |             val t = Time.now ()
43 |           in
44 |             if Time.toReal (Time.- (t, lastDot)) >= 0.25 then
45 |               (print "."; warmupLoop startTime t)
46 |             else
47 |               warmupLoop startTime lastDot
48 |             (* print ("warmup_run " ^ Time.fmt 4 tm ^ "s\n"); *)
49 |           end
50 | 
51 |       val _ =
52 |         if Time.<= (warmup, Time.zeroTime) then ()
53 |         else ( print ("warmup...")
54 |              ; warmupLoop (Time.now ()) (Time.now ())
55 |              ; print (" ")
56 |              )
57 | 
58 |       val _ = print ("timing...")
59 | 
60 |       val t0 = Time.now ()
61 |       val (result, tms) = getTimes "time" rep f
62 |       val t1 = Time.now ()
63 | 
64 |       val endToEnd = Time.- (t1, t0)
65 | 
66 |       val total = List.foldl Time.+ Time.zeroTime tms
67 |       val avg = Time.toReal total / (Real.fromInt rep)
68 |     in
69 |       (* print "\n"; *)
70 |       print (msg ^ " time: " ^ Real.fmt (StringCvt.FIX (SOME 4)) avg ^ "s\n");
71 |       (* print ("total   " ^ Time.fmt 4 total ^ "s\n");
72 |       print ("end-to-end " ^ Time.fmt 4 endToEnd ^ "s\n"); *)
73 |       result
74 |     end
75 | 
76 |   fun runWithArgsFromCommandLine msg f =
77 |     let
78 |       val warmup = CommandLineArgs.parseReal "warmup" 0.0
79 |       val repeat = CommandLineArgs.parseInt "repeat" 1
80 | 
81 |       val _ = print ("warmup " ^ Time.fmt 4 (Time.fromReal warmup) ^ "\n")
82 |       val _ = print ("repeat " ^ Int.toString repeat ^ "\n")
83 |     in
84 |       run {warmup = warmup, repeat = repeat} msg f
85 |     end
86 | 
87 | end
88 | 


--------------------------------------------------------------------------------
/sequences/bad-par/README.md:
--------------------------------------------------------------------------------
 1 | # Sequences
 2 | 
 3 | A sequence 
 4 | `<a_0, a_2, ..., a_n>`
 5 | is an ordered collection of elements that support several operations, inclduing fast random access.
 6 | 
 7 | # Sequence Interface
 8 | 
 9 | Sequences support the following operations
10 | 
11 | * `length` returns the length of the sequence
12 | * `nth` returns the element at the specified position (counting from 0)
13 | * `empty` returns an empty sequence
14 | * `singleton` takes an element and returns a sequence that contains that element (only)
15 | * `tabulate` takes 1) a generator function that takes a position and generates the element at that position 2) a length and returns a sequence of the given length where the element at a given position is computed by appling the generator function (at that position)
16 | * `map` takes a function from elements to (possibly new type of) elements and creates a new sequence by appling the function to each element
17 | * `subseq` takes a sequence and an interval and returns a subsequence that contains the elements in the given interval 
18 | * `append` takes two sequences and appends them
19 | * `filter` takes a boolean function and a sequence and returns a new sequencue consisting of elements that satisfy the function
20 | * `flatten` takes a sequence of sequences and flattens it into a single, flat sequencues by appending the nested sequencues,
21 | * `update` takes an input sequence and a position value pair and returns a new sequence that is identical to the input sequence except at the given position, which contains the specified value
22 | * `inject` takes an input sequence and a sequencue of updates consisting of position-value pairs and returns a new sequence that is idential to the input sequence except and specified updates.  For each updated position, the output sequence contains (an arbitrary) one of the updated values.   
23 | * `isEmpty` returns `true` if the input sequence is empty and `false` otherwise
24 | * `isSingleton` returns `true` if the input sequence is a singleton and `false` otherwise
25 | * `iterate` takes 1) an iterator function, 2) an initial value, 3) and a sequence and iteratively applies the iterator function to the elements of the sequence and previously computed value (or the initial value) and returns the final computed value
26 | * `reduce` takes a 1) associative reducer function that maps to elements to another element, 2) the identity value of the reducer function, and 3) and a sequence and returs the reduced value for the sequence   
27 | * `scan` takes a 1) associative reducer function that maps to elements to another element, 2) the identity value of the reducer function, and 3) and a sequence and returns 1) the reduced value for each prefix of the sequence (starting with the emtpy sequence, for which the value is identity), and 2) the reduced value for the whole sequence    
28 | 
29 | 


--------------------------------------------------------------------------------
/bfs/bfs.sml:
--------------------------------------------------------------------------------
  1 | (* Usage:
  2 |  * bfs <input file> [-source <vertex number>] [--check] [--seq | --par | --parcas] 
  3 |  * Example: bfs inputs/rmat-1K-symm -source 0 --check --parcas
  4 |  *)
  5 | 
  6 | structure CLA = CommandLineArgs
  7 | structure G = AdjacencyGraph(Int)
  8 | 
  9 | structure SBFS = SequentialBFS
 10 | structure PBFS = ParBFS
 11 | structure PCASBFS = ParCASBFS
 12 | structure DBFS = DoptBFS
 13 | 
 14 | val usage = "bfs <filename> <mode: seq | dopt | par | parcas> [--source <number>] [--check]"
 15 | val source = CLA.parseInt "source" 0
 16 | val doCheck = CLA.parseFlag "check"
 17 | 
 18 | 
 19 | val (filename, mode) =
 20 |   case CLA.positional () of
 21 |     [x,y] => (x, y)
 22 |   | _ => Util.die usage
 23 | 
 24 | 
 25 | fun readGraph(filename) = 
 26 |   let 
 27 |     val (graph, tm) = Util.getTime (fn _ => G.parseFile filename)
 28 |     val _ = print ("num vertices: " ^ Int.toString (G.numVertices graph) ^ "\n")
 29 |     val _ = print ("num edges: " ^ Int.toString (G.numEdges graph) ^ "\n")
 30 |     val _ = print ("source: " ^ Int.toString source ^ "\n")
 31 |     val _ = print ("check for correctness: " ^ (if doCheck then "yes" else "no") ^ "\n")
 32 |     val (_, tm) = Util.getTime (fn _ =>
 33 |       if G.parityCheck graph then ()
 34 |       else TextIO.output (TextIO.stdErr,
 35 |         "WARNING: parity check failed; graph might not be symmetric " ^
 36 |         "or might have duplicate- or self-edges\n"))
 37 |     val _ = print ("parity check in " ^ Time.fmt 4 tm ^ "s\n")
 38 |   in
 39 |     graph
 40 |   end
 41 | 
 42 | fun numHops P hops v =
 43 |   if hops > Seq.length P then ~2
 44 |   else if Seq.nth P v = ~1 then ~1
 45 |   else if Seq.nth P v = v then hops
 46 |   else numHops P (hops+1) (Seq.nth P v)
 47 | 
 48 | 
 49 | fun check graph source P =
 50 |   let
 51 |     val (P', sequentialTime) =
 52 |       Util.getTime (fn _ => SBFS.bfs graph source)
 53 | 
 54 |     val correct =
 55 |       Seq.length P = Seq.length P'
 56 |       andalso
 57 |       SeqBasis.reduce 10000 
 58 |         (fn (a, b) => a andalso b) 
 59 |         true 
 60 |         (0, Seq.length P)
 61 |         (fn i => numHops P 0 i = numHops P' 0 i)
 62 |   in
 63 |     print ("sequential finished in " ^ Time.fmt 4 sequentialTime ^ "s\n");
 64 |     print ("correct? " ^ (if correct then "yes" else "no") ^ "\n")
 65 |   end
 66 | 
 67 | 
 68 | fun runBFS graph source bfs =  
 69 |   let 
 70 |     fun numHops P hops v =
 71 |       if hops > Seq.length P then ~2
 72 |       else if Seq.nth P v = ~1 then ~1
 73 |       else if Seq.nth P v = v then hops
 74 |       else numHops P (hops+1) (Seq.nth P v)
 75 | 
 76 |     val P = Benchmark.run "running bfs" (fn _ => bfs graph source)
 77 | 
 78 |     val numVisitedSeq =
 79 |       SeqBasis.reduce 10000 op+ 0 (0, Seq.length P)
 80 |         (fn i => if Seq.nth P i >= 0 then 1 else 0)
 81 |     val _ = print ("visited " ^ Int.toString numVisitedSeq ^ "\n")
 82 | 
 83 |     val maxHops =
 84 |       SeqBasis.reduce 100 Int.max ~3 (0, G.numVertices graph) (numHops P 0)
 85 |     val _ = print ("max dist " ^ Int.toString maxHops ^ "\n")
 86 | 
 87 |   in 
 88 |     if doCheck then check graph source P else ()
 89 |   end
 90 | 
 91 | val graph = readGraph(filename)
 92 | 
 93 | val () = if mode = "seq" then
 94 |            runBFS graph source SBFS.bfs
 95 |          else if mode = "dopt" then
 96 |            runBFS graph source DBFS.bfs
 97 |          else if mode = "parcas" then
 98 |            runBFS graph source PCASBFS.bfs
 99 |          else if mode = "par" then
100 |            runBFS graph source PBFS.bfs
101 |          else
102 |            print ("Incorrect mode:" ^ usage)
103 | 
104 | val _ = GCStats.report ()
105 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MPL Tutorial
 2 | 
 3 | ## Introduction
 4 | 
 5 | [MPL][mpl] is a compiler for parallel programming on shared-memory multicore
 6 | machines. The MPL language is essentially [Standard ML][sml] (SML) with
 7 | extensions for parallelism.
 8 | 
 9 | This tutorial provides an introduction to using MPL. You don't need to
10 | already know SML, but you should be comfortable with using the command-line
11 | shell and know some basic programming (integers, booleans, conditionals,
12 | variables, functions, recursion, etc.).
13 | 
14 | All source code is contained in this repo. Some of the examples use the
15 | [`mpllib`](https://github.com/MPLLang/mpllib).
16 | 
17 | ## Get started
18 | 
19 | (**Note**: more detailed instructions available [here](setup/README.md)).
20 | 
21 | We recommend that you clone this repository and then run the tutorial
22 | in a [Docker container](https://www.docker.com/) using the top-level Dockerfile.
23 | 
24 | ```
25 | $ git clone https://github.com/MPLLang/mpl-tutorial.git
26 | $ cd mpl-tutorial
27 | $ ./start-container.sh
28 | ```
29 | 
30 | This opens a bash shell in the container, which should have a prompt that
31 | looks something like this (the numbers after `root@` may differ; this is
32 | normal):
33 | ```
34 | root@b80fc75d8c76:~/mpl-tutorial#
35 | ```
36 | 
37 | For simplicity throughout the tutorial, we will write `<container>#` before
38 | commands that are intended to be run inside the Docker container.
39 | 
40 | ### Inside the container
41 | 
42 | The directory structure inside the
43 | container is as follows. Starting the container puts us inside the
44 | `mpl-tutorial` directory.
45 | 
46 | ```
47 | root
48 | ├── mpl            # the MPLLang/mpl repository
49 | └── mpl-tutorial   # this repository
50 | ```
51 | 
52 | Inside the container, the directory `mpl-tutorial` is mounted from your local
53 | machine. Any changes within this directory will be visible both inside
54 | and outside the container. This ensure that any changes you make will not be
55 | lost when you exit the container, and also allows you to use any text editor
56 | outside the container to edit files.
57 | 
58 | ### Pull the library
59 | 
60 | Once you have started the container, you need to pull the library code:
61 | 
62 | ```
63 | <container># pwd
64 | /root/mpl-tutorial
65 | <container># smlpkg sync
66 | ```
67 | 
68 | This populates the directory `lib` with packages that this tutorial depends
69 | on. You only need to do this once, when starting the tutorial for the first
70 | time.
71 | 
72 | Do not modify the contents of the `lib` subdirectory. These are maintained
73 | by the package manager.
74 | 
75 | ## Table of Contents
76 | 
77 | 1. [Setup](setup/README.md): running with docker and/or installing the compiler
78 | 2. [Hello World](hello/README.md): writing, compiling, and running a simple program
79 | 3. [Parallelism and Granularity Control](how-to-par/README.md): simple parallelism with `ForkJoin.par`, and work-efficiency via granularity control
80 | 4. [Trees](trees/README.md): parallel algorithms on binary trees, tree
81 | balance experiments
82 | 4. [Sequences](sequences/README.md): parallel algorithms on sequences
83 | 5. [MCSS](mcss/README.md): the Maximum Contigous Subsequence Sum problem
84 | 6. [BFS](bfs/README.md): Breadth First Search.
85 | 
86 | [mpl]: https://github.com/MPLLang/mpl
87 | [sml]: https://en.wikipedia.org/wiki/Standard_ML
88 | 
89 | ## FAQ
90 | 
91 | **Help: Inside the Docker container, a process dies with the message `Killed`**.
92 | This is likely due to a Docker memory limit. See the *Docker Resource Limits*
93 | section of [Setup](setup/README.md) for instructions on how to fix.
94 | 
95 | **Help: When compiling, I see a long string of errors such as `Undefined structure`**.
96 | Make sure you've pulled the library code. See the *Pull the library* section,
97 | above.
98 | 


--------------------------------------------------------------------------------
/hello/README.md:
--------------------------------------------------------------------------------
  1 | # 2. Hello World
  2 | 
  3 | [(← Setup)](../setup/README.md) [(Parallelism and Granularity Control →)](../how-to-par/README.md)
  4 | 
  5 | ## Preliminaries
  6 | 
  7 | Make sure that you've already done the [setup](../setup/README.md). If
  8 | you're using Docker to run the tutorial, all commands below should be
  9 | run within the container in directory `~/mpl-tutorial/hello/`:
 10 | 
 11 | ```
 12 | $ cd path/to/mpl-tutorial
 13 | $ ./start-container.sh
 14 | <container># cd hello
 15 | <container># <enter commands here>
 16 | ```
 17 | 
 18 | ## Write it
 19 | 
 20 | Our first program is a one-liner: the function `print` takes a string as
 21 | argument and writes it to the terminal.
 22 | Note that in SML, it is common to call a function without putting parentheses
 23 | around its arguments (e.g. `f x` instead of `f(x)`).
 24 | 
 25 | [`mpl-tutorial/hello/hello.sml`](./hello.sml):
 26 | ```sml
 27 | val _ = print "hello world\n"
 28 | ```
 29 | 
 30 | <details>
 31 | <summary><strong>Question</strong>: What does <code>val _ =</code> mean?</summary>
 32 | <blockquote>
 33 | Normally, we use the syntax <code>val ... = ...</code> to introduce a new
 34 | variable. For example, <code>val x = 2+2</code>.
 35 | 
 36 | But in this case, <code>print</code> doesn't return anything interesting, so we
 37 | just write <code>val _ = print ...</code> which means "print the thing, but
 38 | don't introduce a new variable for the result".
 39 | </blockquote>
 40 | </details>
 41 | 
 42 | ## Compile and run it
 43 | 
 44 | To compile this file, pass it to `mpl` at the command-line. This produces
 45 | an executable called `hello`. By default, `mpl` names the executable the same
 46 | as the source file. We can tell it to use a different name with the
 47 | `-output` flag.
 48 | 
 49 | ```
 50 | <container># ls
 51 | README.md  hello-again.sml  hello-twice.mlb  hello.sml
 52 | 
 53 | <container># mpl hello.sml
 54 | <container># ./hello
 55 | hello world
 56 | 
 57 | <container># mpl -output foobar hello.sml
 58 | <container># ./foobar
 59 | hello world
 60 | ```
 61 | 
 62 | ## Compiling multiple files as one program
 63 | 
 64 | **`.mlb` Files**. Typically, we don't write programs as just a single `.sml`
 65 | file. Instead, we write multiple separate files and then compile them together
 66 | as one program. To do this with MPL, we need to write an additional file that
 67 | describes how to put the files together. This additional file is called an
 68 | [ML Basis File](http://mlton.org/MLBasis), and has the extension `.mlb`.
 69 | 
 70 | For example, take a look at [hello-twice.mlb](./hello-twice.mlb), which tells
 71 | MPL to load three things: the
 72 | [SML basis library](https://smlfamily.github.io/Basis/index.html), and two
 73 | files: [hello.sml](./hello.sml) followed by
 74 | [hello-again.sml](./hello-again.sml).
 75 | 
 76 | [`mpl-tutorial/hello/hello-twice.mlb`](./hello-twice.mlb):
 77 | ```sml
 78 | $(SML_LIB)/basis/basis.mlb
 79 | hello.sml
 80 | hello-again.sml
 81 | ```
 82 | 
 83 | We can pass an `.mlb` file directly to MPL to produce an executable, similar to
 84 | before.
 85 | 
 86 | ```
 87 | <container># mpl hello-twice.mlb
 88 | <container># ./hello-twice
 89 | hello world
 90 | hello again
 91 | ```
 92 | 
 93 | <details>
 94 | <summary><strong>Question</strong>: What is this <code>$(SML_LIB)/basis/basis.mlb</code> thing? </summary>
 95 | <blockquote>
 96 | This loads the
 97 | <a href="https://smlfamily.github.io/Basis/">SML basis library</a>, which is
 98 | the standard library included in all SML distributions. It includes the
 99 | definition of important functions such as <code>print</code>.
100 | <br><br>
101 | When we compile a <code>.sml</code> file by itself, the basis library is
102 | implicitly included for convenience. But when we use a <code>.mlb</code>, we
103 | have to be more explicit. This way, our <code>.mlb</code> file
104 | describes <strong>everything</strong> about our program. No hidden pieces!
105 | </blockquote>
106 | </details>
107 | 


--------------------------------------------------------------------------------
/trees/old/bad-par/Tree.sml:
--------------------------------------------------------------------------------
  1 | structure Tree = 
  2 | struct
  3 | 
  4 | (* A binary ("apple") tree of type 'a elements ("apples")" *) 
  5 | datatype 'a t = Leaf of 'a | Node of 'a t * 'a t
  6 |   
  7 | (* Define "size of a tree" as the number of internal nodes *)
  8 | 
  9 | (* Create a balanced integer tree of the given size n *)
 10 | fun mkBalanced n = 
 11 |   let
 12 |     fun mk i n = 
 13 |       if n = 0 then
 14 |         Leaf i 
 15 |       else    
 16 |         let 
 17 |           val nn = n - 1 
 18 |           val m = Int.div (nn, 2) 
 19 |           val (left, right) = ForkJoin.par (fn () => mk i m,  
 20 |                                             fn () => mk (i + m + 1) (nn - m)) 
 21 |         in
 22 |           Node (left, right) 
 23 |         end
 24 |   in
 25 |     mk 0 n
 26 |   end
 27 | 
 28 | (* Create an un balanced tree of the given size n *)
 29 | fun mkUnbalanced n = 
 30 |   let
 31 |     fun mk i n = 
 32 |       if n = 0 then
 33 |         Leaf i
 34 |       else    
 35 |         let 
 36 |           val left = mk i (n - 1) 
 37 |         in
 38 |           Node (left, Leaf n) 
 39 |         end
 40 |   in
 41 |     mk 0 n
 42 |   end
 43 | 
 44 | (* Take eToString which makes a string out of an element return a string rep of the tree. *)
 45 | fun toString eToString t = 
 46 |   case t of 
 47 |     Leaf x => eToString x
 48 |   | Node (l, r) => 
 49 |     let val (ls, rs) = 
 50 |           ForkJoin.par (fn () => toString eToString l, 
 51 |                         fn () => toString eToString r) 
 52 |     in
 53 |       ls ^ " " ^ rs
 54 |     end
 55 | 
 56 | fun height t =
 57 |   case t of 
 58 |     Leaf _ => 0
 59 |   | Node (l, r) =>
 60 |     let 
 61 |       val (hl, hr) = ForkJoin.par (fn () => height l, 
 62 |                                    fn () => height r) 
 63 |     in
 64 |       if hl > hr then 1 + hl else 1 + hr
 65 |     end 
 66 | 
 67 | (* Map f over tree t *)
 68 | fun map f t =
 69 |   case t of 
 70 |     Leaf x => Leaf (f x)
 71 |   | Node (l, r) =>
 72 |     let 
 73 |       val (ll, rr) = ForkJoin.par (fn () => map f l, fn () => map f r) 
 74 |     in
 75 |       Node (ll, rr)
 76 |     end 
 77 | 
 78 | (* Reduce tree t with f identity id *)
 79 | fun reduce f id t =
 80 |   case t of 
 81 |     Leaf x => id x
 82 |   | Node (l, r) =>
 83 |     let val (ls, rs) = ForkJoin.par (fn () => reduce f id l, 
 84 |                                      fn () => reduce f id r) in
 85 |       f (ls, rs)
 86 |     end 
 87 | 
 88 | fun filter f t = 
 89 |   case t of
 90 |     Leaf x => 
 91 |       if f x then
 92 |         SOME (Leaf x)
 93 |       else
 94 |         NONE 
 95 |   | Node (left, right) =>
 96 |       let 
 97 |         val (l, r) = ForkJoin.par (fn () => filter f left,
 98 |                                    fn () => filter f right)
 99 |       in
100 |         case l of  
101 |           NONE => r
102 |         | SOME lt => 
103 |           case r of 
104 |             NONE => l
105 |           | SOME rt => SOME (Node (lt, rt))
106 |       end
107 | 
108 | datatype 'a stree = SLeaf of 'a | SNode of ('a * 'a stree * 'a stree)
109 | exception Error 
110 | fun iscan id f tree = 
111 |   let 
112 |     fun up t = 
113 |       case t of
114 |         Leaf x => (x, SLeaf x)
115 |       | Node (left, right) => 
116 |         let val ((ls, lst), (rs, rst)) = ForkJoin.par (fn () => up left, 
117 |                                                        fn () => up right)
118 |         in
119 |           (f (ls, rs), SNode (ls, lst, rst))
120 |         end
121 |      
122 |     fun down sum sumtree tree =
123 |       case tree of 
124 |         Leaf x => Leaf (f (sum, x))
125 |       | Node (left, right) => 
126 |       let 
127 |          val SNode(s, l, r) = sumtree 
128 |          val (ls, rs) = ForkJoin.par (fn () => down sum l left, 
129 |                                       fn () => down (sum + s) r right)
130 |       in
131 |         Node (ls, rs)
132 |       end
133 | 
134 |     val (sum, sumtree) = up tree   
135 |     in 
136 |       down id sumtree tree
137 |     end   
138 | 
139 | fun scan id f tree = 
140 |   let 
141 |     fun up t = 
142 |       case t of
143 |         Leaf x => (x, SLeaf x)
144 |       | Node (left, right) => 
145 |         let val ((ls, lst), (rs, rst)) = ForkJoin.par (fn () => up left, 
146 |                                                        fn () => up right)
147 |         in
148 |           (f (ls, rs), SNode (ls, lst, rst))
149 |         end
150 |      
151 |     fun down sum sumtree tree =
152 |       case tree of 
153 |         Leaf x => sum
154 |       | Node (left, right) => 
155 |       let 
156 |          val SNode(s, l, r) = sumtree 
157 |          val (ls, rs) = ForkJoin.par (fn () => down sum l left, 
158 |                                       fn () => down (sum + s) r right)
159 |       in
160 |         Node (ls, rs)
161 |       end
162 | 
163 |     val (sum, sumtree) = up tree   
164 |     in 
165 |       down id sumtree tree
166 |     end   
167 | 
168 |         
169 | end
170 | 


--------------------------------------------------------------------------------
/mcss/mcss.sml:
--------------------------------------------------------------------------------
  1 | (* Usage:
  2 |  * mcss [-n <input size> [--check] [--seq | --par | --parcas] 
  3 |  * Example: mcss [-n <input size> [--check] [--seq | --par | --parcas] 
  4 |  *)
  5 | 
  6 | structure S = ArraySequence
  7 | structure CLA = CommandLineArgs
  8 | 
  9 | val par = ForkJoin.par
 10 | 
 11 | val usage = "Please supply one of '--seq', '--par' or '--pardc' flag\n"
 12 | val doCheck = CLA.parseFlag "check"
 13 | val doSeq = CLA.parseFlag "seq"
 14 | val doPar = CLA.parseFlag "par"
 15 | val doParDC = CLA.parseFlag "pardc"
 16 | 
 17 | val defaultInput = 9
 18 | val n = CLA.parseInt "n" defaultInput
 19 | 
 20 | structure I = IntInf
 21 | 
 22 | fun mcssSeq a = 
 23 |   (* Kanade's linear-time sequential algorithm *)
 24 |   let
 25 |     fun f((sum, max), x) = 
 26 |       let
 27 |         val sumx = I.+(sum, x)
 28 |         val sum' =  if I.>= (sumx, x) then sumx else x
 29 |         val max' = I.max(max, sum')
 30 |       in
 31 |         (sum', max')
 32 |       end
 33 | 
 34 |     val (_, max) = S.iterate f (I.fromInt 0, S.first a) a
 35 |   in
 36 |     max
 37 |   end 
 38 | 
 39 | fun mcssSeqWithPrefix a = 
 40 |   let
 41 |     fun f((sum, max, maxprefix, total), x) = 
 42 |       let   
 43 |         val sumx = I.+(sum, x)
 44 |         val sum' =  if I.>= (sumx, x) then sumx else x
 45 |         val max' = I.max(max, sum')     
 46 |         val maxprefix' = I.max(total, maxprefix)
 47 |         val total' = I.+(total, x)
 48 |       in
 49 |         (sum', max', maxprefix', total')
 50 |       end
 51 | 
 52 |     val (_, max, maxprefix, total) = S.iterate f (S.first a, S.first a, S.first a, S.first a) (S.drop a 1)
 53 | 
 54 |   in
 55 |     (max, maxprefix, total)
 56 |   end 
 57 | 
 58 | 
 59 | fun mcss a = 
 60 |   let 
 61 |     val b = S.scanWithTotal I.+ 0 a
 62 |     (* use first element of the sequence for min identity *)
 63 |     val (c, _) = S.scan I.min (S.nth b 0) b    
 64 |     val d = S.tabulate 
 65 |               (fn i => (S.nth b (i+1)) - (S.nth c (i+1)))
 66 |               (S.length a)
 67 |    in
 68 |      (* use first element of the sequence for max identity *)
 69 |      S.reduce I.max (S.nth d 0) d
 70 |    end
 71 | 
 72 | fun mcssdc a = 
 73 |   let
 74 |     val GRAIN = 100000
 75 | 
 76 |     fun maxSuffix a =
 77 |     let
 78 |       fun f((sum, max), x) = 
 79 |         let
 80 |           val sum' = I.+(sum, x)
 81 |           val max' = I.max(max, sum')
 82 |         in
 83 |           (sum', max')
 84 |         end
 85 | 
 86 |       val (_, max) = S.foldr f (S.last a, S.last a) (S.take a (S.length a - 1))
 87 |     in
 88 |       max
 89 |     end 
 90 | 
 91 |     fun mcss a = 
 92 |       if S.length a <= GRAIN then
 93 |         let 
 94 |           val (m, xprefix, total) = mcssSeqWithPrefix a
 95 |           val xsuffix = maxSuffix a
 96 | (*
 97 |           val _ = print ("input = " ^ S.toString I.toString a ^ "\n")
 98 |           val _ = print ("m = " ^ I.toString m ^ 
 99 |                          " xprefix = " ^ I.toString xprefix ^ 
100 |                          " xsuffix = " ^ I.toString xsuffix ^ 
101 |                          " total = " ^ I.toString total ^ "\n")
102 | 
103 | *)
104 |         in
105 |           (m, xprefix, xsuffix, total)
106 |         end
107 |       else 
108 |         let
109 |           val n = S.length a
110 |           val nl = n div 2
111 |           val (left, right) = par (fn () => mcss (S.subseq a (0, nl)), 
112 |                                    fn () => mcss (S.subseq a (nl, n-nl)))
113 |           val (ml, pl, sl, tl) = left
114 |           val (mr, pr, sr, tr) = right
115 | 
116 |           val (m, p, s, t) = 
117 |             (I.max (sl + pr, I.max(ml, mr)),
118 |              I.max (pl, tl + pr),
119 |              I.max (sl + tr, sr),
120 |              I.+(tl, tr))
121 |         in
122 |           (m, p, s, t)
123 |         end
124 |     val (m, _, _, _) = mcss a
125 |   in
126 |     m
127 |   end
128 | 
129 | val _ = print ("# Begin: MCSS n =" ^ Int.toString n ^ "\n")
130 | val m = I.fromInt (Int.max (n div 10, 10))
131 | val s = S.tabulate (fn i => I.mod(I.fromInt(Util.hash i), m) - 
132 |                             I.mod(I.fromInt(Util.hash (2*i+1)), m)) n
133 | val _ = print ("# Calculating mcss\n")
134 | val _ = if n <= defaultInput then
135 |           print ("Input = " ^ S.toString I.toString s ^ "\n")  
136 |         else 
137 |           ()
138 | val result = 
139 |   if doSeq then
140 |     let 
141 |       val _ = print ("# Calculating sequentially\n")
142 |       val result = Benchmark.run "running mcss" (fn _ => mcssSeq s)
143 |       val _ = print ("# sequential mcss = " ^ I.toString result ^ "\n")
144 |     in
145 |       result
146 |     end
147 |   else if doPar then
148 |     let 
149 |       val _ = print ("# Calculating in parallel\n")
150 |       val result = Benchmark.run "running mcss" (fn _ => mcss s)
151 |       val _ = print ("# parallel mcss = " ^ I.toString result ^ "\n")
152 |     in
153 |       result
154 |     end
155 |   else if doParDC then
156 |     let 
157 |       val _ = print ("# Calculating in parallel using DC\n")
158 |       val result = Benchmark.run "running mcss" (fn _ => mcssdc s)
159 |       val _ = print ("# parallel mcss = " ^ I.toString result ^ "\n")
160 |     in
161 |       result
162 |     end
163 |   else 
164 |     Util.die usage
165 | 
166 | val _ = 
167 |   if doCheck then
168 |     let
169 |       val resultSeq = mcssSeq s
170 |     in
171 |       if result = resultSeq then
172 |         print ("Correct? YES\n")
173 |       else
174 |         (print ("Correct? NO! Got: " ^ I.toString result ^ " expected: " ^ I.toString resultSeq ^ "\n");
175 |          print ("Input: " ^ S.toString I.toString (S.take s 10) ^ "\n"))
176 |     end
177 |   else 
178 |     ()
179 |  
180 | val _ = print ("# End: MCSS\n")
181 | 


--------------------------------------------------------------------------------
/setup/README.md:
--------------------------------------------------------------------------------
  1 | # 1: Setup
  2 | 
  3 | [(Hello World →)](../hello/README.md)
  4 | 
  5 | There are two options:
  6 |   1. Run the tutorial in a Docker container. [Instructions below](#option-1-docker)
  7 |   2. Install `mpl` locally on x86-64 Linux. [Instructions below](#option-2-local-install)
  8 | 
  9 | We recommend that you use Docker.
 10 | 
 11 | ## Option 1: Docker
 12 | 
 13 | Clone this repository and then build and run a
 14 | [Docker](https://www.docker.com/) container using the top-level Dockerfile:
 15 | 
 16 | ```
 17 | $ git clone https://github.com/MPLLang/mpl-tutorial.git
 18 | $ cd mpl-tutorial
 19 | $ ./start-container.sh
 20 | ```
 21 | 
 22 | This opens a bash shell in the container, with a prompt that should look
 23 | something like `root@43a65ec53fc6:~#`. The directory structure inside the
 24 | container is as follows. Starting the container puts us inside the
 25 | `mpl-tutorial` directory.
 26 | 
 27 | ```
 28 | root
 29 | ├── mpl            # the MPLLang/mpl repository
 30 | └── mpl-tutorial   # this repository
 31 | ```
 32 | 
 33 | Inside the container, the directory `mpl-tutorial` is mounted from your local
 34 | machine. Any changes within this directory will be visible both inside
 35 | and outside the container. This ensure that any changes you make will not be
 36 | lost when you exit the container, and also allows you to use any text editor
 37 | outside the container to edit files.
 38 | 
 39 | ### Pull the library
 40 | 
 41 | Once you have started the container, you need to pull the library code:
 42 | 
 43 | ```
 44 | <container># pwd
 45 | /root/mpl-tutorial
 46 | <container># smlpkg sync
 47 | ```
 48 | 
 49 | This populates the directory `lib` with packages that this tutorial depends
 50 | on. You only need to do this once, when starting the tutorial for the first
 51 | time.
 52 | 
 53 | Do not modify the contents of the `lib` subdirectory. These are maintained
 54 | by the package manager.
 55 | 
 56 | ### MPL in the Container
 57 | 
 58 | In the container, you can double check that `mpl` has already been installed
 59 | (your version number may differ):
 60 | 
 61 | ```
 62 | <container># mpl
 63 | MLton [mpl] 20200827.140808-gcce156bf3
 64 | ```
 65 | 
 66 | There are also pre-compiled binaries in the `/root/mpl/examples/bin` subdirectory.
 67 | Let's try to run one of these.
 68 | 
 69 | ### Primes Example
 70 | In the container, we can run the pre-compiled `primes` example with 4 processors.
 71 | 
 72 | ```
 73 | <container># /root/mpl/examples/bin/primes @mpl procs 4 --
 74 | generating primes up to 100000000
 75 | finished in 0.6058s
 76 | number of primes 5761455
 77 | result [2, 3, 5, 7, 11, 13, 17, ..., 99999989]
 78 | ```
 79 | 
 80 | Depending on the number of cores you computer has, you might want to use
 81 | fewer processors. The syntax for a MPL executable is
 82 | `<program> @mpl procs <num processors> --`. For
 83 | example, we can run on 1 or 2 processors, shown below.
 84 | We can see that with 2 processors, the `primes` benchmark takes about 1
 85 | second to run. This is about twice as fast as using one processor.
 86 | 
 87 | ```
 88 | <container># /root/mpl/examples/bin/primes @mpl procs 1 --
 89 | generating primes up to 100000000
 90 | finished in 2.1835s
 91 | number of primes 5761455
 92 | result [2, 3, 5, 7, 11, 13, 17, ..., 99999989]
 93 | 
 94 | <container># /root/mpl/examples/bin/primes @mpl procs 2 --
 95 | generating primes up to 100000000
 96 | finished in 1.1390s
 97 | number of primes 5761455
 98 | result [2, 3, 5, 7, 11, 13, 17, ..., 99999989]
 99 | ```
100 | 
101 | ### Other Examples
102 | There are a few other examples in `/root/mpl/examples/bin`
103 | directory. They can all be called in a similar way to `primes`. See
104 | `/root/mpl/examples/README.md` for details.
105 | 
106 | Here is running mergesort on 1 and 2 processors:
107 | ```
108 | <container># /root/mpl/examples/bin/msort @mpl procs 1 --
109 | ./bin/msort @mpl procs 1 --
110 | generating 100000000 random integers
111 | sorting
112 | finished in 27.9411s
113 | result [0, 0, 0, 1, 1, 2, 4, ..., 99999999]
114 | 
115 | <container># /root/mpl/examples/bin/msort @mpl procs 2 --
116 | ./bin/msort @mpl procs 2 --
117 | generating 100000000 random integers
118 | sorting
119 | finished in 15.1132s
120 | result [0, 0, 0, 1, 1, 2, 4, ..., 99999999]
121 | ```
122 | 
123 | ### Docker Resource Limits
124 | 
125 | By default, Docker will place a limit on how much memory a container is
126 | permitted to use. This can cause processes inside the container to be killed,
127 | due to the container running out of memory.
128 | 
129 | For example, you may see this when running the mergesort example, above:
130 | ```
131 | <container># /root/mpl/examples/bin/msort @mpl procs 2 --
132 | ./bin/msort @mpl procs 2 --
133 | generating 100000000 random integers
134 | sorting
135 | Killed
136 | ```
137 | 
138 | To fix this, you need to increaes the Docker memory limit. Here's how to
139 | fix it on Mac. (Navigate to Settings -> Resources -> Memory, and set it
140 | to a larger number, e.g. 8GB).
141 | 
142 | ![setting the Docker memory limit to 8GB on macOS](mac-docker-mem-fix.jpg)
143 | 
144 | ## Option 2: Local Install
145 | 
146 | If you are on x86-64 Linux, you can...
147 | 
148 | 
149 | ## Option 3: Using Existing Implementation
150 | 
151 | If `mpl` in installed on your computer, then you can use it just like
152 | any other program.  For examples in this repo, you want to obtain
153 | first the library code the examples rely on.
154 | 
155 | ### Pull the library
156 | 
157 | Obtain the library code by using `smlpkg` command
158 | 
159 | 
160 | ```
161 | <container># pwd
162 | /root/mpl-tutorial
163 | <container># smlpkg sync
164 | ```
165 | 
166 | This populates the directory `lib` with packages that this tutorial depends
167 | on. You only need to do this once, when starting the tutorial for the first
168 | time.
169 | 
170 | Do not modify the contents of the `lib` subdirectory. These are maintained
171 | by the package manager.
172 | 
173 | 
174 | 
175 | 


--------------------------------------------------------------------------------
/sequences/fast-par/main.sml:
--------------------------------------------------------------------------------
  1 | (* Usage: examples -n <sequence length> *)
  2 | 
  3 | structure S = ArraySequence
  4 | structure CLA = CommandLineArgs
  5 | 
  6 | val defaultInput = 9
  7 | val n = CLA.parseInt "n" defaultInput
  8 | 
  9 | fun fib n = 
 10 |   if n < 2 then 
 11 |     n 
 12 |   else
 13 |     fib (n-1) + fib (n-2)
 14 | 
 15 | fun testBinarySearch () = 
 16 |   let 
 17 |     val a = S.tabulate (fn i => 2*i) n
 18 |     val sa = S.toString Int.toString a
 19 |     val _ = print ("a = " ^ sa ^ "\n")
 20 | 
 21 |     (* Failing search *)
 22 |     val key = (2*Int.div(n,2) - 1)
 23 |     val _ = print ("Looking for: " ^ Int.toString key ^ "\n")
 24 |     val result = S.binarySearch Int.compare a key
 25 |     val _ =  
 26 |       case result of 
 27 |         NONE => print ("not found\n")
 28 |       | SOME pos => print ("found at position: " ^ Int.toString pos ^ "\n")
 29 | 
 30 |     (* Successful search *)
 31 |     val key = (2*Int.div(n,2))
 32 |     val _ = print ("Looking for: " ^ Int.toString key ^ "\n")
 33 |     val result = S.binarySearch Int.compare a key
 34 |     val _ =  
 35 |       case result of 
 36 |         NONE => print ("not found\n")
 37 |       | SOME pos => print ("found at position: " ^ Int.toString pos ^ "\n")
 38 |   in
 39 |     ()
 40 |   end
 41 | 
 42 | fun testBinarySplit () = 
 43 |   let 
 44 |     val a = S.tabulate (fn i => 2*i) n
 45 |     val sa = S.toString Int.toString a
 46 |     val _ = print ("a = " ^ sa ^ "\n")
 47 | 
 48 |     (* Failing search *)
 49 |     val key = (2*Int.div(n,2) - 1)
 50 |     val _ = print ("Looking for: " ^ Int.toString key ^ "\n")
 51 |     val result = S.binarySplit Int.compare a key
 52 |     val _ = print ("result = " ^ Int.toString result ^ "\n")								 
 53 | 
 54 | 
 55 |     (* Successful search *)
 56 |     val key = (2*Int.div(n,2))
 57 |     val _ = print ("Looking for: " ^ Int.toString key ^ "\n")
 58 |     val result = S.binarySplit Int.compare a key
 59 |     val _ = print ("result = " ^ Int.toString result ^ "\n")								 		
 60 |   in
 61 |     ()
 62 |   end
 63 | 
 64 | 
 65 | fun testBivariantSplit () = 
 66 |   let 
 67 |     val a = S.tabulate (fn i => 2*i) n
 68 |     val b = S.tabulate (fn i => 2*i + 1) n
 69 | 
 70 |     val sa = S.toString Int.toString a
 71 |     val sb = S.toString Int.toString b
 72 |     val _ = print ("a = " ^ sa ^ "\n")
 73 |     val _ = print ("b = " ^ sb ^ "\n")
 74 | 
 75 |     val (i, j) = S.bivariantSplit a b (6)
 76 |     val _ = print ("split at i        : " ^ 
 77 |                    Int.toString i ^ " and " ^ " j = " ^ Int.toString j ^ "\n")
 78 | 
 79 |   in
 80 |     ()
 81 |   end
 82 | 
 83 | fun testMerge () = 
 84 |   let 
 85 |     val a = S.tabulate (fn i => 2*i) n
 86 |     val b = S.tabulate (fn i => 2*i + 1) n
 87 |     val c = S.mergeSeq a b 
 88 |     val sa = S.toString Int.toString a
 89 |     val sb = S.toString Int.toString b
 90 |     val sc = S.toString Int.toString c
 91 |     val _ = print ("a = " ^ sa ^ "\n")
 92 |     val _ = print ("b = " ^ sb ^ "\n")
 93 |     val _ = print ("c = " ^ sc ^ "\n")						
 94 |   in
 95 |     ()
 96 |   end
 97 | 
 98 | fun testSampleSearch () = 
 99 |   let 
100 |     val a = S.tabulate (fn i => 2*i) n
101 |     val sa = S.toString Int.toString a
102 |     val _ = print ("a = " ^ sa ^ "\n")
103 |     fun degree n = Real.trunc(Math.sqrt(Real.fromInt n))
104 |     val key = (2*Int.div(n,2) - 1)
105 |     val _ = print ("Looking for: " ^ Int.toString key ^ "\n")
106 |     val result = S.sampleSearch degree Int.compare a key
107 |     val _ =  
108 |       case result of 
109 |         NONE => print ("not found\n")
110 |       | SOME pos => print ("found at position: " ^ Int.toString pos ^ "\n")
111 | 
112 |     val key = 6
113 |     val _ = print ("Looking for: " ^ Int.toString key ^ "\n")
114 |     val result = S.sampleSearch degree Int.compare a key
115 |     val _ =  
116 |       case result of 
117 |         NONE => print ("not found\n")
118 |       | SOME pos => print ("found at position: " ^ Int.toString pos ^ "\n")
119 |   in
120 |     ()
121 |   end
122 | 
123 | 
124 | fun printWork s nested updates evens max sUpdated t scanMax it flat = 
125 |   if n > defaultInput then
126 |     ()
127 |   else
128 |     let 
129 |       fun pairToString (i,j) =
130 |         "(" ^ Int.toString i ^ ", " ^ Int.toString j ^ ")"
131 |       val ss = S.toString Int.toString s
132 |       val sevens = S.toString Int.toString evens
133 |       val supdates = S.toString pairToString updates
134 |       val ssUpdated = S.toString Int.toString sUpdated
135 |       val ts = S.toString Int.toString t
136 |       val its = S.toString Int.toString it
137 |       val snested = S.toString (S.toString Int.toString) nested
138 |       val sflat = S.toString Int.toString flat
139 | 
140 |       val _ = print ("Sequence is: " ^ ss ^ "\n")
141 |       val _ = print ("Filtered   : " ^ sevens ^ "\n")
142 |       val _ = print ("Max        : " ^ Int.toString max ^ "\n")
143 |       val _ = print ("Scan       : " ^ ts ^ " and scanMax = " ^ Int.toString scanMax ^ "\n")
144 |       val _ = print ("IScan      : " ^ its ^ "\n")
145 |       val _ = print ("Updates   : " ^ supdates ^ "\n")
146 | 
147 |       val _ = print ("Updated   : " ^ ssUpdated ^ "\n")
148 |       val _ = print ("Nested sequence is: " ^ snested ^ "\n")
149 |       val _ = print ("Flat sequence is  : " ^ sflat ^ "\n")
150 |     in
151 |       ()
152 |     end
153 | 
154 | 
155 | val _ = testMerge () 				
156 | (*
157 | val _ = testBinarySearch () 
158 | val _ = testSampleSearch ()
159 | val _ = testBinarySplit () 
160 | val _ = testMerge () 				
161 | val _ = print ("# Begin: Array Sequences, n =" ^ Int.toString n ^ "\n")
162 | val _ = testSampleSearch()
163 | *)
164 | (*
165 | val _ = testBivariantSplit()
166 | *)
167 | (* val s = S.tabulate (fn i => fib (Int.mod (i, 8))) n) *)
168 | (* val nested = S.tabulate (fn i => s) (Int.min (1 + Int.div (n, 1000), 10)) *)
169 | 
170 | (* val updates = S.tabulate (fn i => (2*Int.div(i, 2), 10*i)) (Int.div (n, 2)) *)
171 | (* val evens = S.filter (fn i => Int.mod(i,2)=0) s *)
172 | (* val max = S.reduce Int.max ~1 s *)
173 | (* val sUpdated = S.inject s updates *)
174 | (* val (t, scanMax) = S.scan Int.max 0 s *)
175 | (* val it = S.iscan Int.max 0 s *)
176 | (* val flat = S.flatten nested *)
177 | 
178 | (* val () = printWork s  nested updates evens max sUpdated t scanMax it flat *)
179 | val _ = print ("# End: Array Sequences\n")
180 | 


--------------------------------------------------------------------------------
/bfs/dopt-bfs.sml:
--------------------------------------------------------------------------------
  1 | (* nondeterministic direction-optimized BFS, using CAS on outneighbors to
  2 |  * construct next frontier. *)
  3 | structure DoptBFS =
  4 | struct
  5 |   type 'a seq = 'a Seq.t
  6 | 
  7 |   (* structure DS = DelayedSeq *)
  8 |   structure G = AdjacencyGraph(Int)
  9 |   structure V = G.Vertex
 10 | 
 11 |   type vertex = G.vertex
 12 | 
 13 |   val sub = Array.sub
 14 |   val upd = Array.update
 15 | 
 16 |   val vtoi = V.toInt
 17 |   val itov = V.fromInt
 18 | 
 19 |   (* fun ASsub s =
 20 |     let val (a, i, _) = ArraySlice.base s
 21 |     in sub (a, i+s)
 22 |     end *)
 23 | 
 24 |   val GRAIN = 10000
 25 | 
 26 |   fun strip s =
 27 |     let val (s', start, _) = ArraySlice.base s
 28 |     in if start = 0 then s' else raise Fail "strip base <> 0"
 29 |     end
 30 | 
 31 |   fun bfs (g : G.graph) (s : vertex) =
 32 |     let
 33 |       val n = G.numVertices g
 34 |       val parent = strip (Seq.tabulate (fn _ => ~1) n)
 35 | 
 36 |       (* Choose method of filtering the frontier: either frontier always
 37 |        * only consists of valid vertex ids, or it allows invalid vertices and
 38 |        * pretends that these vertices are isolated. *)
 39 |       fun degree v = G.degree g v
 40 |       fun filterFrontier s = Seq.filter (fn x => x <> itov (~1)) s
 41 |       (*
 42 |       fun degree v = if v < 0 then 0 else Graph.degree g v
 43 |       fun filterFrontier s = s
 44 |       *)
 45 | 
 46 |       val denseThreshold = G.numEdges g div 20
 47 | 
 48 |       fun sumOfOutDegrees frontier =
 49 |         SeqBasis.reduce 10000 op+ 0 (0, Seq.length frontier) (degree o Seq.nth frontier)
 50 |         (* DS.reduce op+ 0 (DS.map degree (DS.fromArraySeq frontier)) *)
 51 | 
 52 |       fun shouldProcessDense frontier =
 53 |         let
 54 |           val n = Seq.length frontier
 55 |           val m = sumOfOutDegrees frontier
 56 |         in
 57 |           n + m > denseThreshold
 58 |         end
 59 | 
 60 |       fun bottomUp frontier =
 61 |         let
 62 |           val flags = Seq.tabulate (fn _ => false) n
 63 |           val _ = Seq.foreach frontier (fn (_, v) =>
 64 |             ArraySlice.update (flags, v, true))
 65 |           fun inFrontier v = Seq.nth flags (vtoi v)
 66 | 
 67 |           fun processVertex v =
 68 |             if sub (parent, v) <> ~1 then NONE else
 69 |             let
 70 |               val nbrs = G.neighbors g (itov v)
 71 |               val deg = ArraySlice.length nbrs
 72 |               fun loop i =
 73 |                 if i >= deg then
 74 |                   NONE
 75 |                 else
 76 |                   let
 77 |                     val u = Seq.nth nbrs i
 78 |                   in
 79 |                     if inFrontier u then
 80 |                       (upd (parent, v, u); SOME v)
 81 |                     else
 82 |                       loop (i+1)
 83 |                   end
 84 |             in
 85 |               loop 0
 86 |             end
 87 |         in
 88 |           ArraySlice.full (SeqBasis.tabFilter 1000 (0, n) processVertex)
 89 |         end
 90 | 
 91 |       fun topDown frontier =
 92 |         let
 93 |           val nf = Seq.length frontier
 94 |           val offsets = SeqBasis.scan GRAIN op+ 0 (0, nf) (degree o Seq.nth frontier)
 95 |           val mf = sub (offsets, nf)
 96 |           val outNbrs = ForkJoin.alloc mf
 97 | 
 98 |           (* attempt to claim parent of u as v *)
 99 |           fun claim (u, v) =
100 |             sub (parent, u) = ~1
101 |             andalso
102 |             ~1 = Concurrency.casArray (parent, u) (~1, v)
103 | 
104 |           fun visitNeighbors offset v nghs =
105 |             Util.for (0, Seq.length nghs) (fn i =>
106 |               let val u = Seq.nth nghs i
107 |               in if not (claim (vtoi u, vtoi v))
108 |                  then upd (outNbrs, offset + i, itov (~1))
109 |                  else upd (outNbrs, offset + i, u)
110 |               end)
111 | 
112 |           fun visitMany offlo lo hi =
113 |             if lo = hi then () else
114 |             let
115 |               val v = Seq.nth frontier offlo
116 |               val voffset = sub (offsets, offlo)
117 |               val k = Int.min (hi - lo, sub (offsets, offlo+1) - lo)
118 |             in
119 |               if k = 0 then visitMany (offlo+1) lo hi
120 |               else ( visitNeighbors lo v (Seq.subseq (G.neighbors g v) (lo - voffset, k))
121 |                    ; visitMany (offlo+1) (lo+k) hi
122 |                    )
123 |             end
124 | 
125 |           fun parVisitMany (offlo, offhi) (lo, hi) =
126 |             if hi - lo <= GRAIN then
127 |               visitMany offlo lo hi
128 |             else
129 |               let
130 |                 val mid = lo + (hi - lo) div 2
131 |                 val (i, j) = OffsetSearch.search mid offsets (offlo, offhi)
132 |                 val _ = ForkJoin.par
133 |                   ( fn _ => parVisitMany (offlo, i) (lo, mid)
134 |                   , fn _ => parVisitMany (j-1, offhi) (mid, hi)
135 |                   )
136 |               in
137 |                 ()
138 |               end
139 | 
140 |           (* Either one of the following is correct, but the second one has
141 |            * significantly better granularity control for graphs that have a
142 |            * small number of vertices with huge degree. *)
143 | 
144 |           (* val _ = ParUtil.parfor 100 (0, nf) (fn i =>
145 |             visitMany i (sub (offsets, i)) (sub (offsets, i+1))) *)
146 | 
147 |           val _ = parVisitMany (0, nf + 1) (0, mf)
148 |         in
149 |           filterFrontier (ArraySlice.full outNbrs)
150 |         end
151 | 
152 |       fun search frontier =
153 |         if Seq.length frontier = 0 then
154 |           ()
155 |         else if shouldProcessDense frontier then
156 |           let
157 |             val (nextFrontier, tm) = Util.getTime (fn _ => bottomUp frontier)
158 |           in
159 |             print ("dense  " ^ Time.fmt 4 tm ^ "\n");
160 |             search nextFrontier
161 |           end
162 |         else
163 |           let
164 |             val (nextFrontier, tm) = Util.getTime (fn _ => topDown frontier)
165 |           in
166 |             print ("sparse " ^ Time.fmt 4 tm ^ "\n");
167 |             search nextFrontier
168 |           end
169 | 
170 |       val _ = upd (parent, vtoi s, s)
171 |       val _ = search (Seq.fromList [s])
172 |     in
173 |       ArraySlice.full parent
174 |     end
175 | 
176 | end
177 | 


--------------------------------------------------------------------------------
/sequences/bad-par/ArraySequence.sml:
--------------------------------------------------------------------------------
  1 | structure ArraySequence = 
  2 | struct
  3 | 
  4 | structure A = Array
  5 | structure AS = ArraySlice
  6 | 
  7 | type 'a t = 'a ArraySlice.slice
  8 | 
  9 | datatype 'a tree = Leaf of int | Node of ('a tree * 'a tree * 'a)
 10 | 
 11 | val parfor = ForkJoin.parfor 1
 12 | val alloc = ForkJoin.alloc
 13 | 
 14 | fun nth s i = AS.sub (s, i)
 15 | 
 16 | fun length s = AS.length s
 17 | 
 18 | fun fromList xs = AS.full (A.fromList xs)
 19 | 
 20 | fun toList s = List.tabulate (length s, nth s)
 21 | 
 22 | fun toString f s =
 23 |     "<" ^ String.concatWith "," (List.map f (toList s)) ^ ">"
 24 | 
 25 |   (* let  *)
 26 |   (*   val n = length s  *)
 27 |   (*   val () = print ("nth: i = " ^ Int.toString i ^ " n = " ^ Int.toString n ^ "\n")  *)
 28 |   (*  in       *)
 29 |   (*  end *)
 30 | 
 31 | 
 32 | fun empty () = fromList []
 33 | 
 34 | (* Return subseq s[i...i+n-1] *)
 35 | fun subseq s (i, n) = 
 36 |   AS.subslice (s, i, SOME n)
 37 | 
 38 | fun take s k = subseq s (0, k)
 39 | 
 40 | fun drop s k = subseq s (k, length s - k)
 41 | 
 42 | fun foldl f b s =
 43 |   let 
 44 |     val n = length s
 45 |     fun fold current i = 
 46 |       if i = n then
 47 |         current
 48 |       else
 49 |         fold (f(current, nth s i)) (i+1)
 50 |    in
 51 |      fold b 0
 52 |    end
 53 | 
 54 | fun tabulate f n = 
 55 |   let 
 56 |     val s = ForkJoin.alloc n
 57 |     val () = parfor (0, n) (fn i => Array.update (s, i, f i))
 58 |   in 
 59 |     AS.full s
 60 |   end  
 61 | 
 62 | fun rev s = 
 63 |   tabulate (fn i => nth s (length s - i - 1)) (length s)
 64 | 
 65 | fun append (s, t) =
 66 |   tabulate 
 67 |     (fn i => if i < length s then nth s i else nth t (i - length s))
 68 |     (length s + length t)
 69 | 
 70 | fun map f s = 
 71 |   tabulate (fn i => f (nth s i)) (length s)
 72 | 
 73 | fun apply (f: 'a -> unit) (s: 'a t): unit = 
 74 |   parfor (0, length s) (fn i => f (nth s i)) 
 75 | 
 76 | fun applyi (f: int * 'a -> unit) (s: 'a t): unit = 
 77 |   parfor (0, length s) (fn i => f (i, nth s i)) 
 78 | 
 79 | fun update s (i, v) =
 80 |   let
 81 |     val result = map (fn x => x) s
 82 |     val _ = AS.update (result, i, v)
 83 |   in
 84 |     result
 85 |   end
 86 | 
 87 | fun inject s updates =
 88 |   let
 89 |     val result = map (fn x => x) s
 90 |     fun injectOne (i, v) = AS.update (result, i, v)
 91 |     val () = apply injectOne updates
 92 |   in
 93 |     result
 94 |   end
 95 | 
 96 | fun reduce f id s = 
 97 |   let
 98 |     val n = length s
 99 |   in
100 |     if n = 0 then
101 |       id
102 |     else if n = 1 then
103 |       f (id, nth s 0)
104 |     else
105 |       let
106 |         val m = Int.div (n, 2)
107 |         val (left, right) = (subseq s (0, m), subseq s (m, n-m))
108 |         val (sl, sr) = ForkJoin.par (fn () => reduce f id left, 
109 |                                      fn () => reduce f id right)
110 |       in 
111 |         f (sl, sr)
112 |       end
113 |    end
114 | 
115 | 
116 | (* Compute `reduce` for all prefixes of the input sequence,
117 |  * including the empty and the full prefix. 
118 |  *)
119 | fun scanGen f id s = 
120 |   let
121 |     val n = length s
122 | (*    val _ = print ("scan: len(s) = " ^ Int.toString n ^ "\n") *)
123 |   in 
124 |     if n = 0 then
125 |       tabulate (fn i => 0) 1
126 |     else 
127 |       let         
128 |         val m = Int.div (n, 2)
129 |         val t = tabulate (fn i => f (nth s (2*i), nth s (2*i+1))) m
130 |         val t = scanGen f id t
131 | 
132 |         fun expand i =
133 |           if Int.mod (i, 2) = 0 then
134 |             nth t (Int.div(i,2))
135 |           else 
136 |             f (nth t (Int.div(i,2)), nth s (i-1))      
137 |         in
138 |           tabulate expand (n+1)
139 |         end
140 |    end
141 | 
142 | 
143 | 
144 | (* Compute `reduce` for all prefixes of the input sequence,
145 |  * including the empty and the full prefix. 
146 |  *)
147 | fun scanGenTree f id s = 
148 |   let
149 |     val n = length s
150 |     val res = ForkJoin.alloc (n+1)
151 | 		
152 | (*    val _ = print ("scan: len(s) = " ^ Int.toString n ^ "\n") 
153 |       val ss = toString Int.toString s
154 |       val _ = print ("scan: s = " ^ ss ^ "\n")
155 | *)			
156 |     fun mkTreeSlice(l: int, r: int) =
157 |      if r = l + 1 then
158 | 		   (Leaf l, nth s l)
159 | 		 else
160 | 		   let 
161 |          val mid = Int.div(l + r, 2)
162 |          val (left, leftsum) = mkTreeSlice(l, mid)
163 | 				 val (right, rightsum) = mkTreeSlice(mid, r)
164 | 			in
165 | 			  (Node(left, right, leftsum), f(leftsum, rightsum))
166 |       end 
167 | 
168 |     fun scanTree(tree, sum) =
169 |   	  case tree of
170 |         Leaf(l) => Array.update(res, l, sum)
171 |       | Node(l, r, leftSum) =>
172 |     	  let
173 |           val _ = scanTree(l, sum)
174 |           val _ = scanTree(r, f(leftSum, sum))
175 |         in
176 |           ()
177 |         end	
178 |   in 
179 |     if n = 0 then
180 |       (Array.update(res, 0, id); AS.full res)
181 |     else 
182 |       let
183 | 			  val (tree, sum) =  mkTreeSlice(0, n)
184 |   			val _ = scanTree(tree, id)								
185 |         val _ = Array.update(res, n, sum)
186 |       in
187 |      	  AS.full res
188 |       end
189 |    end
190 | 
191 | (* Scan exclusive *)
192 | fun scan f id s = 
193 |   let 
194 |     val t = scanGen f id s
195 |     val n = length s 
196 |   in
197 |     (subseq t (0,n), nth t n)
198 |   end 
199 | 
200 | fun scanTree f id s = 
201 |   let 
202 |     val t = scanGenTree f id s
203 |     val n = length s 
204 |   in
205 |     (subseq t (0,n), nth t n)
206 |   end 
207 | 
208 | (* Scan inclusive *)
209 | fun iscan f id s = 
210 |   let 
211 |     val t = scanGen f id s
212 |     val n = length s 
213 |   in
214 |     subseq t (1,n)
215 |   end
216 | 
217 | fun iscanTree f id s = 
218 |   let 
219 |     val t = scanGenTree f id s
220 |     val n = length s 
221 |   in
222 |     subseq t (1,n)
223 |   end
224 | 
225 | fun filter f s = 
226 |   let
227 |     val indicators = map (fn x => if f x then 1 else 0) s
228 |     val (offsets, m) = scan (fn (x,y) => x + y) 0 indicators
229 |     val t = alloc m 
230 | 
231 |     fun copy t (x, i) = 
232 |       if nth indicators i = 1 then
233 |         A.update (t, nth offsets i, x)
234 |       else
235 |         ()
236 | 
237 |      val () = applyi (copy t) s
238 |    in 
239 |      AS.full t
240 |    end
241 | 
242 | fun flatten s = 
243 |   let
244 |     val lengths = map length s 
245 |     val (offsets, n) = scan (fn (x,y) => x + y) 0 lengths 
246 |     val t = alloc n
247 |     val _ = 
248 |       parfor (0, length s) (fn i =>
249 |         let 
250 |            val (x, offset) = (nth s i, nth offsets i)
251 |         in
252 |           parfor (0, length x) (fn j =>
253 |             A.update (t, offset + j, nth x j))
254 |         end)
255 |   in 
256 |     AS.full t
257 |   end
258 | 
259 | end
260 | 


--------------------------------------------------------------------------------
/trees/Tree.sml:
--------------------------------------------------------------------------------
  1 | structure Tree:
  2 | sig
  3 | 
  4 |   datatype 'a tree =
  5 |     Empty
  6 |   | Leaf of 'a
  7 |   | Node of int * ('a tree) * ('a tree)
  8 | 
  9 |   type 'a t = 'a tree
 10 | 
 11 |   val size: 'a tree -> int
 12 |   val toString: ('a -> string) -> 'a tree -> string
 13 | 
 14 |   val makeUnbalanced: (int -> 'a) -> int -> int -> 'a tree
 15 |   val makeBalanced: (int -> 'a) -> int -> int -> 'a tree
 16 | 
 17 |   (** sequential versions *)
 18 | 
 19 |   val heightSeq: 'a tree -> int
 20 |   (* val mapSeq: ('a -> 'b) -> 'a tree -> 'b tree *)
 21 |   (* val filterSeq: ('a -> bool) -> 'a tree -> 'a tree *)
 22 |   val reduceSeq: ('a * 'a -> 'a) -> 'a -> 'a tree -> 'a
 23 |   val scanSeq: ('a * 'a -> 'a) -> 'a -> 'a tree -> 'a tree * 'a
 24 | 
 25 |   (** parallel versions *)
 26 | 
 27 |   val height: 'a tree -> int
 28 |   (* val map: ('a -> 'b) -> 'a tree -> 'b tree *)
 29 |   (* val filter: ('a -> bool) -> 'a tree -> 'a tree *)
 30 |   val reduce: ('a * 'a -> 'a) -> 'a -> 'a tree -> 'a
 31 |   val scan: ('a * 'a -> 'a) -> 'a -> 'a tree -> 'a tree * 'a
 32 | 
 33 | end =
 34 | struct
 35 | 
 36 |   datatype 'a tree =
 37 |     Empty
 38 |   | Leaf of 'a
 39 |   | Node of int * ('a tree) * ('a tree)
 40 | 
 41 |   type 'a t = 'a tree
 42 | 
 43 |   val GRAIN = 5000
 44 | 
 45 | 
 46 |   fun size t =
 47 |     case t of
 48 |       Empty => 0
 49 |     | Leaf _ => 1
 50 |     | Node (n, _, _) => n
 51 | 
 52 | 
 53 |   fun heightSeq t =
 54 |     case t of
 55 |       Empty => 0
 56 |     | Leaf _ => 1
 57 |     | Node (_, l, r) => 1 + Int.max (heightSeq l, heightSeq r)
 58 | 
 59 | 
 60 |   fun height t =
 61 |     if size t < GRAIN then
 62 |       heightSeq t
 63 |     else
 64 |       case t of
 65 |         Empty => 0
 66 |       | Leaf _ => 1
 67 |       | Node (n, l, r) =>
 68 |           1 + Int.max (ForkJoin.par (fn _ => height l, fn _ => height r))
 69 | 
 70 | 
 71 |   fun reduceSeq f id t =
 72 |     case t of
 73 |       Empty => id
 74 |     | Leaf x => x
 75 |     | Node (_, left, right) => f (reduceSeq f id left, reduceSeq f id right)
 76 | 
 77 | 
 78 |   fun reduce f id t =
 79 |     if size t < GRAIN then
 80 |       reduceSeq f id t
 81 |     else
 82 |       case t of
 83 |         Empty => id
 84 |       | Leaf x => x
 85 |       | Node (_, left, right) =>
 86 |           let
 87 |             val (resultLeft, resultRight) =
 88 |               ForkJoin.par (fn () => reduce f id left,
 89 |                             fn () => reduce f id right)
 90 |           in
 91 |             f (resultLeft, resultRight)
 92 |           end
 93 | 
 94 | 
 95 |   fun makeUnbalanced f i n =
 96 |     case n of
 97 |       0 => Empty
 98 |     | 1 => Leaf (f i)
 99 |     | _ =>
100 |         let
101 |           val l = Leaf (f i)
102 |           val r = makeUnbalanced f (i+1) (n-1)
103 |         in
104 |           Node (n, l, r)
105 |         end
106 | 
107 | 
108 |   fun makeBalanced f i n =
109 |     case n of
110 |       0 => Empty
111 |     | 1 => Leaf (f i)
112 |     | _ =>
113 |         let
114 |           val half = n div 2
115 |           val l = makeBalanced f i half
116 |           val r = makeBalanced f (i + half) (n - half)
117 |         in
118 |           Node (n, l, r)
119 |         end
120 | 
121 | 
122 |   fun tabulate f n =
123 |     let
124 |       (** recursive helper computes the subtree with leaf elements
125 |         * f(offset), f(offset+1), ..., f(offset+size-1)
126 |         *)
127 |       fun subtree offset size =
128 |         case size of
129 |           0 => Empty
130 |         | 1 => Leaf (f offset)
131 |         | _ =>
132 |             let
133 |               (** divide approximately in half, such that
134 |                 * size = leftSize + rightSize *)
135 |               val leftSize = size div 2
136 |               val rightSize = size - leftSize
137 | 
138 |               fun left () = subtree offset leftSize
139 |               fun right () = subtree (offset+leftSize) rightSize
140 | 
141 |               val (l, r) =
142 |                 (* granularity control *)
143 |                 if size < GRAIN then
144 |                   (left (), right ())
145 |                 else
146 |                   ForkJoin.par (left, right)
147 |             in
148 |               Node (size, l, r)
149 |             end
150 |     in
151 |       subtree 0 n
152 |     end
153 | 
154 | 
155 |   fun toString f t =
156 |     let
157 |       fun loop t =
158 |         case t of
159 |           Empty => ""
160 |         | Leaf x => f x
161 |         | Node (_, l, r) => loop l ^ "," ^ loop r
162 |     in
163 |       "[" ^ loop t ^ "]"
164 |     end
165 | 
166 |   (** =========================================================================
167 |     * scan
168 |     *)
169 | 
170 |   fun scanLoop f acc t =
171 |     case t of
172 |       Empty => (Empty, acc)
173 |     | Leaf x => (Leaf acc, f (acc, x))
174 |     | Node (n, left, right) =>
175 |         let
176 |           val (leftPrefixSums, accLeft) = scanLoop f acc left
177 |           val (rightPrefixSums, accRight) = scanLoop f accLeft right
178 |           val allSums = Node (n, leftPrefixSums, rightPrefixSums)
179 |         in
180 |           (allSums, accRight)
181 |         end
182 | 
183 | 
184 |   fun scanSeq f id t =
185 |     scanLoop f id t
186 | 
187 | 
188 |   fun scan (f: 'a * 'a -> 'a) (id: 'a) (t: 'a tree) =
189 |     let
190 |       (** "sum tree" is produced by the first phase of the algorithm,
191 |         * the "upsweep", which is essentially the same as a reduce except that
192 |         * it produces a tree which stores all intermediate results.
193 |         *)
194 |       datatype 'a sum_tree =
195 |         GrainSum of 'a
196 |       | NodeSum of 'a * 'a sum_tree * 'a sum_tree
197 | 
198 |       fun sumOf (st: 'a sum_tree) : 'a =
199 |         case st of
200 |           GrainSum x => x
201 |         | NodeSum (x, _, _) => x
202 | 
203 |       fun upsweep (t: 'a tree): 'a sum_tree =
204 |         if size t <= GRAIN then
205 |           GrainSum (reduceSeq f id t)
206 |         else
207 |           case t of
208 |             Node (_, left, right) =>
209 |               let
210 |                 val (leftSums, rightSums) =
211 |                   ForkJoin.par (fn _ => upsweep left, fn _ => upsweep right)
212 |                 val thisSum = f (sumOf leftSums, sumOf rightSums)
213 |               in
214 |                 NodeSum (thisSum, leftSums, rightSums)
215 |               end
216 | 
217 |           | _ => raise Fail "Tree.scan.upsweep: impossible"
218 | 
219 | 
220 |       fun downsweep acc (t: 'a tree) (st: 'a sum_tree): ('a tree * 'a) =
221 |         if size t <= GRAIN then
222 |           scanLoop f acc t
223 |         else
224 |           case (t, st) of
225 |             (Node (n, left, right), NodeSum (_, stLeft, stRight)) =>
226 |               let
227 |                 val accLeft = f (acc, sumOf stLeft)
228 | 
229 |                 val ((l, _), (r, accTotal)) =
230 |                   ForkJoin.par (fn _ => downsweep acc left stLeft,
231 |                                 fn _ => downsweep accLeft right stRight)
232 |               in
233 |                 (Node (n, l, r), accTotal)
234 |               end
235 | 
236 |           | _ => raise Fail "Tree.scan.downsweep: impossible"
237 |     in
238 |       downsweep id t (upsweep t)
239 |     end
240 | 
241 | end
242 | 


--------------------------------------------------------------------------------
/sequences/README.md:
--------------------------------------------------------------------------------
  1 | [(← Trees)](../trees/README.md)
  2 | [(MCSS →)](../mcss/README.md)
  3 | 
  4 | # Sequences
  5 | 
  6 | A sequence 
  7 | `<a_0, a_2, ..., a_n>`
  8 | is an ordered collection of elements that support several operations, inclduing fast random access.
  9 | 
 10 | # Sequence Interface
 11 | 
 12 | Sequences support the following operations
 13 | 
 14 | * `length` returns the length of the sequence
 15 | 
 16 | * `nth` returns the element at the specified position (counting from 0)
 17 | 
 18 | * `empty` returns an empty sequence
 19 | 
 20 | * `singleton` takes an element and returns a sequence that contains that element (only)
 21 | 
 22 | * `tabulate` takes 1) a generator function that takes a position and generates the element at that position 2) a length and returns a sequence of the given length where the element at a given position is computed by appling the generator function (at that position)
 23 | 
 24 | * `rev` reverses the given sequence
 25 | 
 26 | * `append` takes two sequences and appends them
 27 | 
 28 | * `apply` takes a function and a sequence, and applies the function to each element in the input sequence. 
 29 | 
 30 | * `applyi` takes a function and a sequence, and applies the function to each position and the element at that position.  It differs from `apply` in that it passes the element position as an argument to the function. 
 31 | 
 32 | * `map` takes a function from elements to (possibly new type of) elements and creates a new sequence by appling the function to each element
 33 | 
 34 | * `subseq` takes a sequence and an interval and returns a subsequence that contains the elements in the given interval 
 35 | 
 36 | 
 37 | 
 38 | * `filter` takes a boolean function and a sequence and returns a new sequencue consisting of elements that satisfy the function
 39 | 
 40 | * `flatten` takes a sequence of sequences and flattens it into a single, flat sequencues by appending the nested sequencues,
 41 | 
 42 | * `update` takes an input sequence and a position value pair and returns a new sequence that is identical to the input sequence except at the given position, which contains the specified value.  The `update` function is pure is the sense that it does not modify the input sequence.
 43 | 
 44 | * `inject` takes an input sequence and a sequencue of updates consisting of position-value pairs and returns a new sequence that is idential to the input sequence except and specified updates.  For each updated position, the output sequence contains (an arbitrary) one of the updated values.   The `inject` function is pure is the sense that it does not modify the input sequence.
 45 | 
 46 | * `isEmpty` returns `true` if the input sequence is empty and `false` otherwise
 47 | 
 48 | * `isSingleton` returns `true` if the input sequence is a singleton and `false` otherwise
 49 | 
 50 | * `iterate` takes 1) an iterator function, 2) an initial value, 3) and a sequence and iteratively applies the iterator function to the elements of the sequence and previously computed value (or the initial value) and returns the final computed value
 51 | 
 52 | * `reduce` takes a 1) associative reducer function that maps to elements to another element, 2) the identity value of the reducer function, and 3) and a sequence and returs the reduced value for the sequence   
 53 | 
 54 | * `scan` takes a 1) associative reducer function that maps to elements to another element, 2) the identity value of the reducer function, and 3) and a sequence and returns 1) the reduced value for each prefix of the sequence (starting with the emtpy sequence, for which the value is identity), and 2) the reduced value for the whole sequence    
 55 | 
 56 | ## Implementation
 57 | 
 58 | We can implement the sequence interface described above in a number of
 59 | ways.  For example, we can use weight-balanced trees to represent the
 60 | elements in the sequence.  Such an implementation has the advantage of
 61 | allowing us to update an element of the sequence in logarithmic work
 62 | but most other operations such as simple accesses also require
 63 | logarithmic work.  To provide for constant-work access we can use
 64 | arrays.  This comes at the cost of increasing the cost of updates to
 65 | linear but this can be avoided in most cases, either by allowing for
 66 | destructive updates or by using persistence (versioning).  
 67 | 
 68 | ## Array Sequences
 69 | 
 70 | We implement sequences by using arrays as the backing data structure.
 71 | More specificially, we represent a sequence as an array slice, which
 72 | is an array with a beginning and ending position.
 73 | 
 74 | * Function `nth` simply accesses the element at the specified position
 75 |   in the underlying slice.
 76 | 
 77 | * Function `subseq` returns the specified subslice of the underlying slice. 
 78 | 
 79 | * Function `tabulate` allocates an array of size `n` and populates it by using a parallel for that ranges over all positions in the array.
 80 | 
 81 | * Function `rev` reverses the given sequence by applying a `tabulate` 
 82 | 
 83 | * Function `append` takes two sequences and appends them by using `tabulate`
 84 | 
 85 | * Funciton `map` tabulates a new sequence by using the provided map function on the input sequence.
 86 | 
 87 | * Functions `apply` and `applyi` aplies the given function for each
 88 |   position in the given array. The difference between `apply` and `applyi` is that `applyi` passes the position as argument to the update function.  Both are implemented as a simple parallel-for loop over the sequence.
 89 | 
 90 | * Function `update` takes an input sequence and a position value pair. It first creates a result sequence by copying the input sequence using a `map` and then updates the result sequence at the specified position with the given value.
 91 | 
 92 | * Function `inject` takes a sequence and an updat sequence consisting
 93 |   of index-value pairs, indicating the position and the value of the
 94 |   update respectively.  It then creates a result array by first copying the
 95 |   input array and then applying each update in parallel.
 96 | 
 97 | * Function `reduce` computes a reduction over the sequence (with the given reducer function) by dividing the input sequence into two halves, recursively reducing each half, and computing the result by applying the reducer to the result from the two halves.
 98 | 
 99 | * Function `scanGen` computes `reduce` for all prefixes of the input sequence, including the empty and the full prefix.
100 | 
101 | * Functions `scan` and `iscan` simply call `scanGen` and return the relevant subsequence of the full prefix reductions.
102 | 
103 | * Function `filter` first computes an indicator sequence consisting of
104 |   `0` and `1` entries that indicate whether the element at the
105 |   corresponding position is to be excluded from the output or not (`0`
106 |   means exclude, `1` means include).  It then computes the offset for each included element by performing a scan, which also returns the total. Using the total, it allocates a result array and uses `applyi` to populate the result array by copying the included elements to their respective positions.  
107 | 
108 | * Function `flatten` takes a sequence of sequences and flattens it as
109 |   a single "flat" sequence.  To this end, it first computes a "length
110 |   sequence" consisting of the length of each nested sequence by
111 |   mapping `length` over the sequence.  It then performs a scan over
112 |   the length sequence to compute the `offsets` for each nested
113 |   sequence in the result flat sequence.  Finally, it allocates the result array and copies each nested sequence by using a doubly nested loop over.
114 | 
115 | 
116 | 


--------------------------------------------------------------------------------
/trees/old/fast-par/Tree.sml:
--------------------------------------------------------------------------------
  1 | structure Tree = 
  2 | struct
  3 | 
  4 | (* Define "size of a tree" as the number of internal nodes *)
  5 | 
  6 | (* A binary ("apple") tree of type 'a elements ("apples")" 
  7 |    Each node holds the size of the subtree rooted at it
  8 |  *) 
  9 | 
 10 | datatype 'a t = Leaf of 'a | Node of int * 'a t * 'a t
 11 | 
 12 | val GRAIN = 5000
 13 |   
 14 | fun mkBalancedSeq i n = 
 15 |   let
 16 |     fun mk i n = 
 17 |       if n = 0 then
 18 |         Leaf i 
 19 |       else    
 20 |         let 
 21 |           val nn = n - 1 
 22 |           val m = Int.div (nn, 2) 
 23 |           val (left, right) = (mk i m,  
 24 |                                mk (i + m + 1) (nn - m)) 
 25 |         in
 26 |           Node (n, left, right) 
 27 |         end
 28 |   in
 29 |     mk i n
 30 |   end
 31 | 
 32 | 
 33 | fun heightSeq t = 
 34 |   case t of 
 35 |     Leaf _ => 0
 36 |   | Node (n, l, r) =>
 37 |     let 
 38 |       val (hl, hr) = (heightSeq l, 
 39 |                       heightSeq r) 
 40 |     in
 41 |       if hl > hr then 1 + hl else 1 + hr
 42 |     end 
 43 | 
 44 | fun mapSeq f t =
 45 |   case t of 
 46 |     Leaf x => Leaf (f x)
 47 |   | Node (n, l, r) =>
 48 |       let 
 49 |         val (ll, rr) = (mapSeq f l, 
 50 |                         mapSeq f r) 
 51 |       in
 52 |         Node (n, ll, rr)
 53 |       end 
 54 | 
 55 | (* Reduce tree t with f identity id *)
 56 | fun reduceSeq f id t =
 57 |   case t of 
 58 |     Leaf x => f (id, x)
 59 |   | Node (_, l, r) =>
 60 |     let val (ls, rs) = (reduceSeq f id l, reduceSeq f id r) in
 61 |       f (ls, rs)
 62 |     end 
 63 | 
 64 | fun filterSeq f t = 
 65 |   case t of 
 66 |     Leaf x => 
 67 |       if f x then
 68 |          SOME (Leaf x)
 69 |       else
 70 |          NONE
 71 |   | Node (n, l, r) =>
 72 |       let
 73 |         val (l, r) = (filterSeq f l, filterSeq f r)
 74 |       in
 75 |         case l of 
 76 |           NONE => r
 77 |         | SOME (ll as Leaf x) =>
 78 |             (case r of 
 79 |                NONE => l
 80 |              | SOME (rr as Leaf y) => SOME (Node (1, ll, rr))
 81 |              | SOME (rr as Node(nrr, lrr, rrr)) => SOME (Node (nrr+1, ll, rr)))
 82 |         | SOME (ll as Node (nll, lll, rll)) =>
 83 |             (case r of
 84 |                NONE => l
 85 |              | SOME (rr as Leaf y) => SOME (Node (nll, ll, rr))
 86 |              | SOME (rr as Node(nrr, lrr, rrr)) => SOME (Node (nrr+nll+1, ll, rr)))
 87 |       end
 88 | 
 89 | (* Create a balanced integer tree of the given size n *)
 90 | fun mkBalanced n = 
 91 |   let
 92 |     fun mk i n = 
 93 |       if n <= GRAIN then
 94 |         mkBalancedSeq i n
 95 |       else    
 96 |         let 
 97 |           val nn = n - 1 
 98 |           val m = Int.div (nn, 2) 
 99 |           val (left, right) = ForkJoin.par (fn () => mk i m,  
100 |                                             fn () => mk (i + m + 1) (nn - m)) 
101 |         in
102 |           Node (n, left, right) 
103 |         end
104 |   in
105 |     mk 0 n
106 |   end
107 | 
108 | (* Create an un balanced tree of the given size n *)
109 | fun mkUnbalanced n = 
110 |   let
111 |     fun mk i n = 
112 |       if n = 0 then
113 |         Leaf i
114 |       else    
115 |         let 
116 |           val left = mk i (n - 1) 
117 |         in
118 |           Node (n, left, Leaf n) 
119 |         end
120 |   in
121 |     mk 0 n
122 |   end
123 | 
124 | (* Take eToString which makes a string out of an element return a string rep of the tree. *)
125 | fun toString eToString t = 
126 |   case t of 
127 |     Leaf x => eToString x
128 |   | Node (_, l, r) => 
129 |     let val (ls, rs) = 
130 |           ForkJoin.par (fn () => toString eToString l, 
131 |                         fn () => toString eToString r) 
132 |     in
133 |       ls ^ " " ^ rs
134 |     end
135 |   
136 | 
137 | fun height t =
138 |   case t of 
139 |     Leaf _ => 0
140 |   | Node (n, l, r) =>
141 |     if n < GRAIN then
142 |       heightSeq t
143 |     else
144 |       let 
145 |         val (hl, hr) = ForkJoin.par (fn () => height l, 
146 |                                      fn () => height r) 
147 |       in
148 |         if hl > hr then 1 + hl else 1 + hr
149 |       end 
150 | 
151 | (* Map f over tree t *)
152 | fun map f t =
153 |   case t of 
154 |     Leaf x => Leaf (f x)
155 |   | Node (n, l, r) =>
156 |     if n < GRAIN then
157 |       mapSeq f t
158 |     else
159 |       let 
160 |         val (ll, rr) = ForkJoin.par (fn () => map f l, fn () => map f r) 
161 |       in
162 |         Node (n, ll, rr)
163 |       end 
164 | 
165 | (* Reduce tree t with f identity id *)
166 | fun reduce f id t =
167 |   case t of 
168 |     Leaf x => f (id, x)
169 |   | Node (n, l, r) =>
170 |     if n <= GRAIN then
171 |       reduceSeq f id t
172 |     else
173 |       let 
174 |         val (ls, rs) = ForkJoin.par (fn () => reduceSeq f id l, 
175 |                                      fn () => reduceSeq f id r) 
176 |       in
177 |         f (ls, rs)
178 |       end 
179 | 
180 | fun filter f t = 
181 |   case t of 
182 |     Leaf x => 
183 |       if f x then
184 |          SOME (Leaf x)
185 |       else
186 |          NONE
187 |   | Node (n, l, r) =>
188 |       if n <= GRAIN then
189 |         filterSeq f t
190 |       else 
191 |         let
192 |           val (l, r) = ForkJoin.par (fn () => filter f l, 
193 |                                      fn () => filter f r)
194 |         in
195 |           case l of 
196 |             NONE => r
197 |           | SOME (ll as Leaf x) =>
198 |               (case r of 
199 |                  NONE => l
200 |                | SOME (rr as Leaf y) => SOME (Node (1, ll, rr))
201 |                | SOME (rr as Node(nrr, lrr, rrr)) => SOME (Node (nrr+1, ll, rr)))
202 |           | SOME (ll as Node (nll, lll, rll)) =>
203 |               (case r of
204 |                  NONE => l
205 |                | SOME (rr as Leaf y) => SOME (Node (nll, ll, rr))
206 |                | SOME (rr as Node(nrr, lrr, rrr)) => SOME (Node (nrr+nll+1, ll, rr)))
207 |         end
208 | 
209 | datatype 'a stree = SLeaf of 'a | SNode of ('a * 'a stree * 'a stree)
210 |  
211 | fun scan id f tree = 
212 |   let 
213 |     fun upSeq tree = 
214 |       case tree of 
215 |         Leaf x => (x, SLeaf x)
216 |       | Node (n, l, r) =>
217 |           let val ((sl, slt), (sr, srt)) = (upSeq l, upSeq r)
218 |           in (f (sl, sr), SNode (sl, slt, srt)) end
219 | 
220 |     fun downSeq sum tree ut = 
221 |       case tree of 
222 |         Leaf x => Leaf sum
223 |       | Node (n, l, r) =>    
224 |           let val SNode (s, ul, ur) = ut 
225 |               val (ll, rr) = (downSeq sum l ul, downSeq (f(sum, s)) r ur)    
226 |           in Node (n, ll, rr) end
227 | 
228 |     fun up tree = 
229 |       case tree of 
230 |         Leaf x => (x, SLeaf x)
231 |       | Node (n, l, r) =>
232 |           if n <= GRAIN then
233 |             upSeq tree
234 |           else
235 |             let val ((sl, slt), (sr, srt)) = ForkJoin.par (fn () => up l,
236 |                                                            fn () => up r)
237 |             in (f (sl, sr), SNode (sl, slt, srt)) end
238 | 
239 |     fun down sum tree ut = 
240 |       case tree of 
241 |         Leaf x => Leaf sum
242 |       | Node (n, l, r) =>    
243 |           if n<= GRAIN then
244 |              downSeq sum tree ut
245 |           else 
246 |             let val SNode (s, ul, ur) = ut 
247 |                 val (ll, rr) = ForkJoin.par (fn () => down sum l ul,
248 |                                              fn () => down (f(sum, s)) r ur)
249 |             in Node (n, ll, rr) end
250 |             
251 |     val (sum, stree) = up tree
252 |   in
253 |     (down id tree stree, sum)
254 |   end    
255 | 
256 | 
257 | (* Inclusive scan *)
258 | fun iscan id f tree = 
259 |   let 
260 |     fun upSeq tree = 
261 |       case tree of 
262 |         Leaf x => (x, SLeaf x)
263 |       | Node (n, l, r) =>
264 |           let val ((sl, slt), (sr, srt)) = (upSeq l, upSeq r)
265 |           in (f (sl, sr), SNode (sl, slt, srt)) end
266 | 
267 |     fun downSeq sum tree ut = 
268 |       case tree of 
269 |         Leaf x => Leaf (f(sum, x))
270 |       | Node (n, l, r) =>    
271 |           let val SNode (s, ul, ur) = ut 
272 |               val (ll, rr) = (downSeq sum l ul, downSeq (f(sum, s)) r ur)    
273 |           in Node (n, ll, rr) end
274 | 
275 |     fun up tree = 
276 |       case tree of 
277 |         Leaf x => (x, SLeaf x)
278 |       | Node (n, l, r) =>
279 |           if n <= GRAIN then
280 |             upSeq tree
281 |           else
282 |             let val ((sl, slt), (sr, srt)) = ForkJoin.par (fn () => up l,
283 |                                                            fn () => up r)
284 |             in (f (sl, sr), SNode (sl, slt, srt)) end
285 | 
286 |     fun down sum tree ut = 
287 |       case tree of 
288 |         Leaf x => Leaf (f(sum, x))
289 |       | Node (n, l, r) =>    
290 |           if n<= GRAIN then
291 |              downSeq sum tree ut
292 |           else 
293 |             let val SNode (s, ul, ur) = ut 
294 |                 val (ll, rr) = ForkJoin.par (fn () => down sum l ul,
295 |                                              fn () => down (f(sum, s)) r ur)
296 |             in Node (n, ll, rr) end
297 |             
298 |     val (sum, stree) = up tree
299 |   in
300 |     down id tree stree
301 |   end    
302 | 
303 | end
304 | 
305 | 


--------------------------------------------------------------------------------
/how-to-par/README.md:
--------------------------------------------------------------------------------
  1 | # 3. Parallelism and Granularity Control
  2 | 
  3 | [(← Hello World)](../README.md)
  4 | [(Trees →)](../trees/README.md)
  5 | 
  6 | ## Preliminaries
  7 | 
  8 | Make sure that you've already done the [setup](../setup/README.md). If
  9 | you're using Docker to run the tutorial, all commands below should be
 10 | run within the container in directory `~/mpl-tutorial/how-to-par/`:
 11 | 
 12 | ```
 13 | $ cd path/to/mpl-tutorial
 14 | $ ./start-container.sh
 15 | <container># cd how-to-par
 16 | <container># <enter commands here>
 17 | ```
 18 | 
 19 | ## Running Example: Parallel Fibonacci
 20 | 
 21 | For a running example, we'll use the "hello world" of parallelism:
 22 | [Fibonacci numbers](https://en.wikipedia.org/wiki/Fibonacci_number),
 23 | calculated using the naive recursive definition
 24 | `fib(n) = fib(n-1) + fib(n-2)`. The following code defines this function,
 25 | taking a number `n` as input and returning the n<sup>th</sup> Fibonacci number.
 26 | The base cases are `n = 0` and `n = 1`.
 27 | 
 28 | [`mpl-tutorial/how-to-par/sequential/fib.sml`](./sequential/fib.sml):
 29 | ```sml
 30 | fun fib n =
 31 |   if n = 0 then
 32 |     0
 33 |   else if n = 1 then
 34 |     1
 35 |   else
 36 |     fib (n-1) + fib (n-2)
 37 | ```
 38 | 
 39 | <details>
 40 | <summary><strong>Question</strong>: I'm new to SML. How do I read this code?</summary>
 41 | <blockquote>
 42 | In the code above, the first line begins defining a function
 43 | named <code>fib</code> that takes an argument <code>n</code>. We then write
 44 | the body of the function, which in this case is a conditional expression.
 45 | <br><br>
 46 | Conditional expressions are written
 47 | <code>if B then X else Y</code>, where <code>B</code> is a boolean expression
 48 | and <code>X</code> and <code>Y</code> are expressions of the same type.
 49 | Note that we compare equality with a single "=", i.e.
 50 | <code>n = 0</code> is a boolean expression.
 51 | <br><br>
 52 | If you are coming from a language such as C, Java, Python, JavaScript, etc.,
 53 | then SML is going to feel a bit different. It's a functional language, so
 54 | functions are defined by expressions instead of sequences of statements.
 55 | </blockquote>
 56 | </details>
 57 | 
 58 | ## Parallelizing it
 59 | 
 60 | We can parallelize this code by doing the two recursive calls
 61 | in parallel. MPL provides a function for this: `ForkJoin.par`, which takes two
 62 | functions as argument and executes them in parallel.
 63 | 
 64 | Below is a first attempt, which is "correct" but has a performance issue that
 65 | we will discuss below. Notice that we make two recursive
 66 | calls, just like before, but now these are packaged up as anonymous functions
 67 | and passed as argument to `par`.
 68 | 
 69 | [`mpl-tutorial/how-to-par/bad-par/bad-par-fib.sml`](./bad-par/bad-par-fib.sml):
 70 | ```sml
 71 | fun badParFib n =
 72 |   if n = 0 then
 73 |     0
 74 |   else if n = 1 then
 75 |     1
 76 |   else
 77 |     let
 78 |       val (a, b) =
 79 |         ForkJoin.par (fn () => badParFib (n-1),
 80 |                       fn () => badParFib (n-2))
 81 |     in
 82 |       a + b
 83 |     end
 84 | ```
 85 | 
 86 | <details>
 87 | <summary><strong>Question</strong>: I'm new to SML. How do I read this code?</summary>
 88 | <blockquote>
 89 | There are three things in this code we haven't seen before:
 90 | <ol>
 91 |   <li>
 92 |     <code>val (a, b) = ...</code> introduces two variables by unpacking a
 93 |     tuple. The right hand side needs to be an expression that returns a
 94 |     tuple of two things.
 95 |   </li>
 96 | 
 97 |   <li>
 98 |     <code>let ... in ... end</code> lets us introduce new
 99 |     variables locally. In the above code, the variables <code>a</code>
100 |     and <code>b</code> can be used only until we get to the
101 |     corresponding <code>end</code>.
102 |   </li>
103 | 
104 |   <li>
105 |     <code>fn () => ...</code> is an anonymous (a.k.a. "lambda") function
106 |     that takes no interesting arguments. A more general form is
107 |     <code>fn x => A</code> where <code>A</code> is an expression that uses
108 |     variable <code>x</code>.
109 |   </li>
110 | </ol>
111 | </blockquote>
112 | </details>
113 | 
114 | 
115 | **Code to run `badParFib`**. Let's run `badParFib` on input
116 | 35 and then prints out the result. In the code below, the function
117 | `Int.toString` converts the resulting number into a string, and the operator
118 | `^` concatenates strings.
119 | 
120 | [`mpl-tutorial/how-to-par/bad-par/main.sml`](./bad-par/main.sml):
121 | ```sml
122 | val n = 35
123 | val _ = print ("Computing fib(" ^ Int.toString n ^ ")\n")
124 | val result = badParFib n
125 | val _ = print ("fib(" ^ Int.toString n ^ ") = " ^ Int.toString result ^ "\n")
126 | ```
127 | 
128 | **Compile and run it**. Here is an appropriate `.mlb` file for compilation.
129 | The line `$(SML_LIB)/basis/fork-join.mlb` makes it possible to use
130 | `ForkJoin.par`.
131 | 
132 | [`mpl-tutorial/how-to-par/bad-par/main.mlb`](./bad-par/main.mlb):
133 | ```sml
134 | $(SML_LIB)/basis/basis.mlb
135 | $(SML_LIB)/basis/fork-join.mlb
136 | bad-par-fib.sml
137 | main.sml
138 | ```
139 | 
140 | We can now compile and run the code. To use more than one processor,
141 | the syntax is `./program @mpl procs N --`.
142 | 
143 | ```
144 | <container># mpl bad-par/main.mlb
145 | 
146 | <container># time bad-par/main
147 | Computing fib(35)
148 | fib(35) = 9227465
149 | 
150 | real	0m2.432s
151 | user	0m1.843s
152 | sys	0m0.586s
153 | 
154 | <container># time bad-par/main @mpl procs 2 --
155 | Computing fib(35)
156 | fib(35) = 9227465
157 | 
158 | real	0m1.337s     # about 2x faster on 2 processors!
159 | user	0m1.902s
160 | sys	0m0.579s
161 | ```
162 | 
163 | And check it out: above we can see that this code gets about 2x faster when we
164 | use 2 processors instead of 1.
165 | 
166 | It's parallel! But is it fast?
167 | 
168 | ## Observed Work-Efficiency and Granularity Control
169 | 
170 | Ideally, a parallel program should be **work-efficient**: the total amount of
171 | work it performs should be approximately the same as the fastest known
172 | sequential alternative.
173 | 
174 | We've named `badParFib` suggestively because it has a performance problem.
175 | On one processor, `badParFib` is approximately 10x slower than the simple
176 | sequential `fib` program.
177 | 
178 | ```
179 | <container># mpl sequential/main.mlb
180 | <container># time sequential/main
181 | Computing fib(35)
182 | fib(35) = 9227465
183 | 
184 | real	0m0.216s
185 | user	0m0.213s
186 | sys	0m0.001s
187 | 
188 | <container># mpl bad-par/main.mlb
189 | <container># time bad-par/main
190 | Computing fib(35)
191 | fib(35) = 9227465
192 | 
193 | real	0m2.432s     # 10x slower than the sequential code!
194 | user	0m1.843s
195 | sys	0m0.586s
196 | ```
197 | 
198 | The only difference between the two programs is `ForkJoin.par`. This function
199 | call isn't free! The cost of `ForkJoin.par` can be fairly significant, and
200 | we need to amortize this overhead.
201 | 
202 | The simplest way to amortize the cost of `ForkJoin.par` is to ensure that the
203 | parallel tasks we create are not too small. This approach is called
204 | **granularity control**, because we are controlling the so-called
205 | [*granularity*][gran] of tasks (where the "granularity" of a task is just the
206 | amount of work the task performs).
207 | 
208 | ## Making It Fast with Granularity Control
209 | 
210 | A simple way of performing granularity control for the parallel Fibonacci
211 | function is to switch to a fast sequential algorithm below some
212 | threshold. Here, we hardcode the threshold at `n = 20`: for any `n < 20`, we'll
213 | use the fast sequential `fib(n)` instead of the parallel version.
214 | 
215 | [`mpl-tutorial/how-to-par/fast-par/fast-par-fib.sml`](./fast-par/fast-par-fib.sml):
216 | ```sml
217 | fun fastParFib n =
218 |   if n < 20 then
219 |     fib n    (* do the sequential code instead *)
220 |   else
221 |     let
222 |       val (a, b) =
223 |         ForkJoin.par (fn () => fastParFib (n-1),
224 |                       fn () => fastParFib (n-2))
225 |     in
226 |       a + b
227 |     end
228 | ```
229 | 
230 | This is now just as fast as the sequential code on one processor, but is
231 | still parallel. We get the best of both worlds.
232 | 
233 | ```
234 | <container># mpl fast-par/main.mlb
235 | <container># time fast-par/main
236 | Computing fib(35)
237 | fib(35) = 9227465
238 | 
239 | real	0m0.211s      # almost exactly the same as sequential fib!
240 | user	0m0.209s
241 | sys	0m0.001s
242 | 
243 | <container># time fast-par/main @mpl procs 2 --
244 | Computing fib(35)
245 | fib(35) = 9227465
246 | 
247 | real	0m0.110s      # still gets 2x faster on 2 processors!
248 | user	0m0.215s
249 | sys	0m0.001s
250 | ```
251 | 
252 | [gran]: https://en.wikipedia.org/wiki/Granularity_(parallel_computing)
253 | 
254 | ## Tuning Granularity
255 | 
256 | Above, we chose a constant threshold `n = 20`. How did we arrive at this
257 | number? What if we used `n = 21` instead? Does that make a difference?
258 | 
259 | Well, there's no magic here. We just have to try it and measure it. Time for
260 | an experiment!
261 | 
262 | Below, we generalize our parallel Fibonacci function to take an additional
263 | argument, `g`, which is the grain size, and switch to the sequential
264 | algorithm when `n < g`. We then run this code on a variety of grain sizes,
265 | and report their times. To loop through multiple grain sizes, we define
266 | a useful helper function, `forloop`, which takes a function as argument
267 | and runs it on a range of indices.
268 | 
269 | [`mpl-tutorial/how-to-par/tuning/main.sml`](./tuning/main.sml):
270 | ```sml
271 | fun parFibWithGrain (g, n) =
272 |   if n < g then
273 |     fib n
274 |   else
275 |     let
276 |       val (a, b) =
277 |         ForkJoin.par (fn () => parFibWithGrain (g, n-1),
278 |                       fn () => parFibWithGrain (g, n-2))
279 |     in
280 |       a + b
281 |     end
282 | 
283 | fun timeFibWithGrain g =
284 |   let
285 |     val n = 35
286 | 
287 |     val t0 = Time.now ()
288 |     val result = parFibWithGrain (g, n)
289 |     val t1 = Time.now ()
290 | 
291 |     val elapsed = Time.- (t1, t0)
292 |   in
293 |     print ("grain " ^ Int.toString g ^ ": " ^ Time.toString elapsed ^ "\n")
294 |   end
295 | 
296 | (* run f(i), f(i+1), ..., f(j-1) *)
297 | fun forloop (i, j, f) =
298 |   if i >= j then () else (f i; forloop (i+1, j, f))
299 | 
300 | (** this is the same as
301 |   *   (timeFibWithGrain 5;
302 |   *    timeFibWithGrain 10;
303 |   *    timeFibWithGrain 15;
304 |   *    timeFibWithGrain 20;
305 |   *    timeFibWithGrain 25;
306 |   *    timeFibWithGrain 30)
307 |   *)
308 | val _ = forloop (1, 7, fn i => timeFibWithGrain (5*i))
309 | ```
310 | 
311 | When we run it, we see that the running time improves significantly as the
312 | grain size increases, up to around a grain size of 15-20 ish. The difference
313 | from 20 to 30 is small. Choosing `n = 20` as the threshold
314 | seems good enough.
315 | 
316 | ```
317 | <container># mpl tuning/main.mlb
318 | <container># tuning/main
319 | grain 5: 0.868
320 | grain 10: 0.271
321 | grain 15: 0.219
322 | grain 20: 0.218
323 | grain 25: 0.213
324 | grain 30: 0.214
325 | ```
326 | 
327 | Keep in mind there is statistical noise to take into account here. A proper
328 | experiment would perform many trials and compare averages. We're being a bit
329 | sloppy, just for the sake of keeping things simple.
330 | 


--------------------------------------------------------------------------------
/sequences/fast-par/ArraySequence.sml:
--------------------------------------------------------------------------------
  1 | structure ArraySequence = 
  2 | struct
  3 | 
  4 | structure A = Array
  5 | structure AS = ArraySlice
  6 | 
  7 | type 'a t = 'a ArraySlice.slice
  8 | 
  9 | val GRAIN = 100000
 10 | 
 11 | val parfor = ForkJoin.parfor GRAIN
 12 | val alloc = ForkJoin.alloc
 13 | fun new n = AS.full (alloc n)
 14 | 
 15 | 
 16 | 
 17 | fun nth s i = AS.sub (s, i)
 18 | 
 19 | fun length s = AS.length s
 20 | 
 21 | 
 22 | fun fromList xs = ArraySlice.full (Array.fromList xs)
 23 | 
 24 | fun toList s = List.tabulate (length s, nth s)
 25 | 
 26 | fun toString f s =
 27 |     "<" ^ String.concatWith "," (List.map f (toList s)) ^ ">"
 28 | 
 29 | fun empty () = fromList []
 30 | 
 31 | (* Return subseq of s[i...i+n-1] *)
 32 | fun subseq s (i, n) = 
 33 |   AS.subslice (s, i, SOME n)
 34 | 
 35 | (* Return subseq of s[i...i+sz] if szopt = SOME sz and until the end of s otherwise *)
 36 | fun subslice s (i, szopt) = 
 37 |   AS.subslice (s, i, szopt)
 38 | 
 39 | 
 40 | fun take s k = subseq s (0, k)
 41 | 
 42 | fun drop s k = subseq s (k, length s - k)
 43 | 
 44 | fun foldl f b s =
 45 |   let 
 46 |     val n = length s
 47 |     fun fold current i = 
 48 |       if i = n then
 49 |         current
 50 |       else
 51 |         fold (f(current, nth s i)) (i+1)
 52 |    in
 53 |      fold b 0
 54 |    end
 55 | 
 56 | fun iterate f b s =
 57 |   foldl f b s
 58 | 
 59 | fun tabulate f n = 
 60 |   let 
 61 |     val s = ForkJoin.alloc n
 62 |     val g = fn i => Array.update (s, i, f i)
 63 |     val () = parfor (0, n) g
 64 |   in 
 65 |     AS.full s
 66 |   end  
 67 | 
 68 | fun rev s = tabulate (fn i => nth s (length s - i - 1)) (length s)
 69 | 
 70 | fun append (s, t) =
 71 |   tabulate (fn i => if i < length s then nth s i else nth t (i - length s))
 72 |       (length s + length t)
 73 | 
 74 | fun map f s = 
 75 |   tabulate (fn i => f (nth s i)) (length s)
 76 | 
 77 | fun apply (f: 'a -> unit) (s: 'a t): unit = 
 78 |   parfor (0, length s) (fn i => f (nth s i)) 
 79 | 
 80 | fun applyi (f: int * 'a -> unit) (s: 'a t): unit = 
 81 |   parfor (0, length s) (fn i => f (i, nth s i)) 
 82 | 
 83 | fun update s (i, v) =
 84 |   let
 85 |     val result = map (fn x => x) s
 86 |     val _ = AS.update (result, i, v)
 87 |   in
 88 |     result
 89 |   end
 90 | 
 91 | fun inject s updates =
 92 |   let
 93 |     val result = map (fn x => x) s
 94 |     fun injectOne (i, v) = AS.update (result, i, v)
 95 |     val () = apply injectOne updates
 96 |   in
 97 |     result
 98 |   end
 99 | 
100 | fun reduce f id s = 
101 |   let
102 |     val n = length s
103 | 
104 |     fun seqreduce s =
105 |       AS.foldl (fn (current, acc) => f (acc, current)) id s
106 | 
107 |   in
108 |     if n <= GRAIN then
109 |       seqreduce s
110 |     else
111 |       let 
112 |         val m = Int.div (n, 2)
113 |         val (left, right) = (subseq s (0, m), subseq s (m, n-m))
114 |         val (sl, sr) = ForkJoin.par (fn () => reduce f id left, 
115 |                                      fn () => reduce f id right)
116 |       in 
117 |         f (sl, sr)
118 |       end
119 |   end
120 | 
121 | 
122 | (* Compute `reduce` for all prefixes of the input sequence,
123 |  * including the empty and the full prefix. 
124 |  *)
125 | fun scanGen f id s = 
126 |   let
127 |     val n = length s
128 | (*    val _ = print ("scan: len(s) = " ^ Int.toString n ^ "\n") *)
129 | 
130 |     fun seqscan s t i = 
131 |       let 
132 |         val prev =         
133 |           if i = 0 then 
134 |             id
135 |           else
136 |             seqscan s t (i-1)
137 | 
138 |         val () = AS.update (t, i, prev)
139 |       in
140 |         f (prev, nth s i)
141 |       end
142 |   in
143 |     if n <= GRAIN then
144 |       let
145 |         val t = AS.full (alloc (n+1))
146 |         val r = seqscan s t (n-1)
147 |         val _ = AS.update (t, n, r)
148 |       in
149 |         t
150 |       end                        
151 |     else
152 |       let 
153 |         val m = Int.div (n, 2)
154 |         val N = m * 2
155 |         val t = tabulate (fn i => f (nth s (2*i), nth s (2*i+1))) m
156 |         val t = scanGen f id t
157 | 
158 |         fun expand i =
159 |           if Int.mod (i, 2) = 0 then
160 |             nth t (Int.div(i,2))
161 |           else 
162 |             f (nth t (Int.div(i,2)), nth s (i-1))      
163 |       in
164 |         tabulate expand (n+1)
165 |       end
166 |   end
167 | 
168 | (* Scan exclusive *)
169 | fun scan f id s = 
170 |   let 
171 |     val t = scanGen f id s
172 |     val n = length s 
173 |   in
174 |     (subseq t (0,n), nth t n)
175 |   end 
176 | 
177 | (* Scan inclusive *)
178 | fun iscan f id s = 
179 |   let 
180 |     val t = scanGen f id s
181 |     val n = length s 
182 |   in
183 |     subseq t (1,n)
184 |   end 
185 | 
186 | fun filter f s = 
187 |   let
188 |     val n = length s
189 | 
190 |     fun seqfilter s =
191 |       let 
192 |         val taken = AS.foldr (fn (current, acc) => 
193 |                               if f current then current::acc else acc) 
194 |                              [] 
195 |                              s 
196 |        in 
197 |          AS.full (A.fromList taken)
198 |        end
199 |   in
200 |     if n <= GRAIN then
201 |       seqfilter s
202 |     else
203 |       let 
204 |         val indicators = map (fn x => if f x then 1 else 0) s
205 |         val (offsets, m) = scan (fn (x,y) => x + y) 0 indicators
206 |         val t = alloc m 
207 |         fun copy t (x, i) = 
208 |           if nth indicators i = 1 then
209 |             A.update (t, nth offsets i, x)
210 |           else
211 |             ()
212 |         val () = applyi (copy t) s
213 |       in 
214 |         AS.full t
215 |       end
216 |   end
217 | 
218 | fun flatten s = 
219 |   let
220 |     val lengths = map length s 
221 |     val (offsets, n) = scan (fn (x,y) => x + y) 0 lengths 
222 |     val t = alloc n
223 |     val _ = 
224 |       parfor (0, length s) (fn i =>
225 |         let 
226 |            val (x, offset) = (nth s i, nth offsets i)
227 |         in
228 |           parfor (0, length x) (fn j =>
229 |             A.update (t, offset + j, nth x j))
230 |         end)
231 |   in 
232 |     AS.full t
233 |   end
234 | 
235 | (* Standard binary search
236 |    Returns NONE if not found and SOME pos if found at position pos *)
237 | fun binarySearch (cmp: 'a * 'a -> order) (a: 'a t) (k: 'a): int option = 
238 |   let 
239 |     fun search (i, j) = 
240 |       let 
241 |         val n = j - i
242 |       in
243 |         if n = 0 then
244 |           NONE
245 |         else if n = 1 then
246 |           case cmp (k, nth a i) of
247 |             LESS => NONE
248 |           | GREATER => NONE
249 |           | EQUAL => SOME i
250 |         else
251 |           let val mid = Int.div (i + j, 2) in
252 |             case cmp (k, nth a mid) of
253 |               LESS => search (i, mid)
254 |             | GREATER => search (mid+1, j)
255 |             | EQUAL => SOME mid
256 |           end
257 |       end
258 |    in
259 |      search (0, length a)
260 |    end          
261 | 
262 | (* Standard binary search
263 |    Returns the number of  elements of a that are less than k *)
264 | fun binarySplit (cmp: 'a * 'a -> order) (a: 'a t) (k: 'a): int = 
265 |   let 
266 |     fun search (i, j) = 
267 |       let 
268 |         val n = j - i
269 |       in
270 |         if n = 0 then
271 |           0
272 |         else if n = 1 then
273 |           case cmp (k, nth a i) of
274 |             LESS => i
275 |           | GREATER => i+1
276 |           | EQUAL => i+1
277 |         else
278 |           let val mid = Int.div (i + j, 2) in
279 |             case cmp (k, nth a mid) of
280 |               LESS => search (i, mid)
281 |             | GREATER => search (mid+1, j)
282 |             | EQUAL => mid+1
283 |           end
284 |       end
285 |    in
286 |      search (0, length a)
287 |    end          
288 | 
289 |           
290 | (* Split sorted sequences a and b into 
291 | 1) aleft, aright
292 | 2) bleft, bright
293 | such that aleft <= bright and bleft <= aright
294 |           and |aleft| + |bleft| = k
295 | Return i and j, where aleft = a[0, ... i] and bleft = b[0, ... j]
296 | 
297 | *)        
298 |  
299 | fun bivariantSplit a b k =  
300 |   let 
301 |     fun split a b k i j = 
302 |       case (length a, length b) of
303 |         (0, 0) => (i, j)
304 |       | (0, _) => (i, j + k)
305 |       | (_, 0) => (i + k, j)
306 |       | (_, _) =>  
307 |         let
308 |           val na = length a
309 |           val nb = length b
310 |           val midA = Int.div(na, 2) 
311 |           val midB = Int.div(nb, 2) 
312 |         in
313 |           if k <= midA + midB + 1 then
314 |             if nth a midA < nth b midB then
315 |               (* Drop b[midB ..] *)
316 |               split a (subseq b (0, midB)) k i j
317 |             else
318 |               (* Drop a[midA ..] *)
319 |               split (subseq a (0, midA)) b k i j
320 |           else
321 |             if nth a midA < nth b midB then
322 |               (* Drop a[0 .. midA] *)
323 |               split (subseq a (midA + 1, na - midA - 1 )) b (k - midA - 1) (i + midA + 1) j
324 |             else
325 |               (* Drop b[0 .. midB] *)
326 |               split a (subseq b (midB + 1, nb - midB - 1))  (k - midB - 1) i (j + midB + 1)
327 |         end    
328 |   in
329 |     split a b k ~1 ~1
330 |   end
331 | 
332 | (* Sample search for a k in array a by using comparison function cmp
333 |  *)
334 | fun sampleSearch (degree: int -> int) (cmp: 'a * 'a -> order) (a: 'a t) (k: 'a): int option =
335 |   let 
336 |     val n = length a
337 |     val m = Int.max (degree n, 2)   
338 |     val _ = print ("degree = " ^ Int.toString m ^ "\n")
339 | 
340 |     (* m-sample array a *)
341 |     fun sample (a, n, m) = 
342 |      let
343 |        val d = Int.div (n, m)
344 |        val _ = print ("Sample: n = " ^ Int.toString n ^ " m = " ^ Int.toString m ^ " d = " ^ Int.toString d ^ "\n")
345 |        val _ = print ("Sample: Last block: " ^ Int.toString ((m-1)*d) ^ " -- " ^ Int.toString (n-1)^ "\n")
346 |        fun fib n = if n < 2 then n else fib(n-1) + fib (n-2)
347 |        val _ = fib 40
348 |      in
349 |        (* First check last block, because it could be short 
350 |           If not found, then look elsewhere
351 |           If found, then good
352 |         *)
353 |        if cmp (k, nth a ((m-1) * d)) <> LESS andalso
354 |           cmp (k, nth a (n-1)) <> GREATER then
355 |             SOME(m*d-d, subslice a (m*d-d, NONE))
356 |        else
357 |          let 
358 |            val res = ref NONE
359 |            val _ = 
360 |              parfor (0, m-1) (fn i =>
361 |                let 
362 |                  val pos = i*d
363 |                  val _ = print ("Sample:  block begin = " ^ Int.toString pos)
364 |                  val _ = print (" end = " ^ Int.toString (pos + d - 1) ^ "\n")
365 |                in
366 |                  if cmp (k, nth a pos) <> LESS andalso
367 |                     cmp (k, nth a (pos +  d - 1)) <> GREATER then
368 |                    res := SOME pos
369 |                  else
370 |                    ()              
371 |                end)
372 |           in
373 |             case !res of 
374 |               NONE => NONE
375 |             | SOME pos => SOME(pos, subslice a (pos, SOME d))
376 |       end
377 |     end
378 |   in 
379 |     if m >= n then 
380 |       case sample (a, n, n) of
381 |         NONE => NONE
382 |       | SOME(start, b) => SOME start
383 |     else
384 |       case sample (a, n, m) of
385 |         NONE => NONE
386 |       | SOME (start, b) => 
387 |           case sampleSearch degree cmp b k of 
388 |             NONE => NONE
389 |           | SOME pos => SOME (start + pos) 
390 |   end
391 | 
392 | fun mergeSeq a b =	
393 | 	let		
394 | 		val r = alloc (length a + length b)
395 |     val na = length a
396 |   	val nb = length b
397 | 
398 |     fun copy x (i, n) k =
399 | 			if i = n then
400 | 				()
401 | 			else
402 | 				(Array.update (r, k, nth x i);
403 | 				 copy x (i+1, n) (k+1))
404 | 			
405 | 		fun mergeInplace i j k =
406 | 		  if i = na then
407 | 				copy b (j, nb) k
408 | 			else if j  = nb then
409 | 				copy a (i, na) k
410 |   		else
411 | 				if nth a i < nth b j then
412 | 					 (Array.update (r, k, nth a i);
413 | 					 mergeInplace (i+1) j (k+1))
414 | 			  else 
415 | 					 (Array.update (r, k, nth b j);
416 | 					 mergeInplace i (j+1) (k+1))					
417 | 	in
418 | 		(mergeInplace 0 0 0; AS.full r)
419 |   end								 
420 | 
421 | 
422 | end
423 | 


--------------------------------------------------------------------------------
/trees/README.md:
--------------------------------------------------------------------------------
  1 | # 4. Trees
  2 | 
  3 | [(← Parallelism and Granularity Control)](../how-to-par/README.md)
  4 | [(Sequences →)](../sequences/README.md)
  5 | 
  6 | ## Preliminaries
  7 | 
  8 | Make sure that you've already done the [setup](../setup/README.md). If
  9 | you're using Docker to run the tutorial, all commands below should be
 10 | run within the container in directory `~/mpl-tutorial/trees/`:
 11 | 
 12 | ```
 13 | $ cd path/to/mpl-tutorial
 14 | $ ./start-container.sh
 15 | <container># cd trees
 16 | <container># <enter commands here>
 17 | ```
 18 | 
 19 | ## Intro
 20 | 
 21 | Trees are natural data structures for parallel algorithms because of
 22 | their structure: many parallel algorithms can be expressed in terms of
 23 | recursive functions evaluated in parallel across the children of a node.
 24 | 
 25 | Here we will consider trees which store elements only at their leaves. This
 26 | differs from presentations of trees which are intended to be used as BSTs. The
 27 | trees here are intended to be used as parallel lists.
 28 | 
 29 | ## A Datatype For Binary Trees
 30 | 
 31 | The following datatype defines a binary tree type. The integer
 32 | at internal nodes will be used to store the sizes of subtrees, which is
 33 | important both for granularity control as well as various algorithms. We'll
 34 | consider the "size" of a tree to be the number of leaves it has; the
 35 | implementation of the `size` function is shown below.
 36 | 
 37 | ```sml
 38 | datatype 'a tree =
 39 |   Empty
 40 | | Leaf of 'a
 41 | | Node of int * ('a tree) * ('a tree)
 42 | ```
 43 | 
 44 | <details>
 45 | <summary><strong>Question</strong>: I'm new to SML. How do I read this code?</summary>
 46 | <blockquote>
 47 | We define a new type, <code>'a tree</code>, which has "elements" of type
 48 | <code>'a</code>. This thing written <code>'a</code> is a
 49 | <em>type parameter</em>. In other languages, like Java
 50 | or C++, you might see a similar type written as
 51 | <code>Tree&lt;T&gt;</code> where <code>T</code> is the type parameter.
 52 | In SML, we write the type parameter on the left
 53 | instead of the right, and we don't need to use any brackets or parentheses.
 54 | <br><br>
 55 | For example, in the code <code>val x: string tree = ...</code>, we have
 56 | a variable <code>x</code> of type <code>string tree</code>. Here, the
 57 | type parameter <code>'a</code> has been instantiated as <code>string</code>.
 58 | <br><br>
 59 | There are three possibilities for a thing of type <code>'a tree</code>:
 60 |   <ol>
 61 |     <li>it could be <code>Empty</code>,</li>
 62 |     <li>it could be <code>Leaf x</code>, where <code>x</code> is a value of
 63 |     type <code>'a</code>, or</li>
 64 |     <li> it could be <code>Node(n,l,r)</code>, where
 65 |     <code>n</code> is an integer, and <code>l</code> and <code>r</code> are
 66 |     two subtrees, both of type <code>'a tree</code>. In this tutorial, we will
 67 |     be using the integer <code>n</code> to keep track of subtree sizes.</li>
 68 |   </ol>
 69 | <br>
 70 | When defining the datatype, we separate the different possibilities with
 71 | the symbol <code>|</code>, pronounced "or". Each possibility
 72 | is identified by a tag (i.e., <code>Empty</code>, <code>Leaf</code>,
 73 | and <code>Node</code>), and then, if desired, the keyword <code>of</code>
 74 | followed by a type. This indicates that the tag carries additional data
 75 | with it. For example,
 76 | <code>Leaf of 'a</code> says the <code>Leaf</code> tag carries a thing of
 77 | type <code>'a</code> along with it, but the <code>Empty</code> tag doesn't have
 78 | have any additional data.
 79 | <br><br>
 80 | For the <code>Node</code> case, the extra data has multiple components. Note
 81 | the symbol <code>*</code> between each component of the type. This syntax is
 82 | more generally used for all
 83 | <em>tuples</em> in the language. For example, a function that takes two integers as
 84 | argument and returns a string would have type
 85 | <code>(int * int) -> string</code>.
 86 | <br><br>
 87 | In SML, tuples are first-class members of the language. One could think of our
 88 | tree <code>Node</code> as containing three pieces of data (an integer and
 89 | two subtrees), but it might be more accurate to think of a <code>Node</code> as
 90 | containing a <em>single</em> piece of data: a tuple of three components.
 91 | </blockquote>
 92 | </details>
 93 | 
 94 | ```sml
 95 | fun size t =
 96 |   case t of
 97 |     Empty => 0
 98 |   | Leaf _ => 1
 99 |   | Node (n, _, _) => n
100 | ```
101 | 
102 | <details>
103 | <summary><strong>Question</strong>: I'm new to SML. How do I read this code?</summary>
104 | <blockquote>
105 | The only thing we haven't seen before is <code>case ... of ...</code>, which
106 | lets you choose what to do based on what a value looks like. Here, we ask
107 | what <code>t</code> is. If <code>t = Empty</code>, then we do the first
108 | branch, returning 0. If <code>t = Leaf _</code>, then we do the second
109 | branch, returning 1. In the third case, when <code>t = Node(n,_,_)</code>,
110 | we return <code>n</code>, the integer stored at that node (which, recall,
111 | we intend to use to keep track of how many leaves are under that node).
112 | <br><br>
113 | The underscores (<code>_</code>) are used to ignore values that we don't need.
114 | For example, we don't care what is stored at the leaf when we return 1.
115 | <br><br>
116 | In languages like C and Java, you may have seen <code>switch (...) {...}</code>,
117 | which is similar, but in SML you don't have to worry about putting those
118 | pesky <code>break</code>s in the correct places...
119 | </blockquote>
120 | </details>
121 | 
122 | ## Parallel Reduction
123 | 
124 | Perhaps the simplest parallel algorithm on a tree is `reduce`, which which
125 | takes an associative function `f: ('a * 'a) -> 'a` as argument and
126 | computes the "sum" (with respect to `f`) of the leaves of a tree.
127 | `reduce` also takes an argument `id` which is an
128 | identity element for `f` (i.e. we assume `f(id, x) = f(x, id) = x` for any `x`).
129 | This also serves as a convenient return value for inputs that are `Empty`.
130 | 
131 | Some interesting use cases, given `t: int tree`:
132 |   * `reduce (fn (a,b) => a+b) 0 t` is the sum of `t`.
133 |   * `reduce (fn (a, b) => a*b) 1 t` is the product of `t`.
134 |   * `reduce Int.max (valOf Int.minInt) t` is the maximum of `t`.
135 |   * `reduce (fn (a, b) => if a >= 0 then a else b) ~1 t` gives you the first
136 |   non-negative value in `t`.
137 |     - Fun exercise: try proving that this function is associative.
138 |     - Also, note that the choice of `~1` as the "identity" element is a little
139 |     bit relaxed. As long as there is at least one non-negative element, it
140 |     won't affect the answer.
141 | 
142 | The `reduce` function is easy to parallelize, as the two children of every
143 | internal node can be processed in parallel, and finally their results
144 | can be combined with `f`.
145 | 
146 | Similar to the [previous section](../how-to-par/README.md),
147 | we use granularity control to ensure that the cost of `ForkJoin.par` is
148 | amortized. Here, this is implemented by switching to `reduceSeq`
149 | below a size threshold `GRAIN`. The `reduceSeq` function is just a sequential
150 | version of the same algorithm; this will also be useful for experiments later,
151 | to check if our granularity control is working.
152 | 
153 | ```sml
154 |   fun reduceSeq f id t =
155 |     case t of
156 |       Empty => id
157 |     | Leaf x => x
158 |     | Node (_, left, right) => f (reduceSeq f id left, reduceSeq f id right)
159 | 
160 |   val GRAIN = 5000
161 | 
162 |   fun reduce f id t =
163 |     if size t < GRAIN then
164 |       reduceSeq f id t
165 |     else
166 |       case t of
167 |         Empty => id
168 |       | Leaf x => x
169 |       | Node (_, left, right) =>
170 |           let
171 |             val (resultLeft, resultRight) =
172 |               ForkJoin.par (fn () => reduce f id left,
173 |                             fn () => reduce f id right)
174 |           in
175 |             f (resultLeft, resultRight)
176 |           end
177 | ```
178 | 
179 | <details>
180 | <summary><strong>Question</strong>: I'm new to SML. How do I read this code?</summary>
181 | <blockquote>
182 | We haven't seen the syntax <code>fun reduce f id t = ...</code> before. This
183 | is called <em>currying</em>. It's a trick in functional languages where you can
184 | pass multiple arguments without using a tuple.
185 | <br><br>
186 | The syntax <code>fun reduce f id t = ...</code> is shorthand for
187 | <code>val reduce = (fn f => (fn id => (fn t => ...)))</code>, i.e.,
188 | <code>reduce</code> is a function that returns a function which returns a
189 | function, etc. This might seem crazy, but it can actually be very convenient
190 | in some cases.
191 | <br><br>
192 | For example, we could write
193 | <code>val sum = reduce (fn (a,b) => a+b) 0</code>. Notice that we left out the
194 | last argument to <code>reduce</code>, so therefore <code>sum</code> is a
195 | function which is "waiting to receive the last argument".
196 | This is equivalent to writing
197 | <code>fun sum t = reduce (fn (a,b) => a+b) 0 t</code>.
198 | <br><br>
199 | You might be wondering: isn't that really inefficient? The short answer is
200 | no; it's just as efficient as using tuples to pass arguments. The long answer
201 | gets into some pretty low-level details about how compilers work.
202 | <br><br>
203 | To learn more, we recommend reading more about currying online:
204 |   <ul>
205 |     <li><a href="https://en.wikipedia.org/wiki/Currying">Wikipedia entry</a></li>
206 |   </ul>
207 | </blockquote>
208 | </details>
209 | 
210 | ## A Balancing Act: Trees To Test With
211 | 
212 | Consider the following two functions, `makeUnbalanced` and
213 | `makeBalanced`. Both functions take two integers `i` and `n` as
214 | argument, and return a tree of size `n` whose leaves (in order) are
215 | `f(i)`, `f(i+1)`, ..., `f(i+n-1)`. The two functions differ in the
216 | structure of the tree produced. This will be helpful for testing performance,
217 | below.
218 | 
219 | The function `makeUnbalanced` builds a tree that
220 | leans hard to the right, with final height exactly `n`. In contrast, the
221 | function `makeBalanced` builds a tree that is almost perfectly balanced, with
222 | final height approximately `log(n)`.
223 | 
224 | To help highlight the similarities between these two functions, we have not
225 | parallelized either one. It's worth mentioning however that, while
226 | `makeBalanced` could easily be parallelized, the function `makeUnbalanced` has
227 | essentially no opportunity for parallelism.
228 | 
229 | ```sml
230 |   fun makeUnbalanced f i n =
231 |     case n of
232 |       0 => Empty
233 |     | 1 => Leaf (f i)
234 |     | _ =>
235 |         let
236 |           val l = Leaf (f i)
237 |           val r = makeUnbalanced f (i+1) (n-1)
238 |         in
239 |           Node (n, l, r)
240 |         end
241 | 
242 | 
243 |   fun makeBalanced f i n =
244 |     case n of
245 |       0 => Empty
246 |     | 1 => Leaf (f i)
247 |     | _ =>
248 |         let
249 |           val half = n div 2
250 |           val l = makeBalanced f i half
251 |           val r = makeBalanced f (i + half) (n - half)
252 |         in
253 |           Node (n, l, r)
254 |         end
255 | ```
256 | 
257 | ## Balanced vs Unbalanced Performance
258 | 
259 | Here we'll measure the performance `reduce` by summing trees of different
260 | structure. We define two functions, `sum` and `sumSeq`, and two test trees,
261 | `balancedTree` and `unbalancedTree`.
262 | Both trees have 100K leaves, but while `balancedTree` has height only 18,
263 | `unbalancedTree` has height 100K.
264 | 
265 | ```sml
266 | fun sumSeq tree = Tree.reduceSeq (fn (a, b) => a+b) 0 tree
267 | fun sum tree = Tree.reduce (fn (a, b) => a+b) 0 tree
268 | 
269 | val size = 100000
270 | val balancedTree = Tree.makeBalanced Int64.fromInt 0 size
271 | val unbalancedTree = Tree.makeUnbalanced Int64.fromInt 0 size
272 | ```
273 | 
274 | In `test-balanced/` and `test-unbalanced/`, we've set up two benchmarks
275 | which take multiple time measurements and report the average.
276 | 
277 | ### Balanced Tree Performance
278 | First, let's measure the performance of summing the balanced tree, which has
279 | 100K leaves and height 18. We run both sequential and parallel versions, first
280 | using only one processor, and then using 4 processors.
281 | 
282 | Notice that on 1 processor, the parallel version has nearly identical
283 | performance as the sequential code, confirming that our granularity control
284 | is working. On 4 processors, we get about 2.5x speedup. Not bad, especially
285 | for such a small problem size.
286 | 
287 | ```
288 | <container># mpl test-balanced/main.mlb
289 | 
290 | <container># test-balanced/main
291 | size 100000
292 | built balancedTree: height 18
293 | ============ sequential ============
294 | warmup...... timing...
295 | sumSeq(balancedTree) time: 0.0020s
296 | ============= parallel =============
297 | warmup...... timing...
298 | sum(balancedTree) time: 0.0021s       # uniprocessor time
299 |                                       # approx the same as sequential
300 |                                       # (good!)
301 | 
302 | <container># test-balanced/main @mpl procs 4 --
303 | size 100000
304 | built balancedTree: height 18
305 | ============ sequential ============
306 | warmup...... timing...
307 | sumSeq(balancedTree) time: 0.0019s
308 | ============= parallel =============
309 | warmup...... timing...
310 | sum(balancedTree) time: 0.0008s       # parallel time on 4 processors
311 |                                       # approx 2.4x faster than sequential
312 |                                       # (nice!)
313 | ```
314 | 
315 | ### Unbalanced Tree Performance
316 | In contrast, the unbalanced tree (100K leaves, 100K height) does not do so
317 | well. The parallel version on a single processor is 66x slower than sequential:
318 | 
319 | ```
320 | <container># mpl test-unbalanced/main.mlb
321 | 
322 | <container># test-unbalanced/main
323 | size 100000
324 | built unbalancedTree: height 100000
325 | ============ sequential ============
326 | warmup...... timing...
327 | sumSeq(unbalancedTree) time: 0.0008s
328 | ============= parallel =============
329 | warmup...... timing......
330 | sum(unbalancedTree) time: 0.0528s    # uniprocessor time
331 |                                      # 66x slower than sequential!
332 | ```
333 | 
334 | **Why is it so much slower?** In this case, because of poor granularity control.
335 | Recall that our granularity control in `reduce` uses a fast sequential algorithm
336 | below the grain size, `GRAIN`, with the idea that this should significantly
337 | reduce the number of calls to `ForkJoin.par` and therefore amortize their cost.
338 | However, this approach is only effective for balanced trees.
339 | 
340 | Consider that the
341 | number of calls to `ForkJoin.par` is determined by how many internal nodes
342 | of the tree have size larger than the grain size, `GRAIN`.
343 | **On the balanced tree**, there are approximately `n / GRAIN` internal nodes
344 | with size larger than the grain size, and therefore only about `n / GRAIN`
345 | calls to `ForkJoin.par`.
346 | **But on the unbalanced tree**, there are as many as `n - GRAIN` internal
347 | nodes! As a result, the cost of `ForkJoin.par` has not been effectively
348 | amortized.
349 | 
350 | ### Ensuring Balance
351 | 
352 | To ensure balance, we could easily adapt the trees here to be self-balancing
353 | via any number of schemes:
354 | [AVL](https://en.wikipedia.org/wiki/AVL_tree),
355 | [red-black](https://en.wikipedia.org/wiki/Red%E2%80%93black_tree),
356 | [weight-balanced](https://en.wikipedia.org/wiki/Weight-balanced_tree),
357 | [treaps](https://en.wikipedia.org/wiki/Treap),
358 | etc.
359 | Because we already store the sizes of subtrees at internal nodes (which is
360 | useful for other purposes, including granularity control), it would be
361 | especially easy to adapt the trees here to be weight-balanced.
362 | 
363 | In a [recent paper](https://www.cs.cmu.edu/~guyb/papers/BFS16.pdf),
364 | Guy Blelloch, Daniel Ferizovic, and Yihan Sun showed that to implement almost
365 | any balancing scheme, you only need a single primitive called `join` which
366 | stitches two (balanced) trees together, producing a similarly balanced tree.
367 | All other operations which construct trees can then be implemented in terms of
368 | `join`, with optimal performance in theory, and excellent practical performance
369 | as well, including in parallel.
370 | 
371 | In terms of code. this approach is especially simple: any time we would
372 | construct a `Node` from two trees `t1` and `t2`, we instead just call
373 | `join (t1, t2)`. This then ensures that all trees we build are balanced.
374 | 
375 | ## Scan (Parallel Prefix Sums)
376 | 
377 | The `scan` primitive is one of the most fundamental operations in parallel
378 | computing. Similar to `reduce`, the goal is to compute a "sum" with respect to
379 | some arbitrary associative function; the difference with `scan` is that we
380 | additionally want the sums of every prefix. `scan` returns a tuple of a
381 | tree containing all prefix sums, and the total sum.
382 | 
383 | Sequentially, `scan` can be accomplished with an in-order traversal and an
384 | "accumulator" which we use to keep track of the running sum.[^1] This is the
385 | variable `acc: 'a` in the following code.
386 | 
387 | ```sml
388 |   (** A recursive loop to compute scan. *)
389 |   fun scanLoop (f: 'a * 'a -> 'a) (acc: 'a) (t: 'a tree) : 'a tree * 'a =
390 |     case t of
391 |       Empty => (Empty, acc)
392 |     | Leaf x => (Leaf acc, f (acc, x))
393 |     | Node (n, left, right) =>
394 |         let
395 |           val (leftPrefixSums, accLeft) = scanLoop f acc left
396 |           val (rightPrefixSums, accRight) = scanLoop f accLeft right
397 |           val allSums = Node (n, leftPrefixSums, rightPrefixSums)
398 |         in
399 |           (allSums, accRight)
400 |         end
401 | 
402 |   (** Sequential scan is just a call to the helper loop. *)
403 |   fun scanSeq f id t =
404 |     scanLoop f id t
405 | ```
406 | 
407 | The parallel algorithm for scan we will implement here is the
408 | "upsweep-downsweep" scan, which consists of two phases:
409 |   1. *Upsweep*: we compute a reduce, but build a tree which saves all
410 |   intermediate results. Specifically, at each internal node, we remember
411 |   the reduced (summed) value of all leaves under that node.
412 |   2. *Downsweep*: using the result of the upsweep, we push prefix sums down
413 |   into the original tree. This is accomplished by keeping track of a variable
414 |   `acc` which is the total sum of everything to the left. When we move down to
415 |   a left child, we keep the same `acc`. When we move down to a right child,
416 |   we use the stored value in the upswept tree to adjust the `acc` appropriately.
417 | 
418 | For the upsweep, we define a new datatype which will be used only in the
419 | implementation of `scan`. The datatype is called `sum_tree`, and it has
420 | two cases: `GrainSum` (to store the result of the reduce below the grain size)
421 | and `NodeSum` (to store the result of the reduce when the two children are
422 | computed in parallel).
423 | 
424 | ```sml
425 | datatype 'a sum_tree =
426 |   GrainSum of 'a
427 | | NodeSum of 'a * 'a sum_tree * 'a sum_tree
428 | 
429 | fun sumOf (st: 'a sum_tree) : 'a =
430 |   case st of
431 |     GrainSum x => x
432 |   | NodeSum (x, _, _) => x
433 | ```
434 | 
435 | TODO continue from here...
436 | 


--------------------------------------------------------------------------------