├── .gitignore ├── .travis.yml ├── README.md ├── build.sbt ├── project └── build.properties ├── sbt ├── sbt-launch.jar ├── sbt.bat └── src ├── main └── scala │ ├── common │ └── package.scala │ └── lectures │ ├── algorithms │ ├── ArrayCombiner.scala │ ├── Conc.scala │ ├── ConcBuffer.scala │ ├── MergeSort.scala │ └── package.scala │ ├── dataparallelism │ ├── Agnostic.scala │ ├── ArrayInitialization.scala │ ├── CharCount.scala │ ├── Conversion.scala │ ├── GameOfLife.scala │ ├── IntersectionCorrect.scala │ ├── IntersectionSynchronized.scala │ ├── IntersectionWrong.scala │ ├── LargestPalindromeProduct.scala │ ├── Mandelbrot.scala │ ├── ParallelGraphContraction.scala │ ├── ParallelMutation.scala │ ├── ParallelRegexSearch.scala │ ├── ParallelTrieMapGraphContraction.scala │ ├── WordCount.scala │ └── package.scala │ ├── examples │ ├── BruteForceCollatzSequence.scala │ └── DynamicProgrammingCollatzSequence.scala │ ├── introduction │ ├── FourBatchArrayNorm.scala │ └── ParallelMonteCarloPi.scala │ └── reductions │ ├── ArrayFold.scala │ ├── ArrayMap.scala │ ├── ArrayNorm.scala │ ├── ArrayScan.scala │ ├── ArrayScanDebug.scala │ ├── ArraySum.scala │ ├── RunningAverage.scala │ └── TreeMap.scala └── test └── scala └── lectures └── algorithms └── MergeSortTest.scala /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - "2.11.5" 4 | jdk: 5 | - oraclejdk7 6 | script: sbt "++ ${TRAVIS_SCALA_VERSION} ; test" 7 | branches: 8 | only: 9 | - master 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Snippets from Parallel Programming Lectures 3 | 4 | [![Build 
Status](https://travis-ci.org/axel22/parprog-snippets.svg?branch=master)](https://travis-ci.org/axel22/parprog-snippets) 5 | 6 | This repository contains public snippets from the Parallel Programming course on Coursera. 7 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | 2 | scalaVersion := "2.11.5" 3 | 4 | scalacOptions ++= Seq( 5 | "-deprecation", 6 | "-unchecked", 7 | "-optimise", 8 | "-Yinline-warnings" 9 | ) 10 | 11 | fork := true 12 | 13 | javaOptions += "-Xmx3G" 14 | 15 | parallelExecution in Test := false 16 | 17 | libraryDependencies ++= Seq( 18 | "com.storm-enroute" %% "scalameter-core" % "0.6", 19 | "org.scala-lang.modules" %% "scala-swing" % "1.0.1", 20 | "com.github.scala-blitz" %% "scala-blitz" % "1.1", 21 | "org.scalactic" %% "scalactic" % "2.2.6", 22 | "org.scalatest" %% "scalatest" % "2.2.6" % "test" 23 | ) 24 | 25 | /** 26 | * Force sbt to use Scala 2.11.5; 27 | * otherwise, some dependency will upgrade the Scala version to 2.11.7, 28 | * in which `sort1` does not exist. 29 | */ 30 | dependencyOverrides += "org.scala-lang" % "scala-library" % scalaVersion.value 31 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.7 2 | -------------------------------------------------------------------------------- /sbt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | java -Xms512M -Xmx1536M -Xss1M -XX:+CMSClassUnloadingEnabled \ 4 | -XX:MaxPermSize=256M ${JAVA_OPTS} -Dfile.encoding=UTF-8 \ 5 | -jar `dirname $0`/sbt-launch.jar "$@" 6 | -------------------------------------------------------------------------------- /sbt-launch.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/axel22/parprog-snippets/fd4400f30832d677c53555c8f1be68e06489b845/sbt-launch.jar -------------------------------------------------------------------------------- /sbt.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | set SCRIPT_DIR=%~dp0 4 | java -Xms512M -Xmx1536M -Xss1M -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=256M %JAVA_OPTS% -Dfile.encoding=UTF-8 -jar "%SCRIPT_DIR%sbt-launch.jar" %* 5 | -------------------------------------------------------------------------------- /src/main/scala/common/package.scala: -------------------------------------------------------------------------------- 1 | 2 | import java.util.concurrent._ 3 | import scala.util.DynamicVariable 4 | 5 | package object common { 6 | 7 | val forkJoinPool = new ForkJoinPool 8 | 9 | abstract class TaskScheduler { 10 | def schedule[T](body: => T): ForkJoinTask[T] 11 | def parallel[A, B](taskA: => A, taskB: => B): (A, B) = { 12 | val right = task { 13 | taskB 14 | } 15 | val left = taskA 16 | (left, right.join()) 17 | } 18 | } 19 | 20 | class DefaultTaskScheduler extends TaskScheduler { 21 | def schedule[T](body: => T): ForkJoinTask[T] = { 22 | val t = new RecursiveTask[T] { 23 | def compute = body 24 | } 25 | forkJoinPool.execute(t) 26 | t 27 | } 28 | } 29 | 30 | val scheduler = 31 | new DynamicVariable[TaskScheduler](new DefaultTaskScheduler) 32 | 33 | def task[T](body: => T): ForkJoinTask[T] = { 34 | scheduler.value.schedule(body) 35 | } 36 | 37 | def parallel[A, B](taskA: => A, taskB: => B): (A, B) = { 38 | scheduler.value.parallel(taskA, taskB) 39 | } 40 | 41 | def parallel[A, B, C, D](taskA: => A, taskB: => B, taskC: => C, taskD: => D): (A, B, C, D) = { 42 | val ta = task { taskA } 43 | val tb = task { taskB } 44 | val tc = task { taskC } 45 | val td = taskD 46 | (ta.join(), tb.join(), tc.join(), td) 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- 
/src/main/scala/lectures/algorithms/ArrayCombiner.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package algorithms 3 | 4 | import scala.collection.parallel.Combiner 5 | import scala.collection.mutable.ArrayBuffer 6 | import scala.reflect.ClassTag 7 | import org.scalameter._ 8 | import common._ 9 | 10 | class ArrayCombiner[T <: AnyRef: ClassTag](val parallelism: Int) 11 | extends Combiner[T, Array[T]] { 12 | private var numElems = 0 13 | private val buffers = new ArrayBuffer[ArrayBuffer[T]] 14 | buffers += new ArrayBuffer[T] 15 | 16 | def +=(elem: T) = { 17 | buffers.last += elem 18 | numElems += 1 19 | this 20 | } 21 | 22 | def combine[N <: T, That >: Array[T]](that: Combiner[N, That]) = { 23 | (that: @unchecked) match { 24 | case that: ArrayCombiner[T] => 25 | buffers ++= that.buffers 26 | numElems += that.numElems 27 | this 28 | } 29 | } 30 | 31 | def size = numElems 32 | 33 | def clear() = buffers.clear() 34 | 35 | private def copyTo(array: Array[T], from: Int, end: Int): Unit = { 36 | var i = from 37 | var j = 0 38 | while (i >= buffers(j).length) { 39 | i -= buffers(j).length 40 | j += 1 41 | } 42 | var k = from 43 | while (k < end) { 44 | array(k) = buffers(j)(i) 45 | i += 1 46 | if (i >= buffers(j).length) { 47 | i = 0 48 | j += 1 49 | } 50 | k += 1 51 | } 52 | } 53 | 54 | def result: Array[T] = { 55 | val step = math.max(1, numElems / parallelism) 56 | val array = new Array[T](numElems) 57 | val starts = (0 until numElems by step) :+ numElems 58 | val chunks = starts.zip(starts.tail) 59 | val tasks = for ((from, end) <- chunks) yield task { 60 | copyTo(array, from, end) 61 | } 62 | tasks.foreach(_.join()) 63 | array 64 | } 65 | 66 | } 67 | 68 | object ArrayCombiner { 69 | 70 | val standardConfig = config( 71 | Key.exec.minWarmupRuns -> 20, 72 | Key.exec.maxWarmupRuns -> 40, 73 | Key.exec.benchRuns -> 60, 74 | Key.verbose -> true 75 | ) withWarmer(new Warmer.Default) 76 | 77 | def main(args: 
Array[String]) { 78 | val size = 1000000 79 | 80 | def run(p: Int) { 81 | val taskSupport = new collection.parallel.ForkJoinTaskSupport( 82 | new scala.concurrent.forkjoin.ForkJoinPool(p)) 83 | val strings = (0 until size).map(_.toString) 84 | val time = standardConfig measure { 85 | val parallelized = strings.par 86 | parallelized.tasksupport = taskSupport 87 | def newCombiner = new ArrayCombiner(p): Combiner[String, Array[String]] 88 | parallelized.aggregate(newCombiner)(_ += _, _ combine _).result 89 | } 90 | println(s"p = $p, time = $time ms") 91 | } 92 | 93 | run(1) 94 | run(2) 95 | run(4) 96 | run(8) 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /src/main/scala/lectures/algorithms/Conc.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package algorithms 3 | 4 | import scala.annotation.tailrec 5 | 6 | sealed trait Conc[@specialized(Int, Long, Float, Double) +T] { 7 | def level: Int 8 | def size: Int 9 | def left: Conc[T] 10 | def right: Conc[T] 11 | def normalized = this 12 | } 13 | 14 | object Conc { 15 | 16 | case class <>[+T](left: Conc[T], right: Conc[T]) extends Conc[T] { 17 | val level = 1 + math.max(left.level, right.level) 18 | val size = left.size + right.size 19 | } 20 | 21 | sealed trait Leaf[T] extends Conc[T] { 22 | def left = sys.error("Leaves do not have children.") 23 | def right = sys.error("Leaves do not have children.") 24 | } 25 | 26 | case object Empty extends Leaf[Nothing] { 27 | def level = 0 28 | def size = 0 29 | } 30 | 31 | class Single[@specialized(Int, Long, Float, Double) T](val x: T) extends Leaf[T] { 32 | def level = 0 33 | def size = 1 34 | override def toString = s"Single($x)" 35 | } 36 | 37 | class Chunk[@specialized(Int, Long, Float, Double) T](val array: Array[T], val size: Int, val k: Int) 38 | extends Leaf[T] { 39 | def level = 0 40 | override def toString = s"Chunk(${array.mkString("", ", ", "")}; 
$size; $k)" 41 | } 42 | 43 | case class Append[+T](left: Conc[T], right: Conc[T]) extends Conc[T] { 44 | val level = 1 + math.max(left.level, right.level) 45 | val size = left.size + right.size 46 | override def normalized = { 47 | def wrap[T](xs: Conc[T], ys: Conc[T]): Conc[T] = (xs: @unchecked) match { 48 | case Append(ws, zs) => wrap(ws, zs <> ys) 49 | case xs => xs <> ys 50 | } 51 | wrap(left, right) 52 | } 53 | } 54 | 55 | def concatTop[T](xs: Conc[T], ys: Conc[T]) = { 56 | if (xs == Empty) ys 57 | else if (ys == Empty) xs 58 | else concat(xs, ys) 59 | } 60 | 61 | private def concat[T](xs: Conc[T], ys: Conc[T]): Conc[T] = { 62 | val diff = ys.level - xs.level 63 | if (diff >= -1 && diff <= 1) new <>(xs, ys) 64 | else if (diff < -1) { 65 | if (xs.left.level >= xs.right.level) { 66 | val nr = concat(xs.right, ys) 67 | new <>(xs.left, nr) 68 | } else { 69 | val nrr = concat(xs.right.right, ys) 70 | if (nrr.level == xs.level - 3) { 71 | val nl = xs.left 72 | val nr = new <>(xs.right.left, nrr) 73 | new <>(nl, nr) 74 | } else { 75 | val nl = new <>(xs.left, xs.right.left) 76 | val nr = nrr 77 | new <>(nl, nr) 78 | } 79 | } 80 | } else { 81 | if (ys.right.level >= ys.left.level) { 82 | val nl = concat(xs, ys.left) 83 | new <>(nl, ys.right) 84 | } else { 85 | val nll = concat(xs, ys.left.left) 86 | if (nll.level == ys.level - 3) { 87 | val nl = new <>(nll, ys.left.right) 88 | val nr = ys.right 89 | new <>(nl, nr) 90 | } else { 91 | val nl = nll 92 | val nr = new <>(ys.left.right, ys.right) 93 | new <>(nl, nr) 94 | } 95 | } 96 | } 97 | } 98 | 99 | def appendTop[T](xs: Conc[T], ys: Leaf[T]): Conc[T] = (xs: @unchecked) match { 100 | case xs: Append[T] => append(xs, ys) 101 | case _ <> _ => new Append(xs, ys) 102 | case Empty => ys 103 | case xs: Leaf[T] => new <>(xs, ys) 104 | } 105 | @tailrec private def append[T](xs: Append[T], ys: Conc[T]): Conc[T] = { 106 | if (xs.right.level > ys.level) new Append(xs, ys) 107 | else { 108 | val zs = new <>(xs.right, ys) 109 | 
xs.left match { 110 | case ws @ Append(_, _) => append(ws, zs) 111 | case ws if ws.level <= zs.level => ws <> zs 112 | case ws => new Append(ws, zs) 113 | } 114 | } 115 | } 116 | 117 | def traverse[@specialized(Int, Long, Float, Double) T, @specialized(Int, Long, Float, Double) U](xs: Conc[T], f: T => U): Unit = (xs: @unchecked) match { 118 | case left <> right => 119 | traverse(left, f) 120 | traverse(right, f) 121 | case s: Single[T] => 122 | f(s.x) 123 | case c: Chunk[T] => 124 | val a = c.array 125 | val sz = c.size 126 | var i = 0 127 | while (i < sz) { 128 | f(a(i)) 129 | i += 1 130 | } 131 | case Empty => 132 | case Append(left, right) => 133 | traverse(left, f) 134 | traverse(right, f) 135 | case _ => 136 | sys.error("All cases should have been covered: " + xs + ", " + xs.getClass) 137 | } 138 | 139 | } 140 | -------------------------------------------------------------------------------- /src/main/scala/lectures/algorithms/ConcBuffer.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package algorithms 3 | 4 | import scala.reflect.ClassTag 5 | import org.scalameter._ 6 | 7 | class ConcBuffer[@specialized(Byte, Char, Int, Long, Float, Double) T: ClassTag]( 8 | val k: Int, private var conc: Conc[T] 9 | ) extends Traversable[T] { 10 | require(k > 0) 11 | 12 | def this() = this(128, Conc.Empty) 13 | 14 | private var chunk: Array[T] = new Array(k) 15 | private var lastSize: Int = 0 16 | 17 | def foreach[U](f: T => U): Unit = { 18 | conc.foreach(f) 19 | 20 | var i = 0 21 | while (i < lastSize) { 22 | f(chunk(i)) 23 | i += 1 24 | } 25 | } 26 | 27 | final def +=(elem: T): this.type = { 28 | if (lastSize >= k) expand() 29 | chunk(lastSize) = elem 30 | lastSize += 1 31 | this 32 | } 33 | 34 | final def combine(that: ConcBuffer[T]): ConcBuffer[T] = { 35 | val combinedConc = this.result <> that.result 36 | this.clear() 37 | that.clear() 38 | new ConcBuffer(k, combinedConc) 39 | } 40 | 41 | private def 
pack() { 42 | conc = Conc.appendTop(conc, new Conc.Chunk(chunk, lastSize, k)) 43 | } 44 | 45 | private def expand() { 46 | pack() 47 | chunk = new Array(k) 48 | lastSize = 0 49 | } 50 | 51 | def clear() { 52 | conc = Conc.Empty 53 | chunk = new Array(k) 54 | lastSize = 0 55 | } 56 | 57 | def result: Conc[T] = { 58 | pack() 59 | conc 60 | } 61 | } 62 | 63 | object ConcBuffer { 64 | 65 | val standardConfig = config( 66 | Key.exec.minWarmupRuns -> 20, 67 | Key.exec.maxWarmupRuns -> 40, 68 | Key.exec.benchRuns -> 60, 69 | Key.verbose -> true 70 | ) withWarmer(new Warmer.Default) 71 | 72 | def main(args: Array[String]) { 73 | val size = 1000000 74 | 75 | def run(p: Int) { 76 | val taskSupport = new collection.parallel.ForkJoinTaskSupport( 77 | new scala.concurrent.forkjoin.ForkJoinPool(p)) 78 | val strings = (0 until size).map(_.toString) 79 | val time = standardConfig measure { 80 | val parallelized = strings.par 81 | parallelized.tasksupport = taskSupport 82 | parallelized.aggregate(new ConcBuffer[String])(_ += _, _ combine _).result 83 | } 84 | println(s"p = $p, time = $time ms") 85 | } 86 | 87 | run(1) 88 | run(2) 89 | run(4) 90 | run(8) 91 | } 92 | 93 | } 94 | -------------------------------------------------------------------------------- /src/main/scala/lectures/algorithms/MergeSort.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package algorithms 3 | 4 | import org.scalameter._ 5 | import common._ 6 | 7 | object MergeSort { 8 | 9 | def quickSort(xs: Array[Int], offset: Int, length: Int): Unit = { 10 | java.util.Arrays.sort(xs, offset, offset + length) 11 | } 12 | 13 | @volatile var dummy: AnyRef = null 14 | 15 | def parMergeSort(xs: Array[Int], maxDepth: Int): Unit = { 16 | // 1) Allocate a helper array. 
17 | // This step is a bottleneck, and takes: 18 | // - ~76x less time than a full quickSort without GCs (best time) 19 | // - ~46x less time than a full quickSort with GCs (average time) 20 | // Therefore: 21 | // - there is a almost no performance gain in executing allocation concurrently to the sort 22 | // - doing so would immensely complicate the algorithm 23 | val ys = new Array[Int](xs.length) 24 | dummy = ys 25 | 26 | // 2) Sort the elements. 27 | // The merge step has to do some copying, and is the main performance bottleneck of the algorithm. 28 | // This is due to the final merge call, which is a completely sequential pass. 29 | def merge(src: Array[Int], dst: Array[Int], from: Int, mid: Int, until: Int) { 30 | var left = from 31 | var right = mid 32 | var i = from 33 | while (left < mid && right < until) { 34 | while (left < mid && src(left) <= src(right)) { 35 | dst(i) = src(left) 36 | i += 1 37 | left += 1 38 | } 39 | while (right < until && src(right) <= src(left)) { 40 | dst(i) = src(right) 41 | i += 1 42 | right += 1 43 | } 44 | } 45 | while (left < mid) { 46 | dst(i) = src(left) 47 | i += 1 48 | left += 1 49 | } 50 | while (right < until) { 51 | dst(i) = src(right) 52 | i += 1 53 | right += 1 54 | } 55 | } 56 | // Without the merge step, the sort phase parallelizes almost linearly. 57 | // This is because the memory pressure is much lower than during copying in the third step. 58 | def sort(from: Int, until: Int, depth: Int): Unit = { 59 | if (depth == maxDepth) { 60 | quickSort(xs, from, until - from) 61 | } else { 62 | val mid = (from + until) / 2 63 | val right = task { 64 | sort(mid, until, depth + 1) 65 | } 66 | sort(from, mid, depth + 1) 67 | right.join() 68 | 69 | val flip = (maxDepth - depth) % 2 == 0 70 | val src = if (flip) ys else xs 71 | val dst = if (flip) xs else ys 72 | merge(src, dst, from, mid, until) 73 | } 74 | } 75 | sort(0, xs.length, 0) 76 | 77 | // 3) In parallel, copy the elements back into the source array. 
78 | // Executed sequentially, this step takes: 79 | // - ~23x less time than a full quickSort without GCs (best time) 80 | // - ~16x less time than a full quickSort with GCs (average time) 81 | // There is a small potential gain in parallelizing copying. 82 | // However, most Intel processors have a dual-channel memory controller, 83 | // so parallel copying has very small performance benefits. 84 | def copy(src: Array[Int], target: Array[Int], from: Int, until: Int, depth: Int): Unit = { 85 | if (depth == maxDepth) { 86 | Array.copy(src, from, target, from, until - from) 87 | } else { 88 | val mid = from + ((until - from) / 2) 89 | val right = task { 90 | copy(src, target, mid, until, depth + 1) 91 | } 92 | copy(src, target, from, mid, depth + 1) 93 | right.join() 94 | } 95 | } 96 | if (maxDepth % 2 != 0) { 97 | copy(ys, xs, 0, xs.length, 0) 98 | } 99 | } 100 | 101 | val standardConfig = config( 102 | Key.exec.minWarmupRuns -> 20, 103 | Key.exec.maxWarmupRuns -> 60, 104 | Key.exec.benchRuns -> 60, 105 | Key.verbose -> true 106 | ) withWarmer(new Warmer.Default) 107 | 108 | def initialize(xs: Array[Int]) { 109 | var i = 0 110 | while (i < xs.length) { 111 | xs(i) = i % 100 112 | i += 1 113 | } 114 | } 115 | 116 | def main(args: Array[String]) { 117 | val length = 10000000 118 | val maxDepth = 7 119 | val xs = new Array[Int](length) 120 | val seqtime = standardConfig setUp { 121 | _ => initialize(xs) 122 | } measure { 123 | quickSort(xs, 0, xs.length) 124 | } 125 | println(s"sequential sum time: $seqtime ms") 126 | 127 | val partime = standardConfig setUp { 128 | _ => initialize(xs) 129 | } measure { 130 | parMergeSort(xs, maxDepth) 131 | } 132 | println(s"fork/join time: $partime ms") 133 | println(s"speedup: ${seqtime / partime}") 134 | } 135 | 136 | } 137 | -------------------------------------------------------------------------------- /src/main/scala/lectures/algorithms/package.scala: 
-------------------------------------------------------------------------------- 1 | package lectures 2 | 3 | package object algorithms { 4 | 5 | implicit class ConcOps[T](val self: Conc[T]) extends AnyVal { 6 | def foreach[U](f: T => U) = Conc.traverse(self, f) 7 | def <>(that: Conc[T]) = Conc.concatTop(self.normalized, that.normalized) 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /src/main/scala/lectures/dataparallelism/Agnostic.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package dataparallelism 3 | 4 | import scala.collection._ 5 | import org.scalameter._ 6 | 7 | object Agnostic { 8 | 9 | val standardConfig = config( 10 | Key.exec.minWarmupRuns -> 20, 11 | Key.exec.maxWarmupRuns -> 40, 12 | Key.exec.benchRuns -> 60, 13 | Key.verbose -> true 14 | ) withWarmer(new Warmer.Default) 15 | 16 | val array = (0 until 1000000).toArray 17 | 18 | def largestPalindrome(xs: GenSeq[Int]): Int = { 19 | xs.aggregate(0)( 20 | (largest, n) => if (n > largest && n.toString == n.toString.reverse) n else largest, 21 | math.max 22 | ) 23 | } 24 | 25 | def main(args: Array[String]) { 26 | val seqtime = standardConfig measure { 27 | largestPalindrome(array) 28 | } 29 | println(s"sequential time: $seqtime ms") 30 | 31 | val partime = standardConfig measure { 32 | largestPalindrome(array.par) 33 | } 34 | println(s"parallel time: $partime ms") 35 | println(s"speedup: ${seqtime / partime}") 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/lectures/dataparallelism/ArrayInitialization.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package dataparallelism 3 | 4 | import org.scalameter._ 5 | 6 | object ArrayInitialization { 7 | 8 | val standardConfig = config( 9 | Key.exec.minWarmupRuns -> 20, 10 | Key.exec.maxWarmupRuns -> 40, 11 | 
Key.exec.benchRuns -> 60, 12 | Key.verbose -> true 13 | ) withWarmer(new Warmer.Default) 14 | 15 | val array = new Array[Int](100000000) 16 | 17 | def main(args: Array[String]) { 18 | val value = 100 19 | val seqtime = standardConfig measure { 20 | for (i <- 0 until array.length) { 21 | array(i) = value 22 | } 23 | } 24 | println(s"sequential time: $seqtime ms") 25 | 26 | val partime = standardConfig measure { 27 | for (i <- (0 until array.length).par) { 28 | array(i) = value 29 | } 30 | } 31 | println(s"parallel time: $partime ms") 32 | println(s"speedup: ${seqtime / partime}") 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/lectures/dataparallelism/CharCount.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package dataparallelism 3 | 4 | import org.scalameter._ 5 | 6 | object CharCount { 7 | 8 | val standardConfig = config( 9 | Key.exec.minWarmupRuns -> 20, 10 | Key.exec.maxWarmupRuns -> 40, 11 | Key.exec.benchRuns -> 60, 12 | Key.verbose -> true 13 | ) withWarmer(new Warmer.Default) 14 | 15 | val txt = "A short text..." 
* 500000 16 | val ps = new ParString(txt) 17 | 18 | def main(args: Array[String]) { 19 | val seqtime = standardConfig measure { 20 | txt.foldLeft(0)((x, y) => x + 1) 21 | } 22 | println(s"sequential time: $seqtime ms") 23 | 24 | val partime = standardConfig measure { 25 | ps.aggregate(0)((x, y) => x + 1, _ + _) 26 | } 27 | println(s"parallel time: $partime ms") 28 | println(s"speedup: ${seqtime / partime}") 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/lectures/dataparallelism/Conversion.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package dataparallelism 3 | 4 | import scala.collection._ 5 | import org.scalameter._ 6 | 7 | object Conversion { 8 | 9 | val standardConfig = config( 10 | Key.exec.minWarmupRuns -> 10, 11 | Key.exec.maxWarmupRuns -> 20, 12 | Key.exec.benchRuns -> 20, 13 | Key.verbose -> true 14 | ) withWarmer(new Warmer.Default) 15 | 16 | val array = Array.fill(10000000)("") 17 | val list = array.toList 18 | 19 | def main(args: Array[String]) { 20 | val listtime = standardConfig measure { 21 | list.par 22 | } 23 | println(s"list conversion time: $listtime ms") 24 | 25 | val arraytime = standardConfig measure { 26 | array.par 27 | } 28 | println(s"array conversion time: $arraytime ms") 29 | println(s"difference: ${listtime / arraytime}") 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/lectures/dataparallelism/GameOfLife.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package dataparallelism 3 | 4 | import scala.swing._ 5 | import java.awt.image._ 6 | import collection._ 7 | 8 | trait GameOfLifeUtils { 9 | 10 | def blockSize: Int 11 | 12 | val grid = concurrent.TrieMap[(Int, Int), Block]() 13 | 14 | private def getBlock(g: Int, x: Int, y: Int) = { 15 | val pos = (x / blockSize, y 
/ blockSize); 16 | grid.get(pos) match { 17 | case Some(block) => 18 | block 19 | case None => 20 | grid.put(pos, new Block(pos._1, pos._2, blockSize)) 21 | grid(pos) 22 | } 23 | } 24 | 25 | def update(g: Int, x: Int, y: Int, v: Boolean) { 26 | val block = getBlock(g, x, y) 27 | block.cells((g - 1) % 2)((y % blockSize) * blockSize + (x % blockSize)) = v 28 | block.cells(g % 2)((y % blockSize) * blockSize + (x % blockSize)) = v 29 | } 30 | 31 | def apply(g: Int, x: Int, y: Int) = { 32 | val block = getBlock(g, x, y) 33 | block.cells(g % 2)((y % blockSize) * blockSize + (x % blockSize)) 34 | } 35 | 36 | final class Block(val xp: Int, val yp: Int, val size: Int) { 37 | lazy val cells = Array( 38 | new Array[Boolean](size * size), 39 | new Array[Boolean](size * size) 40 | ) 41 | lazy val emptiness = Array(false, false) 42 | 43 | def isEmpty(generation: Int) = emptiness(generation % 2) 44 | 45 | def apply(generation: Int, x: Int, y: Int) = { 46 | cells(generation % 2)(y * size + x) 47 | } 48 | 49 | def simulate(generation: Int) { 50 | updateCells(generation) 51 | addNeighbours(generation) 52 | checkRemove(generation) 53 | } 54 | 55 | def checkRemove(g: Int) { 56 | def empty(b: Block) = (b eq null) || (b.isEmpty(g - 1)) 57 | if ( 58 | empty(grid.lookup((xp - 1, yp - 1))) && 59 | empty(grid.lookup((xp - 0, yp - 1))) && 60 | empty(grid.lookup((xp + 1, yp - 1))) && 61 | empty(grid.lookup((xp - 1, yp - 0))) && 62 | empty(grid.lookup((xp + 1, yp - 0))) && 63 | empty(grid.lookup((xp - 1, yp + 1))) && 64 | empty(grid.lookup((xp - 0, yp + 1))) && 65 | empty(grid.lookup((xp + 1, yp + 1))) 66 | ) grid.remove((xp, yp)) 67 | } 68 | 69 | def addNeighbours(g: Int) = if (!isEmpty(g)) { 70 | def add(xp: Int, yp: Int) { 71 | val pos = (xp, yp) 72 | if (grid.lookup(pos) eq null) grid.putIfAbsent(pos, new Block(xp, yp, blockSize)) 73 | } 74 | add(xp - 1, yp - 1) 75 | add(xp - 0, yp - 1) 76 | add(xp + 1, yp - 1) 77 | add(xp - 1, yp - 0) 78 | add(xp + 1, yp - 0) 79 | add(xp - 1, yp + 1) 80 | 
add(xp - 0, yp + 1) 81 | add(xp + 1, yp + 1) 82 | } 83 | 84 | def updateCells(g: Int) { 85 | val lasta = cells((g - 1) % 2) 86 | val curra = cells(g % 2) 87 | @inline def last(x: Int, y: Int) = lasta(y * size + x) 88 | @inline def curr(x: Int, y: Int, v: Boolean) = curra(y * size + x) = v 89 | def countMiddle(x: Int, y: Int) = { 90 | var count = 0 91 | if (last(x - 1, y - 1)) count += 1 92 | if (last(x - 0, y - 1)) count += 1 93 | if (last(x + 1, y - 1)) count += 1 94 | if (last(x - 1, y - 0)) count += 1 95 | if (last(x + 1, y - 0)) count += 1 96 | if (last(x - 1, y + 1)) count += 1 97 | if (last(x - 0, y + 1)) count += 1 98 | if (last(x + 1, y + 1)) count += 1 99 | count 100 | } 101 | def countUp(x: Int, above: Block) = { 102 | var count = 0 103 | if (above(g - 1, x - 1, size - 1)) count += 1 104 | if (above(g - 1, x - 0, size - 1)) count += 1 105 | if (above(g - 1, x + 1, size - 1)) count += 1 106 | if (last(x - 1, 0)) count += 1 107 | if (last(x + 1, 0)) count += 1 108 | if (last(x - 1, 1)) count += 1 109 | if (last(x - 0, 1)) count += 1 110 | if (last(x + 1, 1)) count += 1 111 | count 112 | } 113 | def countDown(x: Int, below: Block) = { 114 | var count = 0 115 | if (last(x - 1, size - 1 - 1)) count += 1 116 | if (last(x - 0, size - 1 - 1)) count += 1 117 | if (last(x + 1, size - 1 - 1)) count += 1 118 | if (last(x - 1, size - 1 - 0)) count += 1 119 | if (last(x + 1, size - 1 - 0)) count += 1 120 | if (below(g - 1, x - 1, 0)) count += 1 121 | if (below(g - 1, x - 0, 0)) count += 1 122 | if (below(g - 1, x + 1, 0)) count += 1 123 | count 124 | } 125 | def countLeft(y: Int, left: Block) = { 126 | var count = 0 127 | if (left(g - 1, size - 1, y - 1)) count += 1 128 | if (last(0, y - 1)) count += 1 129 | if (last(1, y - 1)) count += 1 130 | if (left(g - 1, size - 1, y - 0)) count += 1 131 | if (last(1, y - 0)) count += 1 132 | if (left(g - 1, size - 1, y + 1)) count += 1 133 | if (last(0, y + 1)) count += 1 134 | if (last(1, y + 1)) count += 1 135 | count 136 | } 
// NOTE(review): this chunk begins INSIDE GameOfLife.Block.simulate(g) — the
// method header (and the definitions of `last`, `curr`, `countMiddle`,
// `countUp`, `countDown`, `countLeft`, `size`, `grid`, `xp`, `yp`,
// `emptiness`) lies outside the visible region of the file.

      // Count live neighbours of cell (size-1, y) on the right border,
      // consulting the previous generation of the neighbouring `right` block.
      def countRight(y: Int, right: Block) = {
        var count = 0
        if (last(size - 1 - 1, y - 1)) count += 1
        if (last(size - 1 - 0, y - 1)) count += 1
        if (right(g - 1, 0, y - 1)) count += 1
        if (last(size - 1 - 1, y - 0)) count += 1
        if (right(g - 1, 0, y - 0)) count += 1
        if (last(size - 1 - 1, y + 1)) count += 1
        if (last(size - 1 - 0, y + 1)) count += 1
        if (right(g - 1, 0, y + 1)) count += 1
        count
      }

      // Live-neighbour counts for the four corner cells; each consults the
      // previous generation of up to three adjacent blocks.
      def countUpLeft(up: Block, left: Block, upleft: Block) = {
        var count = 0
        if (upleft(g - 1, size - 1, size - 1)) count += 1
        if (up(g - 1, 0, size - 1)) count += 1
        if (up(g - 1, 1, size - 1)) count += 1
        if (left(g - 1, size - 1, 0)) count += 1
        if (last(1, 0)) count += 1
        if (left(g - 1, size - 1, 1)) count += 1
        if (last(0, 1)) count += 1
        if (last(1, 1)) count += 1
        count
      }

      def countUpRight(up: Block, right: Block, upright: Block) = {
        var count = 0
        if (up(g - 1, size - 1 - 1, size - 1)) count += 1
        if (up(g - 1, size - 1 - 0, size - 1)) count += 1
        if (upright(g - 1, 0, size - 1)) count += 1
        if (last(size - 1 - 1, 0)) count += 1
        if (right(g - 1, 0, 0)) count += 1
        if (last(size - 1 - 1, 1)) count += 1
        if (last(size - 1 - 0, 1)) count += 1
        if (right(g - 1, 0, 1)) count += 1
        count
      }

      def countDownLeft(down: Block, left: Block, downleft: Block) = {
        var count = 0
        if (left(g - 1, size - 1, size - 1 - 1)) count += 1
        if (last(0, size - 1 - 1)) count += 1
        if (last(1, size - 1 - 1)) count += 1
        if (left(g - 1, size - 1, size - 1 - 0)) count += 1
        if (last(1, size - 1 - 0)) count += 1
        if (downleft(g - 1, size - 1, 0)) count += 1
        if (down(g - 1, 0, 0)) count += 1
        if (down(g - 1, 1, 0)) count += 1
        count
      }

      def countDownRight(down: Block, right: Block, downright: Block) = {
        var count = 0
        if (last(size - 1 - 1, size - 1 - 1)) count += 1
        if (last(size - 1 - 0, size - 1 - 1)) count += 1
        if (right(g - 1, 0, size - 1 - 1)) count += 1
        if (last(size - 1 - 1, size - 1 - 0)) count += 1
        if (right(g - 1, 0, size - 1 - 0)) count += 1
        if (down(g - 1, size - 1 - 1, 0)) count += 1
        if (down(g - 1, size - 1 - 0, 0)) count += 1
        if (downright(g - 1, 0, 0)) count += 1
        count
      }

      // Conway's rule: alive next generation iff 3 live neighbours, or
      // currently alive with exactly 2.
      def transition(live: Boolean, count: Int) =
        (live && count == 2) || count == 3

      var empty = true

      // update middle (cells that have all 8 neighbours inside this block)
      var x, y = 1
      while (y < size - 1) {
        while (x < size - 1) {
          val count = countMiddle(x, y)
          val newval = transition(last(x, y), count)
          curr(x, y, newval)
          if (newval) empty = false
          x += 1
        }
        x = 1
        y += 1
      }

      // fetch neighbours (an absent block behaves as all-dead `emptyBlock`)
      val upleft = grid.getOrElse((xp - 1, yp - 1), emptyBlock)
      val up = grid.getOrElse((xp - 0, yp - 1), emptyBlock)
      val upright = grid.getOrElse((xp + 1, yp - 1), emptyBlock)
      val left = grid.getOrElse((xp - 1, yp - 0), emptyBlock)
      val right = grid.getOrElse((xp + 1, yp - 0), emptyBlock)
      val downleft = grid.getOrElse((xp - 1, yp + 1), emptyBlock)
      val down = grid.getOrElse((xp - 0, yp + 1), emptyBlock)
      val downright = grid.getOrElse((xp + 1, yp + 1), emptyBlock)

      // update top border
      x = 1
      while (x < size - 1) {
        val count = countUp(x, up)
        val newval = transition(last(x, 0), count)
        curr(x, 0, newval)
        if (newval) empty = false
        x += 1
      }

      // update bottom border
      x = 1
      while (x < size - 1) {
        val count = countDown(x, down)
        val newval = transition(last(x, size - 1), count)
        curr(x, size - 1, newval)
        if (newval) empty = false
        x += 1
      }

      // update left border
      y = 1
      while (y < size - 1) {
        val count = countLeft(y, left)
        val newval = transition(last(0, y), count)
        curr(0, y, newval)
        if (newval) empty = false
        y += 1
      }

      // update right border
      y = 1
      while (y < size - 1) {
        val count = countRight(y, right)
        val newval = transition(last(size - 1, y), count)
        curr(size - 1, y, newval)
        if (newval) empty = false
        y += 1
      }

      // update corners
      {
        val count = countUpLeft(up, left, upleft)
        val newval = transition(last(0, 0), count)
        curr(0, 0, newval)
        if (newval) empty = false
      }
      {
        val count = countUpRight(up, right, upright)
        val newval = transition(last(size - 1, 0), count)
        curr(size - 1, 0, newval)
        if (newval) empty = false
      }
      {
        val count = countDownLeft(down, left, downleft)
        val newval = transition(last(0, size - 1), count)
        curr(0, size - 1, newval)
        if (newval) empty = false
      }
      {
        val count = countDownRight(down, right, downright)
        val newval = transition(last(size - 1, size - 1), count)
        curr(size - 1, size - 1, newval)
        if (newval) empty = false
      }

      // set emptiness flag for this generation's parity slot
      emptiness(g % 2) = empty
    }
  }

  // A shared all-dead block returned for absent grid neighbours.
  val emptyBlock = new Block(-1, -1, blockSize)

  /* some custom configurations */

  // Seed the grid with vertical bars: every other column alive in a
  // sidelength x sidelength square.
  def initBars(sidelength: Int) {
    grid.clear()
    for (y <- 0 until sidelength) {
      for (x <- 0 until sidelength by 2) this(1, x, y) = true
    }
  }

}


// Interactive Game of Life demo: a daemon thread advances generations over a
// parallel view of the block grid and periodically renders into a shared
// BufferedImage shown by a Swing component.
object GameOfLifeDemo extends GameOfLifeUtils {
  /* config */

  lazy val blockSize = sys.props.getOrElse("blocksize", "10").toInt

  val sidelength = sys.props.getOrElse("sidelength", "800").toInt

  val parlevel = sys.props.getOrElse("parlevel", "4").toInt

  val refreshrate = sys.props.getOrElse("refreshrate", "10").toInt

  val tasksupport = new parallel.ForkJoinTaskSupport(
    new scala.concurrent.forkjoin.ForkJoinPool(parlevel))

  val width = 1440

  val height = 960

  var buffer = new BufferedImage(width, height, BufferedImage.TYPE_4BYTE_ABGR)

  val display = new Display

  class Display extends Component {
    override def paintComponent(g: Graphics2D) {
      super.paintComponent(g)
      g.setRenderingHint(java.awt.RenderingHints.KEY_ANTIALIASING,
        java.awt.RenderingHints.VALUE_ANTIALIAS_ON)

      // Lock the buffer while drawing: the simulator thread writes into it.
      buffer.synchronized {
        g.drawImage(buffer, 0, 0, width, height, 0, 0, width, height, null, null)
      }
    }
  }

  val frame = new Frame {
    title = "Game of Life"
    contents = display
    display.requestFocus()
  }

  def main(args: Array[String]) {
    initBars(sidelength)
    frame.size = new Dimension(width, height)
    frame.peer.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE)
    frame.open()
    simulator.start()
  }

  val simulator = new Thread {
    val raster = new Array[Int](width * height)
    var lasttick = System.nanoTime()

    setDaemon(true)

    override def run() {
      var g = 1
      val grid = GameOfLifeDemo.this.grid.par
      grid.tasksupport = tasksupport
      while (true) {
        // Render every `refreshrate`-th generation, then simulate all
        // blocks in parallel.
        if ((g - 1) % refreshrate == 0) refresh(g, grid)
        for ((pos, block) <- grid) block.simulate(g)
        g += 1
      }
    }

    def refresh(gen: Int, grid: parallel.mutable.ParTrieMap[(Int, Int), Block]) = {
      // clear the raster to opaque white
      var x, y = 0
      while (y < height) {
        while (x < width) {
          raster(y * width + x) = 0xffffffff
          x += 1
        }
        x = 0
        y += 1
      }

      // blit each block's current-parity cell array into the raster
      for ((pos, block) <- grid) {
        val xoff = pos._1 * blockSize
        val yoff = pos._2 * blockSize
        val arr = block.cells(gen % 2)
        var i, j = 0
        while (j < blockSize) {
          while (i < blockSize) {
            val col = if (arr(j * blockSize + i)) 0xff005599 else 0xffcccccc
            val absx = xoff + i
            val absy = yoff + j
            if (absx >= 0 && absx < width && absy >= 0 && absy < height) raster(absy * width + absx) = col
            i += 1
          }
          i = 0
          j += 1
        }
      }

      buffer.setRGB(0, 0, width, height, raster, 0, width)
      val g = buffer.getGraphics.asInstanceOf[java.awt.Graphics2D]
      g.setRenderingHint(java.awt.RenderingHints.KEY_ANTIALIASING, java.awt.RenderingHints.VALUE_ANTIALIAS_ON)
      g.setColor(java.awt.Color.BLACK)
      g.drawString("Gen: " + gen.toString, 1250, 80)
      g.drawString("FPS: " + fps().toString, 1250, 95)

      display.repaint()
    }

    // Frames per second, derived from the time elapsed since the last
    // refresh and the refresh interval in generations.
    def fps() = {
      val tick = System.nanoTime()
      val diff = tick - lasttick
      lasttick = tick
      (1000000000.0 / diff * refreshrate).toInt
    }
  }

}


// ===== src/main/scala/lectures/dataparallelism/IntersectionCorrect.scala =====
package lectures
package dataparallelism

import scala.collection._
import org.scalameter._

/** Correct set intersection: filters the smaller set by membership in the
  * larger one — no shared mutable state, so it is safe under `.par`.
  */
object IntersectionCorrect {

  def main(args: Array[String]) {
    def intersection(a: GenSet[Int], b: GenSet[Int]): GenSet[Int] = {
      if (a.size < b.size) a.filter(b(_))
      else b.filter(a(_))
    }
    val seqres = intersection((0 until 1000).toSet, (0 until 1000 by 4).toSet)
    val parres = intersection((0 until 1000).par.toSet, (0 until 1000 by 4).par.toSet)
    log(s"Sequential result - ${seqres.size}")
    log(s"Parallel result - ${parres.size}")
  }

}


// ===== src/main/scala/lectures/dataparallelism/IntersectionSynchronized.scala =====
package lectures
package dataparallelism

import scala.collection._
import scala.collection.convert.wrapAsScala._
import java.util.concurrent._
import org.scalameter._

/** Set intersection accumulated into a concurrent (thread-safe) collection,
  * so the data-parallel traversal may mutate it from many threads.
  */
object IntersectionSynchronized {

  def main(args: Array[String]) {
    def intersection(a: GenSet[Int], b: GenSet[Int]) = {
      val result = new ConcurrentSkipListSet[Int]()
      for (x <- a) if (b contains x) result += x
      result
    }
    val seqres = intersection((0 until 1000).toSet, (0 until 1000 by 4).toSet)
    val parres = intersection((0 until 1000).par.toSet, (0 until 1000 by 4).par.toSet)
    log(s"Sequential result - ${seqres.size}")
    log(s"Parallel result - ${parres.size}")
  }

}


// ===== src/main/scala/lectures/dataparallelism/IntersectionWrong.scala =====
package lectures
package dataparallelism

import scala.collection._
import org.scalameter._

/** DELIBERATELY WRONG lecture example: accumulates into an unsynchronized
  * mutable set from a parallel traversal — a data race that can lose
  * elements. Kept as-is to demonstrate the hazard.
  */
object IntersectionWrong {

  def main(args: Array[String]) {
    def intersection(a: GenSet[Int], b: GenSet[Int]): Set[Int] = {
      val result = mutable.Set[Int]()
      for (x <- a) if (b contains x) result += x
      result
    }
    val seqres = intersection((0 until 1000).toSet, (0 until 1000 by 4).toSet)
    val parres = intersection((0 until 1000).par.toSet, (0 until 1000 by 4).par.toSet)
    log(s"Sequential result - ${seqres.size}")
    log(s"Parallel result - ${parres.size}")
  }

}


// ===== src/main/scala/lectures/dataparallelism/LargestPalindromeProduct.scala =====
package lectures
package dataparallelism

import org.scalameter._

/** Benchmarks finding the largest palindromic product of two 3-digit
  * numbers, sequentially vs. with parallel collections.
  */
object LargestPalindromeProduct {

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 20,
    Key.exec.maxWarmupRuns -> 40,
    Key.exec.benchRuns -> 60,
    Key.verbose -> true
  ) withWarmer(new Warmer.Default)

  def main(args: Array[String]) {
    // (removed an unused local `val value = 100` that was never referenced)
    val seqtime = standardConfig measure {
      (100 to 999).flatMap(i => (i to 999).map(i * _))
        .filter(n => n.toString == n.toString.reverse).max
    }
    println(s"sequential time: $seqtime ms")

    val partime = standardConfig measure {
      (100 to 999).par.flatMap(i => (i to 999).map(i * _))
        .filter(n => n.toString == n.toString.reverse).max
    }
    println(s"parallel time: $partime ms")
    println(s"speedup: ${seqtime / partime}")
  }

}


// ===== src/main/scala/lectures/dataparallelism/Mandelbrot.scala =====
package lectures
package dataparallelism

import java.awt._
import java.awt.event._
import javax.swing._
import javax.swing.event._
import scala.collection.parallel._
import scala.collection.par._
import org.scalameter._
import common._

/** Interactive Mandelbrot renderer comparing three parallel schedules:
  * a hand-rolled reduction tree, standard parallel collections, and the
  * scala-blitz workstealing tree.
  */
object Mandelbrot {

  try {
    UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName())
  } catch {
    case _: Exception => println("Cannot set look and feel.")
  }

  /** Escape-time iteration count for the point (xc, yc), capped at
    * `threshold`.
    */
  private def compute(xc: Double, yc: Double, threshold: Int): Int = {
    var i = 0
    var x = 0.0
    var y = 0.0
    while (x * x + y * y < 2 && i < threshold) {
      val xt = x * x - y * y + xc
      val yt = 2 * x * y + yc

      x = xt
      y = yt

      i += 1
    }
    i
  }

  class MandelCanvas(frame: MandelFrame) extends JComponent {
    val pixels = new Array[Int](4000 * 4000)
    var xoff = -0.9572428
    var yoff = -0.2956327
    var xlast = -1
    var ylast = -1

    def parallelism = {
      val selidx = frame.parcombo.getSelectedIndex
      frame.parcombo.getItemAt(selidx).toInt
    }

    def threshold = frame.threshold.getText.toInt

    def zoom = frame.zoomlevel.getValue.asInstanceOf[Int] / 10.0 * 500.0

    // complex-plane bounds derived from offset, component size and zoom
    def xlo = xoff - getWidth / zoom

    def ylo = yoff - getHeight / zoom

    def xhi = xoff + getWidth / zoom

    def yhi = yoff + getHeight / zoom

    addMouseMotionListener(new MouseMotionAdapter {
      override def mouseDragged(e: MouseEvent) {
        val xcurr = e.getX
        val ycurr = e.getY
        if (xlast != -1) {
          val xd = xcurr - xlast
          val yd = ycurr - ylast
          xoff -= xd / zoom
          yoff -= yd / zoom
        }
        xlast = xcurr
        ylast = ycurr
        repaint()
      }
    })

    addMouseListener(new MouseAdapter {
      override def mousePressed(e: MouseEvent) {
        xlast = -1
        ylast = -1
      }
    })

    addMouseWheelListener(new MouseAdapter {
      override def mouseWheelMoved(e: MouseWheelEvent) {
        val prev = frame.zoomlevel.getValue.asInstanceOf[Int]
        val next = prev + (prev * -0.1 * e.getWheelRotation - e.getWheelRotation)
        frame.zoomlevel.setValue(math.max(1, next.toInt))
      }
    })

    // Dispatch to the implementation selected in the UI combo box.
    private def fill(pixels: Array[Int], wdt: Int, hgt: Int) {
      val selected = frame.implcombo.getSelectedItem

      println("xlo: " + xlo)
      println("ylo: " + ylo)
      println("xhi: " + xhi)
      println("yhi: " + yhi)
      println("wdt: " + wdt)
      println("hgt: " + hgt)

      selected match {
        case "Reduction tree" =>
          fillReduction(pixels, wdt, hgt)
        case "Parallel collections" =>
          fillClassic(pixels, wdt, hgt)
        case "Workstealing tree" =>
          fillWsTree(pixels, wdt, hgt)
      }
    }

    // Hand-rolled binary reduction tree over the pixel index range.
    private def fillReduction(pixels: Array[Int], wdt: Int, hgt: Int) {
      def renderPixel(idx: Int) {
        val x = idx % wdt
        val y = idx / wdt
        val xc = xlo + (xhi - xlo) * x / wdt
        val yc = ylo + (yhi - ylo) * y / hgt
        val iters = compute(xc, yc, threshold)
        val a = 255 << 24
        val r = math.min(255, 1.0 * iters / threshold * 255).toInt << 16
        val g = math.min(255, 2.0 * iters / threshold * 255).toInt << 8
        val b = math.min(255, 3.0 * iters / threshold * 255).toInt << 0
        pixels(idx) = a | r | g | b
      }

      def render(from: Int, end: Int): Unit = {
        for (idx <- from until end) {
          renderPixel(idx)
        }
      }

      def parRender(from: Int, end: Int, threshold: Int): Unit = {
        if (end - from <= threshold) {
          render(from, end)
        } else {
          val mid = (from + end) / 2
          parallel(parRender(from, mid, threshold), parRender(mid, end, threshold))
        }
      }

      parRender(0, wdt * hgt, 40000)
    }

    // scala-blitz workstealing-tree scheduler over the pixel range.
    private def fillWsTree(pixels: Array[Int], wdt: Int, hgt: Int) {
      val range = 0 until (wdt * hgt)
      val conf = new Scheduler.Config.Default(parallelism)
      implicit val s = new Scheduler.ForkJoin(conf)

      for (idx <- range.toPar) {
        val x = idx % wdt
        val y = idx / wdt
        val xc = xlo + (xhi - xlo) * x / wdt
        val yc = ylo + (yhi - ylo) * y / hgt

        val iters = compute(xc, yc, threshold)
        val a = 255 << 24
        val r = math.min(255, 1.0 * iters / threshold * 255).toInt << 16
        val g = math.min(255, 2.0 * iters / threshold * 255).toInt << 8
        val b = math.min(255, 3.0 * iters / threshold * 255).toInt << 0
        pixels(idx) = a | r | g | b
      }

      s.pool.shutdown()
    }

    // Standard parallel collections with an explicit fork/join task support.
    private def fillClassic(pixels: Array[Int], wdt: Int, hgt: Int) {
      val fj = new collection.parallel.ForkJoinTaskSupport(new scala.concurrent.forkjoin.ForkJoinPool(parallelism))
      val range = 0 until (wdt * hgt)
      val pr = range.par
      pr.tasksupport = fj
      println("xlo: " + xlo)
      println("ylo: " + ylo)
      println("xhi: " + xhi)
      println("yhi: " + yhi)
      println("wdt: " + wdt)
      println("hgt: " + hgt)

      for (idx <- pr) {
        val x = idx % wdt
        val y = idx / wdt
        val xc = xlo + (xhi - xlo) * x / wdt
        val yc = ylo + (yhi - ylo) * y / hgt

        val iters = compute(xc, yc, threshold)
        val a = 255 << 24
        val r = math.min(255, 1.0 * iters / threshold * 255).toInt << 16
        val g = math.min(255, 2.0 * iters / threshold * 255).toInt << 8
        val b = math.min(255, 3.0 * iters / threshold * 255).toInt << 0
        pixels(idx) = a | r | g | b
      }

      fj.environment.shutdown()
    }

    override def paintComponent(g: Graphics) {
      super.paintComponent(g)

      val time = measure {
        fill(pixels, getWidth, getHeight)
      }
      val stats = "size: " + getWidth + "x" + getHeight + ", parallelism: " + parallelism + ", time: " + time + " ms" + ", bounds=(" + xoff + ", " + yoff + ")"
      println("Rendering: " + stats)
      frame.setTitle("Mandelbrot: " + stats)

      val img = new image.BufferedImage(getWidth, getHeight, image.BufferedImage.TYPE_INT_ARGB)
      for (x <- 0 until getWidth; y <- 0 until getHeight) {
        val color = pixels(y * getWidth + x)
        img.setRGB(x, y, color)
      }
      g.drawImage(img, 0, 0, null)
      //javax.imageio.ImageIO.write(img, "png", new java.io.File("mandelbrot.png"))
    }
  }

  class MandelFrame extends JFrame("Mandelbrot") {
    setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE)
    setSize(1024, 600)
    setLayout(new BorderLayout)
    val canvas = new MandelCanvas(this)
    add(canvas, BorderLayout.CENTER)
    val right = new JPanel
    right.setBorder(BorderFactory.createEtchedBorder(border.EtchedBorder.LOWERED))
    right.setLayout(new BorderLayout)
    val panel = new JPanel
    panel.setLayout(new GridLayout(0, 1))
    val controls = new JPanel
    controls.setLayout(new GridLayout(0, 2))
    controls.add(new JLabel("Implementation"))
    val implcombo = new JComboBox[String](Array("Reduction tree", "Parallel collections", "Workstealing tree"))
    implcombo.addActionListener(new ActionListener {
      def actionPerformed(e: ActionEvent) {
        canvas.repaint()
      }
    })
    controls.add(implcombo)
    controls.add(new JLabel("Parallelism"))
    val items = 1 to Runtime.getRuntime.availableProcessors map { _.toString } toArray
    val parcombo = new JComboBox[String](items)
    parcombo.setSelectedIndex(items.length - 1)
    parcombo.addActionListener(new ActionListener {
      def actionPerformed(e: ActionEvent) {
        canvas.repaint()
      }
    })
    controls.add(parcombo)
    controls.add(new JLabel("Zoom"))
    val zoomlevel = new JSpinner
    zoomlevel.setValue(157)
    zoomlevel.addChangeListener(new ChangeListener {
      def stateChanged(e: ChangeEvent) {
        canvas.repaint()
      }
    })
    controls.add(zoomlevel)
    controls.add(new JLabel("Threshold"))
    val threshold = new JTextField("2000")
    threshold.addActionListener(new ActionListener {
      def actionPerformed(e: ActionEvent) {
        canvas.repaint()
      }
    })
    controls.add(threshold)
    panel.add(controls)
    panel.add(new JLabel("Drag canvas to scroll, move wheel to zoom."))
    val renderbutton = new JButton("Render")
    renderbutton.addActionListener(new ActionListener {
      def actionPerformed(e: ActionEvent) {
        canvas.repaint()
      }
    })
    panel.add(renderbutton)
    right.add(panel, BorderLayout.NORTH)
    add(right, BorderLayout.EAST)
    setVisible(true)
  }

  def main(args: Array[String]) {
    val frame = new MandelFrame
  }

}


// ===== src/main/scala/lectures/dataparallelism/ParallelGraphContraction.scala =====
package lectures
package dataparallelism

import scala.collection._

/** Lecture example: contracting a ring graph with a parallel traversal over
  * a plain mutable map — reads and writes race, so violations are expected.
  */
object ParallelGraphContraction {

  def main(args: Array[String]) {
    val graph = mutable.Map[Int, Int]() ++= (0 until 100000).map(i => (i, i + 1))
    graph(graph.size - 1) = 0
    for ((k, v) <- graph.par) graph(k) = graph(v)
    val violation = graph.find({ case (i, v) => v != (i + 2) % graph.size })
    println(s"violation: $violation")
  }
}


// ===== src/main/scala/lectures/dataparallelism/ParallelMutation.scala =====
package lectures
package dataparallelism

import org.scalameter._
import scala.collection._

/** Lecture example: one parallel task reads the array while another mutates
  * it — the count result is nondeterministic by design.
  */
object ParallelMutation {

  def main(args: Array[String]) {
    val array = Array.fill(10000000)("")
    val (result, _) = common.parallel(
      array.par.count(_ == ""),
      for (i <- (0 until 10000000).par) array(i) = "modified"
    )
    println(s"result: $result")
  }
}


// ===== src/main/scala/lectures/dataparallelism/ParallelRegexSearch.scala =====
package lectures
package dataparallelism

import org.scalameter._
import scala.concurrent._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.collection._
import scala.io.Source

/** Downloads the HTML spec asynchronously and benchmarks a regex search on
  * it, sequentially vs. with parallel collections.
  */
object ParallelRegexSearch {

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 30,
    Key.exec.maxWarmupRuns -> 60,
    Key.exec.benchRuns -> 120,
    Key.verbose -> true
  ) withWarmer(new Warmer.Default)

  def getHtmlSpec() = Future {
    val specSrc: Source = Source.fromURL("http://www.w3.org/MarkUp/html-spec/html-spec.txt")
    try specSrc.getLines.toArray finally specSrc.close()
  }

  def main(args: Array[String]) {
    // NOTE(review): the download future starts here; the "Fetching" message
    // below is printed after it is kicked off (original ordering preserved).
    val measurements = for (specDoc <- getHtmlSpec()) yield {
      println(s"Download complete!")

      def search(d: GenSeq[String]) = standardConfig measure {
        d.indexWhere(line => line.matches(".*TEXTAREA.*"))
      }

      val seqtime = search(specDoc)
      val partime = search(specDoc.par)

      (seqtime, partime)
    }
    println("Fetching HTML specification, searching for TEXTAREA.")
    val (seqtime, partime) = Await.result(measurements, Duration.Inf)
    println(s"Sequential time $seqtime ms")
    println(s"Parallel time $partime ms")
    println(s"speedup: ${seqtime / partime}")
  }
}


// ===== src/main/scala/lectures/dataparallelism/ParallelTrieMapGraphContraction.scala =====
package lectures
package dataparallelism

import scala.collection._

/** Correct graph contraction: reads go through an O(1) TrieMap snapshot, so
  * the parallel writes to `graph` cannot race with them.
  */
object ParallelTrieMapGraphContraction {

  def main(args: Array[String]) {
    val graph = concurrent.TrieMap[Int, Int]() ++= (0 until 100000).map(i => (i, i + 1))
    graph(graph.size - 1) = 0
    val previous = graph.snapshot()
    for ((k, v) <- graph.par) graph(k) = previous(v)
    val violation = graph.find({ case (i, v) => v != (i + 2) % graph.size })
    println(s"violation: $violation")
  }

}


// ===== src/main/scala/lectures/dataparallelism/WordCount.scala =====
package lectures
package dataparallelism

import org.scalameter._

/** Word counting: sequential foldLeft vs. parallel aggregate over a custom
  * ParString. The parallel accumulator is (leading run length, word count,
  * trailing run length) so adjacent segments can be merged associatively.
  */
object WordCount {

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 50,
    Key.exec.maxWarmupRuns -> 100,
    Key.exec.benchRuns -> 40,
    Key.verbose -> true
  ) withWarmer(new Warmer.Default)

  val txt = "A short text... " * 250000
  val ps = new ParString(txt)

  def main(args: Array[String]) {
    val seqtime = standardConfig measure {
      txt.foldLeft((0, true)) {
        case ((wc, _), ' ') => (wc, true)
        case ((wc, true), x) => (wc + 1, false)
        case ((wc, false), x) => (wc, false)
      }
    }
    println(s"sequential time: $seqtime ms")

    val partime = standardConfig measure {
      ps.aggregate((0, 0, 0))({ (x, y) =>
        if (x._2 > 0) {
          if (y != ' ') x match {
            case (ls, wc, 0) => (ls, wc, 0)
            case (ls, wc, rs) => (ls, wc + 1, 0)
          } else x match {
            case (ls, wc, rs) => (ls, wc, rs + 1)
          }
        } else {
          if (y != ' ') x match {
            case (ls, 0, _) => (ls + 1, 0, ls + 1)
          } else x match {
            case (ls, 0, _) => (ls + 1, 1, 0)
          }
        }
      }, {
        case ((0, 0, 0), res) => res
        case (res, (0, 0, 0)) => res
        case ((lls, lwc, 0), (0, rwc, rrs)) => (lls, lwc + rwc - 1, rrs)
        case ((lls, lwc, _), (_, rwc, rrs)) => (lls, lwc + rwc, rrs)
      })
    }
    println(s"parallel time: $partime ms")
    println(s"speedup: ${seqtime / partime}")
  }

}


// ===== src/main/scala/lectures/dataparallelism/package.scala =====
package lectures

import scala.collection.parallel._
import scala.collection.mutable.ArrayBuffer

package object dataparallelism {

  /** A parallel Seq[Char] view over a String, with a custom splitter so
    * parallel collections can partition the character range.
    */
  class ParString(val str: String)
  extends immutable.ParSeq[Char] {

    def apply(i: Int) = str.charAt(i)

    def length = str.length

    def seq = new collection.immutable.WrappedString(str)

    def splitter = new ParStringSplitter(str, 0, str.length)

    class ParStringSplitter(private var s: String, private var i: Int, private val ntl: Int)
    extends SeqSplitter[Char] {
      final def hasNext = i < ntl
      final def next = {
        val r = s.charAt(i)
        i += 1
        r
      }
      def remaining = ntl - i
      def dup = new ParStringSplitter(s, i, ntl)
      def split = {
        val rem = remaining
        if (rem >= 2) psplit(rem / 2, rem - rem / 2)
        else Seq(this)
      }
      // Partition the remaining range into consecutive chunks of the
      // requested sizes; this splitter is advanced past the chunks it emits.
      def psplit(sizes: Int*): Seq[SeqSplitter[Char]] = {
        val splitted = new ArrayBuffer[ParStringSplitter]
        for (sz <- sizes) {
          val next = (i + sz) min ntl
          splitted += new ParStringSplitter(s, i, next)
          i = next
        }
        splitted
      }
    }

  }

}


// ===== src/main/scala/lectures/examples/BruteForceCollatzSequence.scala =====
package lectures
package examples

import org.scalameter._
import common._

/** An example of a trivially parallelizable brute force solution.
 *  The nice thing about this example is that computing the collatz sequence
 *  does not require any memory access, so the memory bandwidth is not a bottleneck.
 *  Here we can really see the benefits of a quad-core with hyperthreading.
 */
object BruteForceCollatzSequence {

  @volatile var dummy = 0

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 30,
    Key.exec.maxWarmupRuns -> 60,
    Key.exec.benchRuns -> 60,
    Key.verbose -> true
  ) withWarmer(new Warmer.Default)

  /** Length of the Collatz sequence starting at `number` (inclusive of the
    * start and the terminal 1).
    */
  def collatz(number: Int): Int = {
    var length = 1
    var n = number
    while (n != 1) {
      if (n % 2 == 0) n = n / 2
      else n = 3 * n + 1
      length += 1
    }
    length
  }

  /** The starting number in [from, until) with the longest Collatz sequence. */
  def longestCollatz(from: Int, until: Int): Int = {
    (from until until).maxBy(collatz)
  }

  def fjLongestCollatz(from: Int, until: Int, threshold: Int): Int = {
    if (until - from < threshold) {
      longestCollatz(from, until)
    } else {
      val mid = (from + until) / 2
      val (leftLongest, rightLongest) =
        parallel(fjLongestCollatz(from, mid, threshold),
          fjLongestCollatz(mid, until, threshold))
      // BUGFIX: the halves return *starting numbers*, so they must be
      // compared by Collatz sequence length. The old `math.max` compared the
      // numbers themselves — e.g. combining 27 (length 112) and 28
      // (length 19) wrongly produced 28.
      if (collatz(leftLongest) >= collatz(rightLongest)) leftLongest
      else rightLongest
    }
  }

  def main(args: Array[String]) {
    val until = 100000
    val threshold = 100
    val seqtime = standardConfig measure {
      dummy = longestCollatz(1, until)
    }
    println(s"sequential sum time: $seqtime ms")

    val fjtime = standardConfig measure {
      fjLongestCollatz(1, until, threshold)
    }
    println(s"fork/join time: $fjtime ms")
    println(s"speedup: ${seqtime / fjtime}")
  }

}


// ===== src/main/scala/lectures/examples/DynamicProgrammingCollatzSequence.scala =====
package lectures
package examples

import java.util.concurrent.atomic._
import org.scalameter._
import common._

/** A memoized (dynamic programming) variant of the Collatz search: sequence
 *  lengths are cached in a shared AtomicIntegerArray, so each task can reuse
 *  lengths already computed by any thread.
 *  (Header comment corrected — it was a copy-paste of the brute-force file.)
 */
object DynamicProgrammingCollatzSequence {

  @volatile var dummy = 0

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 50,
    Key.exec.maxWarmupRuns -> 100,
    Key.exec.benchRuns -> 60,
    Key.verbose -> true
  ) withWarmer(new Warmer.Default)

  val maxSize = 500000000

  // memo table: table.get(n) == 0 means "not yet computed"
  val table = new AtomicIntegerArray(maxSize)
  table.set(1, 1)

  /** Memoized Collatz sequence length: walks the sequence until it hits a
    * cached suffix, then stores the result for `number`.
    */
  def collatz(number: Int): Int = {
    var length = 1
    var n = number
    var prev = n
    while (n != 1) {
      if (n % 2 == 0) n = n / 2
      else n = 3 * n + 1

      if (n >= table.length || table.get(n) == 0) {
        length += 1
        prev = n
      } else {
        length += table.get(n)
        n = 1
      }
    }
    table.set(number, length)
    length
  }

  /** The starting number in [from, until) with the longest Collatz sequence. */
  def longestCollatz(from: Int, until: Int): Int = {
    (from until until).maxBy(collatz)
  }

  def fjLongestCollatz(from: Int, until: Int, threshold: Int): Int = {
    if (until - from < threshold) {
      longestCollatz(from, until)
    } else {
      val mid = (from + until) / 2
      val right = task {
        fjLongestCollatz(mid, until, threshold)
      }
      val leftLongest = fjLongestCollatz(from, mid, threshold)
      val rightLongest = right.join()
      // BUGFIX: compare the candidate starting numbers by their (memoized,
      // hence cheap) sequence lengths instead of their numeric values.
      if (collatz(leftLongest) >= collatz(rightLongest)) leftLongest
      else rightLongest
    }
  }

  def main(args: Array[String]) {
    val until = 100000
    val threshold = 100
    val seqtime = standardConfig measure {
      dummy = longestCollatz(1, until)
    }
    println(s"sequential sum time: $seqtime ms")

    val fjtime = standardConfig measure {
      fjLongestCollatz(1, until, threshold)
    }
    println(s"fork/join time: $fjtime ms")
    println(s"speedup: ${seqtime / fjtime}")
  }

}


// ===== src/main/scala/lectures/introduction/FourBatchArrayNorm.scala =====
package lectures
package introduction

import org.scalameter._
import common._

/** p-norm of an array computed sequentially and as four parallel batches. */
object FourBatchArrayNorm {
  @volatile var dummy: Int = 0
  @volatile var dummy2: Int = 0

  val logE = math.log(math.E)

  def power(x: Int, p: Double): Int = {
    math.exp(p * math.log(x) / logE).toInt // TODO <-- make everything doubles
  }

  // Sum of |xs(i)|^p over [from, until).
  def sumSegment(xs: Array[Int], p: Double, from: Int, until: Int): Int = {
    var i = from
    var s = 0
    while (i < until) {
      s += power(xs(i), p)
      i += 1
    }
    s
  }

  def normSum(xs: Array[Int], p: Double): Int =
    power(sumSegment(xs, p, 0, xs.size), 1.0 / p)

  // Four-way split evaluated with two nested `parallel` calls.
  def fjNormSum(xs: Array[Int], p: Double, threshold: Int): Int = {
    val ((s1, s2), (s3, s4)) = parallel(
      parallel(
        sumSegment(xs, p, 0, xs.length / 4),
        sumSegment(xs, p, xs.length / 4, 2 * xs.length / 4)
      ),
      parallel(
        sumSegment(xs, p, 2 * xs.length / 4, 3 * xs.length / 4),
        sumSegment(xs, p, 3 * xs.length / 4, xs.length)
      )
    )
    power(s1 + s2 + s3 + s4, 1.0 / p)
  }

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 10,
    Key.exec.maxWarmupRuns -> 10,
    Key.exec.benchRuns -> 10,
    Key.verbose -> true
  ) withWarmer(new Warmer.Default)

  def main(args: Array[String]) {
    val p = 1.5
    val xs = (0 until 2000000).map(_ % 100).toArray
    val seqtime = standardConfig measure {
      dummy = normSum(xs, p)
    }
    println(s"sequential sum time: $seqtime ms")

    val threshold = 10000
    val fjtime = standardConfig measure {
      dummy2 = fjNormSum(xs, p, threshold)
    }
    println(s"values computed are $dummy vs $dummy2")
    println(s"fork/join time: $fjtime ms")
    println(s"speedup: ${seqtime/fjtime}")
  }

}


// ===== src/main/scala/lectures/introduction/ParallelMonteCarloPi.scala =====
package lectures
package introduction

import org.scalameter._
import scala.util.Random
import common._

/** Monte Carlo estimation of pi, sequential vs. four parallel quarters. */
object ParallelMonteCarloPi {
  @volatile var seqResult: Double = 0
  @volatile var parResult: Double = 0

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 20,
    Key.exec.maxWarmupRuns -> 40,
    Key.exec.benchRuns -> 20,
    Key.verbose -> true
  ) withWarmer(new Warmer.Default)

  def monteCarloPi(iterations: Int): Double = {
    val randomX = new Random
    val randomY = new Random
    var hits = 0
    for (i <- 0 until iterations) {
      val x = randomX.nextDouble()
      val y = randomY.nextDouble()
      if (x * x + y * y < 1) hits += 1
    }
    // r * r * Pi = hitRatio * 4 * r * r
    // Pi = hitRatio * 4
    4.0 * hits / iterations
  }

  def parMonteCarloPi(iterations: Int): Double = {
    val ((pi1, pi2), (pi3, pi4)) = parallel(
      parallel(monteCarloPi(iterations / 4), monteCarloPi(iterations / 4)),
      parallel(monteCarloPi(iterations / 4), monteCarloPi(iterations / 4))
    )
    (pi1 + pi2 + pi3 + pi4) / 4
  }

  def main(args: Array[String]) {
    val iterations = 4000000
    val seqtime = standardConfig measure {
      seqResult = monteCarloPi(iterations)
    }
    println(s"sequential time: $seqtime ms")

    val partime = standardConfig measure {
      parResult = parMonteCarloPi(iterations)
    }
    println(s"fork/join time: $partime ms")
    println(s"speedup: ${seqtime/partime}")
    println(s"values computed are $seqResult vs $parResult")
  }

}


// ===== src/main/scala/lectures/reductions/ArrayFold.scala =====
package lectures
package reductions

import org.scalameter._
import common._

/** Sequential and parallel fold over an array segment, demonstrated with
  * relativistic velocity addition (an associative operation).
  */
object ArrayFold {
  def foldASegSeq[A, B](inp: Array[A], b0: B,
                        left: Int, right: Int,
                        f: (B, A) => B): B = {
    var b = b0
    var i = left
    while (i < right) {
      b = f(b, inp(i))
      i = i + 1
    }
    b
  }

  def foldASegPar[A](inp: Array[A], a0: A,
                     left: Int, right: Int,
                     f: (A, A) => A): A = {
    // requires f to be associative
    if (right - left < threshold)
      foldASegSeq(inp, a0, left, right, f)
    else {
      val mid = left + (right - left) / 2
      val (a1, a2) = parallel(foldASegPar(inp, a0, left, mid, f),
        foldASegPar(inp, a0, mid, right, f))
      f(a1, a2)
    }
  }

  val c = 2.99792458e8
  // relativistic velocity addition — associative, so safe to fold in parallel
  def assocOp(v1: Double, v2: Double): Double = {
    val u1 = v1 / c
    val u2 = v2 / c
    (v1 + v2) / (1 + u1 * u2)
  }

  def addVelSeq(inp: Array[Double]): Double = {
    foldASegSeq(inp, 0.0, 0, inp.length, assocOp)
  }
  def addVelPar(inp: Array[Double]): Double = {
    foldASegPar(inp, 0.0, 0, inp.length, assocOp)
  }

  val threshold = 10000

  val standardConfig = config(
    Key.exec.minWarmupRuns -> 30,
    Key.exec.maxWarmupRuns -> 30,
    Key.exec.benchRuns -> 20,
    Key.verbose -> false
  ) withWarmer(new Warmer.Default)

  def main(args: Array[String]) {
    val alen = 2000000
    val inp = (0 until alen).map((x: Int) => (x % 50) * 0.0001 * c).toArray
    var resSeq = 0.0
    val seqtime = standardConfig measure {
      resSeq = addVelSeq(inp)
    }
    var resPar = 0.0
    val partime = standardConfig measure {
      resPar = addVelPar(inp)
    }

    println(s"sequential time: $seqtime ms and result $resSeq")
    println(s"parallel time: $partime ms and result $resPar")
    /* Example output on Intel(R) Core(TM) i7-3770K CPU @ 3.50GHz (4 cores, 8 hw threads), 16GB RAM

    [info] sequential time: 33.0507908 ms and result 2.997924579999967E8
    [info] parallel time: 11.158121000000003 ms and result 2.99792458E8

    We get around 3 times speedup. The computed value is slightly different due to roundoff errors.
    */

  }
}


// ===== src/main/scala/lectures/reductions/ArrayMap.scala =====
package lectures
package reductions

import org.scalameter._
import common._

object ArrayMap {

  // Map f over inp(left until right) into out, sequentially.
  def mapASegSeq[A, B](inp: Array[A], f: A => B,
                       left: Int, right: Int,
                       out: Array[B]) = {
    var i = left
    while (i < right) {
      out(i) = f(inp(i))
      i = i + 1
    }
  }

  def mapASegPar[A, B](inp: Array[A], left: Int, right: Int,
                       f: A => B,
                       out: Array[B]): Unit = {
    // require f to be pure
    if (right - left < threshold)
      mapASegSeq(inp, f, left, right, out)
    else {
      val mid = left + (right - left) / 2
      val _ = parallel(mapASegPar(inp, left, mid, f, out),
        mapASegPar(inp, mid, right, f, out))
    }
  }

  // Specialized (non-generic) parallel norm mapping, avoiding boxing.
  def normsOfPar(inp: Array[Int], p: Double,
                 left: Int, right: Int,
                 out: Array[Double]): Unit = {
    if (right - left < threshold) {
      var i = left
      while (i < right) {
        out(i) = power(inp(i), p)
        i = i + 1
      }
    } else {
      val mid = left + (right - left) / 2
      val _ = parallel(normsOfPar(inp, p, left, mid, out),
        normsOfPar(inp, p, mid, right, out))
    }
  }

  def normsOf(inp: Array[Int], p: Double,
              left: Int, right: Int,
              out: Array[Double]): Unit = {
    var i = left
    while (i < right) {
      out(i) = power(inp(i), p)
      i = i + 1
    }
  }

// NOTE(review): ArrayMap.scala continues beyond the visible region of this
// chunk (the remainder of the object lies outside view).
// an effectful map: more flexible, but easier to use wrongly 59 | def actionOnSegPar(action : Int => Unit, 60 | left: Int, right: Int): Unit = { 61 | // require action(i1) and action(i2) do not interfere for i1 != i2 62 | if (right - left < threshold) { 63 | var i= left 64 | while (i < right) { 65 | action(i) 66 | i= i+1 67 | } 68 | } else { 69 | val mid = left + (right - left)/2 70 | val _ = parallel(actionOnSegPar(action, left, mid), 71 | actionOnSegPar(action, mid, right)) // fixed: was (left, mid) twice, so [mid, right) was never processed 72 | } 73 | } 74 | 75 | def mapASegPar2[A,B](inp: Array[A], left: Int, right: Int, 76 | f : A => B, 77 | out: Array[B]): Unit = { 78 | def action(i: Int): Unit = { out(i)= f(inp(i)) } 79 | actionOnSegPar(action, left, right) 80 | } 81 | 82 | val logE = math.log(math.E) 83 | 84 | def power(x: Int, p: Double): Int = { 85 | math.exp(p * math.log(math.abs(x)) / logE).toInt 86 | } 87 | 88 | def mapNormSeq(inp: Array[Int], p: Double, 89 | out: Array[Double]): Unit = { 90 | require(inp.length == out.length) 91 | def f(x: Int): Double = power(x, p) 92 | mapASegSeq(inp, f, 0, inp.length, out) 93 | } 94 | 95 | def mapNormPar(inp: Array[Int], p: Double, 96 | out: Array[Double]): Unit = { 97 | require(inp.length == out.length) 98 | def f(x: Int): Double = power(x, p) 99 | mapASegPar(inp, 0, inp.length, f, out) 100 | } 101 | 102 | def mapNormPar2(inp: Array[Int], p: Double, 103 | out: Array[Double]): Unit = { 104 | require(inp.length == out.length) 105 | def f(x: Int): Double = power(x, p) 106 | mapASegPar2(inp, 0, inp.length, f, out) 107 | } 108 | 109 | val threshold = 10000 110 | 111 | val standardConfig = config( 112 | Key.exec.minWarmupRuns -> 30, 113 | Key.exec.maxWarmupRuns -> 30, 114 | Key.exec.benchRuns -> 20, 115 | Key.verbose -> false 116 | ) withWarmer(new Warmer.Default) 117 | 118 | def main(args: Array[String]) { 119 | val p = 1.5 120 | val alen = 2000000 121 | val inp = (0 until alen).map(_ % 100).toArray 122 | val out1 = (0 until alen).map(_ => 0.0).toArray 123 | val out2 = (0 until 
alen).map(_ => 0.0).toArray 124 | val out3 = (0 until alen).map(_ => 0.0).toArray 125 | val seqtime = standardConfig measure { 126 | mapNormSeq(inp, p, out1) 127 | } 128 | 129 | /* 130 | val mapNormPar2time = standardConfig measure { 131 | mapNormPar2(inp, p, out3) 132 | } 133 | println(s"mapNormPar2: $mapNormPar2time ms") 134 | println(s"speedup2: ${seqtime/mapNormPar2time}") 135 | */ 136 | 137 | val mapNormParTime = standardConfig measure { 138 | mapNormPar(inp, p, out2) 139 | } 140 | 141 | val normsOfParTime = standardConfig measure { 142 | normsOfPar(inp, p, 0, inp.length, out3) 143 | } 144 | 145 | val normsOfTime = standardConfig measure { 146 | normsOf(inp, p, 0, inp.length, out3) 147 | } 148 | 149 | println(s"sequential sum time: $seqtime ms") 150 | println(s"mapNormPar time: $mapNormParTime ms") 151 | println(s"normsOfPar time: $normsOfParTime ms") 152 | println(s"normsOf time: $normsOfTime ms") 153 | /* Example output on Intel(R) Core(TM) i7-3770K CPU @ 3.50GHz (4 cores, 8 hw threads), 16GB RAM 154 | [info] sequential sum time: 174.17463240000004 ms 155 | [info] mapNormPar time: 28.9307023 ms 156 | [info] normsOfPar time: 28.165657500000002 ms 157 | [info] normsOf time: 166.83788205000002 ms 158 | 159 | Note that manual inlining does not pay off much, 160 | and parallelization is where the main win is! 
161 | */ 162 | } 163 | 164 | } 165 | -------------------------------------------------------------------------------- /src/main/scala/lectures/reductions/ArrayNorm.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package reductions 3 | 4 | import org.scalameter._ 5 | import common._ 6 | 7 | object ArrayNorm { 8 | @volatile var dummy: Int = 0 9 | @volatile var dummy2: Int = 0 10 | 11 | val logE = math.log(math.E) 12 | 13 | def power(x: Int, p: Double): Int = { 14 | math.exp(p * math.log(x) / logE).toInt // TODO <-- make everything doubles 15 | } 16 | 17 | def sumSegment(xs: Array[Int], p: Double, from: Int, until: Int): Int = { 18 | var i = from 19 | var s = 0 20 | while (i < until) { 21 | s += power(xs(i), p) 22 | i += 1 23 | } 24 | s 25 | } 26 | 27 | def normSum(xs: Array[Int], p: Double): Int = 28 | power(sumSegment(xs, p, 0, xs.size), 1.0 / p) 29 | 30 | def fjSumSegment(xs: Array[Int], p: Double, from: Int, until: Int, threshold: Int): Int = { 31 | if (until - from < threshold) { 32 | sumSegment(xs, p, from, until) 33 | } else { 34 | val mid = (from + until) / 2 35 | val right = task { 36 | fjSumSegment(xs, p, mid, until, threshold) 37 | } 38 | val leftSum = fjSumSegment(xs, p, from, mid, threshold) 39 | val rightSum = right.join() 40 | leftSum + rightSum 41 | } 42 | } 43 | 44 | def fjNormSum(xs: Array[Int], p: Double, threshold: Int): Int = 45 | power(fjSumSegment(xs, p, 0, xs.length, threshold), 1.0 / p) 46 | 47 | val standardConfig = config( 48 | Key.exec.minWarmupRuns -> 10, 49 | Key.exec.maxWarmupRuns -> 10, 50 | Key.exec.benchRuns -> 10, 51 | Key.verbose -> true 52 | ) withWarmer(new Warmer.Default) 53 | 54 | def main(args: Array[String]) { 55 | val p = 1.5 56 | val xs = (0 until 2000000).map(_ % 100).toArray 57 | val seqtime = standardConfig measure { 58 | dummy = normSum(xs, p) 59 | } 60 | println(s"sequential sum time: $seqtime ms") 61 | 62 | val threshold = 10000 63 | val fjtime = 
standardConfig measure { 64 | dummy2 = fjNormSum(xs, p, threshold) 65 | } 66 | println(s"values computed are $dummy vs $dummy2") 67 | println(s"fork/join time: $fjtime ms") 68 | println(s"speedup: ${seqtime/fjtime}") 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/scala/lectures/reductions/ArrayScan.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package reductions 3 | import org.scalameter._ 4 | import common._ 5 | 6 | object ArrayScan { // Parallel scan of an array 7 | 8 | /* 9 | fold left array segment from left to right-1, sequentially. 10 | Used in the base case for upsweep. 11 | This is the same operation we would use in the base case of parallel fold. 12 | */ 13 | def foldASegSeq[A,B](inp: Array[A], 14 | left: Int, right: Int, 15 | b0: B, // initial element 16 | f: (B,A) => B): B = { 17 | var b= b0 18 | var i= left 19 | while (i < right) { 20 | b= f(b, inp(i)) 21 | i= i+1 22 | } 23 | b 24 | } 25 | 26 | // Binary trees whose nodes store elements of type A 27 | sealed abstract class FoldTree[A] { 28 | val res: A // whether it is leaf or internal node, res stores the result 29 | } 30 | case class Leaf[A](from: Int, to: Int, resLeaf: A) extends FoldTree[A] { 31 | val res= resLeaf 32 | } 33 | case class Node[A](l: FoldTree[A], r: FoldTree[A], resNode: A) extends FoldTree[A] { 34 | val res= resNode 35 | } 36 | 37 | /* 38 | fold array segment in parallel and record the intermediate computation results in a Tree[A]. 39 | In the context of scan, this phase is called upsweep. 40 | For an intuition, picture the array to reduce on the bottom, and the root of the tree at the top. 41 | Once the 'parallel' tasks are initiated, the results are combined in the 'up' direction, from array 42 | to the result of the fold. 
43 | */ 44 | def upsweep[A](inp: Array[A], 45 | left: Int, right: Int, 46 | a0: A, 47 | f: (A,A) => A): FoldTree[A] = { 48 | // requires f to be associative 49 | if (right - left < threshold) 50 | Leaf(left, right, foldASegSeq(inp, left + 1, right, inp(left), f)) 51 | else { 52 | val mid = left + (right - left)/2 53 | val (t1,t2) = parallel(upsweep(inp, left, mid, a0, f), 54 | upsweep(inp, mid, right, a0, f)) 55 | Node(t1, t2, f(t1.res,t2.res)) 56 | } 57 | } 58 | 59 | /* 60 | Scan array segment inp(left) to inp(right-1), 61 | storing results into out(left+1) to out(right). 62 | At the end, out(i+1) stores fold of elements: 63 | [a0, in(left),... in(i)] for i from left to right-1. 64 | In particular, out(left+1) stores f(a0,inp(left)) 65 | and out(right) stores fold of [a0, in[(left),... inp(right-1)]. 66 | The value a0 is not directly stored into out anywhere. 67 | 68 | This is used below cutoff in downsweep for scanAPar, 69 | and also to implement scanASeq as a comparison point. 70 | */ 71 | def scanASegSeq1[A](inp: Array[A], 72 | left: Int, right: Int, 73 | a0: A, 74 | f: (A,A) => A, 75 | out: Array[A]) = { 76 | if (left < right) { 77 | var i= left 78 | var a= a0 79 | while (i < right) { 80 | a= f(a,inp(i)) 81 | out(i+1)=a 82 | i= i+1 83 | } 84 | } 85 | } 86 | 87 | def downsweep[A](inp: Array[A], 88 | a0: A, 89 | f: (A,A) => A, 90 | t: FoldTree[A], 91 | out: Array[A]): Unit = { 92 | t match { 93 | case Leaf(from, to, res) => 94 | scanASegSeq1(inp, from, to, a0, f, out) 95 | case Node(l, r, res) => { 96 | val (_,_) = parallel( 97 | downsweep(inp, a0, f, l, out), 98 | downsweep(inp, f(a0,l.res), f, r, out)) 99 | } 100 | } 101 | } 102 | 103 | def scanASegPar[A](inp: Array[A], 104 | from: Int, to: Int, 105 | a0: A, 106 | f: (A,A) => A, 107 | out: Array[A]) = { 108 | val t = upsweep(inp, from, to, a0, f) 109 | downsweep(inp, a0, f, t, out) 110 | } 111 | 112 | def scanAPar[A](inp: Array[A], 113 | a0: A, 114 | f: (A,A) => A, 115 | out: Array[A]) = { 116 | out(0)= a0 117 
| scanASegPar(inp, 0, inp.length, a0, f, out) 118 | } 119 | 120 | def scanASeq[A](inp: Array[A], 121 | a0: A, 122 | f: (A,A) => A, 123 | out: Array[A]) = { 124 | out(0) = a0 125 | scanASegSeq1(inp, 0, inp.length, a0, f, out) 126 | } 127 | 128 | /* 129 | ======================================= 130 | Setting parameters and testing 131 | ======================================= 132 | */ 133 | 134 | var threshold = 20000 135 | 136 | val standardConfig = config( 137 | Key.exec.minWarmupRuns -> 6, 138 | Key.exec.maxWarmupRuns -> 6, 139 | Key.exec.benchRuns -> 5, 140 | Key.verbose -> false 141 | ) withWarmer(new Warmer.Default) 142 | 143 | 144 | def testConcat : Unit = { 145 | println("===========================================") 146 | println("Testing ArrayScan on concatenation example.") 147 | println("===========================================") 148 | 149 | def concat(x: List[Int], y: List[Int]): List[Int] = 150 | x ::: y 151 | 152 | def arrEq[A](a1: Array[A], a2: Array[A]): Boolean = { 153 | def eqSeq(from: Int, to: Int): Boolean = { 154 | var i= from 155 | while (i < to) { 156 | if (a1(i) != a2(i)) { 157 | println(s"Array difference: a1(${i})=${a1(i)}, a2(${i})=${a2(i)}") 158 | return false 159 | } else { 160 | i= i + 1 161 | } 162 | } 163 | true 164 | } 165 | if (a1.length != a2.length) { 166 | println("Different sizes!") 167 | false 168 | } else eqSeq(0, a1.length) 169 | } 170 | 171 | threshold = 100 172 | 173 | val alen = 2000 174 | val inp = (0 until alen).map((x:Int) => List(x)).toArray 175 | val outSeq = new Array[List[Int]](alen + 1) 176 | val outPar = new Array[List[Int]](alen + 1) 177 | val init = List(12309, 32123) 178 | val seqtime = standardConfig measure { 179 | scanASeq(inp, init, concat, outSeq) 180 | } 181 | println(s"sequential time: $seqtime ms") 182 | 183 | val partime = standardConfig measure { 184 | scanAPar(inp, init, concat, outPar) 185 | } 186 | println(s"parallel time: $partime ms") 187 | println(s"speedup: ${seqtime / partime}") 188 | 
print("Are results equal?") 189 | println(arrEq(outSeq, outPar)) 190 | //println(outPar.toList) 191 | } 192 | 193 | def testVelocity = { 194 | println("======================================") 195 | println("Testing ArrayScan on velocity example.") 196 | println("======================================") 197 | 198 | threshold = 20000 199 | 200 | val c = 2.99792458e8 201 | def velocityAdd(v1: Double, v2: Double): Double = { 202 | val u1 = v1/c 203 | val u2 = v2/c 204 | (u1 + u2)/(1 + u1*u2)*c 205 | } 206 | 207 | val alen = 2000000 208 | val inp = (0 until alen).map((x:Int) => (x % 50)*0.0001*c).toArray 209 | val outSeq = new Array[Double](alen + 1) 210 | val outPar = new Array[Double](alen + 1) 211 | val seqtime = standardConfig measure { 212 | scanASeq(inp, 0.0, velocityAdd, outSeq) 213 | } 214 | println(s"sequential time: $seqtime ms") 215 | 216 | val partime = standardConfig measure { 217 | scanAPar(inp, 0.0, velocityAdd, outPar) 218 | } 219 | println(s"parallel time: $partime ms") 220 | println(s"speedup: ${seqtime / partime}") 221 | } 222 | 223 | def testNonZero = { 224 | println("====================================================") 225 | println("Testing ArrayScan on addition with non-zero initial.") 226 | println("====================================================") 227 | val inp: Array[Int] = (1 to 10).toArray 228 | val outSeq: Array[Int] = new Array[Int](inp.length + 1) 229 | val outPar: Array[Int] = new Array[Int](inp.length + 1) 230 | val f = (x: Int, y: Int) => x + y 231 | threshold = 3 232 | scanASeq(inp, 10, f, outSeq) 233 | println(outSeq.toList) 234 | scanAPar(inp, 10, f, outPar) // a0 = 10 235 | println(outPar.toList) 236 | } 237 | 238 | def main(args: Array[String]) { 239 | testNonZero 240 | testConcat 241 | testVelocity 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /src/main/scala/lectures/reductions/ArrayScanDebug.scala: 
-------------------------------------------------------------------------------- 1 | package lectures 2 | package reductions 3 | 4 | import org.scalameter._ 5 | import common._ 6 | 7 | object ArrayScanDebug { 8 | 9 | def foldASegSeq[A,B](inp: Array[A], 10 | left: Int, right: Int, 11 | b0: B, 12 | f: (B,A) => B): B = { 13 | var b= b0 14 | var i= left 15 | while (i < right) { 16 | b= f(b, inp(i)) 17 | i= i+1 18 | } 19 | b 20 | } 21 | 22 | sealed abstract class FoldTree[A] { 23 | val res: A 24 | } 25 | case class Leaf[A](from: Int, to: Int, resLeaf: A) extends FoldTree[A] { 26 | val res= resLeaf 27 | } 28 | case class Node[A](l: FoldTree[A], r: FoldTree[A], resNode: A) extends FoldTree[A] { 29 | val res= resNode 30 | } 31 | 32 | def foldASegParTree[A](inp: Array[A], 33 | left: Int, right: Int, 34 | a0: A, 35 | f: (A,A) => A): FoldTree[A] = { 36 | // requires f to be associative 37 | if (right - left < threshold) 38 | Leaf(left, right, foldASegSeq(inp, left, right, a0, f)) 39 | else { 40 | val mid = left + (right - left)/2 41 | val (t1,t2) = parallel(foldASegParTree(inp, left, mid, a0, f), 42 | foldASegParTree(inp, mid, right, a0, f)) 43 | Node(t1, t2, f(t1.res,t2.res)) 44 | } 45 | } 46 | 47 | /* // Poor man's dynamic effect checks 48 | def write[A](arr: Array[A], i: Int, v:A) = { 49 | if (arr(i) != 0.0) { 50 | println(s"Overwritting with $v element at $i already set to ${arr(i)} in array ${printA(arr)}") 51 | } 52 | arr(i)=v 53 | } 54 | */ 55 | 56 | def scanASegSeq1[A](inp: Array[A], 57 | left: Int, right: Int, 58 | a0: A, 59 | f: (A,A) => A, 60 | out: Array[A]) = { 61 | if (left < right) { 62 | var i= left 63 | var a= a0 64 | while (i < right) { 65 | a= f(a,inp(i)) 66 | //write(out,i+1,a) 67 | out(i+1)=a 68 | i= i+1 69 | } 70 | } 71 | } 72 | 73 | def scanASegParT[A,B](inp: Array[A], 74 | a0: A, 75 | f: (A,A) => A, 76 | t: FoldTree[A], 77 | out: Array[A]): Unit = { 78 | t match { 79 | case Leaf(from, to, res) => 80 | scanASegSeq1(inp, from, to, a0, f, out) 81 | case 
Node(l, r, res) => { 82 | val (_,_) = parallel( 83 | scanASegParT(inp, a0, f, l, out), 84 | scanASegParT(inp, f(a0,l.res), f, r, out)) 85 | } 86 | } 87 | } 88 | 89 | def scanASegPar[A,B](inp: Array[A], 90 | from: Int, to: Int, 91 | a0: A, 92 | f: (A,A) => A, 93 | out: Array[A]) = { 94 | val t = foldASegParTree(inp, from, to, a0, f) 95 | println("FoldTree is: " + t) 96 | scanASegParT(inp, a0, f, t, out) 97 | } 98 | 99 | def scanAPar[A](inp: Array[A], 100 | a0: A, 101 | f: (A,A) => A, 102 | out: Array[A]) = { 103 | out(0)= a0 104 | scanASegPar(inp, 0, inp.length, a0, f, out) 105 | } 106 | 107 | def scanASeq[A](inp: Array[A], 108 | a0: A, 109 | f: (A,A) => A, 110 | out: Array[A]) = { 111 | out(0) = a0 112 | scanASegSeq1(inp, 0, inp.length, a0, f, out) 113 | } 114 | 115 | val c = 2.99792458e8 116 | def assocOp(v1: Double, v2: Double): Double = { 117 | val u1 = v1/c 118 | val u2 = v2/c 119 | (u1 + u2)/(1 + u1*u2)*c 120 | } 121 | 122 | def sum(x: Double, y: Double) = x + y 123 | 124 | val standardConfig = config( 125 | Key.exec.minWarmupRuns -> 1, 126 | Key.exec.maxWarmupRuns -> 1, 127 | Key.exec.benchRuns -> 1, 128 | Key.verbose -> false 129 | ) withWarmer(new Warmer.Default) 130 | 131 | def printA[A](a: Array[A]): String = { 132 | a.toList.toString 133 | } 134 | 135 | val threshold = 3 136 | 137 | def main(args: Array[String]) { 138 | // val inp = (0 until alen).map((x:Int) => (x % 50)*0.0001*c).toArray 139 | val inp = Array(1.0, 10.0, 200.0, 0.5, 3.0, 40.0, 50.0, 5.0) 140 | val alen = inp.length 141 | println("Input: " + printA(inp)) 142 | val outSeq = new Array[Double](alen + 1); outSeq(0) = 41.0; outSeq(alen) = -12.0 143 | val outPar = new Array[Double](alen + 1); outPar(0) = 42.0; outPar(alen) = -13.0 144 | val seqtime = 1 145 | //standardConfig measure { 146 | scanASeq[Double](inp, 0.0, sum, outSeq) 147 | //} 148 | //println(s"sequential time: $seqtime ms and result ${printA(outSeq)}") 149 | 150 | val partime = 1 151 | //standardConfig measure { 152 | 
scanAPar[Double](inp, 0.0, sum, outPar) 153 | //} 154 | //println(s"parallel time: $partime ms and result ${printA(outPar)}") 155 | //println(s"speedup: ${seqtime / partime}") 156 | println(s"seq result ${printA(outSeq)}") 157 | println(s"par result ${printA(outPar)}") 158 | 159 | } 160 | 161 | } 162 | -------------------------------------------------------------------------------- /src/main/scala/lectures/reductions/ArraySum.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package reductions 3 | 4 | import org.scalameter._ 5 | import common._ 6 | 7 | object ArraySum { 8 | 9 | @volatile var dummy: Int = 0 10 | 11 | def sum(xs: Array[Int], from: Int, until: Int): Int = { 12 | var i = from 13 | var s = 0 14 | while (i < until) { 15 | s += xs(i) 16 | i += 1 17 | } 18 | s 19 | } 20 | 21 | def parSum(xs: Array[Int], from: Int, until: Int, threshold: Int): Int = { 22 | if (until - from < threshold) { 23 | sum(xs, from, until) 24 | } else { 25 | val mid = (from + until) / 2 26 | val right = task { 27 | parSum(xs, mid, until, threshold) 28 | } 29 | val leftSum = parSum(xs, from, mid, threshold) 30 | val rightSum = right.join() 31 | leftSum + rightSum 32 | } 33 | } 34 | 35 | val standardConfig = config( 36 | Key.exec.minWarmupRuns -> 20, 37 | Key.exec.maxWarmupRuns -> 60, 38 | Key.exec.benchRuns -> 60, 39 | Key.verbose -> true 40 | ) withWarmer(new Warmer.Default) 41 | 42 | def main(args: Array[String]) { 43 | val xs = (0 until 100000000).map(_ % 100).toArray 44 | val seqtime = standardConfig measure { 45 | dummy = sum(xs, 0, xs.length) 46 | } 47 | println(s"sequential sum time: $seqtime ms") 48 | 49 | val fjtime = standardConfig measure { 50 | parSum(xs, 0, xs.length, 10000) 51 | } 52 | println(s"fork/join time: $fjtime ms") 53 | println(s"speedup: ${seqtime / fjtime}") 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- 
/src/main/scala/lectures/reductions/RunningAverage.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package reductions 3 | 4 | import org.scalameter._ 5 | import common._ 6 | 7 | object RunningAverage { 8 | 9 | val standardConfig = config( 10 | Key.exec.minWarmupRuns -> 20, 11 | Key.exec.maxWarmupRuns -> 60, 12 | Key.exec.benchRuns -> 60, 13 | Key.verbose -> true 14 | ) withWarmer(new Warmer.Default) 15 | 16 | def runningAverage(input: Array[Int], output: Array[Float]): Unit = { 17 | var i = 0 18 | var xPrev = 0 19 | output(0) = xPrev 20 | while (i < input.length) { 21 | xPrev = input(i) + xPrev 22 | i += 1 23 | output(i) = 1.0f * xPrev / i 24 | } 25 | } 26 | 27 | sealed abstract class Tree { 28 | def xPrev: Int 29 | } 30 | 31 | case class Node(left: Tree, right: Tree) extends Tree { 32 | val xPrev = left.xPrev + right.xPrev 33 | } 34 | 35 | case class Leaf(from: Int, until: Int, xPrev: Int) extends Tree 36 | 37 | def parRunningAverage(input: Array[Int], output: Array[Float], threshold: Int): Unit = { 38 | def reduceSequential(from: Int, until: Int): Int = { 39 | var i = from 40 | var x = 0 41 | while (i < until) { 42 | x = input(i) + x 43 | i += 1 44 | } 45 | x 46 | } 47 | 48 | def reduce(from: Int, until: Int): Tree = { 49 | if (until - from < threshold) { 50 | Leaf(from, until, reduceSequential(from, until)) 51 | } else { 52 | val mid = (from + until) / 2 53 | val (leftTree, rightTree) = parallel( 54 | reduce(from, mid), 55 | reduce(mid, until) 56 | ) 57 | Node(leftTree, rightTree) 58 | } 59 | } 60 | 61 | val tree = reduce(0, input.length) 62 | 63 | def downsweepSequential(xPrev: Int, from: Int, until: Int): Unit = { 64 | var i = from 65 | var x = xPrev 66 | while (i < until) { 67 | x = input(i) + x 68 | i += 1 69 | output(i) = 1.0f * x / i 70 | } 71 | } 72 | 73 | def downsweep(xPrev: Int, tree: Tree): Unit = tree match { 74 | case Node(left, right) => 75 | parallel( 76 | downsweep(xPrev, left), 77 
| downsweep(xPrev + left.xPrev, right) 78 | ) 79 | case Leaf(from, until, _) => 80 | downsweepSequential(xPrev, from, until) 81 | } 82 | 83 | output(0) = 0 84 | downsweep(0, tree) 85 | } 86 | 87 | def main(args: Array[String]) { 88 | val length = 10000000 89 | val input = (0 until length).map(_ % 100 - 50).toArray 90 | val output = new Array[Float](length + 1) 91 | val seqtime = standardConfig measure { 92 | runningAverage(input, output) 93 | } 94 | println(s"sequential time: $seqtime ms") 95 | 96 | val partime = standardConfig measure { 97 | parRunningAverage(input, output, 10000) 98 | } 99 | println(s"parallel time: $partime ms") 100 | println(s"speedup: ${seqtime / partime}") 101 | } 102 | 103 | } 104 | -------------------------------------------------------------------------------- /src/main/scala/lectures/reductions/TreeMap.scala: -------------------------------------------------------------------------------- 1 | package lectures 2 | package reductions 3 | 4 | import org.scalameter._ 5 | import common._ 6 | 7 | object TreeMap { 8 | 9 | sealed abstract class Tree[A] { val size: Int } 10 | case class Leaf[A](a: Array[A]) extends Tree[A] { 11 | override val size = a.size 12 | } 13 | case class Node[A](l: Tree[A], r: Tree[A]) extends Tree[A] { 14 | override val size = l.size + r.size 15 | } 16 | 17 | def mapTreeSeq[A:Manifest,B:Manifest](t: Tree[A], f: A => B) : Tree[B] = t match { 18 | case Leaf(a) => { 19 | val len = a.length 20 | val b = new Array[B](len) 21 | var i= 0 22 | while (i < len) { 23 | b(i)= f(a(i)) 24 | i= i + 1 25 | } 26 | Leaf(b) 27 | } 28 | case Node(l,r) => { 29 | val (lb,rb) = (mapTreeSeq(l,f),mapTreeSeq(r,f)) 30 | Node(lb, rb) 31 | } 32 | } 33 | 34 | def mapTreePar[A:Manifest,B:Manifest](t: Tree[A], f: A => B) : Tree[B] = t match { 35 | case Leaf(a) => { 36 | val len = a.length 37 | val b = new Array[B](len) 38 | var i= 0 39 | while (i < len) { 40 | b(i)= f(a(i)) 41 | i= i + 1 42 | } 43 | Leaf(b) 44 | } 45 | case Node(l,r) => { 46 | val 
(lb,rb) = parallel(mapTreePar(l,f),mapTreePar(r,f)) 47 | Node(lb, rb) 48 | } 49 | } 50 | 51 | val logE = math.log(math.E) 52 | def power(x: Double, p: Double): Int = { 53 | math.exp(p * math.log(math.abs(x)) / logE).toInt 54 | } 55 | 56 | val threshold = 10000 57 | 58 | val standardConfig = config( 59 | Key.exec.minWarmupRuns -> 30, 60 | Key.exec.maxWarmupRuns -> 30, 61 | Key.exec.benchRuns -> 20, 62 | Key.verbose -> false 63 | ) withWarmer(new Warmer.Default) 64 | 65 | 66 | def makeTree(len: Int) : Tree[Double] = { 67 | if (len < threshold) 68 | Leaf((0 until len).map((x:Int) => (x % 100)*0.9).toArray) 69 | else { 70 | Node(makeTree(len/2), makeTree(len - len/2)) 71 | } 72 | } 73 | 74 | def main(args: Array[String]) { 75 | val p = 1.5 76 | def f(x:Double)= power(x,p) 77 | val alen = 2000000 78 | val t = makeTree(alen) 79 | var t1: Tree[Double] = t 80 | var t2: Tree[Double] = t 81 | val seqtime = standardConfig measure { 82 | t1 = mapTreeSeq(t, f) 83 | } 84 | val partime = standardConfig measure { 85 | t2 = mapTreePar(t, f) 86 | } 87 | 88 | println(s"sequential time: $seqtime ms") 89 | println(s"parallel time: $partime ms") 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/test/scala/lectures/algorithms/MergeSortTest.scala: -------------------------------------------------------------------------------- 1 | package lectures.algorithms 2 | 3 | import org.scalatest.FunSuite 4 | 5 | import scala.util.Random 6 | 7 | class MergeSortTest extends FunSuite { 8 | 9 | def sortCase(nbElem: Int, maxDepth: Int) = { 10 | print(s"Test case: # elements = $nbElem, maxDepth = $maxDepth") 11 | val arr2sort = Array.tabulate(nbElem)(_ => Random.nextInt()) 12 | val expected = arr2sort.sorted 13 | MergeSort.parMergeSort(arr2sort, maxDepth) 14 | assert(arr2sort === expected) 15 | println(" => ok") 16 | } 17 | 18 | /** 19 | * Note that maxDepth should not be too large (< 15), 20 | * otherwise number of thread in parallel will be 
exponential to maxDepth 21 | * which make the system slow 22 | */ 23 | test("MergeSort x elements with maxDepth = y") { 24 | for { 25 | x <- 100 to 1000 by 100 26 | y <- 2 to 5 27 | } sortCase(x, y) 28 | } 29 | 30 | } 31 | --------------------------------------------------------------------------------