├── src ├── test │ └── scala │ │ ├── SkewHeapTest.scala │ │ ├── LeftistHeapTest.scala │ │ └── HeapTest.scala └── main │ └── scala │ ├── Invariant.scala │ ├── LeftistHeap.scala │ ├── SkewHeap.scala │ └── heapTraits.scala ├── .gitignore ├── README.md ├── LICENSE └── design └── heaps.md /src/test/scala/SkewHeapTest.scala: -------------------------------------------------------------------------------- 1 | import scads.immutable._ 2 | import org.scalacheck._ 3 | 4 | object SkewHeapTest extends Properties("SkewHeap") with HeapTest { 5 | def companion: HeapCompanion = SkewHeap 6 | } 7 | -------------------------------------------------------------------------------- /src/test/scala/LeftistHeapTest.scala: -------------------------------------------------------------------------------- 1 | import scads.immutable._ 2 | import org.scalacheck._ 3 | 4 | object LeftistHeapTest extends Properties("LeftistHeap") with HeapTest { 5 | def companion: HeapCompanion = LeftistHeap 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .history 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scads (Scala Algorithms and Data Structures) 2 | 3 | Eventually, I want this to be a library with a variety of useful 4 | algorithms and data structures in Scala. 
For right now, however, 5 | it contains only a single, proof-of-concept trait hierarchy for 6 | [typesafe heaps (priority queues)](src/main/scala/heapTraits.scala) and 7 | a single implementation of those traits as 8 | [leftist heaps](src/main/scala/LeftistHeap.scala). 9 | 10 | See a more thorough description of the technical details [here](design/heaps.md). 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Chris Okasaki 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/main/scala/Invariant.scala: -------------------------------------------------------------------------------- 1 | package scads 2 | 3 | /** Mixin for a class with an internal invariant that can be checked at run-time. 4 | * 5 | * Example: {{{ 6 | * class MyClass extends HasInvariant { 7 | * ... 8 | * var list: List[Int] // must be kept sorted! 9 | * ... 10 | * def checkInvariant(): Unit = 11 | * if (list != list.sorted) { 12 | * invariantViolation(s"MyClass: list must be sorted, but list=$list") 13 | * } 14 | * } 15 | * }}} 16 | */ 17 | trait HasInvariant { 18 | 19 | /** Verifies that the invariant holds. 20 | * 21 | * Usually called during testing. 22 | * 23 | * @throws InvariantViolationException if the invariant is violated. 24 | */ 25 | def checkInvariant(): Unit 26 | 27 | /** Shorthand for throwing an InvariantViolationException. 28 | * 29 | * Usually called from within `checkInvariant`. 30 | * 31 | * @throws InvariantViolationException 32 | */ 33 | def invariantViolation(msg: String): Nothing = 34 | throw new InvariantViolationException(msg: String) 35 | } 36 | 37 | 38 | /** An exception that indicates a class invariant was violated. 39 | * 40 | * Usually thrown within the `checkInvariant` method of [[scads.HasInvariant]]. 41 | */ 42 | class InvariantViolationException(msg: String) extends Exception(msg) 43 | -------------------------------------------------------------------------------- /src/main/scala/LeftistHeap.scala: -------------------------------------------------------------------------------- 1 | // TODO: override equals and hashCode? 
2 | // TODO: improve algorithm of dropUntil and dropTo from O(n log n) to O(n) 3 | 4 | package scads.immutable 5 | 6 | import scala.collection.mutable.Builder 7 | 8 | object LeftistHeap extends HeapCompanion { 9 | private class LeftistFactory[E](ord: Ordering[E]) extends HeapFactory { 10 | // smaller elements (according to ord) come before bigger elements 11 | type Elem = E 12 | sealed trait Heap extends MHeap[Elem, Heap] { 13 | def iterator: Iterator[Elem] = { 14 | import scala.collection.mutable.ArrayStack 15 | val stack: ArrayStack[Node] = ArrayStack.empty[Node] 16 | def push(h: Heap): Unit = h match { 17 | case Empty => {} 18 | case node: Node => stack += node 19 | } 20 | push(this) 21 | 22 | new scala.collection.AbstractIterator[Elem] { 23 | def hasNext: Boolean = stack.nonEmpty 24 | def next(): Elem = { 25 | if (stack.isEmpty) throw new NoSuchElementException("next on empty iterator") 26 | val Node(elem,left,right) = stack.pop() 27 | push(right) 28 | push(left) 29 | elem 30 | } 31 | } 32 | } 33 | 34 | // assumes elem comes before (or equal) the current first 35 | def _addFirst(elem: Elem): Heap = Node(elem, this, Empty) 36 | } 37 | 38 | case object Empty extends Heap { 39 | def isEmpty: Boolean = true 40 | def nonEmpty: Boolean = false 41 | def size: Int = 0 42 | 43 | def first: Elem = throw new NoSuchElementException("first on empty heap") 44 | def firstOption: Option[Elem] = None 45 | def rest: Heap = throw new UnsupportedOperationException("rest on empty heap") 46 | def firstView: Option[(Elem, Heap)] = None 47 | 48 | def add(elem: Elem): Heap = Node(elem, Empty, Empty) 49 | def merge(other: Heap): Heap = other 50 | 51 | def takeUntil(e: Elem): Heap = Empty 52 | def takeTo(e: Elem): Heap = Empty 53 | def dropUntil(e: Elem): Heap = Empty 54 | def dropTo(e: Elem): Heap = Empty 55 | 56 | def checkInvariant(): Unit = {} 57 | } 58 | 59 | case class Node(elem: Elem, left: Heap, right: Heap) extends Heap { 60 | def isEmpty: Boolean = false 61 | def nonEmpty: 
Boolean = true 62 | val size: Int = left.size + right.size + 1 63 | 64 | def first: Elem = elem 65 | def firstOption: Option[Elem] = Some(elem) 66 | def rest: Heap = left merge right 67 | def firstView: Option[(Elem, Heap)] = Some((elem,left merge right)) 68 | 69 | def add(elem: Elem): Heap = merge(Node(elem, Empty, Empty)) 70 | def merge(other: Heap): Heap = other match { 71 | case Empty => this 72 | case Node(oelem, oleft, oright) => 73 | if (ord.lteq(elem, oelem)) makeNode(elem, left, right merge other) 74 | else makeNode(oelem, oleft, this merge oright) 75 | } 76 | private def makeNode(elem: Elem, left: Heap, right: Heap): Node = { 77 | if (left.size < right.size) Node(elem, right, left) 78 | else Node(elem, left, right) 79 | } 80 | 81 | def takeUntil(e: Elem): Heap = 82 | if (ord.lt(elem, e)) makeNode(elem,left.takeUntil(e),right.takeUntil(e)) 83 | else Empty 84 | def takeTo(e: Elem): Heap = 85 | if (ord.lteq(elem, e)) makeNode(elem,left.takeTo(e),right.takeTo(e)) 86 | else Empty 87 | def dropUntil(e: Elem): Heap = // can be improved! 88 | if (ord.lt(elem, e)) left.dropUntil(e) merge right.dropUntil(e) 89 | else this 90 | def dropTo(e: Elem): Heap = // can be improved! 
91 | if (ord.lteq(elem, e)) left.dropTo(e) merge right.dropTo(e) 92 | else this 93 | 94 | def checkInvariant(): Unit = { 95 | if (left.size < right.size) 96 | invariantViolation(s"LeftistHeap: left.size should be >= right.size but left.size=${left.size} and right.size=${right.size}") 97 | if (left.nonEmpty && ord.lt(left.first,first)) 98 | invariantViolation(s"LeftistHeap: left.first should come after first in ordering but left.first=${left.first} and first=$first") 99 | if (right.nonEmpty && ord.lt(right.first,first)) 100 | invariantViolation(s"LeftistHeap: right.first should come after first in ordering but right.first=${right.first} and first=$first") 101 | left.checkInvariant() 102 | right.checkInvariant() 103 | } 104 | } 105 | 106 | def empty: Heap = Empty 107 | override def single(elem: Elem): Heap = Node(elem,Empty,Empty) 108 | override def apply(elems: Elem*): Heap = { 109 | var len = elems.length 110 | val array = new Array[Heap](len) 111 | for (i <- 0 until len) array(i) = Node(elems(i),Empty,Empty) 112 | while (len > 1) { 113 | val half = len/2 114 | for (i <- 0 until half) array(i) = array(2*i) merge array(2*i+1) 115 | if (len % 2 == 0) len = half 116 | else { 117 | array(half) = array(len-1) 118 | len = half+1 119 | } 120 | } 121 | if (len == 0) Empty 122 | else array(0) 123 | } 124 | override def newBuilder: Builder[Elem,Heap] = new Builder[Elem,Heap] { 125 | val buffer = scala.collection.mutable.ArrayBuffer.empty[Elem] 126 | def +=(elem: Elem) = { buffer += elem; this } 127 | def clear() = buffer.clear() 128 | def result(): Heap = apply(buffer: _*) 129 | } 130 | } 131 | 132 | object Min extends HeapFactoryFactory { 133 | def factory[E](implicit ord: Ordering[E]): HeapFactory{ type Elem = E } = 134 | new LeftistFactory[E](ord) 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/main/scala/SkewHeap.scala: -------------------------------------------------------------------------------- 1 | // An 
implementation of skew heaps, with most of the code stolen from 2 | // leftist heap. 3 | // 4 | // Note that the O(log n) amortized bounds for add and merge on skew heaps 5 | // is NOT guaranteed to hold for immutable heaps, because you can create 6 | // a "bad" skew heap that takes linear time for add/merge and then keep 7 | // repeating that bad operation. 8 | // 9 | // Once I have a benchmarking harness up, I'll be able to compare this 10 | // to leftist heaps. 11 | // 12 | // TODO: override equals and hashCode? 13 | // TODO: improve algorithm of dropUntil and dropTo from O(n log n) to O(n) 14 | 15 | package scads.immutable 16 | 17 | import scala.collection.mutable.Builder 18 | 19 | object SkewHeap extends HeapCompanion { 20 | private class SkewFactory[E](ord: Ordering[E]) extends HeapFactory { 21 | // smaller elements (according to ord) come before bigger elements 22 | type Elem = E 23 | sealed trait Heap extends MHeap[Elem, Heap] { 24 | def iterator: Iterator[Elem] = { 25 | import scala.collection.mutable.ArrayStack 26 | val stack: ArrayStack[Node] = ArrayStack.empty[Node] 27 | def push(h: Heap): Unit = h match { 28 | case Empty => {} 29 | case node: Node => stack += node 30 | } 31 | push(this) 32 | 33 | new scala.collection.AbstractIterator[Elem] { 34 | def hasNext: Boolean = stack.nonEmpty 35 | def next(): Elem = { 36 | if (stack.isEmpty) throw new NoSuchElementException("next on empty iterator") 37 | val Node(elem,left,right) = stack.pop() 38 | push(right) 39 | push(left) 40 | elem 41 | } 42 | } 43 | } 44 | 45 | // assumes elem comes before (or equal) the current first 46 | def _addFirst(elem: Elem): Heap = Node(elem, this, Empty) 47 | } 48 | 49 | case object Empty extends Heap { 50 | def isEmpty: Boolean = true 51 | def nonEmpty: Boolean = false 52 | def size: Int = 0 53 | 54 | def first: Elem = throw new NoSuchElementException("first on empty heap") 55 | def firstOption: Option[Elem] = None 56 | def rest: Heap = throw new 
UnsupportedOperationException("rest on empty heap") 57 | def firstView: Option[(Elem, Heap)] = None 58 | 59 | def add(elem: Elem): Heap = Node(elem, Empty, Empty) 60 | def merge(other: Heap): Heap = other 61 | 62 | def takeUntil(e: Elem): Heap = Empty 63 | def takeTo(e: Elem): Heap = Empty 64 | def dropUntil(e: Elem): Heap = Empty 65 | def dropTo(e: Elem): Heap = Empty 66 | 67 | def checkInvariant(): Unit = {} 68 | } 69 | 70 | case class Node(elem: Elem, left: Heap, right: Heap) extends Heap { 71 | def isEmpty: Boolean = false 72 | def nonEmpty: Boolean = true 73 | def size: Int = left.size + right.size + 1 // O(N) time! 74 | 75 | def first: Elem = elem 76 | def firstOption: Option[Elem] = Some(elem) 77 | def rest: Heap = left merge right 78 | def firstView: Option[(Elem, Heap)] = Some((elem,left merge right)) 79 | 80 | def add(elem: Elem): Heap = merge(Node(elem, Empty, Empty)) 81 | def merge(other: Heap): Heap = other match { 82 | case Empty => this 83 | case Node(oelem, oleft, oright) => 84 | if (ord.lteq(elem, oelem)) Node(elem, right merge other, left) 85 | else Node(oelem, this merge oright, oleft) 86 | } 87 | 88 | def takeUntil(e: Elem): Heap = 89 | if (ord.lt(elem, e)) Node(elem,left.takeUntil(e),right.takeUntil(e)) 90 | else Empty 91 | def takeTo(e: Elem): Heap = 92 | if (ord.lteq(elem, e)) Node(elem,left.takeTo(e),right.takeTo(e)) 93 | else Empty 94 | def dropUntil(e: Elem): Heap = // can be improved! 95 | if (ord.lt(elem, e)) left.dropUntil(e) merge right.dropUntil(e) 96 | else this 97 | def dropTo(e: Elem): Heap = // can be improved! 
98 | if (ord.lteq(elem, e)) left.dropTo(e) merge right.dropTo(e) 99 | else this 100 | 101 | def checkInvariant(): Unit = { 102 | if (left.nonEmpty && ord.lt(left.first,first)) 103 | invariantViolation(s"SkewHeap: left.first should come after first in ordering but left.first=${left.first} and first=$first") 104 | if (right.nonEmpty && ord.lt(right.first,first)) 105 | invariantViolation(s"SkewHeap: right.first should come after first in ordering but right.first=${right.first} and first=$first") 106 | left.checkInvariant() 107 | right.checkInvariant() 108 | } 109 | } 110 | 111 | def empty: Heap = Empty 112 | override def single(elem: Elem): Heap = Node(elem,Empty,Empty) 113 | override def apply(elems: Elem*): Heap = { 114 | var len = elems.length 115 | val array = new Array[Heap](len) 116 | for (i <- 0 until len) array(i) = Node(elems(i),Empty,Empty) 117 | while (len > 1) { 118 | val half = len/2 119 | for (i <- 0 until half) array(i) = array(2*i) merge array(2*i+1) 120 | if (len % 2 == 0) len = half 121 | else { 122 | array(half) = array(len-1) 123 | len = half+1 124 | } 125 | } 126 | if (len == 0) Empty 127 | else array(0) 128 | } 129 | override def newBuilder: Builder[Elem,Heap] = new Builder[Elem,Heap] { 130 | val buffer = scala.collection.mutable.ArrayBuffer.empty[Elem] 131 | def +=(elem: Elem) = { buffer += elem; this } 132 | def clear() = buffer.clear() 133 | def result(): Heap = apply(buffer: _*) 134 | } 135 | } 136 | 137 | object Min extends HeapFactoryFactory { 138 | def factory[E](implicit ord: Ordering[E]): HeapFactory{ type Elem = E } = 139 | new SkewFactory[E](ord) 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/main/scala/heapTraits.scala: -------------------------------------------------------------------------------- 1 | // This is a proof of concept for typesafe heaps with a merge operation. 
2 | // Here, "typesafe" means that the interface will never allow different 3 | // orderings to be mixed within the same heap. In particular, 4 | // * when adding an element to an existing heap, that insertion 5 | // cannot involve an ordering different from the one used to 6 | // create the existing heap, and 7 | // * when merging two existing heaps, the heaps are guaranteed to 8 | // have been created with the same ordering. 9 | // 10 | // Ensuring the first condition is easy: the insertion method does not 11 | // take an ordering as a parameter. (This might seem obvious but implementations 12 | // adapted from Haskell to Scala often violate this rule.) 13 | // 14 | // Ensuring the second condition is harder. It can be achieved by 15 | // carefully staging where an ordering is supplied to create a new "factory", 16 | // and then all heaps created from that factory are guaranteed to use the 17 | // same ordering. Crucially, path-dependent types ensure that attempting 18 | // to merge heaps from different factories will be a type error detected at 19 | // compile time. 20 | // 21 | // ***See design/heaps.md for a fuller discussion of these issues.*** 22 | // 23 | // Note that, for easier reference as a proof of concept, I've kept all 24 | // the traits in the same file, even though the various traits would typically 25 | // be written in separate files. Also, I've limited the methods to a fairly 26 | // small set. In the future, expect methods (and possibly traits) to be added, 27 | // removed, renamed, and generally reorganized. 28 | // 29 | // See LeftistHeap.scala for a sample implementation using these traits. 30 | 31 | // TODO: add scaladocs 32 | // TODO: better name for firstView (because view tends to mean something different for Scala collections) 33 | // TODO: what should the name/symbol for unapply be to support pattern matching? 34 | // TODO: improve the algorithm of the default newBuilder/from in HeapFactory 35 | // TODO: incorporate CanBuildFrom?
36 | 37 | package scads.immutable 38 | 39 | import scala.collection.mutable.Builder 40 | 41 | /** A simple heap WITHOUT a `merge` method. */ 42 | trait SHeap[Elem] extends scads.HasInvariant { 43 | def isEmpty: Boolean 44 | def nonEmpty: Boolean 45 | def size: Int 46 | 47 | def add(elem: Elem): SHeap[Elem] 48 | 49 | def first: Elem 50 | def firstOption: Option[Elem] 51 | def rest: SHeap[Elem] 52 | def firstView: Option[(Elem, SHeap[Elem])] 53 | 54 | def iterator: Iterator[Elem] // generates elements in an undefined order, should be O(N) 55 | 56 | def sorted: Iterator[Elem] = { 57 | var heap = this 58 | new scala.collection.AbstractIterator[Elem] { 59 | def hasNext: Boolean = heap.nonEmpty 60 | def next(): Elem = { 61 | if (heap.isEmpty) throw new NoSuchElementException("next on empty iterator") 62 | else { 63 | val result = heap.first 64 | heap = heap.rest 65 | result 66 | } 67 | } 68 | } 69 | } 70 | 71 | def mkString(start: String, sep: String, end: String) = iterator.mkString(start,sep,end) 72 | def mkString(sep: String): String = iterator.mkString(sep) 73 | def mkString: String = iterator.mkString 74 | override def toString(): String = mkString("Heap(",", ",")") 75 | 76 | def _addFirst(elem: Elem): SHeap[Elem] 77 | 78 | def takeUntil(e: Elem): SHeap[Elem] 79 | def takeTo(e: Elem): SHeap[Elem] 80 | def dropUntil(e: Elem): SHeap[Elem] 81 | def dropTo(e: Elem): SHeap[Elem] 82 | } 83 | 84 | /** A more specialized type for heaps WITH a `merge` method. */ 85 | trait MHeap[Elem,Heap <: SHeap[Elem]] extends SHeap[Elem] { 86 | def add(elem: Elem): Heap 87 | def merge(other: Heap): Heap 88 | def rest: Heap 89 | def firstView: Option[(Elem, Heap)] // need different name from View 90 | def _addFirst(elem: Elem): Heap 91 | 92 | def takeUntil(e: Elem): Heap 93 | def takeTo(e: Elem): Heap 94 | def dropUntil(e: Elem): Heap 95 | def dropTo(e: Elem): Heap 96 | } 97 | 98 | /** A factory for creating heaps. 
99 | * 100 | * Every heap from the same factory uses the same element type and 101 | * the same Ordering. 102 | */ 103 | trait HeapFactory { 104 | type Elem 105 | type Heap <: MHeap[Elem,Heap] 106 | 107 | def newBuilder: Builder[Elem,Heap] = new Builder[Elem,Heap] { // can be improved! 108 | val buffer = scala.collection.mutable.ArrayBuffer.empty[Elem] 109 | def +=(elem: Elem) = { buffer += elem; this } 110 | def clear() = buffer.clear() 111 | def result(): Heap = from(buffer) 112 | } 113 | def empty: Heap 114 | def single(elem: Elem): Heap = empty.add(elem) 115 | def apply(elems: Elem*): Heap = from(elems) 116 | def from(elems: TraversableOnce[Elem]): Heap = elems.foldLeft(empty)(_.add(_)) // inefficient! replace with something better! 117 | def _fromSorted(elems: Seq[Elem]): Heap = _fromReverseSorted(elems.reverse) 118 | def _fromReverseSorted(elems: Seq[Elem]): Heap = elems.foldLeft(empty)(_._addFirst(_)) 119 | } 120 | 121 | trait HeapFactoryFactory { 122 | def factory[E](implicit ord: Ordering[E]): HeapFactory{ type Elem = E } 123 | def pairedFactory[Key,Value](implicit ord: Ordering[Key]): HeapFactory { type Elem = (Key,Value) } = 124 | factory[(Key,Value)](Ordering.by[(Key,Value), Key](_._1)(ord)) 125 | 126 | def empty[E](implicit ord: Ordering[E]): SHeap[E] = factory[E](ord).empty 127 | def single[E](elem: E)(implicit ord: Ordering[E]): SHeap[E] = factory[E](ord).single(elem) 128 | def apply[E](elems: E*)(implicit ord: Ordering[E]): SHeap[E] = factory[E](ord).from(elems) 129 | def from[E](elems: TraversableOnce[E])(implicit ord: Ordering[E]): SHeap[E] = factory[E](ord).from(elems) 130 | } 131 | 132 | trait HeapCompanion { 133 | val Min: HeapFactoryFactory 134 | val Max: HeapFactoryFactory = new HeapFactoryFactory { 135 | def factory[E](implicit ord: Ordering[E]): HeapFactory{ type Elem = E } = 136 | Min.factory[E](ord.reverse) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/test/scala/HeapTest.scala:
-------------------------------------------------------------------------------- 1 | // Reusable tests for different heap implementations. 2 | // See LeftistHeapTest.scala and SkewHeapTest.scala for examples of usage. 3 | // TODO: Needs more tests, especially for pairedFactory 4 | // TODO: Fix tests to account for the fact that "equal" elements according to 5 | // the ordering are not necessarily equal. Therefore, it's not enough to test 6 | // (for example) that the first element in the heap is == the first element 7 | // in the sorted list. 8 | 9 | import scads.immutable._ 10 | import org.scalacheck._ 11 | import Prop._ 12 | import Arbitrary._ 13 | 14 | trait HeapTest { self: Properties => 15 | def companion: HeapCompanion 16 | 17 | type E = Int // the element type used for most/all of the tests 18 | 19 | def test(isMin: Boolean) { 20 | val name = if (isMin) "Min." else "Max." 21 | val hname = name + "factory.Heap." 22 | val hff: HeapFactoryFactory = if (isMin) companion.Min else companion.Max 23 | val factory = hff.factory[E] 24 | def sort(list: List[E]): List[E] = if (isMin) list.sorted else list.sorted.reverse 25 | def sameElements(heap: SHeap[E], list: List[E]): Boolean = heap.sorted.toList == sort(list) 26 | def firstOption(list: List[E]): Option[E] = sort(list).headOption 27 | val ord: Ordering[Int] = if (isMin) Ordering.Int else Ordering.Int.reverse 28 | 29 | property(name + "factory.from") = forAll { list: List[E] => 30 | val h = factory.from(list) 31 | h.checkInvariant() 32 | sameElements(h, list) 33 | } 34 | property(name + "factory.apply") = forAll { list: List[E] => 35 | val h = factory.apply(list : _*) 36 | h.checkInvariant() 37 | sameElements(h, list) 38 | } 39 | property(name + "factory.newBuilder") = forAll { list: List[E] => 40 | val builder = factory.newBuilder 41 | for (x <- list) builder += x 42 | val h = builder.result 43 | h.checkInvariant() 44 | sameElements(h, list) 45 | } 46 | property(name + "factory._fromSorted") = forAll { list:
List[E] => 47 | val s = sort(list) 48 | val h = factory._fromSorted(s) 49 | h.checkInvariant() 50 | sameElements(h, s) 51 | } 52 | property(name + "factory._fromReverseSorted") = forAll { list: List[E] => 53 | val s = sort(list).reverse 54 | val h = factory._fromReverseSorted(s) 55 | h.checkInvariant() 56 | sameElements(h, s) 57 | } 58 | property(name + "from") = forAll { list: List[E] => 59 | val h = hff.from(list) 60 | h.checkInvariant() 61 | sameElements(h, list) 62 | } 63 | property(name + "apply") = forAll { list: List[E] => 64 | val h = hff.apply(list : _*) 65 | h.checkInvariant() 66 | sameElements(h, list) 67 | } 68 | property(hname + "isEmpty/nonEmpty") = forAll {list: List[E] => 69 | val h = factory.from(list) 70 | h.isEmpty == list.isEmpty && h.nonEmpty == list.nonEmpty 71 | } 72 | property(hname + "size") = forAll { list: List[E] => 73 | val h = factory.from(list) 74 | h.size == list.size 75 | } 76 | property(hname + "add") = forAll { (list: List[E], x: E) => 77 | val h = factory.from(list) 78 | val h2 = h.add(x) 79 | h2.checkInvariant() 80 | sameElements(h2, x +: list) 81 | } 82 | property(hname + "_addFirst") = forAll { list: List[E] => list.nonEmpty ==> { 83 | val x = firstOption(list).get 84 | val listWithoutX = list.takeWhile(_ != x) ++ list.dropWhile(_ != x).tail 85 | val h = factory.from(listWithoutX) 86 | val h2 = h._addFirst(x) 87 | h2.checkInvariant() 88 | sameElements(h2, list) 89 | }} 90 | property(hname + "first") = forAll { list: List[E] => 91 | val h = factory.from(list) 92 | scala.util.Try(h.first).toOption == firstOption(list) 93 | } 94 | property(hname + "firstOption") = forAll { list: List[E] => 95 | val h = factory.from(list) 96 | h.firstOption == firstOption(list) 97 | } 98 | property(hname + "rest") = forAll { list: List[E] => 99 | val h = factory.from(list) 100 | val r = scala.util.Try(h.rest) 101 | if (list.isEmpty) r.isFailure 102 | else { 103 | val h2 = r.get 104 | h2.checkInvariant() 105 | sameElements(h2, sort(list).tail) 106 
| } 107 | } 108 | property(hname + "firstView") = forAll { list: List[E] => 109 | val h = factory.from(list) 110 | val s = sort(list) 111 | h.firstView match { 112 | case None => list.isEmpty 113 | case Some((x,h2)) => 114 | h2.checkInvariant() 115 | x == s.head && sameElements(h2, s.tail) 116 | } 117 | } 118 | property(hname + "merge") = forAll { (list1: List[E], list2: List[E]) => 119 | val h1 = factory.from(list1) 120 | val h2 = factory.from(list2) 121 | val merged = h1.merge(h2) 122 | merged.checkInvariant() 123 | sameElements(merged, list1 ++ list2) 124 | } 125 | property(hname + "takeUntil") = forAll { (list: List[E], x: E) => 126 | val h = factory.from(list) 127 | val h2 = h.takeUntil(x) 128 | h2.checkInvariant() 129 | sameElements(h2, list.filter(ord.lt(_,x))) 130 | } 131 | property(hname + "takeTo") = forAll { (list: List[E], x: E) => 132 | val h = factory.from(list) 133 | val h2 = h.takeTo(x) 134 | h2.checkInvariant() 135 | sameElements(h2, list.filter(ord.lteq(_,x))) 136 | } 137 | property(hname + "dropUntil") = forAll { (list: List[E], x: E) => 138 | val h = factory.from(list) 139 | val h2 = h.dropUntil(x) 140 | h2.checkInvariant() 141 | sameElements(h2, list.filter(ord.gteq(_,x))) 142 | } 143 | property(hname + "dropTo") = forAll { (list: List[E], x: E) => 144 | val h = factory.from(list) 145 | val h2 = h.dropTo(x) 146 | h2.checkInvariant() 147 | sameElements(h2, list.filter(ord.gt(_,x))) 148 | } 149 | property(hname + "iterator") = forAll { list: List[E] => 150 | val h = factory.from(list) 151 | val list2 = h.iterator.toList 152 | list2.sorted == list.sorted 153 | } 154 | } 155 | 156 | test(true) 157 | test(false) 158 | 159 | } 160 | -------------------------------------------------------------------------------- /design/heaps.md: -------------------------------------------------------------------------------- 1 | # API Design for Heaps (aka Priority Queues) 2 | 3 | A heap (or priority queue) is a collection of elements ordered by some `Ordering`, 
optimized for retrieving the first element according to that ordering. Duplicate elements are allowed. Applications vary in whether they need the first element to be the smallest or the biggest element according to the ordering, so both variations should be easy to use. (However, any given heap is expected to offer easy access to either the smallest element only or the biggest element only, not both at the same time.) I will consider immutable heaps in this document, but the core issues discussed below apply to both immutable heaps and mutable heaps. 4 | 5 | Even if an element type has a natural ordering, that ordering may not be the one we want to use, so we must allow the user to specify the ordering. 6 | 7 | ## Problem 1: Don't mix two different orderings in the same heap 8 | 9 | Here is a strawman design for a very simple heap API: 10 | ```scala 11 | trait Heap[Elem] { // WARNING: THIS IS BROKEN!!! 12 | def isEmpty(implicit ord: Ordering[Elem]): Boolean 13 | def add(elem: Elem)(implicit ord: Ordering[Elem]): Heap[Elem] 14 | def first(implicit ord: Ordering[Elem]): Elem 15 | def rest(implicit ord: Ordering[Elem]): Heap[Elem] 16 | } 17 | 18 | // factory method, probably in some companion object 19 | def empty[Elem](implicit ord: Ordering[Elem]): Heap[Elem] 20 | ``` 21 | You may think it strange that all of these methods are taking an `ord` parameter. From a Scala point of view, that doesn't make much sense. But you can find variations of this design in many implementations of heaps on GitHub, including in the well-respected Scalaz library. Why? As far as I can tell, the answer is __because Haskell does it that way__. Here's the equivalent design in Haskell: 22 | ```haskell 23 | type Heap a 24 | empty :: Ord a => Heap a 25 | isEmpty :: Ord a => Heap a -> Bool 26 | add :: Ord a => a -> Heap a -> Heap a 27 | first :: Ord a => Heap a -> a 28 | rest :: Ord a => Heap a -> Heap a 29 | ``` 30 | In Haskell, this makes perfect sense. 
Behind the scenes, each method takes 31 | an Ord dictionary as a hidden parameter. But there's one critical difference between Haskell and Scala: in Haskell, there can only be a _single_ ordering for an element type, but in Scala, there can be _many_ orderings for the same element type. 32 | 33 | For example, consider this Scala code 34 | ```scala 35 | val ord1 = Ordering.Int 36 | val ord2 = ord1.reverse 37 | val heap1 = empty[Int](ord1).add(5)(ord1).add(7)(ord1) 38 | val heap2 = heap1.add(4)(ord2) 39 | println(heap2.first) 40 | ``` 41 | What should this print? Of course, that depends on the details of the implementation, but you would expect it to print either 4 (the smallest element) or 7 (the biggest element). However, because one ordering was used for two of the `add`s and the opposite ordering was used for the third `add`, there's an excellent chance that the actual result will be 5, which is the wrong answer for both orderings. 42 | 43 | The magic of implicit parameters is that you usually don't need to pass them explicitly. But (A) there's nothing to stop you from doing so, and (B) there's nothing to prevent you from calling methods in different scopes with different orderings. Now, if you're anything like me, the possibility that this could happen by accident is making your skin crawl. Surely, the API should prevent this from happening! 44 | 45 | Fortunately, this problem is very easy to fix. __Only the `empty` method should take an ordering.__ Once that initial heap has been created, all future heaps derived from that heap via any sequence of `add`s or `rest`s should use the same ordering. With this change, the API becomes 46 | ```scala 47 | trait Heap[Elem] { 48 | def isEmpty: Boolean 49 | def add(elem: Elem): Heap[Elem] 50 | def first: Elem 51 | def rest: Heap[Elem] 52 | } 53 | 54 | // factory method, probably in some companion object 55 | def empty[Elem](implicit ord: Ordering[Elem]): Heap[Elem] 56 | ``` 57 | Yay! That is both simpler and safer.
58 | 59 | ## Problem 2: `merge` 60 | 61 | Another operation supported by many kinds of heaps is `merge`, which combines two heaps into a single heap. Examples of heaps supporting merge include leftist heaps, skew heaps, binomial heaps (aka binomial queues), Fibonacci heaps, etc. 62 | 63 | We can easily add `merge` to the existing `Heap[Elem]` trait. 64 | ```scala 65 | trait Heap[Elem] { 66 | ... 67 | def merge(other: Heap[Elem]): Heap[Elem] 68 | } 69 | ``` 70 | However, there are at least two problems with this. First, traits allow for subclassing, so we might have several different implementations, such as leftist heaps and binomial heaps. But we only want to merge leftist heaps with leftist heaps and binomial heaps with binomial heaps—we do NOT want to merge leftist heaps with binomial heaps. 71 | 72 | There are several ways to address this problem. For example, leftist heaps and binomial heaps could just use completely separate class/trait hierarchies, and each could use a `sealed trait` to prevent this unwanted mixing of types. 73 | 74 | But the code duplication this would entail is unsatisfying. It would also make it more difficult to share code (such as a testing harness) between different implementations. 75 | 76 | Alternatively, we can control the types more precisely by adding a second type parameter for the specific representation being used, as in 77 | ```scala 78 | trait MHeap[Elem, Heap] { 79 | // MHeap is "Mergeable Heap" 80 | // Heap is the specific Heap representation being used 81 | def isEmpty: Boolean 82 | def add(elem: Elem): Heap 83 | def first: Elem 84 | def rest: Heap 85 | def merge(other: Heap): Heap 86 | } 87 | 88 | sealed trait LeftistHeap[Elem] extends MHeap[Elem, LeftistHeap[Elem]] 89 | 90 | sealed trait BinomialHeap[Elem] extends MHeap[Elem, BinomialHeap[Elem]] 91 | ``` 92 | Because of the extra type parameter, a leftist heap and binomial heap are incompatible and cannot be merged.
93 | 94 | ## Problem 3: `merge` (continued) 95 | 96 | There's a second problem with `merge`. A particular implementation, such as leftist heaps, would provide a factory method for creating a new heap. 97 | ```scala 98 | object LeftistHeap { // companion object 99 | def empty[Elem](implicit ord: Ordering[Elem]): LeftistHeap[Elem] = ... 100 | } 101 | ``` 102 | Because of the `MHeap` definition, we can't `merge` a leftist heap with a binomial heap. But now we've re-introduced the problem of incompatible orderings! 103 | ```scala 104 | val ord1 = Ordering.Int 105 | val ord2 = ord1.reverse 106 | val heap1 = empty[Int](ord1).add(5).add(7) 107 | val heap2 = empty[Int](ord2).add(6).add(4) 108 | var heap3 = heap1.merge(heap2) 109 | while (!heap3.isEmpty) { 110 | println(heap3.first) 111 | heap3 = heap3.rest 112 | } 113 | ``` 114 | Notice that `heap1` and `heap2` were created with opposite orderings. What happens if we merge them? Nothing good! The exact results depend on details of the implementation, but a likely result is that the loop will print the elements in the order 5,6,4,7—or maybe 5,7,6,4—when it _should_ print them in sorted order! 115 | 116 | We would really like to make this sort of situation impossible! Maybe we could test the orderings for object equality at runtime, and throw an exception if they're different? That could actually work for simple types like integers with a built-in ordering object. But for more complicated types, such as tuples, the orderings are generated on demand from the orderings of their constituent parts. And this generation is not memoized, so if we demand an ordering for, say, `(Int,String)` twice, we'll get two separate ordering objects, which will cause a false negative for our hypothetical dynamic equality check. 117 | 118 | No, we would really like to make merging two heaps with different orderings a type error. We can achieve this by making the notion of a factory explicit. 
The idea is that heaps can only be merged with other heaps from the same factory. Attempting to merge heaps from different factories will cause a type error. 119 | 120 | In code, we might express this as follows: 121 | ```scala 122 | trait HeapFactory { 123 | type Elem 124 | type Heap <: MHeap[Elem,Heap] 125 | 126 | def empty: Heap 127 | // plus other factory methods 128 | } 129 | 130 | object LeftistHeap { // companion object 131 | def factory[E](implicit ord: Ordering[E]): HeapFactory { type Elem = E } = ... 132 | } 133 | ``` 134 | Now we can say 135 | ```scala 136 | val minHeaps = LeftistHeap.factory[Int](Ordering.Int) 137 | val maxHeaps = LeftistHeap.factory[Int](Ordering.Int.reverse) 138 | 139 | val heap1 = minHeaps.empty.add(5).add(7) 140 | val heap2 = minHeaps.empty.add(6).add(4) 141 | val heap3 = heap1.merge(heap2) // this typechecks 142 | 143 | val heap4 = maxHeaps.empty.add(6).add(4) 144 | val heap5 = heap1.merge(heap4) // !!!type error!!! 145 | ``` 146 | Notice that `heap1`, `heap2`, and `heap3` have type `minHeaps.Heap` but `heap4` has type `maxHeaps.Heap`. According to Scala's notion of __path-dependent types__, these types are incompatible so attempting to merge `heap1` and `heap4` causes a type error, as desired. 147 | 148 | ## A question 149 | 150 | Clearly, if I create two factories with incompatible element types, then a heap from one factory should not be mergeable with a heap from the other factory. Similarly, if I create two factories with the same element type but incompatible orderings, then again a heap from one factory should not be mergeable with a heap from the other factory. 151 | 152 | But what if I create two separate factories with the same element type and the same ordering? Should a heap created from one of these factories be mergeable with a heap created from the other factory? It's not clear. If this duplication of factories was deliberate, then the answer is probably "no". This often happens with units of measure. 
For example, maybe one of the factories is using integers to represent inches and the other is using integers to represent grams. Even if the factories are using the same ordering, we probably don't want to merge a heap of inches with a heap of grams! 153 | 154 | On the other hand, the duplication of factories could be accidental, perhaps the result of two chunks of code being written separately and then brought together later. In that case, we might very well want to be able to merge a heap from one factory with a heap from another accidentally-separate-but-equivalent factory. 155 | 156 | Regardless of where you come down on what _should_ happen, what _will_ happen in the above design is that attempting to merge heaps from distinct factories will cause a type error, 157 | even if the factories were made for the same element type and ordering. 158 | 159 | ## Problem 4: Usability in the simple case 160 | 161 | Most applications of priority queues do not need the `merge` method. Trying to make `merge` typesafe has made the API more complicated and harder to use because of the need to instantiate a factory before creating actual heaps. Can we hide these complications from a user until and unless they actually need to use `merge`? Yes. 162 | 163 | I'll re-introduce the interface without `merge`, but now called `SHeap` for "Simple Heap". 164 | ```scala 165 | trait SHeap[Elem] { 166 | def isEmpty: Boolean 167 | def add(elem: Elem): SHeap[Elem] 168 | def first: Elem 169 | def rest: SHeap[Elem] 170 | } 171 | ``` 172 | Then `MHeap` should be a subtype of `SHeap`. 173 | ```scala 174 | trait MHeap[Elem, Heap <: SHeap[Elem]] extends SHeap[Elem] { 175 | // inherits isEmpty and first from SHeap[Elem] 176 | def add(elem: Elem): Heap // more specific return type 177 | def rest: Heap // more specific return type 178 | def merge(other: Heap): Heap 179 | } 180 | ``` 181 | The `HeapFactory` definition is unchanged. 
182 | ```scala 183 | trait HeapFactory { 184 | type Elem 185 | type Heap <: MHeap[Elem,Heap] 186 | 187 | def empty: Heap 188 | // plus other factory methods 189 | } 190 | ``` 191 | The last part is that the companion object should supply simple factory methods in terms of `SHeap`. 192 | ```scala 193 | object LeftistHeap { // companion object 194 | def empty[E](implicit ord: Ordering[E]): SHeap[E] = ... 195 | // plus other ordinary factory methods, similar to other Scala collections 196 | 197 | // the big bad 198 | def factory[E](implicit ord: Ordering[E]): HeapFactory { type Elem = E } = ... 199 | } 200 | ``` 201 | Now the user can proceed in blissful ignorance of `factory` or `MHeap`, treating this essentially just like any other Scala collection, until they need `merge`. Of course, `empty` will probably be defined as 202 | ```scala 203 | def empty[E](implicit ord: Ordering[E]): SHeap[E] = factory[E](ord).empty 204 | ``` 205 | (and similarly for the other ordinary factory methods), but the user doesn't need to know that. 206 | 207 | ## Problem 5: min vs max 208 | 209 | Should a heap favor smaller elements or bigger elements? 210 | There's no obvious answer—applications abound for both. 211 | Therefore, an interface should easily support both flavors. Right now, the 212 | ordering parameter allows us to say 213 | ```scala 214 | val minHeaps = LeftistHeap.factory[Int](Ordering.Int) 215 | val maxHeaps = LeftistHeap.factory[Int](Ordering.Int.reverse) 216 | ``` 217 | But how did I know that `Ordering.Int` was the right ordering for 218 | min-heaps and `Ordering.Int.reverse` was the right ordering for max-heaps? 219 | The opposite could just as easily have been true. Sure, this detail would probably be 220 | documented in the API, but it was fundamentally a flip-a-coin arbitrary decision. 221 | And arbitrary decisions with no logic favoring one choice over the other are 222 | the hardest to remember. 
223 | 224 | In an easier-to-use interface, the user might write 225 | ```scala 226 | val minHeaps = LeftistHeap.minFactory[Int](Ordering.Int) 227 | val maxHeaps = LeftistHeap.maxFactory[Int](Ordering.Int) 228 | ``` 229 | Now, the user doesn't need to worry whether to use `Ordering.Int` or 230 | `Ordering.Int.reverse`. Instead, if they want min-oriented heaps, they call 231 | `minFactory(Ordering.Int)` and if they want max-oriented heaps, they call 232 | `maxFactory(Ordering.Int)`. In fact, it's even better than that. The whole 233 | point of implicit parameters is that you usually don't need to write them 234 | down explicitly. In reality, the user would probably only write 235 | ```scala 236 | val minHeaps = LeftistHeap.minFactory[Int] 237 | val maxHeaps = LeftistHeap.maxFactory[Int] 238 | ``` 239 | Actually in the current version, this is now 240 | ```scala 241 | val minHeaps = LeftistHeap.Min.factory[Int] 242 | val maxHeaps = LeftistHeap.Max.factory[Int] 243 | ``` 244 | where `LeftistHeap.Min` and `LeftistHeap.Max` both support other 245 | simpler methods for creating `SHeap`s for users who don't need `merge`. 246 | For example, a user could write 247 | ```scala 248 | val h1 = LeftistHeap.Min.empty[Int] // an empty min-heap of integers 249 | val h2 = LeftistHeap.Max(1,2,3) // a max-heap containing 1, 2, and 3 250 | ``` 251 | 252 | _Of course, there's lots more needed to flesh the whole design out into an 253 | industrial-strength API, and even more to integrate it with the current 254 | Scala collections. I'll continue to work on this, and I welcome discussion on 255 | these issues._ 256 | --------------------------------------------------------------------------------