├── project ├── build.properties └── plugins.sbt ├── .gitignore ├── LICENSE ├── src ├── test │ └── scala │ │ └── ClusterTests.scala └── main │ └── scala │ ├── ComputeRunner.scala │ ├── Reduction.scala │ └── IgnitePipe.scala └── README.md /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.8 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | project/target 3 | project/project/target 4 | *.swp 5 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += Resolver.sonatypeRepo("snapshots") 2 | resolvers += Resolver.sonatypeRepo("releases") 3 | 4 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "0.2.1") 5 | addSbtPlugin("com.typesafe.sbt" % "sbt-pgp" % "0.8.3") 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Ruban Monu 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /src/test/scala/ClusterTests.scala: -------------------------------------------------------------------------------- 1 | package ignite.scala 2 | 3 | import com.twitter.algebird.Semigroup 4 | import org.apache.ignite._ 5 | import org.apache.ignite.cache.affinity._ 6 | import org.apache.ignite.configuration._ 7 | import org.apache.ignite.marshaller.optimized.OptimizedMarshaller 8 | import org.scalatest.{ FunSpec, BeforeAndAfterAll } 9 | import ignite.scala._ 10 | 11 | class ClusterTests extends FunSpec with BeforeAndAfterAll { 12 | 13 | val marsh = new OptimizedMarshaller 14 | marsh.setRequireSerializable(false) // for closures with non serializable objects 15 | val cfg = new IgniteConfiguration 16 | cfg.setMarshaller(marsh) 17 | cfg.setPeerClassLoadingEnabled(true) 18 | 19 | val ignite = Ignition.start(cfg) 20 | val cluster = ignite.cluster 21 | val compute = ignite.compute(cluster) 22 | 23 | implicit val cr = ComputeRunner(compute) 24 | 25 | override def afterAll() { 26 | ignite.close() 27 | } 28 | 29 | describe("character count test") { 30 | implicit val sg = Semigroup.intSemigroup 31 | 32 | it ("should execute") { 33 | 34 | val count = IgnitePipe.from("apple pie".split(" ")) 35 | .map(_.length) 36 | .reduce 37 | .execute 38 | 39 | assert(Some(8) == count) 40 | } 41 | 42 | it ("should execute merge") { 43 | val pipe1 = IgnitePipe.from(Seq(1, 2, 3)).map(identity) 44 | val pipe2 = 
IgnitePipe.from(Seq(4, 5, 6)).map(identity) 45 | // .map here forces the closure execution to cluster nodes 46 | 47 | val sum = (pipe1 ++ pipe2) 48 | .reduce 49 | .execute 50 | 51 | assert(Some(21) == sum) 52 | } 53 | } 54 | 55 | describe("cache put and get test") { 56 | val cachecfg = new CacheConfiguration[Int, Seq[String]] 57 | cachecfg.setName("cache-test") 58 | 59 | it ("should execute") { 60 | val cache = ignite.getOrCreateCache(cachecfg) 61 | cache.put(1, Seq("one", "ek")) 62 | cache.put(2, Seq("two", "do")) 63 | cache.put(3, Seq("three", "teen")) 64 | 65 | val data = IgnitePipe.from(Seq(1, 2, 3)) 66 | .map { k => 67 | cache.get(k) 68 | } 69 | .flatMap(identity) 70 | .execute 71 | 72 | assert(Set("one", "two", "three", "ek", "do", "teen") == data.toSet) 73 | } 74 | 75 | it ("should execute collocated") { 76 | // re-uses existing cache 77 | val cache = ignite.getOrCreateCache(cachecfg) 78 | val data = IgnitePipe 79 | .collocated(cache, Set(1, 2, 3)) { (c, k) => 80 | c.localPeek(k) 81 | } 82 | .flatMap(identity) 83 | .execute 84 | 85 | assert(Set("one", "two", "three", "ek", "do", "teen") == data.toSet) 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ignite scala 2 | 3 | Scala API for distributed closures on [Apache Ignite](https://ignite.incubator.apache.org/). Inspired by [Scalding](https://github.com/twitter/scalding/). 
4 | 5 | http://apacheignite.readme.io/v1.0/docs/distributed-closures 6 | 7 | #### example 0 - cluster setup 8 | ```scala 9 | import org.apache.ignite._ 10 | import org.apache.ignite.configuration._ 11 | import ignite.scala._ 12 | 13 | val cfg = new IgniteConfiguration // configure as appropriate 14 | val ignite = Ignition.start(cfg) 15 | val compute = ignite.compute(ignite.cluster) 16 | implicit val cr = ComputeRunner(compute) 17 | ``` 18 | #### example 1 - character count 19 | ```scala 20 | import com.twitter.algebird.Semigroup 21 | 22 | implicit val sg = Semigroup.intSemigroup 23 | 24 | val chain = IgnitePipe.from("The quick brown fox jumps over the lazy dog.".split(" ")) 25 | .map(_.length) // fork 26 | .reduce // join 27 | 28 | chain.execute // Option[Int] 29 | ``` 30 | #### example 2 - working with distributed cache 31 | ```scala 32 | val cache = { 33 | val cfg = new CacheConfiguration[K, V] 34 | cfg.setName(name) 35 | ignite.getOrCreateCache(cfg) 36 | } 37 | 38 | val process: V => R // computation 39 | val isValid: R => Boolean 40 | 41 | IgnitePipe.from(keys) 42 | .map { k => 43 | val v = cache.get(k) 44 | process(v) 45 | } 46 | .filter(isValid) 47 | .execute // Iterable[R] 48 | ``` 49 | #### example 3 - collocating compute with cache 50 | Ignite allows routing computations to the nodes where data is cached. 
51 | ```scala 52 | IgnitePipe.collocated(cache, keys) { (c, k) => 53 | val v = c.localPeek(k) 54 | process(v) 55 | } 56 | .filter(isValid) 57 | .execute 58 | ``` 59 | #### example 4 - more chaining 60 | ```scala 61 | val cache: IgniteCache[K, V] 62 | val db: CacheJdbcBlobStore[K, V] 63 | 64 | val cacheResults = IgnitePipe.from(keys) 65 | .map { k => cacheGetAndCompute(cache, k) } 66 | 67 | val dbResults = IgnitePipe.from(keys) 68 | .map { k => dbGetAndCompute(db, k) } 69 | 70 | val combined = (cacheResults ++ dbResults) 71 | .reduce // reduction could be to consolidate cache and db for instance 72 | .toPipe // continuation 73 | 74 | combined.map { exportResults(_) }.execute 75 | ``` 76 | 77 | #### installing 78 | 79 | Add the following to your build.sbt (fetches from sonatype) 80 | ```scala 81 | resolvers += Resolver.sonatypeRepo("releases") 82 | libraryDependencies += "com.github.rubanm" %% "ignite-scala" % "0.0.1" 83 | ``` 84 | #### core api 85 | 86 | ```scala 87 | /* Provides composable distributed closures that can run on Apache Ignite. 
*/ 88 | trait IgnitePipe[T] { 89 | 90 | def map[U](f: T => U): IgnitePipe[U] 91 | 92 | def flatMap[U](f: T => TraversableOnce[U]): IgnitePipe[U] 93 | 94 | def ++(p: IgnitePipe[T]): IgnitePipe[T] 95 | 96 | def reduce(implicit sg: Semigroup[T]): Reduction[T] 97 | 98 | def execute: Iterable[T] 99 | } 100 | 101 | /* Represents a reduction of the distributed closure results.*/ 102 | trait Reduction[T] { 103 | 104 | def execute: Option[T] 105 | 106 | def toPipe: IgnitePipe[T] 107 | } 108 | ``` 109 | -------------------------------------------------------------------------------- /src/main/scala/ComputeRunner.scala: -------------------------------------------------------------------------------- 1 | package ignite.scala 2 | 3 | import com.twitter.algebird.Semigroup 4 | import com.twitter.logging.Logger 5 | import org.apache.ignite._ 6 | import org.apache.ignite.lang._ 7 | import scala.collection.JavaConverters._ 8 | 9 | /** 10 | * Basic accumulating version of IgniteReducer. 11 | * 12 | * Values are summed in a local var as they are received 13 | * from closure computations on the cluster as defined by 14 | * the supplied Semigroup. 15 | */ 16 | class AccumulatingReducer[A](sg: Semigroup[A], n: Int) 17 | extends IgniteReducer[A, A] { 18 | 19 | require(n > 0) 20 | 21 | private[this] val log = Logger.get(getClass) 22 | private[this] object Lock 23 | 24 | private[this] var collected = Option.empty[A] 25 | private[this] var count = 0 26 | 27 | override def collect(a: A): Boolean = Lock.synchronized { 28 | log.debug(s"Collecting value $a") 29 | if (count == n) 30 | sys.error(s"Buffer overflow. 
Already reached size $count") 31 | else { 32 | collected match { 33 | case None => collected = Some(a) 34 | case Some(b) => collected = Some(sg.plus(b, a)) 35 | } 36 | count = count + 1 37 | if (count == n) false 38 | else true 39 | } 40 | } 41 | 42 | override def reduce: A = collected match { 43 | case None => sys.error("No values collected by reducer") 44 | case Some(c) => c 45 | } 46 | } 47 | 48 | /** 49 | * Conversions from Scala function to Ignite's closure classes. 50 | */ 51 | private object IgniteClosureConversions { 52 | 53 | def scala2closure[A, B](f: Function1[A, B]): IgniteClosure[A, B] = 54 | new IgniteClosure[A, B]() { override def apply(a: A) = f(a) } 55 | 56 | def scala2runnable[A](f: () => Unit): IgniteRunnable = 57 | new IgniteRunnable() { override def run = f() } 58 | 59 | def scala2callable[A](f: () => A): IgniteCallable[A] = 60 | new IgniteCallable[A]() { override def call = f() } 61 | 62 | def scala2reducer[A](sg: Semigroup[A], n: Int): IgniteReducer[A, A] = 63 | new AccumulatingReducer[A](sg, n) 64 | } 65 | 66 | final case class CacheAffinity[K, V](cacheName: String, key: K) 67 | 68 | /** 69 | * Provides Scala-friendly api to IgniteCompute class. 
70 | */ 71 | final case class ComputeRunner(ic: IgniteCompute) { 72 | import IgniteClosureConversions._ 73 | 74 | def apply[A, B](x: A)(f: A => B): B = 75 | ic.apply[B, A](scala2closure(f), x) 76 | 77 | def apply[A, B](xs: Iterable[A])(f: A => B): Iterable[B] = 78 | xs match { 79 | case empty if empty.isEmpty => Nil // ignite does not handle empty iterables correctly; `case Nil` only caught empty Seqs, not empty Sets/Maps 80 | case iter => ic.apply[A, B](scala2closure(f), iter.asJavaCollection).asScala 81 | } 82 | 83 | def flatMapApply[A, B](xs: Iterable[A])(f: A => TraversableOnce[B]): Iterable[B] = 84 | apply[A, TraversableOnce[B]](xs)(f).flatten 85 | 86 | def affinityApply[A, K, V](cas: Iterable[CacheAffinity[K, V]])(f: CacheAffinity[K, V] => A): Iterable[A] = cas 87 | .toList // materialize first: mapping a Set-backed source directly would collapse equal results into one 88 | .map { ca => 89 | ic.affinityCall[A](ca.cacheName, ca.key, 90 | scala2callable(() => f(ca))) 91 | } 92 | 93 | 94 | def flatMapAffinityApply[A, K, V](cas: Iterable[CacheAffinity[K, V]])(f: CacheAffinity[K, V] => TraversableOnce[A]): Iterable[A] = 95 | affinityApply[TraversableOnce[A], K, V](cas)(f).flatten 96 | 97 | def affinityReduceOption[A, K, V](cas: Iterable[CacheAffinity[K, V]])(f: CacheAffinity[K, V] => A)(sg: Semigroup[A]): Option[A] = 98 | affinityApply[A, K, V](cas)(f).reduceOption(sg.plus(_, _)) 99 | 100 | // TODO: size is O(n) for Iterables 101 | def reduceOption[A, B](xs: Iterable[A])(m: A => B)(sg: Semigroup[B]): Option[B] = 102 | xs.toList match { // materialize once so the job arguments and the reducer's expected count come from the same traversal 103 | case Nil => None 104 | case items => Some( 105 | ic.apply[B, B, A](scala2closure(m), items.asJavaCollection, 106 | scala2reducer(sg, items.size))) 107 | } 108 | 109 | // 110 | // the following are currently unused 111 | // 112 | 113 | def broadcastFn[A, B](x: A)(f: A => B): Iterable[B] = 114 | ic.broadcast[B, A](scala2closure(f), x).asScala 115 | 116 | def broadcastRun(x: () => Unit): Unit = 117 | ic.broadcast(scala2runnable(x)) 118 | 119 | def broadcastCall[A](x: () => A): Iterable[A] = 120 | 
ic.broadcast[A](scala2callable(x)).asScala 121 | 122 | def call[A](x: () => A): A = 123 | ic.call[A](scala2callable(x)) 124 | 125 | def call[A](xs: Iterable[() => A]): Iterable[A] = 126 | xs match { 127 | case empty if empty.isEmpty => Nil // any empty Iterable short-circuits, not just Nil 128 | case iter => ic.call[A](iter.map(scala2callable(_)).asJavaCollection).asScala 129 | } 130 | 131 | def reduceOptionCall[A](xs: Iterable[() => A])(sg: Semigroup[A]): Option[A] = 132 | xs.toList match { // materialize once so the job list and the reducer's expected count agree 133 | case Nil => None 134 | case fns => Some( 135 | ic.call[A, A](fns.map(scala2callable(_)).asJavaCollection, 136 | scala2reducer(sg, fns.size))) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/main/scala/Reduction.scala: -------------------------------------------------------------------------------- 1 | package ignite.scala 2 | 3 | import com.twitter.algebird.Semigroup 4 | import java.io.Serializable 5 | 6 | /** 7 | * Represents a reduction of Ignite's distributed closure results. 8 | * These are performed on the client and are akin to the join step in fork-join. 9 | */ 10 | sealed trait Reduction[T] extends Serializable { 11 | 12 | /** 13 | * Execute this reduction and return computed value. 14 | */ 15 | def execute: Option[T] 16 | 17 | /** 18 | * Return an IgnitePipe representing the result of the reduction. 19 | * 20 | * Used for continuations. 
21 | */ 22 | def toPipe: IgnitePipe[T] 23 | } 24 | 25 | final case object EmptyReduction extends Reduction[Nothing] { 26 | 27 | override def execute = None 28 | 29 | override def toPipe = EmptyPipe 30 | } 31 | 32 | /** 33 | * Represents a reduction containing a computed value 34 | */ 35 | final case class ValueReduction[T](value: T)(implicit val compute: ComputeRunner) 36 | extends Reduction[T] { 37 | 38 | override def execute = Some(value) 39 | 40 | override def toPipe = IgnitePipe.from(List(value))(compute) 41 | } 42 | 43 | object TransformValueReduction { 44 | def from[S, T](tvp: TransformValuePipe[S, T])(passedSg: Semigroup[T]): TransformValueReduction[S, T] = 45 | new TransformValueReduction[S, T] { 46 | override val compute = tvp.compute 47 | override val source = tvp.source 48 | override def transform = tvp.transform 49 | override def sg = passedSg 50 | } 51 | } 52 | 53 | /** 54 | * Represents a reduction performed by first performing 55 | * a transforming computation on the cluster followed by reduction. 56 | */ 57 | sealed abstract class TransformValueReduction[S, T] extends Reduction[T] 58 | with HasComputeConfig[S, T] { 59 | 60 | protected def sg: Semigroup[T] 61 | 62 | override def execute = compute.reduceOption(source)(transform)(sg) 63 | 64 | override def toPipe = ReduceHelper.toPipe[S, T](this) 65 | } 66 | 67 | object CacheAffinityValueReduction { 68 | def from[K, V, T](cap: CacheAffinityPipe[K, V, T])(passedSg: Semigroup[T]): CacheAffinityValueReduction[K, V, T] = 69 | new CacheAffinityValueReduction[K, V, T] { 70 | override val compute = cap.compute 71 | override val source = cap.source 72 | override def transform = cap.transform 73 | override def sg = passedSg 74 | } 75 | } 76 | 77 | /** 78 | * Represents a reduction performed by first performing 79 | * affinity (cache-collocated) computation followed by reduction. 
80 | */ 81 | sealed abstract class CacheAffinityValueReduction[K, V, T] extends Reduction[T] 82 | with HasComputeConfig[CacheAffinity[K, V], T] { 83 | 84 | protected def sg: Semigroup[T] 85 | 86 | override def execute = compute.affinityReduceOption(source)(transform)(sg) 87 | 88 | override def toPipe = ReduceHelper.toPipe[CacheAffinity[K, V], T](this) 89 | } 90 | 91 | object FlatMapValueReduction { 92 | 93 | def from[S, T](fvp: FlatMapValuePipe[S, T])(passedSg: Semigroup[T]): FlatMapValueReduction[S, T] = 94 | new FlatMapValueReduction[S, T] { 95 | override val compute = fvp.compute 96 | override val source = fvp.source 97 | override def transform = fvp.transform 98 | override def sg = passedSg 99 | } 100 | } 101 | 102 | /** 103 | * Represents a reduction performed by first performing 104 | * a transforming computation on the cluster followed by 105 | * flattening and reduction (at the client). 106 | */ 107 | sealed abstract class FlatMapValueReduction[S, T] extends Reduction[T] 108 | with HasComputeConfig[S, TraversableOnce[T]] { 109 | 110 | protected def sg: Semigroup[T] 111 | 112 | def execute = compute 113 | .reduceOption(source)(transform.andThen(_.toList))(Semigroup.listSemigroup[T]) 114 | .flatMap(_.reduceOption(sg.plus(_, _))) 115 | 116 | override def toPipe = ReduceHelper.toPipe[S, T](this) 117 | } 118 | 119 | object FlatMapCacheAffinityReduction { 120 | 121 | def from[K, V, T](fcap: FlatMapCacheAffinityPipe[K, V, T])(passedSg: Semigroup[T]): FlatMapCacheAffinityReduction[K, V, T] = 122 | new FlatMapCacheAffinityReduction[K, V, T] { 123 | override val compute = fcap.compute 124 | override val source = fcap.source 125 | override def transform = fcap.transform 126 | override def sg = passedSg 127 | } 128 | } 129 | 130 | /** 131 | * Represents a reduction performed by first performing 132 | * affinity (cache-collocated) computation followed by 133 | * flattening and reduction (at the client). 
134 | */ 135 | sealed abstract class FlatMapCacheAffinityReduction[K, V, T] extends Reduction[T] 136 | with HasComputeConfig[CacheAffinity[K, V], TraversableOnce[T]] { 137 | 138 | protected def sg: Semigroup[T] 139 | 140 | def execute = compute 141 | .affinityReduceOption(source)(transform.andThen(_.toList))(Semigroup.listSemigroup[T]) 142 | .flatMap(_.reduceOption(sg.plus(_, _))) 143 | 144 | override def toPipe = ReduceHelper.toPipe[CacheAffinity[K, V], T](this) 145 | } 146 | 147 | final case class MergedReduction[T](left: Reduction[T], 148 | right: Reduction[T])(implicit val sg: Semigroup[T]) 149 | extends Reduction[T] { 150 | 151 | override def execute = 152 | Semigroup.optionSemigroup[T].plus(left.execute, right.execute) 153 | 154 | override def toPipe = MergedPipe(left.toPipe, right.toPipe) 155 | } 156 | -------------------------------------------------------------------------------- /src/main/scala/IgnitePipe.scala: -------------------------------------------------------------------------------- 1 | package ignite.scala 2 | 3 | import com.twitter.algebird.Semigroup 4 | import org.apache.ignite.IgniteCache 5 | import java.io.Serializable 6 | 7 | object IgnitePipe { 8 | 9 | def empty: IgnitePipe[Nothing] = EmptyPipe 10 | 11 | def from[T](iter: Iterable[T])(implicit c: ComputeRunner): IgnitePipe[T] = 12 | IterablePipe[T](iter) 13 | 14 | def from[T](iterGen: () => Iterable[T])(implicit c: ComputeRunner): IgnitePipe[T] = 15 | from(List(())).flatMap(_ => iterGen()) 16 | 17 | def collocated[K, V, T](cache: IgniteCache[K, V], keys: Set[K])(f: (IgniteCache[K, V], K) => T)(implicit c: ComputeRunner): CacheAffinityPipe[K, V, T] = 18 | new CacheAffinityPipe[K, V, T] { 19 | override def compute = c 20 | override def source = keys.map(CacheAffinity[K, V](cache.getName, _)) 21 | override def transform = { ca: CacheAffinity[K, V] => f(cache, ca.key) } 22 | // TODO: this can be inefficient. 
keyset enrichment should happen in ComputeRunner 23 | } 24 | } 25 | 26 | /** 27 | * Provides composable distributed closures that can run on Apache Ignite. 28 | * 29 | * Allows chaining functions to be executed on the cluster. Reduction is done 30 | * on the client. Note that pipe operations like flattening, filtering are also 31 | * performed on the client after gathering results from the nodes. 32 | * 33 | * Best practice is to push computations to the cluster as much as possible 34 | * and flatten, filter on the client only if the scatter-gather overhead is 35 | * acceptable and results can fit on the client. 36 | */ 37 | sealed trait IgnitePipe[T] extends Serializable { 38 | // TODO: make this covariant 39 | 40 | /** 41 | * Transform each element using the function f. 42 | * 43 | * This is executed on the cluster nodes. Chained map transforms 44 | * are composed and executed once on the cluster nodes. Use .fork 45 | * to manually split the chain if tuning is required. 46 | */ 47 | def map[U](f: T => U): IgnitePipe[U] 48 | 49 | /** 50 | * Transform each value using the function f and flatten the result. 51 | * 52 | * Note: This is not a monadic composition. 53 | * 54 | * Flatten step is performed on the client. If you have a chain of flatMaps, 55 | * all functions in the chain are composed and flattening is performed once 56 | * on the client. 57 | * 58 | * To manually split the flatMap chain, use .fork. Forking is useful when 59 | * dealing with long, lazy chains, or when adding a barrier is desired. 60 | */ 61 | def flatMap[U](f: T => TraversableOnce[U]): IgnitePipe[U] 62 | 63 | /** 64 | * Filter elements using the function f. 65 | * 66 | * Implemented as a flatMap executed on the client. 67 | */ 68 | def filter(f: T => Boolean): IgnitePipe[T] = 69 | flatMap { t => if (f(t)) Iterator(t) else Iterator.empty } 70 | 71 | /** 72 | * Prepare a Reduction based on the provided Semigroup. 
73 | * 74 | * Note that results can arrive from cluster nodes in any order, 75 | * so the operation has to be associative and commutative. 76 | */ 77 | def reduce(implicit sg: Semigroup[T]): Reduction[T] 78 | 79 | /** Merge two pipes of the same type. */ 80 | def ++(p: IgnitePipe[T]): IgnitePipe[T] = p match { 81 | case IterablePipe(iter) if iter.isEmpty => this 82 | case _ => MergedPipe(this, p) 83 | } 84 | 85 | /** 86 | * Manually add a fork in the execution chain. 87 | * This creates a barrier, which means the subsequent transforms 88 | * are planned on a fresh Ignite closure. 89 | */ 90 | def fork: IgnitePipe[T] 91 | 92 | /** Execute the chain and return the computed values. */ 93 | def execute: Iterable[T] 94 | } 95 | 96 | final case object EmptyPipe extends IgnitePipe[Nothing] { 97 | 98 | override def map[U](f: Nothing => U) = sys.error("map called on EmptyPipe") 99 | 100 | override def flatMap[U](f: Nothing => TraversableOnce[U]) = 101 | sys.error("flatMap called on EmptyPipe") 102 | 103 | override def reduce(implicit sg: Semigroup[Nothing]) = EmptyReduction 104 | 105 | override def fork = this 106 | 107 | override def execute = Iterable.empty[Nothing] 108 | } 109 | 110 | /** 111 | * Trait for pipes that hold information about 112 | * the cluster along with the source and transform 113 | * for underlying computation. 114 | */ 115 | trait HasComputeConfig[S, T] { 116 | def compute: ComputeRunner 117 | 118 | def source: Iterable[S] 119 | 120 | def transform: S => T 121 | } 122 | 123 | /** 124 | * Represents a transforming computation on the cluster. 
125 | */ 126 | sealed abstract class TransformValuePipe[S, T] extends IgnitePipe[T] 127 | with HasComputeConfig[S, T] { 128 | 129 | override def map[U](f: T => U) = PipeHelper.toTransformValuePipe[S, T, U](this)(f) 130 | 131 | override def flatMap[U](f: T => TraversableOnce[U]) = 132 | PipeHelper.toFlatMapValuePipe[S, T, U](this)(f) 133 | 134 | override def reduce(implicit sg: Semigroup[T]) = 135 | TransformValueReduction.from(this)(sg) 136 | 137 | override def fork = PipeHelper.forkPipe(this) 138 | 139 | override def execute = compute.apply(source)(transform) 140 | } 141 | 142 | /** 143 | * Represents a transforming computation on the cluster 144 | * followed by flattening of results done at the client. 145 | */ 146 | sealed abstract class FlatMapValuePipe[S, T] extends IgnitePipe[T] 147 | with HasComputeConfig[S, TraversableOnce[T]] { 148 | 149 | override def map[U](f: T => U) = 150 | PipeHelper.toTransformValuePipe[S, T, U](this)(f) 151 | 152 | override def flatMap[U](f: T => TraversableOnce[U]) = 153 | PipeHelper.toFlatMapValuePipe[S, T, U](this)(f) 154 | 155 | override def reduce(implicit sg: Semigroup[T]) = 156 | FlatMapValueReduction.from(this)(sg) 157 | 158 | override def fork = PipeHelper.forkPipe(this) 159 | 160 | override def execute = compute.flatMapApply[S, T](source)(transform) 161 | } 162 | 163 | /** 164 | * Represents a transforming affinity (cache-collocation) 165 | * computation on the cluster. 
166 | */ 167 | sealed abstract class CacheAffinityPipe[K, V, T] extends IgnitePipe[T] 168 | with HasComputeConfig[CacheAffinity[K, V], T] { 169 | 170 | override def map[U](f: T => U) = 171 | PipeHelper.toCacheAffinityPipe[K, V, T, U](this)(f) 172 | 173 | override def flatMap[U](f: T => TraversableOnce[U]) = 174 | PipeHelper.toFlatMapCacheAffinityPipe[K, V, T, U](this)(f) 175 | 176 | override def reduce(implicit sg: Semigroup[T]) = 177 | CacheAffinityValueReduction.from(this)(sg) 178 | 179 | override def fork = PipeHelper.forkPipe(this) 180 | 181 | override def execute = compute.affinityApply(source)(transform) 182 | } 183 | 184 | /** 185 | * Represents a transforming affinity (cache-collocated) 186 | * computation on the cluster followed by flattening of results 187 | * done at the client. 188 | */ 189 | sealed abstract class FlatMapCacheAffinityPipe[K, V, T] extends IgnitePipe[T] 190 | with HasComputeConfig[CacheAffinity[K, V], TraversableOnce[T]] { 191 | 192 | override def map[U](f: T => U) = 193 | PipeHelper.toCacheAffinityPipe[K, V, T, U](this)(f) 194 | 195 | override def flatMap[U](f: T => TraversableOnce[U]) = 196 | PipeHelper.toFlatMapCacheAffinityPipe[K, V, T, U](this)(f) 197 | 198 | override def reduce(implicit sg: Semigroup[T]) = 199 | FlatMapCacheAffinityReduction.from(this)(sg) 200 | 201 | override def fork = PipeHelper.forkPipe(this) 202 | 203 | override def execute = compute.flatMapAffinityApply(source)(transform) 204 | } 205 | 206 | final case class MergedPipe[T](left: IgnitePipe[T], right: IgnitePipe[T]) 207 | extends IgnitePipe[T] { 208 | 209 | override def map[U](f: T => U) = 210 | MergedPipe(left.map(f), right.map(f)) 211 | 212 | override def flatMap[U](f: T => TraversableOnce[U]) = 213 | MergedPipe(left.flatMap(f), right.flatMap(f)) 214 | 215 | override def reduce(implicit sg: Semigroup[T]) = 216 | MergedReduction(left.reduce, right.reduce) 217 | 218 | override def fork = MergedPipe(left.fork, right.fork) // fork both sides: returning `this` let later transforms compose into the children's existing closures, i.e. no barrier 219 | 220 | override def execute = left.execute ++ right.execute 221 | 
} 222 | 223 | /** 224 | * A pipe containing a sequence of values. 225 | * 226 | * Can be generally used as the starting point in the execution chain. The sequence is 227 | * partitioned and load balanced across the cluster nodes. 228 | */ 229 | final case class IterablePipe[T](iter: Iterable[T])(implicit val compute: ComputeRunner) 230 | extends IgnitePipe[T] { 231 | 232 | override def map[U](f: T => U) = PipeHelper.toTransformValuePipe[T, U](this)(f) 233 | 234 | override def flatMap[U](f: T => TraversableOnce[U]) = 235 | PipeHelper.toFlatMapValuePipe[T, U](this)(f) 236 | 237 | override def reduce(implicit sg: Semigroup[T]): Reduction[T] = // an empty source yields a reduction that executes to None; iter.reduce used to throw UnsupportedOperationException 238 | iter.reduceOption(sg.plus(_, _)).fold[Reduction[T]](TransformValueReduction.from(PipeHelper.toTransformValuePipe[T, T](IterablePipe(List.empty[T])(compute))(identity))(sg))(v => ValueReduction(v)(compute)) 239 | 240 | override def fork = this 241 | 242 | override def execute = iter 243 | } 244 | 245 | /** 246 | * Helpers for switching between IgnitePipe types. 247 | */ 248 | private object PipeHelper { 249 | 250 | def toTransformValuePipe[T, U](ip: IterablePipe[T])(f: T => U): TransformValuePipe[T, U] = 251 | new TransformValuePipe[T, U] { 252 | override val compute = ip.compute 253 | override val source = ip.iter 254 | override def transform = f 255 | } 256 | 257 | def toTransformValuePipe[S, T, U](tvp: TransformValuePipe[S, T])(f: T => U): TransformValuePipe[S, U] = 258 | new TransformValuePipe[S, U] { 259 | override val compute = tvp.compute 260 | override val source = tvp.source 261 | override def transform = tvp.transform.andThen(f) 262 | } 263 | 264 | def toFlatMapValuePipe[S, T, U](tvp: TransformValuePipe[S, T])(f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] = 265 | new FlatMapValuePipe[S, U] { 266 | override val compute = tvp.compute 267 | override val source = tvp.source 268 | override def transform = tvp.transform.andThen(f) 269 | } 270 | 271 | def toFlatMapValuePipe[S, T, U](fvp: FlatMapValuePipe[S, T])(f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] = 272 | new FlatMapValuePipe[S, U] { 273 | override val compute = fvp.compute 274 | override val source = fvp.source 
275 | override def transform = fvp.transform.andThen(_.map(f)).andThen(_.flatten) 276 | } 277 | 278 | def toFlatMapValuePipe[T, U](ip: IterablePipe[T])(f: T => TraversableOnce[U]): FlatMapValuePipe[T, U] = 279 | new FlatMapValuePipe[T, U] { 280 | override val compute = ip.compute 281 | override val source = ip.iter 282 | override def transform = f 283 | } 284 | 285 | // this adds a barrier. the supplied function f is executed 286 | // on the cluster only after flatten step of the input pipe is 287 | // executed on the client 288 | def toTransformValuePipe[S, T, U](fvp: FlatMapValuePipe[S, T])(f: T => U): TransformValuePipe[T, U] = 289 | IterablePipe(fvp.execute)(fvp.compute).map(f) 290 | 291 | def toCacheAffinityPipe[K, V, T, U](cap: CacheAffinityPipe[K, V, T])(f: T => U): CacheAffinityPipe[K, V, U] = 292 | new CacheAffinityPipe[K, V, U] { 293 | override val compute = cap.compute 294 | override val source = cap.source 295 | override def transform = cap.transform.andThen(f) 296 | } 297 | 298 | // this adds a barrier similar to the non-affinity version 299 | def toCacheAffinityPipe[K, V, T, U](fcap: FlatMapCacheAffinityPipe[K, V, T])(f: T => U): TransformValuePipe[T, U] = 300 | IterablePipe(fcap.execute)(fcap.compute).map(f) 301 | 302 | def toFlatMapCacheAffinityPipe[K, V, T, U](cap: CacheAffinityPipe[K, V, T])(f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] = 303 | new FlatMapCacheAffinityPipe[K, V, U] { 304 | override val compute = cap.compute 305 | override val source = cap.source 306 | override def transform = cap.transform.andThen(f) 307 | } 308 | 309 | def toFlatMapCacheAffinityPipe[K, V, T, U](fcap: FlatMapCacheAffinityPipe[K, V, T])(f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] = 310 | new FlatMapCacheAffinityPipe[K, V, U] { 311 | override val compute = fcap.compute 312 | override val source = fcap.source 313 | override def transform = fcap.transform.andThen(_.map(f)).andThen(_.flatten) 314 | } 315 | 316 | def forkPipe[S, 
T](tvp: TransformValuePipe[S, T]): IgnitePipe[T] = 317 | IgnitePipe.from(() => tvp.execute)(tvp.compute) 318 | 319 | def forkPipe[S, T](fvp: FlatMapValuePipe[S, T]): IgnitePipe[T] = 320 | IgnitePipe.from(() => fvp.execute)(fvp.compute) 321 | 322 | def forkPipe[K, V, T](cap: CacheAffinityPipe[K, V, T]): IgnitePipe[T] = 323 | IgnitePipe.from(() => cap.execute)(cap.compute) 324 | 325 | def forkPipe[K, V, T](fcap: FlatMapCacheAffinityPipe[K, V, T]): IgnitePipe[T] = 326 | IgnitePipe.from(() => fcap.execute)(fcap.compute) 327 | } 328 | 329 | object ReduceHelper { 330 | // creates a pipe representing the result of the reduction 331 | def toPipe[S, T](r: Reduction[T] with HasComputeConfig[S, _]): IgnitePipe[T] = 332 | new TransformValuePipe[T, T] { 333 | override val compute = r.compute 334 | override def source = r.execute.toIterable 335 | override def transform = identity 336 | } 337 | } 338 | --------------------------------------------------------------------------------