├── project
    ├── build.properties
    └── plugins.sbt
├── .gitignore
├── LICENSE
├── src
    ├── test
    │   └── scala
    │   │   └── ClusterTests.scala
    └── main
    │   └── scala
    │       ├── ComputeRunner.scala
    │       ├── Reduction.scala
    │       └── IgnitePipe.scala
└── README.md


/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.8
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | project/target
3 | project/project/target
4 | *.swp
5 | 


--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | resolvers += Resolver.sonatypeRepo("snapshots")
2 | resolvers += Resolver.sonatypeRepo("releases")
3 | 
4 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "0.2.1")
5 | addSbtPlugin("com.typesafe.sbt" % "sbt-pgp" % "0.8.3")
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Ruban Monu
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/src/test/scala/ClusterTests.scala:
--------------------------------------------------------------------------------
 1 | package ignite.scala
 2 | 
 3 | import com.twitter.algebird.Semigroup
 4 | import org.apache.ignite._
 5 | import org.apache.ignite.cache.affinity._
 6 | import org.apache.ignite.configuration._
 7 | import org.apache.ignite.marshaller.optimized.OptimizedMarshaller
 8 | import org.scalatest.{ FunSpec, BeforeAndAfterAll }
 9 | import ignite.scala._
10 | 
11 | class ClusterTests extends FunSpec with BeforeAndAfterAll {
12 | 
13 |   val marsh = new OptimizedMarshaller
14 |   marsh.setRequireSerializable(false) // for closures with non serializable objects
15 |   val cfg = new IgniteConfiguration
16 |   cfg.setMarshaller(marsh)
17 |   cfg.setPeerClassLoadingEnabled(true)
18 | 
19 |   val ignite = Ignition.start(cfg)
20 |   val cluster = ignite.cluster
21 |   val compute = ignite.compute(cluster)
22 | 
23 |   implicit val cr = ComputeRunner(compute)
24 | 
25 |   override def afterAll() {
26 |     ignite.close()
27 |   }
28 | 
29 |   describe("character count test") {
30 |     implicit val sg = Semigroup.intSemigroup
31 | 
32 |     it ("should execute") {
33 | 
34 |       val count = IgnitePipe.from("apple pie".split(" "))
35 |         .map(_.length)
36 |         .reduce
37 |         .execute
38 | 
39 |       assert(Some(8) == count)
40 |     }
41 | 
42 |     it ("should execute merge") {
43 |       val pipe1 = IgnitePipe.from(Seq(1, 2, 3)).map(identity)
44 |       val pipe2 = IgnitePipe.from(Seq(4, 5, 6)).map(identity)
45 |       // .map here forces the closure execution to cluster nodes
46 | 
47 |       val sum = (pipe1 ++ pipe2)
48 |         .reduce
49 |         .execute
50 | 
51 |       assert(Some(21) == sum)
52 |     }
53 |   }
54 | 
55 |   describe("cache put and get test") {
56 |     val cachecfg = new CacheConfiguration[Int, Seq[String]]
57 |     cachecfg.setName("cache-test")
58 | 
59 |     it ("should execute") {
60 |       val cache = ignite.getOrCreateCache(cachecfg)
61 |       cache.put(1, Seq("one", "ek"))
62 |       cache.put(2, Seq("two", "do"))
63 |       cache.put(3, Seq("three", "teen"))
64 | 
65 |       val data = IgnitePipe.from(Seq(1, 2, 3))
66 |         .map { k =>
67 |           cache.get(k)
68 |         }
69 |         .flatMap(identity)
70 |         .execute
71 | 
72 |       assert(Set("one", "two", "three", "ek", "do", "teen") == data.toSet)
73 |     }
74 | 
75 |     it ("should execute collocated") {
76 |       // re-uses existing cache
77 |       val cache = ignite.getOrCreateCache(cachecfg)
78 |       val data = IgnitePipe
79 |         .collocated(cache, Set(1, 2, 3)) { (c, k) =>
80 |           c.localPeek(k)
81 |         }
82 |         .flatMap(identity)
83 |         .execute
84 | 
85 |       assert(Set("one", "two", "three", "ek", "do", "teen") == data.toSet)
86 |     }
87 |   }
88 | }
89 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ignite scala
  2 | 
  3 | Scala API for distributed closures on [Apache Ignite](https://ignite.incubator.apache.org/). Inspired by [Scalding](https://github.com/twitter/scalding/).
  4 | 
  5 | http://apacheignite.readme.io/v1.0/docs/distributed-closures
  6 | 
  7 | #### example 0 - cluster setup
  8 | ```scala
  9 | import org.apache.ignite._
 10 | import org.apache.ignite.configuration._
 11 | import ignite.scala._
 12 | 
 13 | val cfg = new IgniteConfiguration // configure as appropriate
 14 | val ignite = Ignition.start(cfg)
 15 | val compute = ignite.compute(ignite.cluster)
 16 | implicit val cr = ComputeRunner(compute)
 17 | ```
 18 | #### example 1 - character count
 19 | ```scala
 20 | import com.twitter.algebird.Semigroup
 21 | 
 22 | implicit val sg = Semigroup.intSemigroup
 23 | 
 24 | val chain = IgnitePipe.from("The quick brown fox jumps over the lazy dog.".split(" "))
 25 |   .map(_.length) // fork
 26 |   .reduce // join
 27 | 
 28 | chain.execute // Option[Int]
 29 | ```
 30 | #### example 2 - working with distributed cache
 31 | ```scala
 32 | val cache = {
 33 |   val cfg = new CacheConfiguration[K, V]
 34 |   cfg.setName(name)
 35 |   ignite.getOrCreateCache(cfg)
 36 | }
 37 | 
 38 | val process: V => R // computation
 39 | val isValid: R => Boolean
 40 | 
 41 | IgnitePipe.from(keys)
 42 |   .map { k =>
 43 |     val v = cache.get(k)
 44 |     process(v)
 45 |   }
 46 |   .filter(isValid)
 47 |   .execute // Iterable[R]
 48 | ```
 49 | #### example 3 - collocating compute with cache
 50 | Ignite allows routing computations to the nodes where data is cached.
 51 | ```scala
 52 | IgnitePipe.collocated(cache, keys) { (c, k) =>
 53 |   val v = c.localPeek(k)
 54 |   process(v)
 55 | }
 56 | .filter(isValid)
 57 | .execute
 58 | ```
 59 | #### example 4 - more chaining
 60 | ```scala
 61 | val cache: IgniteCache[K, V]
 62 | val db: CacheJdbcBlobStore[K, V]
 63 | 
 64 | val cacheResults = IgnitePipe.from(keys)
 65 |   .map { k => cacheGetAndCompute(cache, k) }
 66 | 
 67 | val dbResults = IgnitePipe.from(keys)
 68 |   .map { k => dbGetAndCompute(db, k) }
 69 |   
 70 | val combined = (cacheResults ++ dbResults)
 71 |   .reduce // reduction could be to consolidate cache and db for instance
 72 |   .toPipe // continuation
 73 |   
 74 | combined.map { exportResults(_) }.execute
 75 | ```
 76 | 
 77 | #### installing
 78 | 
 79 | Add the following to your build.sbt (fetches from sonatype)
 80 | ```scala
 81 | resolvers += Resolver.sonatypeRepo("releases")
 82 | libraryDependencies += "com.github.rubanm" %% "ignite-scala" % "0.0.1"
 83 | ```
 84 | #### core api
 85 | 
 86 | ```scala
 87 | /* Provides composable distributed closures that can run on Apache Ignite. */
 88 | trait IgnitePipe[T] {
 89 |   
 90 |   def map[U](f: T => U): IgnitePipe[U]
 91 |   
 92 |   def flatMap[U](f: T => TraversableOnce[U]): IgnitePipe[U]
 93 |   
 94 |   def ++(p: IgnitePipe[T]): IgnitePipe[T]
 95 |   
 96 |   def reduce(implicit sg: Semigroup[T]): Reduction[T]
 97 |   
 98 |   def execute: Iterable[T]
 99 | }
100 | 
101 | /* Represents a reduction of the distributed closure results.*/
102 | trait Reduction[T] {
103 | 
104 |   def execute: Option[T]
105 | 
106 |   def toPipe: IgnitePipe[T]
107 | }
108 | ```
109 | 


--------------------------------------------------------------------------------
/src/main/scala/ComputeRunner.scala:
--------------------------------------------------------------------------------
  1 | package ignite.scala
  2 | 
  3 | import com.twitter.algebird.Semigroup
  4 | import com.twitter.logging.Logger
  5 | import org.apache.ignite._
  6 | import org.apache.ignite.lang._
  7 | import scala.collection.JavaConverters._
  8 | 
  9 | /**
 10 |  * Basic accumulating version of IgniteReducer.
 11 |  *
 12 |  * Values are summed in a local var as they are received
 13 |  * from closure computations on the cluster as defined by
 14 |  * the supplied Semigroup.
 15 |  */
 16 | class AccumulatingReducer[A](sg: Semigroup[A], n: Int)
 17 |     extends IgniteReducer[A, A] {
 18 | 
 19 |   // n is the number of values this reducer expects to receive
 20 |   require(n > 0)
 21 | 
 22 |   private[this] val log = Logger.get(getClass)
 23 |   private[this] object Lock
 24 | 
 25 |   // running sum and count of values seen so far; accessed only under Lock
 26 |   private[this] var collected = Option.empty[A]
 27 |   private[this] var count = 0
 28 | 
 29 |   /** Folds `a` into the running sum; returns false once `n` values have arrived. */
 30 |   override def collect(a: A): Boolean = Lock.synchronized {
 31 |     log.debug(s"Collecting value $a")
 32 |     if (count == n)
 33 |       sys.error(s"Buffer overflow. Already reached size $count")
 34 |     else {
 35 |       collected = Some(collected.fold(a)(sg.plus(_, a)))
 36 |       count = count + 1
 37 |       count < n
 38 |     }
 39 |   }
 40 | 
 41 |   /** Returns the accumulated sum; fails if no value was ever collected. */
 42 |   override def reduce: A = Lock.synchronized {
 43 |     // take the same lock as collect so its writes are visible to this thread
 44 |     collected.getOrElse(sys.error("No values collected by reducer"))
 45 |   }
 46 | }
 47 | 
 48 | /**
 49 |  * Conversions from Scala function to Ignite's closure classes.
 50 |  */
 51 | private object IgniteClosureConversions {
 52 |   // wraps a one-argument function as an IgniteClosure
 53 |   def scala2closure[A, B](f: Function1[A, B]): IgniteClosure[A, B] =
 54 |     new IgniteClosure[A, B] { override def apply(arg: A): B = f(arg) }
 55 |   // wraps a side-effecting thunk as an IgniteRunnable
 56 |   def scala2runnable[A](f: () => Unit): IgniteRunnable =
 57 |     new IgniteRunnable { override def run(): Unit = f() }
 58 |   // wraps a value-producing thunk as an IgniteCallable
 59 |   def scala2callable[A](f: () => A): IgniteCallable[A] =
 60 |     new IgniteCallable[A] { override def call(): A = f() }
 61 |   // builds an AccumulatingReducer over sg that expects n values
 62 |   def scala2reducer[A](sg: Semigroup[A], n: Int): IgniteReducer[A, A] =
 63 |     new AccumulatingReducer[A](sg, n)
 64 | }
 65 | 
 66 | final case class CacheAffinity[K, V](cacheName: String, key: K)
 67 | 
 68 | /**
 69 |  * Provides Scala-friendly api to IgniteCompute class.
 70 |  */
 71 | final case class ComputeRunner(ic: IgniteCompute) {
 72 |   import IgniteClosureConversions._
 73 | 
 74 |   def apply[A, B](x: A)(f: A => B): B =
 75 |     ic.apply[B, A](scala2closure(f), x)
 76 | 
 77 |   def apply[A, B](xs: Iterable[A])(f: A => B): Iterable[B] =
 78 |     // Maps f over xs via IgniteCompute.apply. Ignite does not handle empty
 79 |     // collections correctly; isEmpty (unlike `case Nil`) also catches empty Sets.
 80 |     if (xs.isEmpty) Nil
 81 |     else ic.apply[A, B](scala2closure(f), xs.asJavaCollection).asScala
 82 | 
 83 |   def flatMapApply[A, B](xs: Iterable[A])(f: A => TraversableOnce[B]): Iterable[B] =
 84 |     apply[A, TraversableOnce[B]](xs)(f).flatten
 85 | 
 86 |   def affinityApply[A, K, V](cas: Iterable[CacheAffinity[K, V]])(f: CacheAffinity[K, V] => A): Iterable[A] =
 87 |     // Issues one affinityCall per cache key and gathers results in iteration order.
 88 |     // toList first: mapping a Set-backed Iterable builds a Set and would silently
 89 |     // drop equal results coming from different keys.
 90 |     cas.toList.map { ca =>
 91 |       ic.affinityCall(ca.cacheName, ca.key, scala2callable(() => f(ca)))
 92 |     }
 93 | 
 94 |   def flatMapAffinityApply[A, K, V](cas: Iterable[CacheAffinity[K, V]])(f: CacheAffinity[K, V] => TraversableOnce[A]): Iterable[A] =
 95 |     affinityApply[TraversableOnce[A], K, V](cas)(f).flatten
 96 | 
 97 |   def affinityReduceOption[A, K, V](cas: Iterable[CacheAffinity[K, V]])(f: CacheAffinity[K, V] => A)(sg: Semigroup[A]): Option[A] =
 98 |     affinityApply[A, K, V](cas)(f).reduceOption(sg.plus(_, _))
 99 | 
100 |   // TODO: size is O(n) for Iterables
101 |   def reduceOption[A, B](xs: Iterable[A])(m: A => B)(sg: Semigroup[B]): Option[B] = {
102 |     // Materialize once so the job list and the reducer's expected count agree;
103 |     // isEmpty (unlike `case Nil`) also catches empty non-Seq inputs such as Set.
104 |     val items = xs.toList
105 |     if (items.isEmpty) None
106 |     else Some(ic.apply[B, B, A](scala2closure(m), items.asJavaCollection, scala2reducer(sg, items.size)))
107 |   }
108 | 
109 |   //
110 |   // the following are currently unused
111 |   //
112 | 
113 |   def broadcastFn[A, B](x: A)(f: A => B): Iterable[B] =
114 |     ic.broadcast[B, A](scala2closure(f), x).asScala
115 | 
116 |   def broadcastRun(x: () => Unit): Unit =
117 |     ic.broadcast(scala2runnable(x))
118 | 
119 |   def broadcastCall[A](x: () => A): Iterable[A] =
120 |     ic.broadcast[A](scala2callable(x)).asScala
121 | 
122 |   def call[A](x: () => A): A =
123 |     ic.call[A](scala2callable(x))
124 | 
125 |   def call[A](xs: Iterable[() => A]): Iterable[A] =
126 |     // Runs every thunk via IgniteCompute.call. Empty input is short-circuited
127 |     // with isEmpty because `case Nil` only matches empty Seqs, not e.g. Sets.
128 |     if (xs.isEmpty) Nil
129 |     else ic.call[A](xs.map(scala2callable(_)).asJavaCollection).asScala
130 | 
131 |   def reduceOptionCall[A](xs: Iterable[() => A])(sg: Semigroup[A]): Option[A] = {
132 |     // Materialize once so the job list and the reducer's expected count agree.
133 |     // isEmpty (unlike `case Nil`) also catches empty non-Seq inputs such as Set;
134 |     // an empty input must never reach the reducer, which requires n > 0.
135 |     val jobs = xs.toList.map(scala2callable(_))
136 |     if (jobs.isEmpty) None else Some(ic.call[A, A](jobs.asJavaCollection, scala2reducer(sg, jobs.size)))
137 |   }
138 | }
139 | 


--------------------------------------------------------------------------------
/src/main/scala/Reduction.scala:
--------------------------------------------------------------------------------
  1 | package ignite.scala
  2 | 
  3 | import com.twitter.algebird.Semigroup
  4 | import java.io.Serializable
  5 | 
  6 | /**
  7 |  * Represents a reduction of Ignite's distributed closure results.
  8 |  * These are performed on the client and are akin to the join step in fork-join.
  9 |  */
 10 | sealed trait Reduction[T] extends Serializable {
 11 | 
 12 |   /**
 13 |    * Execute this reduction and return computed value.
 14 |    */
 15 |   def execute: Option[T]
 16 | 
 17 |   /**
 18 |    * Return an IgnitePipe representing the result of the reduction.
 19 |    *
 20 |    * Used for continuations.
 21 |    */
 22 |   def toPipe: IgnitePipe[T]
 23 | }
 24 | 
 25 | final case object EmptyReduction extends Reduction[Nothing] {
 26 | 
 27 |   override def execute = None
 28 | 
 29 |   override def toPipe = EmptyPipe
 30 | }
 31 | 
 32 | /**
 33 |  * Represents a reduction containing a computed value
 34 |  */
 35 | final case class ValueReduction[T](value: T)(implicit val compute: ComputeRunner)
 36 |   extends Reduction[T] {
 37 | 
 38 |   override def execute = Some(value)
 39 | 
 40 |   override def toPipe = IgnitePipe.from(List(value))(compute)
 41 | }
 42 | 
 43 | object TransformValueReduction {
 44 |   def from[S, T](tvp: TransformValuePipe[S, T])(passedSg: Semigroup[T]): TransformValueReduction[S, T] =
 45 |     new TransformValueReduction[S, T] {
 46 |       override val compute = tvp.compute
 47 |       override val source = tvp.source
 48 |       override def transform = tvp.transform
 49 |       override def sg = passedSg
 50 |     }
 51 | }
 52 | 
 53 | /**
 54 |  * Represents a reduction performed by first performing
 55 |  * a transforming computation on the cluster followed by reduction.
 56 |  */
 57 | sealed abstract class TransformValueReduction[S, T] extends Reduction[T]
 58 |   with HasComputeConfig[S, T] {
 59 | 
 60 |   protected def sg: Semigroup[T]
 61 | 
 62 |   override def execute = compute.reduceOption(source)(transform)(sg)
 63 | 
 64 |   override def toPipe = ReduceHelper.toPipe[S, T](this)
 65 | }
 66 | 
 67 | object CacheAffinityValueReduction {
 68 |   def from[K, V, T](cap: CacheAffinityPipe[K, V, T])(passedSg: Semigroup[T]): CacheAffinityValueReduction[K, V, T] =
 69 |     new CacheAffinityValueReduction[K, V, T] {
 70 |       override val compute = cap.compute
 71 |       override val source = cap.source
 72 |       override def transform = cap.transform
 73 |       override def sg = passedSg
 74 |     }
 75 | }
 76 | 
 77 | /**
 78 |  * Represents a reduction performed by first performing
 79 |  * affinity (cache-collocated) computation followed by reduction.
 80 |  */
 81 | sealed abstract class CacheAffinityValueReduction[K, V, T] extends Reduction[T]
 82 |   with HasComputeConfig[CacheAffinity[K, V], T] {
 83 | 
 84 |   protected def sg: Semigroup[T]
 85 | 
 86 |   override def execute = compute.affinityReduceOption(source)(transform)(sg)
 87 | 
 88 |   override def toPipe = ReduceHelper.toPipe[CacheAffinity[K, V], T](this)
 89 | }
 90 | 
 91 | object FlatMapValueReduction {
 92 | 
 93 |   def from[S, T](fvp: FlatMapValuePipe[S, T])(passedSg: Semigroup[T]): FlatMapValueReduction[S, T] =
 94 |     new FlatMapValueReduction[S, T] {
 95 |       override val compute = fvp.compute
 96 |       override val source = fvp.source
 97 |       override def transform = fvp.transform
 98 |       override def sg = passedSg
 99 |     }
100 | }
101 | 
102 | /**
103 |  * Represents a reduction performed by first performing
104 |  * a transforming computation on the cluster followed by
105 |  * flattening and reduction (at the client).
106 |  */
107 | sealed abstract class FlatMapValueReduction[S, T] extends Reduction[T]
108 |   with HasComputeConfig[S, TraversableOnce[T]] {
109 | 
110 |   protected def sg: Semigroup[T]
111 | 
112 |   def execute = compute
113 |     .reduceOption(source)(transform.andThen(_.toList))(Semigroup.listSemigroup[T])
114 |     .flatMap(_.reduceOption(sg.plus(_, _)))
115 | 
116 |   override def toPipe = ReduceHelper.toPipe[S, T](this)
117 | }
118 | 
119 | object FlatMapCacheAffinityReduction {
120 | 
121 |   def from[K, V, T](fcap: FlatMapCacheAffinityPipe[K, V, T])(passedSg: Semigroup[T]): FlatMapCacheAffinityReduction[K, V, T] =
122 |     new FlatMapCacheAffinityReduction[K, V, T] {
123 |       override val compute = fcap.compute
124 |       override val source = fcap.source
125 |       override def transform = fcap.transform
126 |       override def sg = passedSg
127 |     }
128 | }
129 | 
130 | /**
131 |  * Represents a reduction performed by first performing
132 |  * affinity (cache-collocated) computation followed by
133 |  * flattening and reduction (at the client).
134 |  */
135 | sealed abstract class FlatMapCacheAffinityReduction[K, V, T] extends Reduction[T]
136 |   with HasComputeConfig[CacheAffinity[K, V], TraversableOnce[T]] {
137 | 
138 |   protected def sg: Semigroup[T]
139 | 
140 |   def execute = compute
141 |     .affinityReduceOption(source)(transform.andThen(_.toList))(Semigroup.listSemigroup[T])
142 |     .flatMap(_.reduceOption(sg.plus(_, _)))
143 | 
144 |   override def toPipe = ReduceHelper.toPipe[CacheAffinity[K, V], T](this)
145 | }
146 | 
147 | final case class MergedReduction[T](left: Reduction[T],
148 |   right: Reduction[T])(implicit val sg: Semigroup[T])
149 |   extends Reduction[T] {
150 | 
151 |   override def execute =
152 |     Semigroup.optionSemigroup[T].plus(left.execute, right.execute)
153 | 
154 |   override def toPipe = MergedPipe(left.toPipe, right.toPipe)
155 | }
156 | 


--------------------------------------------------------------------------------
/src/main/scala/IgnitePipe.scala:
--------------------------------------------------------------------------------
  1 | package ignite.scala
  2 | 
  3 | import com.twitter.algebird.Semigroup
  4 | import org.apache.ignite.IgniteCache
  5 | import java.io.Serializable
  6 | 
  7 | object IgnitePipe {
  8 | 
  9 |   def empty: IgnitePipe[Nothing] = EmptyPipe
 10 | 
 11 |   def from[T](iter: Iterable[T])(implicit c: ComputeRunner): IgnitePipe[T] =
 12 |     IterablePipe[T](iter)
 13 | 
 14 |   def from[T](iterGen: () => Iterable[T])(implicit c: ComputeRunner): IgnitePipe[T] =
 15 |     from(List(())).flatMap(_ => iterGen())
 16 | 
 17 |   def collocated[K, V, T](cache: IgniteCache[K, V], keys: Set[K])(f: (IgniteCache[K, V], K) => T)(implicit c: ComputeRunner): CacheAffinityPipe[K, V, T] =
 18 |     new CacheAffinityPipe[K, V, T] {
 19 |       override def compute = c
 20 |       override def source = keys.map(CacheAffinity[K, V](cache.getName, _))
 21 |       override def transform = { ca: CacheAffinity[K, V] => f(cache, ca.key) }
 22 |       // TODO: this can be inefficient. keyset enrichment should happen in ComputeRunner
 23 |     }
 24 | }
 25 | 
 26 | /**
 27 |  * Provides composable distributed closures that can run on Apache Ignite.
 28 |  *
 29 |  * Allows chaining functions to be executed on the cluster. Reduction is done
 30 |  * on the client. Note that pipe operations like flattening, filtering are also
 31 |  * performed on the client after gathering results from the nodes.
 32 |  *
 33 |  * Best practice is to push computations to the cluster as much as possible
 34 |  * and flatten, filter on the client only if the scatter-gather overhead is
 35 |  * acceptable and results can fit on the client.
 36 |  */
 37 | sealed trait IgnitePipe[T] extends Serializable {
 38 |   // TODO: make this covariant
 39 | 
 40 |   /**
 41 |    * Transform each element using the function f.
 42 |    *
 43 |    * This is executed on the cluster nodes. Chained map transforms
 44 |    * are composed and executed once on the cluster nodes. Use .fork
 45 |    * to manually split the chain if tuning is required.
 46 |    */
 47 |   def map[U](f: T => U): IgnitePipe[U]
 48 | 
 49 |   /**
 50 |    * Transform each value using the function f and flatten the result.
 51 |    *
 52 |    * Note: This is not a monadic composition.
 53 |    *
 54 |    * Flatten step is performed on the client. If you have a chain of flatMaps,
 55 |    * all functions in the chain are composed and flattening is performed once
 56 |    * on the client.
 57 |    *
 58 |    * To manually split the flatMap chain, use .fork. Forking is useful when
 59 |    * dealing with long, lazy chains, or when adding a barrier is desired.
 60 |    */
 61 |   def flatMap[U](f: T => TraversableOnce[U]): IgnitePipe[U]
 62 | 
 63 |   /**
 64 |    * Filter elements using the function f.
 65 |    *
 66 |    * Implemented as a flatMap executed on the client.
 67 |    */
 68 |   def filter(f: T => Boolean): IgnitePipe[T] =
 69 |     flatMap { t => if (f(t)) Iterator(t) else Iterator.empty }
 70 | 
 71 |   /**
 72 |    * Prepare a Reduction based on the provided Semigroup.
 73 |    *
 74 |    * Note that results can arrive from cluster nodes in any order,
 75 |    * so the operation has to be associative and commutative.
 76 |    */
 77 |   def reduce(implicit sg: Semigroup[T]): Reduction[T]
 78 | 
 79 |   /** Merge two pipes of the same type*/
 80 |   def ++(p: IgnitePipe[T]): IgnitePipe[T] = p match {
 81 |     case IterablePipe(iter) if iter.isEmpty => this
 82 |     case _ => MergedPipe(this, p)
 83 |   }
 84 | 
 85 |   /**
 86 |    * Manually add a fork in the execution chain.
 87 |    * This creates a barrier, which means the subsequent transforms
 88 |    * are planned on a fresh Ignite closure.
 89 |    */
 90 |   def fork: IgnitePipe[T]
 91 | 
 92 |   /** Execute the chain and return the computed values. */
 93 |   def execute: Iterable[T]
 94 | }
 95 | 
 96 | final case object EmptyPipe extends IgnitePipe[Nothing] {
 97 | 
 98 |   override def map[U](f: Nothing => U) = sys.error("map called on EmptyPipe")
 99 | 
100 |   override def flatMap[U](f: Nothing => TraversableOnce[U]) =
101 |     sys.error("flatMap called on EmptyPipe")
102 | 
103 |   override def reduce(implicit sg: Semigroup[Nothing]) = EmptyReduction
104 | 
105 |   override def fork = this
106 | 
107 |   override def execute = Iterable.empty[Nothing]
108 | }
109 | 
110 | /**
111 |  * Trait for pipes that hold information about
112 |  * the cluster along with the source and transform
113 |  * for underlying computation.
114 |  */
115 | trait HasComputeConfig[S, T] {
116 |   def compute: ComputeRunner
117 | 
118 |   def source: Iterable[S]
119 | 
120 |   def transform: S => T
121 | }
122 | 
123 | /**
124 |  * Represents a transforming computation on the cluster.
125 |  */
126 | sealed abstract class TransformValuePipe[S, T] extends IgnitePipe[T]
127 |   with HasComputeConfig[S, T] {
128 | 
129 |   override def map[U](f: T => U) = PipeHelper.toTransformValuePipe[S, T, U](this)(f)
130 | 
131 |   override def flatMap[U](f: T => TraversableOnce[U]) =
132 |     PipeHelper.toFlatMapValuePipe[S, T, U](this)(f)
133 | 
134 |   override def reduce(implicit sg: Semigroup[T]) =
135 |     TransformValueReduction.from(this)(sg)
136 | 
137 |   override def fork = PipeHelper.forkPipe(this)
138 | 
139 |   override def execute = compute.apply(source)(transform)
140 | }
141 | 
142 | /**
143 |  * Represents a transforming computation on the cluster
144 |  * followed by flattening of results done at the client.
145 |  */
146 | sealed abstract class FlatMapValuePipe[S, T] extends IgnitePipe[T]
147 |   with HasComputeConfig[S, TraversableOnce[T]] {
148 | 
149 |   override def map[U](f: T => U) =
150 |     PipeHelper.toTransformValuePipe[S, T, U](this)(f)
151 | 
152 |   override def flatMap[U](f: T => TraversableOnce[U]) =
153 |     PipeHelper.toFlatMapValuePipe[S, T, U](this)(f)
154 | 
155 |   override def reduce(implicit sg: Semigroup[T]) =
156 |     FlatMapValueReduction.from(this)(sg)
157 | 
158 |   override def fork = PipeHelper.forkPipe(this)
159 | 
160 |   override def execute = compute.flatMapApply[S, T](source)(transform)
161 | }
162 | 
163 | /**
164 |  * Represents a transforming affinity (cache-collocation)
165 |  * computation on the cluster.
166 |  */
167 | sealed abstract class CacheAffinityPipe[K, V, T] extends IgnitePipe[T]
168 |   with HasComputeConfig[CacheAffinity[K, V], T] {
169 | 
170 |   override def map[U](f: T => U) =
171 |     PipeHelper.toCacheAffinityPipe[K, V, T, U](this)(f)
172 | 
173 |   override def flatMap[U](f: T => TraversableOnce[U]) =
174 |     PipeHelper.toFlatMapCacheAffinityPipe[K, V, T, U](this)(f)
175 | 
176 |   override def reduce(implicit sg: Semigroup[T]) =
177 |     CacheAffinityValueReduction.from(this)(sg)
178 | 
179 |   override def fork = PipeHelper.forkPipe(this)
180 | 
181 |   override def execute = compute.affinityApply(source)(transform)
182 | }
183 | 
184 | /**
185 |  * Represents a transforming affinity (cache-collocated)
186 |  * computation on the cluster followed by flattening of results
187 |  * done at the client.
188 |  */
189 | sealed abstract class FlatMapCacheAffinityPipe[K, V, T] extends IgnitePipe[T]
190 |   with HasComputeConfig[CacheAffinity[K, V], TraversableOnce[T]] {
191 | 
192 |   override def map[U](f: T => U) =
193 |     PipeHelper.toCacheAffinityPipe[K, V, T, U](this)(f)
194 | 
195 |   override def flatMap[U](f: T => TraversableOnce[U]) =
196 |     PipeHelper.toFlatMapCacheAffinityPipe[K, V, T, U](this)(f)
197 | 
198 |   override def reduce(implicit sg: Semigroup[T]) =
199 |     FlatMapCacheAffinityReduction.from(this)(sg)
200 | 
201 |   override def fork = PipeHelper.forkPipe(this)
202 | 
203 |   override def execute = compute.flatMapAffinityApply(source)(transform)
204 | }
205 | 
206 | final case class MergedPipe[T](left: IgnitePipe[T], right: IgnitePipe[T])
207 |   extends IgnitePipe[T] {
208 | 
209 |   override def map[U](f: T => U) =
210 |     MergedPipe(left.map(f), right.map(f))
211 | 
212 |   override def flatMap[U](f: T => TraversableOnce[U]) =
213 |     MergedPipe(left.flatMap(f), right.flatMap(f))
214 | 
215 |   override def reduce(implicit sg: Semigroup[T]) =
216 |     MergedReduction(left.reduce, right.reduce)
217 | 
218 |   override def fork = this
219 | 
220 |   override def execute = left.execute ++ right.execute
221 | }
222 | 
223 | /**
224 |  * A pipe containing a sequence of values.
225 |  *
226 |  * Can be generally used as the starting point in the execution chain. The sequence is
227 |  * partitioned and load balanced across the cluster nodes.
228 |  */
229 | final case class IterablePipe[T](iter: Iterable[T])(implicit val compute: ComputeRunner)
230 |   extends IgnitePipe[T] {
231 | 
232 |   override def map[U](f: T => U) = PipeHelper.toTransformValuePipe[T, U](this)(f)
233 | 
234 |   override def flatMap[U](f: T => TraversableOnce[U]) =
235 |     PipeHelper.toFlatMapValuePipe[T, U](this)(f)
236 | 
237 |   override def reduce(implicit sg: Semigroup[T]) =
238 |     ValueReduction(iter.reduce(sg.plus(_, _)))(compute)
239 | 
240 |   override def fork = this
241 | 
242 |   override def execute = iter
243 | }
244 | 
245 | /**
246 |  * Helpers for switching between IgnitePipe types.
247 |  */
248 | private object PipeHelper {
249 | 
250 |   def toTransformValuePipe[T, U](ip: IterablePipe[T])(f: T => U): TransformValuePipe[T, U] =
251 |     new TransformValuePipe[T, U] {
252 |       override val compute = ip.compute
253 |       override val source = ip.iter
254 |       override def transform = f
255 |     }
256 | 
257 |   def toTransformValuePipe[S, T, U](tvp: TransformValuePipe[S, T])(f: T => U): TransformValuePipe[S, U] =
258 |     new TransformValuePipe[S, U] {
259 |       override val compute = tvp.compute
260 |       override val source = tvp.source
261 |       override def transform = tvp.transform.andThen(f)
262 |     }
263 | 
264 |   def toFlatMapValuePipe[S, T, U](tvp: TransformValuePipe[S, T])(f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] =
265 |     new FlatMapValuePipe[S, U] {
266 |       override val compute = tvp.compute
267 |       override val source = tvp.source
268 |       override def transform = tvp.transform.andThen(f)
269 |     }
270 | 
271 |   def toFlatMapValuePipe[S, T, U](fvp: FlatMapValuePipe[S, T])(f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] =
272 |     new FlatMapValuePipe[S, U] {
273 |       override val compute = fvp.compute
274 |       override val source = fvp.source
275 |       override def transform = fvp.transform.andThen(_.map(f)).andThen(_.flatten)
276 |     }
277 | 
278 |   def toFlatMapValuePipe[T, U](ip: IterablePipe[T])(f: T => TraversableOnce[U]): FlatMapValuePipe[T, U] =
279 |     new FlatMapValuePipe[T, U] {
280 |       override val compute = ip.compute
281 |       override val source = ip.iter
282 |       override def transform = f
283 |     }
284 | 
285 |   // this adds a barrier. the supplied function f is executed
286 |   // on the cluster only after flatten step of the input pipe is
287 |   // executed on the client
288 |   def toTransformValuePipe[S, T, U](fvp: FlatMapValuePipe[S, T])(f: T => U): TransformValuePipe[T, U] =
289 |     IterablePipe(fvp.execute)(fvp.compute).map(f)
290 | 
291 |   def toCacheAffinityPipe[K, V, T, U](cap: CacheAffinityPipe[K, V, T])(f: T => U): CacheAffinityPipe[K, V, U] =
292 |     new CacheAffinityPipe[K, V, U] {
293 |       override val compute = cap.compute
294 |       override val source = cap.source
295 |       override def transform = cap.transform.andThen(f)
296 |     }
297 | 
298 |   // this adds a barrier similar to the non-affinity version
299 |   def toCacheAffinityPipe[K, V, T, U](fcap: FlatMapCacheAffinityPipe[K, V, T])(f: T => U): TransformValuePipe[T, U] =
300 |     IterablePipe(fcap.execute)(fcap.compute).map(f)
301 | 
302 |   def toFlatMapCacheAffinityPipe[K, V, T, U](cap: CacheAffinityPipe[K, V, T])(f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] =
303 |     new FlatMapCacheAffinityPipe[K, V, U] {
304 |       override val compute = cap.compute
305 |       override val source = cap.source
306 |       override def transform = cap.transform.andThen(f)
307 |     }
308 | 
309 |   def toFlatMapCacheAffinityPipe[K, V, T, U](fcap: FlatMapCacheAffinityPipe[K, V, T])(f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] =
310 |     new FlatMapCacheAffinityPipe[K, V, U] {
311 |       override val compute = fcap.compute
312 |       override val source = fcap.source
313 |       override def transform = fcap.transform.andThen(_.map(f)).andThen(_.flatten)
314 |     }
315 | 
316 |   def forkPipe[S, T](tvp: TransformValuePipe[S, T]): IgnitePipe[T] =
317 |     IgnitePipe.from(() => tvp.execute)(tvp.compute)
318 | 
319 |   def forkPipe[S, T](fvp: FlatMapValuePipe[S, T]): IgnitePipe[T] =
320 |     IgnitePipe.from(() => fvp.execute)(fvp.compute)
321 | 
322 |   def forkPipe[K, V, T](cap: CacheAffinityPipe[K, V, T]): IgnitePipe[T] =
323 |     IgnitePipe.from(() => cap.execute)(cap.compute)
324 | 
325 |   def forkPipe[K, V, T](fcap: FlatMapCacheAffinityPipe[K, V, T]): IgnitePipe[T] =
326 |     IgnitePipe.from(() => fcap.execute)(fcap.compute)
327 | }
328 | 
329 | object ReduceHelper {
330 |   // creates a pipe representing the result of the reduction
331 |   def toPipe[S, T](r: Reduction[T] with HasComputeConfig[S, _]): IgnitePipe[T] =
332 |     new TransformValuePipe[T, T] {
333 |       override val compute = r.compute
334 |       override def source = r.execute.toIterable
335 |       override def transform = identity
336 |     }
337 | }
338 | 


--------------------------------------------------------------------------------