(r.state.toUpperCase > "K" &&
55 | r.state.toUpperCase < "P")).map(r => (r.state,r.county))
56 | .distinctOn(r => (r._1,r._2))
57 | .sortBy(r => (r._1,r._2))
58 |
59 | //query with state names > "P"
60 | val qryCountiesP_Z = AQMRPTQuery.filter(r => r.state.toUpperCase > "P")
61 | .map(r => (r.state,r.county))
62 | .distinctOn(r => (r._1,r._2))
63 | .sortBy(r => (r._1,r._2))
64 |
65 | case class Counties(state: String, name: String) extends FDAROW
66 | implicit def toCounties(row: (String,String)) = Counties(row._1,row._2)
67 | val countyLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toCounties _)
68 | //3 separate streams to extract county names from the same database table AQMRPT
69 | val countiesA_KStream = countyLoader.fda_typedStream(qryCountiesA_K.result)(db_b)(64,64)()()
70 | val countiesK_PStream = countyLoader.fda_typedStream(qryCountiesK_P.result)(db_b)(64,64)()()
71 | val countiesP_ZStream = countyLoader.fda_typedStream(qryCountiesP_Z.result)(db_b)(64,64)()()
72 |
73 | //obtain a combined stream by loading the 4 sources in parallel, with at most 4 concurrent computations
74 | val combinedStream = fda_par_load(statesStream,countiesA_KStream,countiesK_PStream,countiesP_ZStream)(4)
75 |
76 |
77 | //define separate rows for different actions
78 | case class StateActionRow(action: FDAAction) extends FDAROW
79 | case class CountyActionRow(action: FDAAction) extends FDAROW
80 | val actionRunner = FDAActionRunner(slick.jdbc.H2Profile)
81 |
82 | //user-task to catch rows of States type and transform them into db insert actions
83 | def processStates: FDAUserTask[FDAROW] = row => {
84 | row match {
85 | //catch states row and transform it into insert action
86 | case States(stateName) => //target row type
87 | println(s"State name: ${stateName}")
88 | val action = StateQuery += StateModel(0,stateName)
89 | fda_next(StateActionRow(action))
90 | case others@ _ => //pass other types to next user-defined-tasks
91 | fda_next(others)
92 | }
93 | }
94 | //user-task to catch rows of Counties type and transform them into db insert actions
95 | def processCounties: FDAUserTask[FDAROW] = row => {
96 | row match {
97 | //catch counties row and transform it into insert action
98 | case Counties(stateName,countyName) => //target row type
99 | println(s"County ${countyName} of ${stateName}")
100 | val action = CountyQuery += CountyModel(0,countyName+ " of "+stateName)
101 | fda_next(CountyActionRow(action))
102 | case others@ _ => //pass other types to next user-defined-tasks
103 | fda_next(others)
104 | }
105 | }
106 |
107 | //user-task to catch States insert action rows and run them
108 | def runStateAction: FDAUserTask[FDAROW] = row => {
109 | row match {
110 | case StateActionRow(action) => //this is a state action row type
111 | println(s"runstate: ${action}")
112 | actionRunner.fda_execAction(action)(db_a) //run this query with db_a context
113 | fda_skip
114 | case others@ _ => //otherwise pass along to the next user-defined tasks
115 | fda_next(others)
116 | }
117 | }
118 |
119 | //user-task to catch Counties insert action rows and run them
120 | def runCountyAction: FDAUserTask[FDAROW] = row => {
121 | row match {
122 | case CountyActionRow(action) => //this is a county action row type
123 | actionRunner.fda_execAction(action)(db_b) //run this query with db_b context
124 | fda_skip
125 | case others@ _ => //otherwise pass along to the next user-defined tasks
126 | fda_next(others)
127 | }
128 | }
129 |
130 |
131 |
132 | def showRows: FDAUserTask[FDAROW] = row => {
133 | row match {
134 | case States(nm) =>
135 | println("")
136 | println(s"State: $nm")
137 | println("************")
138 | fda_skip
139 | case Counties(s,c) =>
140 | println("")
141 | println(s"County: $c")
142 | println(s"state of $s")
143 | println("------------")
144 | fda_skip
145 | case _ => fda_skip
146 | }
147 | }
148 |
149 | combinedStream.appendTask(processStates)
150 | .appendTask(processCounties)
151 | .appendTask(runStateAction)
152 | .appendTask(runCountyAction)
153 | .startRun
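    | //note: task order matters here - processStates/processCounties must come
    | //before runStateAction/runCountyAction, so the action rows they emit
    | //can be caught and executed further downstream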
154 |
155 | }
156 |
--------------------------------------------------------------------------------
/src/main/scala/examples/UserDefinedTask.scala:
--------------------------------------------------------------------------------
1 | package com.bayakala.funda.examples
2 | import slick.jdbc.meta._
3 | import scala.language.implicitConversions
4 | import scala.concurrent.ExecutionContext.Implicits.global
5 | import scala.concurrent.duration._
6 | import scala.concurrent.{Await, Future}
7 | import scala.util.{Failure, Success}
8 | import slick.jdbc.H2Profile.api._
9 | import com.bayakala.funda._
10 | import api._
11 | import com.bayakala.funda.samples.SlickModels._
12 |
13 | object UserDefinedTasks extends App {
14 |
15 |
16 | val db = Database.forConfig("h2db")
17 |
18 | //drop original table schema
19 | val futVectorTables = db.run(MTable.getTables)
20 |
21 | val futDropTable = futVectorTables.flatMap{ tables => {
22 | val tableNames = tables.map(t => t.name.name)
23 | if (tableNames.contains(AQMRPTQuery.baseTableRow.tableName))
24 | db.run(AQMRPTQuery.schema.drop)
25 | else Future(():Unit)
26 | }
27 | }.andThen {
28 | case Success(_) => println(s"Table ${AQMRPTQuery.baseTableRow.tableName} dropped successfully! ")
29 | case Failure(e) => println(s"Failed to drop Table ${AQMRPTQuery.baseTableRow.tableName}, it may not exist! Error: ${e.getMessage}")
30 | }
31 | Await.ready(futDropTable,Duration.Inf)
32 |
33 | //create new table to refine AQMRawTable
34 | val actionCreateTable = AQMRPTQuery.schema.create
35 | val futCreateTable = db.run(actionCreateTable).andThen {
36 | case Success(_) => println("Table created successfully!")
37 | case Failure(e) => println(s"Table may exist already! Error: ${e.getMessage}")
38 | }
39 | //carry on even if table creation fails
40 | Await.ready(futCreateTable,Duration.Inf)
41 |
42 |
43 | //truncate data (schema.truncate requires Slick 3.2.1+)
44 | val futTruncateTable = futVectorTables.flatMap{ tables => {
45 | val tableNames = tables.map(t => t.name.name)
46 | if (tableNames.contains(AQMRPTQuery.baseTableRow.tableName))
47 | db.run(AQMRPTQuery.schema.truncate)
48 | else Future(():Unit)
49 | }
50 | }.andThen {
51 | case Success(_) => println(s"Table ${AQMRPTQuery.baseTableRow.tableName} truncated successfully!")
52 | case Failure(e) => println(s"Failed to truncate Table ${AQMRPTQuery.baseTableRow.tableName}! Error: ${e.getMessage}")
53 | }
54 | Await.ready(futTruncateTable,Duration.Inf)
55 |
56 |
57 | //load original table content
58 | //original table strong-typed-row
59 | case class AQMRaw(mid: String, state: String,
60 | county: String, year: String, value: String) extends FDAROW
61 | implicit def toAQMRaw(row: (String,String,String,String,String)) =
62 | AQMRaw(row._1,row._2,row._3,row._4,row._5)
63 | val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toAQMRaw _)
64 | // val queryAQMRaw = for { r <- AQMRawQuery } yield (r.mid,r.state,r.county,r.year,r.value)
65 | val queryAQMRaw = sql"""
66 | SELECT MEASUREID,STATENAME,COUNTYNAME,REPORTYEAR,VALUE FROM AIRQM
67 | """.as[(String,String,String,String,String)]
68 |
69 | val streamAQMRaw: FDAPipeLine[FDAROW] = streamLoader.fda_typedStream(queryAQMRaw)(db)(512,512)()()
70 |
71 |
72 | //filter out rows with inconvertible value strings and out of ranged value and year
73 | def filterRows: FDAUserTask[FDAROW] = row => {
74 | row match {
75 | case r: AQMRaw => {
76 | try {
77 | val yr = r.year.toInt
78 | val v = r.value.toInt
79 | val vlu = if ( v > 10 ) 10 else v
80 | val data = AQMRPTModel(0,r.mid.toInt,r.state,r.county,yr,vlu,0,true)
81 | if (yr > 1960 && yr < 2018)
82 | fda_next(data) //this row ok. pass downstream
83 | else
84 | fda_skip //filter out this row
85 | } catch {
86 | case e: Exception =>
87 | fda_next(AQMRPTModel(0,r.mid.toInt,r.state,r.county,2000,0,0,false))
88 | //pass an invalid row downstream
89 | }
90 | }
91 | case _ => fda_skip //wrong type, skip
92 | }
93 | }
94 |
95 | //transform data to action for later execution
96 | def toAction: FDAUserTask[FDAROW] = row => {
97 | row match {
98 | case r: AQMRPTModel =>
99 | val queryAction = AQMRPTQuery += r
100 | fda_next(FDAActionRow(queryAction))
101 | case other @ _ => fda_next(other)
102 | }
103 | }
104 |
105 | //get a query runner and an action task
106 | val actionRunner = FDAActionRunner(slick.jdbc.H2Profile)
107 | def runActionRow: FDAUserTask[FDAROW] = action => {
108 | action match {
109 | case FDAActionRow(q) => actionRunner.fda_execAction(q)(db)
110 | fda_skip
111 | case _ => fda_skip
112 | }
113 | }
114 |
115 |
116 | //start the program
117 | val streamAllTasks = streamAQMRaw.appendTask(filterRows)
118 | .appendTask(toAction)
119 | .appendTask(runActionRow)
120 |
121 | val streamToRun = streamAllTasks.onError { case e: Exception => println("Error:"+e.getMessage); fda_appendRow(FDAErrorRow(new Exception(e))) }
122 |
123 | streamToRun.startRun
124 |
125 | //aggregate-task demo: get count and sum of value for each state and year
126 | val orderedAQMRPT = AQMRPTQuery.sortBy(r => (r.state,r.year))
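    | //the sort above matters: aggregateValue below detects a group change by
    | //comparing consecutive rows, so equal (state,year) rows must arrive together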
127 | //TableElementType conversion; must be declared implicit
128 | implicit def toAQMRPT(row: AQMRPTTable#TableElementType) =
129 | AQMRPTModel(row.rid,row.mid,row.state,row.county,row.year,row.value,row.total,row.valid)
130 | val aqmrStreamLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toAQMRPT _)
131 | val aqmrStream: FDAPipeLine[FDAROW] = aqmrStreamLoader.fda_typedStream(orderedAQMRPT.result)(db)(512,512)()()
132 | //user defined aggregator type.
133 | case class Accu(state: String, county: String, year: Int, count: Int, sumOfValue: Int)
134 | //user defined aggregation task
135 | def aggregateValue: FDAAggrTask[Accu,FDAROW] = (accu,row) => {
136 | row match {
137 | case aqmr: AQMRPTModel =>
138 | if (accu.state == "" || (aqmr.state == accu.state && aqmr.year == accu.year))
139 | //same group: increment count and add to sum, pass no row downstream
140 | (Accu(aqmr.state,aqmr.county,aqmr.year,accu.count+1, accu.sumOfValue+aqmr.value),fda_skip)
141 | else
142 | //reset accumulator, create a new aggregated row and pass downstream
143 | (Accu(aqmr.state,aqmr.county,aqmr.year,1, aqmr.value)
144 | ,fda_next(AQMRPTModel(0,9999,accu.state,accu.county,accu.year
145 | ,accu.count,accu.sumOfValue/accu.count,true)))
146 | case FDANullRow =>
147 | //last row encountered. create and pass new aggregated row
148 | (Accu(accu.state,accu.county,accu.year,1, 0)
149 | ,fda_next(AQMRPTModel(0,9999,accu.state,accu.county,accu.year
150 | ,accu.count,accu.sumOfValue/accu.count,true)))
151 | //incorrect row type, do nothing
152 | case _ => (accu,fda_skip)
153 | }
154 | }
155 |
156 |
157 | aqmrStream.aggregateTask(Accu("","",0,0,0),aggregateValue)
158 | .appendTask(toAction)
159 | .appendTask(runActionRow)
160 | .startRun
161 |
162 |
163 | }
164 |
--------------------------------------------------------------------------------
/src/main/scala/examples/ParallelTasks.scala:
--------------------------------------------------------------------------------
1 | package examples
2 | import slick.jdbc.meta._
3 | import com.bayakala.funda._
4 | import api._
5 | import scala.language.implicitConversions
6 | import scala.concurrent.ExecutionContext.Implicits.global
7 | import scala.concurrent.duration._
8 | import scala.concurrent.{Await, Future}
9 | import scala.util.{Failure, Success}
10 | import slick.jdbc.H2Profile.api._
11 | import com.bayakala.funda.samples.SlickModels._
12 | import fs2.Strategy
13 |
14 | object ParallelTasks extends App {
15 |
16 | val db = Database.forConfig("h2db")
17 |
18 | //drop original table schema
19 | val futVectorTables = db.run(MTable.getTables)
20 |
21 | val futDropTable = futVectorTables.flatMap{ tables => {
22 | val tableNames = tables.map(t => t.name.name)
23 | if (tableNames.contains(NORMAQMQuery.baseTableRow.tableName))
24 | db.run(NORMAQMQuery.schema.drop)
25 | else Future(():Unit)
26 | }
27 | }.andThen {
28 | case Success(_) => println(s"Table ${NORMAQMQuery.baseTableRow.tableName} dropped successfully! ")
29 | case Failure(e) => println(s"Failed to drop Table ${NORMAQMQuery.baseTableRow.tableName}, it may not exist! Error: ${e.getMessage}")
30 | }
31 | Await.ready(futDropTable,Duration.Inf)
32 |
33 | //create new table to refine AQMRawTable
34 | val actionCreateTable = NORMAQMQuery.schema.create
35 | val futCreateTable = db.run(actionCreateTable).andThen {
36 | case Success(_) => println("Table created successfully!")
37 | case Failure(e) => println(s"Table may exist already! Error: ${e.getMessage}")
38 | }
39 | //carry on even if table creation fails
40 | Await.ready(futCreateTable,Duration.Inf)
41 |
42 |
43 | //truncate data (schema.truncate requires Slick 3.2.1+)
44 | val futTruncateTable = futVectorTables.flatMap{ tables => {
45 | val tableNames = tables.map(t => t.name.name)
46 | if (tableNames.contains(NORMAQMQuery.baseTableRow.tableName))
47 | db.run(NORMAQMQuery.schema.truncate)
48 | else Future(():Unit)
49 | }
50 | }.andThen {
51 | case Success(_) => println(s"Table ${NORMAQMQuery.baseTableRow.tableName} truncated successfully!")
52 | case Failure(e) => println(s"Failed to truncate Table ${NORMAQMQuery.baseTableRow.tableName}! Error: ${e.getMessage}")
53 | }
54 | Await.ready(futTruncateTable,Duration.Inf)
55 |
56 | //a contrived task designed to consume resources:
57 | //get the id for a given state name from the STATES table
58 | def getStateID(state: String): Int = {
59 | //create a stream for state id with state name
60 | implicit def toState(row: StateTable#TableElementType) = StateModel(row.id,row.name)
61 | val stateLoader = FDAViewLoader(slick.jdbc.H2Profile)(toState _)
62 | val stateSeq = stateLoader.fda_typedRows(StateQuery.result)(db).toSeq
63 | //construct a Stream[Task,FDAROW] from the static sequence
64 | val stateStream = fda_staticSource(stateSeq)()
65 | var id = -1
66 | def getid: FDAUserTask[FDAROW] = row => {
67 | row match {
68 | case StateModel(stid,stname) => //target row type
69 | if (stname.contains(state)) {
70 | id = stid
71 | fda_break //exit
72 | }
73 | else fda_skip //take next row
74 | case _ => fda_skip
75 | }
76 | }
77 | stateStream.appendTask(getid).startRun
78 | id
79 | }
80 | //another contrived task designed to consume resources:
81 | //get the id for given state and county names from the COUNTIES table
82 | def getCountyID(state: String, county: String): Int = {
83 | //create a stream for county id with state name and county name
84 | implicit def toCounty(row: CountyTable#TableElementType) = CountyModel(row.id,row.name)
85 | val countyLoader = FDAViewLoader(slick.jdbc.H2Profile)(toCounty _)
86 | val countySeq = countyLoader.fda_typedRows(CountyQuery.result)(db).toSeq
87 | //construct a Stream[Task,FDAROW] from the static sequence
88 | val countyStream = fda_staticSource(countySeq)()
89 | var id = -1
90 | def getid: FDAUserTask[FDAROW] = row => {
91 | row match {
92 | case CountyModel(cid,cname) => //target row type
93 | if (cname.contains(state) && cname.contains(county)) {
94 | id = cid
95 | fda_break //exit
96 | }
97 | else fda_skip //take next row
98 | case _ => fda_skip
99 | }
100 | }
101 | countyStream.appendTask(getid).startRun
102 | id
103 | }
104 |
105 | //original table listing
106 | implicit def toAQMRPT(row: AQMRPTTable#TableElementType) =
107 | AQMRPTModel(row.rid,row.mid,row.state,row.county,row.year,row.value,row.total,row.valid)
108 | val AQMRPTLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toAQMRPT _)
109 | val AQMRPTStream = AQMRPTLoader.fda_typedStream(AQMRPTQuery.result)(db)(256,256)()()
110 |
111 | def getIdsThenInsertAction: FDAUserTask[FDAROW] = row => {
112 | row match {
113 | case aqm: AQMRPTModel =>
114 | if (aqm.valid) {
115 | val stateId = 0 //getStateID(aqm.state)
116 | val countyId = 0 //getCountyID(aqm.state,aqm.county)
117 | val action = NORMAQMQuery += NORMAQMModel(0,aqm.mid, stateId, countyId, aqm.year,aqm.value,aqm.total)
118 | fda_next(FDAActionRow(action))
119 | }
120 | else fda_skip
121 | case _ => fda_skip
122 | }
123 | }
124 | val runner = FDAActionRunner(slick.jdbc.H2Profile)
125 | def runInsertAction: FDAUserTask[FDAROW] = row =>
126 | row match {
127 | case FDAActionRow(action) =>
128 | runner.fda_execAction(action)(db)
129 | fda_skip
130 | case _ => fda_skip
131 | }
132 |
133 | val cnt_start = System.currentTimeMillis()
134 |
135 |
136 | /*
137 | AQMRPTStream.take(100000)
138 | .appendTask(getIdsThenInsertAction)
139 | .appendTask(runInsertAction)
140 | .startRun
141 | //println(s"processing 10000 rows in a single thread in ${(System.currentTimeMillis - cnt_start)/1000} seconds")
142 | //processing 10000 rows in a single thread in 570 seconds
143 | //println(s"processing 20000 rows in a single thread in ${(System.currentTimeMillis - cnt_start)/1000} seconds")
144 | //processing 20000 rows in a single thread in 1090 seconds
145 | //println(s"processing 100000 rows in a single thread in ${(System.currentTimeMillis - cnt_start)/1000} seconds")
146 | //processing 100000 rows in a single thread in 2+ hrs
147 | */
148 |
149 | implicit val strategy = Strategy.fromCachedDaemonPool("cachedPool")
150 | // implicit val strategy = Strategy.fromFixedDaemonPool(6)
151 | fda_runPar(AQMRPTStream.toPar(getIdsThenInsertAction))(4)
152 | .appendTask(runInsertAction)
153 | .startRun
154 |
155 | //println(s"processing 10000 rows parallelly in ${(System.currentTimeMillis - cnt_start)/1000} seconds")
156 | // processing 10000 rows parallelly in 316 seconds
157 | //println(s"processing 20000 rows parallelly in ${(System.currentTimeMillis - cnt_start)/1000} seconds")
158 | //processing 20000 rows parallelly in 614 seconds
159 | println(s"processing 100000 rows parallelly in ${(System.currentTimeMillis - cnt_start)/1000} seconds")
160 | //processing 100000 rows parallelly in 3885 seconds
161 |
162 | }
163 |
--------------------------------------------------------------------------------
/src/main/scala/examples/ParallelExecution.scala:
--------------------------------------------------------------------------------
1 | package examples
2 | import slick.jdbc.meta._
3 | import com.bayakala.funda._
4 | import api._
5 | import scala.language.implicitConversions
6 | import scala.concurrent.ExecutionContext.Implicits.global
7 | import scala.concurrent.duration._
8 | import scala.concurrent.{Await, Future}
9 | import scala.util.{Failure, Success}
10 | import slick.jdbc.H2Profile.api._
11 | import com.bayakala.funda.samples.SlickModels._
12 |
13 |
14 | object ParallelExecution extends App {
15 |
16 | val db = Database.forConfig("h2db")
17 |
18 | //drop original table schema
19 | val futVectorTables = db.run(MTable.getTables)
20 |
21 | val futDropTable = futVectorTables.flatMap{ tables => {
22 | val tableNames = tables.map(t => t.name.name)
23 | if (tableNames.contains(NORMAQMQuery.baseTableRow.tableName))
24 | db.run(NORMAQMQuery.schema.drop)
25 | else Future(():Unit)
26 | }
27 | }.andThen {
28 | case Success(_) => println(s"Table ${NORMAQMQuery.baseTableRow.tableName} dropped successfully! ")
29 | case Failure(e) => println(s"Failed to drop Table ${NORMAQMQuery.baseTableRow.tableName}, it may not exist! Error: ${e.getMessage}")
30 | }
31 | Await.ready(futDropTable,Duration.Inf)
32 |
33 | //create new table to refine AQMRawTable
34 | val actionCreateTable = NORMAQMQuery.schema.create
35 | val futCreateTable = db.run(actionCreateTable).andThen {
36 | case Success(_) => println("Table created successfully!")
37 | case Failure(e) => println(s"Table may exist already! Error: ${e.getMessage}")
38 | }
39 | //carry on even if table creation fails
40 | Await.ready(futCreateTable,Duration.Inf)
41 |
42 |
43 | //truncate data (schema.truncate requires Slick 3.2.1+)
44 | val futTruncateTable = futVectorTables.flatMap{ tables => {
45 | val tableNames = tables.map(t => t.name.name)
46 | if (tableNames.contains(NORMAQMQuery.baseTableRow.tableName))
47 | db.run(NORMAQMQuery.schema.truncate)
48 | else Future(():Unit)
49 | }
50 | }.andThen {
51 | case Success(_) => println(s"Table ${NORMAQMQuery.baseTableRow.tableName} truncated successfully!")
52 | case Failure(e) => println(s"Failed to truncate Table ${NORMAQMQuery.baseTableRow.tableName}! Error: ${e.getMessage}")
53 | }
54 | Await.ready(futTruncateTable,Duration.Inf)
55 |
56 | //a contrived task designed to consume resources:
57 | //get the id for a given state name from the STATES table
58 | def getStateID(state: String): Int = {
59 | //create a stream for state id with state name
60 | implicit def toState(row: StateTable#TableElementType) = StateModel(row.id,row.name)
61 | val stateLoader = FDAViewLoader(slick.jdbc.H2Profile)(toState _)
62 | val stateSeq = stateLoader.fda_typedRows(StateQuery.result)(db).toSeq
63 | //construct a Stream[Task,FDAROW] from the static sequence
64 | val stateStream = fda_staticSource(stateSeq)()
65 | var id = -1
66 | def getid: FDAUserTask[FDAROW] = row => {
67 | row match {
68 | case StateModel(stid,stname) => //target row type
69 | if (stname.contains(state)) {
70 | id = stid
71 | fda_break //exit
72 | }
73 | else fda_skip //take next row
74 | case _ => fda_skip
75 | }
76 | }
77 | stateStream.appendTask(getid).startRun
78 | id
79 | }
80 | //another contrived task designed to consume resources:
81 | //get the id for given state and county names from the COUNTIES table
82 | def getCountyID(state: String, county: String): Int = {
83 | //create a stream for county id with state name and county name
84 | implicit def toCounty(row: CountyTable#TableElementType) = CountyModel(row.id,row.name)
85 | val countyLoader = FDAViewLoader(slick.jdbc.H2Profile)(toCounty _)
86 | val countySeq = countyLoader.fda_typedRows(CountyQuery.result)(db).toSeq
87 | //construct a Stream[Task,FDAROW] from the static sequence
88 | val countyStream = fda_staticSource(countySeq)()
89 | var id = -1
90 | def getid: FDAUserTask[FDAROW] = row => {
91 | row match {
92 | case CountyModel(cid,cname) => //target row type
93 | if (cname.contains(state) && cname.contains(county)) {
94 | id = cid
95 | fda_break //exit
96 | }
97 | else fda_skip //take next row
98 | case _ => fda_skip
99 | }
100 | }
101 | countyStream.appendTask(getid).startRun
102 | id
103 | }
104 |
105 | //process input row and produce action row to insert into NORMAQM
106 | def getIdsThenInsertAction: FDAUserTask[FDAROW] = row => {
107 | row match {
108 | case aqm: AQMRPTModel =>
109 | if (aqm.valid) {
110 | val stateId = getStateID(aqm.state)
111 | val countyId = getCountyID(aqm.state,aqm.county)
112 | val action = NORMAQMQuery += NORMAQMModel(0,aqm.mid, stateId, countyId, aqm.year,aqm.value,aqm.total)
113 | fda_next(FDAActionRow(action))
114 | }
115 | else fda_skip
116 | case _ => fda_skip
117 | }
118 | }
119 | //runner for the action rows
120 | val runner = FDAActionRunner(slick.jdbc.H2Profile)
121 | def runInsertAction: FDAUserTask[FDAROW] = row =>
122 | row match {
123 | case FDAActionRow(action) =>
124 | runner.fda_execAction(action)(db)
125 | fda_skip
126 | case _ => fda_skip
127 | }
128 |
129 | //create parallel sources
130 | //get a stream of years
131 | val qryYears = AQMRPTQuery.map(_.year).distinct
132 | case class Years(year: Int) extends FDAROW
133 |
134 | implicit def toYears(y: Int) = Years(y)
135 |
136 | val yearViewLoader = FDAViewLoader(slick.jdbc.H2Profile)(toYears _)
137 | val yearSeq = yearViewLoader.fda_typedRows(qryYears.result)(db).toSeq
138 | val yearStream = fda_staticSource(yearSeq)()
139 |
140 | //strong row type
141 | implicit def toAQMRPT(row: AQMRPTTable#TableElementType) =
142 | AQMRPTModel(row.rid, row.mid, row.state, row.county, row.year, row.value, row.total, row.valid)
143 |
144 | //shared stream loader when operate in parallel mode
145 | val AQMRPTLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toAQMRPT _)
146 |
147 | //loading rows with year yr
148 | def loadRowsInYear(yr: Int) = {
149 | //a new query
150 | val query = AQMRPTQuery.filter(row => row.year === yr)
151 | //reuse same loader
152 | AQMRPTLoader.fda_typedStream(query.result)(db)(256, 256)(println(s"End of stream ${yr}!!!!!!"))()
153 | }
154 |
155 | //loading rows by year
156 | def loadRowsByYear: FDASourceLoader = row => {
157 | row match {
158 | case Years(y) => loadRowsInYear(y) //produce stream of the year
159 | case _ => fda_appendRow(FDANullRow)
160 | }
161 | }
162 |
163 |
164 | //start counter
165 | val cnt_start = System.currentTimeMillis()
166 |
167 | def showRecord: FDAUserTask[FDAROW] = row => {
168 | row match {
169 | case Years(y) => println(y); fda_skip
170 | case aqm: AQMRPTModel =>
171 | println(s"${aqm.year} $aqm")
172 | fda_next(aqm)
173 | case FDAActionRow(action) =>
174 | println(s"${action}")
175 | fda_skip
176 | case _ => fda_skip
177 | }
178 | }
179 |
180 | //the following is a process of composition of stream combinators
181 | //get parallel source constructor
182 | val parSource = yearStream.toParSource(loadRowsByYear)
183 | //produce a stream from parallel sources
184 | val source = fda_par_source(parSource)(4)
185 | //turn getIdsThenInsertAction into parallel task
186 | val parTasks = source.toPar(getIdsThenInsertAction)
187 | //runPar to produce a new stream
188 | val actionStream = fda_runPar(parTasks)(4)
189 | //turn runInsertAction into parallel task
190 | val parRun = actionStream.toPar(runInsertAction)
191 | //runPar and carry out by startRun
192 | fda_runPar(parRun)(2).startRun
193 |
194 | println(s"processing 219400 rows parallelly in ${(System.currentTimeMillis - cnt_start)/1000} seconds")
195 |
196 |
197 |
198 | }
199 |
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/src/main/scala/com/bayakala/funda/package.scala:
--------------------------------------------------------------------------------
1 | package com.bayakala
2 |
3 | /**
4 | * FunDA core types, global imports and fs2 stream method injection
5 | */
6 | package object funda {
7 | import fs2._
8 | import slick.dbio._
9 | import scala.concurrent.Future
10 | import akka.stream.stage._
11 |
12 | implicit val fda_strategy = Strategy.fromFixedDaemonPool(4)
13 | implicit val fda_scheduler = Scheduler.fromFixedDaemonPool(4)
14 |
15 | /** fs2 manual halt type
16 | * terminateNow signals FDADataStream.pushData to stop
17 | * at any point during the enqueue process
18 | */
19 | class Fs2Terminator {
20 | var terminateNow = false
21 | def reset = terminateNow = false
22 | def stopASAP = terminateNow = true
23 | }
24 | /** default killswitch for fs2
25 | * declare separate instances to control multiple concurrently running streams
26 | * by providing the killSwitch parameter explicitly
27 | */
28 | implicit object Fs2KillSwitch extends Fs2Terminator
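    | //usage sketch (hypothetical names): a dedicated kill switch for one stream,
    | //passed explicitly so other streams keep using the default Fs2KillSwitch:
    | //  object MyKillSwitch extends Fs2Terminator
    | //  val src = loader.fda_typedStream(qry.result)(db)(512,128)()(MyKillSwitch)
    | //  MyKillSwitch.stopASAP   //halt this stream at the next enqueue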
29 | /** akka manual halt type
30 | * terminateNow signals FDADataStream.Fs2Gate to stop
31 | * at any point during the enqueue process
32 | */
33 | class AkkaTerminator{
34 | var callback: AsyncCallback[Unit] = null
35 | def stopASAP = {
36 | if (callback != null) {
37 | callback.invoke(())
38 | }
39 | }
40 | }
41 | /** default killswitch for akka
42 | * declare separate instances to control multiple concurrently running streams
43 | * by providing the killSwitch parameter explicitly
44 | */
45 | implicit object AkkaKillSwitch extends AkkaTerminator
46 |
47 | /** data processing pipeline
48 | * a stream of data or action rows
49 | * @tparam ROW type of row
50 | */
51 | type FDAPipeLine[ROW] = Stream[Task, ROW]
52 |
53 | /** data work node
54 | * a work node appended to stream to perform user action
55 | * @tparam ROW type of row
56 | */
57 | type FDAWorkNode[ROW] = Pipe[Task, ROW, ROW]
58 |
59 | /** pipeline valve
60 | * a handle to get rows from upstream
61 | * @tparam ROW type of row
62 | */
63 | type FDAValve[ROW] = Handle[Task, ROW]
64 |
65 | /** pipe connector
66 | * gate to send rows downstream
67 | * @tparam ROW type of row
68 | */
69 | type FDAPipeJoint[ROW] = Pull[Task, ROW, Unit]
70 |
71 | /** user task type
72 | * user define function to be performed at a FDAWorkNode
73 | * given a row from upstream, return Option[List[ROW]] as follows:
74 | * fda_skip -> Some(Nil) : skip sending the current row
75 | * fda_next -> Some(List(r1,r2...)): send r1,r2... downstream
76 | * fda_break -> None : halt stream, end of process
77 | * @tparam ROW type of row
78 | */
79 | type FDAUserTask[ROW] = (ROW) => (Option[List[ROW]])
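    | //a minimal FDAUserTask sketch over a hypothetical row type MyRow, using the
    | //helpers that produce the three values listed above:
    | //  def passValidRows: FDAUserTask[FDAROW] = row => row match {
    | //    case r: MyRow if r.valid => fda_next(r)  //Some(List(r)): send downstream
    | //    case _: MyRow            => fda_skip     //Some(Nil): drop this row
    | //    case FDANullRow          => fda_break    //None: halt the stream
    | //    case other               => fda_next(other)
    | //  }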
80 |
81 | /** source loader type
82 | * a function type to produce a stream from an input row;
83 | * turned into an FDAParSource by toParSource
84 | */
85 | type FDASourceLoader = FDAROW => FDAPipeLine[FDAROW]
86 |
87 | /** aggregation task type
88 | * user define function with aggregation effect to be performed at a FDAWorkNode
89 | * given current aggregation value and row from upstream,
90 | * return updated aggregation value and Option[List[ROW]] as follows:
91 | * fda_skip -> Some(Nil) : skip sending the current row
92 | * fda_next -> Some(List(r1,r2...)): send r1,r2... downstream
93 | * fda_break -> None : halt stream, end of process
94 | * @tparam AGGR type of aggregation
95 | * @tparam ROW type of row
96 | */
97 | type FDAAggrTask[AGGR,ROW] = (AGGR,ROW) => (AGGR,Option[List[ROW]])
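    | //a minimal FDAAggrTask sketch: count rows in an Int accumulator and emit a
    | //single (hypothetical) CountRow when FDANullRow marks the end of the stream:
    | //  case class CountRow(n: Int) extends FDAROW
    | //  def countRows: FDAAggrTask[Int,FDAROW] = (n,row) => row match {
    | //    case FDANullRow => (n, fda_next(CountRow(n)))  //flush at end of stream
    | //    case _          => (n + 1, fda_skip)           //accumulate, emit nothing
    | //  }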
98 |
99 | /** parallel task type
100 | * stream of streams type for parallel running user action
101 | * use stream.toPar to convert from FDAUserTask
102 | */
103 | type FDAParTask = Stream[Task,Stream[Task,Option[List[FDAROW]]]]
104 |
105 | /** parallel source type
106 | * source of sources type for parallel loading data sources
107 | * use stream.toParSource to convert from FDASourceLoader
108 | */
109 | type FDAParSource = Stream[Task,Stream[Task,FDAROW]]
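    | //typical composition (sketch; someTask/someLoader are stand-ins): lift a task
    | //or source loader into its parallel form, then merge the results:
    | //  val parTasks: FDAParTask  = stream.toPar(someTask)
    | //  val merged = fda_runPar(parTasks)(4)         //up to 4 concurrent tasks
    | //  val parSrc: FDAParSource = stream.toParSource(someLoader)
    | //  val source = fda_par_source(parSrc)(4)       //up to 4 concurrent sources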
110 |
111 | /** data row type
112 | * topmost generic row type
113 | */
114 | trait FDAROW
115 |
116 | /**
117 | * an EOS object used to signify end of stream
118 | */
119 | case object FDANullRow extends FDAROW
120 |
121 | /**
122 | * captures an exception in a row
123 | * @param e the captured exception
124 | */
125 | case class FDAErrorRow(e: Exception) extends FDAROW
126 |
127 | /**
128 | * manually emit a row such as FDANullRow or FDAErrorRow
129 | * @example {{{
130 | * //loading rows by year
131 | * def loadRowsByYear: FDASourceLoader = row => {
132 | * row match {
133 | * case Years(y) => loadRowsInYear(y) //produce stream of the year
134 | * case _ => fda_appendRow(FDANullRow)
135 | * }
136 | * }
137 | * }}}
138 | * @param row row to emit
139 | * @return new stream
140 | */
141 | def fda_appendRow(row: FDAROW): FDAPipeLine[FDAROW] = Stream(row)
142 |
143 | /**
144 | * runnable action type
145 | */
146 | type FDAAction = DBIO[Int]
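    | //e.g. a Slick insert yields a typical FDAAction (sketch, with a hypothetical
    | //SomeQuery table): val ins: FDAAction = SomeQuery += someRow  //DBIO[Int]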
147 |
148 | /**
149 | * action row type. can have further distinct child types as follows:
150 | * @example {{{
151 | * scala> class MyActionRow(action: FDAAction) extends FDAActionRow(action)
152 | * }}}
153 | * @param action runnable action
154 | */
155 | case class FDAActionRow(action: FDAAction) extends FDAROW
156 |
157 | /**
158 | * methods injected to fs2Stream
159 | */
160 | implicit class toFDAOps(fs2Stream: FDAPipeLine[FDAROW]) {
161 | /**
162 | * append a user task t to stream
163 | * @example {{{
164 | * val streamAllTasks = streamAQMRaw.appendTask(filterRows)
165 | * .appendTask(toAction)
166 | * .appendTask(runActionRow)
167 | * }}}
168 | * @param t user defined function
169 | * @return new stream
170 | */
171 | def appendTask(t: FDAUserTask[FDAROW]): FDAPipeLine[FDAROW] =
172 | fs2Stream.through(FDATask.fda_execUserTask(t))
173 |
174 | /**
175 | * append a user defined aggregation task t
176 | * @example {{{
177 | * //user defined aggregator type.
178 | * case class Accu(state: String, county: String, year: Int, count: Int, sumOfValue: Int)
179 | *
180 | * aqmrStream.aggregateTask(Accu("","",0,0,0),aggregateValue)
181 | * .appendTask(toAction)
182 | * .appendTask(runActionRow)
183 | * .startRun
184 | * }}}
185 | * @param aggr initial value of aggregation
186 | * @param t user defined task
187 | * @tparam AGGR type of aggr
188 | * @return new stream
189 | */
190 | def aggregateTask[AGGR](aggr: AGGR, t: FDAAggrTask[AGGR, FDAROW]): FDAPipeLine[FDAROW] =
191 | fs2Stream.through(FDATask.fda_aggregate(aggr, t))
192 |
193 | /**
194 | * replaces Stream[Task,ROW].run.unsafeRun
195 | * @example {{{
196 | * streamAQMRaw.appendTask(filterRows)
197 | * .appendTask(toAction)
198 | * .appendTask(runActionRow)
199 | * .startRun
200 | * }}}
201 | */
202 | def startRun: Unit = fs2Stream.run.unsafeRun
203 |
204 | /**
205 | * replaces Stream[Task,ROW].run.unsafeRunAsyncFuture
206 | * returns immediately
207 | *
208 | * @return Future
209 | */
210 | def startFuture[A]: Future[Unit] = fs2Stream.run.unsafeRunAsyncFuture
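    | //usage sketch: start the stream without blocking, then wait elsewhere:
    | //  val done: Future[Unit] = stream.startFuture
    | //  Await.ready(done, Duration.Inf)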
211 |
212 |
213 | /**
214 | * turn user task into type for parallel computation
215 | * @example {{{
216 | * //runner for the action rows
217 | * val runner = FDAActionRunner(slick.jdbc.H2Profile)
218 | * def runInsertAction: FDAUserTask[FDAROW] = row =>
219 | * row match {
220 | * case FDAActionRow(action) =>
221 | * runner.fda_execAction(action)(db)
222 | * fda_skip
223 | * case _ => fda_skip
224 | * }
225 | *
226 | * //turn runInsertAction into parallel task
227 | * val parRun = actionStream.toPar(runInsertAction)
228 | * }}}
229 | * @param st user defined task
230 | * @return stream of streams
231 | */
232 | def toPar(st: FDAUserTask[FDAROW]): FDAParTask =
233 | fs2Stream.map { row =>
234 | Stream.eval(Task {
235 | st(row)
236 | })
237 | }
238 |
239 | /**
240 | * turn a single stream into parallel sources
241 | * @example {{{
242 | * //loading rows with year yr
243 | * def loadRowsInYear(yr: Int) = {
244 | * //a new query
245 | * val query = AQMRPTQuery.filter(row => row.year === yr)
246 | * //reuse same loader
247 | * AQMRPTLoader.fda_typedStream(query.result)(db)(256, 256)(println(s"End of stream ${yr}!!!!!!"))()
248 | * }
249 | *
250 | * //loading rows by year
251 | * def loadRowsByYear: FDASourceLoader = row => {
252 | * row match {
253 | * case Years(y) => loadRowsInYear(y) //produce stream of the year
254 | * case _ => fda_appendRow(FDANullRow)
255 | * }
256 | * }
257 | *
258 | * //produce a stream from parallel sources
259 | * val source = fda_par_source(parSource)(4)
260 | * }}}
261 | * @param load stream constructing function: FDAROW => FDAPipeLine[FDAROW]
262 | * @return stream of streams
263 | */
264 | def toParSource(load: FDASourceLoader): FDAParSource =
265 | fs2Stream.map(row => load(row))
266 |
267 | }
268 |
269 |
270 | /** methods to run a user defined function on FDAPipeLine */
271 | object FDATask { //work methods for the task nodes
272 | /**
273 | * returns the state of the next work node: using fs2 Handle and Pull,
274 | * take the next element, apply the task function and determine the new state of the stream
275 | * @param task user defined function: ROW => Option[List[ROW]]
276 | * returns an Option[List[ROW]]] value signifying movement downstream
277 | * as follows:
278 | * Some(Nil) : skip sending the current row
279 | * Some(List(r1,r2...)): send r1,r2... downstream
280 | * None : halt stream, end of process
281 | * @tparam ROW row type: FDAROW or FDAActionROW
282 | * @return new state of stream
283 | */
284 | private[funda] def fda_execUserTask[ROW](task: FDAUserTask[ROW]): FDAWorkNode[ROW] = {
285 | def go: FDAValve[ROW] => FDAPipeJoint[ROW] = h => {
286 | h.receive1Option {
287 | case Some((r, h)) => task(r) match {
288 | case Some(lx) => lx match {
289 | case Nil => go(h)
290 | case _ => Pull.output(Chunk.seq(lx)) >> go(h)
291 | }
292 | case None => task(FDANullRow.asInstanceOf[ROW]) match {
293 | case Some(lx) => lx match {
294 | case Nil => Pull.done
295 | case _ => Pull.output(Chunk.seq(lx)) >> Pull.done
296 | }
297 | case _ => Pull.done
298 | }
299 | }
300 | case None => task(FDANullRow.asInstanceOf[ROW]) match {
301 | case Some(lx) => lx match {
302 | case Nil => Pull.done
303 | case _ => Pull.output(Chunk.seq(lx)) >> Pull.done
304 | }
305 | case _ => Pull.done
306 | }
307 | }
308 | }
309 | in => in.pull(go)
310 | }
311 | /**
312 | * returns state of next worknode and some aggregation defined inside user function.
313 | * execute user defined function with internal aggregation mechanism by means of
314 | * functional state transition style of passing in state and return new state.
315 | * take in current aggregation and next row, apply user function on both
316 | * and determine new state of stream
317 | * @param aggr user selected type of aggregation such as Int, (Int,Int) ...
318 | * @param task user defined function: (AGGR,ROW) => (AGGR,Option[List[ROW]])
319 | * take in current aggregation and row,
320 | * and return new aggregation and Option[List[ROW]] with meaning of:
321 | * Some(Nil) : skip sending the current row
322 | * Some(List(r1,r2...)): send r1,r2... downstream
323 | * None : halt stream, end of process
324 | * @tparam AGGR type of aggr
325 | * @tparam ROW type of row
326 | * @return new state of stream
327 | */
328 | private[funda] def fda_aggregate[AGGR,ROW](aggr: AGGR, task: FDAAggrTask[AGGR,ROW]): FDAWorkNode[ROW] = {
329 | def go(acc: AGGR): FDAValve[ROW] => FDAPipeJoint[ROW] = h => {
330 | h.receive1Option {
331 | case Some((r, h)) => task(acc,r) match {
332 | case (a,Some(lx)) => lx match {
333 | case Nil => go(a)(h)
334 | case _ => Pull.output(Chunk.seq(lx)) >> go(a)(h)
335 | }
336 | case (a,None) => task(a,FDANullRow.asInstanceOf[ROW]) match {
337 | case (a,Some(lx)) => lx match {
338 | case Nil => Pull.done
339 | case _ => Pull.output(Chunk.seq(lx)) >> Pull.done
340 | }
341 | case _ => Pull.done
342 | }
343 | }
344 | case None => task(acc,FDANullRow.asInstanceOf[ROW]) match {
345 | case (a,Some(lx)) => lx match {
346 | case Nil => Pull.done
347 | case _ => Pull.output(Chunk.seq(lx)) >> Pull.done
348 | }
349 | case _ => Pull.done
350 | }
351 | }
352 | }
353 | in => in.pull(go(aggr))
354 | }
355 |
356 |
357 | }
358 |
359 |
360 | }
361 |
362 |
--------------------------------------------------------------------------------
/src/main/scala/com/bayakala/funda/fdasources/FDADataStream.scala:
--------------------------------------------------------------------------------
1 | package com.bayakala.funda.fdasources
2 |
3 | import fs2._
4 | import play.api.libs.iteratee._
5 | import com.bayakala.funda._
6 | import slick.jdbc.JdbcProfile
7 |
8 | import akka.actor._
9 | import akka.stream.scaladsl._
10 | import akka.stream._
11 | import akka.stream.stage._
12 | import akka.stream.stage.{GraphStage, GraphStageLogic}
13 |
14 | /** stream loader class wrapper */
15 | trait FDADataStream {
16 |
17 | /** running Slick DBIOAction to produce a data stream conforming to reactive-streams api.
18 | * provide strong typed result conversion if required
19 | * @param slickProfile Slick jdbc profile such as 'slick.jdbc.H2Profile'
20 | * @param convert a defined implicit type conversion function.
21 | * from SOURCE type to TARGET type, set to null if not required
22 | * @tparam SOURCE source type, result type of DBIOAction, most likely a tuple type
23 | * @tparam TARGET final converted type, most likely a case class type
24 | */
25 | class FDAStreamLoader[SOURCE, TARGET](slickProfile: JdbcProfile, convert: SOURCE => TARGET) {
26 |
27 | import slickProfile.api._
28 |
29 | /**
30 | * returns a reactive-stream from Slick DBIOAction result
31 | * using play-iteratees and an fs2 queue to connect to the slick data stream publisher
32 | * provide facade for error handler and finalizer to support exception and cleanup handling
33 | * also provide stream element conversion from SOURCE type to TARGET type
34 | * @example {{{
35 | * val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toTypedRow _)
36 | * val streamSource = streamLoader.fda_typedStream(aqmQuery.result)(db)(512,16,100)()()
37 | * val safeStreamSource = streamLoader.fda_typedStream(aqmQuery.result)(db)(512,16)(
38 | * println("the end finally!"))(killSwitch)
39 | * }}}
40 | * @param action a Slick DBIOAction to produce query results
41 | * @param slickDB Slick database object
42 | * @param fetchSize number of rows cached during database read
43 | * @param queSize size of the queue used by the iteratee as a cache to pass elements to the fs2 stream
44 | * @param take take only the first 'take' elements (0 means no limit)
45 | * @param finalizer cleanup callback
46 | * @param killSwitch use killSwitch.stopASAP to halt stream
47 | * @param convert just a measure to guarantee conversion function is defined
48 | * when this function is used there has to be a converter defined
49 | * implicitly in compile time
50 | * @return a reactive-stream of TARGET row type elements
51 | */
52 | def fda_typedStream(action: DBIOAction[Iterable[SOURCE],Streaming[SOURCE],Effect.Read])(
53 | slickDB: Database)(
54 | fetchSize: Int, queSize: Int, take: Int = 0)(
55 | finalizer: => Unit = ())(
56 | killSwitch: Fs2Terminator = Fs2KillSwitch)(
57 | implicit convert: SOURCE => TARGET)
58 | : FDAPipeLine[TARGET] = {
59 | val disableAutocommit = SimpleDBIO(_.connection.setAutoCommit(false))
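    | //turning autocommit off lets the jdbc driver stream results with a cursor
    | //instead of materializing the whole result set (required by e.g. PostgreSQL)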
60 | val action_ = action.withStatementParameters(fetchSize = fetchSize)
61 | val publisher = slickDB.stream(disableAutocommit andThen action_)
62 | val enumerator = streams.IterateeStreams.publisherToEnumerator(publisher)
63 |
64 | val s = Stream.eval(async.boundedQueue[Task,Option[SOURCE]](queSize)).flatMap { q =>
65 | Task { Iteratee.flatten(enumerator |>> pushData(killSwitch,take,q)).run }.unsafeRunAsyncFuture()
66 | pipe.unNoneTerminate(q.dequeue).map {row => convert(row)}
67 | }
68 | s.onFinalize(Task.delay(finalizer))
69 |
70 | }
71 | /**
72 | * returns a reactive-stream from Slick DBIOAction result
73 | * using akka-stream to connect to slick data stream publisher
74 | * provide facade for error handler and finalizer to support exception and cleanup handling
75 | * @example {{{
76 | * val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toTypedRow _)
77 | * val streamSource = streamLoader.fda_akkaTypedStream(aqmQuery.result)(db)(512,2,100)()()
78 | * val safeStreamSource = streamLoader.fda_akkaTypedStream(aqmQuery.result)(db)(512,2)(
79 | * println("the end finally!"))(killSwitch)
80 | * }}}
81 | * @param action a Slick DBIOAction to produce query results
82 | * @param slickDB Slick database object
83 | * @param fetchSize number of rows cached during database read
84 | * @param queSize size of the queue used by akka-stream as a cache to pass elements to the fs2 queue
85 | * @param take take only the first 'take' elements (0 means no limit)
86 | * @param finalizer cleanup callback
87 | * @param killSwitch use killSwitch.stopASAP to halt stream
88 | * @param convert just a measure to guarantee conversion function is defined
89 | * when this function is used there has to be a converter defined
90 | * implicitly in compile time
91 | * @return a reactive-stream of TARGET row type elements
92 | */
93 | def fda_akkaTypedStream(action: DBIOAction[Iterable[SOURCE],Streaming[SOURCE],Effect.Read])(
94 | slickDB: Database)(
95 | fetchSize: Int, queSize: Int, take: Int = 0)(
96 | finalizer: => Unit = ())(
97 | killSwitch: AkkaTerminator = AkkaKillSwitch)(
98 | implicit convert: SOURCE => TARGET)
99 | : FDAPipeLine[TARGET] = {
100 | val disableAutocommit = SimpleDBIO(_.connection.setAutoCommit(false))
101 | val action_ = action.withStatementParameters(fetchSize = fetchSize)
102 | val publisher = slickDB.stream(disableAutocommit andThen action_)
103 | implicit val actorSys = ActorSystem("actor-system")
104 | implicit val ec = actorSys.dispatcher
105 | implicit val mat = ActorMaterializer()
106 | // construct akka source
107 | val akkaSource = Source.fromPublisher[SOURCE](publisher)
108 |
109 | val s = Stream.eval(async.boundedQueue[Task,Option[SOURCE]](queSize))
110 | .flatMap { q =>
111 | Task(akkaSource.to(new FS2Gate[SOURCE](killSwitch, take, q)).run).unsafeRunAsyncFuture //enqueue Task(new thread)
112 | pipe.unNoneTerminate(q.dequeue).map {row => convert(row)} //dequeue in current thread
113 | }
114 | s.onFinalize{Task.delay{actorSys.terminate();finalizer}}
115 | }
116 | /**
117 | * returns a reactive-stream from Slick DBIOAction result
118 | * using play-iteratees and an fs2 queue to connect to the slick data stream publisher
119 | * provide facade for error handler and finalizer to support exception and cleanup handling
120 | * @example {{{
121 | * val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)()
122 | * val streamSource = streamLoader.fda_plainStream(aqmQuery.result)(db)(512,16, 100)()()
123 | * val safeStreamSource = streamLoader.fda_plainStream(aqmQuery.result)(db)(512,16)(
124 | * println("the end finally!"))(killSwitch)
125 | * }}}
126 | * @param action a Slick DBIOAction to produce query results
127 | * @param slickDB Slick database object
128 | * @param fetchSize number of rows cached during database read
129 | * @param queSize size of the queue used by the iteratee as a cache to pass elements to the fs2 stream
130 | * @param take take only the first 'take' elements (0 means no limit)
131 | * @param finalizer cleanup callback
132 | * @param killSwitch use killSwitch.stopASAP to halt stream
133 | * @return a reactive-stream of SOURCE row type elements
134 | */
135 | def fda_plainStream(action: DBIOAction[Iterable[SOURCE],Streaming[SOURCE],Effect.Read])(
136 | slickDB: Database)(
137 | fetchSize: Int, queSize: Int, take: Int = 0)(
138 | finalizer: => Unit = ())(
139 | implicit killSwitch: Fs2Terminator): FDAPipeLine[SOURCE] = {
140 | val disableAutocommit = SimpleDBIO(_.connection.setAutoCommit(false))
141 | val action_ = action.withStatementParameters(fetchSize = fetchSize)
142 | val publisher = slickDB.stream(disableAutocommit andThen action_)
143 | val enumerator = streams.IterateeStreams.publisherToEnumerator(publisher)
144 |
145 | val s = Stream.eval(async.boundedQueue[Task,Option[SOURCE]](queSize)).flatMap { q =>
146 | Task { Iteratee.flatten(enumerator |>> pushData(killSwitch,take,q)).run }.unsafeRunAsyncFuture()
147 | pipe.unNoneTerminate(q.dequeue)
148 | }
149 | s.onFinalize(Task.delay(finalizer))
150 | }
151 | /**
152 | * returns a reactive-stream from Slick DBIOAction result
153 | * using akka-stream to connect to slick data stream publisher
154 |    * provides a facade for error handling and finalization to support exception and cleanup handling
155 | * @example {{{
156 | * val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)()
157 | * val streamSource = streamLoader.fda_akkaPlainStream(aqmQuery.result)(db)(512,2,100)()()
158 |    *   val safeStreamSource = streamLoader.fda_akkaPlainStream(aqmQuery.result)(db)(512,2)(
159 | * println("the end finally!"))(killSwitch)
160 | * }}}
161 | * @param action a Slick DBIOAction to produce query results
162 | * @param slickDB Slick database object
163 | * @param fetchSize number of rows cached during database read
164 |    * @param queSize size of the queue used by akka-stream as cache to pass elements to the fs2 queue
165 | * @param take take first 'take' elements
166 | * @param finalizer cleanup callback
167 | * @param killSwitch use killSwitch.stopASAP to halt stream
168 | * @return a reactive-stream of SOURCE row type elements
169 | */
170 | def fda_akkaPlainStream(action: DBIOAction[Iterable[SOURCE],Streaming[SOURCE],Effect.Read])(
171 | slickDB: Database)(
172 | fetchSize: Int, queSize: Int, take: Int = 0)(
173 | finalizer: => Unit = ())(
174 | implicit killSwitch: AkkaTerminator): FDAPipeLine[SOURCE] = {
175 | val disableAutocommit = SimpleDBIO(_.connection.setAutoCommit(false))
176 | val action_ = action.withStatementParameters(fetchSize = fetchSize)
177 | val publisher = slickDB.stream(disableAutocommit andThen action_)
178 | implicit val actorSys = ActorSystem("actor-system")
179 | implicit val ec = actorSys.dispatcher
180 | implicit val mat = ActorMaterializer()
181 | // construct akka source
182 | val akkaSource = Source.fromPublisher[SOURCE](publisher)
183 |
184 | val s = Stream.eval(async.boundedQueue[Task,Option[SOURCE]](queSize))
185 | .flatMap { q =>
186 | Task(akkaSource.to(new FS2Gate[SOURCE](killSwitch, take, q)).run).unsafeRunAsyncFuture //enqueue Task(new thread)
187 | pipe.unNoneTerminate(q.dequeue) //dequeue in current thread
188 | }
189 | s.onFinalize{Task.delay{actorSys.terminate();finalizer}}
190 | }
191 |
192 | /**
193 |    * consume input from the enumerator by pushing each element into queue q
194 |    * ends and produces an error when an enqueue cannot be completed within the timeout
195 | * @tparam R stream element type
196 | * @param killSwitch object with killSwitch.stopASAP to halt stream
197 | * @param take emit the first 'take' elements
198 |    * @param q queue used as cache
199 | * @return iteratee in new state
200 | */
201 | private def pushData[R](killSwitch: Fs2Terminator, take: Int, q: async.mutable.Queue[Task,Option[R]]): Iteratee[R,Unit] = Cont {
202 | case Input.EOF =>
203 | q.enqueue1(None).unsafeRun
204 | Done((), Input.Empty)
205 | case Input.Empty => pushData(killSwitch,take,q)
206 | case Input.El(e) =>
207 | if (take >= 0 && !killSwitch.terminateNow) {
208 | q.enqueue1(Some(e)).unsafeRun
209 |         pushData(killSwitch, if(take == 0) 0 else {if (take == 1) -1 else take - 1}, q) //take == 0 means unlimited; a countdown reaching 1 flips to -1 so the stream ends on the next input
210 | }
211 | else {
212 | killSwitch.reset
213 | q.enqueue1(None).unsafeRun
214 | Done((), Input.Empty)
215 | }
216 | }
217 | /**
218 |    * an akka-stream graph stage that connects an akka-stream source to an fs2 stream through
219 |    * an fs2.async.mutable.Queue structure
220 |    * acts as a de-backpressurer to adjust the emit rate to the pull-model fs2 stream
221 |    * also takes care of manual halts and limiting the first batch of emitted elements
222 | * @tparam T stream element type
223 | * @param killSwitch object with killSwitch.stopASAP to halt stream
224 | * @param take emit the first 'take' elements
225 |    * @param q queue used as cache between the two streams
227 | */
228 | private class FS2Gate[T](killSwitch: AkkaTerminator, take: Int, q: fs2.async.mutable.Queue[Task,Option[T]]) extends GraphStage[SinkShape[T]] {
229 | val in = Inlet[T]("inport")
230 | val shape = SinkShape.of(in)
231 |
232 | override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
233 | new GraphStageLogic(shape) with InHandler {
234 | override def preStart(): Unit = {
235 | if (killSwitch != null) {
236 | val callback = getAsyncCallback[Unit] { (_) =>
237 | killStream = true
238 | }
239 | killSwitch.callback = callback
240 | }
241 | pull(in) //initiate stream elements movement
242 | super.preStart()
243 | }
244 | var take_ = take
245 | var killStream = false
246 |       override def onPush(): Unit = {
247 |         if (killStream) take_ = -1  //manual halt: force end-of-stream
248 |         q.enqueue1{
249 |           if ( take_ >= 0 )
250 |             Some(grab(in))  //normal element
251 |           else
252 |             None            //None terminates the fs2 dequeue side
253 |         }.unsafeRun()
254 |         pull(in)
255 |         if ( take_ < 0) completeStage()
256 |         if (take_ == 1)     //last requested element just emitted
257 |           take_ = -1
258 |         else
259 |           if (take_ != 0) take_ -= 1  //take_ == 0 means unlimited
260 |       }
261 |
262 | override def onUpstreamFinish(): Unit = {
263 | q.enqueue1(None).unsafeRun()
264 | completeStage()
265 | }
266 |
267 | override def onUpstreamFailure(ex: Throwable): Unit = {
268 | q.enqueue1(None).unsafeRun()
269 | completeStage()
270 | }
271 |
272 | setHandler(in,this)
273 |
274 | }
275 | }
276 |
277 | }
278 |
279 | /**
280 | * constructing FDAStreamLoader given slickProfile and converter
281 | */
282 | object FDAStreamLoader {
283 | /**
284 | * constructor for FDAStreamLoader
285 | * @example {{{
286 | * val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toTypedRow _)
287 | * val untypedLoader = FDAStreamLoader(slick.jdbc.H2Profile)()
288 | * }}}
289 |    * @param slickProfile Slick JdbcProfile such as 'slick.jdbc.H2Profile'
290 |    * @param converter a defined implicit type conversion function
291 |    *                  from SOURCE type to TARGET type; set to null if not required
292 | * @tparam SOURCE source type, result type of DBIOAction, most likely a tuple type
293 | * @tparam TARGET final converted type, most likely a case class type
294 | * @return a new FDAStreamLoader object
295 | */
296 | def apply[SOURCE, TARGET](slickProfile: JdbcProfile)(converter: SOURCE => TARGET = null): FDAStreamLoader[SOURCE, TARGET] =
297 | new FDAStreamLoader[SOURCE, TARGET](slickProfile, converter)
298 | }
299 | }
300 |
301 |
302 | /**
303 | * for global imports
304 | */
305 | object FDADataStream extends FDADataStream
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # FunDA (Functional Data Access)
2 |
3 | *FunDA* is a functional database access library designed to supplement *FRM* (*Functional-Relational-Mapper*) tools like *Slick*.
4 | While *FRM* tools bring type-safe *language-integrated-query*, flexible query composition and a powerful functional programming paradigm to their users, their main focus is on data object access, which leaves them comparatively weak in data persistence support such as the data row traversal operations that are so common in *ORM*. This shortfall, together with a brand-new functional programming style, also makes *FRM* feel quite uncomfortable or even awkward to many *OOP* programmers coming from the *ORM* world. On top of bringing back the familiar recordset operations to support data row processing, *FunDA* adds explicit parallel data processing capabilities as well as a simple user-defined programming model that lowers the functional programming skills required, so that with a little getting-used-to a traditional *OOP* programmer can handle *FRM*, making *FunDA* a practical and productive tool.
5 | The core of *FunDA* is implemented with *scalaz-streams-fs2*. *FunDA* can be depicted as a workflow pipe with a sequence of work-nodes where user-defined data processing tasks can be plugged in. *FunDA* is implemented as a forward-only stream of rows representing pure data or query actions. User-defined-tasks at a work-node can intercept rows and run some process within the context of each row, and these user-defined-tasks can be run in parallel through *FunDA*'s parallelism support.
6 | A typical *FunDA* program consists of a **source** and many **user-defined-tasks** as follows:
7 |
8 | ```
9 | val streamSource = streamLoader.fda_typedStream(albumsInfo.result)(db)(512, 128)()
10 |
11 | streamSource.appendTask(transformData).appendTask(runActionQuery).appendTask(showResults).startRun
12 | ```
13 | where "streamSource" is a *FunDA* stream **source** produced by loading data from the database, and "transformData", "runActionQuery" and "showResults" are all user-defined-tasks, each responsible for achieving some minimal, distinct effect. In the unique flavor of functional programming, these are functional combinators that can be composed in a specific order to perform a much bigger and more complex task. From the semantics of the *FunDA* program above we can make a good guess that "transformData" transforms each data row into query actions, and these query actions are executed by "runActionQuery" at the next work-node.
14 | #### how to use
15 |
16 | *FunDA* artifacts are currently published on Bintray. Add following in your build.sbt:
17 |
18 | ```
19 | resolvers += Resolver.bintrayRepo("bayakala","maven")
20 | libraryDependencies += "com.bayakala" %% "funda" % "1.0.0-RC-01" withSources() withJavadoc()
21 |
22 |
23 | ```
24 | For your information, *FunDA* already includes the following dependencies:
25 |
26 | ```
27 | libraryDependencies ++= Seq(
28 | "com.typesafe.slick" %% "slick" % "3.2.0",
29 | "com.h2database" % "h2" % "1.4.191",
30 | "com.typesafe.slick" %% "slick-hikaricp" % "3.2.0",
31 | "ch.qos.logback" % "logback-classic" % "1.1.7",
32 | "co.fs2" %% "fs2-core" % "0.9.4",
33 | "co.fs2" %% "fs2-io" % "0.9.4",
34 | "com.typesafe.play" % "play-iteratees-reactive-streams_2.11" % "2.6.0"
35 | )
36 |
37 | ```
38 | **Remarks:** users should set up their own *Slick* database configuration file application.conf in the resources directory.
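A minimal `application.conf` might look like the following sketch (the config path name "h2db" and the connection settings are illustrative assumptions; adjust them to your own database):

```
h2db {
  url = "jdbc:h2:tcp://localhost/~/slickdemo"
  driver = "org.h2.Driver"
  connectionPool = HikariCP
  keepAliveConnection = true
}
```
the database can then be obtained in code with `val db = Database.forConfig("h2db")`.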
39 | #### to run the examples
40 | There is a sample application "funda-demo" located on github here: [www.github.com/bayakala/funda-demo](http://www.github.com/bayakala/funda-demo/). It includes sample data located under resources/testdata/ as a bare CSV file. Import this file into your database before you run the examples. The examples should be run in the following order:
41 |
42 | ```
43 | 1. StrongTypedRows.scala
44 | 2. UserDefinedTasks.scala
45 | 3. ParallelLoading.scala
46 | 4. ParallelTasks.scala
47 | 5. ParallelExecution.scala
48 | 6. ExceptionsAndFinalizers.scala
49 | ```
50 | *download and try it. good luck and have fun!*
51 | ## The Principles
52 |
53 | *FunDA*'s workflow *FDAPipeLine* is a *scalaz-streams-fs2* stream and therefore a *free-monad*. It is highly composable:
54 |
55 | ```
56 | val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toTypedRow _)
57 | val source = streamLoader.fda_typedStream(aqmQuery.result)(db)(512,512)()
58 | val stream = source.filter{r => r.year > "1999"}.take(3).appendTask(showRecord)
59 |
60 | stream.startRun
61 | ```
62 | as demonstrated above, we can compose a stream any way we want before **startRun**
63 | ### FunDA stream (The program)
64 | ##### strong-typed rows
65 | As mentioned above, FunDA programs are just compositions of a **source** and a string of **user-defined-tasks**, forming a stream whose rows are the data produced by the **source**. To facilitate stream operations we must convert data loaded from the database into strong-typed rows. A practical case is that *Slick* usually returns query results as a collection of tuples, so we must take an extra step to convert them into user-defined strong-typed case classes.
66 | The following code snippet demonstrates such conversion:
67 |
68 | ```
69 | // aqmQuery.result returns Seq[(String,String,String,String)]
70 | val aqmQuery = aqmraw.map {r => (r.year,r.state,r.county,r.value)}
71 | // user designed strong typed resultset type. must extend FDAROW
72 | case class TypedRow(year: String, state: String, county: String, value: String) extends FDAROW
73 | // strong typed resultset conversion function. declared implicit to remind during compilation
74 | implicit def toTypedRow(row: (String,String,String,String)): TypedRow =
75 | TypedRow(row._1,row._2,row._3,row._4)
76 |
77 | // loader to read from database and convert result collection to strong typed collection
78 | val viewLoader = FDAViewLoader(slick.jdbc.H2Profile)(toTypedRow _)
79 | val dataSeq = viewLoader.fda_typedRows(aqmQuery.result)(db).toSeq
80 | // turn Seq collection into FunDA stream with strong-typed rows
81 | val aqmStream: FDAPipeLine[TypedRow] = fda_staticSource(dataSeq)()
82 |
83 | ```
84 | ##### static view and dynamic streaming sources
85 | Static sources, or views, are data structures completely loaded into memory after running a query. Stream sources are query results returned as data streams that are *reactive-streams* conformant; in other words, stream sources are cached at the backend and driven by back-pressure. *FunDA* provides functions to produce both kinds of sources. The following demonstrates producing a static view:
86 |
87 | ```
88 | // loader to read from database and convert result collection to strong typed collection
89 | val viewLoader = FDAViewLoader(slick.jdbc.H2Profile)(toTypedRow _)
90 | val dataSeq = viewLoader.fda_typedRows(aqmQuery.result)(db).toSeq
91 | // turn Seq collection into FunDA stream with strong-typed rows
92 | val aqmView: FDAPipeLine[TypedRow] = fda_staticSource(dataSeq)()
93 |
94 | ```
95 | a stream source can be constructed as follows:
96 |
97 | ```
98 | // strong typed source is also possible with Slick data streaming
99 | val streamLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toTypedRow _)
100 | val aqmStream: FDAPipeLine[TypedRow] = streamLoader.fda_typedStream(aqmQuery.result)(db)(512,512)()
101 | ```
102 | as demonstrated above, both static collections and dynamic data streams can be transformed into strong-typed-row sources.
103 |
104 | ### Control data flow
105 |
106 | The flow of rows in *FunDA* streams is controlled inside user-defined-tasks, in which a row is received from upstream and zero, one or more rows may be passed downstream. This means additional new rows can be constructed on the fly and passed downstream inside these user-defined-tasks, which makes *FunDA* logic much more flexible and powerful. Passing no row in a receive-send loop is represented by a skip. Users can also halt the stream by passing an end-of-stream signal downstream inside these user-defined-tasks. The following are some code samples:
107 |
108 | ```
109 | //user-defined-task type is defined as follows:
110 | type FDAUserTask[ROW] = (ROW) => (Option[List[ROW]])
111 | /* user define function to be performed at a FDAWorkNode
112 | * given a row from upstream, return Option[List[ROW]] as follows:
113 | * fda_skip -> Some(Nil) : skip sending the current row
114 | * fda_next -> Some(List(r1,r2...)): send r1,r2... downstream
115 | * fda_break -> None : halt stream, end of process
116 | * @tparam ROW type of row
117 | */
118 |
119 | // an example of a user-defined-task
120 | def dancing: FDAUserTask[FDAROW] = row => {
121 | row match {
122 | case qmr: TypedRow =>
123 |       qmr.year.toInt match {
124 | case a if a < 1960 =>
125 | // pass downstream untouched
126 | fda_next(qmr)
127 | case b if b < 1970 =>
128 | // transform row
129 |           fda_next(qmr.copy(year = "1970"))
130 | case c if c < 1980 =>
131 |           // pass along with a new row. TypedRow is a case class
132 |           fda_next(List(qmr,TypedRow("1980",qmr.state,"countyQQ","0.3")))
133 | case d if d < 2000 =>
134 | // do not process this row
135 | fda_skip
136 | case _ =>
137 | // stop stream
138 | fda_break
139 | }
140 | // encounter unknown row type, break out
141 | case _ => fda_break
142 | }
143 | }
144 | ```
145 |
146 | ### defining user-defined-task
147 |
148 | As a functional stream, it seems that some of the data access and processing in *FunDA* could be achieved in pure functional ways like the following:
149 |
150 | ```
151 | fdaStream.map(row => transformData(row)).map(action => runQueryAction(action))
152 | ```
153 | unfortunately, pure stream combinators lack the powerful and flexible flow-control abilities that are so crucial for processing stream elements; therefore the **user-defined-task** is introduced as a programming model to deal with this situation.
154 | User-defined-tasks are functional combinators designed by users, each achieving a single minimal task; a much more complex final task can then be assembled by composing many of these tiny tasks in a specific order and then calling ***startRun***. The signature of *FDAUserTask[ROW]* is as follows:
155 |
156 | ```
157 | type FDAUserTask[ROW] = (ROW) => (Option[List[ROW]])
158 |
159 | ```
160 | the above reads: a **user-defined-task** takes a row as input, uses or transforms it, and, as a way of flow control, signifies the next step of the stream by returning **Option[List[ROW]]** as the result of fda_next, fda_skip or fda_break. With the strong-typed-row requirement in place, the involved row types must extend **FDAROW** and can be either data-rows or action-rows.
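for intuition, these three results can be pictured as simple constructors of the **Option[List[ROW]]** encoding. The following is a sketch only, not the library's actual definitions:

```
//sketch of the flow-control helpers, following the encoding described above
def fda_next[ROW](r: ROW): Option[List[ROW]] = Some(List(r))   //send a row downstream
def fda_next[ROW](rs: List[ROW]): Option[List[ROW]] = Some(rs) //send several rows downstream
def fda_skip[ROW]: Option[List[ROW]] = Some(Nil)               //send nothing, continue
def fda_break[ROW]: Option[List[ROW]] = None                   //halt the stream
```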
161 | ##### types of rows
162 | *FunDA* streams are strong-typed; all rows must extend **FDAROW**. There are several categories of rows:
163 |
164 | * data-row: any case class extending **FDAROW** with parameters representing fields:
165 | **case class TypedRow(year: Int, state: String, value: Int) extends FDAROW**
166 | * action-row: a case class extending **FDAROW** with a **Slick DBIOAction** wrapped inside the parameter as follows:
167 | **case class FDAActionRow(action: FDAAction) extends FDAROW**
168 | sometimes we need to target an action row to be run in a different database context. In that case we can just define any case class and extend **FDAROW**:
169 | **case class MyActionRow(action: FDAAction) extends FDAROW**
170 | * error-row: a case class extending **FDAROW** with a caught Exception object wrapped inside its parameter:
171 | **case class FDAErrorRow(e: Exception) extends FDAROW**
172 | users can define their own error rows for different exceptions as long as they extend **FDAROW**:
173 | **case class MyErrorRow(msg: String, e: Exception) extends FDAROW**
174 | * null-row: a signal object used to represent EOS (end-of-stream):
175 | **case object FDANullRow extends FDAROW**
176 |
177 | ##### standard-operation-procedures
178 | User-defined-tasks follow a standard operating procedure:
179 |
180 | 1. determine row type by pattern-matching
181 | 2. use row fields to perform data processing and transformation
182 | 3. control flow of rows downstream
183 |
184 | the following are samples of user-defined-tasks with a few different purposes:
185 |
186 | ```
187 | //strong typed row type. must extend FDAROW
188 | case class StateRow(state: String) extends FDAROW
189 |
190 | //a logging task. show name and pass row untouched downstream
191 | def showState: FDAUserTask[FDAROW] = row => {
192 | row match {
193 | case StateRow(sname) => //this is my row
194 | println(s"Name of state is:$sname")
195 | fda_next(row)
196 | case _ => fda_skip //not my row, do not pass it
197 | }
198 | }
199 | ```
200 |
201 | ```
202 | //a filter and processing task.
203 | //filter out rows with inconvertible value strings and out of ranged value and year
204 | def filterRows: FDAUserTask[FDAROW] = row => {
205 | row match {
206 | case r: AQMRaw => { //this is the correct row
207 | try { //process this row and catch exceptions
208 | val yr = r.year.toInt
209 | val v = r.value.toInt
210 | val vlu = if ( v > 10 ) 10 else v //max value allowed
211 | //construct a new row
212 | val data = AQMRPTModel(0,r.mid.toInt,r.state,r.county,yr,vlu,0,true)
213 | if ((yr > 1960 && yr < 2018)) //filtering
214 | fda_next(data) //this row ok. pass downstream
215 | else
216 | fda_skip //filter out this row
217 | } catch {
218 | case e: Exception =>
219 | fda_next(FDAErrorRow(e)) //pass the caught exception as a row downstream
220 | }
221 | }
222 | case _ => fda_skip //wrong type, skip
223 | }
224 | }
225 | ```
226 |
227 | ```
228 | //a row transformation task
229 | //transform data to action for later execution
230 | def toAction: FDAUserTask[FDAROW] = row => {
231 | row match {
232 | case r: AQMRPTModel => //this is my row
233 | val queryAction = AQMRPTQuery += r //slick action
234 | fda_next(FDAActionRow(queryAction))
235 | case _ => fda_skip
236 | }
237 | }
238 | ```
239 |
240 | ```
241 | //a query action runner task
242 | //get a query runner and an action task
243 | val actionRunner = FDAActionRunner(slick.jdbc.H2Profile)
244 | def runActionRow: FDAUserTask[FDAROW] = action => {
245 | action match {
246 | case FDAActionRow(q) => //this is a query action row
247 | actionRunner.fda_execAction(q)(db) //run it
248 | fda_skip
249 | case other@_ => fda_next(other) //don't touch it, just pass down
250 | //someone else downstream could process it
251 | }
252 | }
253 |
254 | ```
255 |
256 | to run many tasks as a whole, we compose them and **startRun**:
257 |
258 | ```
259 | //compose the program
260 | val streamAllTasks = streamAQMRaw.appendTask(filterRows)
261 | .appendTask(toAction)
262 | .appendTask(runActionRow)
263 | //run program
264 | streamAllTasks.startRun
265 |
266 | ```
267 | ##### aggregation
268 | In stream-style processing we often need to aggregate over rows; this is where the **user-aggregate-task** fits in. A **user-aggregate-task** has the following signature:
269 | ```
270 | type FDAAggrTask[AGGR,ROW] = (AGGR,ROW) => (AGGR,Option[List[ROW]])
271 | ```
272 | *AGGR* can be any user-defined type representing the state of aggregation. From the type signature above we can see it is a typical functional-style function: it takes a state as input and returns a new state. The following is an example of a **user-aggregate-task**:
273 |
274 | ```
275 | //define a structure to represent aggregator type
276 | case class Accu(state: String, county: String, year: Int, count: Int, sumOfValue: Int)
277 |
278 | //user defined aggregation task. only pass aggregated row downstream
279 | def countingAverage: FDAAggrTask[Accu,FDAROW] = (accu,row) => {
280 | row match {
281 | case aqmr: AQMRPTModel => //this is the target row type
282 | if (accu.state == "" || (aqmr.state == accu.state && aqmr.year == accu.year))
283 | //same condition: inc count and add sum, no need to pass row downstream
284 | (Accu(aqmr.state,aqmr.county,aqmr.year,accu.count+1, accu.sumOfValue+aqmr.value),fda_skip)
285 | else
286 | //reset accumulator, create a new aggregated row and pass downstream
287 | (Accu(aqmr.state,aqmr.county,aqmr.year,1, aqmr.value)
288 | ,fda_next(AQMRPTModel(0,9999,accu.state,accu.county,accu.year
289 | ,accu.count,accu.sumOfValue/accu.count,true)))
290 | case FDANullRow =>
291 | //last row encountered. create and pass new aggregated row
292 | (Accu(accu.state,accu.county,accu.year,1, 0)
293 | ,fda_next(AQMRPTModel(0,9999,accu.state,accu.county,accu.year
294 | ,accu.count,accu.sumOfValue/accu.count,true)))
295 | //incorrect row type, do nothing
296 | case _ => (accu,fda_skip)
297 | }
298 | }
299 |
300 | ```
301 |
302 | the following demonstrates how it is executed:
303 |
304 | ```
305 | aqmrStream.aggregateTask(Accu("","",0,0,0),countingAverage)
306 | .appendTask(toAction)
307 | .appendTask(runActionRow)
308 | .startRun
309 |
310 | ```
311 | "aqmrStream" is a **source** with rows to be aggregated.
312 | ### Running programs inside user-defined-task
313 | A *FunDA* program consists of a **source** and multiple **user-defined-tasks**. It is possible to execute a *FunDA* program inside these *user-defined-tasks*. This means we have to call **startRun** inside the *user-defined-task*, and some effect would inevitably be produced, rendering the calling *user-defined-task* impure. A complete example of a *FunDA* program inside a *user-defined-task* is given below:
314 |
315 | ```
316 | //getting id with corresponding name from STATES table
317 | def getStateID(state: String): Int = {
318 | //create a stream for state id with state name
319 | implicit def toState(row: StateTable#TableElementType) = StateModel(row.id,row.name)
320 | val stateLoader = FDAViewLoader(slick.jdbc.H2Profile)(toState _)
321 | val stateSeq = stateLoader.fda_typedRows(StateQuery.result)(db).toSeq
322 |   //construct a static source of StateModel rows
323 | val stateStream = fda_staticSource(stateSeq)()
324 | var id = -1
325 | def getid: FDAUserTask[FDAROW] = row => {
326 | row match {
327 | case StateModel(stid,stname) => //target row type
328 | if (stname.contains(state)) {
329 | id = stid
330 | fda_break //exit
331 | }
332 | else fda_skip //take next row
333 | case _ => fda_skip
334 | }
335 | }
336 | stateStream.appendTask(getid).startRun
337 | id
338 | }
339 |
340 | ```
341 | "getStateID" is a *user-defined-function* in which the function "getid" is physically executed. Because "getid" is local, we can still confidently use the calling *user-defined-function* in composition with other combinators, as follows:
342 |
343 | ```
344 | //process input row and produce action row to insert into NORMAQM
345 | def getIdsThenInsertAction: FDAUserTask[FDAROW] = row => {
346 | row match {
347 | case aqm: AQMRPTModel =>
348 | if (aqm.valid) {
349 | val stateId = getStateID(aqm.state)
350 | val countyId = getCountyID(aqm.state,aqm.county)
351 | val action = NORMAQMQuery += NORMAQMModel(0,aqm.mid, stateId, countyId, aqm.year,aqm.value,aqm.total)
352 | fda_next(FDAActionRow(action))
353 | }
354 | else fda_skip
355 | case _ => fda_skip
356 | }
357 | }
358 | ```
359 | in this case "getStateID" is called within another *user-defined-task*.
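composed into a complete program, it might be used like the following sketch, where "AQMRPTStream" is an assumed **source** of AQMRPTModel rows and "runActionRow" is the action runner task defined earlier:

```
AQMRPTStream.appendTask(getIdsThenInsertAction)
            .appendTask(runActionRow)
            .startRun
```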
360 | ### Parallel Processing
361 | *FunDA* borrows its parallelism capabilities from *scalaz-streams-fs2*. There are two areas of parallel data processing application:
362 |
363 |
364 | * parallel loading of multiple **sources**
365 | * parallel execution of a single **user-defined-task**
366 |
367 | ##### parallel loading
368 | Parallel loading of many sources is achieved by calling the function **fda_par_load** provided by *FunDA*. These sources could be constructed from tables on separate database servers, or by splitting huge data tables into smaller non-overlapping data chunks, like the following:
369 |
370 | ```
371 | //define query for extracting State names from AQMRPT
372 | val qryStates = AQMRPTQuery.map(_.state).distinct.sorted
373 | case class States(name: String) extends FDAROW
374 | implicit def toStates(row: String) = States(row)
375 | val stateLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toStates _)
376 | val statesStream = stateLoader.fda_typedStream(qryStates.result)(db_a)(64,64)()
377 |
378 |
379 | //define query for extracting County names from AQMRPT in separate chunks
380 | //query with state name >A and <K
381 | val qryCountiesA_K = AQMRPTQuery.filter(r => (r.state.toUpperCase > "A" &&
382 | r.state.toUpperCase < "K")).map(r => (r.state,r.county))
383 | .distinctOn(r => (r._1,r._2))
384 | .sortBy(r => (r._1,r._2))
385 |
386 | //query with state name >K and <P
387 | val qryCountiesK_P = AQMRPTQuery.filter(r => (r.state.toUpperCase > "K" &&
388 | r.state.toUpperCase < "P")).map(r => (r.state,r.county))
389 | .distinctOn(r => (r._1,r._2))
390 | .sortBy(r => (r._1,r._2))
391 |
392 | //query with state name >P
393 | val qryCountiesP_Z = AQMRPTQuery.filter(r => r.state.toUpperCase > "P")
394 | .map(r => (r.state,r.county))
395 | .distinctOn(r => (r._1,r._2))
396 | .sortBy(r => (r._1,r._2))
397 |
398 | case class Counties(state: String, name: String) extends FDAROW
399 | implicit def toCounties(row: (String,String)) = Counties(row._1,row._2)
400 | val countyLoader = FDAStreamLoader(slick.jdbc.H2Profile)(toCounties _)
401 | //3 separate streams to extract county names from the same database table AQMRPT
402 | val countiesA_KStream: FDAPipeLine[Counties] = countyLoader.fda_typedStream(qryCountiesA_K.result)(db_b)(64,64)()
403 | val countiesK_PStream: FDAPipeLine[Counties] = countyLoader.fda_typedStream(qryCountiesK_P.result)(db_b)(64,64)()
404 | val countiesP_ZStream: FDAPipeLine[Counties] = countyLoader.fda_typedStream(qryCountiesP_Z.result)(db_b)(64,64)()
405 |
406 | ```
407 | once these **sources** are all constructed, we load them in parallel:
408 |
409 | ```
410 | //obtain a combined stream by parallel loading with a max of 4 open computations
411 | val combinedStream: FDAPipeLine[FDAROW] = fda_par_load(statesStream,countiesA_KStream,countiesK_PStream,countiesP_ZStream)(4)
412 |
413 | ```
414 | doing parallel loading will most likely produce a stream with multiple types of rows; in the above case **States** and **Counties** represent two different row types. Therefore *user-defined-tasks* are designed one per target row type, each handling rows of its own type, like the following:
415 |
416 | ```
417 | //user-task to catch rows of States type and transform them into db insert actions
418 | def processStates: FDAUserTask[FDAROW] = row => {
419 | row match {
420 | //catch states row and transform it into insert action
421 | case States(stateName) => //target row type
422 | println(s"State name: ${stateName}")
423 | val action = StateQuery += StateModel(0,stateName)
424 | fda_next(StateActionRow(action))
425 | case others@ _ => //pass other types to next user-defined-tasks
426 | fda_next(others)
427 | }
428 | }
429 | //user-task to catch rows of Counties type and transform them into db insert actions
430 | def processCounties: FDAUserTask[FDAROW] = row => {
431 | row match {
432 | //catch counties row and transform it into insert action
433 | case Counties(stateName,countyName) => //target row type
434 | println(s"County ${countyName} of ${stateName}")
435 | val action = CountyQuery += CountyModel(0,countyName+ " of "+stateName)
436 | fda_next(CountyActionRow(action))
437 | case others@ _ => //pass other types to next user-defined-tasks
438 | fda_next(others)
439 | }
440 | }
441 | ```
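these tasks can then be appended to the combined stream and run as one program. The following is a sketch, assuming follow-up runner tasks (hypothetically named "runStateAction" and "runCountyAction") are defined to execute the produced action rows against their respective databases:

```
combinedStream.appendTask(processStates)
              .appendTask(processCounties)
              .appendTask(runStateAction)   //hypothetical runner for StateActionRow
              .appendTask(runCountyAction)  //hypothetical runner for CountyActionRow
              .startRun
```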
442 | ###### parallel loading a stream of sources
443 | The above demonstration of parallel loading started with a known number of sources. This is especially convenient when users manually arrange sources of different row types in a parallel loading operation. But when the list of sources is itself a stream, to load that stream of sources in parallel we must first convert it into an **FDAParSource** by applying an **FDASourceLoader** function as follows:
444 |
445 | ```
446 | //loading rows with year yr
447 | def loadRowsInYear(yr: Int) = {
448 | //a new query
449 | val query = AQMRPTQuery.filter(row => row.year === yr)
450 | //reuse same loader
451 | AQMRPTLoader.fda_typedStream(query.result)(db)(256, 256)(println(s"End of stream ${yr}!!!!!!"))
452 | }
453 |
454 | //loading rows by year
455 | def loadRowsByYear: FDASourceLoader = row => {
456 | row match {
457 | case Years(y) => loadRowsInYear(y) //produce stream of the year
458 | case _ => fda_appendRow(FDANullRow)
459 | }
460 | }
461 |
462 | //get parallel source constructor
463 | val parSource: FDAParSource = yearStream.toParSource(loadRowsByYear)
464 |
465 | ```
466 | the following demonstrates loading this parallel source:
467 |
468 | ```
469 | //produce a stream from parallel source
470 | val stream: FDAPipeLine[FDAROW] = fda_par_source(parSource)(4)
471 | ```
472 | **fda_par_source** is actually a parallel execution function analogous to **fda_runPar** which is described in the following section.
473 | ##### parallel execution
474 | *FunDA* provides a function **fda_runPar** as a parallel task runner. A parallel task has the following signature:
475 |
476 | ```
477 | /** Parallel task type
478 | * stream of streams type for parallel running user action
479 | * use stream.toPar to convert from FDAUserTask
480 | */
481 | type FDAParTask = Stream[Task,Stream[Task,Option[List[FDAROW]]]]
482 |
483 | ```
484 | and an **FDAUserTask** can be converted to an **FDAParTask** as below:
485 |
486 | ```
487 | AQMRPTStream.toPar(getIdsThenInsertAction)
488 | ```
489 | where "AQMRPTStream" is a *FunDA* **source** and **toPar** is its method for turning "getIdsThenInsertAction" into a parallel task of many instances running in different threads. The principle of parallel execution is to scramble the rows of a single input stream into several un-ordered streams that serve as inputs to many instances of a single task running in parallel on different threads. An **FDAParTask** requires a special runner, as shown below:
490 |
491 | ```
492 | fda_runPar(AQMRPTStream.toPar(getIdsThenInsertAction))(8)
493 | ```
494 | **fda_runPar** has a signature as follows:
495 |
496 | ```
497 | def fda_runPar(parTask: FDAParTask)(maxOpen: Int): FDAPipeLine[FDAROW]
498 |
499 | ```
500 | **maxOpen** designates the maximum number of open computations, i.e. the degree of parallelism. The actual number of open computations depends on a number of factors, including the number of CPU cores and the size of the thread-pool, and will never exceed the user-suggested maximum. The thread-pool can be adjusted from its default by declaring an implicit instance of Strategy:
501 |
502 | ```
503 | implicit val strategy = Strategy.fromCachedDaemonPool("cachedPool")
504 | // implicit val strategy = Strategy.fromFixedDaemonPool(6)
505 |
506 | ```
507 | the actual performance of parallelism requires thorough tuning of thread-pool strategies with respect to the number of CPU cores. Whatever the configuration, the performance gain of parallelism over a single-threaded task has proven significant. A complete composition of parallel loading and parallel execution follows this model:
508 |
509 | ```
510 | //get parallel source constructor
511 | val parSource = yearStream.toParSource(loadRowsByYear)
512 | //implicit val strategy = Strategy.fromCachedDaemonPool("cachedPool")
513 | //produce a stream from parallel sources
514 | val source = fda_par_source(parSource)(4)
515 | //turn getIdsThenInsertAction into parallel task
516 | val parTasks = source.toPar(getIdsThenInsertAction)
517 | //runPar to produce a new stream
518 | val actionStream = fda_runPar(parTasks)(4)
519 | //turn runInsertAction into parallel task
520 | val parRun = actionStream.toPar(runInsertAction)
521 | //runPar and carry out by startRun
522 | fda_runPar(parRun)(2).startRun
523 |
524 | ```
525 | ##### remarks when writing parallel processing programs
526 | According to our experiments, **FunDA** concurrent combinators are quite sensitive to thread management. The **maxOpen** parameters of both **fda_runPar** and **fda_par_source** in the examples above had to be tuned to avoid thread contention and process hangs. The examples used **HikariCP** with the parameters set below:
527 |
528 | ```
529 | h2db {
530 | url = "jdbc:h2:tcp://localhost/~/slickdemo;mv_store=false"
531 | driver = "org.h2.Driver"
532 | connectionPool = HikariCP
533 | numThreads = 48
534 | maxConnections = 48
535 | minConnections = 12
536 | keepAliveConnection = true
537 | }
538 |
539 | ```
540 | ### Exceptions handling and Finalizers
541 | **FunDA** provides a mechanism that guarantees a **finalizer** is called upon termination of the stream, whether at a natural end-of-stream or on a break-out caused by interruption or exception. **Finalizers** are in fact call-back functions hooked up to a *FunDA* program during source construction, like the following:
542 |
543 | ```
544 | val view = fda_staticSource(stateSeq)(println("***Finally*** the end of view!!!"))
545 | val stream = streamLoader.fda_typedStream(StateQuery.result)(db)(64,64)(println("***Finally*** the end of stream!!!"))
546 |
547 | ```
548 | exceptions can be caught by **onError** call-backs that are hooked up at the **very end** of a **FunDA** stream in order to catch exceptions from all work-nodes, as follows:
549 |
550 | ```
551 | val v = viewState.appendTask(errorRow).appendTask(trackRows)
552 | val v1 = v.onError {case e: Exception => println(s"Caught Error in viewState!!![${e.getMessage}]"); fda_appendRow(FDANullRow)}
553 | v1.startRun
554 |
555 | val s = streamState.appendTask(errorRow).appendTask(trackRows)
556 | val s1 = s.onError {case e: Exception => println(s"Caught Error in streamState!!![${e.getMessage}]"); fda_appendRow(FDANullRow)}
557 | s1.startRun
558 |
559 | ```
560 | ##### user defined exceptions
561 | Sometimes we wish to watch for particular events and take corresponding actions when they occur. This can be achieved with user-defined-exceptions. User-defined-exceptions are special rows extending **FDAROW** that can be caught by pattern matching. The following is an example of a user-defined-exception and its handling:
562 |
563 | ```
564 | case class DivideZeroError(msg: String, e: Exception) extends FDAROW
565 | def catchError: FDAUserTask[FDAROW] = row => {
566 | row match {
567 | case StateModel(id,name) =>
568 | try {
569 | val idx = id / (id - 3)
570 | fda_next(StateModel(idx, name))
571 | } catch {
572 | case e: Exception => //pass an error row
573 | fda_next(DivideZeroError(s"Divide by zero exception at ${id}",e))
574 | }
575 | case m@_ => fda_next(m)
576 | }
577 | }
578 |
579 | def trackRows: FDAUserTask[FDAROW] = row => {
580 | row match {
581 | case m@StateModel(id,name) =>
582 | println(s"State: $id $name")
583 | println( "----------------")
584 | fda_next(m)
585 | case DivideZeroError(msg, e) => //error row
586 | println(s"***Error:$msg***")
587 | fda_skip
588 | case m@_ => fda_next(m)
589 | }
590 | }
591 |
592 | val s = streamState.take(5).appendTask(catchError).appendTask(trackRows)
593 | val s1 = s.onError {case e: Exception => println(s"Caught Error in streamState!!![${e.getMessage}]"); fda_appendRow(FDANullRow)}
594 | s1.startRun
595 |
596 | ```
--------------------------------------------------------------------------------