├── .gitignore ├── README.md ├── build.sbt ├── data ├── Jobs.csv └── Persons.csv ├── doc └── internal │ └── README.md ├── macros └── src │ └── main │ └── scala │ └── dbstage │ └── FieldForwarder.scala ├── project └── build.properties └── src ├── main └── scala │ └── dbstage │ ├── Embedding.scala │ ├── Serial.scala │ ├── example │ ├── SampleQueries.scala │ └── Sex.scala │ ├── frontend │ ├── Relation.scala │ └── package.scala │ ├── package.scala │ ├── query │ ├── Query.scala │ ├── QueryPlan.scala │ └── package.scala │ └── runtime │ └── Table.scala └── test └── scala └── Basics.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .history 7 | .lib/ 8 | dist/* 9 | 10 | target/* 11 | macros/target/ 12 | project/target/ 13 | project/project/ 14 | 15 | lib_managed/ 16 | src_managed/ 17 | project/boot/ 18 | project/plugins/project/ 19 | 20 | # Scala-IDE specific 21 | .scala_dependencies 22 | .worksheet 23 | .idea/* 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DBStage – Flexible, Staged Query Compilation Playground 2 | 3 | ![stability-experimental](https://img.shields.io/badge/stability-experimental-orange.svg) 4 | 5 | ## Introduction 6 | 7 | This repository contains a proof of concept for a configurable language-integrated runtime query compiler based on staging. 8 | The implementation relies on the [Squid type-safe metaprogramming framework](https://github.com/epfldata/squid/) for Scala, 9 | which makes its code manipulation and generation capabilities fairly robust. 
10 | 11 | The main features are: 12 | * An expressive SQL embedded DSL (still currently missing many features), 13 | with type-safe integration to normal Scala programs (LINQ-style); 14 | 15 | * A backend implemented using powerful abstractions and Scala modular programming, 16 | which allows great configurability at no runtime cost: 17 | experiment and combine 18 | different ways to store relation tables (column store, row store, hash map), 19 | different ways to index them, 20 | different ways to query them (push, pull), etc. 21 | 22 | 23 | ## Step by step 24 | 25 | ### 1. define the database relations 26 | 27 | ```scala 28 | case object Person extends Relation { 29 | val Id = Column[Int]("Id", primary = true) 30 | val Name = Column[String]("Name") 31 | val Age = Column[Int]("Age") 32 | val Sex = Column[Sex]("Sex") 33 | } 34 | ``` 35 | 36 | ### 2.a. register queries to be executed later, using a SQL-like DSL 37 | 38 | ```scala 39 | import Person._ 40 | val q0 = from(Person) where ir"$Age > 18" where ir"$Sex == Male" select (Name,Age) 41 | ``` 42 | 43 | (Of course, one can write `where ir"$Age > 18 && $Sex == Male"` equivalently.) 44 | 45 | Note that column types are checked at compile-time, but column reference consistency and ambiguities are checked at query construction time (runtime). 46 | For example if I had written `select (Name,Age,Salary)` it would have complained at runtime that there is no such Salary column available. (It would be easy to have a compile-time linter written in Squid to catch these errors earlier.) 47 | 48 | ### 2.b. load the data from the file system 49 | 50 | ```scala 51 | Person.loadDataFromFile("data/persons.csv", compileCode = true) 52 | ``` 53 | 54 | This compiles a program on-the-fly to efficiently load the data given the relation schema. 55 | 56 | ### 2.c.
on-the-fly compile and execute queries 57 | 58 | ```scala 59 | q0.plan.foreach { case (name, age) => assert(age > 18); println(s"$name $age") } 60 | ``` 61 | 62 | Notice that the types for `name` and `age` are correctly inferred as String and Int, respectively. 63 | 64 | Importantly, steps 2.a, 2.b and 2.c can be done in any order and can be interleaved. 65 | 66 | Another example: all pairs of people of the same age but opposite sex: 67 | 68 | ```scala 69 | val m = from(Person) 70 | val f = from(Person) 71 | val q = ((m where ir"$Sex == Male") join (f where ir"$Sex == Female"))(ir"${m.Age} == ${f.Age}") 72 | .select (m.Age, m.Name, f.Name, m.Id, f.Id) 73 | q.printLines 74 | ``` 75 | 76 | Which prints the following: 77 | 78 | | Age(0) | Name(0) | Name(1) | Id(0) | Id(1) | 79 | | --- | --- | --- | --- | --- | 80 | | 41 | bob parker | julia kenn | 1 | 6 | 81 | |...|...|...|...|...| 82 | 83 | The currently supported functionalities are: 84 | * Selection, projection, filtering, (hash) joins 85 | * Option to load data in a hashmap where the keys are the primary keys of the relation; this structure is then used to perform faster joins 86 | * Option to store data in column store, on a per-relation basis (if the above is not applied on the given relation) 87 | * User-defined functions and data types 88 | * Pushing and pulling are both supported 89 | * The type-safe DSL means one can integrate queries inside general-purpose programs, using DBStage as a simple Scala library 90 | * Engine is agnostic to the underlying data structures and row representation; tables currently use tuples and Scala ArrayBuffer/HashMaps, but we could easily experiment with off-heap memory to avoid boxing, for example.
91 | 92 | What I'd like to have in the future: 93 | * Aggregations, grouping, sorting 94 | * Customize the storage of data optimizing for registered queries (possibly adapt it dynamically as more queries are registered) 95 | * Option to instrument the data loading code to add more error recovery and/or add data analytics guiding subsequent query compilation 96 | * Extend the SQL subset with updates, perhaps transactions 97 | * Handling of data on disk, and associated cache management? 98 | 99 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | val paradiseVersion = "2.1.0" 2 | 3 | name := "dbstage" 4 | 5 | version := "1.0" 6 | 7 | lazy val commonSettings = Seq( 8 | scalaVersion := "2.11.11", 9 | libraryDependencies += "ch.epfl.data" %% "squid" % "0.2-SNAPSHOT", 10 | libraryDependencies ++= Seq( 11 | "junit" % "junit-dep" % "4.10" % "test", 12 | "org.scalatest" % "scalatest_2.11" % "2.2.0" % "test" 13 | ), 14 | autoCompilerPlugins := true, 15 | addCompilerPlugin("org.scalamacros" % "paradise" % paradiseVersion cross CrossVersion.full), 16 | scalacOptions ++= Seq("-feature", "-language:postfixOps", "-unchecked") 17 | ) 18 | 19 | lazy val main = (project in file(".")) 20 | .settings(commonSettings: _*) 21 | .aggregate(macros) 22 | .dependsOn(macros) 23 | 24 | lazy val macros = (project in file("macros")) 25 | .settings(commonSettings: _*) 26 | .settings( 27 | name := "macros" 28 | ) 29 | 30 | -------------------------------------------------------------------------------- /data/Jobs.csv: -------------------------------------------------------------------------------- 1 | 0|engineer|100 2 | 1|doctor|120 3 | 2|physicist|80 4 | 4|unemployed|NULL 5 | -------------------------------------------------------------------------------- /data/Persons.csv: -------------------------------------------------------------------------------- 1 | 0|john 
smith|23|M 2 | 1|bob parker|41|M 3 | 2|julia smith|37|F 4 | 5|toto ronto|7|M 5 | 3|hugh carper|67|M 6 | 4|martha roberts|56|F 7 | 6|julia kenn|41|F 8 | 7|derpita derpa|7|F 9 | -------------------------------------------------------------------------------- /doc/internal/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | Have something like a 'StorageManager' 4 | 5 | 6 | TODO better handling of join conditions: 7 | represent as a `Seq[Code[Bool]]` and move down those conditions that do not concern the join 8 | 9 | 10 | 11 | Pull approaches: 12 | 13 | * Functional 'iterator' encoding: `() => Option[T]` 14 | Pro: nice interface 15 | Cons: options create complicated control-flow that the current IR does not handle well 16 | Cons: allocates options -- even if we can get rid of internal ones, the final code will still contain the type in its interface and thus options will still have to be allocated 17 | 18 | * Natural 'imperative iterator' encoding: `() => (() => Bool, () => T)` 19 | Pro: immediate mapping to Scala/Java iterators 20 | Pro: no Option allocation 21 | Cons: impl of `filter` is unnecessarily complicated and requires local optional variables, which sucks (even though they can be flattened to two local variables, it's still unnecessary variables) 22 | 23 | * Imperative CPS `() => (T => Unit) => Unit` 24 | Where the continuation passed must be invoked exactly once unless there are no more elements in the stream 25 | (This is the approach in GPCE17) 26 | Pro (big): simpler impl of operators 27 | Cons (big): generated code isn't nice for, e.g., combinations of filter (unless we merge them before generating the code), as they will contain nested while loops; in general, the fact that `filter` needs to loop seems problematic (is it really?)
28 | 29 | * Imperative CPS with boolean return `() => (T => Unit) => Bool` 30 | Where the boolean indicates whether there **potentially** are still elements in the stream; the continuation passed may not be executed every time, but it will be executed at most once each time 31 | Pro (big): simpler impl of operators 32 | Cons: if one wants to consume exactly one element, one has to loop until some local optional variable is set by the continuation 33 | Cons (minor): iterator impl very awkward: `hasNext` needs to execute the continuation, so it needs to put the value aside so `next` accesses it later 34 | (corollary: can't just peek to see if a given stream has any elements without actively tryin to consume one) 35 | 36 | * Imperative CPS with two boolean returns `() => (T => Unit) => (Bool,Bool)` 37 | Indicating potential elements left and whether an element was consumed or not 38 | Pro: avoids using a variable to know whether we've iterated or not 39 | 40 | * Imperative CPS + virtualized variable `() => (Var[Bool], (T => Unit) => Bool)` 41 | An interesting variation of the above, but it's not clear whether it has any actual advantages over it 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /macros/src/main/scala/dbstage/FieldForwarder.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | 3 | import scala.language.dynamics 4 | import scala.language.experimental.macros 5 | 6 | class FieldForwarder[T](val underlying: T) extends Dynamic { 7 | def wrapSelect[R](x:R):R = x 8 | final def selectDynamic(fieldName: String): Any = macro FieldForwarder.selectDynamicImpl 9 | } 10 | object FieldForwarder { 11 | import scala.reflect.macros.whitebox.Context 12 | def selectDynamicImpl(c: Context)(fieldName: c.Tree): c.Tree = { 13 | import c.universe._ 14 | val Literal(Constant(fieldNameStr:String)) = fieldName 15 | 
//println(c.prefix.tree.tpe.baseType(symbolOf[FieldForwarder[_]])) 16 | val pre = c.prefix.tree 17 | val wrappedType = pre.tpe.baseType(symbolOf[FieldForwarder[_]]) 18 | q"$pre.wrapSelect($pre.underlying.${TermName(fieldNameStr)})" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.15 2 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/Embedding.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | 3 | import squid.anf.analysis.BlockHelpers 4 | import squid.anf.transfo.EqualityNormalizer 5 | import squid.anf.transfo.LogicFlowNormalizer 6 | import squid.anf.transfo.LogicNormalizer 7 | import squid.anf.transfo.OptionNormalizer 8 | import squid.anf.transfo.StandardNormalizer 9 | import squid.anf.transfo.VarFlattening 10 | import squid.ir.CurryEncoding 11 | import squid.ir.FixPointRuleBasedTransformer 12 | import squid.ir.FixPointTransformer 13 | import squid.ir.OnlineOptimizer 14 | import squid.ir.SchedulingANF 15 | import squid.ir.SimpleANF 16 | import squid.ir.SimpleRuleBasedTransformer 17 | import squid.ir.StandardEffects 18 | import squid.ir.TopDownTransformer 19 | import squid.lang.ScalaCore 20 | import squid.quasi.SimpleReps 21 | 22 | object Embedding 23 | extends SimpleANF 24 | //extends SchedulingANF 25 | with StandardEffects 26 | with OnlineOptimizer 27 | //with StandardNormalizer 28 | with LogicNormalizer 29 | with OptionNormalizer // note: needed by VarFlattening 30 | with BlockHelpers 31 | with ScalaCore 32 | with EqualityNormalizer 33 | with CurryEncoding.ApplicationNormalizer 34 | { 35 | 36 | import Predef._ 37 | 38 | // We make heavy use of tuples as an abstraction mechanism; they need to be removed automatically: 39 | rewrite { 40 | case ir"($f:$t0=>$t1) andThen 
($g:t1 => $t2)" => ir"(x:$t0) => $g($f(x))" 41 | case ir"($lhs:$t0) -> ($rhs:$t1)" => ir"($lhs,$rhs)" 42 | case ir"($x0:$t0,$x1:$t1)._1" => x0 43 | case ir"($x0:$t0,$x1:$t1)._2" => x1 44 | case ir"($x0:$t0,$x1:$t1,$x2:$t2)._1" => x0 45 | case ir"($x0:$t0,$x1:$t1,$x2:$t2)._2" => x1 46 | case ir"($x0:$t0,$x1:$t1,$x2:$t2)._3" => x2 47 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3)._1" => x0 48 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3)._2" => x1 49 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3)._3" => x2 50 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3)._4" => x3 51 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3,$x4:$t4)._1" => x0 52 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3,$x4:$t4)._2" => x1 53 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3,$x4:$t4)._3" => x2 54 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3,$x4:$t4)._4" => x3 55 | case ir"($x0:$t0,$x1:$t1,$x2:$t2,$x3:$t3,$x4:$t4)._5" => x4 56 | } 57 | 58 | } 59 | 60 | object LogicFlow extends Embedding.SelfTransformer with LogicFlowNormalizer 61 | 62 | object LowLevelOpt extends Embedding.SelfTransformer with VarFlattening with FixPointRuleBasedTransformer with TopDownTransformer 63 | 64 | object FinalizeCode extends Embedding.TransformerWrapper(LogicFlow,LowLevelOpt) 65 | 66 | //object FinalizeCode extends Embedding.TransformerWrapper(LogicFlow,LowLevelOpt) with FixPointTransformer 67 | // ^ does not seem to converge, probably because LogicFlowNormalizer is an (atypical) IRTransformer 68 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/Serial.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | 3 | import squid.utils._ 4 | import Embedding.Predef._ 5 | 6 | class Serial[T](val parse: Code[String] => Code[T], val unparse: Code[T] => Code[String]) 7 | object Serial { 8 | implicit val SerialString = new Serial[String](identity, identity) 9 | implicit val SerialInt = new Serial[Int](str => ir"$str.toInt", x => 
ir"$x.toString") 10 | implicit val SerialBool = new Serial[Bool](str => ir"$str.toBoolean", x => ir"$x.toString") 11 | implicit def SerialOption[A:IRType:Serial] = { 12 | val serialA = implicitly[Serial[A]] 13 | new Serial[Option[A]]( 14 | str => ir"""if ($str == "NULL") None else Some(${serialA.parse(str)})""", 15 | x => ir"""$x.fold("NULL")(${serialA.unparse})""" 16 | ) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/example/SampleQueries.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | package example 3 | 4 | import Embedding.Predef._ 5 | import squid.utils._ 6 | import frontend._ 7 | import query._ 8 | 9 | // TODO add syntax sugar: 10 | // @column type Name = String 11 | // ~> 12 | // val Name = Column[String]("Name") 13 | 14 | class Person extends Relation { 15 | val Id = Column[Int]("Id", primary = true) 16 | val Name = Column[String]("Name") 17 | val Age = Column[Int]("Age") 18 | val Sex = Column[Sex]("Sex") 19 | } 20 | case object Person extends Person 21 | class HasJob extends Relation { 22 | val PersonId = Column[Int]("PId", foreign = Person.Id) 23 | val Title = Column[String]("Title") 24 | val Salary = Column[Option[Int]]("Salary") 25 | } 26 | case object HasJob extends HasJob 27 | 28 | /* 29 | Note that, for example, ir"$Age > 18" is syntax sugar for ir"${Age.toCode} > 18" (there is an implicit conversion) 30 | */ 31 | object OlderThan18 extends App { 32 | import Person._ 33 | Person.indexByKeys = false 34 | //Person.columnStore = true 35 | 36 | //Person.loadDataFromFile("data/persons.csv", compileCode = false) 37 | Person.loadDataFromFile("data/persons.csv") 38 | 39 | val p = from(Person) 40 | val j = from(HasJob) 41 | //p.printLines 42 | 43 | //val q0 = p where ir"${p.Age} > 18" select (p.Name) 44 | //val q0 = from(Person) where ir"$Age > 18" select (Name,Age) 45 | val q0 = from(Person) where ir"$Age > 18" where 
ir"$Sex == Male" select (Name,Age) 46 | println(q0) 47 | val q1 = (p join j)(ir"${p.Id} == ${j.PersonId}") 48 | println(q1) 49 | //println(q1.plan) 50 | 51 | 52 | // Pushing: 53 | 54 | //println(q0.plan.foreach(x => println(x))) 55 | val fe = q0.plan.foreach 56 | fe { case (name, age) => assert(age > 18); println(s"$name $age") } 57 | 58 | 59 | // Pulling: 60 | 61 | val it = (q0 select (Name,Age)).plan.iterator 62 | //val it = (q0 select (Name,Age)).plan.iterator2 63 | while (it.hasNext) { 64 | val (name, age) = it.next() 65 | assert(age > 18) 66 | println(s"$name $age") 67 | } 68 | 69 | } 70 | 71 | object OlderThan18_ColStore extends App { 72 | import Person._ 73 | Person.indexByKeys = false // can switch this on 74 | Person.columnStore = true 75 | 76 | Person.loadDataFromFile("data/persons.csv", compileCode = false) 77 | 78 | val q0 = from(Person) where ir"$Age > 18" where ir"$Sex == Male" select (Name,Age) 79 | q0.printLines() 80 | 81 | } 82 | 83 | /* 84 | TODO: allow syntax: `val m = from(Person) where (ir"$Sex == Male"); ... m.Age ...` 85 | TODO: allow naming of sources? 
as in: `val p = from(Person, "p")` 86 | */ 87 | object PotentialCouples extends App { 88 | import Person._ 89 | //Person.indexByKeys = false // TODO missing impl 90 | 91 | Person.loadDataFromFile("data/persons.csv", compileCode = false) 92 | 93 | val m = from(Person) 94 | val f = from(Person) 95 | 96 | val males = m where (ir"$Sex == Male") 97 | //males.printLines 98 | //males.selectStringRepr().plan.foreach(println) 99 | 100 | val females = f where (ir"$Sex == Female") 101 | //females.printLines 102 | 103 | val q = (males join females)(ir"${m.Age} == ${f.Age}") 104 | println(q) 105 | println(q.plan) 106 | q.printLines() 107 | 108 | val q2 = (males join females)(ir"${m.Age} == ${f.Age}") select (m.Age, m.Name, f.Name, m.Id, f.Id) 109 | q2.printLines() 110 | 111 | // `foreachLifted` can be used to include the foreach'ed function as part of the generated code: 112 | //q2.plan.foreachLifted(ir"println(_:Any)") 113 | 114 | } 115 | 116 | object PotentialCouples_ColStore extends App { 117 | import Person._ 118 | Person.indexByKeys = false 119 | //Person.columnStore = true 120 | 121 | Person.loadDataFromFile("data/persons.csv", compileCode = false) 122 | 123 | val m = from(Person) 124 | val f = from(Person) 125 | val q = ( 126 | ((m where ir"$Sex == Male") join (f where ir"$Sex == Female"))(ir"${m.Age} == ${f.Age}") 127 | select (m.Age, m.Name, f.Name, m.Id, f.Id) 128 | ) 129 | q.printLines() 130 | 131 | } 132 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/example/Sex.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | package example 3 | 4 | import squid.utils._ 5 | import Embedding.Predef._ 6 | import Embedding.Quasicodes._ 7 | 8 | sealed abstract class Sex extends Product with Serializable { 9 | def serialize: String 10 | } 11 | object Sex { 12 | implicit val serial = new Serial[Sex]( 13 | str => ir{ if (${str} == "M") Male else if (${str} == "F") 
Female else lastWords(s"Unknown sex: ${${str}}") }, 14 | s => ir{ ${s}.serialize }) 15 | } 16 | case object Male extends Sex { def serialize = "M" } 17 | case object Female extends Sex { def serialize = "F" } 18 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/frontend/Relation.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | package frontend 3 | 4 | import scala.collection.mutable 5 | import squid.utils._ 6 | import Embedding.Predef._ 7 | import runtime._ 8 | 9 | /* 10 | TODO: implement hybrid row-store/col-store 11 | Partition the columns into groups of columns that are frequently accessed together and should thus be stored in the same array. 12 | */ 13 | class Relation { 14 | protected val curColumns = mutable.ArrayBuffer[Column]() 15 | 16 | var indexByKeys = true 17 | var columnStore = false 18 | 19 | val table = Lazy { 20 | val (keys,values) = columns.partition(_.isPrimary) 21 | if (keys.nonEmpty && indexByKeys) UniqueIndexedTable(keys, values, columns.map(_.name)) 22 | else { 23 | val (foreignKeys,values) = columns.partition(_.isForeignKey) 24 | if (foreignKeys.nonEmpty && indexByKeys) GeneralIndexedTable(foreignKeys, values, columns.map(_.name)) 25 | else if (columnStore) ColumnStore(values) 26 | else Table(values) 27 | } 28 | } 29 | 30 | def columns: Seq[Column] = curColumns 31 | 32 | // The Column/Field/FieldRef organization is a bit messy; could probably do things in a clearer, less confusing way 33 | abstract class Column(override val name: String, val isPrimary: Bool, val foreignKey: Option[Relation # Column]) extends Field(name) { 34 | assert(!table.computed, "Cannot add a column after the relation has started being used! 
Make a new relation first.") 35 | curColumns += this 36 | def isForeignKey = foreignKey.nonEmpty 37 | } 38 | object Column { 39 | def apply[T0:IRType:Serial](name: String, primary: Bool = false, foreign: Relation # Column = null) = { 40 | new Column(name,primary,Option(foreign)) { 41 | type T = T0 42 | val IRTypeT: IRType[T] = implicitly 43 | val SerialT: Serial[T] = implicitly 44 | } 45 | } 46 | } 47 | 48 | def loadDataFromFile(fileName: String, compileCode: Bool = true) = { 49 | log(s"Loading data from file $fileName into table $this") 50 | val src = scala.io.Source.fromFile(fileName) 51 | 52 | val tbl = table.value 53 | 54 | // Loading data with `tbl.loadData` uses specialized runtime-compiled code 55 | if (compileCode) tbl.loadData(src.getLines(), '|') 56 | // `tbl.mkDataLoader` currently uses the same code but interpreted; it might be more efficient to just use generic code! 57 | else tbl.mkDataLoader('|').run(src.getLines()) 58 | 59 | } 60 | 61 | } 62 | 63 | // Freestanding fields not associated with a table 64 | // Note: the `Field` class could extend Embedding.IR so we wouldn't need implicit conversions! 
65 | abstract class Field(override val name: String, in: Option[Int] = None) extends FieldRef(name,in) { thisField => 66 | implicit val SerialT: Serial[T] 67 | def toCode: Code[T] = in.fold(ir"field[T](${Const(name)})") { q => ir"fieldIn[T](${Const(name)},${Const(q)})" } 68 | def withId (id: Int): Field = Field[T](name,Some(id)) 69 | } 70 | object Field { 71 | def apply[S:IRType:Serial](name: String, in: Option[Int] = None) = new Field(name,in) { 72 | type T = S 73 | val IRTypeT: IRType[T] = implicitly 74 | val SerialT: Serial[T] = implicitly 75 | } 76 | // TODO distinguish Field/SerialField 77 | } 78 | 79 | abstract class FieldRef(val name: String, val id: Option[Int]) { 80 | type T 81 | implicit val IRTypeT: IRType[T] 82 | override def toString = s"Field[${IRTypeT.rep}]($name)${id.fold(""){ q => s" in $q"}}" 83 | def conformsTo(f: FieldRef) = f.name == name && id.forall(_ => id == f.id) 84 | } 85 | object FieldRef { 86 | def apply[S:IRType](name: String, id: Option[Int] = None) = new FieldRef(name,id) { 87 | type T = S 88 | val IRTypeT: IRType[T] = implicitly 89 | } 90 | def unapply(x: Code[_]): Option[FieldRef] = x match { 91 | // TODO use irreftable Const xtors... 
92 | case ir"field[$tp](${Const(name)})" => 93 | Some(FieldRef[tp.Typ](name,None)) 94 | case ir"fieldIn[$tp](${Const(name)}, ${Const(id)})" => 95 | Some(FieldRef[tp.Typ](name,Some(id))) 96 | case _ => None 97 | } 98 | } 99 | case class CodeField[S:IRType:Serial](override val name: String, override val toCode: Code[S]) extends Field(name) { 100 | type T = S 101 | val IRTypeT: IRType[T] = implicitly 102 | val SerialT: Serial[T] = implicitly 103 | } 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/frontend/package.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | 3 | import squid.utils._ 4 | import Embedding.Predef._ 5 | import squid.lib.transparencyPropagating 6 | import squid.lib.transparent 7 | 8 | package object frontend { 9 | 10 | def log(msg: String) = System.err.println(s"[${new java.util.Date}] $msg") 11 | 12 | // Used in temporary code representations to encode SQL field references 13 | @transparent def field[T](name: String): T = ??? 14 | @transparent def fieldIn[T](name: String, uid: Int): T = ??? 15 | 16 | import scala.language.implicitConversions 17 | implicit def field2Code(f: Field): IR[f.T,Any] = f.toCode.asClosedIR // there is currently no unquote $ overload for Code in Squid... 
18 | //def field2CodeOf[S:IRType](f: Field{type T <: S}): Code[S] = f.toCode 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/package.scala: -------------------------------------------------------------------------------- 1 | import squid.utils._ 2 | import squid.lib.transparent 3 | 4 | package object dbstage { 5 | 6 | val MAX_SCALA_TUPLE_ARITY = 22 7 | 8 | @inline @transparent 9 | def loopWhile(cnd: => Bool) = { 10 | while(cnd)() 11 | } 12 | 13 | type IteratorRep[T] = () => (() => Bool, () => T) 14 | 15 | import scala.language.implicitConversions 16 | import Embedding.Predef._ 17 | implicit def interop[T](q: Code[T]): IR[T,Any] = q.asClosedIR // because currently Squid requires an IR (eg: for calling .compile and .run) 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/query/Query.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | package query 3 | 4 | import java.io.PrintStream 5 | 6 | import squid.utils._ 7 | import frontend._ 8 | import runtime._ 9 | import Embedding.Predef._ 10 | 11 | sealed trait Query { 12 | //lazy val bestPlan: QueryPlan = ??? 
13 | 14 | def join(that: Query)(pred: Code[Bool]) = Join(this, that, pred) 15 | def where(pred: Code[Bool]) = Where(this, pred) 16 | 17 | def selectStringRepr(): ToString = ToString(this) 18 | def select(c0: Field): Select[c0.T] = Select(this, c0 :: Nil) 19 | def select(c0: Field, c1: Field): Select[(c0.T,c1.T)] = Select(this, c0 :: c1 :: Nil) 20 | def select(c0: Field, c1: Field, c2: Field): Select[(c0.T,c1.T,c2.T)] = Select(this, c0 :: c1 :: c2 :: Nil) 21 | def select(c0: Field, c1: Field, c2: Field, c3: Field): Select[(c0.T,c1.T,c2.T,c3.T)] = Select(this, c0 :: c1 :: c2 :: c3 :: Nil) 22 | def select(c0: Field, c1: Field, c2: Field, c3: Field, c4: Field): Select[(c0.T,c1.T,c2.T,c3.T,c4.T)] = Select(this, c0 :: c1 :: c2 :: c3 :: c4 :: Nil) 23 | // ... 24 | 25 | def plan: QueryPlan = this match { 26 | case f @ From(r) => Scan(r.table.value, f.uid) 27 | case Where(q,p) => Filter(q.plan, p) 28 | case Select(q,cs) => Project(q.plan, cs) 29 | case Join(lhs,rhs,p) => 30 | p |> EqualityPredicate.unapply map (QueryPlan.equiJoin(lhs.plan,rhs.plan,_)) getOrElse ??? 31 | } 32 | 33 | def pushLines(consume: String => Unit, pushHeader: Bool = true) = { 34 | val fe = selectStringRepr().plan.foreach 35 | if (pushHeader) { 36 | val header = plan.rowFormat.columns.map(f => f.name+f.id.fold("")("("+_.toString+")")).mkString("|","|","|") 37 | consume(header) 38 | consume(header.map { case '|' => '|' case _ => '-' }) 39 | } 40 | fe(consume) 41 | } 42 | def printLines(out: PrintStream = System.out) = pushLines(out.println) 43 | def mkLines(sep: String = "\n"): String = { 44 | val sb = new StringBuilder 45 | pushLines(sb ++= _ alsoDo (sb ++= sep)) 46 | sb.result 47 | } 48 | 49 | } 50 | object Query { 51 | private[query] var curId = 0 52 | } 53 | 54 | case class From[R <: Relation](val rel: R) extends FieldForwarder[R](rel) with Query { 55 | val uid = Query.curId alsoDo (Query.curId += 1) 56 | override def wrapSelect[R](x: R): R = x |>=? 
{ 57 | case f: Field => (f withId uid).asInstanceOf[R] 58 | } 59 | override def toString: String = s"From($rel){${uid}}" 60 | } 61 | 62 | case class Where(that: Query, pred: Code[Bool]) extends Query { 63 | 64 | } 65 | 66 | case class Join(lhs: Query, rhs: Query, pred: Code[Bool]) extends Query { 67 | 68 | } 69 | 70 | case class Select[T](that: Query, cols: Seq[Field]) extends Query { 71 | override def plan = super.plan.asInstanceOf[Project[T]] // TODO better expressed? 72 | } 73 | 74 | case class ToString(that: Query) extends Query { 75 | override def plan: Print = Print(that.plan) 76 | } 77 | 78 | 79 | object EqualityPredicate { 80 | def unapply(x: Code[Bool]): Option[Set[FieldRef -> FieldRef]] = x |>? { 81 | case ir"(${EqualityPredicate(lhs)}:Bool) && (${EqualityPredicate(rhs)}:Bool)" => lhs ++ rhs 82 | case ir"(${FieldRef(f0)}:$t0) equals (${FieldRef(f1)}:$t1)" => 83 | Set(f0 -> f1) 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/query/QueryPlan.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | package query 3 | 4 | import squid.utils._ 5 | import frontend._ 6 | import runtime._ 7 | import Embedding.Predef._ 8 | 9 | 10 | sealed trait QueryPlan { 11 | import rowFormat.Repr 12 | 13 | val uid = 42 // TODO rm 14 | val rowFormat: RowFormat 15 | type Row = rowFormat.Repr 16 | val taggedColumns: Seq[Field] 17 | lazy val cost: Double = ??? // TODO impl & use 18 | 19 | def push(step: Code[Row => Bool]): CrossStage[Unit] = pushImpl(step) map (_ transformWith FinalizeCode) 20 | def pushImpl(step: Code[Row => Bool]): CrossStage[Unit] = ??? 21 | 22 | def pull: CrossStage[() => (Row => Unit) => Bool] = pullImpl map (_ transformWith FinalizeCode) 23 | def pullImpl: CrossStage[() => (Row => Unit) => Bool] = ??? 
// TODO impl everywhere
24 | 
25 | def foreachCode(f: Code[Row => Unit]) = push(ir"(x:Row) => {$f(x); true}") // wraps `f` as a push continuation that always returns true, i.e. never asks the producer to stop early
26 | 
27 | lazy val foreachLiftedCode: CrossStage[(Row => Unit) => Unit] = CrossStage.magic((f: Code[Row => Unit]) => foreachCode(f)) // traversal staged once, abstracted over the per-row callback, so a single compilation is reused with any callback
28 | lazy val foreach = foreachLiftedCode.compile() // compiled, directly-callable traversal of this plan's rows
29 | 
30 | // TODO use aggregate of Unit
31 | //def foreachCode(f: Code[Row => Unit]) = foreachLiftedCode.map(fe => ir"$fe($f)").compile()
32 | def foreachLifted(f: Code[Row => Unit]) = foreachCode(f).compile() // compiles a traversal specialized to one fixed callback `f`
33 | 
34 | lazy val mkPull = pull.compile // compiled factory; each invocation produces a fresh pull-style cursor over this plan
35 | def iterator = new Iterator[Row] { // adapts the pull interface to a standard Scala Iterator
36 | val curPull = mkPull()() // cursor private to this Iterator instance
37 | var curElem = Option.empty[Row] // one-element lookahead buffer: filled by hasNext, drained by next
38 | def hasNext: Boolean = { // pull until an element is buffered or the cursor reports exhaustion (returns false)
39 | loopWhile {
40 | curElem.isEmpty && curPull { e => curElem = Some(e) }
41 | }
42 | curElem.isDefined
43 | }
44 | def next(): Row = {
45 | assert(hasNext) // also ensures the lookahead buffer is filled before we read it
46 | val e = curElem.get
47 | curElem = None
48 | e
49 | }
50 | }
51 | 
52 | def pull2: CrossStage[IteratorRep[Row]] = pullImpl2 map (_ transformWith FinalizeCode) // second pull variant: a staged (hasNext, next) pair of thunks
53 | def pullImpl2: CrossStage[IteratorRep[Row]] = ???
54 | lazy val mkPull2 = pull2.compile 55 | def iterator2 = new Iterator[Row] { 56 | val curPull = mkPull2()() 57 | def hasNext: Boolean = curPull._1() 58 | def next(): Row = curPull._2() 59 | } 60 | 61 | def asIndexedOn(cols: Set[FieldRef]) = Option.empty[IndexedQueryPlan] 62 | 63 | def columnByRef(fr: FieldRef): Option[Field] = taggedColumns find { fr conformsTo _ } 64 | 65 | } 66 | object QueryPlan { 67 | def equiJoin(lhs: QueryPlan, rhs: QueryPlan, eqCols: Set[FieldRef -> FieldRef]) = { 68 | //println(s"Making equiJoin($lhs, $rhs, $eqCols)") 69 | val (lhsf,rhsf) = eqCols.iterator.map { case f0 -> f1 => 70 | (lhs columnByRef f0, lhs columnByRef f1, rhs columnByRef f0, rhs columnByRef f1) match { 71 | //case (Some(lf), None, None, Some(rf)) => f0 -> f1 72 | //case (None, Some(lf), Some(rf), None) => f1 -> f0 73 | case (Some(lf), None, None, Some(rf)) => lf -> rf 74 | case (None, Some(lf), Some(rf), None) => lf -> rf 75 | case r => 76 | println(f0,f1,r) 77 | ??? 78 | //case _ => die // otherwise: Warning:(106, 28) Exhaustivity analysis reached max recursion depth, not all missing cases are reported. 
79 | } 80 | }.toSeq.unzip 81 | //println(lhsf,rhsf) 82 | lhs.asIndexedOn(lhsf.toSet) -> rhs.asIndexedOn(rhsf.toSet) //alsoApply println 83 | match { 84 | case None -> None => 85 | HashJoin(lhs, rhs, lhsf, rhsf) 86 | // TODO the right thing if the keys of one of them are the primary keys 87 | } 88 | } 89 | } 90 | 91 | case class Scan(tbl: Table, fromId: Int) extends QueryPlan { 92 | override val rowFormat: tbl.rowFmt.type = tbl.rowFmt.withId(fromId).asInstanceOf[tbl.rowFmt.type] // note: type not right 93 | val taggedColumns = rowFormat.columns map (_ withId fromId) //map (c => c.name -> c) toMap 94 | override def pushImpl(step: Code[Row => Bool]): CrossStage[Unit] = tbl.push(step) 95 | override def pullImpl: CrossStage[() => (Row => Unit) => Bool] = tbl.pull 96 | override def pullImpl2: CrossStage[IteratorRep[Row]] = tbl.pull2 97 | override def asIndexedOn(cols: Set[FieldRef]) = tbl match { 98 | case tbl: IndexedTable if tbl.keys.size == cols.size && tbl.keys.forall(c => cols.exists(_ conformsTo c)) => 99 | Some(new IndexedQueryPlan(this, tbl.keys)) 100 | case _ => 101 | None 102 | } 103 | } 104 | case class Project[T] protected(that: QueryPlan, cols: Seq[Field]) extends QueryPlan { 105 | import that.rowFormat.{Repr => ThatRepr} 106 | import rowFormat.Repr 107 | 108 | override val rowFormat = RowFormat(cols).asInstanceOf[RowFormat.Of[T]] 109 | // ^ we make the assumption that the signatures of `select` match with the format selected by `RowFormat.apply` 110 | 111 | override def pushImpl(step: Code[Row => Bool]): CrossStage[Unit] = { 112 | that.pushImpl(ir"(${that.rowFormat.lift(rowFormat.mkRefs,uid)}) andThen $step") 113 | } 114 | override def pullImpl: CrossStage[() => (Row => Unit) => Bool] = 115 | //that.pullImpl.map(p => ir"(k:Row => Unit) => $p(r => k(${that.rowFormat.lift(rowFormat.mkRefs,uid)}(r)))") 116 | that.pullImpl.map(p => ir"() => {val p = $p(); (k:Row => Unit) => p(r => k(${that.rowFormat.lift(rowFormat.mkRefs,uid)}(r)))}") 117 | override def 
pullImpl2: CrossStage[IteratorRep[Row]] = 118 | that.pullImpl2.map(p => ir"() => { val p = $p(); val hn = p._1; val ne = p._2; hn -> (() => ${that.rowFormat.lift(rowFormat.mkRefs,uid)}(ne())) }") 119 | 120 | override val taggedColumns = cols // TODO check no name clashes... 121 | } 122 | case class Filter(that: QueryPlan, pred: Code[Bool]) extends QueryPlan { 123 | override val rowFormat: that.rowFormat.type = that.rowFormat 124 | import rowFormat.Repr 125 | 126 | override def pushImpl(step: Code[Row => Bool]): CrossStage[Unit] = 127 | //that.push(ir"(x:Row) => !${rowFormat.lift(pred,uid)}(x) || $step(x)") 128 | that.pushImpl(ir"(x:Row) => if (${rowFormat.lift(pred,uid)}(x)) $step(x) else true") 129 | 130 | // TODO 131 | override def pullImpl: CrossStage[() => (Row => Unit) => Bool] = 132 | //that.pullImpl.map(p => ir"(k:Row => Unit) => while (!${rowFormat.lift(pred,uid)}(x)) $p") 133 | //that.pullImpl.map(p => ir"val p = $p; () => (k:Row => Unit) => loopWhile { val e = p(); !${rowFormat.lift(pred,uid)}(e) }") 134 | // 135 | //that.pullImpl.map(p => ir"val p = $p(); () => (k:Row => Unit) => loopWhile { p(e => !${rowFormat.lift(pred,uid)}(e)) }") 136 | that.pullImpl.map(p => ir"val p = $p(); () => (k:Row => Unit) => p(e => if (${rowFormat.lift(pred,uid)}(e)) k(e))") 137 | 138 | override def pullImpl2: CrossStage[IteratorRep[Row]] = that.pullImpl2.map(p => 139 | // Note: Using an Option variable that will be removed by the VarFlattening pass 140 | ir"""() => { 141 | val p = $p() 142 | //(() => p._1(), () => p._2()) 143 | var cur = Option.empty[Row] 144 | val hn = () => { 145 | while (cur.isEmpty && p._1()) { 146 | val next = p._2() 147 | if (${rowFormat.lift(pred,uid)}(next)) cur = Some(next) 148 | } 149 | cur.isDefined 150 | } 151 | val ne = () => { 152 | val res = cur.get 153 | cur = None 154 | res 155 | } 156 | hn -> ne 157 | }""") 158 | 159 | override def asIndexedOn(cols: Set[FieldRef]) = that.asIndexedOn(cols) 160 | override val taggedColumns = 
that.taggedColumns 161 | } 162 | abstract class JoinPlan(val lhs: QueryPlan, val rhs: QueryPlan) extends QueryPlan { 163 | override val rowFormat: CompositeFormat[lhs.rowFormat.type,rhs.rowFormat.type] = CompositeFormat(lhs.rowFormat, rhs.rowFormat) 164 | } 165 | case class NestedLoopJoin(override val lhs: QueryPlan, override val rhs: QueryPlan, pred: Code[Bool]) extends JoinPlan(lhs,rhs) { 166 | val taggedColumns = ??? 167 | } 168 | case class IndexedJoin(lhs: IndexedTable, rhs: QueryPlan, rhsHash: Seq[Field]) extends QueryPlan { 169 | // TODO 170 | val rowFormat: RowFormat = ??? 171 | val taggedColumns = ??? 172 | } 173 | case class HashJoin(override val lhs: QueryPlan, override val rhs: QueryPlan, lhsHash: Seq[Field], rhsHash: Seq[Field]) extends JoinPlan(lhs,rhs) { 174 | val taggedColumns = lhs.taggedColumns ++ rhs.taggedColumns 175 | 176 | override def pushImpl(step: Code[Row => Bool]): CrossStage[Unit] = { 177 | val lkeys = RowFormat(lhsHash) 178 | val lvals: lhs.rowFormat.type = lhs.rowFormat 179 | val rkeys = RowFormat(rhsHash).asInstanceOf[RowFormat{type Repr = lkeys.Repr}] 180 | val rvals: rhs.rowFormat.type = rhs.rowFormat 181 | import lkeys.{Repr => LKeys}, lvals.{Repr => LVals}, rvals.{Repr => RVals} 182 | type RKeys = LKeys // < ^ no need for import rkeys.{Repr => RKeys} as it is the same as LKeys 183 | import Embedding.Quasicodes._ 184 | import scala.collection.mutable.{HashMap,HashSet} 185 | 186 | val r = for { 187 | lfe <- lhs.foreachLiftedCode 188 | rfe <- rhs.foreachLiftedCode 189 | } yield ir{ 190 | //val hashTable = new HashMap[LKeys,HashSet[LVals]]() // FIXME not yet handled by LogicFlowNormalizer 191 | val hashTable = HashMap[LKeys,HashSet[LVals]]() 192 | ${lfe} { r => 193 | val k = ${lvals.lift(lkeys.mkRefs,uid)}(r) 194 | //println(s"Adding $k -> $r"); 195 | hashTable.getOrElseUpdate(k,HashSet()) += r 196 | } 197 | ${rfe} { r => 198 | //println(r) 199 | hashTable.get(${rvals.lift(rkeys.mkRefs,uid)}(r)) 200 | .foreach(ts => ts.foreach { t => 
201 | //println(s"Found $t -> $r"); 202 | ${step}(t -> r) 203 | }) 204 | } 205 | } 206 | r 207 | } 208 | 209 | } 210 | 211 | case class Print(that: QueryPlan) extends QueryPlan { 212 | import that.rowFormat.Repr 213 | override val rowFormat = SingleColumnFormat[String]("StringRep") 214 | override def pushImpl(step: Code[Row => Bool]): CrossStage[Unit] = { 215 | val sep = Const("|") 216 | var colsToString = sep 217 | for (c <- that.taggedColumns) colsToString = { 218 | import c._ 219 | ir"""$colsToString + ${SerialT.unparse}($toCode) + $sep""" 220 | } 221 | that.pushImpl(ir"(x:that.Row) => $step(${that.rowFormat.lift(colsToString,uid)}(x))") 222 | } 223 | override val taggedColumns = rowFormat.col :: Nil 224 | } 225 | 226 | 227 | class IndexedQueryPlan(that: QueryPlan, keys: Seq[Field]) extends QueryPlan { 228 | // TODO 229 | val rowFormat: RowFormat = ??? 230 | val taggedColumns = ??? 231 | } 232 | 233 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/query/package.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | 3 | import squid.utils._ 4 | import frontend._ 5 | import runtime._ 6 | import Embedding.Predef._ 7 | 8 | package object query { 9 | 10 | def from(rel: Relation): From[rel.type] = new From(rel) 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/main/scala/dbstage/runtime/Table.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | package runtime 3 | 4 | import scala.collection.mutable 5 | import scala.io._ 6 | import squid.utils._ 7 | import Embedding.Predef._ 8 | import frontend._ 9 | import squid.lib.transparencyPropagating 10 | 11 | 12 | abstract class FieldReifier { 13 | def apply(f: FieldRef): Code[f.T] 14 | } 15 | trait RowFormat { thisRow => 16 | type Repr 17 | implicit val Repr: IRType[Repr] 18 | val columns: Seq[Field] 19 
| 
20 | val parse: Map[String, Code[String]] => Code[Repr] // given code for the raw string cell of each column name, builds code parsing them into one row value
21 | protected def getField(f:FieldRef) = columns.find(f conformsTo _).fold(throw new Exception(s"No column '${f}' in $this")) { c => // TODO B/E
22 | assert(c.IRTypeT <:< f.IRTypeT) // the stored column's type must conform to the type the reference expects
23 | c
24 | }
25 | def get(repr:Embedding.Rep,f:FieldRef): Embedding.Rep // code extracting field `f` out of a row representation `repr`
26 | def lift2[T](fr: FieldReifier => Code[T]): Code[Repr => T] = { // builds a `Repr => T` lambda; hands `fr` a reifier turning field refs into projections on the bound row parameter
27 | val repr = Embedding.bindVal(columns.map(_.name).mkString("_"),Repr.rep,Nil)
28 | Embedding.IR(Embedding.lambda(repr::Nil,
29 | fr(new FieldReifier {
30 | def apply(f: FieldRef) = Embedding.IR(get(repr|>Embedding.readVal,f))
31 | }).rep
32 | ))
33 | }
34 | def lift[T](q: Code[T], uid: Int): Code[Repr => T] = { // closes `q` over this row format: every FieldRef occurring in `q` is rewritten into an access on the lambda's row parameter
35 | lift2 { fr =>
36 | val r = q rewrite { case ir"${FieldRef(f)}:Any" => fr(f).asClosedIR } // note: Any, unsound
37 | //println("Lifting: "+q)
38 | //println("Lifted : "+r)
39 | r
40 | }
41 | }
42 | 
43 | // TODO implement everywhere:
44 | def mkRefs: Code[Repr] = ??? // code building a row value made of this format's own field references
45 | def runtimeReprOf(values: Any*): Repr = ??? // current-stage (non-staged) construction of a row value from plain values
46 | 
47 | def withId(id: Int): RowFormat{type Repr = thisRow.Repr} // same format with every column tagged by `id` (ids distinguish multiple uses of the same relation — see `From`)
48 | 
49 | override def toString = s"Row[${Repr.rep}](${columns mkString ","})"
50 | }
51 | object RowFormat {
52 | def apply(cols: Seq[Field]): RowFormat = { // picks a representation from the column count: single column, flat tuple, or a nested composite when exceeding tuple arity
53 | val size = cols.size
54 | if (size == 0) lastWords("Table with no columns")
55 | if (size == 1) new SingleColumnFormat(cols.head)
56 | else if (size > MAX_SCALA_TUPLE_ARITY) { // too wide for one Scala tuple: split roughly in half and nest recursively
57 | val t = cols.take(size/2)
58 | CompositeFormat(RowFormat(t), RowFormat(cols.drop(t.size)))
59 | }
60 | else new TupleFormat(cols)
61 | }
62 | type Tuple[T] = TupleFormat{type Repr = T}
63 | type Of[T] = RowFormat{type Repr = T} // refinement alias: a format whose row representation is exactly T
64 | }
65 | class SingleColumnFormat(val col: Field) extends RowFormat { // degenerate format: the row IS the single column's value, with no tuple wrapping
66 | type Repr = col.T
67 | implicit val Repr = col.IRTypeT // Note: removing `implicit` creates a compiler crash... 
68 | val columns = col :: Nil 69 | val parse = (cs: Map[String, Code[String]]) => ir"${col.SerialT.parse}(${cs(col.name)})" 70 | def get(repr:Embedding.Rep,f:FieldRef): Embedding.Rep = { getField(f); repr } 71 | override def mkRefs: Code[Repr] = col.toCode 72 | def withId(id: Int) = { 73 | import col.SerialT 74 | SingleColumnFormat[Repr](col.name, Some(id)) 75 | } 76 | override def runtimeReprOf(values: Any*): Repr = { 77 | //values |> { case Seq(v) => v } 78 | assert(values.size === 1) 79 | values.head.asInstanceOf[Repr] 80 | } 81 | } 82 | object SingleColumnFormat { 83 | def apply[S:IRType:Serial](name: String, id: Option[Int] = None) = { 84 | val c: Field{type T = S} = Field[S](name, id) 85 | new SingleColumnFormat(c) { 86 | override val col: Field{type T = S} = c 87 | } 88 | } 89 | } 90 | case class TupleFormat(columns: Seq[Field]) extends RowFormat { thisFmt => 91 | val size = columns.size 92 | require(size > 0 && size <= MAX_SCALA_TUPLE_ARITY) 93 | val clsSym = base.loadTypSymbol(s"scala.Tuple${size}") 94 | val objSym = base.loadTypSymbol(s"scala.Tuple${size}$$") 95 | val mtd = base.loadMtdSymbol(objSym, "apply", None) 96 | val typs = columns.map(_.IRTypeT.rep).toList 97 | val typ = base.staticTypeApp(clsSym, typs) 98 | implicit val Repr = base.IRType[Repr](typ) 99 | val parse = { 100 | import base._ 101 | (cs: Map[String, Code[String]]) => { 102 | IR(methodApp(staticModule(s"scala.Tuple${size}"), mtd, typs, Args(columns map (c => 103 | ir"${c.SerialT.parse}(${cs(c.name)}):${c.IRTypeT}".rep): _*)::Nil, typ)) // the ascription ${c.IRTypeT} is to prevent the QQ from using a TypeTag 104 | } 105 | } 106 | def get(repr:Embedding.Rep,f:FieldRef): Embedding.Rep = { 107 | val c = getField(f) 108 | base.methodApp(repr,base.loadMtdSymbol(clsSym, "_" + (columns.indexWhere(c conformsTo _)+1), None),Nil,Nil,c.IRTypeT.rep) 109 | } 110 | override def mkRefs: Code[Repr] = { // TODO factor with parse 111 | import base._ 112 | IR(methodApp(staticModule(s"scala.Tuple${size}"), 
mtd, typs, Args(columns map (c => c.toCode.rep) : _*)::Nil, typ)) 113 | } 114 | def mk(xs:Code[Any]*): Code[Repr] = { // TODO factor with parse 115 | import base._ 116 | IR(methodApp(staticModule(s"scala.Tuple${size}"), mtd, typs, Args(xs map (_.rep) : _*)::Nil, typ)) 117 | } 118 | def withId(id: Int) = TupleFormat(columns map (_ withId id)).asInstanceOf[TupleFormat{type Repr = thisFmt.Repr}] 119 | override def runtimeReprOf(values: Any*): Repr = { 120 | assert(values.size === size) 121 | (values match { 122 | case Seq(v0,v1) => (v0,v1) 123 | case Seq(v0,v1,v2) => (v0,v1,v2) 124 | case Seq(v0,v1,v2,v3) => (v0,v1,v2,v3) 125 | case Seq(v0,v1,v2,v3,v4) => (v0,v1,v2,v3,v4) 126 | case Seq(v0,v1,v2,v3,v4,v5) => (v0,v1,v2,v3,v4,v5) 127 | case Seq(v0,v1,v2,v3,v4,v5,v6) => (v0,v1,v2,v3,v4,v5,v6) 128 | case Seq(v0,v1,v2,v3,v4,v5,v6,v7) => (v0,v1,v2,v3,v4,v5,v6,v7) 129 | case Seq(v0,v1,v2,v3,v4,v5,v6,v7,v8) => (v0,v1,v2,v3,v4,v5,v6,v7,v8) 130 | case Seq(v0,v1,v2,v3,v4,v5,v6,v7,v8,v9) => (v0,v1,v2,v3,v4,v5,v6,v7,v8,v9) 131 | case Seq(v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10) => (v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10) 132 | case _ => lastWords(s"Tuple arity not yet supported: $size, for: ${values}") 133 | }).asInstanceOf[Repr] 134 | } 135 | } 136 | case class CompositeFormat[L<:RowFormat,R<:RowFormat](val lhs: L, val rhs: R) extends RowFormat { // TODO make abstract class/trait? 
137 | import lhs.{Repr=>LR}, rhs.{Repr=>RR} // short aliases for the two sides' row representations
138 | type Repr = (lhs.Repr, rhs.Repr) // a composite row is simply the pair of its two sub-rows
139 | lazy val Repr = irTypeOf[(LR,RR)]
140 | val columns = lhs.columns ++ rhs.columns // column order: all left columns first, then all right columns
141 | val parse = (cs: Map[String, Code[String]]) => ir"(${lhs.parse(cs)}, ${rhs.parse(cs)})" // both halves parse from the same per-column cell map
142 | def get(repr:Embedding.Rep,f:FieldRef): Embedding.Rep = { // routes the field access to whichever side owns the column (left side checked first)
143 | if (lhs.columns.exists(f conformsTo _))
144 | lhs.get(ir"${Embedding.IR[Repr,Any](repr)}._1".rep,f)
145 | else {
146 | assert(rhs.columns.exists(f conformsTo _), s"Field ref $f is not in $this")
147 | rhs.get(ir"${Embedding.IR[Repr,Any](repr)}._2".rep,f)
148 | } //alsoApply (println(s"Getting $f $lhs $rhs -> "+_))
149 | }
150 | def withId(id: Int) = CompositeFormat(lhs withId id, rhs withId id) // re-tagging distributes to both halves
151 | }
152 | // A loadable data store with a fixed row schema; traversal is offered in one push style and two pull styles.
153 | trait Table {
154 | val rowFmt: RowFormat // schema (and staged row representation) of the rows held by this table
155 | def mkDataLoader(sep: Char): CrossStage[Iterator[String] => Unit] // staged program that splits each input line on `sep`, parses the cells, and inserts the row
156 | def loadData(data: Iterator[String], sep: Char = '|'): Unit = { // convenience: build the loader, compile it on the spot, and run it on `data`
157 | val pgrm = mkDataLoader(sep)
158 | println(s"Generated Program: $pgrm") // prints the generated code for inspection
159 | pgrm.compile()(data)
160 | }
161 | // TODO: implement these everywhere:
162 | def push(cont: Code[rowFmt.Repr => Bool]): CrossStage[Unit] = ??? // push-based scan; `cont`'s Bool presumably signals whether to keep consuming — NOTE(review): UniqueIndexedTable.push honors it via loopWhile but PlainTable.push ignores it (plain foreach); confirm intended semantics
163 | def pull: CrossStage[() => (rowFmt.Repr => Unit) => Bool] = ??? // pull-based cursor factory: each inner call feeds at most one row to the callback and returns false on exhaustion (see PlainTable.pull)
164 | def pull2: CrossStage[IteratorRep[rowFmt.Repr]] = ??? // iterator-shaped pull: a staged (hasNext, next) pair of thunks
165 | } 166 | object Table { 167 | def apply(cols: Seq[Field]): Table = new PlainTable(cols,0) 168 | } 169 | abstract class SimpleTable extends Table { // TODO merge into Table 170 | def mkEntryLoader(sep: Char): CrossStage[String => Unit] 171 | def mkDataLoader(sep: Char): CrossStage[Iterator[String] => Unit] = 172 | mkEntryLoader(sep) map (el => ir"(ite: Iterator[String]) => while (ite.hasNext) { val str = ite.next; ${el}(str) }") 173 | } 174 | class PlainTable(val cols: Seq[Field], idxShift: Int) extends SimpleTable { 175 | val rowFmt: RowFormat = RowFormat(cols) 176 | import rowFmt.{Repr => Val} 177 | val buffer = mutable.ArrayBuffer[Val]() 178 | private[this] val arr: Code[Array[String]] = ir"arr?:Array[String]" 179 | private[this] val colMap = cols.map(_.name).zipWithIndex.toMap.mapValues(i => ir"$arr(${Const(i+idxShift)})") // TODO factor with IndexedTable 180 | 181 | def mkEntryLoader(sep: Char): CrossStage[String => Unit] = CrossStage(buffer)(buf => 182 | ir"(str: String) => { val arr = str.split(${Const(sep)}); $buf += ${rowFmt.parse(colMap):IR[Val,{val arr:Array[String]}]}; () }") 183 | 184 | override def push(cont: Code[rowFmt.Repr => Bool]): CrossStage[Unit] = CrossStage(buffer){ buf => ir"$buf.foreach($cont)" } 185 | 186 | override def pull: CrossStage[() => (rowFmt.Repr => Unit) => Bool] = CrossStage(buffer){ buf => 187 | ir"val it = $buf.iterator; () => (k:rowFmt.Repr => Unit) => if (it.hasNext) { k(it.next); true } else false" } 188 | override def pull2: CrossStage[IteratorRep[rowFmt.Repr]] = CrossStage(buffer){ buf => 189 | ir"val it = $buf.iterator; () => (it.hasNext _) -> (it.next _)" } 190 | } 191 | class SingleColumnTable(val col: Field, idxShift: Int) extends PlainTable(col::Nil, idxShift) { 192 | override val rowFmt = new SingleColumnFormat(col) 193 | } 194 | 195 | trait IndexedTable extends Table { 196 | val keys: Seq[Field] 197 | val values: Seq[Field] 198 | val order: Seq[String] 199 | val keyFmt: RowFormat = RowFormat(keys) // Note: 
using TupleFormat here may generate references to Tuple1... not sure how that's handled by Scalac 200 | val valFmt: RowFormat = RowFormat(values) 201 | val rowFmt: CompositeFormat[keyFmt.type,valFmt.type] = CompositeFormat(keyFmt,valFmt) 202 | private[this] val arr: Code[Array[String]] = ir"arr?:Array[String]" 203 | private[this] val colMap = order.zipWithIndex.toMap.mapValues(name => ir"$arr(${Const(name)})") 204 | val kparser = keyFmt.parse(colMap) 205 | val vparser = valFmt.parse(colMap) 206 | } 207 | case class GeneralIndexedTable(keys: Seq[Field], values: Seq[Field], order: Seq[String]) extends IndexedTable { 208 | import keyFmt.{Repr=>Key} 209 | import valFmt.{Repr=>Val} 210 | val hashTable = mutable.HashMap[Key,mutable.Set[Val]]() 211 | def mkDataLoader(sep: Char): CrossStage[Iterator[String] => Unit] = { 212 | CrossStage(hashTable) { ht => 213 | ir"""(ite: Iterator[String]) => 214 | while (ite.hasNext) { 215 | val str = ite.next 216 | val arr = str.split(${Const(sep)}) 217 | //println(">"+arr.toList) 218 | $ht.getOrElseUpdate(${kparser:IR[Key,{val arr:Array[String]}]},mutable.Set()) += ${vparser:IR[Val,{val arr:Array[String]}]} 219 | } 220 | """ 221 | } 222 | } 223 | } 224 | case class UniqueIndexedTable(keys: Seq[Field], values: Seq[Field], order: Seq[String]) extends IndexedTable { 225 | import keyFmt.{Repr=>Key} 226 | import valFmt.{Repr=>Val} 227 | 228 | val hashTable = mutable.HashMap[Key,Val]() 229 | 230 | def mkDataLoader(sep: Char): CrossStage[Iterator[String] => Unit] = { 231 | CrossStage(hashTable) { ht => 232 | ir"""(ite: Iterator[String]) => 233 | while (ite.hasNext) { 234 | val str = ite.next 235 | val arr = str.split(${Const(sep)}) 236 | //println(">"+arr.toList) 237 | $ht += (${kparser:IR[Key,{val arr:Array[String]}]} -> ${vparser:IR[Val,{val arr:Array[String]}]}) 238 | } 239 | """ 240 | } 241 | } 242 | 243 | override def push(cont: Code[rowFmt.Repr => Bool]): CrossStage[Unit] = CrossStage(hashTable) { ht => 244 | ir"""val it = $ht.iterator; 
loopWhile { it.hasNext && { val kv = it.next; $cont(kv._1->kv._2) } }""" 245 | } 246 | 247 | } 248 | 249 | case class ColumnStore(val cols: Seq[Field]) extends SimpleTable { 250 | val rowFmt: TupleFormat = TupleFormat(cols) 251 | val stores = cols.zipWithIndex map {case (c,i) => new SingleColumnTable(c,i)} 252 | def mkEntryLoader(sep: Char): CrossStage[String => Unit] = 253 | (stores.map(_.mkEntryLoader(sep)).fold(CrossStage(())(_ => ir"(str:String) => ()")) { 254 | case (acc, dl) => 255 | for { 256 | a <- acc 257 | d <- dl 258 | } yield ir"(str:String) => {$a(str); $d(str)}" 259 | }) //alsoApply println 260 | 261 | override def push(cont: Code[rowFmt.Repr => Bool]): CrossStage[Unit] = { 262 | import Embedding.{hole,IR} 263 | import scala.collection.mutable.ArrayBuffer 264 | import ColumnStore.placeHolder 265 | 266 | val s = stores.map { case sct => 267 | import sct.rowFmt.Repr 268 | CrossStage(sct.buffer){ buf => ir"$buf(idx?:Int)" } 269 | }.foldLeft((ls:List[Code[Any]]) => CrossStage()(rowFmt.mk(ls:_*))) { 270 | case (f, cs) => (ls:List[Code[Any]]) => cs.flatMap(c => f(c :: ls)) 271 | } 272 | s(Nil) flatMap { tup => 273 | CrossStage(stores.head.buffer.size) { len => 274 | ir"var i = 0; loopWhile { val idx = i; i = idx+1; idx < $len && $cont(${tup:IR[rowFmt.Repr,{val idx:Int}]}) }" 275 | } 276 | } 277 | 278 | } 279 | 280 | } 281 | object ColumnStore { 282 | @transparencyPropagating def placeHolder[T](id: Int): T = ??? 283 | } 284 | 285 | 286 | // TODO Table stored as hashmaps, compressed arrays of bits, etc 287 | 288 | // TODO: 289 | //class CompressedTable extends Table[Int] { 290 | //} 291 | 292 | 293 | 294 | /** Class used to associate values in the current stage with pieces of code that will make use of them when run, later on. 295 | * We provide all the composition primitives necessary to build programs with these. 
*/ 296 | abstract class CrossStage[A:IRType](val values: Seq[base.Val -> AnyRef]) { thisCS => 297 | type Ctx 298 | val code: IR[A,Ctx] 299 | 300 | def map[B:IRType](f: Code[A] => Code[B]): CrossStage[B] = { 301 | new CrossStage[B](thisCS.values) { 302 | val code = f(thisCS.code).asClosedIR 303 | } 304 | } 305 | def flatMap[B:IRType](f: Code[A] => CrossStage[B]): CrossStage[B] = { 306 | val cs = f(code) 307 | new CrossStage[B](values ++ cs.values) { 308 | type Ctx = thisCS.Ctx with cs.Ctx 309 | val code = cs.code 310 | } 311 | } 312 | lazy val valuesAndNames = values.zipWithIndex.map { case (vx, i) => (vx, s"cs$i") } 313 | lazy val fmt = RowFormat(valuesAndNames.map { case (v -> _, n) => Field(n)(base.IRType(v.typ),null) }) 314 | lazy val vals = fmt.runtimeReprOf(values.map(_._2):_*) 315 | lazy val body = valuesAndNames.foldLeft(code.rep) { 316 | case (newCode, (v -> x, n)) => base.letin(v, ir"field[${base.IRType(v.typ)}](${Const(n)})".rep, newCode, code.typ.rep) 317 | } 318 | lazy val mkFun: Code[fmt.Repr => A] = fmt.lift(base.IR(body),0) 319 | lazy val compile: () => A = { 320 | val f = fmt.lift(base.IR(body),0).asClosedIR.compile.asInstanceOf[Any => A] 321 | () => f(vals) 322 | } 323 | def run: A = mkFun.asClosedIR.run.apply(vals) 324 | override def toString: String = { 325 | val map = values.toMap 326 | val c = base.bottomUpPartial(code.rep) { 327 | case base.RepDef(bv: base.BoundVal) if map isDefinedAt bv => base.hole(s"cs${values.indexWhere(_._1 == bv)}", bv.typ) 328 | } 329 | s"${base.showRep(c)}\n\twhere: ${values.zipWithIndex map {case (kv,i) => s"cs${i} = ${CrossStage.showObject(kv._2)}; "} mkString}" 330 | // ^ Note: weirdly, when writing `${kv._2|>CrossStage.showObject}` above we get a strange, different result. 
331 | } 332 | } 333 | object CrossStage { 334 | def apply[R:IRType]()(code0: Code[R]): CrossStage[R] = new CrossStage[R](Nil) { 335 | val code = code0.asClosedIR 336 | } 337 | def apply[T0:IRType,R:IRType](value0: T0)(codeFun: Code[T0] => Code[R]): CrossStage[R] = { 338 | val v = base.bindVal("cs", typeRepOf[T0], Nil) 339 | new CrossStage[R](v -> value0.asInstanceOf[AnyRef] :: Nil) { 340 | val code = codeFun(base.IR(v |> base.readVal)).asClosedIR 341 | } 342 | } 343 | def apply[T0:IRType,T1:IRType,R:IRType](value0: T0, value1: T1)(code: (Code[T0],Code[T1]) => Code[R]): CrossStage[R] = ??? 344 | def magic[A:IRType,B:IRType](f: Code[A] => CrossStage[B]): CrossStage[A => B] = { 345 | val cs = f(ir"a?:A") // FIXME: this probably has hygiene problems 346 | new CrossStage[A => B](cs.values)(irTypeOf[A=>B]) { // Q: why is it necessary to provide the implicit?! even `implicitly[IRType[A=>B]]` works! 347 | val code = ir"(a:A) => ${cs.code:IR[B,{val a:A}]}" 348 | } 349 | } 350 | def showObject(x:AnyRef) = s"${x.getClass.getName} @ 0x${System.identityHashCode(x).toLong.toHexString}" 351 | } 352 | 353 | 354 | -------------------------------------------------------------------------------- /src/test/scala/Basics.scala: -------------------------------------------------------------------------------- 1 | package dbstage 2 | import example.{Person => PersonClass, _} 3 | 4 | import Embedding.Predef._ 5 | import squid.utils._ 6 | import frontend._ 7 | import query._ 8 | 9 | import org.scalatest.FunSuite 10 | 11 | class Basics extends FunSuite { 12 | 13 | def sameLines(q: Query)(model: String*) = { 14 | val modSet = model.toSet 15 | q.pushLines({ line => 16 | assert(modSet(line), s"$line not in $modSet") 17 | }, pushHeader = false) 18 | } 19 | 20 | def Filtering(P: PersonClass) = { 21 | import P._ 22 | 23 | P.loadDataFromFile("data/persons.csv", compileCode = false) 24 | 25 | val q0 = from(P) where ir"$Age > 18" where ir"$Sex == Male" select (Name,Age) 26 | sameLines(q0)( 27 | "|john 
smith|23|", 28 | "|bob parker|41|", 29 | "|hugh carper|67|") 30 | 31 | } 32 | 33 | test("Filtering") { 34 | case object P extends PersonClass 35 | P.indexByKeys = false 36 | 37 | Filtering(P) 38 | 39 | } 40 | 41 | test("Filtering IBK") { 42 | case object P extends PersonClass 43 | P.indexByKeys = true 44 | 45 | Filtering(P) 46 | 47 | } 48 | 49 | test("Filtering CS") { 50 | case object P extends PersonClass 51 | P.indexByKeys = false 52 | P.columnStore = true 53 | 54 | Filtering(P) 55 | 56 | } 57 | 58 | def Joining(P: PersonClass) = { 59 | import P._ 60 | 61 | P.loadDataFromFile("data/persons.csv", compileCode = false) 62 | 63 | val m = from(P) 64 | val f = from(P) 65 | val q = ((m where ir"$Sex == Male") join (f where ir"$Sex == Female"))(ir"${m.Age} == ${f.Age}") select (m.Age, m.Name, f.Name, m.Id, f.Id) 66 | sameLines(q)( 67 | "|41|bob parker|julia kenn|1|6|", 68 | "|7|toto ronto|derpita derpa|5|7|") 69 | 70 | } 71 | 72 | test("Joining") { 73 | case object P extends PersonClass 74 | P.indexByKeys = false 75 | 76 | Joining(P) 77 | 78 | } 79 | 80 | test("Joining IBK") { 81 | case object P extends PersonClass 82 | P.indexByKeys = true 83 | 84 | Joining(P) 85 | 86 | } 87 | 88 | test("Joining CS") { 89 | case object P extends PersonClass 90 | P.indexByKeys = false 91 | P.columnStore = true 92 | 93 | Joining(P) 94 | 95 | } 96 | 97 | 98 | 99 | } 100 | --------------------------------------------------------------------------------