{
16 | /**
17 | * {@inheritDoc}
18 | *
19 | * The default implementation returns the result of calling
20 | * {@link #visitChildren} on {@code ctx}.
21 | */
22 | @Override public T visitQuery(SiftParser.QueryContext ctx) { return visitChildren(ctx); }
23 | /**
24 | * {@inheritDoc}
25 | *
26 | * The default implementation returns the result of calling
27 | * {@link #visitChildren} on {@code ctx}.
28 | */
29 | @Override public T visitRelId(SiftParser.RelIdContext ctx) { return visitChildren(ctx); }
30 | /**
31 | * {@inheritDoc}
32 | *
33 | * The default implementation returns the result of calling
34 | * {@link #visitChildren} on {@code ctx}.
35 | */
36 | @Override public T visitRelSubquery(SiftParser.RelSubqueryContext ctx) { return visitChildren(ctx); }
37 | /**
38 | * {@inheritDoc}
39 | *
40 | * The default implementation returns the result of calling
41 | * {@link #visitChildren} on {@code ctx}.
42 | */
43 | @Override public T visitRelBagOp(SiftParser.RelBagOpContext ctx) { return visitChildren(ctx); }
44 | /**
45 | * {@inheritDoc}
46 | *
47 | * The default implementation returns the result of calling
48 | * {@link #visitChildren} on {@code ctx}.
49 | */
50 | @Override public T visitRelJoin(SiftParser.RelJoinContext ctx) { return visitChildren(ctx); }
51 | /**
52 | * {@inheritDoc}
53 | *
54 | * The default implementation returns the result of calling
55 | * {@link #visitChildren} on {@code ctx}.
56 | */
57 | @Override public T visitTransform(SiftParser.TransformContext ctx) { return visitChildren(ctx); }
58 | /**
59 | * {@inheritDoc}
60 | *
61 | * The default implementation returns the result of calling
62 | * {@link #visitChildren} on {@code ctx}.
63 | */
64 | @Override public T visitSelect(SiftParser.SelectContext ctx) { return visitChildren(ctx); }
65 | /**
66 | * {@inheritDoc}
67 | *
68 | * The default implementation returns the result of calling
69 | * {@link #visitChildren} on {@code ctx}.
70 | */
71 | @Override public T visitProject(SiftParser.ProjectContext ctx) { return visitChildren(ctx); }
72 | /**
73 | * {@inheritDoc}
74 | *
75 | * The default implementation returns the result of calling
76 | * {@link #visitChildren} on {@code ctx}.
77 | */
78 | @Override public T visitGroup(SiftParser.GroupContext ctx) { return visitChildren(ctx); }
79 | /**
80 | * {@inheritDoc}
81 | *
82 | * The default implementation returns the result of calling
83 | * {@link #visitChildren} on {@code ctx}.
84 | */
85 | @Override public T visitSort(SiftParser.SortContext ctx) { return visitChildren(ctx); }
86 | /**
87 | * {@inheritDoc}
88 | *
89 | * The default implementation returns the result of calling
90 | * {@link #visitChildren} on {@code ctx}.
91 | */
92 | @Override public T visitLimit(SiftParser.LimitContext ctx) { return visitChildren(ctx); }
93 | /**
94 | * {@inheritDoc}
95 | *
96 | * The default implementation returns the result of calling
97 | * {@link #visitChildren} on {@code ctx}.
98 | */
99 | @Override public T visitDistinct(SiftParser.DistinctContext ctx) { return visitChildren(ctx); }
100 | /**
101 | * {@inheritDoc}
102 | *
103 | * The default implementation returns the result of calling
104 | * {@link #visitChildren} on {@code ctx}.
105 | */
106 | @Override public T visitIdentExpr(SiftParser.IdentExprContext ctx) { return visitChildren(ctx); }
107 | /**
108 | * {@inheritDoc}
109 | *
110 | * The default implementation returns the result of calling
111 | * {@link #visitChildren} on {@code ctx}.
112 | */
113 | @Override public T visitFuncExpr(SiftParser.FuncExprContext ctx) { return visitChildren(ctx); }
114 | /**
115 | * {@inheritDoc}
116 | *
117 | * The default implementation returns the result of calling
118 | * {@link #visitChildren} on {@code ctx}.
119 | */
120 | @Override public T visitIntLitExpr(SiftParser.IntLitExprContext ctx) { return visitChildren(ctx); }
121 | /**
122 | * {@inheritDoc}
123 | *
124 | * The default implementation returns the result of calling
125 | * {@link #visitChildren} on {@code ctx}.
126 | */
127 | @Override public T visitStringLitExpr(SiftParser.StringLitExprContext ctx) { return visitChildren(ctx); }
128 | /**
129 | * {@inheritDoc}
130 | *
131 | * The default implementation returns the result of calling
132 | * {@link #visitChildren} on {@code ctx}.
133 | */
134 | @Override public T visitSubExpr(SiftParser.SubExprContext ctx) { return visitChildren(ctx); }
135 | /**
136 | * {@inheritDoc}
137 | *
138 | * The default implementation returns the result of calling
139 | * {@link #visitChildren} on {@code ctx}.
140 | */
141 | @Override public T visitBoolExpr(SiftParser.BoolExprContext ctx) { return visitChildren(ctx); }
142 | /**
143 | * {@inheritDoc}
144 | *
145 | * The default implementation returns the result of calling
146 | * {@link #visitChildren} on {@code ctx}.
147 | */
148 | @Override public T visitProjMap(SiftParser.ProjMapContext ctx) { return visitChildren(ctx); }
149 | /**
150 | * {@inheritDoc}
151 | *
152 | * The default implementation returns the result of calling
153 | * {@link #visitChildren} on {@code ctx}.
154 | */
155 | @Override public T visitProjIdent(SiftParser.ProjIdentContext ctx) { return visitChildren(ctx); }
156 | /**
157 | * {@inheritDoc}
158 | *
159 | * The default implementation returns the result of calling
160 | * {@link #visitChildren} on {@code ctx}.
161 | */
162 | @Override public T visitAgg(SiftParser.AggContext ctx) { return visitChildren(ctx); }
163 | /**
164 | * {@inheritDoc}
165 | *
166 | * The default implementation returns the result of calling
167 | * {@link #visitChildren} on {@code ctx}.
168 | */
169 | @Override public T visitAlias(SiftParser.AliasContext ctx) { return visitChildren(ctx); }
170 | /**
171 | * {@inheritDoc}
172 | *
173 | * The default implementation returns the result of calling
174 | * {@link #visitChildren} on {@code ctx}.
175 | */
176 | @Override public T visitIds(SiftParser.IdsContext ctx) { return visitChildren(ctx); }
177 | }
--------------------------------------------------------------------------------
/src/main/java/com/rchowell/sift/language/v0/antlr/SiftLexer.tokens:
--------------------------------------------------------------------------------
1 | PIPE=1
2 | MAPS=2
3 | LP=3
4 | RP=4
5 | COMMA=5
6 | SQUOTE=6
7 | EQ=7
8 | GT=8
9 | LT=9
10 | GTE=10
11 | LTE=11
12 | AND=12
13 | OR=13
14 | PLUS=14
15 | MINUS=15
16 | MULT=16
17 | DIV=17
18 | MOD=18
19 | MIN=19
20 | MAX=20
21 | SUM=21
22 | AVG=22
23 | COUNT=23
24 | SELECT=24
25 | PROJECT=25
26 | GROUP=26
27 | SORT=27
28 | LIMIT=28
29 | DISTINCT=29
30 | ON=30
31 | AS=31
32 | BY=32
33 | OUTER=33
34 | LEFT=34
35 | RIGHT=35
36 | ASC=36
37 | DESC=37
38 | TRUE=38
39 | FALSE=39
40 | JOIN=40
41 | CROSS=41
42 | UNION=42
43 | DIFF=43
44 | INTERSECT=44
45 | STRING=45
46 | INT=46
47 | WS=47
48 | ID=48
49 | ID_QUOTED=49
50 | UNRECOGNIZED=50
51 | '|>'=1
52 | '->'=2
53 | '('=3
54 | ')'=4
55 | ','=5
56 | '\''=6
57 | '='=7
58 | '>'=8
59 | '<'=9
60 | '>='=10
61 | '<='=11
62 | '&&'=12
63 | '||'=13
64 | '+'=14
65 | '-'=15
66 | '*'=16
67 | '/'=17
68 | '%'=18
69 |
--------------------------------------------------------------------------------
/src/main/java/com/rchowell/sift/language/v0/antlr/SiftVisitor.java:
--------------------------------------------------------------------------------
1 | // Generated from Sift.g4 by ANTLR 4.9.3
2 |
3 | package com.rchowell.sift.language.v0.antlr;
4 |
5 | import org.antlr.v4.runtime.tree.ParseTreeVisitor;
6 |
7 | /**
8 | * This interface defines a complete generic visitor for a parse tree produced
9 | * by {@link SiftParser}.
10 | *
11 | * @param <T> The return type of the visit operation. Use {@link Void} for
12 | * operations with no return type.
13 | */
14 | public interface SiftVisitor<T> extends ParseTreeVisitor<T> {
15 | /**
16 | * Visit a parse tree produced by {@link SiftParser#query}.
17 | * @param ctx the parse tree
18 | * @return the visitor result
19 | */
20 | T visitQuery(SiftParser.QueryContext ctx);
21 | /**
22 | * Visit a parse tree produced by the {@code relId}
23 | * labeled alternative in {@link SiftParser#relation}.
24 | * @param ctx the parse tree
25 | * @return the visitor result
26 | */
27 | T visitRelId(SiftParser.RelIdContext ctx);
28 | /**
29 | * Visit a parse tree produced by the {@code relSubquery}
30 | * labeled alternative in {@link SiftParser#relation}.
31 | * @param ctx the parse tree
32 | * @return the visitor result
33 | */
34 | T visitRelSubquery(SiftParser.RelSubqueryContext ctx);
35 | /**
36 | * Visit a parse tree produced by the {@code relBagOp}
37 | * labeled alternative in {@link SiftParser#relation}.
38 | * @param ctx the parse tree
39 | * @return the visitor result
40 | */
41 | T visitRelBagOp(SiftParser.RelBagOpContext ctx);
42 | /**
43 | * Visit a parse tree produced by the {@code relJoin}
44 | * labeled alternative in {@link SiftParser#relation}.
45 | * @param ctx the parse tree
46 | * @return the visitor result
47 | */
48 | T visitRelJoin(SiftParser.RelJoinContext ctx);
49 | /**
50 | * Visit a parse tree produced by {@link SiftParser#transform}.
51 | * @param ctx the parse tree
52 | * @return the visitor result
53 | */
54 | T visitTransform(SiftParser.TransformContext ctx);
55 | /**
56 | * Visit a parse tree produced by {@link SiftParser#select}.
57 | * @param ctx the parse tree
58 | * @return the visitor result
59 | */
60 | T visitSelect(SiftParser.SelectContext ctx);
61 | /**
62 | * Visit a parse tree produced by {@link SiftParser#project}.
63 | * @param ctx the parse tree
64 | * @return the visitor result
65 | */
66 | T visitProject(SiftParser.ProjectContext ctx);
67 | /**
68 | * Visit a parse tree produced by {@link SiftParser#group}.
69 | * @param ctx the parse tree
70 | * @return the visitor result
71 | */
72 | T visitGroup(SiftParser.GroupContext ctx);
73 | /**
74 | * Visit a parse tree produced by {@link SiftParser#sort}.
75 | * @param ctx the parse tree
76 | * @return the visitor result
77 | */
78 | T visitSort(SiftParser.SortContext ctx);
79 | /**
80 | * Visit a parse tree produced by {@link SiftParser#limit}.
81 | * @param ctx the parse tree
82 | * @return the visitor result
83 | */
84 | T visitLimit(SiftParser.LimitContext ctx);
85 | /**
86 | * Visit a parse tree produced by {@link SiftParser#distinct}.
87 | * @param ctx the parse tree
88 | * @return the visitor result
89 | */
90 | T visitDistinct(SiftParser.DistinctContext ctx);
91 | /**
92 | * Visit a parse tree produced by the {@code identExpr}
93 | * labeled alternative in {@link SiftParser#expr}.
94 | * @param ctx the parse tree
95 | * @return the visitor result
96 | */
97 | T visitIdentExpr(SiftParser.IdentExprContext ctx);
98 | /**
99 | * Visit a parse tree produced by the {@code funcExpr}
100 | * labeled alternative in {@link SiftParser#expr}.
101 | * @param ctx the parse tree
102 | * @return the visitor result
103 | */
104 | T visitFuncExpr(SiftParser.FuncExprContext ctx);
105 | /**
106 | * Visit a parse tree produced by the {@code intLitExpr}
107 | * labeled alternative in {@link SiftParser#expr}.
108 | * @param ctx the parse tree
109 | * @return the visitor result
110 | */
111 | T visitIntLitExpr(SiftParser.IntLitExprContext ctx);
112 | /**
113 | * Visit a parse tree produced by the {@code stringLitExpr}
114 | * labeled alternative in {@link SiftParser#expr}.
115 | * @param ctx the parse tree
116 | * @return the visitor result
117 | */
118 | T visitStringLitExpr(SiftParser.StringLitExprContext ctx);
119 | /**
120 | * Visit a parse tree produced by the {@code subExpr}
121 | * labeled alternative in {@link SiftParser#expr}.
122 | * @param ctx the parse tree
123 | * @return the visitor result
124 | */
125 | T visitSubExpr(SiftParser.SubExprContext ctx);
126 | /**
127 | * Visit a parse tree produced by the {@code boolExpr}
128 | * labeled alternative in {@link SiftParser#expr}.
129 | * @param ctx the parse tree
130 | * @return the visitor result
131 | */
132 | T visitBoolExpr(SiftParser.BoolExprContext ctx);
133 | /**
134 | * Visit a parse tree produced by the {@code projMap}
135 | * labeled alternative in {@link SiftParser#func}.
136 | * @param ctx the parse tree
137 | * @return the visitor result
138 | */
139 | T visitProjMap(SiftParser.ProjMapContext ctx);
140 | /**
141 | * Visit a parse tree produced by the {@code projIdent}
142 | * labeled alternative in {@link SiftParser#func}.
143 | * @param ctx the parse tree
144 | * @return the visitor result
145 | */
146 | T visitProjIdent(SiftParser.ProjIdentContext ctx);
147 | /**
148 | * Visit a parse tree produced by {@link SiftParser#agg}.
149 | * @param ctx the parse tree
150 | * @return the visitor result
151 | */
152 | T visitAgg(SiftParser.AggContext ctx);
153 | /**
154 | * Visit a parse tree produced by {@link SiftParser#alias}.
155 | * @param ctx the parse tree
156 | * @return the visitor result
157 | */
158 | T visitAlias(SiftParser.AliasContext ctx);
159 | /**
160 | * Visit a parse tree produced by {@link SiftParser#ids}.
161 | * @param ctx the parse tree
162 | * @return the visitor result
163 | */
164 | T visitIds(SiftParser.IdsContext ctx);
165 | }
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/Environment.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution
2 |
3 | import com.rchowell.sift.source.InvalidSourceException
4 | import com.rchowell.sift.source.Source
5 |
6 | class Environment(
7 | sources: List<Source> = emptyList()
8 | ) {
9 | // Registry of the sources a query may reference, keyed by each source's identifier.
10 | val sourceMap: MutableMap<String, Source> = mutableMapOf()
11 | // Register every source handed to the constructor.
12 | init {
13 | sources.forEach { registerSource(it) }
14 | }
15 | /** Stores [source] under its identifier, replacing any source already stored under that key. */
16 | fun registerSource(source: Source) {
17 | sourceMap[source.identifier] = source
18 | }
19 | /** Returns the source stored under [identifier], or throws [InvalidSourceException] when there is none. */
20 | fun getSource(identifier: String): Source = sourceMap[identifier] ?: throw InvalidSourceException(identifier)
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/Executor.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution
2 |
3 | import com.rchowell.sift.execution.planner.Planner
4 | import com.rchowell.sift.language.v0.antlr.SiftCompiler
5 |
6 | class Executor {
7 | companion object {
8 | /**
9 | * Compiles [query] against [environment], plans it, opens the plan and prints each batch it yields.
10 | */
11 | fun sift(environment: Environment, query: String) {
12 | val logical = SiftCompiler(environment).compile(query)
13 | val plan = Planner.plan(logical)
14 | plan.open()
15 | // Drain the plan; a null from next() ends the loop.
16 | while (true) {
17 | val chunk = plan.next() ?: break
18 | println(chunk)
19 | }
20 | }
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/LogicalExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical
2 |
3 | import com.rchowell.sift.types.Field
4 |
5 | /**
6 | * Query planning requires expressions to describe the resultant [Field] given a [LogicalTransform].
7 | * Change to `sealed interface` when using Kotlin 1.5.
8 | *
9 | * This is an interface, so there is no constructor; implementers supply [toField] only.
10 | */
11 | interface LogicalExpr {
12 |
13 | /**
14 | * Describes the [Field] this expression produces when applied to [input].
15 | *
16 | * @param input the transform the expression is applied to
17 | * @return the field describing the expression's result
18 | */
19 | fun toField(input: LogicalTransform): Field
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/LogicalTransform.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical
2 |
3 | import com.rchowell.sift.types.Schema
4 |
5 | /**
6 | * A logical plan is a transformation which returns a relation.
7 | * This is taken directly from KQuery.
8 | *
9 | * AFAIK a logical plan represents a transformation between the incoming transformations.
10 | * I'm thinking of logical plans as a chain of mapping functions.
11 | *
12 | * Change to `sealed interface` when the project migrates from kotlin 1.4 to 1.5
13 | *
14 | * Subclasses expose their output [schema] and their child transforms through [inputs].
15 | */
16 | abstract class LogicalTransform {
17 |
18 | /**
19 | * Output schema of this transformation
20 | */
21 | abstract val schema: Schema
22 |
23 | /**
24 | * Inputs of this logical plan. Grove says this will be useful for the visitor pattern, but I'm not there yet.
25 | * Why not a value?
26 | */
27 | abstract fun inputs(): List<LogicalTransform>
28 | /** Renders this transform by delegating to [format] with the default indent. */
29 | open fun pretty(): String {
30 | return format(this)
31 | }
32 | }
33 |
34 | /**
35 | * Renders [transform] followed by its inputs, each nested one level deeper, inside braces.
36 | * A transform without inputs is rendered with an empty `{}`.
37 | *
38 | * @param transform root of the tree to render
39 | * @param indent nesting depth; the line prefix repeats `" "` once per level
40 | * @return the textual form of the tree
41 | */
42 | fun format(transform: LogicalTransform, indent: Int = 0): String {
43 | val pad = " ".repeat(indent)
44 | val out = StringBuilder(pad).append(transform)
45 | if (transform.inputs().isEmpty()) return out.append(" {}").toString()
46 | out.append(" {").append("\n")
47 | for (child in transform.inputs()) {
48 | out.append(format(child, indent + 1)).append("\n")
49 | }
50 | return out.append(pad).append("}").toString()
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalAggregateExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.expressions
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.types.Field
6 | import com.rchowell.sift.types.Type
7 |
8 | class UnknownAggregateFunction(name: String) : Exception("unknown aggregate $name") // thrown by LogicalAggregateExpr.get for a name it does not recognise
9 |
10 | /**
11 | * Base type of the logical aggregate expressions: an [AggOp] applied to a single input expression.
12 | * @property op the aggregate operator
13 | * @property input the expression being aggregated
14 | */
15 | sealed class LogicalAggregateExpr(
16 | val op: AggOp,
17 | val input: LogicalExpr,
18 | ) : LogicalExpr {
19 | override fun toString(): String = "$op($input)"
20 | override fun toField(input: LogicalTransform): Field = Field(op.name, this.input.toField(input).type)
21 |
22 | companion object {
23 | fun get(name: String, vararg args: LogicalExpr): LogicalAggregateExpr {
24 | val ctor: (LogicalExpr) -> LogicalAggregateExpr = when (name) {
25 | "MIN" -> ::LogicalMinExpr
26 | "MAX" -> ::LogicalMaxExpr
27 | "SUM" -> ::LogicalSumExpr
28 | "AVG" -> ::LogicalAvgExpr
29 | "COUNT" -> ::LogicalCountExpr
30 | else -> throw UnknownAggregateFunction(name)
31 | }
32 | return ctor(args[0])
33 | }
34 | }
35 | }
36 |
37 | class LogicalMinExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.MIN, input) // binds AggOp.MIN
38 |
39 | class LogicalMaxExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.MAX, input) // binds AggOp.MAX
40 |
41 | class LogicalSumExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.SUM, input) // binds AggOp.SUM
42 |
43 | class LogicalCountExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.COUNT, input) { // binds AggOp.COUNT; result type is always Type.Num, whatever the input's type
44 | override fun toField(input: LogicalTransform): Field = Field(op.name, Type.Num)
45 | }
46 |
47 | class LogicalAvgExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.AVG, input) // binds AggOp.AVG; inherits the input's type like MIN/MAX/SUM
48 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalBinaryExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.expressions
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.ADD
6 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.AND
7 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.DIV
8 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.EQ
9 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.GT
10 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.GTE
11 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.LT
12 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.LTE
13 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.MOD
14 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.MULT
15 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.NEQ
16 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.OR
17 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.SUB
18 | import com.rchowell.sift.types.Field
19 | import com.rchowell.sift.types.Type
20 |
21 | /**
22 | * A logical expression that joins two operand expressions with a [BinaryOp].
23 | * Covers comparison, boolean and math operators.
24 | *
25 | * @property op operator joining the operands
26 | * @property lhs left operand
27 | * @property rhs right operand
28 | */
29 | abstract class LogicalBinaryExpr(
30 | val op: BinaryOp,
31 | val lhs: LogicalExpr,
32 | val rhs: LogicalExpr,
33 | ) : LogicalExpr {
34 | // Infix rendering: left operand, operator, right operand, separated by single spaces.
35 | override fun toString(): String = listOf(lhs, op, rhs).joinToString(" ")
36 |
37 | companion object {
38 | /** Builds the concrete expression class that corresponds to [op]. */
39 | fun get(op: BinaryOp, lhs: LogicalExpr, rhs: LogicalExpr): LogicalExpr {
40 | return when (op) {
41 | ADD -> LogicalAddExpr(lhs, rhs)
42 | SUB -> LogicalSubExpr(lhs, rhs)
43 | MULT -> LogicalMulExpr(lhs, rhs)
44 | DIV -> LogicalDivExpr(lhs, rhs)
45 | MOD -> LogicalModExpr(lhs, rhs)
46 | EQ -> LogicalEqExpr(lhs, rhs)
47 | NEQ -> LogicalNeqExpr(lhs, rhs)
48 | LT -> LogicalLtExpr(lhs, rhs)
49 | LTE -> LogicalLteExpr(lhs, rhs)
50 | GT -> LogicalGtExpr(lhs, rhs)
51 | GTE -> LogicalGteExpr(lhs, rhs)
52 | AND -> LogicalAndExpr(lhs, rhs)
53 | OR -> LogicalOrExpr(lhs, rhs)
54 | }
55 | }
56 | }
57 | }
58 |
59 | /**
60 | * Binary expressions whose result field has type [Type.Bool].
61 | * The field is named after the operator's enum constant.
62 | */
63 | sealed class LogicalBooleanBinaryExpr(
64 | op: BinaryOp,
65 | lhs: LogicalExpr,
66 | rhs: LogicalExpr,
67 | ) : LogicalBinaryExpr(op, lhs, rhs) {
68 | // Neither the operands nor the input transform influence the resulting field.
69 | override fun toField(input: LogicalTransform): Field {
70 | return Field(op.name, Type.Bool)
71 | }
72 | }
73 |
74 | // Comparison operators.
75 | class LogicalEqExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(EQ, lhs, rhs)
76 | class LogicalNeqExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(NEQ, lhs, rhs)
77 | class LogicalLtExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(LT, lhs, rhs)
78 | class LogicalLteExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(LTE, lhs, rhs)
79 | class LogicalGtExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(GT, lhs, rhs)
80 | class LogicalGteExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(GTE, lhs, rhs)
81 |
82 | // Boolean connectives.
83 | class LogicalAndExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(AND, lhs, rhs)
84 | class LogicalOrExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(OR, lhs, rhs)
85 |
86 | /**
87 | * Binary expressions that return a Num: the base class of ADD, SUB, MULT, DIV and MOD.
88 | */
89 | sealed class LogicalMathBinaryExpr(
90 | op: BinaryOp,
91 | lhs: LogicalExpr,
92 | rhs: LogicalExpr,
93 | ) : LogicalBinaryExpr(op, lhs, rhs) {
94 | override fun toField(input: LogicalTransform): Field = Field(op.name, Type.Num)
95 | }
96 | // The arithmetic operators must extend the math base so that toField reports Type.Num, not Type.Bool.
97 | class LogicalAddExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(ADD, lhs, rhs)
98 |
99 | class LogicalSubExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(SUB, lhs, rhs)
100 |
101 | class LogicalMulExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(MULT, lhs, rhs)
102 |
103 | class LogicalDivExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(DIV, lhs, rhs)
104 |
105 | class LogicalModExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(MOD, lhs, rhs)
106 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalIdentifierExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.expressions
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.types.Field
6 |
7 | /**
8 | * A by-name reference to a column of the input relation.
9 | * Two references are equal exactly when their identifiers are equal.
10 | *
11 | * @property identifier name of the referenced column
12 | */
13 | class LogicalIdentifierExpr(val identifier: String) : LogicalExpr {
14 |
15 | /**
16 | * Looks [identifier] up in the schema of [input].
17 | *
18 | * @param input transform whose schema is searched
19 | * @return the result of `Schema.find`; presumably that throws for an unknown column (confirm in Schema)
20 | */
21 | override fun toField(input: LogicalTransform): Field {
22 | val inputSchema = input.schema
23 | return inputSchema.find(identifier)
24 | }
25 |
26 | // Rendered as the identifier with a leading '#'.
27 | override fun toString(): String = "#" + identifier
28 |
29 | // Hashing and equality both rest on the identifier string alone.
30 | override fun hashCode(): Int = identifier.hashCode()
31 |
32 | override fun equals(other: Any?): Boolean = other is LogicalIdentifierExpr && other.identifier == identifier
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalLiteralExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.expressions
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.types.Field
6 | import com.rchowell.sift.types.Type
7 |
8 | /**
9 | * Representation of a literal: a Boolean, Number or String value. `T` is bounded by `Any` because `v::class` needs a non-null receiver.
10 | */
11 | class LogicalLiteralExpr<T : Any>(val v: T) : LogicalExpr {
12 | // Type of the literal, chosen from the runtime type of v; any other value type is rejected at construction.
13 | var type: Type = when (v) {
14 | is Boolean -> Type.Bool
15 | is Number -> Type.Num
16 | is String -> Type.String
17 | else -> throw IllegalArgumentException("unsupported type ${v::class.java.name}")
18 | }
19 | // The field is named after the literal's string form.
20 | override fun toField(input: LogicalTransform): Field = Field(v.toString(), type)
21 |
22 | override fun toString(): String = v.toString()
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/Ops.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.expressions
2 |
3 | class UnknownBinaryOp(op: String) : Exception("unknown binary op $op") // thrown by BinaryOp.get for a symbol it does not recognise
4 |
5 | /**
6 | * Binary operators, each paired with its textual symbol.
7 | * [toString] yields the symbol; `name` remains the enum constant's name.
8 | */
9 | enum class BinaryOp(private val s: String) {
10 | // comparison
11 | EQ("="),
12 | NEQ("!="),
13 | LT("<"),
14 | LTE("<="),
15 | GT(">"),
16 | GTE(">="),
17 |
18 | // boolean connectives
19 | AND("&&"),
20 | OR("||"),
21 |
22 | // arithmetic
23 | ADD("+"),
24 | SUB("-"),
25 | MULT("*"),
26 | DIV("/"),
27 | MOD("%");
28 |
29 | override fun toString(): String = s
30 |
31 | companion object {
32 | /**
33 | * Finds the operator whose symbol is exactly [op].
34 | *
35 | * @param op operator text, e.g. `"<="`
36 | * @return the matching constant
37 | * @throws UnknownBinaryOp when no constant carries that symbol
38 | */
39 | fun get(op: String) = values().firstOrNull { it.s == op } ?: throw UnknownBinaryOp(op)
40 | }
41 | }
42 |
43 | enum class AggOp { // aggregate operators; each LogicalAggregateExpr subclass binds one of them
44 | MIN,
45 | MAX,
46 | SUM,
47 | COUNT,
48 | AVG;
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/functions/LogicalFunction.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.functions
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 |
5 | class UnknownFunction(name: String) : Exception("unknown function $name") // thrown by LogicalFunction.get, which currently rejects every name
6 |
7 | /**
8 | * Helper functions such as STRLEN, LOWERCASE, ABS, etc.
9 | *
10 | * No function is implemented yet: [get] rejects every name.
11 | */
12 | sealed class LogicalFunction {
13 |
14 | companion object {
15 | /** Always throws [UnknownFunction] for now; `args` is not inspected. */
16 | // TODO add some functions
17 | fun get(name: String, vararg args: LogicalExpr): LogicalExpr {
18 | throw UnknownFunction(name)
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalAggregation.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.execution.logical.expressions.LogicalAggregateExpr
5 | import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
6 | import com.rchowell.sift.types.Field
7 | import com.rchowell.sift.types.Schema
8 |
9 | /**
10 | * LogicalAggregation is much like the extended projection, but it includes grouping attributes.
11 | *
12 | * Aggregation operators, such as sums or averages, are not operations of relational algebra, but are used
13 | * by the grouping operator (described next). Aggregation operators apply to attributes (columns) of a relation.
14 | *
15 | * Grouping of tuples according to their value in one or more attributes has the effect of partitioning the tuples
16 | * of a relation into groups. Aggregation can then be applied to columns within each group, giving us the ability
17 | * to express a number of queries that are impossible to express in the classical relational algebra.
18 | * The grouping operator, gamma, is an operator that combines the effect of grouping and aggregation. p213
19 | */
20 | class LogicalAggregation(
21 | val input: LogicalTransform,
22 | val aggregations: Map<LogicalIdentifierExpr, LogicalAggregateExpr>,
23 | val groups: List<LogicalIdentifierExpr>,
24 | ) : LogicalTransform() {
25 | // aggregations maps each output alias to its aggregate expression; groups are the grouping columns.
26 | override val schema: Schema
27 | get() {
28 | val fields = mutableListOf<Field>()
29 | // schema derived from groups
30 | groups.forEach { fields.add(input.schema.find(it.identifier)) }
31 | // schema derived from aggregated fields
32 | aggregations.entries.forEach { (alias, expr) ->
33 | fields.add(Field(alias.identifier, expr.toField(input).type))
34 | }
35 | return Schema(fields)
36 | }
37 |
38 | override fun inputs(): List<LogicalTransform> = listOf(input)
39 |
40 | override fun toString(): String = buildString {
41 | append("AGGREGATE ")
42 | append(aggregations.entries.joinToString { (alias, expr) -> "$expr -> $alias" })
43 | if (groups.isNotEmpty()) {
44 | append(" BY ")
45 | append(groups.joinToString())
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalCross.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.types.Schema
5 |
6 | class LogicalCross(
7 | val lhs: LogicalTransform,
8 | val rhs: LogicalTransform,
9 | ) : LogicalTransform() {
10 | // Cross of lhs and rhs; the output schema is the left schema combined with the right one.
11 | override var schema: Schema = lhs.schema.combine(rhs.schema)
12 |
13 | override fun inputs(): List<LogicalTransform> = listOf(lhs, rhs)
14 | // Renders both sides in parentheses, separated by an "X" line.
15 | override fun pretty(): String = buildString {
16 | append('(').append(lhs.pretty()).append(')')
17 | append("\nX\n")
18 | append('(').append(rhs.pretty()).append(')')
19 | }
20 |
21 | override fun toString(): String = "CROSS"
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalDiff.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.types.Schema
5 |
6 | class LogicalDiff(
7 | val lhs: LogicalTransform,
8 | val rhs: LogicalTransform,
9 | ) : LogicalTransform() {
10 | // Bag difference of lhs and rhs; the output schema is the left-hand schema, assigned in init below.
11 | override lateinit var schema: Schema
12 |
13 | override fun inputs(): List<LogicalTransform> = listOf(lhs, rhs)
14 |
15 | init {
16 | assert(rhs.schema.subsetOf(lhs.schema)) {
17 | // But really? Need to look into this. NOTE: Kotlin's assert is only evaluated when JVM assertions are enabled (-ea).
18 | "Schema of right-side relation must be a subset of left-side relation in bag difference"
19 | }
20 | schema = lhs.schema
21 | }
22 | // Renders both sides in parentheses, separated by a "-" line.
23 | override fun pretty(): String = buildString {
24 | append('(').append(lhs.pretty()).append(')')
25 | append("\n-\n")
26 | append('(').append(rhs.pretty()).append(')')
27 | }
28 |
29 | override fun toString(): String = "DIFF"
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalDistinct.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
5 | import com.rchowell.sift.types.Schema
6 |
/**
 * Duplicate elimination extension which converts a bag to a set.
 *
 * @property input relation whose duplicates are removed
 * @property identifiers columns that make up the distinct key
 */
class LogicalDistinct(
    private val input: LogicalTransform,
    private val identifiers: List<LogicalIdentifierExpr>,
) : LogicalTransform() {

    /** Same schema as the input. */
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> {
        return listOf(input)
    }

    /**
     * Resolves each identifier to its index in [schema].
     *
     * Fails with a null-pointer error when an identifier is not present in the schema.
     */
    fun fields(): List<Int> {
        return identifiers.map { id -> schema.fieldIndexes[id.identifier]!! }
    }

    override fun toString(): String {
        val names = identifiers.joinToString { it.identifier }
        return "DISTINCT ($names)"
    }
}
25 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalIntersect.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.types.Schema
5 |
/**
 * Logical INTERSECT transform over two input relations.
 *
 * @property lhs left input relation
 * @property rhs right input relation
 */
class LogicalIntersect(
    val lhs: LogicalTransform,
    val rhs: LogicalTransform,
) : LogicalTransform() {

    /** Computed by [Schema.common] from the two input schemas. */
    override var schema: Schema = Schema.common(lhs.schema, rhs.schema)

    override fun inputs(): List<LogicalTransform> {
        return listOf(lhs, rhs)
    }

    /** Renders both inputs in parentheses with an `&` on its own line between them. */
    override fun pretty(): String {
        return "(${lhs.pretty()})\n&\n(${rhs.pretty()})"
    }

    override fun toString(): String {
        return "INTERSECT"
    }
}
23 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalJoin.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.types.Field
6 | import com.rchowell.sift.types.Schema
7 |
/**
 * Kinds of join that a [LogicalJoin] can be tagged with.
 */
enum class JoinType {
    INNER,
    OUTER,
    LEFT,
    RIGHT
}
14 |
/**
 * LogicalJoin covers the eight joins obtained by pairing each [JoinType] with either a
 * natural or a theta join condition.
 *
 * @property lhs left relation
 * @property rhs right relation
 * @property condition join condition; `null` means this is the natural join
 * @property type which kind of join to perform
 */
class LogicalJoin(
    private val lhs: LogicalTransform,
    private val rhs: LogicalTransform,
    private val condition: LogicalExpr?,
    private val type: JoinType,
) : LogicalTransform() {

    /**
     * Fields of the left schema followed by the fields of the right schema, with
     * duplicate fields kept only once. Recomputed on every access.
     */
    override val schema: Schema
        get() = Schema((lhs.schema.fields + rhs.schema.fields).distinct())

    override fun inputs(): List<LogicalTransform> {
        return listOf(lhs, rhs)
    }

    /** Returns `"<type> JOIN"`, with `" ON <condition>"` appended when a condition is set. */
    override fun pretty(): String {
        val cond = condition ?: return "$type JOIN"
        return "$type JOIN ON $cond"
    }
}
46 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalLimit.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.types.Schema
5 |
/**
 * LogicalLimit restricts the result to the first `n` rows.
 * The operation is not part of the extended relational algebra of DSTCB, but is simple enough to include.
 *
 * Position in the pipeline matters:
 *
 *   SORT field ASC |> LIMIT 10   # sorts all rows by `field`, then takes the first 10
 *   LIMIT 10 |> SORT field ASC   # takes the first 10 input rows, then sorts only those
 *
 * @property input relation being limited
 * @property n maximum number of rows to keep
 */
class LogicalLimit(
    val input: LogicalTransform,
    val n: Int,
) : LogicalTransform() {

    /** Same schema as the input. */
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> {
        return listOf(input)
    }

    override fun toString(): String {
        return "LIMIT $n"
    }
}
28 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalProjection.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
6 | import com.rchowell.sift.types.Field
7 | import com.rchowell.sift.types.Schema
8 |
/**
 * Extended projection: besides projecting out columns, it can compute new columns from
 * expressions over the columns of its input relation (p213).
 *
 * @property input relation being projected
 * @property projections output alias mapped to the expression that produces it
 */
class LogicalProjection(
    val input: LogicalTransform,
    val projections: Map<LogicalIdentifierExpr, LogicalExpr>,
) : LogicalTransform() {

    /**
     * One field per projection: named after the alias, typed by the field the expression
     * produces when evaluated against [input].
     */
    override val schema: Schema = Schema(
        projections.map { (alias, expr) -> Field(alias.identifier, expr.toField(input).type) }
    )

    override fun inputs(): List<LogicalTransform> {
        return listOf(input)
    }

    override fun toString(): String {
        val rendered = projections.entries.joinToString { (alias, expr) -> "$expr -> $alias" }
        return "PROJECT $rendered"
    }
}
36 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalScan.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.source.Source
5 |
/**
 * LogicalScan is a plan to read the given fields from an underlying source.
 *
 * @property source source to read from
 * @property identifiers names of the fields to read; rendered as `*` when empty
 */
class LogicalScan(
    val source: Source,
    val identifiers: List<String> = listOf(),
) : LogicalTransform() {

    /** Derived from the source schema by selecting [identifiers]. */
    override val schema = source.schema.select(identifiers)

    /** A scan is a leaf of the plan, so it has no inputs. */
    override fun inputs(): List<LogicalTransform> {
        return listOf()
    }

    override fun toString(): String {
        val columns = if (identifiers.isNotEmpty()) identifiers.joinToString() else "*"
        return "SCAN $columns FROM $source"
    }
}
35 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalSelection.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.types.Schema
6 |
/**
 * LogicalSelection filters its input with the given expression.
 * It is kept apart from LogicalScan because filtering should be independent of the data source.
 *
 * @property input relation being filtered
 * @property expr filter expression
 */
class LogicalSelection(
    val input: LogicalTransform,
    val expr: LogicalExpr,
) : LogicalTransform() {

    /** Same schema as the input. */
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> {
        return listOf(input)
    }

    override fun toString(): String {
        return "SELECT $expr"
    }
}
26 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalSort.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
5 | import com.rchowell.sift.types.Schema
6 |
/**
 * LogicalSort describes an ascending or descending sort over a list of identifiers.
 *
 * @property input relation being sorted
 * @property fields identifiers to sort by
 * @property asc `true` for ascending order, `false` for descending
 */
class LogicalSort(
    private val input: LogicalTransform,
    private val fields: List<LogicalIdentifierExpr>,
    private val asc: Boolean,
) : LogicalTransform() {

    /** Sorting does not change the schema. */
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> {
        return listOf(input)
    }

    override fun toString(): String {
        // The field list and its trailing space are omitted entirely when there are no fields.
        val columns = if (fields.isEmpty()) "" else fields.joinToString() + " "
        val direction = if (asc) "ASC" else "DESC"
        return "SORT $columns$direction"
    }
}
37 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalUnion.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.logical.transforms
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 | import com.rchowell.sift.types.Schema
5 |
/**
 * Logical UNION transform over two relations that share a schema.
 *
 * @property lhs left input relation
 * @property rhs right input relation
 */
class LogicalUnion(
    val lhs: LogicalTransform,
    val rhs: LogicalTransform,
) : LogicalTransform() {

    /** Assigned in `init` once the operand schemas have been checked; equals the left schema. */
    override lateinit var schema: Schema

    override fun inputs(): List<LogicalTransform> {
        return listOf(lhs, rhs)
    }

    init {
        val sameSchema = lhs.schema == rhs.schema
        assert(sameSchema) {
            "Relations in union do not share a schema"
        }
        schema = lhs.schema
    }

    /** Renders both inputs in parentheses with a `U` on its own line between them. */
    override fun pretty(): String {
        return "(${lhs.pretty()})\nU\n(${rhs.pretty()})"
    }

    override fun toString(): String {
        return "UNION"
    }
}
30 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/aggregations/Accumulator.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.aggregations
2 |
3 | import com.rchowell.sift.execution.physical.expressions.Expression
4 |
/**
 * Base type for all aggregations; every aggregation is implemented as an accumulator
 * that is fed one value at a time and can report its current result.
 */
sealed class Accumulator {

    /** Expression associated with this accumulator; each subclass passes it on unchanged in [new]. */
    abstract val expr: Expression

    /**
     * Create a new instance of this Accumulator.
     * Exists because a fresh instance could not be created from an interface KClass
     * (there is no empty constructor).
     */
    abstract fun new(): Accumulator

    /**
     * Accumulate one value at a time.
     *
     * @param v the next value to fold into the running result
     */
    abstract fun add(v: Double)

    /**
     * Returns the current value of the accumulator.
     */
    abstract fun get(): Double
}
30 |
/**
 * Running sum of every value added; `0.0` before anything is added.
 */
class SumAccumulator(override val expr: Expression) : Accumulator() {

    var value: Double = 0.0

    override fun new(): Accumulator {
        return SumAccumulator(expr)
    }

    override fun add(v: Double) {
        value = value + v
    }

    override fun get(): Double {
        return value
    }
}
43 |
/**
 * Tracks the smallest value added so far.
 *
 * The running value starts at positive infinity, the identity for `min`, so that any
 * input (including `+Infinity` itself, e.g. from a division by zero) is reported correctly.
 * Before any value is added, [get] returns `Double.POSITIVE_INFINITY`.
 */
class MinAccumulator(override val expr: Expression) : Accumulator() {

    var value = Double.POSITIVE_INFINITY

    override fun new(): Accumulator = MinAccumulator(expr)

    override fun add(v: Double) {
        // A NaN input compares false and is therefore ignored.
        if (v < value) value = v
    }

    override fun get(): Double = value
}
56 |
/**
 * Tracks the largest value added so far.
 *
 * The running value starts at negative infinity, the identity for `max`.
 * `Double.MIN_VALUE` must not be used as the seed: it is the smallest *positive* double
 * (about 4.9E-324), so a group containing only negative numbers or zeros would never
 * replace it. Before any value is added, [get] returns `Double.NEGATIVE_INFINITY`.
 */
class MaxAccumulator(override val expr: Expression) : Accumulator() {

    var value = Double.NEGATIVE_INFINITY

    override fun new(): Accumulator = MaxAccumulator(expr)

    override fun add(v: Double) {
        // A NaN input compares false and is therefore ignored.
        if (v > value) value = v
    }

    override fun get(): Double = value
}
69 |
/**
 * Counts how many values were added; the values themselves are ignored.
 */
class CountAccumulator(override val expr: Expression) : Accumulator() {

    var value: Double = 0.0

    override fun new(): Accumulator {
        return CountAccumulator(expr)
    }

    override fun add(v: Double) {
        value = value + 1
    }

    override fun get(): Double {
        return value
    }
}
82 |
/**
 * Arithmetic mean of the values added, kept as a running sum and a running count.
 *
 * Before any value is added, [get] evaluates `0.0 / 0.0`, which is NaN.
 */
class AvgAccumulator(override val expr: Expression) : Accumulator() {

    /** Sum of all values added so far. */
    var numer: Double = 0.0

    /** Number of values added so far. */
    var denom: Double = 0.0

    override fun new(): Accumulator {
        return AvgAccumulator(expr)
    }

    override fun add(v: Double) {
        denom = denom + 1
        numer = numer + v
    }

    override fun get(): Double {
        return numer / denom
    }
}
97 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/aggregations/Key.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.aggregations
2 |
3 | import org.apache.commons.lang3.builder.HashCodeBuilder
4 |
/**
 * Composite key made of a list of values, with equality and hashing by content.
 *
 * `ByteArray` elements are compared and hashed through their UTF-8 string value, because
 * arrays would otherwise compare by reference.
 *
 * @property values the key's component values, in order
 */
class Key(val values: List<Any?>) {

    companion object {
        /** Key with no component values. */
        val EMPTY = Key(listOf())
    }

    override fun equals(other: Any?): Boolean = when {
        this === other -> true
        other == null || javaClass != other.javaClass -> false
        else -> compareValues((other as Key).values)
    }

    /**
     * Hashes the String value of each ByteArray rather than its reference,
     * keeping the hash consistent with [equals].
     */
    override fun hashCode(): Int {
        val builder = HashCodeBuilder()
        for (v in values) {
            if (v is ByteArray) {
                builder.append(v.toString(Charsets.UTF_8))
            } else {
                builder.append(v)
            }
        }
        return builder.build()
    }

    override fun toString(): String {
        return values.joinToString("-")
    }

    /** True when [other] has the same length and every position holds an equal value. */
    private fun compareValues(other: List<Any?>): Boolean {
        if (values.size != other.size) return false
        return values.indices.all { i -> sameValue(values[i], other[i]) }
    }

    /** Plain equality first; two ByteArrays fall back to comparing their UTF-8 strings. */
    private fun sameValue(mine: Any?, theirs: Any?): Boolean {
        if (mine == theirs) return true
        if (mine !is ByteArray || theirs !is ByteArray) return false
        return mine.toString(Charsets.UTF_8) == theirs.toString(Charsets.UTF_8)
    }
}
55 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/expressions/BinaryExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.expressions
2 |
3 | import com.rchowell.sift.types.Batch
4 | import com.rchowell.sift.types.BoolColumn
5 | import com.rchowell.sift.types.BoolVectorColumn
6 | import com.rchowell.sift.types.Column
7 | import com.rchowell.sift.types.NumColumn
8 | import com.rchowell.sift.types.NumVectorColumn
9 | import com.rchowell.sift.types.StringColumn
10 | import com.rchowell.sift.types.StringVectorColumn
11 |
/**
 * Base class for binary expressions whose result column is of the same kind as its operands
 * (numeric, boolean or string).
 *
 * Both operands are evaluated against the batch and then combined row by row through one of
 * the scalar `eval` overloads. Subclasses override only the overloads they support; the
 * defaults throw.
 *
 * @property lhs left operand expression
 * @property rhs right operand expression
 */
abstract class BinaryExpr(val lhs: Expression, val rhs: Expression) : Expression {

    /**
     * Evaluates both operands on [batch] and applies the scalar operation to every row.
     *
     * @return a new vector-backed column holding one result per row
     * @throws Exception when the operand columns are not both numeric, both boolean or both string
     */
    override fun eval(batch: Batch): Column {
        val lc = lhs.eval(batch)
        val rc = rhs.eval(batch)
        assert(lc.size == rc.size)
        // Each branch calls eval(lc[i], rc[i]); which overload runs is decided by the element
        // type of the smart-cast columns, so the three branches cannot be merged.
        return when {
            (lc is NumColumn && rc is NumColumn) -> {
                val result = Column.VectorFactory.numeric(lc.size)
                for (i in 0 until lc.size) {
                    result[i] = eval(lc[i], rc[i])
                }
                result.valueCount = lc.size
                NumVectorColumn(result)
            }
            (lc is BoolColumn && rc is BoolColumn) -> {
                val result = Column.VectorFactory.boolean(lc.size)
                for (i in 0 until lc.size) {
                    result[i] = eval(lc[i], rc[i])
                }
                result.valueCount = lc.size
                BoolVectorColumn(result)
            }
            (lc is StringColumn && rc is StringColumn) -> {
                val result = Column.VectorFactory.string(lc.size)
                for (i in 0 until lc.size) {
                    result[i] = eval(lc[i], rc[i])
                }
                result.valueCount = lc.size
                StringVectorColumn(result)
            }
            // Also reached when the two operands are of different kinds; only the left type is reported.
            else -> throw Exception("unsupported column type ${lc::class.java}")
        }
    }

    /** Scalar operation on numbers; throws unless a subclass overrides it. */
    open fun eval(l: Double, r: Double): Double = throw Exception("not implemented for ${this.javaClass}")

    /** Scalar operation on booleans; throws unless a subclass overrides it. */
    open fun eval(l: Boolean, r: Boolean): Boolean = throw Exception("not implemented for ${this.javaClass}")

    /** Scalar operation on strings; throws unless a subclass overrides it. */
    open fun eval(l: String, r: String): String = throw Exception("not implemented for ${this.javaClass}")

    // VarCharVector: decodes both byte arrays as UTF-8, delegates to the String overload,
    // and encodes the result back to bytes.
    private fun eval(l: ByteArray, r: ByteArray): ByteArray = eval(l.toString(Charsets.UTF_8), r.toString(Charsets.UTF_8)).toByteArray()

    // BitVector: treats 1 as true and anything else as false, delegates to the Boolean
    // overload, and maps the result back to 1/0.
    private fun eval(l: Int, r: Int): Int = if (eval(l == 1, r == 1)) 1 else 0
}
59 |
60 | /**
61 | * ==================
62 | * Math Expressions
63 | * ==================
64 | */
65 |
/** Addition for numbers and concatenation for strings. */
class AddExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double {
        return l + r
    }

    override fun eval(l: String, r: String): String {
        return l + r
    }
}
72 |
/** Numeric subtraction. */
class SubExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double {
        return l - r
    }
}
77 |
/** Numeric multiplication. */
class MulExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double {
        return l * r
    }
}
82 |
/** Numeric division using plain floating-point semantics. */
class DivExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    // TODO divide by 0; expression return values should perhaps be nullable,
    // which would then become SQL NULL
    override fun eval(l: Double, r: Double): Double {
        return l / r
    }
}
89 |
/** Numeric remainder. */
class ModExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double {
        return l % r
    }
}
94 |
95 | /**
96 | * ============
97 | * Predicates
98 | * ============
99 | */
100 |
/**
 * Base class for binary expressions that always produce a boolean column, whatever the
 * kind of their operands.
 *
 * Both operands are evaluated against the batch and then compared row by row through one of
 * the scalar `eval` overloads. Subclasses override only the overloads they support; the
 * defaults throw.
 *
 * @property lhs left operand expression
 * @property rhs right operand expression
 */
abstract class PredicateBinaryExpr(val lhs: Expression, val rhs: Expression) : Expression {

    /**
     * Evaluates both operands on [batch] and stores 1 or 0 per row for a true or false result.
     *
     * @return a new vector-backed boolean column holding one result per row
     * @throws Exception when the operand columns are not both numeric, both boolean or both string
     */
    override fun eval(batch: Batch): BoolColumn {
        val lc = lhs.eval(batch)
        val rc = rhs.eval(batch)
        assert(lc.size == rc.size)
        val result = Column.VectorFactory.boolean(lc.size)
        // The three loops look identical, but eval(lc[i], rc[i]) resolves to a different
        // overload in each branch based on the smart-cast column type.
        when {
            (lc is NumColumn && rc is NumColumn) -> {
                for (i in 0 until lc.size) result[i] = if (eval(lc[i], rc[i])) 1 else 0
            }
            (lc is BoolColumn && rc is BoolColumn) -> {
                for (i in 0 until lc.size) result[i] = if (eval(lc[i], rc[i])) 1 else 0
            }
            (lc is StringColumn && rc is StringColumn) -> {
                for (i in 0 until lc.size) result[i] = if (eval(lc[i], rc[i])) 1 else 0
            }
            // Also reached when the two operands are of different kinds; only the left type is reported.
            else -> throw Exception("unsupported vector type ${lc::class.java}")
        }
        result.valueCount = lc.size
        return BoolVectorColumn(result)
    }

    /** Predicate on numbers; throws unless a subclass overrides it. */
    open fun eval(l: Double, r: Double): Boolean = throw Exception("not implemented for ${this.javaClass}")

    /** Predicate on booleans; throws unless a subclass overrides it. */
    open fun eval(l: Boolean, r: Boolean): Boolean = throw Exception("not implemented for ${this.javaClass}")

    /** Predicate on strings; throws unless a subclass overrides it. */
    open fun eval(l: String, r: String): Boolean = throw Exception("not implemented for ${this.javaClass}")

    // BitVector: treats 1 as true and anything else as false, then delegates to the Boolean overload.
    private fun eval(l: Int, r: Int): Boolean = eval(l == 1, r == 1)

    // VarCharVector: decodes both byte arrays as UTF-8, then delegates to the String overload.
    private fun eval(l: ByteArray, r: ByteArray): Boolean = eval(l.toString(Charsets.UTF_8), r.toString(Charsets.UTF_8))
}
136 |
137 | /**
138 | * ============================
139 | * Boolean Binary Expressions
140 | * ============================
141 | */
142 |
/** Logical AND of two boolean operands. */
class AndBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Boolean, r: Boolean): Boolean {
        return l && r
    }
}
146 |
/** Logical OR of two boolean operands. */
class OrBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Boolean, r: Boolean): Boolean {
        return l || r
    }
}
150 |
/** Greater-than comparison for numbers and strings. */
class GtBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Boolean {
        return l > r
    }

    override fun eval(l: String, r: String): Boolean {
        return l > r
    }
}
156 |
/** Greater-than-or-equal comparison for numbers and strings. */
class GteBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Boolean {
        return l >= r
    }

    override fun eval(l: String, r: String): Boolean {
        return l >= r
    }
}
162 |
/** Less-than comparison for numbers and strings. */
class LtBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Boolean {
        return l < r
    }

    override fun eval(l: String, r: String): Boolean {
        return l < r
    }
}
168 |
/** Less-than-or-equal comparison for numbers and strings. */
class LteBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Boolean {
        return l <= r
    }

    override fun eval(l: String, r: String): Boolean {
        return l <= r
    }
}
174 |
/** Equality comparison for numbers, booleans and strings. */
class EqBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Boolean {
        return l == r
    }

    override fun eval(l: Boolean, r: Boolean): Boolean {
        return l == r
    }

    override fun eval(l: String, r: String): Boolean {
        return l == r
    }
}
182 |
/** Inequality comparison for numbers, booleans and strings. */
class NeqBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Boolean {
        return l != r
    }

    override fun eval(l: Boolean, r: Boolean): Boolean {
        return l != r
    }

    override fun eval(l: String, r: String): Boolean {
        return l != r
    }
}
190 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/expressions/ColumnExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.expressions
2 |
3 | import com.rchowell.sift.types.Batch
4 | import com.rchowell.sift.types.Column
5 |
/**
 * Expression that returns an existing column of the batch unchanged.
 *
 * @property i index of the column within the batch
 */
class ColumnExpr(val i: Int) : Expression {

    override fun eval(batch: Batch): Column = batch.columns[i]
}
12 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/expressions/Expression.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.expressions
2 |
3 | import com.rchowell.sift.types.Batch
4 | import com.rchowell.sift.types.Column
5 |
/**
 * A physical expression that can be evaluated against a [Batch].
 */
interface Expression {
    /** Evaluates this expression on [batch] and returns the resulting [Column]. */
    fun eval(batch: Batch): Column
}
9 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/expressions/LiteralExpr.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.expressions
2 |
3 | import com.rchowell.sift.types.Batch
4 | import com.rchowell.sift.types.BoolLiteralColumn
5 | import com.rchowell.sift.types.Column
6 | import com.rchowell.sift.types.NumLiteralColumn
7 | import com.rchowell.sift.types.StringLiteralColumn
8 |
/**
 * Literal expression: wraps a constant in a literal column sized to the batch.
 *
 * @property v the constant value; must be a Boolean, a Number or a String
 */
class LiteralExpr(val v: Any) : Expression {

    /**
     * Builds the literal column matching the runtime type of [v], with as many rows as [batch].
     * Numbers are widened to Double and strings are stored as bytes.
     *
     * @throws IllegalStateException when [v] is of any other type
     */
    override fun eval(batch: Batch): Column {
        return when (v) {
            is Boolean -> BoolLiteralColumn(v, batch.records)
            is Number -> NumLiteralColumn(v.toDouble(), batch.records)
            is String -> StringLiteralColumn(v.toByteArray(), batch.records)
            else -> throw IllegalStateException("invalid type ${v.javaClass} in literal expression")
        }
    }
}
23 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Aggregation.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.sifterators
2 |
3 | import com.rchowell.sift.execution.physical.aggregations.Accumulator
4 | import com.rchowell.sift.execution.physical.aggregations.Key
5 | import com.rchowell.sift.types.Batch
6 | import com.rchowell.sift.types.BoolVectorColumn
7 | import com.rchowell.sift.types.Column
8 | import com.rchowell.sift.types.NumVectorColumn
9 | import com.rchowell.sift.types.Schema
10 | import com.rchowell.sift.types.StringVectorColumn
11 | import com.rchowell.sift.types.Type
12 | import org.apache.arrow.vector.BitVector
13 | import org.apache.arrow.vector.Float8Vector
14 | import org.apache.arrow.vector.ValueVector
15 | import org.apache.arrow.vector.VarCharVector
16 |
/**
 * Aggregation Sifterator keeps one list of accumulators per distinct grouping key and
 * consumes every input batch before producing its single output batch.
 *
 * @property input upstream operator
 * @property aggregations prototype accumulators, one per aggregated output column
 * @property groups indexes of the input columns to group by
 * @property schema output schema; the group-by fields are read from its leading positions
 */
class Aggregation(
    val input: Sifterator,
    val aggregations: List<Accumulator>,
    val groups: List<Int>,
    override val schema: Schema,
) : Sifterator {

    // Accumulators per grouping key, in first-seen order.
    private val accumulators: MutableMap<Key, List<Accumulator>> = mutableMapOf()

    // Set once the single output batch has been handed out.
    private var done = false

    /**
     * Opens the input and performs the full aggregation up front
     * (DSCB has iterators doing this work in open()).
     */
    override fun open() {
        input.open()
        while (true) {
            val batch = input.next() ?: break
            // Evaluate every aggregation expression once per batch, then feed it row by row.
            val columns: List<Column> = aggregations.map { it.expr.eval(batch) }
            for (row in 0 until batch.records) {
                val groupValues = groups.map { batch.columns[it][row] }
                val key = if (groupValues.isEmpty()) Key.EMPTY else Key(groupValues)
                accumulate(key, columns, row)
            }
        }
    }

    /**
     * Returns one batch with a row per grouping key on the first call and `null` afterwards.
     * Key columns come first, followed by one numeric column per aggregation.
     */
    override fun next(): Batch? {
        if (done) return null
        done = true

        // One output row per distinct key.
        val rowCount = accumulators.size

        // One vector per group-by column, typed from the schema.
        val keyVectors: List<ValueVector> = groups.indices.map { position ->
            when (schema.fields[position].type) {
                Type.Num -> Column.VectorFactory.numeric(rowCount)
                Type.Bool -> Column.VectorFactory.boolean(rowCount)
                Type.String -> Column.VectorFactory.string(rowCount)
            }
        }

        // One vector per aggregation; aggregated values are always numeric.
        val valueVectors = aggregations.map { Column.VectorFactory.numeric(rowCount) }

        accumulators.entries.forEachIndexed { row, (key, accs) ->
            // Write the key's component values into the key vectors.
            key.values.forEachIndexed { i, kv ->
                when (val keyVec = keyVectors[i]) {
                    is Float8Vector -> keyVec[row] = kv as Double
                    is BitVector -> keyVec[row] = kv as Int
                    is VarCharVector -> keyVec[row] = kv as ByteArray
                    else -> throw IllegalStateException("unknown key vector type ${keyVec::class.java} for key $kv")
                }
            }
            // Write the current value of each accumulator into the value vectors.
            accs.forEachIndexed { i, acc ->
                valueVectors[i][row] = acc.get()
            }
        }

        // Wrap the vectors as columns: keys first, then aggregated values.
        val cols = mutableListOf<Column>()
        for (vec in keyVectors) {
            vec.valueCount = rowCount
            when (vec) {
                is Float8Vector -> cols.add(NumVectorColumn(vec))
                is BitVector -> cols.add(BoolVectorColumn(vec))
                is VarCharVector -> cols.add(StringVectorColumn(vec))
            }
        }
        for (vec in valueVectors) {
            vec.valueCount = rowCount
            cols.add(NumVectorColumn(vec))
        }
        return Batch(schema, cols)
    }

    override fun close() {
        input.close()
    }

    /** Feeds row [row] of every evaluated column into the accumulators of [key], creating them on first use. */
    private fun accumulate(key: Key, columns: List<Column>, row: Int) {
        val accums = accumulators.getOrPut(key) { aggregations.map { it.new() } }
        accums.forEachIndexed { col, acc ->
            acc.add(columns[col][row] as Double)
        }
    }
}
128 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Distinct.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.sifterators
2 |
3 | import com.rchowell.sift.types.Batch
4 | import com.rchowell.sift.types.Batch.Companion.valueCount
5 | import com.rchowell.sift.types.Schema
6 | import com.rchowell.sift.types.set
7 | import org.apache.commons.lang3.builder.HashCodeBuilder
8 |
/**
 * Duplicate elimination for the given keys.
 *
 * Rows are remembered by the actual values of their key columns, not by a hash of them,
 * so two different rows can never be mistaken for duplicates because of a hash collision.
 * The set of seen keys spans all batches, so each distinct key is emitted once overall.
 *
 * @property input upstream operator
 * @property fields indexes of the input columns to keep and to deduplicate on
 */
class Distinct(
    val input: Sifterator,
    val fields: List<Int>,
) : Sifterator {

    // Key values of every row emitted so far. Byte arrays are stored as List<Byte>
    // so that they compare by content instead of by reference.
    private val seen: MutableSet<List<Any?>> = mutableSetOf()

    override val schema: Schema = input.schema.project(fields)

    override fun open() {
        input.open()
    }

    /**
     * Returns the next input batch reduced to [fields], with every row whose key was
     * already emitted removed; `null` once the input is exhausted.
     * The returned batch may contain zero rows.
     */
    override fun next(): Batch? {
        val batch = input.next() ?: return null
        val vectors = Batch.empty(schema, batch.records)
        var records = 0
        for (row in 0 until batch.records) {
            val key: List<Any?> = fields.map { col ->
                val v = batch.columns[col][row]
                if (v is ByteArray) v.toList() else v
            }
            // add() returns false when the key was already present.
            if (seen.add(key)) {
                for (c in fields.indices) {
                    vectors[c][records] = batch.columns[fields[c]][row]
                }
                records += 1
            }
        }
        vectors.valueCount(records)
        return Batch.fromVectors(schema, vectors)
    }

    override fun close() {
        input.close()
    }
}
55 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Limit.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.sifterators
2 |
3 | import com.rchowell.sift.types.Batch
4 | import com.rchowell.sift.types.Batch.Companion.valueCount
5 | import com.rchowell.sift.types.set
6 | import kotlin.math.min
7 |
/**
 * Limit passes rows through until [limit] rows have been emitted in total.
 *
 * Each call to [next] pulls one input batch and returns it, cut short when it would go
 * past the limit. Once the limit is reached, [next] returns `null` without pulling
 * further input.
 *
 * @property input upstream operator
 * @property limit maximum number of rows to emit across all batches
 */
class Limit(
    val input: Sifterator,
    val limit: Int,
) : Sifterator {

    /** Number of rows emitted so far. */
    var sent = 0

    override val schema = input.schema

    override fun open() {
        input.open()
    }

    override fun next(): Batch? {
        if (sent >= limit) return null
        val batch = input.next() ?: return null
        // Could be a [Batch] helper method; a DataFrame library on top of Arrow would help here.
        val take = min(batch.records, limit - sent)
        val vectors = Batch.empty(batch.schema, take)
        for (row in 0 until take) {
            batch.columns.indices.forEach { col ->
                vectors[col][row] = batch.columns[col][row]
            }
        }
        sent += take
        vectors.valueCount(take)
        return Batch.fromVectors(schema, vectors)
    }

    override fun close() {
        input.close()
    }
}
47 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Projection.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.sifterators
2 |
3 | import com.rchowell.sift.execution.physical.expressions.Expression
4 | import com.rchowell.sift.types.Batch
5 | import com.rchowell.sift.types.Schema
6 |
7 | /**
8 |  * Projection holds a map of *output* column indexes to expressions.
9 |  *
10 |  * Each [Expression] is evaluated on the input [Batch], and the results become the columns of
11 |  * the output [Batch], ordered by ascending output column index.
12 |  *
13 |  * @property input operator supplying the batches to project
14 |  * @property projections output column index mapped to the expression that produces that column
15 |  * @property schema schema given to every output [Batch]
16 |  */
17 | class Projection(
18 |     val input: Sifterator,
19 |     val projections: Map<Int, Expression>,
20 |     override val schema: Schema,
21 | ) : Sifterator {
22 |
23 |     override fun open() = input.open()
24 |
25 |     override fun next(): Batch? {
26 |         val batch = input.next() ?: return null
27 |         // Order the results by the map key instead of relying on the map's iteration
28 |         // order, so the output columns follow the declared output column indexes
29 |         // whichever Map implementation the caller supplied.
30 |         val output = projections.entries.sortedBy { it.key }.map { it.value.eval(batch) }
31 |         return Batch(schema, output)
32 |     }
33 |
34 |     override fun close() = input.close()
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Scan.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.sifterators
2 |
3 | import com.rchowell.sift.source.Source
4 | import com.rchowell.sift.types.Batch
5 |
6 | /**
7 | * PhysicalScan
8 | *
9 | * @property source
10 | * @property fields
11 | * @constructor Create empty Physical scan
12 | */
13 | class Scan(
14 | val source: Source,
15 | val fields: List,
16 | ) : Sifterator {
17 |
18 | override val schema = source.schema
19 |
20 | lateinit var batches: Iterator
21 |
22 | /** Initialises [source] and then starts a scan of it over [fields]. */
23 | override fun open() {
24 |     batches = source.also { it.init() }.scan(fields).iterator()
25 | }
26 |
27 | /**
28 |  * Returns the next batch of the scan, or null once the scan is exhausted.
29 |  *
30 |  * Exhaustion is checked with `hasNext()` instead of catching [NoSuchElementException], so
31 |  * such an exception thrown while a batch is being produced is no longer mistaken for the end.
32 |  */
33 | override fun next(): Batch? = if (batches.hasNext()) batches.next() else null
34 |
35 | override fun close() {
36 | source.close() // forwards the close to the source this scan reads from
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Selection.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.sifterators
2 |
3 | import com.rchowell.sift.execution.physical.expressions.PredicateBinaryExpr
4 | import com.rchowell.sift.types.Batch
5 |
6 | /**
7 |  * Selection evaluates a bool expression and uses the results to filter the next result.
8 |  * Seems expensive to filter with columns versus rows.
9 |  *
10 |  * For every input [Batch] the predicate is evaluated once to obtain a mask, and each column
11 |  * of the batch is then filtered with that mask.
12 |  *
13 |  * @property input operator supplying the batches to filter
14 |  * @property predicateBinary predicate evaluated against each batch
15 |  */
16 | class Selection(
17 |     val input: Sifterator,
18 |     val predicateBinary: PredicateBinaryExpr // TODO change to just predicate
19 | ) : Sifterator {
20 |
21 |     override val schema = input.schema
22 |
23 |     /** Opens [input]. */
24 |     override fun open() = input.open()
25 |
26 |     /** Returns the next input batch with every column filtered by the predicate's mask. */
27 |     override fun next(): Batch? =
28 |         input.next()?.let { batch ->
29 |             val keep = predicateBinary.eval(batch)
30 |             Batch(schema, batch.columns.map { column -> column.filter(keep) })
31 |         }
32 |
33 |     /** Closes [input]. */
34 |     override fun close() = input.close()
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Sifterator.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.physical.sifterators
2 |
3 | import com.rchowell.sift.types.Batch
4 | import com.rchowell.sift.types.Schema
5 |
6 | /**
7 | * Iterator from The Volcano Model. Called Sifterator to avoid naming confusion.
8 | * Intended use: call [open] first, then [next] until it returns null, and finally [close].
9 | * Things might get interesting/weird because `next()` returns a [Batch] rather than a row.
10 | */
11 | interface Sifterator {
12 |
13 | /**
14 | * Output schema of this transformation
15 | */
16 | val schema: Schema
17 |
18 | /**
19 | * This method starts the process of getting tuples, but does not get a tuple.
20 | * It initializes any data structures needed to perform the operation and calls Open()
21 | * for any arguments of the operation. p707
22 | */
23 | fun open()
24 |
25 | /**
26 | * Returns the next [Batch] of results.
27 | *
28 | * @return the next [Batch], or null once this operator has no more batches to produce
29 | */
30 | fun next(): Batch?
31 |
32 | fun close()
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/planner/Planner.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.execution.planner
2 |
3 | import com.rchowell.sift.execution.logical.LogicalExpr
4 | import com.rchowell.sift.execution.logical.LogicalTransform
5 | import com.rchowell.sift.execution.logical.expressions.BinaryOp
6 | import com.rchowell.sift.execution.logical.expressions.LogicalAddExpr
7 | import com.rchowell.sift.execution.logical.expressions.LogicalAndExpr
8 | import com.rchowell.sift.execution.logical.expressions.LogicalAvgExpr
9 | import com.rchowell.sift.execution.logical.expressions.LogicalBinaryExpr
10 | import com.rchowell.sift.execution.logical.expressions.LogicalBooleanBinaryExpr
11 | import com.rchowell.sift.execution.logical.expressions.LogicalCountExpr
12 | import com.rchowell.sift.execution.logical.expressions.LogicalDivExpr
13 | import com.rchowell.sift.execution.logical.expressions.LogicalEqExpr
14 | import com.rchowell.sift.execution.logical.expressions.LogicalGtExpr
15 | import com.rchowell.sift.execution.logical.expressions.LogicalGteExpr
16 | import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
17 | import com.rchowell.sift.execution.logical.expressions.LogicalLiteralExpr
18 | import com.rchowell.sift.execution.logical.expressions.LogicalLtExpr
19 | import com.rchowell.sift.execution.logical.expressions.LogicalLteExpr
20 | import com.rchowell.sift.execution.logical.expressions.LogicalMaxExpr
21 | import com.rchowell.sift.execution.logical.expressions.LogicalMinExpr
22 | import com.rchowell.sift.execution.logical.expressions.LogicalModExpr
23 | import com.rchowell.sift.execution.logical.expressions.LogicalMulExpr
24 | import com.rchowell.sift.execution.logical.expressions.LogicalNeqExpr
25 | import com.rchowell.sift.execution.logical.expressions.LogicalOrExpr
26 | import com.rchowell.sift.execution.logical.expressions.LogicalSubExpr
27 | import com.rchowell.sift.execution.logical.expressions.LogicalSumExpr
28 | import com.rchowell.sift.execution.logical.transforms.LogicalAggregation
29 | import com.rchowell.sift.execution.logical.transforms.LogicalDistinct
30 | import com.rchowell.sift.execution.logical.transforms.LogicalJoin
31 | import com.rchowell.sift.execution.logical.transforms.LogicalLimit
32 | import com.rchowell.sift.execution.logical.transforms.LogicalProjection
33 | import com.rchowell.sift.execution.logical.transforms.LogicalScan
34 | import com.rchowell.sift.execution.logical.transforms.LogicalSelection
35 | import com.rchowell.sift.execution.logical.transforms.LogicalSort
36 | import com.rchowell.sift.execution.physical.aggregations.AvgAccumulator
37 | import com.rchowell.sift.execution.physical.aggregations.CountAccumulator
38 | import com.rchowell.sift.execution.physical.aggregations.MaxAccumulator
39 | import com.rchowell.sift.execution.physical.aggregations.MinAccumulator
40 | import com.rchowell.sift.execution.physical.aggregations.SumAccumulator
41 | import com.rchowell.sift.execution.physical.expressions.AddExpr
42 | import com.rchowell.sift.execution.physical.expressions.AndBinaryExpr
43 | import com.rchowell.sift.execution.physical.expressions.ColumnExpr
44 | import com.rchowell.sift.execution.physical.expressions.DivExpr
45 | import com.rchowell.sift.execution.physical.expressions.EqBinaryExpr
46 | import com.rchowell.sift.execution.physical.expressions.Expression
47 | import com.rchowell.sift.execution.physical.expressions.GtBinaryExpr
48 | import com.rchowell.sift.execution.physical.expressions.GteBinaryExpr
49 | import com.rchowell.sift.execution.physical.expressions.LiteralExpr
50 | import com.rchowell.sift.execution.physical.expressions.LtBinaryExpr
51 | import com.rchowell.sift.execution.physical.expressions.LteBinaryExpr
52 | import com.rchowell.sift.execution.physical.expressions.ModExpr
53 | import com.rchowell.sift.execution.physical.expressions.MulExpr
54 | import com.rchowell.sift.execution.physical.expressions.NeqBinaryExpr
55 | import com.rchowell.sift.execution.physical.expressions.OrBinaryExpr
56 | import com.rchowell.sift.execution.physical.expressions.PredicateBinaryExpr
57 | import com.rchowell.sift.execution.physical.expressions.SubExpr
58 | import com.rchowell.sift.execution.physical.sifterators.Aggregation
59 | import com.rchowell.sift.execution.physical.sifterators.Distinct
60 | import com.rchowell.sift.execution.physical.sifterators.Limit
61 | import com.rchowell.sift.execution.physical.sifterators.Projection
62 | import com.rchowell.sift.execution.physical.sifterators.Scan
63 | import com.rchowell.sift.execution.physical.sifterators.Selection
64 | import com.rchowell.sift.execution.physical.sifterators.Sifterator
65 | import com.rchowell.sift.types.Schema
66 |
67 | class Planner {
68 |
69 |     companion object {
70 |
71 |         /**
72 |          * Constructs a [Sifterator] to execute the [LogicalTransform].
73 |          *
74 |          * Every input is planned exactly once. Sort and join are still `TODO()`.
75 |          */
76 |         fun plan(transform: LogicalTransform): Sifterator = when (transform) {
77 |             is LogicalAggregation -> {
78 |                 val input = transform.inputs().first()
79 |                 val inPlan = plan(input)
80 |                 val aggregations = transform.aggregations.values.map { agg ->
81 |                     val expr = expression(agg.input, input.schema)
82 |                     when (agg) {
83 |                         is LogicalMinExpr -> MinAccumulator(expr)
84 |                         is LogicalMaxExpr -> MaxAccumulator(expr)
85 |                         is LogicalSumExpr -> SumAccumulator(expr)
86 |                         is LogicalAvgExpr -> AvgAccumulator(expr)
87 |                         is LogicalCountExpr -> CountAccumulator(expr)
88 |                     }
89 |                 }
90 |                 val groups = transform.groups.map { id -> col(input.schema, id) }
91 |                 Aggregation(inPlan, aggregations, groups, transform.schema)
92 |             }
93 |             is LogicalProjection -> {
94 |                 val input = transform.inputs().first()
95 |                 val inPlan = plan(input)
96 |                 // Keys are positions in the output schema; expressions resolve against the input schema.
97 |                 val projections = mutableMapOf<Int, Expression>()
98 |                 transform.projections.forEach { (identity, expr) ->
99 |                     projections[col(transform.schema, identity)] = expression(expr, input.schema)
100 |                 }
101 |                 Projection(inPlan, projections, transform.schema)
102 |             }
103 |             is LogicalScan -> Scan(transform.source, transform.identifiers)
104 |             is LogicalSelection -> {
105 |                 val input = transform.inputs().first()
106 |                 val inPlan = plan(input)
107 |                 Selection(inPlan, predicate(transform.expr, input.schema))
108 |             }
109 |             is LogicalDistinct -> Distinct(plan(transform.inputs().first()), transform.fields())
110 |             is LogicalLimit -> Limit(
111 |                 input = plan(transform.inputs().first()),
112 |                 limit = transform.n,
113 |             )
114 |             is LogicalSort -> TODO()
115 |             is LogicalJoin -> TODO()
116 |             else -> invalid("plan", transform)
117 |         }
118 |
119 |         /** Translates a logical expression into a physical one, resolving identifiers against [schema]. */
120 |         private fun expression(expr: LogicalExpr, schema: Schema): Expression = when (expr) {
121 |             // Resolved through col() so an unknown identifier fails with a descriptive error, not a bare NPE.
122 |             is LogicalIdentifierExpr -> ColumnExpr(col(schema, expr))
123 |             is LogicalLiteralExpr<*> -> LiteralExpr(expr.v)
124 |             is LogicalBinaryExpr -> {
125 |                 val lhs = expression(expr.lhs, schema)
126 |                 val rhs = expression(expr.rhs, schema)
127 |                 when (expr) {
128 |                     is LogicalEqExpr -> EqBinaryExpr(lhs, rhs)
129 |                     is LogicalNeqExpr -> NeqBinaryExpr(lhs, rhs)
130 |                     is LogicalLtExpr -> LtBinaryExpr(lhs, rhs)
131 |                     is LogicalLteExpr -> LteBinaryExpr(lhs, rhs)
132 |                     is LogicalGtExpr -> GtBinaryExpr(lhs, rhs)
133 |                     is LogicalGteExpr -> GteBinaryExpr(lhs, rhs)
134 |                     is LogicalAndExpr -> AndBinaryExpr(lhs, rhs)
135 |                     is LogicalOrExpr -> OrBinaryExpr(lhs, rhs)
136 |                     is LogicalAddExpr -> AddExpr(lhs, rhs)
137 |                     is LogicalSubExpr -> SubExpr(lhs, rhs)
138 |                     is LogicalMulExpr -> MulExpr(lhs, rhs)
139 |                     is LogicalDivExpr -> DivExpr(lhs, rhs)
140 |                     is LogicalModExpr -> ModExpr(lhs, rhs)
141 |                     else -> invalid("binary expression", expr)
142 |                 }
143 |             }
144 |             else -> invalid("expression", expr)
145 |         }
146 |
147 |         /** Translates a boolean binary expression into a predicate; anything else is rejected. */
148 |         private fun predicate(expr: LogicalExpr, schema: Schema): PredicateBinaryExpr {
149 |             if (expr !is LogicalBinaryExpr) invalid("predicate", expr)
150 |             val lhs = expression(expr.lhs, schema)
151 |             val rhs = expression(expr.rhs, schema)
152 |             return when (expr) {
153 |                 is LogicalBooleanBinaryExpr -> when (expr.op) {
154 |                     BinaryOp.EQ -> EqBinaryExpr(lhs, rhs)
155 |                     BinaryOp.NEQ -> NeqBinaryExpr(lhs, rhs)
156 |                     BinaryOp.LT -> LtBinaryExpr(lhs, rhs)
157 |                     BinaryOp.LTE -> LteBinaryExpr(lhs, rhs)
158 |                     BinaryOp.GT -> GtBinaryExpr(lhs, rhs)
159 |                     BinaryOp.GTE -> GteBinaryExpr(lhs, rhs)
160 |                     BinaryOp.AND -> AndBinaryExpr(lhs, rhs)
161 |                     BinaryOp.OR -> OrBinaryExpr(lhs, rhs)
162 |                     else -> invalid("predicate", expr)
163 |                 }
164 |                 else -> invalid("predicate", expr)
165 |             }
166 |         }
167 |
168 |         private fun invalid(expectedType: String, actualType: Any): Nothing =
169 |             throw IllegalStateException("provided $actualType is not a valid $expectedType")
170 |
171 |         /** Looks up the column index of [id] in [schema], failing via [invalid] when it is absent. */
172 |         private fun col(schema: Schema, id: LogicalIdentifierExpr): Int =
173 |             schema.fieldIndexes[id.identifier] ?: invalid("field reference", id)
174 |     }
175 | }
176 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/README.md:
--------------------------------------------------------------------------------
1 | # Sift Language
2 |
3 | ## V0
4 |
5 | See `docs/V0.md`
6 |
7 | ## V1
8 |
9 | In Progress
10 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/SiftLexer.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.language
2 |
3 | /**
4 | * Tokenizes a [String] (ideally a Sift query) into a [Token] list, the input of [SiftParser.parse].
5 | */
6 | interface SiftLexer {
7 | fun tokenize(input: String): List>
8 | }
9 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/SiftParser.kt:
--------------------------------------------------------------------------------
1 | package com.rchowell.sift.language
2 |
3 | import com.rchowell.sift.execution.logical.LogicalTransform
4 |
5 | /**
6 | * SiftParser transforms a list of tokens into a query plan.
7 | *
8 | * The plan is returned as a [LogicalTransform].
9 | */
10 | interface SiftParser {
11 |
12 | fun parse(tokens: List>): LogicalTransform
13 | }
14 |
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/README.md:
--------------------------------------------------------------------------------
1 | # Sift
2 |
3 | The purpose of the Sift language is to provide a super simple query language that maps near 1:1 to operators of the extended relational algebra discussed in section 5.2 of Garcia-Molina et al. It is literally an inversion of the query expression tree, with the inversion coming from using functional pipes '|>' rather than nested transformations. The point is to keep it simple, stupid, and to let me interact with the query engine at a lower level than the eventual SQL parser with pre-processor will allow. Much of the book uses typeset expressions of relational algebra, and I wanted to run these expressions without translating to SQL. I chose to use the F# (and Elixir) pipe operator to simplify writing nested transformations. Ligatures recommended.
4 |
5 | ## Grammar
6 |
7 | The following shorthand is used
8 | ```
9 | (X)* # 0 or n of X
10 | (X)? # 0 or 1 of X
11 | (X)+ # 1 or n of X
12 |
13 | # Comma-separated lists of
14 | = (, )*
15 | ```
16 |
17 | ```bash
18 | ::= [A-Za-z\-_]+ # operators, relation and field identifiers
19 | ::= '[A-Za-z0-9\s]+'
20 | ::= [0-9]+(.[0-9]+)?
21 | ::= (TRUE|FALSE|UNKNOWN)
22 | ::= NULL
23 | ```
24 |
25 | ### Query
26 |
27 | A query is an initial *relation producing* operation followed by several transformations. Each transformation is an operation from the extended relational algebra. Leaf nodes of the query expression tree must be relations. For the sake of simplicity, Sift currently only supports binary joins, so the parent of any given leaf node has at least one and at most two children.
28 |
29 | ```bash
30 | ::=
31 | ```
32 |
33 | ### Transformations
34 |
35 | ```bash
36 | ::= (|> )*
37 | ::=