├── .gitignore ├── .idea ├── misc.xml ├── runConfigurations.xml ├── uiDesigner.xml └── vcs.xml ├── README.md ├── build.gradle ├── gradle.properties ├── gradle └── wrapper │ └── gradle-wrapper.properties ├── settings.gradle └── src ├── main ├── antlr │ ├── Sift.g4 │ └── Sift.v1.g4.ignore ├── java │ └── com │ │ └── rchowell │ │ └── sift │ │ └── language │ │ └── v0 │ │ └── antlr │ │ ├── Sift.interp │ │ ├── Sift.tokens │ │ ├── SiftBaseVisitor.java │ │ ├── SiftLexer.interp │ │ ├── SiftLexer.java │ │ ├── SiftLexer.tokens │ │ ├── SiftParser.java │ │ └── SiftVisitor.java └── kotlin │ └── com │ └── rchowell │ └── sift │ ├── execution │ ├── Environment.kt │ ├── Executor.kt │ ├── logical │ │ ├── LogicalExpr.kt │ │ ├── LogicalTransform.kt │ │ ├── expressions │ │ │ ├── LogicalAggregateExpr.kt │ │ │ ├── LogicalBinaryExpr.kt │ │ │ ├── LogicalIdentifierExpr.kt │ │ │ ├── LogicalLiteralExpr.kt │ │ │ └── Ops.kt │ │ ├── functions │ │ │ └── LogicalFunction.kt │ │ └── transforms │ │ │ ├── LogicalAggregation.kt │ │ │ ├── LogicalCross.kt │ │ │ ├── LogicalDiff.kt │ │ │ ├── LogicalDistinct.kt │ │ │ ├── LogicalIntersect.kt │ │ │ ├── LogicalJoin.kt │ │ │ ├── LogicalLimit.kt │ │ │ ├── LogicalProjection.kt │ │ │ ├── LogicalScan.kt │ │ │ ├── LogicalSelection.kt │ │ │ ├── LogicalSort.kt │ │ │ └── LogicalUnion.kt │ ├── physical │ │ ├── aggregations │ │ │ ├── Accumulator.kt │ │ │ └── Key.kt │ │ ├── expressions │ │ │ ├── BinaryExpr.kt │ │ │ ├── ColumnExpr.kt │ │ │ ├── Expression.kt │ │ │ └── LiteralExpr.kt │ │ └── sifterators │ │ │ ├── Aggregation.kt │ │ │ ├── Distinct.kt │ │ │ ├── Limit.kt │ │ │ ├── Projection.kt │ │ │ ├── Scan.kt │ │ │ ├── Selection.kt │ │ │ └── Sifterator.kt │ └── planner │ │ └── Planner.kt │ ├── language │ ├── README.md │ ├── SiftLexer.kt │ ├── SiftParser.kt │ ├── v0 │ │ ├── README.md │ │ ├── Tokens.kt │ │ ├── antlr │ │ │ ├── SiftAntlrVisitor.kt │ │ │ ├── SiftCompiler.kt │ │ │ ├── SiftErrorListener.kt │ │ │ └── SiftVisitorBuildState.kt │ │ ├── lexers │ │ │ └── 
DirectCodedLexer.kt │ │ └── parsers │ │ │ └── rd │ │ │ ├── NaiveRecursiveDescentParser.kt │ │ │ └── RecursiveDescentParser.kt │ └── v1 │ │ ├── REAMDE.md │ │ └── ast │ │ ├── Expr.kt │ │ └── Node.kt │ ├── shell │ ├── Context.kt │ ├── Main.kt │ ├── ShellModule.kt │ ├── SiftHighlighter.kt │ ├── SiftLineParser.kt │ ├── SiftRunner.kt │ ├── commands │ │ ├── DebugGroup.kt │ │ ├── DescribeCommand.kt │ │ ├── ListCommand.kt │ │ ├── SetCommand.kt │ │ └── SiftRootCommand.kt │ └── kosh │ │ ├── CommandGroup.kt │ │ ├── RootCommand.kt │ │ ├── Runner.kt │ │ └── Shell.kt │ ├── source │ ├── CsvSource.kt │ ├── EmptySource.kt │ ├── MemSource.kt │ └── Source.kt │ └── types │ ├── Batch.kt │ ├── Column.kt │ ├── Field.kt │ ├── Schema.kt │ └── Type.kt └── test └── kotlin └── com └── rchowell └── sift ├── execution ├── ExecutorTest.kt ├── physical │ ├── aggregations │ │ └── KeyTest.kt │ ├── expressions │ │ └── PhysicalAddExprTest.kt │ └── sifterators │ │ ├── AggregationTest.kt │ │ ├── DistinctTest.kt │ │ ├── LimitTest.kt │ │ ├── ProjectionTest.kt │ │ ├── ScanTest.kt │ │ └── SelectionTest.kt └── planner │ └── PlannerTest.kt ├── language └── v0 │ ├── antlr │ └── SiftAntlrTest.kt │ ├── lexers │ └── DirectCodedLexerTest.kt │ └── parsers │ └── rd │ └── RecursiveDescentParserTest.kt ├── source └── CsvSourceTest.kt └── types └── BatchTest.kt /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea/shelf 3 | /confluence/target 4 | /dependencies/repo 5 | /android.tests.dependencies 6 | /dependencies/android.tests.dependencies 7 | /dist 8 | /local 9 | /gh-pages 10 | /ideaSDK 11 | /clionSDK 12 | /android-studio/sdk 13 | out/ 14 | /tmp 15 | kotlin-ide/ 16 | workspace.xml 17 | *.versionsBackup 18 | /idea/testData/debugger/tinyApp/classes* 19 | /jps-plugin/testData/kannotator 20 | /js/js.translator/testData/out/ 21 | /js/js.translator/testData/out-min/ 22 | /js/js.translator/testData/out-pir/ 23 | .gradle/ 24 | build/ 25 | !**/src/**/build 26 | 
!**/test/**/build 27 | *.iml 28 | !**/testData/**/*.iml 29 | .idea/libraries/Gradle*.xml 30 | .idea/libraries/Maven*.xml 31 | .idea/artifacts/PILL_*.xml 32 | .idea/artifacts/KotlinPlugin.xml 33 | .idea/modules 34 | .idea/runConfigurations/JPS_*.xml 35 | .idea/runConfigurations/PILL_*.xml 36 | .idea/runConfigurations/_FP_*.xml 37 | .idea/runConfigurations/_MT_*.xml 38 | .idea/libraries 39 | .idea/modules.xml 40 | .idea/gradle.xml 41 | .idea/compiler.xml 42 | .idea/inspectionProfiles/profiles_settings.xml 43 | .idea/.name 44 | .idea/artifacts/dist_auto_* 45 | .idea/artifacts/dist.xml 46 | .idea/artifacts/ideaPlugin.xml 47 | .idea/artifacts/kotlinc.xml 48 | .idea/artifacts/kotlin_compiler_jar.xml 49 | .idea/artifacts/kotlin_plugin_jar.xml 50 | .idea/artifacts/kotlin_jps_plugin_jar.xml 51 | .idea/artifacts/kotlin_daemon_client_jar.xml 52 | .idea/artifacts/kotlin_imports_dumper_compiler_plugin_jar.xml 53 | .idea/artifacts/kotlin_main_kts_jar.xml 54 | .idea/artifacts/kotlin_compiler_client_embeddable_jar.xml 55 | .idea/artifacts/kotlin_reflect_jar.xml 56 | .idea/artifacts/kotlin_stdlib_js_ir_* 57 | .idea/artifacts/kotlin_test_js_ir_* 58 | .idea/artifacts/kotlin_stdlib_wasm_* 59 | .idea/jarRepositories.xml 60 | .idea/csv-plugin.xml 61 | .idea/libraries-with-intellij-classes.xml 62 | node_modules/ 63 | .rpt2_cache/ 64 | libraries/tools/kotlin-test-js-runner/lib/ 65 | local.properties 66 | buildSrcTmp/ 67 | distTmp/ 68 | outTmp/ 69 | /test.output 70 | /kotlin-native/dist 71 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/runConfigurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 
-------------------------------------------------------------------------------- /.idea/uiDesigner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Preface 2 | 3 | I built this as an exercise while studying [Database Systems: The Complete Book](http://infolab.stanford.edu/~ullman/dscb.html) (DSCB) by Hector Garcia-Molina, Jeff Ullman, and Jennifer Widom. I also wanted to experiment with Apache Arrow, and I found Andy Grove's [KQuery](https://github.com/andygrove/how-query-engines-work); much of this work is modelled after his engine, and I have left notes where I use some of his constructs. This exercise was more about studying the execution of queries, so little effort was put into the parser and planner. There are currently no plan optimizations, and the language is simply syntactic sugar over the operators of Relation Algebra discussed in DSCB. 
4 | 5 | ## Operations 6 | - Scan 7 | - Selection 8 | - Projection 9 | - Limit 10 | - Grouping/Aggregation 11 | - Distinct 12 | - Sort (TODO) 13 | - Join / Union / Difference / Intersection (TODO) 14 | 15 | ## Language 16 | 17 | > Full details in *sift.lang/README.md* 18 | 19 | The purpose of the Sift language is to have a query language that maps nearly 1:1 to the operators of the extended relational algebra discussed in section 5.2 of Garcia-Molina et al. It is literally an inversion of the query expression tree using the F# (and Elixir) pipe operator to simplify writing nested transformations. 20 | 21 | Limitations in the language come from my inability to dedicate time to the parser. Right now, I'm more interested in learning about parser generators. The purpose of the hand-written lexer and parser was to learn some basics. 22 | 23 | A query is formed with a relation production followed by transformations. All type data is provided by the **Schema** of a data **Source**, which is registered to the query execution environment. The full BNF is at the bottom. 24 | 25 | ### Shell Example 26 | 27 | ![](https://i.imgur.com/1RGvkLm.png) 28 | 29 | ![](https://i.imgur.com/s2yIvwl.png) 30 | 31 | ### Relation Productions 32 | 33 | Let *R(A, B, C)* and *S(B, C, D)* be two relations. Here are some example relation productions, including subqueries. 
34 | ``` 35 | # simple scan 36 | 'R' 37 | 38 | # joins 39 | 'R' JOIN 'S' 40 | 'R' OUTER JOIN 'S' 41 | 'R' JOIN 'S' ON A = D 42 | 43 | # equivalent to the previous join 44 | 'R' X 'S' |> SELECT A = D 45 | 46 | # project tuples to same domain prior to union 47 | ('R' |> PROJECT B, C) UNION ('S' |> PROJECT B, C) 48 | 49 | # Let T(X, Y) and V(X, Y) be two relations 50 | 'T' X 'V' # cross 51 | 'T' U 'V' # union 52 | 'T' \ 'V' # difference 53 | 'T' & 'V' # intersection 54 | ``` 55 | 56 | ### Examples 57 | 58 | ``` 59 | Q: Select all titles produced by Paramount between 1979 and 1982 60 | 61 | 'Movies' 62 | |> SELECT (1979 <= Year && Year <= 1982) && Studio = 'Paramount' 63 | |> PROJECT Title 64 | ``` 65 | 66 | ``` 67 | Q: Get the average, min, and max heights of all players by age and position 68 | 69 | 'Players' 70 | |> PROJECT Height, Age 71 | |> GROUP AVG(Height) -> Avg, MIN(Height) -> Shortest, MAX(Height) -> Tallest BY Age, Position 72 | ``` 73 | 74 | ## Execution 75 | 76 | > Do 'gradle run --console plain' to run the interactive query shell 77 | 78 | ### Sample Data 79 | 80 | The sample data is a collection of some fuzzy friends. 
81 | 82 | ``` 83 | ┌─────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐ 84 | │Name │Age │Gender │Weight │Type │Breed │ 85 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 86 | │Ramona │2.00 │F │8.00 │Cat │Mini Coon │ 87 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 88 | │Mochi │2.00 │F │45.00 │Dog │Samoyed │ 89 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 90 | │Cali │7.00 │F │30.00 │Dog │Vizsla │ 91 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 92 | │Gretchen │13.00 │F │50.00 │Dog │English │ 93 | │ │ │ │ │ │Bulldog │ 94 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 95 | │Cooper │6.00 │M │30.00 │Dog │Beagle │ 96 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 97 | │Eleanor │5.00 │F │24.00 │Dog │Cocker │ 98 | │ │ │ │ │ │Spaniel │ 99 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 100 | │Huckleberry │7.00 │M │20.00 │Cat │Medium Coon │ 101 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 102 | │Madman Mochi │3.00 │M │14.00 │Cat │Unknown │ 103 | └─────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘ 104 | ``` 105 | 106 | ### Selection 107 | 108 | > You can see I have a bug in the precedence of parsing, but I don't care much about the parser 109 | 110 | ``` 111 | 'Pets' |> SELECT (Type = 'Dog') && (Gender = 'F') 112 | 113 | ┌─────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐ 114 | │Name │Age │Gender │Weight │Type │Breed │ 115 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 116 | │Mochi │2.00 │F │45.00 │Dog │Samoyed │ 117 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 118 | │Cali │7.00 │F │30.00 │Dog │Vizsla │ 119 | 
├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 120 | │Gretchen │13.00 │F │50.00 │Dog │English │ 121 | │ │ │ │ │ │Bulldog │ 122 | ├─────────────┼────────────┼────────────┼────────────┼────────────┼────────────┤ 123 | │Eleanor │5.00 │F │24.00 │Dog │Cocker │ 124 | │ │ │ │ │ │Spaniel │ 125 | └─────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘ 126 | ``` 127 | 128 | ### Projection 129 | 130 | 131 | ``` 132 | 'Pets' 133 | |> SELECT Type = 'Cat' 134 | |> PROJECT Name + ' is a ' + Breed + ' kitty cat' -> Greeting 135 | 136 | ┌──────────────────────────────────────────────────────────────────────────────┐ 137 | │Greeting │ 138 | ├──────────────────────────────────────────────────────────────────────────────┤ 139 | │Ramona is a Mini Coon kitty cat │ 140 | ├──────────────────────────────────────────────────────────────────────────────┤ 141 | │Huckleberry is a Medium Coon kitty cat │ 142 | ├──────────────────────────────────────────────────────────────────────────────┤ 143 | │Madman Mochi is a Unknown kitty cat │ 144 | └──────────────────────────────────────────────────────────────────────────────┘ 145 | ``` 146 | 147 | 148 | ### Aggregations 149 | 150 | ``` 151 | 'Pets' |> GROUP MAX(Weight) -> Thiccest BY Type 152 | 153 | ┌───────────────────────────────────────┬──────────────────────────────────────┐ 154 | │Type │Thiccest │ 155 | ├───────────────────────────────────────┼──────────────────────────────────────┤ 156 | │Cat │20.00 │ 157 | ├───────────────────────────────────────┼──────────────────────────────────────┤ 158 | │Dog │50.00 │ 159 | └───────────────────────────────────────┴──────────────────────────────────────┘ 160 | ``` 161 | 162 | --- 163 | 164 | ## SiftQL BNF 165 | 166 | ``` 167 | # Tokens 168 | ::= [A-Za-z\-_]+ # operators, relation and field identifiers 169 | ::= '[A-Za-z0-9\s]+' 170 | ::= [0-9]+(.[0-9]+)? 
171 | ::= (TRUE|FALSE|UNKOWN) 172 | ::= NULL 173 | 174 | ::= 175 | 176 | ::= 177 | | 178 | | 179 | | 180 | | 181 | | 182 | 183 | ::= '' # quoted identifier 184 | | ( ) # sub-query 185 | 186 | ::= (AS )? (OUTER|LEFT|RIGHT)? JOIN (AS )? (ON )? 187 | ::= (X|CROSS) 188 | ::= (U|UNION) 189 | ::= (\|DIFF) 190 | ::= (&|INTERSECT) 191 | 192 | ::= (|> )* 193 | ::= ::= SELECT 201 | 202 | ::= PROJECT 203 | ::= 204 | | -> 205 | 206 | ::= GROUP (BY )? 207 | ::= -> 208 | ::= \#() 209 | 210 | ::= 211 | | 212 | | ( ) 213 | ::= # field reference 214 | | \#() # functions 215 | | 216 | ::= (|||) 217 | ``` 218 | 219 | ## Shell 220 | 221 | Try Graal 222 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'org.jetbrains.kotlin.jvm' version '1.4.10' 3 | id 'application' 4 | id 'antlr' 5 | id 'com.github.johnrengelman.shadow' version '7.0.0' 6 | id 'java' 7 | } 8 | 9 | group = 'com.rchowell' 10 | version = '0.0.1' 11 | 12 | repositories { 13 | mavenCentral() 14 | } 15 | 16 | dependencies { 17 | // Build Dependencies 18 | implementation 'org.apache.arrow:arrow-memory:6.0.0' 19 | implementation 'org.apache.arrow:arrow-memory-unsafe:6.0.0' 20 | implementation 'org.apache.arrow:arrow-vector:6.0.0' 21 | implementation 'com.opencsv:opencsv:5.5.2' 22 | implementation 'commons-cli:commons-cli:20040117.000000' 23 | implementation 'de.vandermeer:asciitable:0.3.2' 24 | 25 | // Shell 26 | implementation 'org.jline:jline:3.20.0' 27 | implementation 'info.picocli:picocli-shell-jline3:4.6.2' 28 | implementation 'info.picocli:picocli:4.6.2' 29 | implementation 'org.fusesource.jansi:jansi:2.4.0' 30 | implementation 'org.junit.jupiter:junit-jupiter:5.8.1' 31 | implementation 'com.google.inject:guice:5.0.1' 32 | 33 | // Parser generator 34 | antlr 'org.antlr:antlr4:4.9.3' 35 | compileOnly 'org.antlr:antlr4-runtime:4.9.3' 36 | 37 | // Test Dependencies 38 | 
testImplementation 'org.jetbrains.kotlin:kotlin-test-junit5:1.5.31' 39 | testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1' 40 | 41 | // Test Runtime Dependencies 42 | testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1' 43 | } 44 | 45 | test { 46 | useJUnitPlatform() 47 | } 48 | 49 | compileKotlin { 50 | kotlinOptions.jvmTarget = '1.8' 51 | } 52 | 53 | compileTestKotlin { 54 | kotlinOptions.jvmTarget = '1.8' 55 | } 56 | 57 | generateGrammarSource { 58 | maxHeapSize = "64m" 59 | arguments += ["-visitor", "-long-messages", "-no-listener"] 60 | outputDirectory = file("src/main/java/com/rchowell/sift/language/v0/antlr") 61 | } 62 | 63 | application { 64 | mainClassName = 'com.rchowell.sift.shell.MainKt' 65 | } 66 | 67 | // Build fat Jar for shell 68 | shadowJar { 69 | archiveBaseName.set('sift') 70 | archiveClassifier.set('') 71 | archiveVersion.set('') 72 | } 73 | 74 | // Aliases 75 | task shell { dependsOn shadowJar } 76 | task gen { dependsOn generateGrammarSource } 77 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | kotlin.code.style=official 2 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = "sift" 2 | -------------------------------------------------------------------------------- /src/main/antlr/Sift.g4: 
-------------------------------------------------------------------------------- 1 | grammar Sift; 2 | 3 | @header { 4 | package com.rchowell.sift.language.v0.antlr; 5 | } 6 | 7 | // https://github.com/trinodb/trino/blob/master/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4 8 | 9 | query: relation (PIPE transform)*; 10 | 11 | // ----------- 12 | // Relations 13 | // ----------- 14 | 15 | // TODO identifiers in scans 16 | 17 | relation 18 | : ID_QUOTED #relId 19 | | LP query RP #relSubquery 20 | | relation (alias)? (OUTER|LEFT|RIGHT)? JOIN relation (alias)? (ON expr) #relJoin 21 | | relation op=(UNION|CROSS|DIFF|INTERSECT) relation #relBagOp 22 | ; 23 | 24 | // ------------ 25 | // Transforms 26 | // ------------ 27 | 28 | transform 29 | : select 30 | | project 31 | | group 32 | | sort 33 | | limit 34 | | distinct 35 | ; 36 | 37 | select: SELECT expr; 38 | 39 | project: PROJECT func (COMMA func)*; 40 | 41 | group: GROUP agg (COMMA agg)* (BY ids)?; 42 | 43 | sort: SORT (ids)? order=(ASC|DESC)?; 44 | 45 | limit: LIMIT INT; 46 | 47 | distinct: DISTINCT (ids)?; 48 | 49 | // --------------------------- 50 | // Expressions and Functions 51 | // --------------------------- 52 | // ANTLR 4 gives earlier alternatives of a left-recursive rule higher precedence, so binary operators are listed from tightest to loosest binding: multiplicative, additive, comparison, logical. 53 | expr: expr op=(MULT|DIV|MOD) expr #boolExpr // binds tightest 54 | | expr op=(PLUS|MINUS) expr #boolExpr 55 | | expr op=(LT|LTE|EQ|GT|GTE) expr #boolExpr 56 | | expr op=(AND|OR) expr #boolExpr // binds loosest; AND and OR share one level 57 | | INT #intLitExpr 58 | | STRING #stringLitExpr 59 | | ID LP expr (COMMA expr)* RP #funcExpr 60 | | ID #identExpr 61 | | LP expr RP #subExpr; 62 | 63 | func: expr MAPS ID #projMap 64 | | ID #projIdent 65 | ; 66 | 67 | agg: op=(MIN|MAX|SUM|AVG|COUNT) LP expr RP (MAPS ID)?; // AGG(expr) (-> ID)? 
68 | 69 | alias: AS ID; 70 | 71 | ids: ID (COMMA ID)*; 72 | 73 | // -------- 74 | // Tokens 75 | // -------- 76 | 77 | // Symbols 78 | PIPE : '|>'; 79 | MAPS : '->'; 80 | LP: '('; 81 | RP: ')'; 82 | COMMA: ','; 83 | SQUOTE: '\''; 84 | 85 | // Ops 86 | EQ: '='; 87 | GT: '>'; 88 | LT: '<'; 89 | GTE: '>='; 90 | LTE: '<='; 91 | AND: '&&'; 92 | OR: '||'; 93 | PLUS: '+'; 94 | MINUS: '-'; 95 | MULT: '*'; 96 | DIV: '/'; 97 | MOD: '%'; 98 | 99 | // Aggregations 100 | MIN: 'MIN' | 'Min' | 'min'; 101 | MAX: 'MAX' | 'Max' | 'max'; 102 | SUM: 'SUM' | 'Sum' | 'sum'; 103 | AVG: 'AVG' | 'Avg' | 'avg'; 104 | COUNT: 'COUNT' | 'Count' | 'count'; 105 | 106 | // Transforms 107 | SELECT: 'SELECT' | 'Select' | 'select'; 108 | PROJECT: 'PROJECT' | 'Project' | 'project'; 109 | GROUP: 'GROUP' | 'Group' | 'group'; 110 | SORT: 'SORT' | 'Sort'| 'sort'; 111 | LIMIT: 'LIMIT' | 'Limit' | 'limit'; 112 | DISTINCT: 'DISTINCT' | 'Distinct' | 'distinct'; 113 | 114 | // Keywords 115 | ON: 'ON' | 'on'; 116 | AS: 'AS' | 'as'; 117 | BY: 'BY' | 'by'; 118 | OUTER: 'OUTER' | 'outer'; 119 | LEFT: 'LEFT' | 'left'; 120 | RIGHT: 'RIGHT' | 'right'; 121 | ASC: 'ASC' | 'asc'; 122 | DESC: 'DESC' | 'desc'; 123 | TRUE: 'TRUE' | 'true'; 124 | FALSE: 'FALSE' | 'false'; 125 | 126 | // Bag Ops 127 | JOIN: 'JOIN' | 'join'; 128 | CROSS: 'X' | 'CROSS' | 'cross'; 129 | UNION: 'U' | 'UNION' | 'union'; 130 | DIFF: '\\' | 'DIFF' | 'diff'; 131 | INTERSECT: '&' | 'INTERSECT' | 'intersect'; 132 | 133 | STRING: '"' (~'"')* '"'; 134 | INT : [0-9]+; 135 | WS : [ \t\n\r]+ -> channel(HIDDEN); 136 | 137 | ID : [a-zA-Z_\-]+; 138 | ID_QUOTED: '`' ( ~'`' | '``' )* '`'; 139 | 140 | // Seen in Trino to catch lexing errors 141 | UNRECOGNIZED: . 
; -------------------------------------------------------------------------------- /src/main/antlr/Sift.v1.g4.ignore: -------------------------------------------------------------------------------- 1 | // Sift V1 2 | grammar Sift; 3 | 4 | @header { 5 | package com.rchowell.sift.language.v1.antlr; 6 | } 7 | 8 | WS : [ \t\n\r]+ -> channel(HIDDEN); 9 | 10 | query: relation (PIPE transform)*; 11 | 12 | relation 13 | : SCAN '(' identifier ')' 14 | ; 15 | 16 | transform 17 | : select 18 | | project 19 | | group 20 | | sort 21 | | limit 22 | | distinct 23 | ; 24 | 25 | select: SELECT '(' expression ')'; 26 | 27 | project: PROJECT '(' projection (',' projection)* ')'; 28 | 29 | projection 30 | : expression '->' identifier #projectionAliased 31 | | expression #projectionAnon 32 | | identifier #projectionIdentity 33 | ; 34 | 35 | group: GROUP '(' aggregation (',' aggregation)* ')' (BY '(' identifierList ')' )?; 36 | 37 | aggregation 38 | : expression '->' identifier #aggregationAliased 39 | | expression #aggregationAnon 40 | ; 41 | 42 | sort: SORT '(' identifierList (',' order=(ASC|DESC))? ')'; 43 | 44 | limit: LIMIT '(' INT_VALUE ')'; 45 | 46 | distinct: DISTINCT '(' identifierList? ')'; 47 | 48 | identifier 49 | : ID_UNQUOTED #unquotedIdentifier 50 | | ID_BACKQUOTED #backquotedIdentifier 51 | ; 52 | 53 | identifierList: identifier (',' identifier)*; 54 | 55 | // ------ Expression ---------- 56 | 57 | expression: expr; 58 | 59 | expr 60 | : valExpr predicate[$valExpr.ctx]? #valueExpr 61 | | '!' 
expr #notExpr 62 | | expr AND expr #boolExpr 63 | | expr OR expr #boolExpr 64 | ; 65 | 66 | predicate[ParserRuleContext value] 67 | : (LTE|LT|EQ|NEQ|GT|GTE) right=valExpr #compExpr 68 | | IN '(' valExpr (',' valExpr)* ')' #inList 69 | ; 70 | 71 | valExpr 72 | : litExpr #litValExpr 73 | | identifier #refValExpr 74 | | '(' expr ')' #subValExpr 75 | | ID_UNQUOTED '(' expr (',' expr)* ')' #funValExpr 76 | ; 77 | 78 | litExpr 79 | : INT_VALUE #litInt 80 | | DOUBLE_VALUE #litDouble 81 | | BOOL_VALUE #litBool 82 | | STRING_VALUE #litString 83 | ; 84 | 85 | // ---------------------------- 86 | 87 | // Ops 88 | PIPE: '|>'; 89 | EQ: '='; 90 | NEQ: '!='; 91 | GT: '>'; 92 | LT: '<'; 93 | GTE: '>='; 94 | LTE: '<='; 95 | AND: '&&'; 96 | OR: '||'; 97 | PLUS: '+'; 98 | MINUS: '-'; 99 | MULT: '*'; 100 | DIV: '/'; 101 | MOD: '%'; 102 | 103 | // Keywords 104 | BY: 'by'; 105 | ASC: 'asc'; 106 | DESC: 'desc'; 107 | TRUE: 'true'; 108 | FALSE: 'false'; 109 | IN: 'in'; 110 | 111 | // Transforms 112 | SCAN: 'scan'; 113 | SELECT: 'select'; 114 | PROJECT: 'project'; 115 | GROUP: 'group'; 116 | SORT: 'sort'; 117 | LIMIT: 'limit'; 118 | DISTINCT: 'distinct'; 119 | 120 | // Identifiers 121 | ID_UNQUOTED: LETTER (LETTER | '_')*; 122 | ID_BACKQUOTED: '`' ( ~'`' | '``' )* '`'; 123 | 124 | // Literals 125 | STRING_VALUE: '\'' ( ~'\'' | '\'\'' )* '\''; 126 | INT_VALUE: DIGIT+; 127 | DOUBLE_VALUE: DIGIT+ '.' DIGIT*; 128 | BOOL_VALUE: TRUE | FALSE; 129 | 130 | fragment DIGIT: [0-9]; 131 | fragment LETTER: [A-Za-z]; 132 | 133 | UNRECOGNIZED: . 
; 134 | -------------------------------------------------------------------------------- /src/main/java/com/rchowell/sift/language/v0/antlr/Sift.interp: -------------------------------------------------------------------------------- 1 | token literal names: 2 | null 3 | '|>' 4 | '->' 5 | '(' 6 | ')' 7 | ',' 8 | '\'' 9 | '=' 10 | '>' 11 | '<' 12 | '>=' 13 | '<=' 14 | '&&' 15 | '||' 16 | '+' 17 | '-' 18 | '*' 19 | '/' 20 | '%' 21 | null 22 | null 23 | null 24 | null 25 | null 26 | null 27 | null 28 | null 29 | null 30 | null 31 | null 32 | null 33 | null 34 | null 35 | null 36 | null 37 | null 38 | null 39 | null 40 | null 41 | null 42 | null 43 | null 44 | null 45 | null 46 | null 47 | null 48 | null 49 | null 50 | null 51 | null 52 | null 53 | 54 | token symbolic names: 55 | null 56 | PIPE 57 | MAPS 58 | LP 59 | RP 60 | COMMA 61 | SQUOTE 62 | EQ 63 | GT 64 | LT 65 | GTE 66 | LTE 67 | AND 68 | OR 69 | PLUS 70 | MINUS 71 | MULT 72 | DIV 73 | MOD 74 | MIN 75 | MAX 76 | SUM 77 | AVG 78 | COUNT 79 | SELECT 80 | PROJECT 81 | GROUP 82 | SORT 83 | LIMIT 84 | DISTINCT 85 | ON 86 | AS 87 | BY 88 | OUTER 89 | LEFT 90 | RIGHT 91 | ASC 92 | DESC 93 | TRUE 94 | FALSE 95 | JOIN 96 | CROSS 97 | UNION 98 | DIFF 99 | INTERSECT 100 | STRING 101 | INT 102 | WS 103 | ID 104 | ID_QUOTED 105 | UNRECOGNIZED 106 | 107 | rule names: 108 | query 109 | relation 110 | transform 111 | select 112 | project 113 | group 114 | sort 115 | limit 116 | distinct 117 | expr 118 | func 119 | agg 120 | alias 121 | ids 122 | 123 | 124 | atn: 125 | [3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 52, 182, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 3, 2, 3, 2, 3, 2, 7, 2, 34, 10, 2, 12, 2, 14, 2, 37, 11, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 45, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 52, 10, 3, 3, 3, 5, 3, 55, 10, 3, 3, 3, 3, 3, 3, 3, 5, 3, 60, 10, 
3, 3, 3, 3, 3, 3, 3, 7, 3, 65, 10, 3, 12, 3, 14, 3, 68, 11, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 5, 4, 76, 10, 4, 3, 5, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 7, 6, 85, 10, 6, 12, 6, 14, 6, 88, 11, 6, 3, 7, 3, 7, 3, 7, 3, 7, 7, 7, 94, 10, 7, 12, 7, 14, 7, 97, 11, 7, 3, 7, 3, 7, 5, 7, 101, 10, 7, 3, 8, 3, 8, 5, 8, 105, 10, 8, 3, 8, 5, 8, 108, 10, 8, 3, 9, 3, 9, 3, 9, 3, 10, 3, 10, 5, 10, 115, 10, 10, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 7, 11, 125, 10, 11, 12, 11, 14, 11, 128, 11, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 5, 11, 137, 10, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 7, 11, 151, 10, 11, 12, 11, 14, 11, 154, 11, 11, 3, 12, 3, 12, 3, 12, 3, 12, 3, 12, 5, 12, 161, 10, 12, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 5, 13, 169, 10, 13, 3, 14, 3, 14, 3, 14, 3, 15, 3, 15, 3, 15, 7, 15, 177, 10, 15, 12, 15, 14, 15, 180, 11, 15, 3, 15, 2, 4, 4, 20, 16, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 2, 10, 3, 2, 43, 46, 3, 2, 35, 37, 3, 2, 38, 39, 3, 2, 9, 13, 3, 2, 14, 15, 3, 2, 18, 20, 3, 2, 16, 17, 3, 2, 21, 25, 2, 197, 2, 30, 3, 2, 2, 2, 4, 44, 3, 2, 2, 2, 6, 75, 3, 2, 2, 2, 8, 77, 3, 2, 2, 2, 10, 80, 3, 2, 2, 2, 12, 89, 3, 2, 2, 2, 14, 102, 3, 2, 2, 2, 16, 109, 3, 2, 2, 2, 18, 112, 3, 2, 2, 2, 20, 136, 3, 2, 2, 2, 22, 160, 3, 2, 2, 2, 24, 162, 3, 2, 2, 2, 26, 170, 3, 2, 2, 2, 28, 173, 3, 2, 2, 2, 30, 35, 5, 4, 3, 2, 31, 32, 7, 3, 2, 2, 32, 34, 5, 6, 4, 2, 33, 31, 3, 2, 2, 2, 34, 37, 3, 2, 2, 2, 35, 33, 3, 2, 2, 2, 35, 36, 3, 2, 2, 2, 36, 3, 3, 2, 2, 2, 37, 35, 3, 2, 2, 2, 38, 39, 8, 3, 1, 2, 39, 45, 7, 51, 2, 2, 40, 41, 7, 5, 2, 2, 41, 42, 5, 2, 2, 2, 42, 43, 7, 6, 2, 2, 43, 45, 3, 2, 2, 2, 44, 38, 3, 2, 2, 2, 44, 40, 3, 2, 2, 2, 45, 66, 3, 2, 2, 2, 46, 47, 12, 3, 2, 2, 47, 48, 9, 2, 2, 2, 48, 65, 5, 4, 3, 4, 49, 51, 12, 4, 2, 2, 50, 52, 5, 26, 14, 2, 51, 50, 3, 2, 2, 2, 51, 52, 3, 2, 2, 2, 52, 54, 3, 2, 2, 2, 53, 55, 9, 3, 2, 2, 54, 53, 3, 2, 2, 2, 54, 55, 3, 2, 2, 2, 55, 56, 3, 
2, 2, 2, 56, 57, 7, 42, 2, 2, 57, 59, 5, 4, 3, 2, 58, 60, 5, 26, 14, 2, 59, 58, 3, 2, 2, 2, 59, 60, 3, 2, 2, 2, 60, 61, 3, 2, 2, 2, 61, 62, 7, 32, 2, 2, 62, 63, 5, 20, 11, 2, 63, 65, 3, 2, 2, 2, 64, 46, 3, 2, 2, 2, 64, 49, 3, 2, 2, 2, 65, 68, 3, 2, 2, 2, 66, 64, 3, 2, 2, 2, 66, 67, 3, 2, 2, 2, 67, 5, 3, 2, 2, 2, 68, 66, 3, 2, 2, 2, 69, 76, 5, 8, 5, 2, 70, 76, 5, 10, 6, 2, 71, 76, 5, 12, 7, 2, 72, 76, 5, 14, 8, 2, 73, 76, 5, 16, 9, 2, 74, 76, 5, 18, 10, 2, 75, 69, 3, 2, 2, 2, 75, 70, 3, 2, 2, 2, 75, 71, 3, 2, 2, 2, 75, 72, 3, 2, 2, 2, 75, 73, 3, 2, 2, 2, 75, 74, 3, 2, 2, 2, 76, 7, 3, 2, 2, 2, 77, 78, 7, 26, 2, 2, 78, 79, 5, 20, 11, 2, 79, 9, 3, 2, 2, 2, 80, 81, 7, 27, 2, 2, 81, 86, 5, 22, 12, 2, 82, 83, 7, 7, 2, 2, 83, 85, 5, 22, 12, 2, 84, 82, 3, 2, 2, 2, 85, 88, 3, 2, 2, 2, 86, 84, 3, 2, 2, 2, 86, 87, 3, 2, 2, 2, 87, 11, 3, 2, 2, 2, 88, 86, 3, 2, 2, 2, 89, 90, 7, 28, 2, 2, 90, 95, 5, 24, 13, 2, 91, 92, 7, 7, 2, 2, 92, 94, 5, 24, 13, 2, 93, 91, 3, 2, 2, 2, 94, 97, 3, 2, 2, 2, 95, 93, 3, 2, 2, 2, 95, 96, 3, 2, 2, 2, 96, 100, 3, 2, 2, 2, 97, 95, 3, 2, 2, 2, 98, 99, 7, 34, 2, 2, 99, 101, 5, 28, 15, 2, 100, 98, 3, 2, 2, 2, 100, 101, 3, 2, 2, 2, 101, 13, 3, 2, 2, 2, 102, 104, 7, 29, 2, 2, 103, 105, 5, 28, 15, 2, 104, 103, 3, 2, 2, 2, 104, 105, 3, 2, 2, 2, 105, 107, 3, 2, 2, 2, 106, 108, 9, 4, 2, 2, 107, 106, 3, 2, 2, 2, 107, 108, 3, 2, 2, 2, 108, 15, 3, 2, 2, 2, 109, 110, 7, 30, 2, 2, 110, 111, 7, 48, 2, 2, 111, 17, 3, 2, 2, 2, 112, 114, 7, 31, 2, 2, 113, 115, 5, 28, 15, 2, 114, 113, 3, 2, 2, 2, 114, 115, 3, 2, 2, 2, 115, 19, 3, 2, 2, 2, 116, 117, 8, 11, 1, 2, 117, 137, 7, 48, 2, 2, 118, 137, 7, 47, 2, 2, 119, 120, 7, 50, 2, 2, 120, 121, 7, 5, 2, 2, 121, 126, 5, 20, 11, 2, 122, 123, 7, 7, 2, 2, 123, 125, 5, 20, 11, 2, 124, 122, 3, 2, 2, 2, 125, 128, 3, 2, 2, 2, 126, 124, 3, 2, 2, 2, 126, 127, 3, 2, 2, 2, 127, 129, 3, 2, 2, 2, 128, 126, 3, 2, 2, 2, 129, 130, 7, 6, 2, 2, 130, 137, 3, 2, 2, 2, 131, 137, 7, 50, 2, 2, 132, 133, 7, 5, 2, 2, 133, 134, 5, 20, 11, 2, 134, 135, 
7, 6, 2, 2, 135, 137, 3, 2, 2, 2, 136, 116, 3, 2, 2, 2, 136, 118, 3, 2, 2, 2, 136, 119, 3, 2, 2, 2, 136, 131, 3, 2, 2, 2, 136, 132, 3, 2, 2, 2, 137, 152, 3, 2, 2, 2, 138, 139, 12, 11, 2, 2, 139, 140, 9, 5, 2, 2, 140, 151, 5, 20, 11, 12, 141, 142, 12, 10, 2, 2, 142, 143, 9, 6, 2, 2, 143, 151, 5, 20, 11, 11, 144, 145, 12, 9, 2, 2, 145, 146, 9, 7, 2, 2, 146, 151, 5, 20, 11, 10, 147, 148, 12, 8, 2, 2, 148, 149, 9, 8, 2, 2, 149, 151, 5, 20, 11, 9, 150, 138, 3, 2, 2, 2, 150, 141, 3, 2, 2, 2, 150, 144, 3, 2, 2, 2, 150, 147, 3, 2, 2, 2, 151, 154, 3, 2, 2, 2, 152, 150, 3, 2, 2, 2, 152, 153, 3, 2, 2, 2, 153, 21, 3, 2, 2, 2, 154, 152, 3, 2, 2, 2, 155, 156, 5, 20, 11, 2, 156, 157, 7, 4, 2, 2, 157, 158, 7, 50, 2, 2, 158, 161, 3, 2, 2, 2, 159, 161, 7, 50, 2, 2, 160, 155, 3, 2, 2, 2, 160, 159, 3, 2, 2, 2, 161, 23, 3, 2, 2, 2, 162, 163, 9, 9, 2, 2, 163, 164, 7, 5, 2, 2, 164, 165, 5, 20, 11, 2, 165, 168, 7, 6, 2, 2, 166, 167, 7, 4, 2, 2, 167, 169, 7, 50, 2, 2, 168, 166, 3, 2, 2, 2, 168, 169, 3, 2, 2, 2, 169, 25, 3, 2, 2, 2, 170, 171, 7, 33, 2, 2, 171, 172, 7, 50, 2, 2, 172, 27, 3, 2, 2, 2, 173, 178, 7, 50, 2, 2, 174, 175, 7, 7, 2, 2, 175, 177, 7, 50, 2, 2, 176, 174, 3, 2, 2, 2, 177, 180, 3, 2, 2, 2, 178, 176, 3, 2, 2, 2, 178, 179, 3, 2, 2, 2, 179, 29, 3, 2, 2, 2, 180, 178, 3, 2, 2, 2, 23, 35, 44, 51, 54, 59, 64, 66, 75, 86, 95, 100, 104, 107, 114, 126, 136, 150, 152, 160, 168, 178] -------------------------------------------------------------------------------- /src/main/java/com/rchowell/sift/language/v0/antlr/Sift.tokens: -------------------------------------------------------------------------------- 1 | PIPE=1 2 | MAPS=2 3 | LP=3 4 | RP=4 5 | COMMA=5 6 | SQUOTE=6 7 | EQ=7 8 | GT=8 9 | LT=9 10 | GTE=10 11 | LTE=11 12 | AND=12 13 | OR=13 14 | PLUS=14 15 | MINUS=15 16 | MULT=16 17 | DIV=17 18 | MOD=18 19 | MIN=19 20 | MAX=20 21 | SUM=21 22 | AVG=22 23 | COUNT=23 24 | SELECT=24 25 | PROJECT=25 26 | GROUP=26 27 | SORT=27 28 | LIMIT=28 29 | DISTINCT=29 30 | ON=30 31 | AS=31 32 | 
BY=32 33 | OUTER=33 34 | LEFT=34 35 | RIGHT=35 36 | ASC=36 37 | DESC=37 38 | TRUE=38 39 | FALSE=39 40 | JOIN=40 41 | CROSS=41 42 | UNION=42 43 | DIFF=43 44 | INTERSECT=44 45 | STRING=45 46 | INT=46 47 | WS=47 48 | ID=48 49 | ID_QUOTED=49 50 | UNRECOGNIZED=50 51 | '|>'=1 52 | '->'=2 53 | '('=3 54 | ')'=4 55 | ','=5 56 | '\''=6 57 | '='=7 58 | '>'=8 59 | '<'=9 60 | '>='=10 61 | '<='=11 62 | '&&'=12 63 | '||'=13 64 | '+'=14 65 | '-'=15 66 | '*'=16 67 | '/'=17 68 | '%'=18 69 | -------------------------------------------------------------------------------- /src/main/java/com/rchowell/sift/language/v0/antlr/SiftBaseVisitor.java: -------------------------------------------------------------------------------- 1 | // Generated from Sift.g4 by ANTLR 4.9.3 2 | 3 | package com.rchowell.sift.language.v0.antlr; 4 | 5 | import org.antlr.v4.runtime.tree.AbstractParseTreeVisitor; 6 | 7 | /** 8 | * This class provides an empty implementation of {@link SiftVisitor}, 9 | * which can be extended to create a visitor which only needs to handle a subset 10 | * of the available methods. 11 | * 12 | * @param The return type of the visit operation. Use {@link Void} for 13 | * operations with no return type. 14 | */ 15 | public class SiftBaseVisitor extends AbstractParseTreeVisitor implements SiftVisitor { 16 | /** 17 | * {@inheritDoc} 18 | * 19 | *

The default implementation returns the result of calling 20 | * {@link #visitChildren} on {@code ctx}.

21 | */ 22 | @Override public T visitQuery(SiftParser.QueryContext ctx) { return visitChildren(ctx); } 23 | /** 24 | * {@inheritDoc} 25 | * 26 | *

The default implementation returns the result of calling 27 | * {@link #visitChildren} on {@code ctx}.

28 | */ 29 | @Override public T visitRelId(SiftParser.RelIdContext ctx) { return visitChildren(ctx); } 30 | /** 31 | * {@inheritDoc} 32 | * 33 | *

The default implementation returns the result of calling 34 | * {@link #visitChildren} on {@code ctx}.

35 | */ 36 | @Override public T visitRelSubquery(SiftParser.RelSubqueryContext ctx) { return visitChildren(ctx); } 37 | /** 38 | * {@inheritDoc} 39 | * 40 | *

The default implementation returns the result of calling 41 | * {@link #visitChildren} on {@code ctx}.

42 | */ 43 | @Override public T visitRelBagOp(SiftParser.RelBagOpContext ctx) { return visitChildren(ctx); } 44 | /** 45 | * {@inheritDoc} 46 | * 47 | *

The default implementation returns the result of calling 48 | * {@link #visitChildren} on {@code ctx}.

49 | */ 50 | @Override public T visitRelJoin(SiftParser.RelJoinContext ctx) { return visitChildren(ctx); } 51 | /** 52 | * {@inheritDoc} 53 | * 54 | *

The default implementation returns the result of calling 55 | * {@link #visitChildren} on {@code ctx}.

56 | */ 57 | @Override public T visitTransform(SiftParser.TransformContext ctx) { return visitChildren(ctx); } 58 | /** 59 | * {@inheritDoc} 60 | * 61 | *

The default implementation returns the result of calling 62 | * {@link #visitChildren} on {@code ctx}.

63 | */ 64 | @Override public T visitSelect(SiftParser.SelectContext ctx) { return visitChildren(ctx); } 65 | /** 66 | * {@inheritDoc} 67 | * 68 | *

The default implementation returns the result of calling 69 | * {@link #visitChildren} on {@code ctx}.

70 | */ 71 | @Override public T visitProject(SiftParser.ProjectContext ctx) { return visitChildren(ctx); } 72 | /** 73 | * {@inheritDoc} 74 | * 75 | *

The default implementation returns the result of calling 76 | * {@link #visitChildren} on {@code ctx}.

77 | */ 78 | @Override public T visitGroup(SiftParser.GroupContext ctx) { return visitChildren(ctx); } 79 | /** 80 | * {@inheritDoc} 81 | * 82 | *

The default implementation returns the result of calling 83 | * {@link #visitChildren} on {@code ctx}.

84 | */ 85 | @Override public T visitSort(SiftParser.SortContext ctx) { return visitChildren(ctx); } 86 | /** 87 | * {@inheritDoc} 88 | * 89 | *

The default implementation returns the result of calling 90 | * {@link #visitChildren} on {@code ctx}.

91 | */ 92 | @Override public T visitLimit(SiftParser.LimitContext ctx) { return visitChildren(ctx); } 93 | /** 94 | * {@inheritDoc} 95 | * 96 | *

The default implementation returns the result of calling 97 | * {@link #visitChildren} on {@code ctx}.

98 | */ 99 | @Override public T visitDistinct(SiftParser.DistinctContext ctx) { return visitChildren(ctx); } 100 | /** 101 | * {@inheritDoc} 102 | * 103 | *

The default implementation returns the result of calling 104 | * {@link #visitChildren} on {@code ctx}.

105 | */ 106 | @Override public T visitIdentExpr(SiftParser.IdentExprContext ctx) { return visitChildren(ctx); } 107 | /** 108 | * {@inheritDoc} 109 | * 110 | *

The default implementation returns the result of calling 111 | * {@link #visitChildren} on {@code ctx}.

112 | */ 113 | @Override public T visitFuncExpr(SiftParser.FuncExprContext ctx) { return visitChildren(ctx); } 114 | /** 115 | * {@inheritDoc} 116 | * 117 | *

The default implementation returns the result of calling 118 | * {@link #visitChildren} on {@code ctx}.

119 | */ 120 | @Override public T visitIntLitExpr(SiftParser.IntLitExprContext ctx) { return visitChildren(ctx); } 121 | /** 122 | * {@inheritDoc} 123 | * 124 | *

The default implementation returns the result of calling 125 | * {@link #visitChildren} on {@code ctx}.

126 | */ 127 | @Override public T visitStringLitExpr(SiftParser.StringLitExprContext ctx) { return visitChildren(ctx); } 128 | /** 129 | * {@inheritDoc} 130 | * 131 | *

The default implementation returns the result of calling 132 | * {@link #visitChildren} on {@code ctx}.

133 | */ 134 | @Override public T visitSubExpr(SiftParser.SubExprContext ctx) { return visitChildren(ctx); } 135 | /** 136 | * {@inheritDoc} 137 | * 138 | *

The default implementation returns the result of calling 139 | * {@link #visitChildren} on {@code ctx}.

140 | */ 141 | @Override public T visitBoolExpr(SiftParser.BoolExprContext ctx) { return visitChildren(ctx); } 142 | /** 143 | * {@inheritDoc} 144 | * 145 | *

The default implementation returns the result of calling 146 | * {@link #visitChildren} on {@code ctx}.

147 | */ 148 | @Override public T visitProjMap(SiftParser.ProjMapContext ctx) { return visitChildren(ctx); } 149 | /** 150 | * {@inheritDoc} 151 | * 152 | *

The default implementation returns the result of calling 153 | * {@link #visitChildren} on {@code ctx}.

154 | */ 155 | @Override public T visitProjIdent(SiftParser.ProjIdentContext ctx) { return visitChildren(ctx); } 156 | /** 157 | * {@inheritDoc} 158 | * 159 | *

The default implementation returns the result of calling 160 | * {@link #visitChildren} on {@code ctx}.

161 | */ 162 | @Override public T visitAgg(SiftParser.AggContext ctx) { return visitChildren(ctx); } 163 | /** 164 | * {@inheritDoc} 165 | * 166 | *

The default implementation returns the result of calling 167 | * {@link #visitChildren} on {@code ctx}.

168 | */ 169 | @Override public T visitAlias(SiftParser.AliasContext ctx) { return visitChildren(ctx); } 170 | /** 171 | * {@inheritDoc} 172 | * 173 | *

The default implementation returns the result of calling 174 | * {@link #visitChildren} on {@code ctx}.

175 | */ 176 | @Override public T visitIds(SiftParser.IdsContext ctx) { return visitChildren(ctx); } 177 | } -------------------------------------------------------------------------------- /src/main/java/com/rchowell/sift/language/v0/antlr/SiftLexer.tokens: -------------------------------------------------------------------------------- 1 | PIPE=1 2 | MAPS=2 3 | LP=3 4 | RP=4 5 | COMMA=5 6 | SQUOTE=6 7 | EQ=7 8 | GT=8 9 | LT=9 10 | GTE=10 11 | LTE=11 12 | AND=12 13 | OR=13 14 | PLUS=14 15 | MINUS=15 16 | MULT=16 17 | DIV=17 18 | MOD=18 19 | MIN=19 20 | MAX=20 21 | SUM=21 22 | AVG=22 23 | COUNT=23 24 | SELECT=24 25 | PROJECT=25 26 | GROUP=26 27 | SORT=27 28 | LIMIT=28 29 | DISTINCT=29 30 | ON=30 31 | AS=31 32 | BY=32 33 | OUTER=33 34 | LEFT=34 35 | RIGHT=35 36 | ASC=36 37 | DESC=37 38 | TRUE=38 39 | FALSE=39 40 | JOIN=40 41 | CROSS=41 42 | UNION=42 43 | DIFF=43 44 | INTERSECT=44 45 | STRING=45 46 | INT=46 47 | WS=47 48 | ID=48 49 | ID_QUOTED=49 50 | UNRECOGNIZED=50 51 | '|>'=1 52 | '->'=2 53 | '('=3 54 | ')'=4 55 | ','=5 56 | '\''=6 57 | '='=7 58 | '>'=8 59 | '<'=9 60 | '>='=10 61 | '<='=11 62 | '&&'=12 63 | '||'=13 64 | '+'=14 65 | '-'=15 66 | '*'=16 67 | '/'=17 68 | '%'=18 69 | -------------------------------------------------------------------------------- /src/main/java/com/rchowell/sift/language/v0/antlr/SiftVisitor.java: -------------------------------------------------------------------------------- 1 | // Generated from Sift.g4 by ANTLR 4.9.3 2 | 3 | package com.rchowell.sift.language.v0.antlr; 4 | 5 | import org.antlr.v4.runtime.tree.ParseTreeVisitor; 6 | 7 | /** 8 | * This interface defines a complete generic visitor for a parse tree produced 9 | * by {@link SiftParser}. 10 | * 11 | * @param The return type of the visit operation. Use {@link Void} for 12 | * operations with no return type. 13 | */ 14 | public interface SiftVisitor extends ParseTreeVisitor { 15 | /** 16 | * Visit a parse tree produced by {@link SiftParser#query}. 
17 | * @param ctx the parse tree 18 | * @return the visitor result 19 | */ 20 | T visitQuery(SiftParser.QueryContext ctx); 21 | /** 22 | * Visit a parse tree produced by the {@code relId} 23 | * labeled alternative in {@link SiftParser#relation}. 24 | * @param ctx the parse tree 25 | * @return the visitor result 26 | */ 27 | T visitRelId(SiftParser.RelIdContext ctx); 28 | /** 29 | * Visit a parse tree produced by the {@code relSubquery} 30 | * labeled alternative in {@link SiftParser#relation}. 31 | * @param ctx the parse tree 32 | * @return the visitor result 33 | */ 34 | T visitRelSubquery(SiftParser.RelSubqueryContext ctx); 35 | /** 36 | * Visit a parse tree produced by the {@code relBagOp} 37 | * labeled alternative in {@link SiftParser#relation}. 38 | * @param ctx the parse tree 39 | * @return the visitor result 40 | */ 41 | T visitRelBagOp(SiftParser.RelBagOpContext ctx); 42 | /** 43 | * Visit a parse tree produced by the {@code relJoin} 44 | * labeled alternative in {@link SiftParser#relation}. 45 | * @param ctx the parse tree 46 | * @return the visitor result 47 | */ 48 | T visitRelJoin(SiftParser.RelJoinContext ctx); 49 | /** 50 | * Visit a parse tree produced by {@link SiftParser#transform}. 51 | * @param ctx the parse tree 52 | * @return the visitor result 53 | */ 54 | T visitTransform(SiftParser.TransformContext ctx); 55 | /** 56 | * Visit a parse tree produced by {@link SiftParser#select}. 57 | * @param ctx the parse tree 58 | * @return the visitor result 59 | */ 60 | T visitSelect(SiftParser.SelectContext ctx); 61 | /** 62 | * Visit a parse tree produced by {@link SiftParser#project}. 63 | * @param ctx the parse tree 64 | * @return the visitor result 65 | */ 66 | T visitProject(SiftParser.ProjectContext ctx); 67 | /** 68 | * Visit a parse tree produced by {@link SiftParser#group}. 
69 | * @param ctx the parse tree 70 | * @return the visitor result 71 | */ 72 | T visitGroup(SiftParser.GroupContext ctx); 73 | /** 74 | * Visit a parse tree produced by {@link SiftParser#sort}. 75 | * @param ctx the parse tree 76 | * @return the visitor result 77 | */ 78 | T visitSort(SiftParser.SortContext ctx); 79 | /** 80 | * Visit a parse tree produced by {@link SiftParser#limit}. 81 | * @param ctx the parse tree 82 | * @return the visitor result 83 | */ 84 | T visitLimit(SiftParser.LimitContext ctx); 85 | /** 86 | * Visit a parse tree produced by {@link SiftParser#distinct}. 87 | * @param ctx the parse tree 88 | * @return the visitor result 89 | */ 90 | T visitDistinct(SiftParser.DistinctContext ctx); 91 | /** 92 | * Visit a parse tree produced by the {@code identExpr} 93 | * labeled alternative in {@link SiftParser#expr}. 94 | * @param ctx the parse tree 95 | * @return the visitor result 96 | */ 97 | T visitIdentExpr(SiftParser.IdentExprContext ctx); 98 | /** 99 | * Visit a parse tree produced by the {@code funcExpr} 100 | * labeled alternative in {@link SiftParser#expr}. 101 | * @param ctx the parse tree 102 | * @return the visitor result 103 | */ 104 | T visitFuncExpr(SiftParser.FuncExprContext ctx); 105 | /** 106 | * Visit a parse tree produced by the {@code intLitExpr} 107 | * labeled alternative in {@link SiftParser#expr}. 108 | * @param ctx the parse tree 109 | * @return the visitor result 110 | */ 111 | T visitIntLitExpr(SiftParser.IntLitExprContext ctx); 112 | /** 113 | * Visit a parse tree produced by the {@code stringLitExpr} 114 | * labeled alternative in {@link SiftParser#expr}. 115 | * @param ctx the parse tree 116 | * @return the visitor result 117 | */ 118 | T visitStringLitExpr(SiftParser.StringLitExprContext ctx); 119 | /** 120 | * Visit a parse tree produced by the {@code subExpr} 121 | * labeled alternative in {@link SiftParser#expr}. 
122 | * @param ctx the parse tree 123 | * @return the visitor result 124 | */ 125 | T visitSubExpr(SiftParser.SubExprContext ctx); 126 | /** 127 | * Visit a parse tree produced by the {@code boolExpr} 128 | * labeled alternative in {@link SiftParser#expr}. 129 | * @param ctx the parse tree 130 | * @return the visitor result 131 | */ 132 | T visitBoolExpr(SiftParser.BoolExprContext ctx); 133 | /** 134 | * Visit a parse tree produced by the {@code projMap} 135 | * labeled alternative in {@link SiftParser#func}. 136 | * @param ctx the parse tree 137 | * @return the visitor result 138 | */ 139 | T visitProjMap(SiftParser.ProjMapContext ctx); 140 | /** 141 | * Visit a parse tree produced by the {@code projIdent} 142 | * labeled alternative in {@link SiftParser#func}. 143 | * @param ctx the parse tree 144 | * @return the visitor result 145 | */ 146 | T visitProjIdent(SiftParser.ProjIdentContext ctx); 147 | /** 148 | * Visit a parse tree produced by {@link SiftParser#agg}. 149 | * @param ctx the parse tree 150 | * @return the visitor result 151 | */ 152 | T visitAgg(SiftParser.AggContext ctx); 153 | /** 154 | * Visit a parse tree produced by {@link SiftParser#alias}. 155 | * @param ctx the parse tree 156 | * @return the visitor result 157 | */ 158 | T visitAlias(SiftParser.AliasContext ctx); 159 | /** 160 | * Visit a parse tree produced by {@link SiftParser#ids}. 
161 | * @param ctx the parse tree 162 | * @return the visitor result 163 | */ 164 | T visitIds(SiftParser.IdsContext ctx); 165 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/Environment.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution 2 | 3 | import com.rchowell.sift.source.InvalidSourceException 4 | import com.rchowell.sift.source.Source 5 | 6 | class Environment( 7 | sources: List = emptyList() 8 | ) { 9 | 10 | val sourceMap: MutableMap = mutableMapOf() 11 | 12 | init { 13 | sources.forEach { registerSource(it) } 14 | } 15 | 16 | fun registerSource(source: Source) { 17 | sourceMap[source.identifier] = source 18 | } 19 | 20 | fun getSource(identifier: String): Source = sourceMap[identifier] ?: throw InvalidSourceException(identifier) 21 | } 22 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/Executor.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution 2 | 3 | import com.rchowell.sift.execution.planner.Planner 4 | import com.rchowell.sift.language.v0.antlr.SiftCompiler 5 | 6 | class Executor { 7 | 8 | companion object { 9 | 10 | fun sift(environment: Environment, query: String) { 11 | val compiler = SiftCompiler(environment) 12 | val logicalPlan = compiler.compile(query) 13 | val physicalPlan = Planner.plan(logicalPlan) 14 | physicalPlan.open() 15 | var batch = physicalPlan.next() 16 | while (batch != null) { 17 | println(batch) 18 | batch = physicalPlan.next() 19 | } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/logical/LogicalExpr.kt: -------------------------------------------------------------------------------- 1 | package 
com.rchowell.sift.execution.logical 2 | 3 | import com.rchowell.sift.types.Field 4 | 5 | /** 6 | * Query planning requires expressions to describe the resultant [Field] given a [LogicalTransform]. 7 | * Change to `sealed interface` when using Kotlin 1.5 8 | * 9 | * @constructor Create empty Logical expr 10 | */ 11 | interface LogicalExpr { 12 | 13 | /** 14 | * See class description 15 | * 16 | * @param input 17 | * @return 18 | */ 19 | fun toField(input: LogicalTransform): Field 20 | } 21 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/logical/LogicalTransform.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.logical 2 | 3 | import com.rchowell.sift.types.Schema 4 | 5 | /** 6 | * A logical plan is a transformation which returns a relation. 7 | * This is taken directly from KQuery. 8 | * 9 | * AFAIK a logical plan represents a transformation between the incoming transformations. 10 | * I'm thinking of logical plans as a chain of mapping functions. 11 | * 12 | * Change to `sealed interface` when the project migrates from kotlin 1.4 to 1.5 13 | * 14 | * @constructor Create empty Logical plan 15 | */ 16 | abstract class LogicalTransform { 17 | 18 | /** 19 | * Output schema of this transformation 20 | */ 21 | abstract val schema: Schema 22 | 23 | /** 24 | * Inputs of this logical plan. Grove says this will be useful for the visitor pattern, but I'm not there yet. 25 | * Why not a value? 26 | */ 27 | abstract fun inputs(): List 28 | 29 | open fun pretty(): String { 30 | return format(this) 31 | } 32 | } 33 | 34 | /** 35 | * Format returns the series of transformations nested. 
36 | * 37 | * @param transform 38 | * @param indent 39 | * @return 40 | */ 41 | fun format(transform: LogicalTransform, indent: Int = 0): String = buildString { 42 | val prefix = " ".repeat(indent) 43 | append(prefix).append(transform) 44 | if (transform.inputs().isNotEmpty()) { 45 | append(" {").append("\n") 46 | transform.inputs().forEach { append(format(it, indent + 1)).append("\n") } 47 | append(prefix).append("}") 48 | } else { 49 | append(" {}") 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalAggregateExpr.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.logical.expressions 2 | 3 | import com.rchowell.sift.execution.logical.LogicalExpr 4 | import com.rchowell.sift.execution.logical.LogicalTransform 5 | import com.rchowell.sift.types.Field 6 | import com.rchowell.sift.types.Type 7 | 8 | class UnknownAggregateFunction(name: String) : Exception("unknown aggregate $name") 9 | 10 | /** 11 | * Aggregation expressions to be implemented by various physical aggregation expressions. 
12 | * 13 | * @property op 14 | * @property input 15 | * @constructor Create empty Logical agg expr 16 | */ 17 | sealed class LogicalAggregateExpr( 18 | val op: AggOp, 19 | val input: LogicalExpr, 20 | ) : LogicalExpr { 21 | override fun toString(): String = "$op($input)" 22 | override fun toField(input: LogicalTransform): Field = Field(op.name, this.input.toField(input).type) 23 | 24 | companion object { 25 | 26 | fun get(name: String, vararg args: LogicalExpr): LogicalAggregateExpr = when (name) { 27 | "MIN" -> LogicalMinExpr(args[0]) 28 | "MAX" -> LogicalMaxExpr(args[0]) 29 | "SUM" -> LogicalSumExpr(args[0]) 30 | "AVG" -> LogicalAvgExpr(args[0]) 31 | "COUNT" -> LogicalCountExpr(args[0]) 32 | else -> throw UnknownAggregateFunction(name) 33 | } 34 | } 35 | } 36 | 37 | class LogicalMinExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.MIN, input) 38 | 39 | class LogicalMaxExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.MAX, input) 40 | 41 | class LogicalSumExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.SUM, input) 42 | 43 | class LogicalCountExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.COUNT, input) { 44 | override fun toField(input: LogicalTransform): Field = Field(op.name, Type.Num) 45 | } 46 | 47 | class LogicalAvgExpr(input: LogicalExpr) : LogicalAggregateExpr(AggOp.AVG, input) 48 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalBinaryExpr.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.logical.expressions 2 | 3 | import com.rchowell.sift.execution.logical.LogicalExpr 4 | import com.rchowell.sift.execution.logical.LogicalTransform 5 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.ADD 6 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.AND 7 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.DIV 8 | import 
com.rchowell.sift.execution.logical.expressions.BinaryOp.EQ 9 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.GT 10 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.GTE 11 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.LT 12 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.LTE 13 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.MOD 14 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.MULT 15 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.NEQ 16 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.OR 17 | import com.rchowell.sift.execution.logical.expressions.BinaryOp.SUB 18 | import com.rchowell.sift.types.Field 19 | import com.rchowell.sift.types.Type 20 | 21 | /** 22 | * Representation of binary expressions for 23 | * - Comparison 24 | * - Boolean Expressions 25 | * - Math Expressions 26 | * 27 | * @property op Operator 28 | * @property lhs Left-hand side 29 | * @property rhs Right-hand side 30 | * @constructor Create empty Logical binary expr 31 | */ 32 | abstract class LogicalBinaryExpr( 33 | val op: BinaryOp, 34 | val lhs: LogicalExpr, 35 | val rhs: LogicalExpr, 36 | ) : LogicalExpr { 37 | override fun toString(): String = "$lhs $op $rhs" 38 | 39 | companion object { 40 | 41 | fun get(op: BinaryOp, lhs: LogicalExpr, rhs: LogicalExpr): LogicalExpr = when (op) { 42 | EQ -> LogicalEqExpr(lhs, rhs) 43 | NEQ -> LogicalNeqExpr(lhs, rhs) 44 | LT -> LogicalLtExpr(lhs, rhs) 45 | LTE -> LogicalLteExpr(lhs, rhs) 46 | GT -> LogicalGtExpr(lhs, rhs) 47 | GTE -> LogicalGteExpr(lhs, rhs) 48 | AND -> LogicalAndExpr(lhs, rhs) 49 | OR -> LogicalOrExpr(lhs, rhs) 50 | ADD -> LogicalAddExpr(lhs, rhs) 51 | SUB -> LogicalSubExpr(lhs, rhs) 52 | MULT -> LogicalMulExpr(lhs, rhs) 53 | DIV -> LogicalDivExpr(lhs, rhs) 54 | MOD -> LogicalModExpr(lhs, rhs) 55 | } 56 | } 57 | } 58 | 59 | /** 60 | * Binary expressions that return a Bool 61 | */ 62 | sealed 
class LogicalBooleanBinaryExpr(
    op: BinaryOp,
    lhs: LogicalExpr,
    rhs: LogicalExpr,
) : LogicalBinaryExpr(op, lhs, rhs) {
    // Comparisons and boolean connectives always produce a Bool field named after the operator.
    override fun toField(input: LogicalTransform): Field = Field(op.name, Type.Bool)
}

class LogicalEqExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(EQ, lhs, rhs)

class LogicalNeqExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(NEQ, lhs, rhs)

class LogicalLtExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(LT, lhs, rhs)

class LogicalLteExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(LTE, lhs, rhs)

class LogicalGtExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(GT, lhs, rhs)

class LogicalGteExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(GTE, lhs, rhs)

class LogicalAndExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(AND, lhs, rhs)

class LogicalOrExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalBooleanBinaryExpr(OR, lhs, rhs)

/**
 * Binary expressions that return a Num
 */
sealed class LogicalMathBinaryExpr(
    op: BinaryOp,
    lhs: LogicalExpr,
    rhs: LogicalExpr,
) : LogicalBinaryExpr(op, lhs, rhs) {
    // Arithmetic always produces a Num field named after the operator.
    override fun toField(input: LogicalTransform): Field = Field(op.name, Type.Num)
}

// The arithmetic expressions derive from LogicalMathBinaryExpr so that their output field is typed Num
// (deriving from the boolean base would report Bool for `a + b`).
class LogicalAddExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(ADD, lhs, rhs)

class LogicalSubExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(SUB, lhs, rhs)

class LogicalMulExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(MULT, lhs, rhs)

class LogicalDivExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(DIV, lhs, rhs)

class LogicalModExpr(lhs: LogicalExpr, rhs: LogicalExpr) : LogicalMathBinaryExpr(MOD, lhs, rhs)
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalIdentifierExpr.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.expressions

import com.rchowell.sift.execution.logical.LogicalExpr
import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Field

/**
 * Simple expression representing a reference to some column in the data source.
 *
 * Two identifier expressions are equal when they name the same column.
 *
 * @property identifier name of the referenced column
 * @constructor Creates a reference to the column called [identifier]
 */
class LogicalIdentifierExpr(val identifier: String) : LogicalExpr {

    /**
     * Looks the identifier up in the schema of [input] and returns the matching [Field].
     * NOTE(review): presumably `Schema.find` throws for an unknown column — confirm against Schema.
     *
     * @param input transform whose schema is searched
     * @return the field registered under [identifier]
     */
    override fun toField(input: LogicalTransform): Field = input.schema.find(identifier)

    override fun toString(): String = "#$identifier"

    override fun hashCode(): Int = identifier.hashCode()

    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        // The class is final, so an `is` check is equivalent to comparing runtime classes.
        return other is LogicalIdentifierExpr && identifier == other.identifier
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/LogicalLiteralExpr.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.expressions

import com.rchowell.sift.execution.logical.LogicalExpr
import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Type

/**
 * Representation of a literal
 *
 * The type parameter is bounded by `Any` because the class body reads `v::class`,
 * which Kotlin only permits on non-nullable types.
 */
class LogicalLiteralExpr<T : Any>(val v: T) :
LogicalExpr {

    // Logical type derived from the runtime type of the literal; anything else is rejected at construction.
    var type: Type = when (v) {
        is Boolean -> Type.Bool
        is Number -> Type.Num
        is String -> Type.String
        else -> throw IllegalArgumentException("unsupported type ${v::class.java.name}")
    }

    // The field is named after the literal's string form.
    override fun toField(input: LogicalTransform): Field = Field(v.toString(), type)

    override fun toString(): String = v.toString()
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/expressions/Ops.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.expressions

/** Raised when a symbol does not correspond to any [BinaryOp]. */
class UnknownBinaryOp(op: String) : Exception("unknown binary op $op")

/**
 * Binary operators together with the symbol used for them in queries.
 */
enum class BinaryOp(private val s: String) {
    EQ("="),
    NEQ("!="),
    LT("<"),
    LTE("<="),
    GT(">"),
    GTE(">="),
    AND("&&"),
    OR("||"),
    ADD("+"),
    SUB("-"),
    MULT("*"),
    DIV("/"),
    MOD("%");

    /** Renders the operator as its symbol. */
    override fun toString(): String = this.s

    companion object {

        /**
         * Returns the operator whose symbol equals [op].
         *
         * @throws UnknownBinaryOp if no operator uses that symbol
         */
        fun get(op: String) = values().firstOrNull { it.s == op } ?: throw UnknownBinaryOp(op)
    }
}

/** Aggregate operators. */
enum class AggOp {
    MIN,
    MAX,
    SUM,
    COUNT,
    AVG
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/functions/LogicalFunction.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.functions

import com.rchowell.sift.execution.logical.LogicalExpr

/** Raised when a function name cannot be resolved. */
class UnknownFunction(name: String) : Exception("unknown function $name")

/**
 * Helper functions such as STRLEN,
LOWERCASE, ABS, etc.
 *
 * @constructor Create empty Logical function
 */
sealed class LogicalFunction {

    companion object {

        // TODO add some functions
        // Nothing is registered yet, so every lookup fails with UnknownFunction.
        fun get(name: String, vararg args: LogicalExpr): LogicalExpr = throw UnknownFunction(name)
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalAggregation.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.execution.logical.expressions.LogicalAggregateExpr
import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema

/**
 * LogicalAggregation is much like the extended projection, but it includes grouping attributes.
 *
 * Aggregation operators, such as sums or averages, are not operations of relational algebra, but are used
 * by the grouping operator (described next). Aggregation operators apply to attributes (columns) of a relation.
 *
 * Grouping of tuples according to their value in one or more attributes has the effect of partitioning the tuples
 * of a relation into groups. Aggregation can then be applied to columns within each group, giving us the ability
 * to express a number of queries that are impossible to express in the classical relational algebra.
 * The grouping operator, gamma, is an operator that combines the effect of grouping and aggregation.
p213
 */
class LogicalAggregation(
    val input: LogicalTransform,
    // Output alias -> aggregate expression that produces it.
    val aggregations: Map<LogicalIdentifierExpr, LogicalAggregateExpr>,
    // Columns of the input to group by; may be empty.
    val groups: List<LogicalIdentifierExpr>,
) : LogicalTransform() {

    /**
     * Grouping columns first (as defined in the input schema), followed by one field per aggregation,
     * named after its alias and typed by the aggregate expression. Recomputed on every access.
     */
    override val schema: Schema
        get() {
            // schema derived from groups
            val groupFields = groups.map { input.schema.find(it.identifier) }
            // schema derived from aggregated fields
            val aggregateFields = aggregations.map { (alias, expr) ->
                Field(alias.identifier, expr.toField(input).type)
            }
            return Schema(groupFields + aggregateFields)
        }

    override fun inputs(): List<LogicalTransform> = listOf(input)

    /** Renders as `AGGREGATE <expr> -> <alias>, ...`, followed by ` BY <groups>` when grouping columns exist. */
    override fun toString(): String = buildString {
        append("AGGREGATE ")
        append(aggregations.entries.joinToString { (alias, expr) -> "$expr -> $alias" })
        if (groups.isNotEmpty()) {
            append(" BY ")
            append(groups.joinToString())
        }
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalCross.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Schema

/**
 * Cross product of two relations; its schema is the schema of [lhs] combined with that of [rhs].
 */
class LogicalCross(
    val lhs: LogicalTransform,
    val rhs: LogicalTransform,
) : LogicalTransform() {

    override var schema: Schema = lhs.schema.combine(rhs.schema)

    override fun inputs(): List<LogicalTransform> = listOf(lhs, rhs)

    /** Both inputs in parentheses, separated by an `X` on its own line. */
    override fun pretty(): String = buildString {
        append('(').append(lhs.pretty()).append(')')
        append("\nX\n")
        append('(').append(rhs.pretty()).append(')')
    }

    override fun toString(): String = "CROSS"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalDiff.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Schema

/**
 * Bag difference of two relations; the result keeps the schema of [lhs].
 */
class LogicalDiff(
    val lhs: LogicalTransform,
    val rhs: LogicalTransform,
) : LogicalTransform() {

    override lateinit var schema: Schema

    override fun inputs(): List<LogicalTransform> = listOf(lhs, rhs)

    init {
        // NOTE(review): Kotlin's `assert` is only evaluated when JVM assertions are enabled (-ea),
        // so this schema check is skipped in a default JVM run.
        assert(rhs.schema.subsetOf(lhs.schema)) {
            // But really? Need to look into this
            "Schema of right-side relation must be a subset of left-side relation in bag difference"
        }
        schema = lhs.schema
    }

    /** Both inputs in parentheses, separated by a `-` on its own line. */
    override fun pretty(): String = "(${lhs.pretty()})\n-\n(${rhs.pretty()})"

    override fun toString(): String = "DIFF"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalDistinct.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
import com.rchowell.sift.types.Schema

/**
 * Duplicate elimination extension which converts a bag to a set.
 */
class LogicalDistinct(
    private val input: LogicalTransform,
    private val identifiers: List<LogicalIdentifierExpr>
) : LogicalTransform() {

    // Removing duplicates does not change the shape of the rows.
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> = listOf(input)

    // Resolves each identifier through `schema.fieldIndexes`; an identifier missing from the schema fails on `!!`.
    // NOTE(review): the return type is left inferred because the element type of `fieldIndexes`
    // is not visible here (presumably column positions) — confirm against Schema.
    fun fields() = identifiers.map {
        schema.fieldIndexes[it.identifier]!!
}

    /** Renders as `DISTINCT (a, b, ...)` using the raw identifier names. */
    override fun toString(): String = "DISTINCT (${identifiers.joinToString { it.identifier }})"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalIntersect.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Schema

/**
 * Intersection of two relations; its schema is `Schema.common` of the two input schemas.
 */
class LogicalIntersect(
    val lhs: LogicalTransform,
    val rhs: LogicalTransform,
) : LogicalTransform() {

    override var schema: Schema = Schema.common(lhs.schema, rhs.schema)

    override fun inputs(): List<LogicalTransform> = listOf(lhs, rhs)

    /** Both inputs in parentheses, separated by an `&` on its own line. */
    override fun pretty(): String = "(${lhs.pretty()})\n&\n(${rhs.pretty()})"

    override fun toString(): String = "INTERSECT"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalJoin.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalExpr
import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema

/** Kinds of join supported by the logical plan. */
enum class JoinType {
    INNER,
    OUTER,
    LEFT,
    RIGHT
}

/**
 * LogicalJoin represents the eight possible joins from natural and theta join conditions.
 *
 * @property lhs Left Relation
 * @property rhs Right Relation
 * @property condition Join condition.
If empty, this is the natural join
 * @property type kind of join, see [JoinType]
 * @constructor Creates a join of [lhs] and [rhs]
 */
class LogicalJoin(
    private val lhs: LogicalTransform,
    private val rhs: LogicalTransform,
    private val condition: LogicalExpr?,
    private val type: JoinType,
) : LogicalTransform() {

    /**
     * Schema of a join is the combination of the two schemas.
     * Fields of [lhs] come first; a field that compares equal to one already present is kept only once.
     * Recomputed on every access.
     */
    override val schema: Schema
        get() {
            // mutableSetOf preserves insertion order, so the left-hand fields stay in front.
            val fields = mutableSetOf<Field>().apply {
                addAll(lhs.schema.fields)
                addAll(rhs.schema.fields)
            }
            return Schema(fields.toList())
        }

    override fun inputs(): List<LogicalTransform> = listOf(lhs, rhs)

    /** `<type> JOIN`, followed by ` ON <condition>` when a condition is present. */
    override fun pretty(): String = if (condition == null) "$type JOIN" else "$type JOIN ON $condition"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalLimit.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Schema

/**
 * LogicalLimit simply limits results to the first `n`.
* This operation isn't discussed in the extended relation algebra of DSTCB, but is simple enough to include
 *
 *  SORT field ASC |> LIMIT 10      # sorts all rows by `field` then takes the first 10
 *  LIMIT 10 |> SORT field ASC      # returns the first 10 rows from the input, then sorts only those 10 rows
 *
 * @property input relation whose rows are limited
 * @property n maximum number of rows to keep
 * @constructor Creates a limit of [n] rows over [input]
 */
class LogicalLimit(
    val input: LogicalTransform,
    val n: Int,
) : LogicalTransform() {

    // Limiting rows does not change the shape of the rows.
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> = listOf(input)

    override fun toString(): String = "LIMIT $n"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalProjection.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalExpr
import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema

/**
 * Extended projection gives additional power to the projection operator.
 * In addition to projecting out some columns, in its generalized form it can perform computations involving
 * the columns of its argument relation to produce new columns.
p213
 *
 * @property input relation the expressions are evaluated against
 * @property projections output alias mapped to the expression that produces it
 * @constructor Creates a projection of [projections] over [input]
 */
class LogicalProjection(
    val input: LogicalTransform,
    val projections: Map<LogicalIdentifierExpr, LogicalExpr>,
) : LogicalTransform() {

    /**
     * Each expression describes its output field, so the Schema produced by this
     * projection is just the combination of all field types when evaluated on the given input plan.
     * Every field is named after its alias.
     */
    override val schema: Schema = Schema(
        projections.map { (alias, expr) -> Field(alias.identifier, expr.toField(input).type) }
    )

    override fun inputs(): List<LogicalTransform> = listOf(input)

    /** Renders as `PROJECT <expr> -> <alias>, ...`. */
    override fun toString(): String = buildString {
        append("PROJECT ")
        append(projections.entries.joinToString { (alias, expr) -> "$expr -> $alias" })
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalScan.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.source.Source

/**
 * LogicalScan represents a plan to select fields (given identifiers) from the underlying source.
*
 * @property source data source that is scanned
 * @property identifiers fields to read; an empty list is rendered as `*`
 * @constructor Creates a scan of [identifiers] over [source]
 */
class LogicalScan(
    val source: Source,
    // NOTE(review): element type assumed to be String (field names); it is whatever `Schema.select` accepts — confirm.
    val identifiers: List<String> = listOf(),
) : LogicalTransform() {

    /**
     * Schema is derived from the source
     */
    override val schema = source.schema.select(identifiers)

    /**
     * A scan is a leaf of the plan, so it has no inputs.
     *
     * @return an empty list
     */
    override fun inputs(): List<LogicalTransform> = listOf()

    /** Renders as `SCAN <fields> FROM <source>`, with `*` standing for an empty identifier list. */
    override fun toString(): String {
        val f = if (identifiers.isEmpty()) "*" else identifiers.joinToString()
        return "SCAN $f FROM $source"
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalSelection.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalExpr
import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Schema

/**
 * LogicalSelection represents a filter based on the given expression.
 * Separate from the LogicalScan because a filter/selection should be separate from the data source.
*
 * @property input relation that is filtered
 * @property expr filter expression
 * @constructor Creates a selection of [expr] over [input]
 */
class LogicalSelection(
    val input: LogicalTransform,
    val expr: LogicalExpr,
) : LogicalTransform() {

    // Filtering rows does not change the shape of the rows.
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> = listOf(input)

    override fun toString(): String = "SELECT $expr"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalSort.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
import com.rchowell.sift.types.Schema

/**
 * LogicalSort describes an ASC or DESC sort based on a list of identifiers
 *
 * @property input relation that is sorted
 * @property asc `true` for ascending order, `false` for descending
 * @property fields identifiers to sort by
 * @constructor Creates a sort of [input] by [fields]
 */
class LogicalSort(
    private val input: LogicalTransform,
    private val fields: List<LogicalIdentifierExpr>,
    private val asc: Boolean,
) : LogicalTransform() {

    /**
     * Sorting does not change schema
     */
    override val schema: Schema = input.schema

    override fun inputs(): List<LogicalTransform> = listOf(input)

    /** Renders as `SORT <fields> ASC|DESC`; the field list is omitted when empty. */
    override fun toString(): String = buildString {
        append("SORT ")
        if (fields.isNotEmpty()) {
            append(fields.joinToString())
            append(" ")
        }
        append(if (asc) "ASC" else "DESC")
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/logical/transforms/LogicalUnion.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.logical.transforms

import
com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.types.Schema

/**
 * Union of two relations; the result takes the schema of [lhs].
 */
class LogicalUnion(
    val lhs: LogicalTransform,
    val rhs: LogicalTransform,
) : LogicalTransform() {

    override lateinit var schema: Schema

    override fun inputs(): List<LogicalTransform> = listOf(lhs, rhs)

    init {
        // NOTE(review): Kotlin's `assert` is only evaluated when JVM assertions are enabled (-ea),
        // so this schema check is skipped in a default JVM run.
        assert(lhs.schema == rhs.schema) {
            "Relations in union do not share a schema"
        }
        schema = lhs.schema
    }

    /** Both inputs in parentheses, separated by a `U` on its own line. */
    override fun pretty(): String = "(${lhs.pretty()})\nU\n(${rhs.pretty()})"

    override fun toString(): String = "UNION"
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/aggregations/Accumulator.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.aggregations

import com.rchowell.sift.execution.physical.expressions.Expression

/**
 * All aggregations are implemented as accumulators.
 *
 * @constructor Create empty Agg accumulator
 */
sealed class Accumulator {

    abstract val expr: Expression

    /**
     * Create a new instance of this Accumulator.
* Done because I couldn't create a new instance given an interface KClass ie no empty constructor
     */
    abstract fun new(): Accumulator

    /**
     * Accumulate one value at a time
     */
    abstract fun add(v: Double)

    /**
     * Returns the current value of the accumulator
     */
    abstract fun get(): Double
}

/** Running sum; yields 0.0 when nothing was added. */
class SumAccumulator(override val expr: Expression) : Accumulator() {

    var value = 0.0

    override fun new(): Accumulator = SumAccumulator(expr)

    override fun add(v: Double) {
        value += v
    }

    override fun get(): Double = value
}

/** Running minimum; starts at the largest finite double so any smaller value replaces it. */
class MinAccumulator(override val expr: Expression) : Accumulator() {

    var value = Double.MAX_VALUE

    override fun new(): Accumulator = MinAccumulator(expr)

    override fun add(v: Double) {
        if (v < value) value = v
    }

    override fun get(): Double = value
}

/** Running maximum; starts at the most negative finite double so any larger value replaces it. */
class MaxAccumulator(override val expr: Expression) : Accumulator() {

    // Double.MIN_VALUE is the smallest POSITIVE double (about 4.9e-324), not the most negative one,
    // so using it as the seed would report a wrong maximum for inputs that are all zero or negative.
    var value = -Double.MAX_VALUE

    override fun new(): Accumulator = MaxAccumulator(expr)

    override fun add(v: Double) {
        if (v > value) value = v
    }

    override fun get(): Double = value
}

/** Counts how many values were added; the values themselves are ignored. */
class CountAccumulator(override val expr: Expression) : Accumulator() {

    var value = 0.0

    override fun new(): Accumulator = CountAccumulator(expr)

    override fun add(v: Double) {
        value += 1
    }

    override fun get(): Double = value
}

/** Arithmetic mean kept as a running sum and count; yields NaN (0.0 / 0.0) when nothing was added. */
class AvgAccumulator(override val expr: Expression) : Accumulator() {

    var numer = 0.0
    var denom = 0.0

    override fun new(): Accumulator = AvgAccumulator(expr)

    override fun add(v: Double) {
        numer += v
        denom += 1
    }

    override fun get(): Double = numer / denom
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/aggregations/Key.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.aggregations

import org.apache.commons.lang3.builder.HashCodeBuilder

/**
 * Composite key made of a list of values, with value-based equality and hashing.
 *
 * `ByteArray` elements are compared and hashed by their UTF-8 string content rather than by reference,
 * so two keys holding different array instances with the same text are equal.
 *
 * NOTE(review): the element type is assumed to be `Any?`; the code only requires that elements
 * can be compared with `==` and checked with `is ByteArray` — confirm against callers.
 *
 * @property values the components of the key, in order
 */
class Key(val values: List<Any?>) {

    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (javaClass != other?.javaClass) return false
        other as Key
        return compareValues(other.values)
    }

    /**
     * Use String value of ByteArray rather than the reference
     */
    override fun hashCode(): Int {
        val builder = HashCodeBuilder()
        for (value in values) {
            if (value is ByteArray) {
                builder.append(value.toString(Charsets.UTF_8))
            } else {
                builder.append(value)
            }
        }
        return builder.build()
    }

    override fun toString(): String = values.joinToString("-")

    /** True when [other] has the same size and every position holds an equal value. */
    private fun compareValues(other: List<Any?>): Boolean {
        if (values.size != other.size) return false
        return values.indices.all { i ->
            val mine = values[i]
            val theirs = other[i]
            // Arrays only equal themselves under `==`, so fall back to comparing ByteArray as String.
            mine == theirs ||
                (mine is ByteArray && theirs is ByteArray &&
                    mine.toString(Charsets.UTF_8) == theirs.toString(Charsets.UTF_8))
        }
    }

    companion object {
        val EMPTY = Key(listOf())
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/expressions/BinaryExpr.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.expressions

import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.BoolColumn
import
com.rchowell.sift.types.BoolVectorColumn
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.NumColumn
import com.rchowell.sift.types.NumVectorColumn
import com.rchowell.sift.types.StringColumn
import com.rchowell.sift.types.StringVectorColumn

/**
 * Binary expression whose result column has the same kind (numeric, boolean or string) as its operands.
 *
 * Subclasses override the scalar `eval` overload(s) for the operand types they support;
 * the overloads they leave untouched throw.
 */
abstract class BinaryExpr(val lhs: Expression, val rhs: Expression) : Expression {

    /**
     * Evaluates both operands on [batch] and combines them row by row.
     * Throws when the two operand columns are not of the same supported kind.
     */
    override fun eval(batch: Batch): Column {
        val left = lhs.eval(batch)
        val right = rhs.eval(batch)
        assert(left.size == right.size)
        return when {
            (left is NumColumn && right is NumColumn) -> {
                val out = Column.VectorFactory.numeric(left.size)
                for (row in 0 until left.size) {
                    out[row] = eval(left[row], right[row])
                }
                out.valueCount = left.size
                NumVectorColumn(out)
            }
            (left is BoolColumn && right is BoolColumn) -> {
                val out = Column.VectorFactory.boolean(left.size)
                for (row in 0 until left.size) {
                    out[row] = eval(left[row], right[row])
                }
                out.valueCount = left.size
                BoolVectorColumn(out)
            }
            (left is StringColumn && right is StringColumn) -> {
                val out = Column.VectorFactory.string(left.size)
                for (row in 0 until left.size) {
                    out[row] = eval(left[row], right[row])
                }
                out.valueCount = left.size
                StringVectorColumn(out)
            }
            else -> throw Exception("unsupported column type ${left::class.java}")
        }
    }

    open fun eval(l: Double, r: Double): Double = throw Exception("not implemented for ${this.javaClass}")

    open fun eval(l: Boolean, r: Boolean): Boolean = throw Exception("not implemented for ${this.javaClass}")

    open fun eval(l: String, r: String): String = throw Exception("not implemented for ${this.javaClass}")

    // VarCharVector
    // Decodes both operands as UTF-8, applies the String overload and re-encodes the result.
    private fun eval(l: ByteArray, r: ByteArray): ByteArray =
        eval(l.toString(Charsets.UTF_8), r.toString(Charsets.UTF_8)).toByteArray()

    // BitVector
    // Treats 1 as true and anything else as false, applies the Boolean overload and maps back to 1/0.
    private fun eval(l: Int, r: Int): Int = if (eval(l == 1, r == 1)) 1 else 0

}

/**
 * ==================
 *  Math Expressions
 * ==================
 */

/** Addition for numbers; concatenation for strings. */
class AddExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double = l + r

    override fun eval(l: String, r: String): String = "$l$r"
}

/** Numeric subtraction. */
class SubExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double = l - r
}

/** Numeric multiplication. */
class MulExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double = l * r
}

/** Numeric division. */
class DivExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    // TODO divide by 0; beginning to think expression return values
    //  should be nullable which would become SQL NULL
    override fun eval(l: Double, r: Double): Double = l / r
}

/** Numeric remainder. */
class ModExpr(lhs: Expression, rhs: Expression) : BinaryExpr(lhs, rhs) {

    override fun eval(l: Double, r: Double): Double = l % r
}

/**
 * ============
 *  Predicates
 * ============
 */

/**
 * Binary expression that always yields a boolean column, whatever the kind of its operands.
 */
abstract class PredicateBinaryExpr(val lhs: Expression, val rhs: Expression) : Expression {

    /**
     * Evaluates both operands on [batch] and stores 1 or 0 per row depending on the scalar predicate.
     * Throws when the two operand columns are not of the same supported kind.
     */
    override fun eval(batch: Batch): BoolColumn {
        val lc = lhs.eval(batch)
        val rc = rhs.eval(batch)
        assert(lc.size == rc.size)
        val result = Column.VectorFactory.boolean(lc.size)
        when {
            (lc is NumColumn && rc is NumColumn) -> {
                for (i in 0 until lc.size) {
                    result[i] = if (eval(lc[i], rc[i])) 1 else 0
                }
            }
            (lc is BoolColumn && rc is BoolColumn) -> {
                for (i in 0 until lc.size) {
                    result[i] = if (eval(lc[i], rc[i])) 1 else 0
                }
            }
            (lc is StringColumn && rc is StringColumn) -> {
                for (i in 0 until lc.size) {
                    result[i] = if (eval(lc[i], rc[i])) 1 else 0
                }
            }
            else -> throw Exception("unsupported vector type ${lc::class.java}")
        }
        result.valueCount =
lc.size
        return BoolVectorColumn(result)
    }

    // Scalar predicates: subclasses override the overloads they support, the rest throw.
    open fun eval(l: Double, r: Double): Boolean = throw Exception("not implemented for ${this.javaClass}")

    open fun eval(l: Boolean, r: Boolean): Boolean = throw Exception("not implemented for ${this.javaClass}")

    open fun eval(l: String, r: String): Boolean = throw Exception("not implemented for ${this.javaClass}")

    // BitVector
    // Treats 1 as true and anything else as false before delegating to the Boolean overload.
    private fun eval(l: Int, r: Int): Boolean = eval(l == 1, r == 1)

    // VarCharVector
    // Decodes both operands as UTF-8 before delegating to the String overload.
    private fun eval(l: ByteArray, r: ByteArray): Boolean =
        eval(l.toString(Charsets.UTF_8), r.toString(Charsets.UTF_8))
}

/**
 * ============================
 *  Boolean Binary Expressions
 * ============================
 */

/** Logical conjunction of two booleans. */
class AndBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {
    override fun eval(l: Boolean, r: Boolean): Boolean = l && r
}

/** Logical disjunction of two booleans. */
class OrBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {
    override fun eval(l: Boolean, r: Boolean): Boolean = l || r
}

/** Greater-than for numbers and strings. */
class GtBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {
    override fun eval(l: Double, r: Double): Boolean = l > r

    override fun eval(l: String, r: String): Boolean = l > r
}

/** Greater-than-or-equal for numbers and strings. */
class GteBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {
    override fun eval(l: Double, r: Double): Boolean = l >= r

    override fun eval(l: String, r: String): Boolean = l >= r
}

/** Less-than for numbers and strings. */
class LtBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {
    override fun eval(l: Double, r: Double): Boolean = l < r

    override fun eval(l: String, r: String): Boolean = l < r
}

/** Less-than-or-equal for numbers and strings. */
class LteBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) {
    override fun eval(l: Double, r:
Double): Boolean = l <= r 171 | 172 | override fun eval(l: String, r: String): Boolean = l <= r 173 | } 174 | 175 | class EqBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) { 176 | override fun eval(l: Double, r: Double): Boolean = l == r 177 | 178 | override fun eval(l: Boolean, r: Boolean): Boolean = l == r 179 | 180 | override fun eval(l: String, r: String): Boolean = l == r 181 | } 182 | 183 | class NeqBinaryExpr(lhs: Expression, rhs: Expression) : PredicateBinaryExpr(lhs, rhs) { 184 | override fun eval(l: Double, r: Double): Boolean = l != r 185 | 186 | override fun eval(l: Boolean, r: Boolean): Boolean = l != r 187 | 188 | override fun eval(l: String, r: String): Boolean = l != r 189 | } 190 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/physical/expressions/ColumnExpr.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.physical.expressions 2 | 3 | import com.rchowell.sift.types.Batch 4 | import com.rchowell.sift.types.Column 5 | 6 | class ColumnExpr(val i: Int) : Expression { 7 | 8 | override fun eval(batch: Batch): Column { 9 | return batch.columns[i] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/physical/expressions/Expression.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.physical.expressions 2 | 3 | import com.rchowell.sift.types.Batch 4 | import com.rchowell.sift.types.Column 5 | 6 | interface Expression { 7 | fun eval(batch: Batch): Column 8 | } 9 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/physical/expressions/LiteralExpr.kt: -------------------------------------------------------------------------------- 1 | package 
com.rchowell.sift.execution.physical.expressions

import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.BoolLiteralColumn
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.NumLiteralColumn
import com.rchowell.sift.types.StringLiteralColumn

/**
 * Literal expression: evaluates to a literal column holding [v], sized to the input batch.
 *
 * Booleans, numbers (widened to [Double]) and strings (encoded to bytes) are supported.
 *
 * @property v the literal value
 * @throws IllegalStateException from [eval] when [v] is none of the supported types
 */
class LiteralExpr(val v: Any) : Expression {
    override fun eval(batch: Batch): Column = when (v) {
        is Boolean -> BoolLiteralColumn(v, batch.records)
        is Number -> NumLiteralColumn(v.toDouble(), batch.records)
        is String -> StringLiteralColumn(v.toByteArray(), batch.records)
        else -> throw IllegalStateException("invalid type ${v.javaClass} in literal expression")
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Aggregation.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.execution.physical.aggregations.Accumulator
import com.rchowell.sift.execution.physical.aggregations.Key
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.BoolVectorColumn
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.NumVectorColumn
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.StringVectorColumn
import com.rchowell.sift.types.Type
import org.apache.arrow.vector.BitVector
import org.apache.arrow.vector.Float8Vector
import org.apache.arrow.vector.ValueVector
import org.apache.arrow.vector.VarCharVector

/**
 * Aggregation Sifterator maintains an accumulator for each aggregation key and processes all input batches
 * before producing the output batch
 *
 * @property input upstream operator whose batches are aggregated
 * @property aggregations accumulator prototypes, one per aggregated output column; each carries the
 *   expression that produces its input values
 * @property groups indexes of the input columns whose values form the grouping key
 * @property schema output schema; the first `groups.size` fields describe the key columns
 * @constructor Create empty Aggregation
 */
class Aggregation(
    val input: Sifterator,
    val aggregations: List<Accumulator>,
    val groups: List<Int>,
    override val schema: Schema,
) : Sifterator {

    // One list of accumulators (parallel to [aggregations]) per distinct grouping key.
    private val accumulators: MutableMap<Key, List<Accumulator>> = mutableMapOf()
    private var done = false

    /**
     * DSCB has iterators doing full aggregation in the open() method
     *
     * Drains [input] completely, feeding every row into the accumulators of its grouping key.
     */
    override fun open() {
        input.open()
        // generateSequence stops at the first null, i.e. when the input is exhausted.
        generateSequence { input.next() }.forEach { batch ->
            val columns: List<Column> = aggregations.map { it.expr.eval(batch) }
            for (row in 0 until batch.records) {
                val values = groups.map { batch.columns[it][row] }
                val key = if (values.isEmpty()) Key.EMPTY else Key(values)
                accumulate(key, columns, row)
            }
        }
    }

    /**
     * Next() returns the value of all aggregation accumulators
     *
     * The first call produces a single batch with one row per grouping key; every later call
     * returns null.
     */
    override fun next(): Batch? {
        if (done) return null
        done = true

        // Total number of rows in the output batch
        val rowCount = accumulators.size

        // Initialize vectors for each aggregation key, type is derived from the schema
        val keyVectors: List<ValueVector> = groups.indices.map { group ->
            when (schema.fields[group].type) {
                Type.Num -> Column.VectorFactory.numeric(rowCount)
                Type.Bool -> Column.VectorFactory.boolean(rowCount)
                Type.String -> Column.VectorFactory.string(rowCount)
            }
        }

        // Initialize vectors for each aggregation value, type is always numeric
        val valueVectors = aggregations.map { Column.VectorFactory.numeric(rowCount) }

        // Add all values to the output vectors
        accumulators.entries.forEachIndexed { row, (key, accums) ->

            // Add all aggregation key values to the key vectors
            key.values.forEachIndexed { i, kv ->
                when (val keyVec = keyVectors[i]) {
                    is Float8Vector -> keyVec[row] = kv as Double
                    is BitVector -> keyVec[row] = kv as Int
                    is VarCharVector -> keyVec[row] = kv as ByteArray
                    else -> throw IllegalStateException("unknown key vector type ${keyVec::class.java} for key $kv")
                }
            }

            // Add all aggregated values to the value vectors
            accums.forEachIndexed { i, acc ->
                valueVectors[i][row] = acc.get()
            }
        }

        // Columns of the batch: key columns first, then one numeric column per aggregation
        val cols = mutableListOf<Column>()
        keyVectors.forEach {
            it.valueCount = rowCount
            when (it) {
                is Float8Vector -> cols.add(NumVectorColumn(it))
                is BitVector -> cols.add(BoolVectorColumn(it))
                is VarCharVector -> cols.add(StringVectorColumn(it))
            }
        }
        valueVectors.forEach {
            it.valueCount = rowCount
            cols.add(NumVectorColumn(it))
        }
        return Batch(schema, cols)
    }

    override fun close() {
        input.close()
    }

    /**
     * Feeds row [row] of each evaluated aggregation column into the accumulators of [key],
     * creating a fresh set of accumulators the first time the key is seen.
     */
    private fun accumulate(key: Key, columns: List<Column>, row: Int) {
        val accums = accumulators.getOrPut(key) { aggregations.map { it.new() } }
        accums.forEachIndexed { col, acc ->
            acc.add(columns[col][row] as Double)
        }
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Distinct.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Batch.Companion.valueCount
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.set
import org.apache.commons.lang3.builder.HashCodeBuilder

/**
 * Duplicate elimination for the given keys
 *
 * @property input
 * @property fields columns to keep distinct values
 * @constructor Create empty Distinct
 */
class Distinct(
    val input: Sifterator,
    val fields: List<Int>,
) : Sifterator {

    private val seen: MutableSet<Int> = mutableSetOf()

    override val schema: Schema = input.schema.project(fields)

    override fun open() {
        input.open()
}

    /**
     * Returns the next input batch reduced to [fields], keeping only rows whose key has not been
     * produced before (across all batches), or null when the input is exhausted.
     */
    override fun next(): Batch? {
        val batch = input.next() ?: return null
        val vectors = Batch.empty(schema, batch.records)
        var records = 0
        for (row in 0 until batch.records) {
            val hashBuilder = HashCodeBuilder()
            fields.forEach {
                hashBuilder.append(batch.columns[it][row])
            }
            val hash = hashBuilder.build() ?: -1
            // NOTE(review): rows are identified by hash code only, so two different keys that
            // collide are treated as duplicates and the second one is dropped.
            if (seen.add(hash)) {
                for ((c, field) in fields.withIndex()) {
                    vectors[c][records] = batch.columns[field][row]
                }
                records += 1
            }
        }
        vectors.valueCount(records)
        return Batch.fromVectors(schema, vectors)
    }

    override fun close() {
        input.close()
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Limit.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Batch.Companion.valueCount
import com.rchowell.sift.types.set
import kotlin.math.min

/**
 * Limit passes input batches through, truncating them so that at most [limit] records are
 * returned in total; once the limit is reached, [next] returns null.
 *
 * @property input upstream operator
 * @property limit maximum number of records to emit
 */
class Limit(
    val input: Sifterator,
    val limit: Int,
) : Sifterator {

    // Number of records emitted so far
    var sent = 0

    override val schema = input.schema

    override fun open() {
        input.open()
    }

    override fun next(): Batch? {
        if (sent >= limit) return null
        val batch = input.next() ?: return null
        // Could be a [Batch] helper method
        // this makes me want a DataFrame library on top of Arrow
        val values = min(batch.records, limit - sent)
        val vectors = Batch.empty(batch.schema, values)
        // Copy exactly the rows that still fit under the limit
        for (row in 0 until values) {
            for (col in batch.columns.indices) {
                vectors[col][row] = batch.columns[col][row]
            }
        }
        sent += values
        vectors.valueCount(values)
        return Batch.fromVectors(schema, vectors)
    }

    override fun close() {
        input.close()
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Projection.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.execution.physical.expressions.Expression
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Schema

/**
 * Projection holds a map of *output* column indexes to expressions.
 *
 * Each [Expression] is evaluated on the input [Batch], and the result is set in the output [Batch]
 *
 * @property input upstream operator
 * @property projections output column index to the expression that computes that column
 * @property schema output schema
 * @constructor Create empty Projection
 */
class Projection(
    val input: Sifterator,
    val projections: Map<Int, Expression>,
    override val schema: Schema,
) : Sifterator {

    override fun open() {
        input.open()
    }

    override fun next(): Batch? {
        val batch = input.next() ?: return null
        // Order the results by output column index rather than by map iteration order, so the
        // columns line up with [schema] however the map was built.
        val output = projections.entries
            .sortedBy { it.key }
            .map { it.value.eval(batch) }
        return Batch(schema, output)
    }

    override fun close() {
        input.close()
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Scan.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.source.Source
import com.rchowell.sift.types.Batch

/**
 * PhysicalScan: reads batches from a [Source].
 *
 * @property source where the batches come from
 * @property fields passed to [Source.scan]
 *   (NOTE(review): element type restored as String after extraction stripped it; confirm against Source)
 * @constructor Create empty Physical scan
 */
class Scan(
    val source: Source,
    val fields: List<String>,
) : Sifterator {

    override val schema = source.schema

    lateinit var batches: Iterator<Batch>

    override fun open() {
        source.init()
        batches = source.scan(fields).iterator()
    }

    /** Returns the next batch of the scan, or null once the source iterator is exhausted. */
    override fun next(): Batch? = if (batches.hasNext()) batches.next() else null

    override fun close() {
        source.close()
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Selection.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.execution.physical.expressions.PredicateBinaryExpr
import com.rchowell.sift.types.Batch

/**
 * Selection evaluates a bool expression and uses the results to filter the next result.
 * Seems expensive to filter with columns versus rows.
 *
 * @property input upstream operator
 * @property predicateBinary predicate evaluated per batch; its result is used as the row mask
 * @constructor Create empty Selection
 */
class Selection(
    val input: Sifterator,
    val predicateBinary: PredicateBinaryExpr // TODO change to just predicate
) : Sifterator {

    override val schema = input.schema

    override fun open() {
        input.open()
    }

    override fun next(): Batch? {
        val batch = input.next() ?: return null
        val mask = predicateBinary.eval(batch)
        // Apply the same mask to every column so rows stay aligned
        val filtered = batch.columns.map { column -> column.filter(mask) }
        return Batch(schema, filtered)
    }

    override fun close() {
        input.close()
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/execution/physical/sifterators/Sifterator.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Schema

/**
 * Iterator from The Volcano Model. Called Sifterator to avoid naming confusion.
 *
 * Things might get interesting/weird because `next()` returns a [Batch] rather than a row.
 */
interface Sifterator {

    /**
     * Output schema of this transformation
     */
    val schema: Schema

    /**
     * This method starts the process of getting tuples, but does not get a tuple.
     * It initializes any data structures needed to perform the operation and calls Open()
     * for any arguments of the operation. p707
     */
    fun open()

    /**
     * Returns the next [Batch] of this operator.
     *
     * @return the next batch, or null when there is nothing left to produce
     */
    fun next(): Batch?
31 | 32 | fun close() 33 | } 34 | -------------------------------------------------------------------------------- /src/main/kotlin/com/rchowell/sift/execution/planner/Planner.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.planner 2 | 3 | import com.rchowell.sift.execution.logical.LogicalExpr 4 | import com.rchowell.sift.execution.logical.LogicalTransform 5 | import com.rchowell.sift.execution.logical.expressions.BinaryOp 6 | import com.rchowell.sift.execution.logical.expressions.LogicalAddExpr 7 | import com.rchowell.sift.execution.logical.expressions.LogicalAndExpr 8 | import com.rchowell.sift.execution.logical.expressions.LogicalAvgExpr 9 | import com.rchowell.sift.execution.logical.expressions.LogicalBinaryExpr 10 | import com.rchowell.sift.execution.logical.expressions.LogicalBooleanBinaryExpr 11 | import com.rchowell.sift.execution.logical.expressions.LogicalCountExpr 12 | import com.rchowell.sift.execution.logical.expressions.LogicalDivExpr 13 | import com.rchowell.sift.execution.logical.expressions.LogicalEqExpr 14 | import com.rchowell.sift.execution.logical.expressions.LogicalGtExpr 15 | import com.rchowell.sift.execution.logical.expressions.LogicalGteExpr 16 | import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr 17 | import com.rchowell.sift.execution.logical.expressions.LogicalLiteralExpr 18 | import com.rchowell.sift.execution.logical.expressions.LogicalLtExpr 19 | import com.rchowell.sift.execution.logical.expressions.LogicalLteExpr 20 | import com.rchowell.sift.execution.logical.expressions.LogicalMaxExpr 21 | import com.rchowell.sift.execution.logical.expressions.LogicalMinExpr 22 | import com.rchowell.sift.execution.logical.expressions.LogicalModExpr 23 | import com.rchowell.sift.execution.logical.expressions.LogicalMulExpr 24 | import com.rchowell.sift.execution.logical.expressions.LogicalNeqExpr 25 | import 
com.rchowell.sift.execution.logical.expressions.LogicalOrExpr 26 | import com.rchowell.sift.execution.logical.expressions.LogicalSubExpr 27 | import com.rchowell.sift.execution.logical.expressions.LogicalSumExpr 28 | import com.rchowell.sift.execution.logical.transforms.LogicalAggregation 29 | import com.rchowell.sift.execution.logical.transforms.LogicalDistinct 30 | import com.rchowell.sift.execution.logical.transforms.LogicalJoin 31 | import com.rchowell.sift.execution.logical.transforms.LogicalLimit 32 | import com.rchowell.sift.execution.logical.transforms.LogicalProjection 33 | import com.rchowell.sift.execution.logical.transforms.LogicalScan 34 | import com.rchowell.sift.execution.logical.transforms.LogicalSelection 35 | import com.rchowell.sift.execution.logical.transforms.LogicalSort 36 | import com.rchowell.sift.execution.physical.aggregations.AvgAccumulator 37 | import com.rchowell.sift.execution.physical.aggregations.CountAccumulator 38 | import com.rchowell.sift.execution.physical.aggregations.MaxAccumulator 39 | import com.rchowell.sift.execution.physical.aggregations.MinAccumulator 40 | import com.rchowell.sift.execution.physical.aggregations.SumAccumulator 41 | import com.rchowell.sift.execution.physical.expressions.AddExpr 42 | import com.rchowell.sift.execution.physical.expressions.AndBinaryExpr 43 | import com.rchowell.sift.execution.physical.expressions.ColumnExpr 44 | import com.rchowell.sift.execution.physical.expressions.DivExpr 45 | import com.rchowell.sift.execution.physical.expressions.EqBinaryExpr 46 | import com.rchowell.sift.execution.physical.expressions.Expression 47 | import com.rchowell.sift.execution.physical.expressions.GtBinaryExpr 48 | import com.rchowell.sift.execution.physical.expressions.GteBinaryExpr 49 | import com.rchowell.sift.execution.physical.expressions.LiteralExpr 50 | import com.rchowell.sift.execution.physical.expressions.LtBinaryExpr 51 | import com.rchowell.sift.execution.physical.expressions.LteBinaryExpr 52 | 
import com.rchowell.sift.execution.physical.expressions.ModExpr 53 | import com.rchowell.sift.execution.physical.expressions.MulExpr 54 | import com.rchowell.sift.execution.physical.expressions.NeqBinaryExpr 55 | import com.rchowell.sift.execution.physical.expressions.OrBinaryExpr 56 | import com.rchowell.sift.execution.physical.expressions.PredicateBinaryExpr 57 | import com.rchowell.sift.execution.physical.expressions.SubExpr 58 | import com.rchowell.sift.execution.physical.sifterators.Aggregation 59 | import com.rchowell.sift.execution.physical.sifterators.Distinct 60 | import com.rchowell.sift.execution.physical.sifterators.Limit 61 | import com.rchowell.sift.execution.physical.sifterators.Projection 62 | import com.rchowell.sift.execution.physical.sifterators.Scan 63 | import com.rchowell.sift.execution.physical.sifterators.Selection 64 | import com.rchowell.sift.execution.physical.sifterators.Sifterator 65 | import com.rchowell.sift.types.Schema 66 | 67 | class Planner { 68 | 69 | companion object { 70 | 71 | /** 72 | * Constructs a [Sifterator] to execute the [LogicalTransform]. 
*/
        fun plan(transform: LogicalTransform): Sifterator = when (transform) {
            is LogicalAggregation -> {
                val input = transform.inputs().first()
                val inPlan = plan(input)
                // Aggregate inputs and group keys are both resolved against the *input* schema
                val aggregations = transform.aggregations.values.map { agg ->
                    val expr = expression(agg.input, input.schema)
                    when (agg) {
                        is LogicalMinExpr -> MinAccumulator(expr)
                        is LogicalMaxExpr -> MaxAccumulator(expr)
                        is LogicalSumExpr -> SumAccumulator(expr)
                        is LogicalAvgExpr -> AvgAccumulator(expr)
                        is LogicalCountExpr -> CountAccumulator(expr)
                    }
                }
                val groups = transform.groups.map { id -> col(input.schema, id) }
                Aggregation(inPlan, aggregations, groups, transform.schema)
            }
            is LogicalProjection -> {
                val input = transform.inputs().first()
                val inPlan = plan(input)
                // Key: column index in the *output* schema; value: expression over the *input* schema
                val projections = mutableMapOf<Int, Expression>()
                transform.projections.forEach { (identity, expr) ->
                    val column = col(transform.schema, identity)
                    projections[column] = expression(expr, input.schema)
                }
                Projection(inPlan, projections, transform.schema)
            }
            is LogicalScan -> Scan(transform.source, transform.identifiers)
            is LogicalSelection -> {
                val input = transform.inputs().first()
                val inPlan = plan(input)
                val predicate = predicate(transform.expr, input.schema)
                Selection(inPlan, predicate)
            }
            is LogicalDistinct -> Distinct(
                input = plan(transform.inputs().first()),
                fields = transform.fields(),
            )
            is LogicalLimit -> Limit(
                input = plan(transform.inputs().first()),
                limit = transform.n,
            )
            is LogicalSort -> TODO()
            is LogicalJoin -> TODO()
            else -> invalid("plan", transform)
        }

        /**
         * Translates a logical expression into a physical [Expression], resolving identifiers to
         * column indexes of [schema].
         *
         * @throws IllegalStateException for unknown identifiers or unsupported expression kinds
         */
        private fun expression(expr: LogicalExpr, schema: Schema): Expression = when (expr) {
            is LogicalIdentifierExpr -> ColumnExpr(col(schema, expr))
            is LogicalLiteralExpr<*> -> LiteralExpr(expr.v)
            is LogicalBinaryExpr -> {
                val lhs = expression(expr.lhs, schema)
                val rhs = expression(expr.rhs, schema)
                when (expr) {
                    is LogicalEqExpr -> EqBinaryExpr(lhs, rhs)
                    is LogicalNeqExpr -> NeqBinaryExpr(lhs, rhs)
                    is LogicalLtExpr -> LtBinaryExpr(lhs, rhs)
                    is LogicalLteExpr -> LteBinaryExpr(lhs, rhs)
                    is LogicalGtExpr -> GtBinaryExpr(lhs, rhs)
                    is LogicalGteExpr -> GteBinaryExpr(lhs, rhs)
                    is LogicalAndExpr -> AndBinaryExpr(lhs, rhs)
                    is LogicalOrExpr -> OrBinaryExpr(lhs, rhs)
                    is LogicalAddExpr -> AddExpr(lhs, rhs)
                    is LogicalSubExpr -> SubExpr(lhs, rhs)
                    is LogicalMulExpr -> MulExpr(lhs, rhs)
                    is LogicalDivExpr -> DivExpr(lhs, rhs)
                    is LogicalModExpr -> ModExpr(lhs, rhs)
                    else -> invalid("binary expression", expr)
                }
            }
            else -> invalid("expression", expr)
        }

        /**
         * Translates a logical boolean binary expression into a physical predicate.
         *
         * @throws IllegalStateException when [expr] is not a boolean binary expression
         */
        private fun predicate(expr: LogicalExpr, schema: Schema): PredicateBinaryExpr {
            if (expr !is LogicalBinaryExpr) invalid("predicate", expr)
            val lhs = expression(expr.lhs, schema)
            val rhs = expression(expr.rhs, schema)
            return when (expr) {
                is LogicalBooleanBinaryExpr -> when (expr.op) {
                    BinaryOp.EQ -> EqBinaryExpr(lhs, rhs)
                    BinaryOp.NEQ -> NeqBinaryExpr(lhs, rhs)
                    BinaryOp.LT -> LtBinaryExpr(lhs, rhs)
                    BinaryOp.LTE -> LteBinaryExpr(lhs, rhs)
                    BinaryOp.GT -> GtBinaryExpr(lhs, rhs)
                    BinaryOp.GTE -> GteBinaryExpr(lhs, rhs)
                    BinaryOp.AND -> AndBinaryExpr(lhs, rhs)
                    BinaryOp.OR -> OrBinaryExpr(lhs, rhs)
                    else -> invalid("predicate", expr)
                }
                else -> invalid("predicate", expr)
            }
        }

        /** Fails planning with a message naming what was expected and what was provided. */
        private fun invalid(expectedType: String, actualType: Any): Nothing =
            throw IllegalStateException("provided $actualType is not a valid $expectedType")

        /** Resolves [id] to its column index in [schema], failing planning when it is unknown. */
        private fun col(schema: Schema, id: LogicalIdentifierExpr): Int =
            schema.fieldIndexes[id.identifier] ?: invalid("field reference", id)
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/README.md:
--------------------------------------------------------------------------------
# Sift Language

## V0

See `docs/V0.md`

## V1

In Progress
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/SiftLexer.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language

/**
 * Tokenizes a [String] (ideally a Sift query) into a [Token] list.
 */
interface SiftLexer {
    fun tokenize(input: String): List<Token<*>>
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/SiftParser.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language

import com.rchowell.sift.execution.logical.LogicalTransform

/**
 * SiftParser transforms a list of tokens into a query plan
 *
 * @constructor Create empty Sift parser
 */
interface SiftParser {

    fun parse(tokens: List<Token<*>>): LogicalTransform
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/README.md:
--------------------------------------------------------------------------------
# Sift

The purpose of the Sift language is to provide a super simple query language that maps near 1:1 to operators of the extended relational algebra discussed in section 5.2 of Garcia-Molina et. al.
It is literally an inversion of the query expression tree; with the inversion coming from using functional pipes '|>' rather than nested transformations. The point is to keep it simple, stupid, and to allow me to interact with the query engine at a lower level than the eventual SQL parser with pre-processor will allow. Much of the book uses typeset expressions of relational algebra, and I wanted to run these expressions without translating to SQL. I chose to use the F# (and Elixir) pipe operator to simplify writing nested transformations. Ligatures recommended. 4 | 5 | ## Grammar 6 | 7 | The following shorthand is used 8 | ``` 9 | (X)* # 0 or n of X 10 | (X)? # 0 or 1 of X 11 | (X)+ # 1 or n of X 12 | 13 | # Comma-separated lists of 14 | = (, )* 15 | ``` 16 | 17 | ```bash 18 | ::= [A-Za-z\-_]+ # operators, relation and field identifiers 19 | ::= '[A-Za-z0-9\s]+' 20 | ::= [0-9]+(.[0-9]+)? 21 | ::= (TRUE|FALSE|UNKNOWN) 22 | ::= NULL 23 | ``` 24 | 25 | ### Query 26 | 27 | A query is an initial *relation producing* operation followed by several transformations. Each transformation is an operation from the extended relational algebra. Leaf nodes of the query expression tree must be relations. For the sake of simplicity, Sift currently only supports binary joins, so the parent of any given leaf node has at least one and at most two children. 28 | 29 | ```bash 30 | ::= 31 | ``` 32 | 33 | ### Transformations 34 | 35 | ```bash 36 | ::= (|> )* 37 | ::= ::= SELECT 94 | ``` 95 | 96 | #### Projection 97 | ```bash 98 | ::= PROJECT 99 | ::= 100 | | -> 101 | ``` 102 | 103 | #### Group 104 | ```bash 105 | ::= GROUP (BY )?
::= ->
::= ()
```

Examples
```
'People'
|> PROJECT height, age
|> GROUP #AVG(height), #MIN(height), #MAX(height) BY age
```

### Expressions

```bash
::=
|
| ( )
::= # field reference
| \#() # functions
|
::= (|||)
```

### Examples
```bash
'People'
|> PROJECT name, age
|> SELECT name ~ 'H%' && age > 20
```

```bash
'Movies'
|> SELECT year = 1979 && studioName = 'Paramount'
|> PROJECT title
```

### What's Missing?

#### Join, Union, Intersect, Diff as a transformation

```bash
# Not supported!
'A'
|> PROJECT x * 2 -> xTwo
|> JOIN 'B' on xTwo = foo # where's foo coming from without a projection?

# Supported
JOIN ('A' |> PROJECT x * 2 -> xTwo)
('B' |> PROJECT foo)
ON xTwo = foo
```
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/Tokens.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language

/** Kinds of token a lexer can emit. */
enum class TokenType {
    KEYWORD,
    IDENTIFIER,
    OPERATOR,
    LITERAL,
    PIPE,
    LEFT_PAREN,
    RIGHT_PAREN,
    MAPSTO,
    COMMA,
    EOF;
}

enum class KEYWORDS {
    SELECT,
    PROJECT,
    GROUP,
    SORT,
    LIMIT,
    DISTINCT,
}

enum class PRODUCTIONS {
    JOIN,
    CROSS,
    UNION,
    DIFF,
    INTERSECT,
}

/**
 * A lexed token: its [type] plus an optional payload [value].
 *
 * The companion object holds the string tables (keywords, operators, symbols, booleans) of the
 * language.
 */
class Token<T>(val type: TokenType, val value: T? = null) {

    companion object {

        /**
         * Bag operators
         */
        val PRODUCTIONS = setOf(
            "JOIN",
            "CROSS",
            "UNION",
            "DIFF",
            "INTERSECT",
        )

        /**
         * K e y w o r d s
         */
        val KEYWORDS = setOf(
            "SELECT",
            "PROJECT",
            "GROUP",
            "SORT",
            "LIMIT",
            "DISTINCT",
            "AS",
            "BY",
            "ON",
            "OUTER",
            "LEFT",
            "RIGHT",
            "ASC",
            "DESC",
        ).union(PRODUCTIONS)

        /**
         * O p e r a t o r s
         */
        val OPERATORS = setOf(
            "=", "!=", "<", "<=", ">=", ">", "~",
            "||", "&&", "%",
            "+", "-", "*", "/",
        )

        /**
         * S y m b o l s
         */
        val SYMBOLS = mapOf(
            "|>" to TokenType.PIPE,
            "," to TokenType.COMMA,
            "(" to TokenType.LEFT_PAREN,
            ")" to TokenType.RIGHT_PAREN
        )

        val BOOLEANS = setOf("TRUE", "FALSE", "UNKNOWN")
    }

    override fun toString(): String = "TOKEN($type, $value)"
}

/**
 * TokenList is a helper class for parsers to abstract processing the list of tokens from a Lexer
 *
 * @property tokens the lexer output being consumed
 * @constructor Create empty Token list
 */
class TokenList(val tokens: List<Token<*>>) {

    // Index of the token that peek()/next() will return
    var pointer = 0

    fun reset() {
        pointer = 0
    }

    /**
     * Returns the latest token without advancing the pointer
     */
    fun peek(): Token<*> = tokens[pointer]

    /**
     * Returns the latest token and advances the pointer
     */
    fun next(): Token<*> = tokens[pointer++]

    /**
     * Returns some context around the current token
     *
     * Renders the values of the tokens at indexes `pointer - n` (inclusive) to `pointer + n`
     * (exclusive), separated by spaces, with the most recently consumed token (index
     * `pointer - 1`) wrapped as `>>value<<`. Indexes outside the list contribute only their
     * separating space, so the call is safe at the very start and end of the list.
     *
     * @param n how many positions to show on each side of the pointer
     */
    fun context(n: Int = 3): String = buildString {
        for (i in (pointer - n) until (pointer + n)) {
            val token = tokens.getOrNull(i)
            when {
                // Out-of-range positions keep their padding but have nothing to print
                token == null -> append(" ")
                i == pointer - 1 -> append(">>").append(token.value).append("<< ")
                else -> append(token.value).append(" ")
            }
        }
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/antlr/SiftAntlrVisitor.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.antlr

import com.rchowell.sift.execution.logical.expressions.BinaryOp
import com.rchowell.sift.execution.logical.expressions.LogicalAggregateExpr
import com.rchowell.sift.execution.logical.expressions.LogicalBinaryExpr
import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
import com.rchowell.sift.execution.logical.expressions.LogicalLiteralExpr
import com.rchowell.sift.execution.logical.transforms.LogicalAggregation
import com.rchowell.sift.execution.logical.transforms.LogicalCross
import com.rchowell.sift.execution.logical.transforms.LogicalDiff
import com.rchowell.sift.execution.logical.transforms.LogicalDistinct
import com.rchowell.sift.execution.logical.transforms.LogicalIntersect
import com.rchowell.sift.execution.logical.transforms.LogicalLimit
import com.rchowell.sift.execution.logical.transforms.LogicalProjection
import com.rchowell.sift.execution.logical.transforms.LogicalScan
import com.rchowell.sift.execution.logical.transforms.LogicalSelection
import com.rchowell.sift.execution.logical.transforms.LogicalSort
import com.rchowell.sift.execution.logical.transforms.LogicalUnion

/**
 * The visitor builds a logical plan into the provided [SiftVisitorBuildState].
 *
 * Every visit method communicates only through [state]: it pops the inputs it needs and pushes
 * the transform or expression it produced. Seeding the state up front makes it possible to test
 * an individual visit method in isolation.
 *
 * @property state build state that receives everything the visitor produces
 * @constructor Creates a visitor writing into [state]
 */
class SiftAntlrVisitor(private val state: SiftVisitorBuildState) : SiftBaseVisitor<Unit>() {

    // -----------
    //  Relations
    // -----------

    /** Resolves a back-quoted relation identifier to its source and pushes a scan over it. */
    override fun visitRelId(ctx: SiftParser.RelIdContext) {
        val id = ctx.ID_QUOTED()?.text!!.trim('`')
        val source = state.source(id)
        state.push(LogicalScan(source, listOf()))
    }

    /**
     * Builds both operand relations, then combines them with the bag operator.
     *
     * @throws IllegalStateException when the operator token is not CROSS, UNION, DIFF or INTERSECT
     */
    override fun visitRelBagOp(ctx: SiftParser.RelBagOpContext) {
        visit(ctx.relation(0)) // lhs
        val lhs = state.popQuery()
        visit(ctx.relation(1)) // rhs
        val rhs = state.popQuery()
        val transform = when (ctx.op.type) {
            SiftLexer.CROSS -> LogicalCross(lhs, rhs)
            SiftLexer.UNION -> LogicalUnion(lhs, rhs)
            SiftLexer.DIFF -> LogicalDiff(lhs, rhs)
            SiftLexer.INTERSECT -> LogicalIntersect(lhs, rhs)
            else -> throw IllegalStateException("unknown bag op ${ctx.op}")
        }
        state.push(transform)
    }

    // ------------
    //  Transforms
    // ------------

    /** Wraps the current transform in a selection on the visited predicate expression. */
    override fun visitSelect(ctx: SiftParser.SelectContext) {
        visit(ctx.expr())
        val input = state.transform()
        val expr = state.expr()
        state.push(LogicalSelection(input, expr))
    }

    /** Visits every projected item, then wraps the current transform in a projection over them. */
    override fun visitProject(ctx: SiftParser.ProjectContext) {
        ctx.func().forEach { visit(it) }
        val projections = state.projections()
        val input = state.transform()
        state.push(LogicalProjection(input, projections))
    }

    /** Wraps the current transform in a limit of the given integer literal. */
    override fun visitLimit(ctx: SiftParser.LimitContext) {
        val n = Integer.valueOf(ctx.INT()?.text!!)
        val input = state.transform()
        state.push(LogicalLimit(input, n))
    }

    /** Distinct over the listed identifiers, or over every field of the input when none are listed. */
    override fun visitDistinct(ctx: SiftParser.DistinctContext) {
        val input = state.transform()
        val fields = when (val ids = ctx.ids()) {
            null -> input.schema.fields.map { LogicalIdentifierExpr(it.identifier) }
            else -> ids.ID().map { LogicalIdentifierExpr(it.text) }
        }
        state.push(LogicalDistinct(input, fields))
    }

    /** Sort over the listed identifiers; ascending unless an order token other than ASC is given. */
    override fun visitSort(ctx: SiftParser.SortContext) {
        val input = state.transform()
        val fields = when (val ids = ctx.ids()) {
            null -> emptyList<LogicalIdentifierExpr>()
            else -> ids.ID().map { LogicalIdentifierExpr(it.text) }
        }
        val asc = when (val order = ctx.order) {
            null -> true
            else -> when (order.type) {
                SiftLexer.ASC -> true
                else -> false
            }
        }
        state.push(LogicalSort(input, fields, asc))
    }

    /** Visits every aggregate, then wraps the current transform in an aggregation grouped by the ids. */
    override fun visitGroup(ctx: SiftParser.GroupContext) {
        ctx.agg().forEach { visit(it) }
        val input = state.transform()
        val aggs = state.aggregations()
        val groups = when (val ids = ctx.ids()) {
            null -> emptyList<LogicalIdentifierExpr>()
            else -> ids.ID().map { LogicalIdentifierExpr(it.text) }
        }
        state.push(LogicalAggregation(input, aggs, groups))
    }

    // ---------------------------
    //  Expressions and Functions
    // ---------------------------

    /** Pushes an integer literal expression. */
    override fun visitIntLitExpr(ctx: SiftParser.IntLitExprContext) {
        val v = Integer.valueOf(ctx.INT()?.text!!)
        state.push(LogicalLiteralExpr(v))
    }

    /** Pushes a string literal expression with the surrounding double quotes removed. */
    override fun visitStringLitExpr(ctx: SiftParser.StringLitExprContext) {
        val v = ctx.STRING()?.text!!
        state.push(LogicalLiteralExpr(v.trim('"')))
    }

    /** Pushes an identifier expression. */
    override fun visitIdentExpr(ctx: SiftParser.IdentExprContext) {
        val id = ctx.ID()?.text!!
        val expr = LogicalIdentifierExpr(id)
        state.push(expr)
    }

    /** Visits both operands and pushes the binary expression for the operator text. */
    override fun visitBoolExpr(ctx: SiftParser.BoolExprContext) {
        visit(ctx.expr(0)) // lhs
        visit(ctx.expr(1)) // rhs
        // the expression stack is LIFO, so the right operand comes off first
        val rhs = state.expr()
        val lhs = state.expr()
        val op = BinaryOp.get(ctx.op.text)
        state.push(LogicalBinaryExpr.get(op, lhs, rhs))
    }

    /** Function expressions are not built yet; this only delegates to the generated base visitor. */
    override fun visitFuncExpr(ctx: SiftParser.FuncExprContext?) {
        // val id = ctx.ID()?.text!!

        super.visitFuncExpr(ctx)
    }

    /** Registers a plain identifier projection, aliased to itself. */
    override fun visitProjIdent(ctx: SiftParser.ProjIdentContext) {
        val id = ctx.ID()?.text!!
        val expr = LogicalIdentifierExpr(id)
        state.push(expr, expr)
    }

    /** Registers the visited expression as a projection under the given alias. */
    override fun visitProjMap(ctx: SiftParser.ProjMapContext) {
        visit(ctx.expr())
        val id = ctx.ID()?.text!!
        val alias = LogicalIdentifierExpr(id)
        val expr = state.expr()
        state.push(alias, expr)
    }

    /** Registers an aggregate under its alias, generating an identifier when none was written. */
    override fun visitAgg(ctx: SiftParser.AggContext) {
        visit(ctx.expr())
        val agg = ctx.op.text!!
        val expr = state.expr()
        val aggExpr = LogicalAggregateExpr.get(agg, expr)
        val id = ctx.ID()
        val alias = if (id != null) id.text else state.generateIdentifier()
        state.push(LogicalIdentifierExpr(alias), aggExpr)
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/antlr/SiftCompiler.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.antlr

import com.rchowell.sift.execution.Environment
import com.rchowell.sift.execution.logical.LogicalTransform
import org.antlr.v4.runtime.CharStreams
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.Token
import org.antlr.v4.runtime.tree.Tree
import org.antlr.v4.runtime.tree.Trees
import java.io.ByteArrayInputStream
import java.util.function.Predicate
import java.util.regex.Pattern

/**
 * Compiles Sift query text into a [LogicalTransform] using the ANTLR lexer, parser and
 * [SiftAntlrVisitor].
 *
 * @property env environment handed to the build state, used to resolve relation identifiers
 */
class SiftCompiler(private val env: Environment) {

    /**
     * Lexes, parses and visits [query], returning the top-most transform of the build state.
     *
     * [SiftErrorListener.INSTANCE] is installed on the parser, so a syntax error surfaces as an
     * exception instead of being reported and recovered from.
     */
    fun compile(query: String): LogicalTransform {
        val tokens = lex(query)
        val parser = SiftParser(tokens)
        parser.addErrorListener(SiftErrorListener.INSTANCE)
        val tree = parser.query()
        val state = SiftVisitorBuildState(env)
        val visitor = SiftAntlrVisitor(state)
        visitor.visit(tree)
        return state.query()
    }

    /**
     * Compiles [query] like [compile] and returns every intermediate artifact.
     *
     * NOTE(review): unlike [compile], no [SiftErrorListener] is installed here, so syntax errors
     * go through the parser's default listeners; confirm that is intended.
     *
     * @param verbose when true, also prints the tokens, the parse tree and the plan as they become available
     */
    fun describe(query: String, verbose: Boolean = false): QueryDescription {
        val tokens = lex(query)
        val parser = SiftParser(tokens)
        val tree = parser.query()
        if (verbose) {
            tokens.tokens.forEach {
                val type = SiftLexer.VOCABULARY.getDisplayName(it.type)
                println(String.format("%s: %s", type, it.text))
            }
            println(tree.format(parser))
        }
        val state = SiftVisitorBuildState(env)
        val visitor = SiftAntlrVisitor(state)
        visitor.visit(tree)
        if (verbose) println(state.query().pretty())
        return QueryDescription(
            query,
            tokens.tokens,
            parser,
            tree,
            state.query()
        )
    }

    /**
     * Everything produced while compiling one query; [toString] renders it as a report.
     */
    data class QueryDescription(
        val query: String,
        val tokens: List<Token>,
        val parser: SiftParser,
        val ast: SiftParser.QueryContext,
        val plan: LogicalTransform,
    ) {
        override fun toString(): String = buildString {
            append("==== Query ====\n")
            append(query).append('\n')
            append("==== Tokens ====\n")
            tokens.forEach {
                val type = SiftLexer.VOCABULARY.getDisplayName(it.type)
                append(String.format("%s: %s\n", type, it.text))
            }
            append("==== Tree ====\n")
            append(ast.format(parser)).append('\n')
            append("==== Plan ====\n")
            append(plan.pretty())
        }
    }

    companion object {

        // NOTE(review): Pattern.asPredicate() tests whether the pattern is *found* in the input,
        // so any display name containing a letter passes, not only purely alphabetic ones.
        private val IDENTIFIER: Predicate<String> = Pattern.compile("[a-zA-Z]+").asPredicate()

        /**
         * Wraps [query] in a UTF-8 character stream and returns an unfilled token stream over it.
         */
        fun lex(query: String): CommonTokenStream {
            val input = ByteArrayInputStream(query.toByteArray(Charsets.UTF_8))
            val lexer = SiftLexer(CharStreams.fromStream(input))
            return CommonTokenStream(lexer)
        }

        /**
         * Returns the upper-cased display name of every lexer token type that passes [IDENTIFIER].
         *
         * https://github.com/trinodb/trino/blob/b7b515b8648f6f954a2aaa45b523247f5d85fa0f/core/trino-parser/src/main/java/io/trino/sql/ReservedIdentifiers.java#L130
         */
        fun keywords(): Set<String> {
            val keywords = mutableSetOf<String>()
            val vocab = SiftLexer.VOCABULARY
            // maxTokenType is itself a valid token type, so the range must be inclusive;
            // `until` silently dropped the last token declared in the grammar
            for (i in 0..vocab.maxTokenType) {
                val name = vocab.getDisplayName(i) ?: ""
                if (IDENTIFIER.test(name)) {
                    keywords.add(name.toUpperCase())
                }
            }
            return keywords
        }
    }
}

/**
 * Renders this parse tree as indented text: one node per line, children wrapped in parentheses
 * and indented one space deeper than their parent.
 *
 * @param parser parser used to turn nodes into display text
 * @param indent number of leading spaces for this node
 */
fun Tree.format(parser: SiftParser, indent: Int = 0): String = buildString {
    val tree = this@format
    val prefix = " ".repeat(indent)
    append(prefix)
    append(Trees.getNodeText(tree, parser))
    if (tree.childCount != 0) {
        append(" (\n")
        for (i in 0 until tree.childCount) {
            append(tree.getChild(i).format(parser, indent + 1))
            append("\n")
        }
        append(prefix).append(")")
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/antlr/SiftErrorListener.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.antlr

import org.antlr.v4.runtime.BaseErrorListener
import org.antlr.v4.runtime.RecognitionException
import org.antlr.v4.runtime.Recognizer
import org.antlr.v4.runtime.misc.ParseCancellationException

/**
 * Error listener that turns every reported syntax error into a [ParseCancellationException].
 *
 * https://stackoverflow.com/questions/18132078/handling-errors-in-antlr4
 */
class SiftErrorListener : BaseErrorListener() {

    /**
     * @throws ParseCancellationException always, carrying the source name, position and [msg]
     */
    override fun syntaxError(
        recognizer: Recognizer<*, *>,
        offendingSymbol: Any?,
        line: Int,
        charPositionInLine: Int,
        msg: String,
        e: RecognitionException?
    ) {
        var sourceName: String = recognizer.inputStream.sourceName
        if (sourceName.isNotEmpty()) {
            sourceName = String.format("%s:%d:%d: ", sourceName, line, charPositionInLine)
        }
        val message = sourceName + "line " + line + ":" + charPositionInLine + " " + msg
        throw ParseCancellationException(message)
    }

    companion object {
        // NOTE(review): nothing visible here reassigns this; it could likely be a `val`
        var INSTANCE = SiftErrorListener()
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/antlr/SiftVisitorBuildState.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.antlr

import com.rchowell.sift.execution.Environment
import com.rchowell.sift.execution.logical.LogicalExpr
import com.rchowell.sift.execution.logical.LogicalTransform
import com.rchowell.sift.execution.logical.expressions.LogicalAggregateExpr
import com.rchowell.sift.execution.logical.expressions.LogicalIdentifierExpr
import java.util.Stack

/**
 * Visitor build state is used by the parser to generate the parsed
 * logical plan. This idea came from wanting to remove all query
 * building logic out of the visitor, as well as using this as the input
 * and output of the visitor for easy testing of individual visitor methods.
 *
 * It holds a stack of transforms, a stack of expressions, and the projections and aggregations
 * registered since they were last collected.
 *
 * @property env environment used to look up sources by identifier
 * @constructor Creates an empty build state
 */
class SiftVisitorBuildState(private val env: Environment) {

    private val transforms = Stack<LogicalTransform>()
    private val exprs = Stack<LogicalExpr>()

    // This won't work across scopes
    private val projections = mutableMapOf<LogicalIdentifierExpr, LogicalExpr>()
    private val aggregations = mutableMapOf<LogicalIdentifierExpr, LogicalAggregateExpr>()

    // Would be interesting to have a legitimate id generator
    private var n = 0

    /**
     * top-most transformation, left on the stack
     */
    fun query(): LogicalTransform = transforms.peek()

    /** Removes and returns the top-most transformation. */
    fun popQuery(): LogicalTransform = transforms.pop()

    /** Looks up the source registered in the environment under [identifier]. */
    fun source(identifier: String) = env.getSource(identifier)

    /** Pushes an expression onto the expression stack. */
    fun push(expr: LogicalExpr) {
        exprs.push(expr)
    }

    /** Pushes a transform onto the transform stack. */
    fun push(scan: LogicalTransform) {
        transforms.push(scan)
    }

    /** Registers a projection under [alias], replacing any earlier one with the same alias. */
    fun push(alias: LogicalIdentifierExpr, expr: LogicalExpr) {
        projections[alias] = expr
    }

    /** Registers an aggregation under [alias], replacing any earlier one with the same alias. */
    fun push(alias: LogicalIdentifierExpr, expr: LogicalAggregateExpr) {
        aggregations[alias] = expr
    }

    /** Removes and returns the top-most expression. */
    fun expr(): LogicalExpr = exprs.pop()

    /** Removes and returns the top-most transformation. */
    fun transform(): LogicalTransform = transforms.pop()

    /** Returns a copy of the registered projections and clears them for the next transform. */
    fun projections(): Map<LogicalIdentifierExpr, LogicalExpr> {
        val v = projections.toMap()
        projections.clear()
        return v
    }

    /** Returns a copy of the registered aggregations and clears them for the next transform. */
    fun aggregations(): Map<LogicalIdentifierExpr, LogicalAggregateExpr> {
        val v = aggregations.toMap()
        aggregations.clear()
        return v
    }

    /** Returns a fresh identifier of the form `v_0`, `v_1`, ... */
    fun generateIdentifier() = "v_${n++}"
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v0/lexers/DirectCodedLexer.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.lexers

import com.rchowell.sift.language.SiftLexer
import com.rchowell.sift.language.Token
import com.rchowell.sift.language.TokenType.COMMA
import com.rchowell.sift.language.TokenType.EOF
import com.rchowell.sift.language.TokenType.IDENTIFIER
import com.rchowell.sift.language.TokenType.KEYWORD
import com.rchowell.sift.language.TokenType.LEFT_PAREN
import com.rchowell.sift.language.TokenType.LITERAL
import com.rchowell.sift.language.TokenType.MAPSTO
import com.rchowell.sift.language.TokenType.OPERATOR
import com.rchowell.sift.language.TokenType.PIPE
import com.rchowell.sift.language.TokenType.RIGHT_PAREN

class InvalidTokenException(t: String) : Exception("invalid token $t")

/**
 * Hand-written lexer that walks the input one character at a time, tracking the kind of token
 * currently being scanned in a [State].
 *
 * Could be better
 */
class DirectCodedLexer : SiftLexer {

    // characters allowed in an identifier: letters, '-' and '_'
    private val idPattern = Regex("[A-Za-z\\-_]")

    /** Lets a single character be tested with `curr in idPattern`. */
    operator fun Regex.contains(text: Char): Boolean = this.matches(text.toString())

    /** Used to represent current type being scanned */
    enum class State {
        INIT,
        ID,
        INT,
        FLOAT,
        OP,
        STRING,
        PIPE,
    }

    /**
     * Tokenize character by character
     *
     * A newline is appended to [input] so that a token in progress at the end of the text is
     * still terminated by whitespace. The returned list always ends with an EOF token.
     *
     * @throws InvalidTokenException when a character cannot start or continue a token
     */
    override fun tokenize(input: String): List<Token<*>> {

        // I/O
        val terminatedInput = input + "\n"
        val chars = terminatedInput.toCharArray().iterator()
        val tokens = mutableListOf<Token<*>>()

        // Tracking
        val buffer = StringBuilder()
        var state = State.INIT

        fun reset() {
            buffer.clear()
            state = State.INIT
        }

        // emitting a token always returns the lexer to INIT with an empty buffer
        fun add(t: Token<*>) {
            tokens.add(t)
            reset()
        }

        // typed Nothing so the compiler knows scanning never continues past a call
        fun invalid(message: String? = null): Nothing =
            throw InvalidTokenException(message ?: buffer.toString())

        while (chars.hasNext()) {
            val curr = chars.next()

            // check if we are in an accepting state
            if (curr.isWhitespace()) {
                when (state) {
                    State.ID -> {
                        val v = buffer.toString()
                        if (v in Token.KEYWORDS) {
                            add(Token(KEYWORD, v))
                        } else {
                            when (v) {
                                "TRUE" -> add(Token(LITERAL, true))
                                "FALSE" -> add(Token(LITERAL, false))
                                else -> add(Token(IDENTIFIER, v))
                            }
                        }
                        continue
                    }
                    State.INT -> {
                        val v = buffer.toString().toInt(10)
                        add(Token(LITERAL, v))
                        continue
                    }
                    State.FLOAT -> {
                        val v = buffer.toString().toFloat()
                        add(Token(LITERAL, v))
                        continue
                    }
                    State.OP -> {
                        val v = buffer.toString()
                        if (v == "->") {
                            add(Token(MAPSTO, v))
                        } else {
                            add(Token(OPERATOR, v))
                        }
                        continue
                    }
                    State.INIT -> continue // skip whitespace
                    else -> {
                        // STRING keeps the whitespace, PIPE rejects it; both are handled below
                    }
                }
            }

            // process next character
            when (state) {
                State.INIT -> {
                    // simple comparisons
                    state = when (curr) {
                        '\'' -> {
                            state = State.STRING
                            continue
                        }
                        '(' -> {
                            add(Token(LEFT_PAREN, "("))
                            continue
                        }
                        ')' -> {
                            add(Token(RIGHT_PAREN, ")"))
                            continue
                        }
                        ',' -> {
                            add(Token(COMMA, ","))
                            continue
                        }
                        '|' -> State.PIPE
                        '=' -> State.OP
                        '!' -> State.OP
                        '<' -> State.OP
                        '>' -> State.OP
                        '~' -> State.OP
                        '&' -> State.OP
                        '+' -> State.OP
                        '-' -> State.OP
                        '*' -> State.OP
                        '/' -> State.OP
                        '%' -> State.OP
                        else -> state
                    }

                    // set difference shorthand
                    if (curr == '\\') {
                        if (chars.hasNext() && chars.next().isWhitespace()) {
                            add(Token(KEYWORD, "DIFF"))
                            continue
                        }
                        invalid()
                    }

                    // state/type has been determined; start building the buffer
                    buffer.append(curr)

                    // first 'when' group already matched something
                    if (state != State.INIT) {
                        continue
                    }

                    when {
                        curr.isDigit() -> {
                            state = State.INT
                        }
                        curr in idPattern -> {
                            state = State.ID
                        }
                        else -> {
                            invalid()
                        }
                    }
                }
                State.PIPE -> {
                    when (curr) {
                        '|' -> add(Token(OPERATOR, "||"))
                        '>' -> add(Token(PIPE, "|>"))
                        else -> invalid()
                    }
                }
                State.ID -> {
                    // NOTE(review): only the whitespace path above checks Token.KEYWORDS, so a word
                    // directly followed by ',' '(' or ')' is never emitted as a KEYWORD; confirm
                    when (curr) {
                        ',' -> {
                            add(Token(IDENTIFIER, buffer.toString()))
                            add(Token(COMMA, ","))
                        }
                        '(' -> {
                            add(Token(IDENTIFIER, buffer.toString()))
                            add(Token(LEFT_PAREN, "("))
                        }
                        ')' -> {
                            when (val v = buffer.toString()) {
                                "TRUE" -> add(Token(LITERAL, true))
                                "FALSE" -> add(Token(LITERAL, false))
                                else -> add(Token(IDENTIFIER, v))
                            }
                            add(Token(RIGHT_PAREN, ")"))
                        }
                        else -> {
                            if (curr !in idPattern) {
                                invalid("combining $buffer with $curr")
                            }
                            buffer.append(curr)
                        }
                    }
                }
                State.INT -> {
                    when {
                        curr == '.' -> {
                            state = State.FLOAT
                            buffer.append(curr)
                        }
                        curr == ')' -> {
                            // both tokens are complete here; the ')' must not be appended to the
                            // freshly reset buffer, where it would leak into the next token
                            add(Token(LITERAL, buffer.toString().toInt(10)))
                            add(Token(RIGHT_PAREN, ")"))
                        }
                        curr.isDigit() -> buffer.append(curr)
                        else -> invalid()
                    }
                }
                State.FLOAT -> {
                    if (!curr.isDigit()) invalid()
                    buffer.append(curr)
                }
                State.OP -> {
                    // TODO
                    buffer.append(curr)
                }
                State.STRING -> {
                    // a quote closes the literal unless the previous character is a backslash;
                    // lastOrNull keeps the empty literal '' from throwing on an empty buffer
                    if (curr == '\'' && buffer.lastOrNull() != '\\') {
                        add(Token(LITERAL, buffer.toString()))
                        continue
                    }
                    // everything is allowed in string literals for now
                    // this will likely cause problems
                    buffer.append(curr)
                }
            }
        }
        tokens.add(Token(EOF))
        return tokens
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v1/REAMDE.md:
--------------------------------------------------------------------------------
# Sift Language

## Examples

```
scan(mlb)
|> select(age > 40)
|> project(name, position, substr(team, 2) -> id)
|> limit(25)
```

```
scan(mlb)
|> select(age > 40)
|> group(
    avg(age),
    max(age) -> oldest
) by (v1, v2, ...)
|> sort(fields, ...)
```

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v1/ast/Expr.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v1.ast

/** Base class for v1 expression nodes. */
abstract class Expr : Node()

/** Placeholder binary expression node; it carries no operands yet. */
class BinaryExpr : Expr()

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/language/v1/ast/Node.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v1.ast

/** Root of the v1 AST class hierarchy. */
abstract class Node

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/Context.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.shell

import com.rchowell.sift.execution.Environment
import com.rchowell.sift.source.Source

/**
 * Holds shell context to share state between commands
 *
 * @property env environment the shell commands operate on
 */
class Context(
    val env: Environment
) {

    /** Source selected by [useRelation]; null until one is selected. */
    var source: Source? = null

    /** Free-form value written and read back by the `set` command. */
    var value: String? = null

    /**
     * Sets the context's [Source]
     */
    fun useRelation(id: String) {
        source = env.getSource(id)
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/Main.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.shell

import com.google.inject.Guice
import com.rchowell.sift.execution.Environment
import com.rchowell.sift.shell.commands.SiftRootCommand
import com.rchowell.sift.shell.kosh.Shell
import com.rchowell.sift.source.CsvSource
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type

// Used when no path is given on the command line; this is the previously hard-coded location.
private const val DEFAULT_CSV_PATH = "/Users/rch/Desktop/mlb_players.csv"

/**
 * Starts the Sift shell with a single CSV-backed relation named `mlb`.
 *
 * @param args optional; the first argument is the path of the CSV file to load, falling back to
 * [DEFAULT_CSV_PATH] when absent
 */
fun main(args: Array<String>) {
    val source = CsvSource(
        identifier = "mlb",
        schema = Schema(
            listOf(
                Field("Name", Type.String),
                Field("Team", Type.String),
                Field("Position", Type.String),
                Field("Height", Type.Num),
                Field("Weight", Type.Num),
                Field("Age", Type.Num),
            )
        ),
        path = args.firstOrNull() ?: DEFAULT_CSV_PATH,
        header = true,
    )
    val env = Environment(
        sources = listOf(source)
    )
    val context = Context(env)
    val injector = Guice.createInjector(ShellModule(context))
    val shell = Shell(
        prompt = "-> ",
        root = SiftRootCommand(context, injector),
        highlighter = SiftHighlighter(),
        runner = SiftRunner(env),
    )
    shell.run()
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/ShellModule.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.shell

import com.google.inject.Binder
import com.google.inject.Module

/**
 * Guice module that binds [Context] to the single shell-wide instance.
 */
class ShellModule(private val ctx: Context) : Module {

    override fun configure(binder: Binder) {
        binder.bind(Context::class.java).toInstance(ctx)
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/SiftHighlighter.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.shell

import com.rchowell.sift.language.v0.antlr.SiftCompiler
import com.rchowell.sift.language.v0.antlr.SiftLexer
import com.rchowell.sift.language.v0.antlr.SiftParser
import org.antlr.v4.runtime.CommonTokenStream
import org.jline.reader.Highlighter
import org.jline.reader.LineReader
import org.jline.utils.AttributedString
import org.jline.utils.AttributedStringBuilder
import org.jline.utils.AttributedStyle
import org.jline.utils.AttributedStyle.BLUE
import org.jline.utils.AttributedStyle.BOLD
import org.jline.utils.AttributedStyle.CYAN
import org.jline.utils.AttributedStyle.DEFAULT
import org.jline.utils.AttributedStyle.GREEN
import org.jline.utils.AttributedStyle.MAGENTA
import org.jline.utils.AttributedStyle.RED
import java.util.regex.Pattern

/**
 * JLine highlighter that lexes the edit buffer with the Sift lexer and styles each token by kind.
 */
class SiftHighlighter : Highlighter {

    private val keywords = SiftCompiler.keywords()

    /**
     * Styles [buffer] token by token.
     *
     * From the first unrecognized token onwards (or for every token, if lexing or parsing threw)
     * the text is rendered in the error style. If no token stream could be created at all, the
     * whole buffer is rendered in the error style.
     */
    override fun highlight(reader: LineReader?, buffer: String?): AttributedString {
        var tokens: CommonTokenStream? = null
        val builder = AttributedStringBuilder()
        var error = false
        try {
            tokens = SiftCompiler.lex(buffer ?: "")
            val parser = SiftParser(tokens)
            parser.removeErrorListeners()
            parser.query() // builds the token stream
        } catch (e: Exception) {
            error = true
        }
        if (tokens == null) {
            builder.styled(ERROR, buffer.orEmpty())
            return builder.toAttributedString()
        }
        tokens.tokens.forEach {
            when {
                // must come first: the EOF token's text is the literal "<EOF>", which would
                // otherwise be rendered in the error style once `error` is set
                it.type == SiftLexer.EOF -> {}
                (error || it.type == SiftLexer.UNRECOGNIZED) -> {
                    error = true
                    builder.styled(ERROR, it.text)
                }
                isKeyword(it.text) -> builder.styled(KEYWORD, it.text)
                it.type == SiftLexer.PIPE -> builder.styled(KEYWORD, it.text)
                it.type == SiftLexer.ID -> builder.styled(IDENTIFIER, it.text)
                it.type == SiftLexer.STRING -> builder.styled(STRING, it.text)
                it.type == SiftLexer.INT -> builder.styled(NUMBER, it.text)
                else -> builder.append(it.text)
            }
        }
        return builder.toAttributedString()
    }

    // Error patterns and indexes are not used by this highlighter.
    override fun setErrorPattern(errorPattern: Pattern?) {
    }

    override fun setErrorIndex(errorIndex: Int) {
    }

    private fun isKeyword(text: String): Boolean = keywords.contains(text.toUpperCase())

    private companion object Styles {
        private val KEYWORD: AttributedStyle = BOLD.foreground(MAGENTA)
        private val IDENTIFIER: AttributedStyle = DEFAULT.foreground(CYAN)
        private val STRING: AttributedStyle = DEFAULT.foreground(BLUE)
        private val NUMBER: AttributedStyle = DEFAULT.foreground(GREEN)
        private val ERROR: AttributedStyle = DEFAULT.foreground(RED)
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/SiftLineParser.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.shell

import org.jline.reader.EOFError
import org.jline.reader.ParsedLine
import org.jline.reader.Parser
import org.jline.reader.Parser.ParseContext.ACCEPT_LINE
import org.jline.reader.Parser.ParseContext.UNSPECIFIED
import org.jline.reader.impl.DefaultParser

/**
 * Line parser that hands shell commands to JLine's default parser as-is, and keeps any other
 * input open (by throwing [EOFError]) until the line ends with a newline.
 */
class SiftLineParser : Parser {

    // Special commands
    var commands: Set<String> = emptySet()

    private val default = DefaultParser()

    // contexts in which a line not ending in a newline is treated as incomplete
    private val nonTerminal = setOf(ACCEPT_LINE, UNSPECIFIED)

    /**
     * @throws EOFError when the line is not a registered command, the context is ACCEPT_LINE or
     * UNSPECIFIED, and the line does not end with a newline
     */
    override fun parse(rawLine: String?, cursor: Int, context: Parser.ParseContext?): ParsedLine {
        val line = rawLine ?: ""
        val command = line.trim().split(" ").first()
        if (command.isEmpty() || commands.contains(command) || context == Parser.ParseContext.COMPLETE) {
            return default.parse(line, cursor, context)
        }
        if (nonTerminal.contains(context) && !line.endsWith("\n")) {
            throw EOFError(-1, -1, null)
        }
        // reuse the shared parser rather than allocating a new one per call
        return default.parse(line, cursor, context)
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/SiftRunner.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.shell

import com.rchowell.sift.execution.Environment
import com.rchowell.sift.execution.Executor
import com.rchowell.sift.shell.kosh.Runner

/**
 * Execute a query in the environment.
 * Very basic now. I really want vi style editing without
 * having to worry about escaping quotes. Still trying to
 * learn about JLine widgets + custom LineReader
 */
class SiftRunner(private val env: Environment) : Runner {

    /** Runs [line] as a query; any exception is printed instead of propagated to the shell. */
    override fun run(line: String) {
        try {
            Executor.sift(env, line)
        } catch (e: Exception) {
            println(e)
        }
    }
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/commands/DebugGroup.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.shell.commands

import com.rchowell.sift.shell.Context
import com.rchowell.sift.shell.kosh.CommandGroup
import picocli.CommandLine
import picocli.CommandLine.Command

@Command(
    name = "debug",
    description = ["Collection of query compiler debug commands"],
    subcommands = [DescribeCommand::class],
)
class DebugGroup : CommandGroup() {

    @CommandLine.ParentCommand
    lateinit var root: SiftRootCommand

    /** Shell context, taken from the root command. */
    val context: Context
        get() = root.context
}

--------------------------------------------------------------------------------
/src/main/kotlin/com/rchowell/sift/shell/commands/DescribeCommand.kt:
--------------------------------------------------------------------------------
/**
 * `describe` subcommand of the debug group: compiles the given Sift query against the current
 * environment and prints the compiler's description of it. Any exception is printed, not thrown.
 */
@Command(name = "describe")
class DescribeCommand : Runnable {

    /** Parent group, injected by picocli; provides access to the shell context. */
    @ParentCommand
    lateinit var root: DebugGroup

    /** The query text to describe. */
    @Parameters(description = ["sift query"])
    lateinit var query: String

    override fun run() {
        try {
            println(SiftCompiler(root.context.env).describe(query))
        } catch (failure: Exception) {
            println(failure)
        }
    }
}
/**
 * Namespace for the `set` shell command and the injectable parameters it needs.
 */
object SetCommand {

    /** Dependencies of the command, resolved through the root command's injector. */
    class Params @Inject constructor(
        val ctx: Context,
    )

    /** Replaces the context value with the given argument, printing the old and new values. */
    @CommandLine.Command(name = "set")
    class Command : Runnable {

        @CommandLine.ParentCommand
        lateinit var root: SiftRootCommand

        @CommandLine.Parameters(description = ["value to set"])
        lateinit var value: String

        override fun run() {
            val ctx = root.injector.getInstance(Params::class.java).ctx
            println("Old value: ${ctx.value}")
            ctx.value = value
            println("New value: ${ctx.value}")
        }
    }
}
/**
 * Base class for commands that only group subcommands.
 * Running the group itself prints its picocli usage message.
 */
abstract class CommandGroup : Runnable {
    override fun run() {
        val usage = CommandLine(this).usageMessage
        println(usage)
    }
}
/**
 * Interactive shell that combines JLine line editing with picocli commands.
 *
 * A line whose first word is a registered command (builtin or picocli) is executed by the
 * command registry; every other non-blank line is handed to [runner].
 *
 * https://github.com/remkop/picocli/tree/master/picocli-shell-jline3
 *
 * @param prompt text shown before each input line
 * @param root picocli root command; its `out` writer is assigned when the shell starts
 * @param runner receives every line that is not a registered command
 * @param highlighter optional highlighter; a [DefaultHighlighter] is used when null
 * @param completer NOTE(review): accepted but never consulted; the registry's completer is always installed
 * @param builtins JLine builtin commands to expose
 */
class Shell(
    private val prompt: String = "> ",
    private val root: RootCommand,
    private val runner: Runner,
    private val highlighter: Highlighter? = null,
    private val completer: Completer? = null,
    builtins: Set<Builtins.Command> = setOf(
        Builtins.Command.COLORS,
        Builtins.Command.HISTORY,
        Builtins.Command.TTOP,
    ),
) {

    // TODO configurable working directory
    private val workDir: Path = Paths.get(System.getProperty("user.dir"))

    private val builtins = Builtins(builtins, workDir, null, null)

    private val parser = SiftLineParser()

    /**
     * Starts the shell and blocks, reading lines until end-of-file is reached.
     * User interrupts are ignored; other exceptions raised while handling a line are traced
     * through the command registry and the loop continues.
     */
    fun run() {
        AnsiConsole.systemInstall()
        try {
            val commandFactory = PicocliCommands.PicocliCommandsFactory()
            // TODO use own factory to override the command grouping name which is the class name
            val commandsRegistry = PicocliCommands(CommandLine(root, commandFactory))

            TerminalBuilder.builder().build().use { terminal ->

                // Add the builtin and user commands to the composite registry
                val commands = SystemRegistryImpl(parser, terminal, { workDir }, null)
                commands.setCommandRegistries(builtins, commandsRegistry)

                // The parser must know the command names to tell commands apart from queries
                parser.commands = commands.commandNames()

                // Worth making configurable?
                val reader: LineReader = LineReaderBuilder.builder()
                    .terminal(terminal)
                    .completer(commands.completer())
                    .parser(parser)
                    .highlighter(highlighter ?: DefaultHighlighter())
                    .variable(LineReader.LIST_MAX, 50)
                    .build()
                builtins.setLineReader(reader)

                // Setup output of the root command
                root.out = reader.terminal.writer()
                commandFactory.setTerminal(terminal)

                // TODO figure out how I want to toggle the tailtip because I really like this feature
                // https://github.com/remkop/picocli/blob/master/picocli-shell-jline3/src/test/java/picocli/shell/jline3/example/Example.java#L167-L170

                // Main Shell Loop
                while (true) {
                    try {
                        commands.cleanUp()
                        val line = reader.readLine(prompt)
                        // Nothing to execute; do not bother the runner with empty input.
                        if (line.isBlank()) continue
                        // Trim first so detection agrees with SiftLineParser, which also trims
                        // before extracting the command word.
                        val command = line.trim().split(" ").first()
                        if (command in commands.commandNames()) {
                            commands.execute(line)
                        } else {
                            runner.run(line)
                        }
                    } catch (e: UserInterruptException) {
                        // Ignore
                    } catch (e: EndOfFileException) {
                        return
                    } catch (e: Exception) {
                        commands.trace(e)
                    }
                }
            }
        } catch (t: Throwable) {
            t.printStackTrace()
        } finally {
            AnsiConsole.systemUninstall()
        }
    }
}
/**
 * In-memory [Source] backed by a fixed list of batches.
 *
 * @property identifier name of the source
 * @property schema schema describing the columns of every batch in [data]
 * @property data batches returned (after column selection) by [scan]
 */
class MemSource(
    override val identifier: String,
    override val schema: Schema,
    private val data: List<Batch>
) : Source {

    override fun init() {}

    override fun close() {}

    /**
     * Scan finds the column indices for the given field identifiers
     * and returns a sequence of [Batch] objects for the selected columns.
     *
     * @param identifiers fields to return; an empty list selects every column
     * @return one batch per stored batch, restricted to the selected columns
     */
    override fun scan(identifiers: List<String>): Sequence<Batch> {
        val selectedSchema = schema.select(identifiers)
        // Column positions must come from the SOURCE schema. The selected schema numbers its
        // fields 0..k-1, which only matches the stored batches when a prefix is selected.
        val sourceIndexes = selectedSchema.fields.map { field ->
            checkNotNull(schema.fieldIndexes[field.identifier]) {
                "field `${field.identifier}` is not part of source `$identifier`"
            }
        }
        return data.asSequence().map { batch ->
            Batch(selectedSchema, sourceIndexes.map { index -> batch.columns[index] })
        }
    }

    // TODO a nice Factory constructor
}
/**
 * Renders the batch as an ASCII table: a header row with the field identifiers followed by one
 * row per record. Strings are decoded as UTF-8 and doubles are shown with two decimals.
 */
override fun toString(): String {
    val table = AsciiTable()
    table.addRule()
    table.addRow(schema.fields.map { field -> field.identifier })
    table.addRule()

    repeat(records) { row ->
        val cells = columns.map { column ->
            val cell = column[row]
            when (cell) {
                is ByteArray -> cell.toString(Charsets.UTF_8)
                is Double -> "%.2f".format(cell)
                else -> cell.toString()
            }
        }
        table.addRow(cells)
        table.addRule()
    }
    return table.render()
}
/**
 * Returns a new [Batch] with the same schema whose rows are ordered by [comparator] over
 * the given fields. This batch is left untouched.
 *
 * Row indexes are sorted (stably, so ties keep their original order) and the output columns
 * are then written in that order. An empty batch sorts to an empty batch; the previous
 * heap-based version failed there because a `PriorityQueue` rejects an initial capacity of 0.
 *
 * @param fields identifiers of the fields to order by, most significant first
 */
fun sort(fields: List<String>): Batch {
    val order = (0 until records).sortedWith(comparator(fields))
    val vectors = empty(schema, records)
    order.forEachIndexed { target, source ->
        columns.forEachIndexed { i, column ->
            when (column) {
                // Variable-width data may outgrow the initial allocation, so use the growing setter.
                is StringColumn -> (vectors[i] as VarCharVector).setSafe(target, column[source])
                is NumColumn -> (vectors[i] as Float8Vector)[target] = column[source]
                is BoolColumn -> (vectors[i] as BitVector)[target] = column[source]
            }
        }
    }
    vectors.valueCount(records)
    return fromVectors(schema, vectors)
}
/**
 * A single column of values, addressed by row.
 *
 * Concrete columns either wrap an Arrow vector or repeat one literal value.
 */
sealed class Column {

    /** Returns the value stored at [row]. */
    abstract operator fun get(row: Int): Any?

    /** Returns a column restricted by [mask]; the vector-backed columns keep rows whose mask value is 1. */
    abstract fun filter(mask: BoolColumn): Column

    /** Number of values in the column. */
    abstract val size: Int

    /**
     * Factory for concise [Column] creation
     */
    object Factory {

        fun boolean(values: List<Boolean>): BoolColumn = boolean(values.size, values)

        fun boolean(capacity: Int = 0, values: List<Boolean>): BoolColumn =
            BoolVectorColumn(VectorFactory.boolean(capacity, values))

        fun string(values: List<String>): StringColumn = string(values.size, values)

        fun string(capacity: Int = 0, values: List<String>): StringColumn =
            StringVectorColumn(VectorFactory.string(capacity, values))

        fun numeric(values: List<Double>): NumColumn = numeric(values.size, values)

        fun numeric(capacity: Int = 0, values: List<Double>): NumColumn =
            NumVectorColumn(VectorFactory.numeric(capacity, values))
    }

    /**
     * Factory for concise [FieldVector] creation.
     *
     * Every vector is named "v", is allocated up front and has its value count set to the
     * number of supplied values.
     */
    object VectorFactory {

        // One allocator shared by every vector instead of a fresh RootAllocator per vector.
        private val allocator = RootAllocator(Long.MAX_VALUE)

        fun boolean(values: List<Boolean>): BitVector = boolean(values.size, values)

        fun boolean(capacity: Int = 0, values: List<Boolean> = listOf()): BitVector {
            val vector = BitVector("v", allocator)
            vector.setInitialCapacity(max(capacity, values.size))
            vector.allocateNew()
            values.forEachIndexed { i, value -> vector[i] = if (value) 1 else 0 }
            vector.valueCount = values.size
            return vector
        }

        fun numeric(values: List<Double>): Float8Vector = numeric(values.size, values)

        fun numeric(capacity: Int = 0, values: List<Double> = listOf()): Float8Vector {
            val vector = Float8Vector("v", allocator)
            vector.setInitialCapacity(max(capacity, values.size))
            vector.allocateNew()
            values.forEachIndexed { i, value -> vector[i] = value }
            vector.valueCount = values.size
            return vector
        }

        fun string(values: List<String>): VarCharVector = string(values.size, values)

        /**
         * NOTE(review): [capacity] is not applied here (the original left `setInitialCapacity`
         * commented out), so the vector starts at Arrow's default allocation. Values are
         * written with `setSafe`, which grows the buffers as needed.
         */
        fun string(capacity: Int = 0, values: List<String> = listOf()): VarCharVector {
            val vector = VarCharVector("v", allocator)
            // vector.setInitialCapacity(max(capacity, values.size))
            vector.allocateNew()
            values.forEachIndexed { i, value ->
                vector.setSafe(i, value.toByteArray()) // UTF-8
            }
            vector.valueCount = values.size
            return vector
        }
    }
}
/**
 * [StringColumn] backed by an Arrow [VarCharVector]; values are exposed as raw byte arrays.
 */
class StringVectorColumn(val vector: VarCharVector) : StringColumn() {

    override val size: Int
        get() = vector.valueCount

    override fun get(row: Int): ByteArray = vector.get(row)

    /**
     * Copies the rows whose [mask] value is 1 into a new column, preserving their order.
     *
     * @param mask column read at every row index of this column
     */
    override fun filter(mask: BoolColumn): StringColumn {
        val kept = VectorFactory.string(vector.valueCapacity)
        var count = 0
        for (row in 0 until vector.valueCount) {
            // setSafe grows the target buffers; plain set can overrun the initial allocation
            // because the string factory does not size the vector from the requested capacity.
            if (mask[row] == 1) kept.setSafe(count++, vector.get(row))
        }
        kept.valueCount = count
        return StringVectorColumn(kept)
    }

    override fun toString(): String = vector.toString()
}
/**
 * Holding object for identifier to [Type] mappings
 *
 * Something to keep in mind will be maintaining orderings
 *
 * @property fields ordered list of fields
 * @property relation is the relation identifier
 */
data class Schema(val fields: List<Field>, val relation: String = "") {

    /**
     * Arrow representation of this Schema
     */
    val arrow: org.apache.arrow.vector.types.pojo.Schema
        get() {
            val arrowFields = fields.map { field -> field.arrow }
            return org.apache.arrow.vector.types.pojo.Schema(arrowFields)
        }

    /**
     * Maps identifiers to their index in the [Field] list
     */
    val fieldIndexes = fields.withIndex().associateTo(mutableMapOf<String, Int>()) { (index, field) ->
        field.identifier to index
    }

    /**
     * Maps identifiers to their [Field]
     */
    private val fieldMap = fields.associateByTo(mutableMapOf<String, Field>()) { field -> field.identifier }

    /**
     * Constructs a new schema, keeping this relation, from the fields at the given indices
     *
     * @param indices positions in [fields], in output order
     */
    fun project(indices: List<Int>): Schema {
        val projected = indices.map { index -> fields[index] }
        return Schema(projected, relation)
    }

    /**
     * Select returns a new schema for the given columns; unknown identifiers are dropped.
     * If the list of identifiers is empty, then return all columns ie `SELECT *`
     *
     * NOTE(review): the selected schema is built without [relation] — confirm this is intended.
     *
     * @param identifiers field identifiers, in output order
     */
    fun select(identifiers: List<String>): Schema {
        if (identifiers.isEmpty()) return this
        return Schema(identifiers.mapNotNull { identifier -> fieldMap[identifier] })
    }

    /**
     * Finds the [Field] based on the identifier, or throws
     *
     * @param identifier field identifier to look up
     */
    fun find(identifier: String): Field {
        val field = fieldMap[identifier]
        if (field == null) throw Exception("unknown identifier `$identifier`")
        return field
    }

    /**
     * Returns true if every field of this schema also appears in the other
     *
     * @param other schema to compare against
     */
    fun subsetOf(other: Schema): Boolean {
        val available = other.fields.toSet()
        return fields.all { field -> field in available }
    }

    // Helper method though?
    // Concatenates the relation-qualified fields of both schemas, this schema's first.
    fun combine(other: Schema, relation: String = ""): Schema =
        Schema(resolvedFields(this) + resolvedFields(other), relation)

    companion object {

        // Renames all fields in this Schema to use the fully resolved name
        fun resolvedFields(schema: Schema): List<Field> {
            val prefix = schema.relation
            return schema.fields.map { field -> Field("$prefix.${field.identifier}", field.type) }
        }

        // Returns a combined schema with just the common fields, in rhs order without duplicates
        fun common(lhs: Schema, rhs: Schema, relation: String = ""): Schema {
            val left = lhs.fields.toSet()
            val shared = rhs.fields.filter { field -> field in left }.distinct()
            return Schema(shared, relation)
        }
    }
}
ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) 17 | }, 18 | String { 19 | override val arrow: ArrowType = ArrowType.Utf8() 20 | }; 21 | 22 | abstract val arrow: ArrowType 23 | } 24 | -------------------------------------------------------------------------------- /src/test/kotlin/com/rchowell/sift/execution/ExecutorTest.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution 2 | 3 | import com.rchowell.sift.shell.data.pets 4 | import org.junit.jupiter.api.Test 5 | 6 | internal class ExecutorTest { 7 | 8 | @Test 9 | fun sift() { 10 | val env = Environment() 11 | env.registerSource(pets) 12 | 13 | // Execute the query 14 | Executor.sift( 15 | env, 16 | """ 17 | `Pets` 18 | |> group MAX(Weight) by Gender, Type 19 | """.trimIndent() 20 | ) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/test/kotlin/com/rchowell/sift/execution/physical/aggregations/KeyTest.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.physical.aggregations 2 | 3 | import org.junit.jupiter.api.Test 4 | import kotlin.test.assertFalse 5 | import kotlin.test.assertTrue 6 | 7 | internal class KeyTest { 8 | 9 | @Test 10 | fun testEquals() { 11 | val key1 = Key(listOf(1.0, 1, "Dog".toByteArray())) 12 | val key2 = Key(listOf(1.0, 1, "Dog".toByteArray())) 13 | val key3 = Key(listOf(1.0, 1, "Cat".toByteArray())) 14 | val key4 = Key(listOf(1.0, 0, "Dog".toByteArray())) 15 | assertTrue { key1 == key2 } 16 | assertFalse { key1 == key3 } 17 | assertFalse { key1 == key4 } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/test/kotlin/com/rchowell/sift/execution/physical/expressions/PhysicalAddExprTest.kt: -------------------------------------------------------------------------------- 1 | package com.rchowell.sift.execution.physical.expressions 2 | 3 | 
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.NumVectorColumn
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

internal class PhysicalAddExprTest {

    /**
     * Evaluates `AddExpr(column 0, column 1)` over a two-column numeric batch
     * and prints the result. Nothing is asserted.
     */
    @Test
    fun eval() {
        val schema = Schema(
            listOf(
                Field("xs", Type.Num),
                Field("ys", Type.Num),
            )
        )
        val xs = Column.VectorFactory.numeric(3)
        xs[0] = 1.0
        xs[1] = 2.0
        xs[2] = 3.0
        xs.valueCount = 3
        val ys = Column.VectorFactory.numeric(3)
        ys[0] = 4.0
        ys[1] = 5.0
        ys[2] = 6.0
        ys.valueCount = 3
        val batch = Batch(schema, listOf(NumVectorColumn(xs), NumVectorColumn(ys)))
        val expr = AddExpr(ColumnExpr(0), ColumnExpr(1))
        println(expr.eval(batch))
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/execution/physical/sifterators/AggregationTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.execution.physical.aggregations.AvgAccumulator
import com.rchowell.sift.execution.physical.aggregations.CountAccumulator
import com.rchowell.sift.execution.physical.aggregations.MaxAccumulator
import com.rchowell.sift.execution.physical.aggregations.MinAccumulator
import com.rchowell.sift.execution.physical.aggregations.SumAccumulator
import com.rchowell.sift.execution.physical.expressions.ColumnExpr
import com.rchowell.sift.source.MemSource
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.NumVectorColumn
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

class AggregationTest {

    /**
     * Runs sum/min/max/count/avg accumulators, with no grouping, over a single
     * numeric column holding 1.0..100.0 and prints the first batch produced.
     */
    @Test
    fun aggregates() {
        val n = 1..100
        // One schema for both the source and its batch, so the field name the
        // scan asks for ("x") matches what the batch declares.
        val schema = Schema(listOf(Field("x", Type.Num)))
        val xs = Column.VectorFactory.numeric(n.map { it.toDouble() })
        val source = MemSource(
            identifier = "Foo",
            schema = schema,
            data = listOf(Batch(schema, listOf(NumVectorColumn(xs))))
        )
        val aggregation = Aggregation(
            input = Scan(source, listOf("x")),
            aggregations = listOf(
                SumAccumulator(ColumnExpr(0)),
                MinAccumulator(ColumnExpr(0)),
                MaxAccumulator(ColumnExpr(0)),
                CountAccumulator(ColumnExpr(0)),
                AvgAccumulator(ColumnExpr(0)),
            ),
            groups = listOf(),
            schema = Schema(
                listOf(
                    Field("sum", Type.Num),
                    Field("min", Type.Num),
                    Field("max", Type.Num),
                    Field("count", Type.Num),
                    Field("avg", Type.Num),
                )
            )
        )
        aggregation.open()
        val batch = aggregation.next()
        println(batch)
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/execution/physical/sifterators/DistinctTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.source.MemSource
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

class DistinctTest {

    /**
     * Scans a six-row in-memory batch through `Distinct(scan, listOf(0))` and
     * prints every batch produced until `next()` returns null.
     */
    @Test
    internal fun basic() {
        val schema = Schema(
            listOf(
                Field("string", Type.String),
                Field("num", Type.Num),
                Field("bool", Type.Bool),
            )
        )
        val scan = Scan(
            source = MemSource(
                "Foo",
                schema = schema,
                data = listOf(
                    Batch(
                        schema = schema,
                        columns = listOf(
                            Column.Factory.string(listOf("a", "a", "a", "a", "b", "b")),
                            Column.Factory.numeric(listOf(1.0, 1.0, 2.0, 2.0, 1.0, 1.0)),
                            Column.Factory.boolean(
                                listOf(
                                    true,
                                    true,
                                    true,
                                    false,
                                    false,
                                    false,
                                )
                            ),
                        ),
                    ),
                ),
            ),
            fields = listOf("string", "num", "bool")
        )
        val op = Distinct(scan, listOf(0))
        op.open()
        // drain the operator: next() signals exhaustion with null
        var batch = op.next()
        while (batch != null) {
            println(batch)
            batch = op.next()
        }
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/execution/physical/sifterators/LimitTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.source.MemSource
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

internal class LimitTest {

    /**
     * Scans a ten-row in-memory batch through `Limit(scan, 7)` and prints every
     * batch produced until `next()` returns null.
     */
    @Test
    internal fun basic() {
        val schema = Schema(
            listOf(
                Field("string", Type.String),
                Field("num", Type.Num),
                Field("bool", Type.Bool),
            )
        )
        val scan = Scan(
            source = MemSource(
                "Foo",
                schema = schema,
                data = listOf(
                    Batch(
                        schema = schema,
                        columns = listOf(
                            Column.Factory.string(listOf("a", "b", "c", "d", "e", "f", "g", "h", "i", "j")),
                            Column.Factory.numeric(listOf(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)),
                            Column.Factory.boolean(
                                listOf(
                                    true,
                                    true,
                                    true,
                                    true,
                                    true,
                                    false,
                                    false,
                                    false,
                                    false,
                                    false
                                )
                            ),
                        ),
                    ),
                ),
            ),
            fields = listOf("string", "num", "bool")
        )
        val limit = 7
        val limitSifterator = Limit(scan, limit)
        limitSifterator.open()
        // drain the operator: next() signals exhaustion with null
        var batch = limitSifterator.next()
        while (batch != null) {
            println(batch)
            batch = limitSifterator.next()
        }
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/execution/physical/sifterators/ProjectionTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.execution.physical.expressions.AddExpr
import com.rchowell.sift.execution.physical.expressions.ColumnExpr
import com.rchowell.sift.execution.physical.expressions.MulExpr
import com.rchowell.sift.source.MemSource
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.NumVectorColumn
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

class ProjectionTest {

    /**
     * Projects `x + y` and `z * (x + y)` over a three-column numeric batch and
     * prints the first batch produced. Nothing is asserted.
     */
    @Test
    fun foo() {
        val xs = Column.VectorFactory.numeric(listOf(1.0, 2.0, 3.0))
        val ys = Column.VectorFactory.numeric(listOf(2.0, 4.0, 6.0))
        val zs = Column.VectorFactory.numeric(listOf(3.0, 5.0, 9.0))
        val schema = Schema(
            listOf(
                Field("x", Type.Num),
                Field("y", Type.Num),
                Field("z", Type.Num),
            )
        )
        val source = MemSource(
            identifier = "Foo",
            schema = schema,
            data = listOf(
                Batch(
                    schema,
                    listOf(
                        NumVectorColumn(xs),
                        NumVectorColumn(ys),
                        NumVectorColumn(zs)
                    )
                )
            )
        )
        val projection = Projection(
            projections = mapOf(
                0 to AddExpr(
                    ColumnExpr(0),
                    ColumnExpr(1),
                ),
                1 to MulExpr(
                    ColumnExpr(2),
                    AddExpr(
                        ColumnExpr(0),
                        ColumnExpr(1),
                    ),
                ),
            ),
            input = Scan(source, listOf("x", "y", "z")),
            // NOTE(review): two projections are supplied but the output schema
            // declares a single field - confirm this is intended.
            schema = Schema(
                listOf(
                    Field("x", Type.Num),
                )
            )
        )
        projection.open()
        // print the result like the sibling sifterator tests do
        println(projection.next())
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/execution/physical/sifterators/ScanTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.source.MemSource
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.BoolVectorColumn
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.apache.arrow.memory.RootAllocator
import org.apache.arrow.vector.BitVector
import org.junit.jupiter.api.Test

internal class ScanTest {

    /**
     * Scans a single boolean column (1, 1, 0) held in a [MemSource] and prints
     * the first batch produced. Nothing is asserted.
     */
    @Test
    fun simple() {
        // NOTE(review): neither the allocator nor the vector is closed in this
        // test - confirm who owns the buffers before adding cleanup.
        val allocator = RootAllocator(Long.MAX_VALUE)
        val bv = BitVector("", allocator)
        val schema = Schema(
            listOf(
                Field(
                    "foo",
                    type = Type.Bool,
                )
            )
        )
        bv.allocateNew(3)
        bv[0] = 1
        bv[1] = 1
        bv[2] = 0
        bv.valueCount = 3
        val source = MemSource(
            identifier = "Foo",
            schema = schema,
            data = listOf(
                Batch(
                    schema,
                    listOf(
                        BoolVectorColumn(bv)
                    )
                )
            )
        )
        val physicalScan = Scan(source, listOf("foo"))
        physicalScan.open()
        val b = physicalScan.next()
        println(b)
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/execution/physical/sifterators/SelectionTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.physical.sifterators

import com.rchowell.sift.execution.physical.expressions.ColumnExpr
import com.rchowell.sift.execution.physical.expressions.GtBinaryExpr
import com.rchowell.sift.execution.physical.expressions.GteBinaryExpr
import com.rchowell.sift.execution.physical.expressions.LiteralExpr
import com.rchowell.sift.execution.physical.expressions.MulExpr
import com.rchowell.sift.source.MemSource
import com.rchowell.sift.types.Batch
import com.rchowell.sift.types.Column
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.NumVectorColumn
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

internal class SelectionTest {

    /**
     * Chains Selection(col0 >= col1) -> Projection(col0 * col1, col0 * col1)
     * -> Selection(col0 > 5) over a two-column numeric batch and prints the
     * first batch produced. Nothing is asserted.
     */
    @Test
    fun next() {

        val schema = Schema(
            listOf(
                Field("a", Type.Num),
                Field("b", Type.Num),
            )
        )
        val source = MemSource(
            identifier = "Foo",
            schema = schema,
            data = listOf(
                Batch(
                    schema,
                    listOf(
                        NumVectorColumn(Column.VectorFactory.numeric(listOf(1.0, 2.0, 3.0, 4.0, 8.0, 1.0))),
                        NumVectorColumn(Column.VectorFactory.numeric(listOf(4.0, 3.0, 2.0, 1.0, 3.0, 2.0))),
                    )
                )
            )
        )

        val sifter = Selection(
            input = Projection(
                input = Selection(
                    input = Scan(source, listOf("a", "b")),
                    GteBinaryExpr(ColumnExpr(0), ColumnExpr(1)),
                ),
                projections = mapOf(
                    0 to MulExpr(ColumnExpr(0), ColumnExpr(1)),
                    1 to MulExpr(ColumnExpr(0), ColumnExpr(1)) // TODO fix how Selection changes output schema
                ),
                schema = schema,
            ),
            // NOTE(review): the literal is an Int while the columns hold doubles -
            // confirm the comparison handles mixed numeric types.
            predicateBinary = GtBinaryExpr(
                lhs = ColumnExpr(0),
                rhs = LiteralExpr(5)
            ),
        )

        // run it
        sifter.open()
        val batch = sifter.next()
        println(batch)
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/execution/planner/PlannerTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.execution.planner

import com.rchowell.sift.execution.Environment
import com.rchowell.sift.language.v0.lexers.DirectCodedLexer
import com.rchowell.sift.language.v0.parsers.rd.RecursiveDescentParser
import com.rchowell.sift.source.EmptySource
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

internal class PlannerTest {

    /**
     * Lexes and parses a SELECT/PROJECT query against an empty `Families`
     * source, hands the parsed plan to [Planner.plan] and prints the result.
     * Nothing is asserted.
     */
    @Test
    fun plan() {
        val src = EmptySource(
            "Families",
            Schema(
                listOf(
                    Field("firstName", Type.String),
                    Field("lastName", Type.String),
                    Field("gender", Type.String),
                    Field("age", Type.Num)
                )
            )
        )
        val env = Environment()
        env.registerSource(src)

        val lexer = DirectCodedLexer()
        val parser = RecursiveDescentParser(env)
        val query = """
            'Families' |> SELECT gender = 'Male' |> PROJECT age / 10 -> decades
        """.trimIndent()
        val tokens = lexer.tokenize(query)
        val plan = parser.parse(tokens)
        val physicalPlan = Planner.plan(plan)
        println(physicalPlan)
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/language/v0/antlr/SiftAntlrTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.antlr

import com.rchowell.sift.execution.Environment
import com.rchowell.sift.source.EmptySource
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

/**
 * Smoke tests for [SiftCompiler]: each test feeds one query to `describe`
 * (or to the lexer/parser directly) and asserts nothing about the output.
 *
 * NOTE(review): the `Dogs`/`Cats` sources declare a lowercase `age` field while
 * several queries reference `Age` - confirm whether identifiers are
 * case-sensitive.
 */
class SiftAntlrTest {

    private val dogs = EmptySource(
        identifier = "Dogs",
        schema = Schema(
            listOf(
                Field("age", Type.Num),
                Field("name", Type.String),
            )
        )
    )
    private val cats = EmptySource(
        identifier = "Cats",
        schema = Schema(
            listOf(
                Field("age", Type.Num),
                Field("name", Type.String),
            )
        )
    )
    private val env = Environment(listOf(dogs, cats))
    private val compiler = SiftCompiler(env)

    /** Describes a query that chains every transform against a local `Test` source. */
    @Test
    fun test() {
        val source = EmptySource(
            identifier = "Test",
            schema = Schema(
                listOf(
                    Field("a", Type.Num),
                    Field("b", Type.String),
                    Field("c", Type.Bool),
                )
            )
        )
        // local environment/compiler: this query targets `Test`, not Dogs/Cats
        val env = Environment(listOf(source))
        val query = """
            `Test`
            |> SELECT a > 100 && b = "foo"
            |> DISTINCT
            |> PROJECT a, b, c > 50 -> Old
            |> GROUP SUM(a), AVG(b) -> x
            |> SORT x DESC
            |> LIMIT 50
        """.trimIndent()
        val compiler = SiftCompiler(env)
        compiler.describe(query, verbose = true)
    }

    /** Describes a cross product (`X`) of the two sources. */
    @Test
    fun cross() {
        val query = """
            (`Dogs` X `Cats`) |> SELECT Age > 2
        """.trimIndent()
        compiler.describe(query, verbose = true)
    }

    /** Describes a union (`U`) of the two sources. */
    @Test
    fun union() {
        val query = """
            (`Dogs` U `Cats`) |> SELECT Age > 2
        """.trimIndent()
        compiler.describe(query, verbose = true)
    }

    /** Describes a difference (`-`) of the two sources. */
    @Test
    fun diff() {
        val query = """
            (`Dogs` - `Cats`) |> SELECT Age > 2
        """.trimIndent()
        compiler.describe(query, verbose = true)
    }

    /** Describes an intersection (`&`) of the two sources. */
    @Test
    fun intersect() {
        val query = """
            (`Dogs` & `Cats`) |> SELECT Age > 2
        """.trimIndent()
        compiler.describe(query, verbose = true)
    }

    /** Describes a selection comparing a field to a string literal containing spaces and a hyphen. */
    @Test
    fun stringEq() {
        val query = """
            `Cats` |> SELECT name = "Ramona Howell-Collins"
        """.trimIndent()
        compiler.describe(query, verbose = true)
    }

    /** Lexes a query, runs the parser's `query` rule over it, then prints the stream's tokens. */
    @Test
    fun lexer() {
        val tokens = SiftCompiler.lex("`Cats` |> SELECT Age > 0")
        val parser = SiftParser(tokens)
        // the parse tree itself is not inspected; the call is kept so the
        // parse still runs before the tokens are printed
        parser.query()
        println("--------------")
        tokens.tokens.forEach {
            println(it)
        }
    }

    /** Prints every keyword reported by [SiftCompiler.keywords]. */
    @Test
    fun keywords() {
        val keywords = SiftCompiler.keywords()
        keywords.forEach { println(it) }
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/language/v0/lexers/DirectCodedLexerTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.lexers

import org.junit.jupiter.api.Test

internal class DirectCodedLexerTest {

    /**
     * Tokenizes a query that uses set operators, PROJECT, SELECT and GROUP
     * and prints each token. Nothing is asserted.
     */
    @Test
    fun tokenizeMostLanguageConstructs() {
        val lexer = DirectCodedLexer()
        val query = """
            ('A' X 'B') U ('C' & 'D')
            |> PROJECT x, y, z
            |> SELECT ((x > y) && (z < y)) || (x == z)
            |> PROJECT x / 10 -> x, y * 2 -> yeven, z + 3 -> zeee
            |> GROUP SUM(x), AVG(y), MIN(z) BY yeven, zeee
        """.trimIndent()
        val tokens = lexer.tokenize(query)
        for (t in tokens) {
            println(t)
        }
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/language/v0/parsers/rd/RecursiveDescentParserTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.language.v0.parsers.rd

import com.rchowell.sift.execution.Environment
import com.rchowell.sift.language.v0.lexers.DirectCodedLexer
import com.rchowell.sift.source.EmptySource
import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Test

internal class RecursiveDescentParserTest {

    /**
     * Tokenizes and parses a SELECT/PROJECT/GROUP/LIMIT query against an empty
     * `Families` source and prints the pretty-printed plan. Nothing is asserted.
     */
    @Test
    fun simple() {
        val src = EmptySource(
            "Families",
            Schema(
                listOf(
                    Field("firstName", Type.String),
                    Field("lastName", Type.String),
                    Field("gender", Type.String),
                    Field("age", Type.Num),
                    // the query below projects `height`, so the source has to expose it
                    Field("height", Type.Num),
                )
            )
        )
        val env = Environment()
        env.registerSource(src)
        val lexer = DirectCodedLexer()
        val parser = RecursiveDescentParser(env)
        val query = """
            'Families'
            |> SELECT (gender = 'Male') && (age > 3)
            |> PROJECT age, height / 12 -> feet, height % 12 -> inches
            |> GROUP MAX(age) -> OldestAtHeight BY feet
            |> LIMIT 3
        """.trimIndent()
        val tokens = lexer.tokenize(query)
        val plan = parser.parse(tokens)
        println(plan.pretty())
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/source/CsvSourceTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.source

import com.rchowell.sift.types.Field
import com.rchowell.sift.types.Schema
import com.rchowell.sift.types.Type
import org.junit.jupiter.api.Assumptions.assumeTrue
import org.junit.jupiter.api.Test
import java.io.File

internal class CsvSourceTest {

    /**
     * Prints every batch read from a local CSV file.
     *
     * The file lives at a machine-specific path, so the test is skipped (not
     * failed) when it is absent.
     */
    @Test
    internal fun printAll() {
        val path = "/Users/rch/Desktop/mlb_players.csv"
        assumeTrue(File(path).exists(), "CSV fixture not found at $path")
        val source = CsvSource(
            identifier = "mlb",
            schema = Schema(
                listOf(
                    Field("Name", Type.String),
                    Field("Team", Type.String),
                    Field("Position", Type.String),
                    Field("Height", Type.Num),
                    Field("Weight", Type.Num),
                    Field("Age", Type.Num),
                )
            ),
            path = path,
            header = true,
        )
        source.init()
        try {
            source.scan(listOf()).iterator().forEach {
                println(it)
            }
        } finally {
            // close the source even if scanning or printing throws
            source.close()
        }
    }
}
--------------------------------------------------------------------------------
/src/test/kotlin/com/rchowell/sift/types/BatchTest.kt:
--------------------------------------------------------------------------------
package com.rchowell.sift.types

import org.junit.jupiter.api.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue

// Not really looking forward to the day of writing real tests
internal class BatchTest {

    // Three rows: (3.0, "abc", true), (2.0, "xyz", true), (1.0, "abc", false)
    private val batch = Batch(
        schema = Schema(
            listOf(
                Field("a", Type.Num),
                Field("b", Type.String),
                Field("c", Type.Bool),
            )
        ),
        columns = listOf(
            Column.Factory.numeric(listOf(3.0, 2.0, 1.0)),
            Column.Factory.string(listOf("abc", "xyz", "abc")),
            Column.Factory.boolean(listOf(true, true, false)),
        )
    )

    /**
     * Checks row ordering for a comparator built on one field at a time.
     * Uses kotlin.test assertions, which run regardless of the JVM `-ea` flag.
     */
    @Test
    internal fun singleFieldComparator() {
        val a = batch.comparator(listOf("a"))
        assertTrue(a.compare(0, 1) > 0) // 3.0 > 2.0
        assertTrue(a.compare(0, 2) > 0) // 3.0 > 1.0
        assertTrue(a.compare(1, 2) > 0) // 2.0 > 1.0

        val b = batch.comparator(listOf("b"))
        assertTrue(b.compare(0, 1) < 0) // abc sorts before xyz
        assertEquals(0, b.compare(0, 2))
        assertTrue(b.compare(1, 2) > 0) // xyz sorts after abc

        val c = batch.comparator(listOf("c"))
        assertEquals(0, c.compare(0, 1))
        assertTrue(c.compare(0, 2) < 0) // true before false
        assertTrue(c.compare(1, 2) < 0) // true before false
    }

    /** Checks row ordering for a comparator on ("b", "a"): ties on "b" fall through to "a". */
    @Test
    internal fun multiFieldComparison() {
        val comp = batch.comparator(listOf("b", "a"))
        assertTrue(comp.compare(0, 1) < 0) // abc sorts before xyz
        assertTrue(comp.compare(0, 2) > 0) // abc = abc, 3 > 1
        assertTrue(comp.compare(1, 2) > 0) // xyz sorts after abc
    }

    /** Sorts by a single field and prints the result. Nothing is asserted. */
    @Test
    internal fun singleFieldSort() {
        val sorted = batch.sort(listOf("a"))
        println(sorted)
    }

    /** Sorts by several two-field combinations and prints each result. Nothing is asserted. */
    @Test
    internal fun multiFieldSort() {
        val sorted = batch.sort(listOf("a", "b"))
        println(sorted)
        val sorted2 = batch.sort(listOf("b", "a"))
        println(sorted2)
        val sorted3 = batch.sort(listOf("c", "b"))
        println(sorted3)
    }
}
--------------------------------------------------------------------------------