├── .gitignore
├── CMakeLists.txt
├── Makefile
├── MushroomCloud
├── Manifest.txt
├── MushroomCloud.iml
├── README.md
├── lib
│ ├── expression.jar
│ ├── java-json.jar
│ └── jsqlparser.jar
└── src
│ └── edu
│ └── buffalo
│ └── cse562
│ ├── LeafValueComparator.java
│ ├── Main.java
│ ├── ParseTreeEvaluator.java
│ ├── ParseTreeGenerator.java
│ ├── ParseTreeOptimizer.java
│ ├── exceptions
│ └── UnsupportedStatementException.java
│ ├── operators
│ ├── CrossProductOperator.java
│ ├── DistinctOperator.java
│ ├── ExternalSortOperator.java
│ ├── GraceHashJoinOperator.java
│ ├── LimitOperator.java
│ ├── Operator.java
│ ├── OrderByOperator.java
│ ├── ProjectScanOperator.java
│ ├── ProjectionGroupByAggregateOperator.java
│ ├── ProjectionOperator.java
│ ├── ScanOperator.java
│ ├── SelectionOperator.java
│ ├── SortMergeJoinOperator.java
│ └── UnionOperator.java
│ └── schema
│ ├── ColumnInfo.java
│ ├── ColumnWithType.java
│ └── Schema.java
├── README.md
├── Shell.py
├── Valkyrie
├── CMakeLists.txt
├── Makefile
├── include
│ ├── Codegen.h
│ ├── DataTypes.h
│ ├── Expression.h
│ ├── ExpressionParser.h
│ ├── JoinOperator.h
│ ├── Operator.h
│ ├── Parser.h
│ ├── PrintOperator.h
│ ├── ProjectionOperator.h
│ ├── ScanOperator.h
│ ├── Schema.h
│ ├── SelectOperator.h
│ └── rapidjson
│ │ ├── allocators.h
│ │ ├── document.h
│ │ ├── encodedstream.h
│ │ ├── encodings.h
│ │ ├── error
│ │ ├── en.h
│ │ └── error.h
│ │ ├── filereadstream.h
│ │ ├── filewritestream.h
│ │ ├── internal
│ │ ├── biginteger.h
│ │ ├── diyfp.h
│ │ ├── dtoa.h
│ │ ├── ieee754.h
│ │ ├── itoa.h
│ │ ├── meta.h
│ │ ├── pow10.h
│ │ ├── stack.h
│ │ ├── strfunc.h
│ │ ├── strtod.h
│ │ └── swap.h
│ │ ├── memorybuffer.h
│ │ ├── memorystream.h
│ │ ├── msinttypes
│ │ ├── inttypes.h
│ │ └── stdint.h
│ │ ├── pointer.h
│ │ ├── prettywriter.h
│ │ ├── rapidjson.h
│ │ ├── reader.h
│ │ ├── stringbuffer.h
│ │ └── writer.h
└── src
│ ├── Codegen.cpp
│ ├── Expression.cpp
│ ├── ExpressionParser.cpp
│ ├── JoinOperator.cpp
│ ├── Main.cpp
│ ├── Operator.cpp
│ ├── Parser.cpp
│ ├── PrintOperator.cpp
│ ├── ProjectionOperator.cpp
│ ├── ScanOperator.cpp
│ ├── Schema.cpp
│ └── SelectOperator.cpp
├── ValkyrieDB.sh
├── allexps.sh
├── exp.sh
└── test
├── cprogs
├── q1
├── q1.c
├── q2.c
├── q3.c
└── q4.cpp
├── sql
├── ddl-schema.sql
├── test.sql
├── testcases
│ ├── 1.sql
│ ├── 2.sql
│ ├── 3.sql
│ ├── 4.sql
│ └── 5.sql
├── tpch1.sql
├── tpch10.sql
├── tpch12.sql
├── tpch3.sql
├── tpch5.sql
├── tpch6.sql
└── tpch_schemas.sql
└── tpch-data-setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | /bin/
2 | MushroomCloud/bin/
3 | Valkyrie/build/
4 |
5 | .idea/
6 |
7 | perf.data
8 | perf.data.old
9 |
10 | *.tbl
11 | *.out
12 |
13 | test/tpch.db
14 | test/tpch-dbgen
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8)
2 | project(ValkyrieDB)
3 |
4 | add_subdirectory(Valkyrie)
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | INC_DIR = ./Valkyrie/include
2 | SRC_DIR = ./Valkyrie/src
3 | OBJ_DIR = ./Valkyrie/object
4 |
5 | SRC_FILES = $(wildcard $(SRC_DIR)/*.cpp)
6 | OBJ_FILES = $(SRC_FILES:.cpp=.o)
7 | OBJ_PATH = $(patsubst $(SRC_DIR)/%,$(OBJ_DIR)/%,$(OBJ_FILES))
8 |
9 | LIBS=
10 | CC=clang++-3.5
11 | CFLAGS=-rdynamic -g -O3 -m64 `llvm-config-3.5 --cxxflags --ldflags --system-libs --libs core mcjit native bitwriter`
12 |
13 | all: init mushroomcloud valkyrie
14 |
15 | init:
16 | mkdir -p ./bin
17 | mkdir -p MushroomCloud/bin
18 |
19 | mushroomcloud:
20 | javac -d MushroomCloud/bin/ -cp MushroomCloud/src:"MushroomCloud/lib/*" -sourcepath MushroomCloud/src MushroomCloud/src/edu/buffalo/cse562/Main.java
21 | jar cvfm bin/MushroomCloud.jar MushroomCloud/Manifest.txt -C MushroomCloud/bin/ .
22 | cp -a MushroomCloud/lib/. ./bin/
23 |
24 | valkyrie:
25 | $(CC) $(SRC_DIR)/*.cpp $(CFLAGS) -o bin/llvmruntime -I$(INC_DIR)
26 |
27 | clean:
28 | rm -r MushroomCloud/bin
29 | rm -r ./bin
30 |
--------------------------------------------------------------------------------
/MushroomCloud/Manifest.txt:
--------------------------------------------------------------------------------
1 | Main-Class: edu/buffalo/cse562/Main
2 | Class-Path: ./jsqlparser.jar ./expression.jar ./java-json.jar
3 |
--------------------------------------------------------------------------------
/MushroomCloud/MushroomCloud.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/MushroomCloud/README.md:
--------------------------------------------------------------------------------
1 | # SQL Evaluator
2 | A SQL evaluator done as part of a graduate database class
3 |
4 | This project translates SQL to an abstract syntax tree (AST) representation and then applies relational algebra rewrites to find an optimum query evaluation strategy. Efficient joins and external sort algorithms enable it to work with huge datasets even on limited memory.
5 |
6 | # Build and run
7 |
8 | To build, just run the build script -
9 |
10 | ```
11 | > ./build.sh
12 | ```
13 |
14 | Do not forget to add executable permission to the script using ```chmod +x```
15 |
16 | This will compile all classes in the ```src``` directory and place them in an automatically created ```bin``` directory.
17 |
18 | To run, you can use the included run script -
19 |
20 | ```
21 | > ./run.sh
22 | ```
23 |
24 | # Sample data and queries
25 |
26 | Several sample datasets and SQL queries are included. These can be found in the ```data``` and ```sql``` folders respectively.
27 |
28 | There is a set of small sanity check relations ```r, s, t``` consisting of just integer data. A bite-sized TPC-H dataset has been included to test TPC-H queries against. To generate a larger dataset, please use a DBGen program for the TPC-H benchmark.
29 |
30 | This is one that has been tested and works.
31 | https://github.com/electrum/tpch-dbgen
32 |
33 | # Syntax
34 |
35 | ```
36 | > run.sh --data <data directory> [ --swap <swap directory> ] <sql file> ...
37 | ```
38 |
39 | The data flag points to the data directory. The swap flag points to a directory used for out of memory operations like external sorts.
40 |
41 | # Example
42 |
43 | ```
44 | > ./run.sh --data data sql/tpch1.sql
45 | A|F|3608|3617399.8|3415815.6154|3550622.3881|25.5887|25655.3177|0.0535|141
46 | N|F|98|96050.28|93793.9484|94868.9501|32.6667|32016.76|0.0233|3
47 | N|O|7917|7922719.62|7540013.3753|7850451.2831|25.9574|25976.1299|0.0487|305
48 | R|F|3269|3260914.61|3079298.8793|3200628.9333|24.3955|24335.1837|0.0516|134
49 | ```
50 |
51 | # Additional Flags
52 |
53 | SQL-Parser supports a few additional flags to generate output and show debugging information.
54 |
55 | ```--debug``` flag enables debugging information like generated query plans, optimized query plans and execution times.
56 |
57 | ```-q``` quiet mode, results are not output to the console.
58 |
59 | ```--fileout <directory>``` results are written to a file named ```<sql file name>.out``` in the specified directory, which will be created if it doesn't exist.
60 |
61 | ```--time <directory>``` Query plan generation time and query execution times are written as a ```|``` separated tuple to a file named ```.stat``` in the specified directory, which will be created if it doesn't exist.
62 |
--------------------------------------------------------------------------------
/MushroomCloud/lib/expression.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Legacy25/ValkyrieDB/c2adee18658658e78b7a4b464c8db91657fe58c1/MushroomCloud/lib/expression.jar
--------------------------------------------------------------------------------
/MushroomCloud/lib/java-json.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Legacy25/ValkyrieDB/c2adee18658658e78b7a4b464c8db91657fe58c1/MushroomCloud/lib/java-json.jar
--------------------------------------------------------------------------------
/MushroomCloud/lib/jsqlparser.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Legacy25/ValkyrieDB/c2adee18658658e78b7a4b464c8db91657fe58c1/MushroomCloud/lib/jsqlparser.jar
--------------------------------------------------------------------------------
/MushroomCloud/src/edu/buffalo/cse562/LeafValueComparator.java:
--------------------------------------------------------------------------------
1 | package edu.buffalo.cse562;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Comparator;
5 | import java.util.HashMap;
6 |
7 | import edu.buffalo.cse562.schema.Schema;
8 | import net.sf.jsqlparser.expression.DateValue;
9 | import net.sf.jsqlparser.expression.DoubleValue;
10 | import net.sf.jsqlparser.expression.LeafValue;
11 | import net.sf.jsqlparser.expression.LongValue;
12 | import net.sf.jsqlparser.expression.StringValue;
13 | import net.sf.jsqlparser.expression.LeafValue.InvalidLeaf;
14 | import net.sf.jsqlparser.statement.select.OrderByElement;
15 |
16 | public class LeafValueComparator implements Comparator {
17 | /* Orders LeafValue[] tuples according to a list of OrderByElement sort keys resolved against a Schema */
18 | /* Schema whose column list is searched to resolve each sort key to a column index */
19 | private Schema schema;
20 |
21 | /* Sort keys (OrderByElement entries), kept in the order supplied to the constructor */
22 | private ArrayList arguments;
23 |
24 | /* Maps each sort key to the index of its column inside a tuple; filled in by findColumns() */
25 | private HashMap columnIndexes;
26 |
27 | /* Stores the sort keys and schema, then resolves every key's column index via findColumns() */
28 | public LeafValueComparator(ArrayList arguments ,
29 | Schema schema) {
30 |
31 | this.arguments = arguments;
32 | this.schema = schema;
33 |
34 | columnIndexes = new HashMap();
35 |
36 | findColumns();
37 |
38 | }
39 |
40 |
41 | /*
42 | * Resolves each sort key to the index of the first matching schema column and records it in columnIndexes; the visible condition matches when the lower-cased key expression contains the lower-cased column name
43 | */
44 | private void findColumns() {
45 |
46 | for(OrderByElement o:arguments) {
47 | String columnName = o.getExpression().toString();
48 | /* NOTE(review): the loop header and the first half of the condition below appear truncated in this listing (line 50 is missing) - confirm against the original file */
49 | for(int i=0; i= 0
51 | || columnName.toLowerCase().indexOf(schema.getColumns().get(i).getColumnName().toLowerCase()) >= 0) {
52 |
53 | columnIndexes.put(o, i);
54 | break;
55 |
56 | }
57 | }
58 | }
59 |
60 | }
61 | /* Compares two tuples key by key: int/decimal keys via toDouble(), string/date keys via a case-insensitive toString() comparison. */
62 | /* Returns 1 or -1 (inverted for descending keys) for the lowest-index key on which the tuples differ, or 0 when no key yields a difference. */
63 | /* The value type used to pick the comparison is taken from o1's element only. */
64 | @Override
65 | public int compare(LeafValue[] o1, LeafValue[] o2) {
66 | String type = ""; /* NOTE(review): declared outside the loop, so a value of unrecognized type reuses the previous key's type */
67 | int returnVal = 0;
68 |
69 | for(int i=arguments.size()-1; i>=0; i--) { /* last key first: a differing earlier key overwrites returnVal below */
70 | OrderByElement o = arguments.get(i);
71 | int column = columnIndexes.get(o);
72 | boolean isAsc = o.isAsc();
73 | LeafValue element = o1[column];
74 |
75 | if(element instanceof LongValue) {
76 | type = "int";
77 | }
78 | else if(element instanceof DoubleValue) {
79 | type = "decimal";
80 | }
81 | else if(element instanceof StringValue) {
82 | type = "string";
83 | }
84 | else if(element instanceof DateValue) {
85 | type = "date";
86 | }
87 |
88 | switch(type) {
89 | case "int":
90 | case "decimal":
91 | try {
92 | if(o1[column].toDouble() == o2[column].toDouble())
93 | continue; /* tie on this key: keep whatever the later keys decided */
94 | if(o1[column].toDouble() > o2[column].toDouble()) {
95 | if(isAsc) {
96 | returnVal = 1;
97 | }
98 | else {
99 | returnVal = -1;
100 | }
101 | }
102 | else {
103 | if(isAsc) {
104 | returnVal = -1;
105 | }
106 | else {
107 | returnVal = 1;
108 | }
109 | }
110 | break;
111 | } catch (InvalidLeaf e) {
112 | break; /* conversion failed: returnVal is left unchanged for this key */
113 | }
114 | case "string":
115 | case "date":
116 | if(o1[column].toString().equalsIgnoreCase(o2[column].toString())) {
117 | continue;
118 | }
119 | if(o1[column].toString().compareToIgnoreCase(o2[column].toString()) > 0) {
120 | if(isAsc) {
121 | returnVal = 1;
122 | }
123 | else {
124 | returnVal = -1;
125 | }
126 | }
127 | else {
128 | if(isAsc) {
129 | returnVal = -1;
130 | }
131 | else {
132 | returnVal = 1;
133 | }
134 | }
135 | break;
136 | default:
137 | /* Reached only while type is still "" (no recognized value type seen yet); the key does not affect the result */
138 | }
139 | }
140 |
141 | return returnVal;
142 | }
143 | }
144 |
--------------------------------------------------------------------------------
/MushroomCloud/src/edu/buffalo/cse562/Main.java:
--------------------------------------------------------------------------------
1 | /*
2 | * SQL Evaluator Engine
3 | * Authors: Arindam Nandi
4 | * Saptarshi Bhattacharjee
5 | * Sayaritra Pal
6 | *
7 | * Spring 2015
8 | */
9 |
10 | package edu.buffalo.cse562;
11 |
12 | import java.io.BufferedWriter;
13 | import java.io.File;
14 | import java.io.FileWriter;
15 | import java.io.IOException;
16 | import java.nio.file.Files;
17 | import java.util.ArrayList;
18 | import java.util.HashMap;
19 | import java.util.Map;
20 | //import java.util.function.Consumer;
21 |
22 | import org.json.JSONObject;
23 |
24 | import edu.buffalo.cse562.operators.Operator;
25 |
26 | public class Main {
27 |
28 | public static final int BILLION = 1000*1000*1000;
29 |
30 | /*
31 | * Stores the swap directory
32 | * Need application-wide access to this, so a static global
33 | */
34 | public static File SWAP = null;
35 |
36 | /*
37 | * Initialize a file name counter required for bookkeeping
38 | * in external k-way merge sort
39 | */
40 | public static int FILE_UUID = 0;
41 |
42 | /*
43 | * Controls the size of the I/O buffer used to retrieve
44 | * records from file to memory. Varying this gives some
45 | * interesting variations in query processing time
46 | */
47 | public static int BLOCK = 10000;
48 |
49 | /*
50 | * Provides application wide access to information
51 | * regarding whether there is a memory limit,
52 | * this is of use to the parse-tree optimizer
53 | * in choosing non-blocking operators
54 | */
55 | public static boolean MEMORY_LIMITED = false;
56 |
57 | /*
58 | * Enabled by setting the --debug flag on the CLI.
59 | * Mainly used to print out various debugging info
60 | * like query plans and processing times
61 | */
62 | public static boolean DEBUG = false;
63 |
64 | /*
65 | * --fileout flag redirects the results to files
66 | * instead of the console
67 | */
68 | public static boolean FILE_OUTPUT = false;
69 | public static File FILE_OUTPUT_DIR = null;
70 |
71 | /*
72 | * --time flag prints out query execution times to a file
73 | */
74 | public static boolean TIME_OUTPUT = false;
75 | public static File TIME_OUTPUT_DIR = null;
76 |
77 |
78 | /*
79 | * -q Quiet mode
80 | */
81 | public static boolean QUIET = false;
82 |
83 | /*
84 | * Execute mode, either dumps a representation of the query plan
85 | * or executes the query
86 | */
87 | public static boolean EXECUTE = false;
88 |
89 | public static boolean PUSH_DOWN_PROJECTIONS = false;
90 |
91 | public static void main(String[] args) {
92 |
93 | DEBUG = false;
94 |
95 | /* Stores the data directories */
96 | ArrayList dataDirs = new ArrayList();
97 |
98 | /* Stores the SQL files */
99 | ArrayList sqlFiles = new ArrayList();
100 |
101 | /* Stores query generation times for each query */
102 | ArrayList qgenTime = new ArrayList();
103 |
104 | /* Stores query execution times for each query */
105 | ArrayList qexecTime = new ArrayList();
106 |
107 | /*
108 | * CLI argument parsing
109 | */
110 | for(int i=0; i parseTreeList = new ArrayList();
168 | for(int i = 0; i < sqlFiles.size(); i++) {
169 | File f = sqlFiles.get(i);
170 | long localStart = System.nanoTime();
171 | parseTreeList.add(ParseTreeGenerator.generate(dataDirs, f));
172 |
173 | /* Compute the generation time for this query */
174 | qgenTime.add((double) (System.nanoTime() - localStart)/BILLION);
175 | }
176 |
177 | /* Optimize each AST */
178 | for(int i = 0; i < parseTreeList.size(); i++) {
179 | Operator parseTree = parseTreeList.get(i);
180 | long localStart = System.nanoTime();
181 | parseTreeList.set(i, ParseTreeOptimizer.optimize(parseTree));
182 |
183 | /* Compute the optimization time for this query
184 | * and add it to the generation time
185 | */
186 | qgenTime.set(i, qgenTime.get(i) + (double) (System.nanoTime() - localStart)/BILLION);
187 |
188 | /*
189 | * Display the query plan
190 | */
191 | /*if(Main.DEBUG) {
192 | parseTreeList.forEach(new Consumer() {
193 | @Override
194 | public void accept(Operator t) {
195 | System.out.println(t.getSchema());
196 | }
197 | });
198 | }*/
199 | }
200 |
201 | /*
202 | * Optimized query plans ready
203 | */
204 |
205 | long totalGenerateTime = System.nanoTime();
206 |
207 | if(Main.EXECUTE){
208 | /* Now evaluate each parse-tree */
209 | for(int i=0; i< parseTreeList.size(); i++) {
210 | long localStart = System.nanoTime();
211 |
212 | if(parseTreeList.get(i) != null) {
213 | if(FILE_OUTPUT) {
214 | ParseTreeEvaluator.output(
215 | parseTreeList.get(i),
216 | new File(
217 | FILE_OUTPUT_DIR,
218 | sqlFiles.get(i).getName().split(".sql")[0] + ".out"
219 | )
220 | );
221 | } else {
222 | ParseTreeEvaluator.output(parseTreeList.get(i));
223 | }
224 | }
225 |
226 | qexecTime.add((double) (System.nanoTime() - localStart)/BILLION);
227 | }
228 | } else {
229 | for (Operator operator : parseTreeList) {
230 | if(operator != null) {
231 | Map root = new HashMap();
232 | root.put("TYPE", "PRINT");
233 | root.put("SRC", operator.getDetails());
234 | root.put("EXPRESSION", new ArrayList