├── data-generator ├── .gitignore ├── dataGenerator └── dataGenerator.cpp ├── src ├── operator │ ├── Operator.cpp │ ├── CMakeLists.txt │ ├── MapOperator.cpp │ ├── GenerateOperator.cpp │ ├── FilterOperator.cpp │ ├── KeyOperator.cpp │ ├── AggregateOperator.cpp │ ├── FinalWindowAggOperator.cpp │ ├── PrintOperator.cpp │ ├── InputOperator.cpp │ ├── ReadOperator.cpp │ ├── SelectOperator.cpp │ ├── GroupByOperator.cpp │ ├── WriteOperator.cpp │ ├── WriteToMemOperator.cpp │ ├── WindowOperator.cpp │ └── ReadWindowOperator.cpp ├── code_generation │ ├── QueryContext.cpp │ ├── CMakeLists.txt │ ├── CCode.cpp │ ├── CFile.cpp │ ├── CMethod.cpp │ └── CodeGenerator.cpp ├── CMakeLists.txt ├── jit │ ├── CMakeLists.txt │ ├── Profiling.cpp │ ├── Variant.cpp │ ├── JITCodeGenerator.cpp │ ├── JITExecutionRuntime.cpp │ └── CodeCompiler.cpp └── api │ ├── Field.cpp │ ├── CMakeLists.txt │ ├── Time.cpp │ ├── Schema.cpp │ ├── Config.cpp │ ├── Trigger.cpp │ ├── Predicate.cpp │ ├── Query.cpp │ ├── Assigner.cpp │ └── Aggregation.cpp ├── include ├── runtime │ └── input_types.h ├── jit │ ├── runtime │ │ ├── input_types.h │ │ ├── jit_global_state.hpp │ │ ├── JitRuntime.h │ │ ├── SimpleDispatcher.h │ │ ├── Variant.hpp │ │ ├── zip.hpp │ │ ├── JitDispatcher.h │ │ └── Profiling.h │ ├── JITCodeGenerator.h │ ├── JITExecutionRuntime.h │ └── CodeCompiler.hpp ├── api │ ├── Time.h │ ├── Schema.h │ ├── Query.h │ ├── Trigger.h │ ├── Window.h │ ├── Field.h │ ├── Config.h │ ├── Aggregation.h │ ├── Mapper.h │ ├── Assigner.h │ └── Predicate.h ├── operator │ ├── PrintOperator.h │ ├── ReadOperator.h │ ├── KeyOperator.h │ ├── WriteToMemOperator.h │ ├── GenerateOperator.h │ ├── WriteOperator.h │ ├── ReadWindowOperator.h │ ├── SelectOperator.h │ ├── FilterOperator.h │ ├── MapOperator.h │ ├── AggregateOperator.h │ ├── GroupByOperator.h │ ├── WindowOperator.h │ ├── FinalWindowAggOperator.h │ ├── InputOperator.h │ └── Operator.h └── code_generation │ ├── CCode.h │ ├── QueryContext.h │ ├── CFile.h │ ├── CodeGenerator.h │ └── 
CMethod.h ├── cmake ├── macros.cmake ├── FindPAPI.cmake ├── Findclang.cmake └── FindLLVM.cmake ├── CMakeLists.txt ├── start.cpp └── README.md /data-generator/.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | nexmark_data_generator 3 | -------------------------------------------------------------------------------- /src/operator/Operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator/Operator.h" 2 | -------------------------------------------------------------------------------- /src/code_generation/QueryContext.cpp: -------------------------------------------------------------------------------- 1 | #include "code_generation/QueryContext.h" 2 | -------------------------------------------------------------------------------- /data-generator/dataGenerator: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TU-Berlin-DIMA/grizzly-prototype/HEAD/data-generator/dataGenerator -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(api) 2 | add_subdirectory(code_generation) 3 | add_subdirectory(operator) 4 | add_subdirectory(jit) 5 | -------------------------------------------------------------------------------- /src/code_generation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_source_grizzly( 2 | CCode.cpp 3 | CFile.cpp 4 | CMethod.cpp 5 | CodeGenerator.cpp 6 | QueryContext.cpp 7 | ) -------------------------------------------------------------------------------- /src/jit/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_source_grizzly( 2 | CodeCompiler.cpp 3 | Variant.cpp 4 | JITExecutionRuntime.cpp 5 | Profiling.cpp 6 | 
JITCodeGenerator.cpp 7 | ) 8 | -------------------------------------------------------------------------------- /src/api/Field.cpp: -------------------------------------------------------------------------------- 1 | #include "api/Field.h" 2 | 3 | Field::Field(std::string name, DataType type, std::size_t size, SourceType srcType) 4 | : name(name), dataType(type), size(size), srcType(srcType) {} 5 | -------------------------------------------------------------------------------- /include/runtime/input_types.h: -------------------------------------------------------------------------------- 1 | #ifndef API_INPUT_TYPES_H 2 | #define API_INPUT_TYPES_H 3 | 4 | enum InputType { CSVFile, BinaryFile, Socket, Memory, UNDEFINED_INPUT_TYPE }; 5 | 6 | #endif // API_INPUT_TYPES_H 7 | -------------------------------------------------------------------------------- /include/jit/runtime/input_types.h: -------------------------------------------------------------------------------- 1 | #ifndef API_INPUT_TYPES_H 2 | #define API_INPUT_TYPES_H 3 | 4 | enum InputType { CSVFile, BinaryFile, Socket, Memory, UNDEFINED_INPUT_TYPE }; 5 | 6 | #endif // API_INPUT_TYPES_H 7 | -------------------------------------------------------------------------------- /src/api/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_source_grizzly( 2 | Aggregation.cpp 3 | Assigner.cpp 4 | Config.cpp 5 | Field.cpp 6 | Predicate.cpp 7 | Query.cpp 8 | Schema.cpp 9 | Time.cpp 10 | Trigger.cpp 11 | ) -------------------------------------------------------------------------------- /src/api/Time.cpp: -------------------------------------------------------------------------------- 1 | #include "api/Time.h" 2 | 3 | Time Time::seconds(size_t seconds) { 4 | Time time(seconds); 5 | return time; 6 | } 7 | 8 | Time Time::minutes(size_t minutes) { 9 | Time time(minutes * 60); 10 | return time; 11 | } 12 | 13 | std::string Time::to_string() { return 
std::to_string(this->time); } -------------------------------------------------------------------------------- /include/api/Time.h: -------------------------------------------------------------------------------- 1 | #ifndef API_TIME_H 2 | #define API_TIME_H 3 | 4 | #include 5 | #include 6 | 7 | class Time { 8 | public: 9 | static Time seconds(size_t seconds); 10 | static Time minutes(size_t minutes); 11 | size_t time; 12 | std::string to_string(); 13 | 14 | private: 15 | Time(size_t seconds) : time(seconds) {} 16 | }; 17 | 18 | #endif // API_TIME_H 19 | -------------------------------------------------------------------------------- /include/operator/PrintOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_PRINT_OPERATOR_H 2 | #define OPERATOR_PRINT_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class PrintOperator : public Operator { 7 | public: 8 | PrintOperator(Operator *input); 9 | ~PrintOperator(); 10 | void consume(CodeGenerator &cg); 11 | void produce(CodeGenerator &cg); 12 | std::string to_string(); 13 | 14 | private: 15 | Operator *input; 16 | }; 17 | 18 | #endif // OPERATOR_PRINT_OPERATOR_H 19 | -------------------------------------------------------------------------------- /include/operator/ReadOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_READ_OPERATOR_H 2 | #define OPERATOR_READ_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class ReadOperator : public Operator { 7 | public: 8 | ReadOperator(Schema &schema); 9 | ~ReadOperator(); 10 | void consume(CodeGenerator &cg); 11 | void produce(CodeGenerator &cg); 12 | std::string to_string(); 13 | 14 | private: 15 | Schema &schema; 16 | Operator *input; 17 | }; 18 | 19 | #endif // OPERATOR_READ_OPERATOR_H 20 | -------------------------------------------------------------------------------- /include/operator/KeyOperator.h: 
-------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_KEY_OPERATOR_H 2 | #define OPERATOR_KEY_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class KeyOperator : public Operator { 7 | public: 8 | KeyOperator(Field &field, Operator *input); 9 | ~KeyOperator(); 10 | void consume(CodeGenerator &cg); 11 | void produce(CodeGenerator &cg); 12 | std::string to_string(); 13 | 14 | private: 15 | Field &field; 16 | Operator *input; 17 | }; 18 | 19 | #endif // OPERATOR_KEY_OPERATOR_H 20 | -------------------------------------------------------------------------------- /include/operator/WriteToMemOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_WRITE_MEM_OPERATOR_H 2 | #define OPERATOR_WRITE_MEM_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class WriteToMemOperator : public Operator { 7 | public: 8 | WriteToMemOperator(Operator *input); 9 | ~WriteToMemOperator(); 10 | void consume(CodeGenerator &cg); 11 | void produce(CodeGenerator &cg); 12 | std::string to_string(); 13 | 14 | private: 15 | Operator *input; 16 | }; 17 | 18 | #endif // OPERATOR_WRITE_MEM_OPERATOR_H 19 | -------------------------------------------------------------------------------- /include/operator/GenerateOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_GENERATE_OPERATOR_H 2 | #define OPERATOR_GENERATE_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class GenerateOperator : public Operator { 7 | public: 8 | GenerateOperator(Operator *input); 9 | ~GenerateOperator(); 10 | void consume(CodeGenerator &cg) override; 11 | void produce(CodeGenerator &cg) override; 12 | std::string to_string() override; 13 | 14 | private: 15 | Operator *input; 16 | }; 17 | 18 | #endif // OPERATOR_GENERATE_OPERATOR_H 19 | -------------------------------------------------------------------------------- 
/include/operator/WriteOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_WRITE_OPERATOR_H 2 | #define OPERATOR_WRITE_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class WriteOperator : public Operator { 7 | public: 8 | WriteOperator(std::string fileName, Operator *input); 9 | ~WriteOperator(); 10 | void consume(CodeGenerator &cg); 11 | void produce(CodeGenerator &cg); 12 | std::string to_string(); 13 | 14 | private: 15 | std::string fileName; 16 | Operator *input; 17 | }; 18 | 19 | #endif // OPERATOR_WRITE_OPERATOR_H 20 | -------------------------------------------------------------------------------- /include/operator/ReadWindowOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_READ_WINDOW_OPERATOR_H 2 | #define OPERATOR_READ_WINDOW_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class ReadWindowOperator : public Operator { 7 | public: 8 | ReadWindowOperator(Schema &schema, Operator *input); 9 | ~ReadWindowOperator(); 10 | void consume(CodeGenerator &cg); 11 | void produce(CodeGenerator &cg); 12 | std::string to_string(); 13 | 14 | private: 15 | Schema &schema; 16 | Operator *input; 17 | }; 18 | 19 | #endif // OPERATOR_READ_OPERATOR_H 20 | -------------------------------------------------------------------------------- /include/operator/SelectOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_SELECT_OPERATOR_H 2 | #define OPERATOR_SELECT_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class SelectOperator : public Operator { 7 | public: 8 | SelectOperator(Operator *input, std::vector fields); 9 | ~SelectOperator(); 10 | void consume(CodeGenerator &cg); 11 | void produce(CodeGenerator &cg); 12 | std::string to_string(); 13 | 14 | private: 15 | Operator *input; 16 | std::vector fields; 17 | }; 18 | 19 | #endif // OPERATOR_SELECT_OPERATOR_H 20 | 
-------------------------------------------------------------------------------- /src/operator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_source_grizzly( 2 | AggregateOperator.cpp 3 | FilterOperator.cpp 4 | GenerateOperator.cpp 5 | GroupByOperator.cpp 6 | InputOperator.cpp 7 | KeyOperator.cpp 8 | MapOperator.cpp 9 | Operator.cpp 10 | PrintOperator.cpp 11 | ReadOperator.cpp 12 | ReadWindowOperator.cpp 13 | WindowOperator.cpp 14 | WriteOperator.cpp 15 | WriteToMemOperator.cpp 16 | SelectOperator.cpp 17 | FinalWindowAggOperator.cpp 18 | ) -------------------------------------------------------------------------------- /include/operator/FilterOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_FILTER_OPERATOR_H 2 | #define OPERATOR_FILTER_OPERATOR_H 3 | 4 | #include "api/Predicate.h" 5 | #include "operator/Operator.h" 6 | 7 | class FilterOperator : public Operator { 8 | public: 9 | FilterOperator(Predicate &predicate, Operator *input); 10 | ~FilterOperator(); 11 | void consume(CodeGenerator &cg); 12 | void produce(CodeGenerator &cg); 13 | std::string to_string(); 14 | 15 | private: 16 | Predicate &predicate; 17 | Operator *input; 18 | }; 19 | 20 | #endif // OPERATOR_FILTER_OPERATOR_H 21 | -------------------------------------------------------------------------------- /include/operator/MapOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_MAP_OPERATOR_H 2 | #define OPERATOR_MAP_OPERATOR_H 3 | 4 | #include "api/Aggregation.h" 5 | #include "api/Mapper.h" 6 | #include "operator/Operator.h" 7 | 8 | class MapOperator : public Operator { 9 | public: 10 | MapOperator(Mapper &mapper, Operator *input); 11 | ~MapOperator(); 12 | void consume(CodeGenerator &cg); 13 | void produce(CodeGenerator &cg); 14 | std::string to_string(); 15 | 16 | private: 17 | Mapper &mapper; 18 | Operator *input; 19 | 
}; 20 | 21 | #endif // OPERATOR_MAP_OPERATOR_H 22 | -------------------------------------------------------------------------------- /include/operator/AggregateOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_AGGREGATE_OPERATOR_H 2 | #define OPERATOR_AGGREGATE_OPERATOR_H 3 | 4 | #include "api/Aggregation.h" 5 | #include "operator/Operator.h" 6 | 7 | class AggregateOperator : public Operator { 8 | public: 9 | AggregateOperator(Aggregation &aggregation, Operator *input); 10 | ~AggregateOperator(); 11 | void consume(CodeGenerator &cg); 12 | void produce(CodeGenerator &cg); 13 | std::string to_string(); 14 | 15 | private: 16 | Aggregation &aggregation; 17 | Operator *input; 18 | }; 19 | 20 | #endif // OPERATOR_AGGREGATE_OPERATOR_H 21 | -------------------------------------------------------------------------------- /include/api/Schema.h: -------------------------------------------------------------------------------- 1 | #ifndef API_SCHEMA_H 2 | #define API_SCHEMA_H 3 | 4 | #include 5 | #include 6 | 7 | #include "api/Field.h" 8 | 9 | class Schema { 10 | public: 11 | static Schema create(); 12 | Schema &addFixSizeField(std::string name, DataType dataType, SourceType srcType); 13 | Schema &addVarSizeField(std::string name, DataType dataType, size_t dataSize, SourceType srcType); 14 | Field &get(std::string name); 15 | Schema &print(); 16 | std::vector fields; 17 | size_t getInputSize(); 18 | 19 | private: 20 | Schema(); 21 | }; 22 | 23 | #endif // API_SCHEMA_H 24 | -------------------------------------------------------------------------------- /include/operator/GroupByOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_GROUPBY_OPERATOR_H 2 | #define OPERATOR_GROUPBY_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class GroupByOperator : public Operator { 7 | public: 8 | GroupByOperator(Field &field, Operator *input); 9 | 
GroupByOperator(Field &field, Operator *input, int maxValue); 10 | ~GroupByOperator(); 11 | void consume(CodeGenerator &cg); 12 | void produce(CodeGenerator &cg); 13 | std::string to_string(); 14 | 15 | private: 16 | Field &field; 17 | Operator *input; 18 | int maxValue; 19 | }; 20 | 21 | #endif // OPERATOR_GROUPBY_OPERATOR_H 22 | -------------------------------------------------------------------------------- /include/operator/WindowOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_WINDOW_OPERATOR_H 2 | #define OPERATOR_WINDOW_OPERATOR_H 3 | 4 | #include "api/Assigner.h" 5 | #include "api/Trigger.h" 6 | #include "operator/Operator.h" 7 | 8 | class WindowOperator : public Operator { 9 | public: 10 | WindowOperator(Assigner *assigner, Trigger *trigger, Operator *input); 11 | ~WindowOperator(); 12 | void consume(CodeGenerator &cg); 13 | void produce(CodeGenerator &cg); 14 | std::string to_string(); 15 | 16 | private: 17 | Assigner *assigner; 18 | Trigger *trigger; 19 | Operator *input; 20 | }; 21 | 22 | #endif // OPERATOR_WINDOW_OPERATOR_H 23 | -------------------------------------------------------------------------------- /include/code_generation/CCode.h: -------------------------------------------------------------------------------- 1 | #ifndef CODE_GENERATION_C_CODE_H 2 | #define CODE_GENERATION_C_CODE_H 3 | 4 | #include 5 | #include 6 | 7 | class CCode { 8 | 9 | public: 10 | class Builder { 11 | public: 12 | std::vector statements; 13 | 14 | Builder(); 15 | Builder &addStatement(const std::string &statement); 16 | Builder &beginControlFlow(const std::string &statement); 17 | Builder &endControlFlow(); 18 | CCode build(); 19 | }; 20 | 21 | std::string output; 22 | static CCode::Builder builder(); 23 | 24 | private: 25 | CCode(CCode::Builder &builder); 26 | }; 27 | 28 | #endif // CODE_GENERATION_C_CODE_H 29 | -------------------------------------------------------------------------------- 
/include/operator/FinalWindowAggOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_FinalWindowAggOperator_OPERATOR_H 2 | #define OPERATOR_FinalWindowAggOperator_OPERATOR_H 3 | 4 | #include "api/Aggregation.h" 5 | #include "api/Mapper.h" 6 | #include "operator/Operator.h" 7 | 8 | class FinalWindowAggOperator : public Operator { 9 | public: 10 | FinalWindowAggOperator(Aggregation *aggregation, Operator *input); 11 | ~FinalWindowAggOperator(); 12 | void consume(CodeGenerator &cg); 13 | void produce(CodeGenerator &cg); 14 | std::string to_string(); 15 | 16 | private: 17 | Operator *input; 18 | Aggregation *aggregation; 19 | }; 20 | 21 | #endif // OPERATOR_FinalWindowAggOperator_OPERATOR_H 22 | -------------------------------------------------------------------------------- /include/operator/InputOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_INPUT_OPERATOR_H 2 | #define OPERATOR_INPUT_OPERATOR_H 3 | 4 | #include "operator/Operator.h" 5 | 6 | class InputOperator : public Operator { 7 | public: 8 | InputOperator(InputType type, std::string path, Operator *input); 9 | ~InputOperator(); 10 | void consume(CodeGenerator &cg) override; 11 | void produce(CodeGenerator &cg) override; 12 | std::string to_string() override; 13 | InputType getInputType(); 14 | std::string getInputTypeAsString(); 15 | std::string getPath(); 16 | 17 | private: 18 | InputType type; 19 | std::string path; 20 | Operator *input; 21 | }; 22 | 23 | #endif // OPERATOR_INPUT_OPERATOR_H 24 | -------------------------------------------------------------------------------- /include/operator/Operator.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATOR_OPERATOR_H 2 | #define OPERATOR_OPERATOR_H 3 | 4 | #include 5 | 6 | #include "api/Field.h" 7 | #include "api/Schema.h" 8 | #include "code_generation/CodeGenerator.h" 9 | #include 
"runtime/input_types.h" 10 | 11 | class Operator { 12 | public: 13 | virtual ~Operator() {} 14 | virtual void produce(CodeGenerator &cg) = 0; 15 | virtual void consume(CodeGenerator &cg) = 0; 16 | Operator *parent; 17 | Operator *leftChild; 18 | Operator *rightChild; 19 | std::string name; 20 | size_t pipeline; 21 | size_t cost; 22 | 23 | virtual std::string to_string() { return "Operator"; } 24 | }; 25 | 26 | #endif // OPERATOR_OPERATOR_H 27 | -------------------------------------------------------------------------------- /src/operator/MapOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/MapOperator.h" 5 | 6 | MapOperator::MapOperator(Mapper &mapper, Operator *input) : mapper(mapper), input(input) { 7 | 8 | leftChild = NULL; 9 | rightChild = NULL; 10 | input->parent = this; 11 | name = "Map"; 12 | } 13 | 14 | std::string MapOperator::to_string() { return "Map"; } 15 | 16 | MapOperator::~MapOperator() { delete input; } 17 | 18 | void MapOperator::consume(CodeGenerator &cg) { 19 | // delegate to specific mapper function 20 | mapper.consume(cg, parent); 21 | } 22 | 23 | void MapOperator::produce(CodeGenerator &cg) { 24 | // delegate to specific mapper function 25 | mapper.produce(cg, input); 26 | } 27 | -------------------------------------------------------------------------------- /include/code_generation/QueryContext.h: -------------------------------------------------------------------------------- 1 | #ifndef CODE_GENERATOR_QUERYCONTEXT_H 2 | #define CODE_GENERATOR_QUERYCONTEXT_H 3 | 4 | #include 5 | 6 | #include "api/Schema.h" 7 | 8 | class QueryContext { 9 | public: 10 | QueryContext(Schema schema) : schema(schema), outputSchema(schema) {} 11 | 12 | enum StateStrategy { INDEPENDENT, SHARED }; 13 | 14 | Schema schema; 15 | Schema outputSchema; 16 | std::vector output; 17 | Field *keyBy; 18 | Field *groupBy; 19 | size_t numWindows = 0; 20 | bool hasKeyBy = false; 
21 | bool hasGroupBy = false; 22 | bool isAggregation = false; 23 | int maxKeyValue = -1; 24 | StateStrategy stateStrategy = SHARED; 25 | }; 26 | 27 | #endif // CODE_GENERATOR_QUERYCONTEXT_H 28 | -------------------------------------------------------------------------------- /src/operator/GenerateOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/GenerateOperator.h" 5 | 6 | GenerateOperator::GenerateOperator(Operator *input) : input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | input->parent = this; 10 | name = "Generate"; 11 | } 12 | 13 | std::string GenerateOperator::to_string() { return "Generate"; } 14 | 15 | GenerateOperator::~GenerateOperator() {} 16 | 17 | void GenerateOperator::consume(CodeGenerator &cg) { 18 | if (parent != nullptr) { 19 | parent->consume(cg); 20 | } 21 | } 22 | 23 | void GenerateOperator::produce(CodeGenerator &cg) { 24 | // generate Yahoo Benchmark Data 25 | // cg.main.addStatement("runtime::generateYahoo(1000);"); 26 | input->produce(cg); 27 | } 28 | -------------------------------------------------------------------------------- /src/operator/FilterOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/FilterOperator.h" 5 | 6 | FilterOperator::FilterOperator(Predicate &predicate, Operator *input) : predicate(predicate), input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | input->parent = this; 10 | name = "Select"; 11 | } 12 | 13 | std::string FilterOperator::to_string() { return "Select "; } 14 | 15 | FilterOperator::~FilterOperator() { delete input; } 16 | 17 | void FilterOperator::consume(CodeGenerator &cg) { 18 | // delegate to specific predicate function 19 | 20 | predicate.consume(cg, parent); 21 | } 22 | 23 | void FilterOperator::produce(CodeGenerator &cg) { 24 | // delegate to specific predicate function 25 | 
predicate.produce(cg, input); 26 | } 27 | -------------------------------------------------------------------------------- /include/jit/JITCodeGenerator.h: -------------------------------------------------------------------------------- 1 | #ifndef CODE_JITGENERATION_CODE_GENERATOR_H 2 | #define CODE_JITGENERATION_CODE_GENERATOR_H 3 | 4 | #include "api/Config.h" 5 | #include "api/Field.h" 6 | #include "code_generation/CCode.h" 7 | #include "code_generation/CFile.h" 8 | #include "code_generation/CMethod.h" 9 | #include "code_generation/CodeGenerator.h" 10 | #include "code_generation/QueryContext.h" 11 | #include "runtime/input_types.h" 12 | 13 | class JITCodeGenerator : public CodeGenerator { 14 | public: 15 | JITCodeGenerator(Config &config, Schema &schema, ProfilingDataManager *profilingDataManager, CompileMode mode); 16 | 17 | CFile generate(std::string type, std::string path); 18 | 19 | void generateStructFile(std::string path); 20 | }; 21 | 22 | #endif // CODE_JITGENERATION_CODE_GENERATOR_H 23 | -------------------------------------------------------------------------------- /src/operator/KeyOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/KeyOperator.h" 5 | 6 | KeyOperator::KeyOperator(Field &field, Operator *input) : field(field), input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | input->parent = this; 10 | name = "Key"; 11 | } 12 | 13 | std::string KeyOperator::to_string() { return "Key by " + field.name; } 14 | 15 | KeyOperator::~KeyOperator() {} 16 | 17 | void KeyOperator::consume(CodeGenerator &cg) { 18 | if (parent != nullptr) { 19 | parent->consume(cg); 20 | } 21 | } 22 | 23 | void KeyOperator::produce(CodeGenerator &cg) { 24 | // add KeyBy-Field to the query context of the pipeline 25 | pipeline = cg.currentPipeline(); 26 | cg.ctx(pipeline).keyBy = &field; 27 | cg.ctx(pipeline).hasKeyBy = true; 28 | 29 | input->produce(cg); 30 | } 31 | 
-------------------------------------------------------------------------------- /include/jit/runtime/jit_global_state.hpp: -------------------------------------------------------------------------------- 1 | #include "tbb/atomic.h" 2 | #ifndef GRIZZLY_JIT_GLOBAL_STATE_HPP 3 | #define GRIZZLY_JIT_GLOBAL_STATE_HPP 4 | 5 | /** 6 | * Thread local state struct, to keep track of current window accross pipeline invocations. 7 | */ 8 | struct ThreadLocalState { 9 | u_int64_t current_window = 0; 10 | int64_t *windowEnds; 11 | }; 12 | 13 | /** 14 | * Global struct to keep track of window state 15 | */ 16 | struct WindowState { 17 | // Counter for how many threads had a local triggered. 18 | tbb::atomic global_tigger_counter = 0; 19 | // Array of thread local state 20 | ThreadLocalState **thread_local_state; 21 | }; 22 | 23 | struct GlobalState { 24 | /** 25 | * Stores window metadata across all pipelines 26 | */ 27 | WindowState **window_state; 28 | }; 29 | 30 | #endif // GRIZZLY_JIT_GLOBAL_STATE_HPP 31 | -------------------------------------------------------------------------------- /src/operator/AggregateOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/AggregateOperator.h" 5 | 6 | AggregateOperator::AggregateOperator(Aggregation &aggregation, Operator *input) 7 | : aggregation(aggregation), input(input) { 8 | leftChild = NULL; 9 | rightChild = NULL; 10 | input->parent = this; 11 | name = "Aggregate"; 12 | } 13 | 14 | std::string AggregateOperator::to_string() { return "Aggregate " + aggregation.to_string(); } 15 | 16 | AggregateOperator::~AggregateOperator() { delete input; } 17 | 18 | void AggregateOperator::consume(CodeGenerator &cg) { 19 | // delegate to specific aggregation function 20 | aggregation.consume(cg, parent); 21 | } 22 | 23 | void AggregateOperator::produce(CodeGenerator &cg) { 24 | // delegate to specific aggregation function 25 | aggregation.produce(cg, 
input); 26 | } 27 | -------------------------------------------------------------------------------- /include/jit/runtime/JitRuntime.h: -------------------------------------------------------------------------------- 1 | #ifndef RUNTIME_RUNTIME_H 2 | #define RUNTIME_RUNTIME_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "JitDispatcher.h" 13 | 14 | /** 15 | * @brief This exception represents the deoptimization from an optimized query back to an unoptimized state 16 | */ 17 | struct DeoptimizeException : public std::exception { 18 | int position; 19 | int pipeline; 20 | void *buffer; 21 | DeoptimizeException(int pipeline, int position, void *buffer) 22 | : position(position), pipeline(pipeline), buffer(buffer) {} 23 | }; 24 | 25 | class JitRuntime { 26 | public: 27 | JitRuntime(); 28 | virtual void monitor(int threadID){}; 29 | }; 30 | 31 | #endif // RUNTIME_RUNTIME_H 32 | -------------------------------------------------------------------------------- /include/code_generation/CFile.h: -------------------------------------------------------------------------------- 1 | #ifndef CODE_GENERATION_C_FILE_H 2 | #define CODE_GENERATION_C_FILE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "code_generation/CCode.h" 8 | #include "code_generation/CMethod.h" 9 | 10 | class CFile { 11 | public: 12 | class Builder { 13 | public: 14 | std::string name; 15 | std::vector includes; 16 | std::vector codes; 17 | std::vector methods; 18 | 19 | Builder(); 20 | Builder &withName(const std::string &name_); 21 | Builder &include(const std::string &include); 22 | Builder &addStatement(const std::string &statement); 23 | Builder &addCode(CCode &code); 24 | Builder &addMethod(CMethod &method); 25 | CFile build(); 26 | }; 27 | 28 | std::string name; 29 | std::string output; 30 | static CFile::Builder builder(); 31 | 32 | private: 33 | CFile(CFile::Builder &builder); 34 | }; 35 | 36 | #endif // CODE_GENERATION_C_FILE_H 37 | 
-------------------------------------------------------------------------------- /src/operator/FinalWindowAggOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/FinalWindowAggOperator.h" 5 | 6 | FinalWindowAggOperator::FinalWindowAggOperator(Aggregation *aggregation, Operator *input) 7 | : aggregation(aggregation), input(input) { 8 | 9 | leftChild = NULL; 10 | rightChild = NULL; 11 | input->parent = this; 12 | name = "Map"; 13 | } 14 | 15 | std::string FinalWindowAggOperator::to_string() { return "FinalWindowAggregation"; } 16 | 17 | FinalWindowAggOperator::~FinalWindowAggOperator() { delete input; } 18 | 19 | void FinalWindowAggOperator::consume(CodeGenerator &cg) { 20 | // delegate to specific mapper function 21 | aggregation->consumeFinalAggregation(cg, parent); 22 | parent->consume(cg); 23 | } 24 | 25 | void FinalWindowAggOperator::produce(CodeGenerator &cg) { 26 | // delegate to specific mapper function 27 | aggregation->produceFinalAggregation(cg, input); 28 | input->produce(cg); 29 | } 30 | -------------------------------------------------------------------------------- /src/api/Schema.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "api/Schema.h" 5 | 6 | Schema::Schema() {} 7 | 8 | Schema Schema::create() { return Schema(); } 9 | 10 | size_t Schema::getInputSize() { 11 | return 78; 12 | // TODO:make it dynamic 13 | } 14 | Schema &Schema::addFixSizeField(std::string name, DataType dataType, SourceType srcType) { 15 | fields.push_back(Field(name, dataType, dataType.defaultSize(), srcType)); 16 | return *this; 17 | } 18 | 19 | Schema &Schema::addVarSizeField(std::string name, DataType dataType, size_t dataSize, SourceType srcType) { 20 | fields.push_back(Field(name, dataType, dataSize, srcType)); 21 | return *this; 22 | } 23 | 24 | Field &Schema::get(std::string pName) { 25 | for (auto 
&f : fields) { 26 | if (f.name == pName) 27 | return f; 28 | } 29 | 30 | throw std::invalid_argument("field " + pName + " does not exist"); 31 | } 32 | 33 | Schema &Schema::print() { 34 | // todo 35 | return *this; 36 | } 37 | -------------------------------------------------------------------------------- /src/jit/Profiling.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by pgrulich on 18.09.19. 3 | // 4 | 5 | #include "jit/runtime/Profiling.h" 6 | 7 | ProfilingHandler::ProfilingHandler(){}; 8 | 9 | MinProfilingHandler::MinProfilingHandler() : ProfilingHandler() { value = INT32_MAX; } 10 | 11 | MaxProfilingHandler::MaxProfilingHandler() : ProfilingHandler() { value = -1; } 12 | 13 | ProfilingDataManager::ProfilingDataManager() {} 14 | 15 | void ProfilingDataManager::registerMinHandler(std::string handlerName) { 16 | this->handlers[handlerName] = (ProfilingHandler *)new MinProfilingHandler(); 17 | } 18 | 19 | void ProfilingDataManager::registerMaxHandler(std::string name) { 20 | handlers[name] = (ProfilingHandler *)new MaxProfilingHandler(); 21 | } 22 | 23 | void ProfilingDataManager::registerSelectivityHandler(std::string name, unsigned long i) { 24 | handlers[name] = (ProfilingHandler *)new SelectivityHandler(i); 25 | } 26 | 27 | void ProfilingDataManager::registerDistributionHandler(std::string name) { 28 | handlers[name] = (ProfilingHandler *)new DistributionProfilingHandler(); 29 | } -------------------------------------------------------------------------------- /cmake/macros.cmake: -------------------------------------------------------------------------------- 1 | macro(add_source PROP_NAME SOURCE_FILES) 2 | set(SOURCE_FILES_ABSOLUTE) 3 | foreach (it ${SOURCE_FILES}) 4 | get_filename_component(ABSOLUTE_PATH ${it} ABSOLUTE) 5 | set(SOURCE_FILES_ABSOLUTE ${SOURCE_FILES_ABSOLUTE} ${ABSOLUTE_PATH}) 6 | endforeach () 7 | 8 | get_property(OLD_PROP_VAL GLOBAL PROPERTY "${PROP_NAME}_SOURCE_PROP") 9 | 
set_property(GLOBAL PROPERTY "${PROP_NAME}_SOURCE_PROP" ${SOURCE_FILES_ABSOLUTE} ${OLD_PROP_VAL}) 10 | endmacro() 11 | 12 | macro(get_source PROP_NAME SOURCE_FILES) 13 | get_property(SOURCE_FILES_LOCAL GLOBAL PROPERTY "${PROP_NAME}_SOURCE_PROP") 14 | set(${SOURCE_FILES} ${SOURCE_FILES_LOCAL}) 15 | endmacro() 16 | 17 | macro(add_source_grizzly) 18 | add_source(GRIZZLY "${ARGN}") 19 | endmacro() 20 | 21 | macro(get_source_grizzly SOURCE_FILES) 22 | get_source(GRIZZLY SOURCE_FILES_LOCAL) 23 | set(${SOURCE_FILES} ${SOURCE_FILES_LOCAL}) 24 | endmacro() 25 | 26 | macro(get_header_grizzly HEADER_FILES) 27 | file(GLOB_RECURSE ${HEADER_FILES} "include/*.h" "include/*.hpp") 28 | endmacro() -------------------------------------------------------------------------------- /src/code_generation/CCode.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "code_generation/CCode.h" 6 | 7 | /* 8 | * CCode Definition 9 | */ 10 | CCode::CCode(CCode::Builder &builder) { 11 | std::stringstream ss; 12 | 13 | // build statements 14 | for (std::string statement : builder.statements) { 15 | ss << statement << std::endl; 16 | } 17 | 18 | output = ss.str(); 19 | } 20 | 21 | CCode::Builder CCode::builder() { return CCode::Builder(); } 22 | 23 | /* 24 | * CCode Builder Definition 25 | */ 26 | CCode::Builder::Builder() {} 27 | 28 | CCode::Builder &CCode::Builder::addStatement(const std::string &statement) { 29 | statements.push_back(statement); 30 | return *this; 31 | } 32 | 33 | CCode::Builder &CCode::Builder::beginControlFlow(const std::string &statement) { 34 | statements.push_back(statement + "{"); 35 | return *this; 36 | } 37 | 38 | CCode::Builder &CCode::Builder::endControlFlow() { 39 | statements.push_back("}"); 40 | return *this; 41 | } 42 | 43 | CCode CCode::Builder::build() { return CCode(*this); } 44 | -------------------------------------------------------------------------------- 
/src/operator/PrintOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/PrintOperator.h" 5 | 6 | PrintOperator::PrintOperator(Operator *input) : input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | name = "Print"; 10 | input->parent = this; 11 | } 12 | 13 | std::string PrintOperator::to_string() { return "Print"; } 14 | 15 | PrintOperator::~PrintOperator() { delete input; } 16 | 17 | void PrintOperator::consume(CodeGenerator &cg) { 18 | std::stringstream statements; 19 | statements << "std::cout "; 20 | 21 | if (cg.ctx(pipeline + 1).hasGroupBy) { 22 | statements << "<< key << \":\" "; 23 | } 24 | 25 | // print each field of the schema 26 | for (auto &field : cg.ctx(pipeline + 1).schema.fields) { 27 | statements << " << record." << field.name << " << \"|\" "; 28 | } 29 | 30 | statements << "<< std::endl;"; 31 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_PRINT, statements.str())); 32 | } 33 | 34 | void PrintOperator::produce(CodeGenerator &cg) { 35 | pipeline = cg.currentPipeline(); 36 | input->produce(cg); 37 | } 38 | -------------------------------------------------------------------------------- /src/operator/InputOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/InputOperator.h" 5 | 6 | InputOperator::InputOperator(InputType pType, std::string pPath, Operator *input) 7 | : type(pType), path(pPath), input(input) { 8 | leftChild = NULL; 9 | rightChild = NULL; 10 | input->parent = this; 11 | name = "Input"; 12 | } 13 | 14 | InputOperator::~InputOperator() { delete input; } 15 | 16 | std::string InputOperator::to_string() { return "Input"; } 17 | 18 | void InputOperator::consume(CodeGenerator &cg) { 19 | // delegate to specific predicate function 20 | if (parent != nullptr) { 21 | parent->consume(cg); 22 | } 23 | // predicate.consume(cg, 
parent); 24 | } 25 | 26 | void InputOperator::produce(CodeGenerator &cg) { 27 | // delegate to specific predicate function 28 | // predicate.produce(cg, input); 29 | // parent->produce(cg); 30 | input->produce(cg); 31 | } 32 | 33 | InputType InputOperator::getInputType() { return type; } 34 | 35 | std::string InputOperator::getInputTypeAsString() { 36 | if (type == BinaryFile) 37 | return "BinaryFile"; 38 | else 39 | return ""; 40 | } 41 | 42 | std::string InputOperator::getPath() { return path; } 43 | -------------------------------------------------------------------------------- /include/jit/runtime/SimpleDispatcher.h: -------------------------------------------------------------------------------- 1 | #include "jit/runtime/JitDispatcher.h" 2 | 3 | class SimpleDispatcher : public Dispatcher { 4 | public: 5 | SimpleDispatcher(unsigned int runLength, unsigned int parallelism, unsigned int bufferSize, unsigned int runs, 6 | unsigned int tupleSize, std::string file, int numa) 7 | : Dispatcher(runLength, parallelism, bufferSize, runs, tupleSize, file, numa) { 8 | loadData(); 9 | std::thread t = std::thread(Dispatcher::throughputLogger, this); 10 | t.detach(); 11 | } 12 | 13 | void loadData() { 14 | std::cout << "Load Data from " << file << std::endl; 15 | buffer = new void **[parallelism]; 16 | for (size_t thread = 0; thread < parallelism; thread++) { 17 | buffer[thread] = new void *[bufferRuns]; 18 | for (size_t bufferRun = 0; bufferRun < bufferRuns; bufferRun++) { 19 | buffer[thread][bufferRun] = malloc(tupleSize * runLength); 20 | std::ifstream ifp(file, std::ios::in | std::ios::binary); 21 | assert(ifp.is_open()); 22 | ifp.read(reinterpret_cast(buffer[thread][bufferRun]), runLength * tupleSize); 23 | ifp.close(); 24 | } 25 | } 26 | } 27 | 28 | void tick(long s) {} 29 | }; -------------------------------------------------------------------------------- /src/operator/ReadOperator.cpp: -------------------------------------------------------------------------------- 
1 | #include 2 | #include 3 | 4 | #include "operator/ReadOperator.h" 5 | 6 | ReadOperator::ReadOperator(Schema &schema) : schema(schema) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | name = "Read"; 10 | } 11 | 12 | std::string ReadOperator::to_string() { return "Read Schema"; } 13 | 14 | ReadOperator::~ReadOperator() {} 15 | 16 | void ReadOperator::consume(CodeGenerator &cg) { 17 | // Leaf operator; no consume function 18 | } 19 | 20 | void ReadOperator::produce(CodeGenerator &cg) { 21 | pipeline = cg.currentPipeline(); 22 | std::string recordType = "record0"; 23 | 24 | // make records available for next operators 25 | cg.pipeline(pipeline).addParameter(recordType + "* records").addParameter("size_t size"); 26 | 27 | // for-loop 28 | std::stringstream statements; 29 | statements << "for(size_t i = 0; iconsume(cg); 35 | } 36 | 37 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_CLOSE, std::string("}\n"))); 38 | } 39 | -------------------------------------------------------------------------------- /src/operator/SelectOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/SelectOperator.h" 5 | 6 | SelectOperator::SelectOperator(Operator *input, std::vector fields) : fields(fields), input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | input->parent = this; 10 | name = "Select"; 11 | } 12 | 13 | std::string SelectOperator::to_string() { return "Select"; } 14 | 15 | SelectOperator::~SelectOperator() { delete input; } 16 | 17 | void SelectOperator::consume(CodeGenerator &cg) { 18 | if (parent != nullptr) { 19 | parent->consume(cg); 20 | } 21 | } 22 | 23 | void SelectOperator::produce(CodeGenerator &cg) { 24 | // add KeyBy-Field to the query context of the pipeline 25 | pipeline = cg.currentPipeline(); 26 | Schema schema = Schema::create(); 27 | for (Field s : cg.ctx(0).schema.fields) { 28 | std::string name = s.name; 29 | for (std::string 
otherField : fields) { 30 | if (name.compare(otherField) == 0) { 31 | schema.fields.push_back(s); 32 | } 33 | } 34 | } 35 | 36 | cg.ctx(cg.currentPipeline()).outputSchema = schema; 37 | QueryContext qx = QueryContext(schema); 38 | cg.generateStruct(schema, "output", cg.currentPipeline(), false); 39 | input->produce(cg); 40 | } 41 | -------------------------------------------------------------------------------- /cmake/FindPAPI.cmake: -------------------------------------------------------------------------------- 1 | # Try to find PAPI headers and libraries. 2 | # 3 | # Usage of this module as follows: 4 | # 5 | # find_package(PAPI) 6 | # 7 | # Variables used by this module, they can change the default behaviour and need 8 | # to be set before calling find_package: 9 | # 10 | # PAPI_PREFIX Set this variable to the root installation of 11 | # libpapi if the module has problems finding the 12 | # proper installation path. 13 | # 14 | # Variables defined by this module: 15 | # 16 | # PAPI_FOUND System has PAPI libraries and headers 17 | # PAPI_LIBRARIES The PAPI library 18 | # PAPI_INCLUDE_DIRS The location of PAPI headers 19 | 20 | find_path(PAPI_PREFIX 21 | NAMES include/papi.h 22 | ) 23 | 24 | find_library(PAPI_LIBRARIES 25 | # Pick the static library first for easier run-time linking. 
26 | NAMES libpapi.so libpapi.a papi 27 | HINTS ${PAPI_PREFIX}/lib ${HILTIDEPS}/lib 28 | ) 29 | 30 | find_path(PAPI_INCLUDE_DIRS 31 | NAMES papi.h 32 | HINTS ${PAPI_PREFIX}/include ${HILTIDEPS}/include 33 | ) 34 | 35 | include(FindPackageHandleStandardArgs) 36 | find_package_handle_standard_args(PAPI DEFAULT_MSG 37 | PAPI_LIBRARIES 38 | PAPI_INCLUDE_DIRS 39 | ) 40 | 41 | mark_as_advanced( 42 | PAPI_PREFIX_DIRS 43 | PAPI_LIBRARIES 44 | PAPI_INCLUDE_DIRS 45 | ) -------------------------------------------------------------------------------- /src/operator/GroupByOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/GroupByOperator.h" 5 | 6 | GroupByOperator::GroupByOperator(Field &field, Operator *input) : field(field), input(input), maxValue(-1) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | input->parent = this; 10 | name = "Group"; 11 | } 12 | 13 | GroupByOperator::GroupByOperator(Field &field, Operator *input, int maxValue) 14 | : field(field), input(input), maxValue(maxValue) { 15 | leftChild = NULL; 16 | rightChild = NULL; 17 | input->parent = this; 18 | name = "Group"; 19 | } 20 | 21 | std::string GroupByOperator::to_string() { return "Group by " + field.name; } 22 | 23 | GroupByOperator::~GroupByOperator() {} 24 | 25 | void GroupByOperator::consume(CodeGenerator &cg) { 26 | if (parent != nullptr) { 27 | parent->consume(cg); 28 | } 29 | } 30 | 31 | void GroupByOperator::produce(CodeGenerator &cg) { 32 | // add GroupBy-Field to the query context of the pipeline 33 | pipeline = cg.currentPipeline(); 34 | cg.ctx(pipeline).groupBy = &field; 35 | cg.ctx(pipeline).hasGroupBy = true; 36 | if (cg.compileMode == CM_OPTIMIZE) { 37 | auto profiledMax = cg.profilingDataManager->getMaxHandler("agg_max")->getValue(); 38 | auto profiledMin = cg.profilingDataManager->getMaxHandler("agg_min")->getValue(); 39 | cg.ctx(pipeline).maxKeyValue = profiledMax; 40 | } 41 | 42 | // 
cg.ctx(pipeline).maxKeyValue = maxValue; 43 | 44 | input->produce(cg); 45 | } 46 | -------------------------------------------------------------------------------- /src/operator/WriteOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/WriteOperator.h" 5 | 6 | WriteOperator::WriteOperator(std::string fileName, Operator *input) : fileName(fileName), input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | name = "Write"; 10 | input->parent = this; 11 | } 12 | 13 | std::string WriteOperator::to_string() { return "Write File " + fileName; } 14 | 15 | WriteOperator::~WriteOperator() { delete input; } 16 | 17 | void WriteOperator::consume(CodeGenerator &cg) { 18 | cg.file.include("fstream"); 19 | 20 | auto init = std::string(""); 21 | auto code = std::string(""); 22 | auto final = std::string(""); 23 | 24 | init += "std::ofstream file;\n"; 25 | init += "file.open(\"" + fileName + "\", std::ios::out | std::ios::app);\n"; 26 | 27 | code += std::string("file"); 28 | 29 | /* Print the Groub Key, if Operator before was Grouped Aggregation. */ 30 | if (cg.ctx(pipeline + 1).hasGroupBy) { 31 | code += " << key"; 32 | } 33 | 34 | /* Print each field of Schema. */ 35 | for (auto &field : cg.ctx(pipeline + 1).schema.fields) { 36 | code += " << \";\" << record." 
+ field.name; 37 | } 38 | code += "<< \"\\n\";\n"; 39 | 40 | final += "file.close();\n"; 41 | 42 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_WRITE, init, code, final)); 43 | } 44 | 45 | void WriteOperator::produce(CodeGenerator &cg) { 46 | pipeline = cg.currentPipeline(); 47 | input->produce(cg); 48 | } 49 | -------------------------------------------------------------------------------- /include/jit/JITExecutionRuntime.h: -------------------------------------------------------------------------------- 1 | #ifndef RUNTIME_JIT_H 2 | #define RUNTIME_JIT_H 3 | 4 | #include "api/Query.h" 5 | #include "condition_variable" 6 | #include "mutex" 7 | #include "runtime/JitRuntime.h" 8 | #include 9 | 10 | enum PipelineState { DEFAULT, INSTRUMENTED, OPTIMIZED }; 11 | 12 | class JITExecutionRuntime : JitRuntime { 13 | public: 14 | JITExecutionRuntime(); 15 | void deoptimize(Variant *variant, void *buffer, int position); 16 | void execute(Query *query); 17 | bool isRunning(); 18 | void monitor(int threadID) override; 19 | 20 | private: 21 | Variant *currentlyExecutingVariant; 22 | Variant *defaultVariant; 23 | static void runWorker(JITExecutionRuntime *runtime, int threadID); 24 | void deployDefault(); 25 | std::mutex redeploy; 26 | int delay; 27 | Query *query; 28 | GlobalState *globalState; 29 | Dispatcher *dispatcher; 30 | std::atomic currentState; 31 | int variantNr; 32 | std::string basename; 33 | std::atomic_bool running; 34 | 35 | void deployInstrumented(); 36 | 37 | void deployOptimized(); 38 | 39 | Variant *compileVariant(Query *query, ProfilingDataManager *profilingDataManager, CompileMode mode); 40 | std::condition_variable compileCondition; 41 | std::condition_variable compilationFinish; 42 | std::mutex compilationMutex; 43 | std::mutex waitMutex; 44 | 45 | static void compilationLoop(JITExecutionRuntime *jitExecutionRuntime); 46 | static void monitor(JITExecutionRuntime *jitExecutionRuntime); 47 | }; 48 | 49 | #endif 
-------------------------------------------------------------------------------- /src/operator/WriteToMemOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/WriteToMemOperator.h" 5 | 6 | WriteToMemOperator::WriteToMemOperator(Operator *input) : input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | name = "WriteToMem"; 10 | input->parent = this; 11 | } 12 | 13 | std::string WriteToMemOperator::to_string() { return "WriteToMem"; } 14 | 15 | WriteToMemOperator::~WriteToMemOperator() { delete input; } 16 | 17 | void WriteToMemOperator::consume(CodeGenerator &cg) { 18 | 19 | auto &record = cg.pipeline(pipeline).parameters[0]; 20 | std::stringstream statements; 21 | cg.ctx(cg.currentPipeline()).outputSchema.print(); 22 | 23 | for (auto f : cg.ctx(cg.currentPipeline()).outputSchema.fields) { 24 | statements << "buffer[thread_id]." << f.name << "=record." << f.name << ";" << std::endl; 25 | } 26 | 27 | cg.generateStruct(cg.ctx(cg.currentPipeline()).outputSchema, "output", cg.currentPipeline(), false); 28 | 29 | std::stringstream intBufferStatement; 30 | intBufferStatement << "auto buffer = (output" << cg.currentPipeline() << "*) malloc (sizeof(output" 31 | << cg.currentPipeline() << ")*1000);"; 32 | intBufferStatement << "int b_i = 0;"; 33 | cg.file.addStatement(intBufferStatement.str()); 34 | 35 | statements << std::endl; 36 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_PRINT, statements.str())); 37 | } 38 | 39 | void WriteToMemOperator::produce(CodeGenerator &cg) { 40 | pipeline = cg.currentPipeline(); 41 | input->produce(cg); 42 | } 43 | -------------------------------------------------------------------------------- /include/code_generation/CodeGenerator.h: -------------------------------------------------------------------------------- 1 | #ifndef CODE_GENERATION_CODE_GENERATOR_H 2 | #define CODE_GENERATION_CODE_GENERATOR_H 3 | 4 | 
#include "api/Config.h" 5 | #include "api/Field.h" 6 | #include "code_generation/CCode.h" 7 | #include "code_generation/CFile.h" 8 | #include "code_generation/CMethod.h" 9 | #include "code_generation/QueryContext.h" 10 | #include "jit/runtime/Profiling.h" 11 | 12 | enum CompileMode { CM_DEFAULT, CM_INSTRUMENT, CM_OPTIMIZE }; 13 | 14 | class CodeGenerator { 15 | public: 16 | CodeGenerator(Config &config, Schema &schema, CompileMode mode); 17 | 18 | CFile generate(std::string type, std::string path); 19 | 20 | void compileCode(); 21 | 22 | void run(); 23 | 24 | CMethod::Builder &pipeline(size_t id); 25 | 26 | QueryContext &ctx(size_t id); 27 | 28 | void addQueryContext(QueryContext ctx); 29 | 30 | void startPipeline(); 31 | 32 | size_t currentPipeline(); 33 | 34 | size_t longestPipeline(); 35 | 36 | CCode generateStruct(Schema &schema, std::string name, size_t id, bool useAtomic); 37 | 38 | Config &config; 39 | CMethod::Builder main; 40 | CMethod::Builder open; 41 | CMethod::Builder migrateFrom; 42 | CMethod::Builder migrateTo; 43 | CMethod::Builder init; 44 | CMethod::Builder execute; 45 | CMethod::Builder getState; 46 | CFile::Builder file; 47 | ProfilingDataManager *profilingDataManager; 48 | CompileMode compileMode; 49 | 50 | void writeToFile(CFile &file); 51 | 52 | protected: 53 | std::vector pipelines; 54 | std::vector queryContexts; 55 | std::vector schemaStructs; 56 | }; 57 | 58 | #endif // CODE_GENERATION_CODE_GENERATOR_H 59 | -------------------------------------------------------------------------------- /include/api/Query.h: -------------------------------------------------------------------------------- 1 | #ifndef API_QUERY_H 2 | #define API_QUERY_H 3 | 4 | #include 5 | #include 6 | 7 | #include "api/Aggregation.h" 8 | #include "api/Config.h" 9 | #include "api/Mapper.h" 10 | #include "api/Predicate.h" 11 | #include "api/Schema.h" 12 | #include "api/Window.h" 13 | #include "operator/Operator.h" 14 | 15 | class Query { 16 | public: 17 | ~Query(); 18 | 
static Query generate(Config &config, Schema &schema, std::string path); 19 | void generate(); 20 | void execute(); 21 | 22 | // relational operators 23 | Query &filter(Predicate &&predicate) { return filter(predicate); }; 24 | Query &filter(Predicate &predicate); 25 | Query &filter(Predicate *predicate) { return filter(*predicate); }; 26 | Query &select(std::vector fields); 27 | Query &groupBy(std::string fieldId); 28 | Query &groupBy(std::string fieldId, int keyRange); 29 | Query &aggregate(Aggregation &&aggregation); 30 | 31 | // streaming operators 32 | Query &window(Window &&window); 33 | Query &map(Mapper &&mapper); 34 | 35 | // input operators 36 | Query &input(InputType type, std::string path); 37 | 38 | // output operators 39 | Query &write(std::string fileName); 40 | Query &print(); 41 | 42 | // helper operators 43 | static void printQueryPlan(Query query); 44 | static void printPipelinePermutations(Query query); 45 | 46 | Operator *root; 47 | Operator *getInputOperator() { return root; } 48 | 49 | Query &toOutputBuffer(); 50 | Schema &schema; 51 | Config &config; 52 | Operator *current; 53 | 54 | private: 55 | Query(Config &config, Schema &schema); 56 | 57 | static void printQueryPlan(Operator *curr, int depth); 58 | }; 59 | 60 | #endif // API_QUERY_H 61 | -------------------------------------------------------------------------------- /include/api/Trigger.h: -------------------------------------------------------------------------------- 1 | #ifndef API_TRIGGER_H 2 | #define API_TRIGGER_H 3 | 4 | #include "api/Time.h" 5 | #include "code_generation/CodeGenerator.h" 6 | 7 | class Trigger { 8 | 9 | public: 10 | virtual void onBeforeElement(CodeGenerator &cg, size_t pipeline) = 0; 11 | virtual void onBeforeAssign(CodeGenerator &cg, size_t pipeline) = 0; 12 | bool purge = false; 13 | virtual std::string to_string() { return "trigger"; }; 14 | }; 15 | 16 | class CountTrigger : public Trigger { 17 | public: 18 | CountTrigger(size_t maxCount) : 
maxCount(maxCount) {} 19 | 20 | void onBeforeElement(CodeGenerator &cg, size_t pipeline) override; 21 | void onBeforeAssign(CodeGenerator &cg, size_t pipeline) override{}; 22 | std::string to_string() override { return "CountTrigger"; } 23 | 24 | private: 25 | size_t maxCount; 26 | }; 27 | 28 | class ProcessingTimeTrigger : public Trigger { 29 | public: 30 | ProcessingTimeTrigger(Time every) : every(every) {} 31 | 32 | void onBeforeElement(CodeGenerator &cg, size_t pipeline) override{}; 33 | void onBeforeAssign(CodeGenerator &cg, size_t pipeline) override; 34 | std::string to_string() override { return "ProcessingTimeTrigger"; } 35 | 36 | private: 37 | Time every; 38 | }; 39 | 40 | 41 | 42 | class PurgingTrigger : public Trigger { 43 | public: 44 | PurgingTrigger(Trigger *trigger) : trigger(trigger) {} 45 | PurgingTrigger(Trigger &&trigger) : trigger(&trigger) {} 46 | 47 | void onBeforeElement(CodeGenerator &cg, size_t pipeline) override; 48 | void onBeforeAssign(CodeGenerator &cg, size_t pipeline) override; 49 | std::string to_string() override { return "PurgingTrigger"; } 50 | 51 | private: 52 | Trigger *trigger; 53 | }; 54 | 55 | #endif // APPI_TRIGGER_H 56 | -------------------------------------------------------------------------------- /include/api/Window.h: -------------------------------------------------------------------------------- 1 | #ifndef API_WINDOW_H 2 | #define API_WINDOW_H 3 | 4 | #include 5 | 6 | #include "api/Assigner.h" 7 | #include "api/Time.h" 8 | #include "api/Trigger.h" 9 | #include "operator/Operator.h" 10 | 11 | class Counter { 12 | public: 13 | Counter(size_t max) : max(max) {} 14 | 15 | size_t max; 16 | 17 | private: 18 | }; 19 | 20 | class Window { 21 | public: 22 | Assigner *assigner; 23 | Trigger *trigger; 24 | 25 | Window withTrigger(Trigger &&trigger) { 26 | this->trigger = &trigger; 27 | return *this; 28 | } 29 | }; 30 | 31 | class TumblingProcessingTimeWindow : public Window { 32 | public: 33 | TumblingProcessingTimeWindow(Time 
typedef uint64_t Timestamp;
using NanoSeconds = std::chrono::nanoseconds;
using Clock = std::chrono::high_resolution_clock;
// NOTE(review): not referenced in this file; 4056 = 52 * 78 matches an older
// 78-byte tuple layout, not the 32-byte struct below.
const int READ_FILE_BUFFERSIZE = 4056;

/** One generated bid record, written to disk exactly as laid out here. */
struct __attribute__((packed)) bit {
  uint64_t auction;
  uint64_t bidder;
  uint64_t price;
  uint64_t dateTime;
  bit() {}
}; // four packed 64-bit fields: 32 bytes

static_assert(sizeof(bit) == 32, "bit must stay a packed 32-byte record");

/**
 * Parses a non-negative decimal count.
 *
 * @param text  command-line argument to parse
 * @param value receives the parsed number on success
 * @return true if `text` consists only of a decimal number
 */
static bool parseCount(const char *text, size_t &value) {
  // reject empty input, signs and leading whitespace up front
  if (text == nullptr || text[0] < '0' || text[0] > '9') {
    return false;
  }
  char *end = nullptr;
  const unsigned long long parsed = strtoull(text, &end, 10);
  if (*end != '\0') {
    return false;
  }
  value = static_cast<size_t>(parsed);
  return true;
}

/**
 * Writes <tuples> pseudo-random bid records to nexmark_test_data.bin.
 *
 * Usage: dataGenerator <tuples> <persons> <auctions>
 * Returns -1 on any argument or I/O error.
 */
int main(int argc, char *argv[]) {
  // Generator Code
  if (argc != 4) {
    std::cout << "1. argument: Number of tuples. 2. argument: Number of persons. 3 argument: Number of auctions"
              << std::endl;
    return -1;
  }

  size_t processCnt = 0;
  size_t personCnt = 0;
  size_t auctionCnt = 0;
  if (!parseCount(argv[1], processCnt) || !parseCount(argv[2], personCnt) || !parseCount(argv[3], auctionCnt)) {
    std::cerr << "All arguments must be non-negative integers." << std::endl;
    return -1;
  }
  if (processCnt % 10 != 0) {
    std::cout << "Number of tuples to be generated should be divisible by 10." << std::endl;
    return -1;
  }
  // both counts are used as modulus below
  if (personCnt == 0 || auctionCnt == 0) {
    std::cerr << "Number of persons and number of auctions must be greater than zero." << std::endl;
    return -1;
  }

  std::ofstream ofp("nexmark_test_data.bin", std::ios::out | std::ios::binary);
  if (!ofp.is_open()) {
    std::cerr << "Could not open nexmark_test_data.bin for writing." << std::endl;
    return -1;
  }

  // Records are streamed one at a time. The rand() call order per record
  // (auction, bidder, price) is unchanged, so the output bytes are the same.
  for (size_t i = 0; i < processCnt; i++) {
    bit rec;
    rec.auction = rand() % auctionCnt;
    rec.bidder = rand() % personCnt;
    rec.price = rand() % 100;
    rec.dateTime = i;
    ofp.write(reinterpret_cast<const char *>(&rec), sizeof(rec));
  }

  ofp.close();
  if (!ofp) {
    std::cerr << "Writing nexmark_test_data.bin failed." << std::endl;
    return -1;
  }
  return 0;
}
/** Where a field's data comes from. */
enum SourceType { Stream, Table };

/**
 * Thin wrapper around the supported column types that also knows how the type
 * is spelled in generated C++ code.
 */
struct DataType {
  enum Type { Boolean, Char, String, Int, Long, Double };
  Type t_;

  DataType(Type t) : t_(t) {}

  /** Allows a DataType to be used directly in switch statements. */
  operator Type() const { return t_; }

  /**
   * Default number of elements for a fixed-size field of this type:
   * 255 for String, 1 for everything else.
   */
  size_t defaultSize() const {
    switch (t_) {
    case DataType::String:
      return 255;
    default:
      return 1;
    }
  }

  /**
   * C type used for a record member of this type.
   * @throws std::invalid_argument for an unknown type value
   */
  std::string cType() const {
    switch (t_) {
    case DataType::Boolean:
      return "bool";
    case DataType::Char:
      return "char";
    case DataType::String:
      return "char";
    case DataType::Int:
      return "int";
    case DataType::Long:
      return "long";
    case DataType::Double:
      return "double";
    default:
      throw std::invalid_argument("data type not supported");
    }
  }

  /**
   * C++ type used when a value of this type serves as a key; Char and String
   * both map to std::string.
   * @throws std::invalid_argument for an unknown type value
   */
  std::string keyType() const {
    switch (t_) {
    case DataType::Boolean:
      return "bool";
    case DataType::Char:
    case DataType::String:
      return "std::string";
    case DataType::Int:
      return "int";
    case DataType::Long:
      return "long";
    case DataType::Double:
      return "double";
    default:
      throw std::invalid_argument("data type not supported");
    }
  }

private:
  // Declared but never defined: blocks implicit conversion to anything other
  // than Type.
  template <typename T> operator T() const;
};

/** One named, typed column of a schema. */
class Field {
public:
  Field(std::string name, DataType dataType, std::size_t dataSize, SourceType srcType);
  std::string name;
  DataType dataType;
  std::size_t size;
  SourceType srcType;
};
Variant::migrateTo(void **outputState) { 33 | auto migrateFromFunction = (*code->getFunctionPointer("_Z9migrateToPPv")); 34 | migrateFromFunction(outputState); 35 | } 36 | 37 | void Variant::migrateFrom(void **inputState) { 38 | auto migrateFromFunction = (*code->getFunctionPointer("_Z11migrateFromPPv")); 39 | migrateFromFunction(inputState); 40 | } 41 | 42 | void Variant::execute(int threadID, int numaNode) { 43 | auto executeFromFunction = (*code->getFunctionPointer("_Z7executeii")); 44 | executeFromFunction(threadID, numaNode); 45 | } 46 | 47 | void **Variant::getState() { 48 | auto getStateFunction = (*code->getFunctionPointer("_Z8getStatev")); 49 | return getStateFunction(); 50 | } -------------------------------------------------------------------------------- /include/jit/runtime/Variant.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GRIZZLY_VARIANT_HPP 2 | #define GRIZZLY_VARIANT_HPP 3 | 4 | #include "JitDispatcher.h" 5 | #include "JitRuntime.h" 6 | #include "Profiling.h" 7 | #include "jit_global_state.hpp" 8 | #include 9 | #include 10 | #include 11 | 12 | class CompiledCCode; 13 | typedef std::shared_ptr CompiledCCodePtr; 14 | 15 | /** 16 | * @brief A variant encapsulates a compiled query. 17 | * Each pipeline can consist of multiple pipeline steps and utility methods. 18 | */ 19 | class Variant { 20 | 21 | public: 22 | Variant(CompiledCCodePtr code, ProfilingDataManager *profilingDataManager, JitRuntime *runtime); 23 | 24 | /** 25 | * @brief Opens the new variant. This is called by each worker thread 26 | * @param globalState 27 | * @param dispatcher 28 | */ 29 | void open(GlobalState *globalState, Dispatcher *dispatcher); 30 | 31 | /** 32 | * @brief Initializes the new variant. This is called by the runtime once per compiled varient. 
#ifndef API_CONFIG_H
#define API_CONFIG_H

#include <string>

/**
 * @brief Fluent configuration object for a query run.
 *
 * Instances are obtained through create(); every `with…` setter returns
 * `*this` so calls can be chained.
 */
class Config {
public:
  /** @brief Creates a configuration holding the default values. */
  static Config create();

  /**
   * @brief Configures the number of worker threads, which process the input.
   * @param parallelism number of worker threads
   * @return this configuration
   */
  Config &withParallelism(unsigned int parallelism);
  unsigned int getParallelism();

  /**
   * @brief Configures the size of the input buffer.
   * @param bufferSize
   * @return this configuration
   */
  Config &withBufferSize(unsigned int bufferSize);
  unsigned int getBufferSize();

  /**
   * @brief Configures the runLength -> the number of records processed at once
   * @param runLength
   * @return this configuration
   */
  Config &withRunLength(unsigned int runLength);
  unsigned int getRunLength();

  unsigned int getPipelinePermutation();
  Config &withPipelinePermutation(unsigned int pipelinePermuation);

  /**
   * @brief Configure the duration of a benchmark run.
   * @param runDuration
   * @return this configuration
   */
  Config &withBenchmarkRunDuration(unsigned int runDuration);
  unsigned int getBenchmarkRunDuration();

  /**
   * @brief Configure the delay before the jit compiler switches to the next compilation stage.
   * @param delay in ms
   * @return this configuration
   */
  Config &withCompilationDelay(unsigned int delay);
  unsigned int getCompilationDelay();

  Config &withOutputBuffer(unsigned size);
  unsigned int getOutputBuffer();
  bool filterOpt();
  bool distributionOpt();
  Config &withDistributionOpt(bool disOpt);
  Config &withFilterOpt(bool filterOpt);

  Config &withNuma(bool numa);
  bool getNuma();
  /** @brief Number of configured NUMA nodes when NUMA is enabled, otherwise 1. */
  int getNumaNodes();

  const std::string &getSourceFile() const;
  void setSourceFile(const std::string &sourceFile);

private:
  Config();

  // Every member carries a default so that no getter can ever read an
  // indeterminate value. The constructor left compilationDelay and
  // outputBuffer unset; the remaining defaults mirror the constructor.
  unsigned int parallelism = 1;
  unsigned int runLength = 1;
  unsigned int bufferSize = 1;
  unsigned int pipelinePermutation = 0;
  unsigned int benchmarkRunTime = 60;
  unsigned int compilationDelay = 0;
  unsigned int outputBuffer = 0;
  bool numa = false;
  bool filterOptimizations = true;
  bool distributionOptimizations = true;
  std::string sourceFile;
};

#endif // API_CONFIG_H
# Find Clang
# Source: https://github.com/karelklic/canal/blob/master/FindClang.cmake
#
# Variables defined by this module:
#  CLANG_FOUND        - True if Clang found.
#  CLANG_INCLUDE_DIRS - where to find Clang include files
#  CLANG_LIBS         - list of clang libs

if (NOT LLVM_INCLUDE_DIRS OR NOT LLVM_LIBRARY_DIRS)
    message(FATAL_ERROR "No LLVM and Clang support requires LLVM")
else ()

    # Looks one Clang library up in the LLVM/Clang library directories and,
    # when it is found, appends its path to CLANG_LIBS.
    macro(FIND_AND_ADD_CLANG_LIB _libname_)
        find_library(CLANG_${_libname_}_LIB ${_libname_} ${LLVM_LIBRARY_DIRS} ${CLANG_LIBRARY_DIRS})
        if (CLANG_${_libname_}_LIB)
            list(APPEND CLANG_LIBS ${CLANG_${_libname_}_LIB})
        endif ()
    endmacro()

    # Clang shared library provides just the limited C interface, so it
    # can not be used. We look for the static libraries.
    foreach (_clang_lib
            clangFrontend
            clangFrontendTool
            clangDriver
            clangCodeGen
            clangEdit
            clangSema
            clangChecker
            clangAnalysis
            clangRewrite
            clangAST
            clangParse
            clangLex
            clangBasic
            clangSerialization
            clangStaticAnalyzerFrontend
            clangStaticAnalyzerCheckers
            clangStaticAnalyzerCore
            clangARCMigrate
            clangRewriteFrontend)
        FIND_AND_ADD_CLANG_LIB(${_clang_lib})
    endforeach ()

    find_path(CLANG_INCLUDE_DIRS clang/Basic/Version.h HINTS ${LLVM_INCLUDE_DIRS})

    if (CLANG_LIBS AND CLANG_INCLUDE_DIRS)
        message(STATUS "Clang libs: " ${CLANG_LIBS})
        set(CLANG_FOUND TRUE)
    endif ()

    if (CLANG_FOUND)
        message(STATUS "Found Clang: ${CLANG_INCLUDE_DIRS}")
    elseif (CLANG_FIND_REQUIRED)
        message(FATAL_ERROR "Could NOT find Clang")
    endif ()

endif ()
21 | INSTRUCTION_SYSTEM 22 | }; 23 | 24 | class CMethod { 25 | 26 | public: 27 | class Instruction { 28 | public: 29 | Instruction(INSTRUCTION type, std::string init, std::string statements, std::string final); 30 | Instruction(INSTRUCTION type, std::string statements, std::string final); 31 | Instruction(INSTRUCTION type, std::string statements); 32 | 33 | INSTRUCTION type; 34 | std::string init_statements; 35 | std::string statements; 36 | std::string final_statements; 37 | 38 | const std::string to_string() const; 39 | 40 | inline bool operator<(const Instruction &other) const { return statements < other.statements; } 41 | }; 42 | 43 | class Builder { 44 | public: 45 | std::string name; 46 | std::string returnType; 47 | std::vector parameters; 48 | std::vector instructions; 49 | 50 | Builder(); 51 | Builder &withName(const std::string &name_); 52 | Builder &returns(const std::string &returnType_); 53 | Builder &addParameter(const std::string ¶meter); 54 | Builder &prependInstruction(const Instruction &instruction); 55 | Builder &addInstruction(const Instruction &instruction); 56 | CMethod build(); 57 | }; 58 | 59 | class PipelineEnumerator { 60 | public: 61 | PipelineEnumerator(std::vector instructions); 62 | PipelineEnumerator(Builder &builder); 63 | void printPermutations(); 64 | void getPermutation(CMethod::Builder &builder, unsigned int number_of_permutation); 65 | 66 | private: 67 | unsigned int number_of_instructions; 68 | unsigned int number_of_permutations; 69 | std::vector permutable_positions; 70 | size_t number_of_current_permutation; 71 | std::vector current_permutation; 72 | 73 | void getNext(); 74 | 75 | static unsigned int factorial(unsigned int n); 76 | }; 77 | 78 | std::string output; 79 | static CMethod::Builder builder(); 80 | 81 | private: 82 | CMethod(CMethod::Builder &builder); 83 | }; 84 | 85 | #endif // CODE_GENERATION_C_METHOD_H 86 | -------------------------------------------------------------------------------- /src/api/Config.cpp: 
-------------------------------------------------------------------------------- 1 | #include "api/Config.h" 2 | #include 3 | 4 | Config::Config() { 5 | parallelism = 1; 6 | bufferSize = 1; 7 | runLength = 1; 8 | pipelinePermutation = 0; 9 | benchmarkRunTime = 60; 10 | numa = false; 11 | filterOptimizations = true; 12 | distributionOptimizations = true; 13 | } 14 | 15 | Config Config::create() { return Config(); } 16 | 17 | Config &Config::withBufferSize(unsigned int bufferSize) { 18 | this->bufferSize = bufferSize; 19 | return *this; 20 | } 21 | 22 | bool Config::filterOpt() { return filterOptimizations; } 23 | 24 | Config &Config::withFilterOpt(bool filterOpt) { 25 | filterOptimizations = filterOpt; 26 | return *this; 27 | } 28 | 29 | Config &Config::withParallelism(unsigned int parallelism) { 30 | this->parallelism = parallelism; 31 | return *this; 32 | } 33 | 34 | Config &Config::withBenchmarkRunDuration(unsigned int runDuration) { 35 | this->benchmarkRunTime = runDuration; 36 | return *this; 37 | } 38 | 39 | Config &Config::withPipelinePermutation(unsigned int pipelinePermutation) { 40 | this->pipelinePermutation = pipelinePermutation; 41 | return *this; 42 | } 43 | 44 | Config &Config::withOutputBuffer(unsigned int size) { 45 | this->outputBuffer = size; 46 | return *this; 47 | } 48 | 49 | unsigned int Config::getOutputBuffer() { return this->outputBuffer; } 50 | 51 | bool Config::getNuma() { return numa; } 52 | int Config::getNumaNodes() { 53 | if (numa) 54 | return numa_num_configured_nodes(); 55 | return 1; 56 | } 57 | Config &Config::withNuma(bool numa) { 58 | this->numa = numa; 59 | return *this; 60 | } 61 | 62 | unsigned int Config::getBenchmarkRunDuration() { return benchmarkRunTime; } 63 | 64 | Config & Config::withCompilationDelay(unsigned int delay) { 65 | compilationDelay = delay; 66 | return *this; 67 | } 68 | 69 | unsigned int Config::getCompilationDelay() { 70 | return compilationDelay; 71 | } 72 | 73 | unsigned int Config::getBufferSize() { 
cmake_minimum_required(VERSION 3.5.1)
project(Grizzly CXX)

# Custom CMake find modules and macros
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")
include(cmake/macros.cmake)

# Default to a Release build when no build type was chosen
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING
            "Choose build type: Debug or Release."
            FORCE
            )
endif (NOT CMAKE_BUILD_TYPE)

# C++ Standard
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Compiler flags. Libraries are intentionally not listed here: `-l...` options
# are linker inputs and belong to target_link_libraries (see LIBRARIES below),
# where they are placed after the object files on the link line.
set(CMAKE_CXX_FLAGS "-Wall")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
set(CMAKE_CXX_FLAGS_RELEASE "-Wextra -O3")

# Compiler should produce specific code for system architecture
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
if (COMPILER_SUPPORTS_MARCH_NATIVE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native -mavx")
endif ()

# Threads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
set(LIBRARIES ${LIBRARIES} Threads::Threads)

# Performance counters (PAPI). Fall back to the plain library name when the
# find module does not report a location, so the link behaves as it did with
# the former `-lpapi` flag.
find_package(PAPI)
if (PAPI_LIBRARIES)
    set(LIBRARIES ${LIBRARIES} ${PAPI_LIBRARIES})
else ()
    set(LIBRARIES ${LIBRARIES} papi)
endif ()

# NUMA and TBB
set(LIBRARIES ${LIBRARIES} numa tbb)

# Boost Libraries
find_package(Boost 1.47.0 REQUIRED system thread program_options filesystem serialization) # Only check if lib is available on system for generated code.
link_directories(${Boost_LIBRARY_DIRS})
include_directories(${Boost_INCLUDE_DIRS})
set(LIBRARIES ${LIBRARIES} ${Boost_LIBRARIES})

# Library containing dlopen and dlclose.
set(LIBRARIES ${LIBRARIES} ${CMAKE_DL_LIBS})


# Imported placeholder target named lnuma (no IMPORTED_LOCATION is set here).
add_library(lnuma IMPORTED STATIC GLOBAL)


# Build and Link #######################################################################################################

# Add Source Code
add_subdirectory(src)

# Add Library
get_source_grizzly(GRIZZLY_SOURCE_FILES)
get_header_grizzly(GRIZZLY_HEADER_FILES)
add_library(grizzly-lib ${GRIZZLY_SOURCE_FILES} ${GRIZZLY_HEADER_FILES})
target_include_directories(grizzly-lib PUBLIC "include")

# CMake variable names are case-sensitive: use the variable filled in above.
add_executable(grizzly start.cpp ${GRIZZLY_HEADER_FILES})
target_link_libraries(grizzly grizzly-lib ${LIBRARIES})

# Make directories for generated code + copy runtime
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/generated-code)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/jit-generated-code)
file(COPY include/jit/runtime DESTINATION jit-generated-code)
file(COPY include/runtime DESTINATION generated-code)
file(COPY include/runtime/input_types.h DESTINATION generated-code/runtime)
consumeFinalAggregation(CodeGenerator &generator, Operator *pOperator); 21 | 22 | virtual void produceFinalAggregation(CodeGenerator &generator, Operator *pOperator); 23 | 24 | protected: 25 | size_t pipeline; 26 | std::string fieldId; 27 | void consume_(CodeGenerator &cg, Operator *input); 28 | void produce_(CodeGenerator &cg, Operator *input, Schema &schema); 29 | void createState(CodeGenerator &cg, Operator *input, Schema &schema); 30 | void migrateFrom(CodeGenerator &cg, Operator *input, Schema &schema); 31 | void migrateTo(CodeGenerator &cg, Operator *input, Schema &schema); 32 | void addStatePtr(CodeGenerator &cg, Operator *input, Schema &schema); 33 | }; 34 | 35 | class Sum : public Aggregation { 36 | public: 37 | Sum(std::string fieldId) : Aggregation(fieldId) {} 38 | 39 | std::string to_string() { return "Sum(" + fieldId + ")"; }; 40 | void produce(CodeGenerator &cg, Operator *input); 41 | void consume(CodeGenerator &cg, Operator *input); 42 | }; 43 | 44 | class Count : public Aggregation { 45 | public: 46 | Count() : Aggregation() {} 47 | 48 | std::string to_string() { return "Count(" + fieldId + ")"; }; 49 | void produce(CodeGenerator &cg, Operator *input); 50 | void consume(CodeGenerator &cg, Operator *input); 51 | }; 52 | 53 | class Min : public Aggregation { 54 | public: 55 | Min(std::string fieldId) : Aggregation(fieldId) {} 56 | 57 | std::string to_string() { return "Min(" + fieldId + ")"; }; 58 | void produce(CodeGenerator &cg, Operator *input); 59 | void consume(CodeGenerator &cg, Operator *input); 60 | }; 61 | 62 | class Max : public Aggregation { 63 | public: 64 | Max(std::string fieldId) : Aggregation(fieldId) {} 65 | 66 | std::string to_string() { return "Max(" + fieldId + ")"; }; 67 | void produce(CodeGenerator &cg, Operator *input); 68 | void consume(CodeGenerator &cg, Operator *input); 69 | }; 70 | 71 | class Avg : public Aggregation { 72 | public: 73 | Avg(std::string fieldId) : Aggregation(fieldId) {} 74 | 75 | bool hasFinalAggregation() 
//
// Created by pgrulich on 25.09.19.
//

#ifndef GRIZZLY_ZIP_HPP
#define GRIZZLY_ZIP_HPP

#include <algorithm>
#include <cmath>
#include <limits>
#include <random>

/** Zipf-like random distribution.
 *
 * "Rejection-inversion to generate variates from monotone discrete
 * distributions", Wolfgang Hörmann and Gerhard Derflinger
 * ACM TOMACS 6.3 (1996): 169-184
 *
 * @tparam IntType  integer type of the generated ranks
 * @tparam RealType floating point type used for the internal computations
 */
template <class IntType = unsigned long, class RealType = double> class zipf_distribution {
public:
  typedef RealType input_type;
  typedef IntType result_type;

  static_assert(std::numeric_limits<IntType>::is_integer, "");
  static_assert(!std::numeric_limits<RealType>::is_integer, "");

  /**
   * @param n number of elements; generated ranks lie in [1, n]
   * @param q exponent of the distribution
   */
  zipf_distribution(const IntType n = std::numeric_limits<IntType>::max(), const RealType q = 1.0)
      : n(n), q(q), H_x1(H(1.5) - 1.0), H_n(H(n + 0.5)), dist(H_x1, H_n) {}

  /**
   * @brief Draws the next rank using rejection-inversion.
   * @param rng source of randomness
   * @return a rank in [1, n]
   */
  IntType operator()(std::mt19937 &rng) {
    while (true) {
      const RealType u = dist(rng);
      const RealType x = H_inv(u);
      const IntType k = to_rank(std::round(x));
      if (u >= H(k + 0.5) - h(k)) {
        return k;
      }
    }
  }

private:
  /** Clamp x to [1, n] and convert it to a rank.
   *
   * The clamping is done on the real value *before* the conversion: a
   * floating point value outside the range of IntType must not be converted
   * (undefined behaviour), which can happen for the default n because
   * n + 0.5 is not representable exactly.
   */
  IntType to_rank(const RealType x) const {
    if (!(x > static_cast<RealType>(1))) { // also catches NaN
      return 1;
    }
    if (x >= static_cast<RealType>(n)) {
      return n;
    }
    return static_cast<IntType>(x);
  }

  /** (exp(x) - 1) / x, with a series expansion around 0 */
  static double expxm1bx(const double x) {
    return (std::abs(x) > epsilon) ? std::expm1(x) / x : (1.0 + x / 2.0 * (1.0 + x / 3.0 * (1.0 + x / 4.0)));
  }

  /** H(x) = log(x) if q == 1, (x^(1-q) - 1)/(1 - q) otherwise.
   * H(x) is an integral of h(x).
   *
   * Note the numerator is one less than in the paper in order to work with
   * all positive q.
   */
  RealType H(const RealType x) const {
    const RealType log_x = std::log(x);
    return expxm1bx((1.0 - q) * log_x) * log_x;
  }

  /** log(1 + x) / x, with a series expansion around 0 */
  static RealType log1pxbx(const RealType x) {
    return (std::abs(x) > epsilon) ? std::log1p(x) / x : 1.0 - x * ((1 / 2.0) - x * ((1 / 3.0) - x * (1 / 4.0)));
  }

  /** The inverse function of H(x) */
  RealType H_inv(const RealType x) const {
    const RealType t = std::max<RealType>(-1.0, x * (1.0 - q));
    return std::exp(log1pxbx(t) * x);
  }

  /** The hat function h(x) = 1 / (x ^ q) */
  RealType h(const RealType x) const { return std::exp(-q * std::log(x)); }

  static constexpr RealType epsilon = 1e-8;

  IntType n;                                     ///< Number of elements
  RealType q;                                    ///< Exponent
  RealType H_x1;                                 ///< H(x_1)
  RealType H_n;                                  ///< H(n)
  std::uniform_real_distribution<RealType> dist; ///< [H(x_1), H(n)]
};

#endif // GRIZZLY_ZIP_HPP
| void WindowOperator::consume(CodeGenerator &cg) { 20 | QueryContext &context = cg.ctx(pipeline); 21 | if (context.hasGroupBy && cg.compileMode == CM_OPTIMIZE && cg.config.distributionOpt()) { 22 | 23 | auto distribution = cg.profilingDataManager->getDistributionProfilingHandler("dist"); 24 | auto top = distribution->top; 25 | auto freq = distribution->freq; 26 | 27 | int64_t min = INT64_MAX; 28 | int64_t max = INT64_MIN; 29 | int64_t sumFreq = 0; 30 | for (auto it = top.begin(); it < top.end() - 1; it++) { 31 | auto t = *it; 32 | if (min > t) 33 | min = t; 34 | 35 | if (max < t) 36 | max = t; 37 | sumFreq += freq[t]; 38 | std::cout << t << ":" << freq[t] << std::endl; 39 | } 40 | 41 | auto freqAfg = ((double)sumFreq) / ((double)top.size()); 42 | 43 | double sum_STD = 0; 44 | for (auto t = top.begin(); t < top.end() - 1; t++) { 45 | sum_STD += std::pow(freq[*t] - freqAfg, 2.0); 46 | } 47 | double std = std::sqrt(sum_STD); 48 | 49 | std::cout << "Min i: " << min << " Max i:" << max << " STE:" << std << std::endl; 50 | 51 | if (std < 10000) { 52 | context.stateStrategy = QueryContext::SHARED; 53 | std::cerr << " USE SHARED STATE " << std::endl; 54 | } else { 55 | context.stateStrategy = QueryContext::INDEPENDENT; 56 | std::cerr << " USE INDEPENDENT STATE " << std::endl; 57 | } 58 | 59 | } else { 60 | context.stateStrategy = QueryContext::SHARED; 61 | } 62 | 63 | std::cout << " pipeline " << pipeline << " " << to_string() << std::endl; 64 | 65 | std::stringstream statements; 66 | statements << "auto window_state = globalState->window_state[" << pipeline << "];\n"; 67 | statements << "ThreadLocalState *thread_local_state = window_state->thread_local_state[thread_id];\n"; 68 | cg.pipeline(pipeline).prependInstruction(CMethod::Instruction(INSTRUCTION_TRIGGER, statements.str())); 69 | 70 | // trigger before assign 71 | trigger->onBeforeAssign(cg, pipeline); 72 | 73 | // assign 74 | assigner->consume(cg); 75 | 76 | // trigger before element 77 | 
trigger->onBeforeElement(cg, pipeline); 78 | 79 | if (parent != nullptr) { 80 | parent->consume(cg); 81 | } 82 | } 83 | 84 | void WindowOperator::produce(CodeGenerator &cg) { 85 | 86 | pipeline = cg.currentPipeline(); 87 | // std::cout<< " pipeline " << pipeline << " " << to_string() << std::endl; 88 | assigner->produce(cg); 89 | input->produce(cg); 90 | } 91 | -------------------------------------------------------------------------------- /start.cpp: -------------------------------------------------------------------------------- 1 | #include "api/Config.h" 2 | #include "api/Field.h" 3 | #include "api/Query.h" 4 | #include "api/Schema.h" 5 | #include "code_generation/CodeGenerator.h" 6 | #include 7 | 8 | int main(int argc, const char *argv[]) { 9 | 10 | if(argc != 5){ 11 | std::cout << "Please provide the right argument. " 12 | "1. parallelism, " 13 | "2. buffer size in tuple, " 14 | "3. execution duration for the query, " 15 | "4. path to input data" << std::endl; 16 | return -1; 17 | } 18 | 19 | auto parallelism = std::stoi(argv[1]); 20 | auto bufferSize = std::stoi(argv[2]); 21 | auto experimentDuration = std::stoi(argv[3]); 22 | auto path = argv[4]; 23 | 24 | Config config = Config::create() 25 | // configures the number of worker threads 26 | .withParallelism(parallelism) 27 | // the number of records per input buffer -> 28 | // this has to correspond to the number of records in the input file 29 | .withBufferSize(bufferSize) 30 | // the number of records processed per pipeline invocation -> 31 | // if this is equal to the buffer size the pipeline will always process the whole input buffer. 32 | .withRunLength(bufferSize) 33 | // configures how many seconds the benchmark will be executed. 34 | .withBenchmarkRunDuration(experimentDuration) 35 | // configures the time in ms the jit waits to switch to the next compilation stage. 
36 | .withCompilationDelay(4000) 37 | // enables filter predicate optimizations 38 | .withFilterOpt(true) 39 | // enables key distribution optimizations 40 | .withDistributionOpt(true); 41 | 42 | Schema schema = Schema::create() 43 | /** Id of auction this bid is for. */ 44 | .addFixSizeField("auction", DataType::Long, Stream) 45 | /** Id of person bidding in auction. */ 46 | .addFixSizeField("bidder", DataType::Long, Stream) 47 | /** Price of bid, in cents. */ 48 | .addFixSizeField("price", DataType::Long, Stream) 49 | /**Time at which bid was made (ms since epoch)*/ 50 | .addFixSizeField("dateTime", DataType::Long, Stream); 51 | 52 | Query::generate(config, schema, path) 53 | // defines a filter operator with a >= predicate on the field auction 54 | .filter(new GreaterEqual("auction", 50)) 55 | // adds a key by operator on the auction field 56 | .groupBy("auction") 57 | // add a tumbling window over 10 seconds 58 | .window(TumblingProcessingTimeWindow(Time::seconds(10))) 59 | // adds a avg aggregation on the price 60 | .aggregate(Avg("price")) 61 | // prints the output stream to the console 62 | .print() 63 | .execute(); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /src/operator/ReadWindowOperator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "operator/ReadWindowOperator.h" 5 | 6 | ReadWindowOperator::ReadWindowOperator(Schema &schema, Operator *input) : schema(schema), input(input) { 7 | leftChild = NULL; 8 | rightChild = NULL; 9 | name = "Read Window"; 10 | input->parent = this; 11 | } 12 | 13 | std::string ReadWindowOperator::to_string() { return "Read Window"; } 14 | 15 | ReadWindowOperator::~ReadWindowOperator() {} 16 | 17 | void ReadWindowOperator::consume(CodeGenerator &cg) { 18 | std::string resultType = "record" + std::to_string(pipeline + 1); 19 | 20 | // if non-grouping query, only read from result 
record 21 | if (!cg.ctx(pipeline + 1).hasGroupBy || cg.ctx(pipeline + 1).hasKeyBy) { 22 | cg.pipeline(pipeline).addParameter(resultType + " record"); 23 | if (parent != nullptr) { 24 | parent->consume(cg); 25 | } 26 | } 27 | 28 | // in grouping query, read from map 29 | std::stringstream statements; 30 | if (cg.ctx(pipeline + 1).hasGroupBy) { 31 | if (cg.ctx(pipeline + 1).maxKeyValue != -1) { 32 | 33 | cg.pipeline(pipeline).addParameter("size_t currentWindow"); 34 | 35 | statements << "for (size_t i = 0; i < " << cg.ctx(pipeline + 1).maxKeyValue << "; i++) {" << std::endl; 36 | statements << " auto key = i;" << std::endl; 37 | if (cg.ctx(pipeline + 1).stateStrategy == QueryContext::SHARED) { 38 | statements << " auto record = state" << (pipeline + 1) << "[currentWindow][i];" << std::endl; 39 | } else { 40 | statements << "// merge independent window states\n"; 41 | statements << " auto record = state" << (pipeline + 1) << "[currentWindow][i];" << std::endl; 42 | statements << " for(int mergeThreadID = 1; mergeThreadID < " << cg.config.getParallelism() 43 | << "; mergeThreadID++){" 44 | "auto temp = state" 45 | << (pipeline + 1) << "[currentWindow + (mergeThreadID * window_buffers" << (pipeline + 1) << ")][i];"; 46 | for (auto field : cg.ctx(pipeline+1).schema.fields) { 47 | statements << "record." << field.name << " = " 48 | << "temp." 
<< field.name << ";\n"; 49 | } 50 | statements << "}"; 51 | } 52 | } else { 53 | std::string keyType = cg.ctx(pipeline + 1).groupBy->dataType.keyType(); 54 | cg.pipeline(pipeline).addParameter("tbb::concurrent_unordered_map<" + keyType + ", " + resultType + "> records"); 55 | statements << "for (auto const &it : records) {" << std::endl; 56 | statements << keyType << " key = it.first;" << std::endl; 57 | statements << resultType << " record = it.second;" << std::endl; 58 | } 59 | 60 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_READ, statements.str())); 61 | 62 | if (parent != nullptr) { 63 | parent->consume(cg); 64 | } 65 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_CLOSE, std::string("}\n"))); 66 | } 67 | } 68 | 69 | void ReadWindowOperator::produce(CodeGenerator &cg) { 70 | pipeline = cg.currentPipeline(); 71 | input->produce(cg); 72 | } 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Grizzly: Efficient Stream Processing Through Adaptive Query Compilation 2 | 3 | This repository provides a prototypical snapshot of the Grizzly code generator for stream processing. 4 | This codebase contains a reduced functionality but illustrates our code generation approach. 5 | Currently, we integrate an advanced version of Grizzly in NebulaStream. Our new stream processing engine for the internet-of-things. To learn more about NebulaStream, please visit our https://www.nebula.stream. 6 | 7 | - Paper: [Grizzly: Efficient Stream Processing Through Adaptive Query Compilation](https://www.nebula.stream/publications/grizzly.html) 8 | 9 | - BibTeX citation: 10 | ``` 11 | @inproceedings{grulich2020grizzly, 12 | author = {Grulich, Philipp M. 
and Bre\ss{}, Sebastian and Zeuch, Steffen and Traub, Jonas and Bleichert, Janis von and Chen, Zongxiong and Rabl, Tilmann and Markl, Volker}, 13 | title = {Grizzly: Efficient Stream Processing Through Adaptive Query Compilation}, 14 | year = {2020}, 15 | isbn = {9781450367356}, 16 | publisher = {Association for Computing Machinery}, 17 | address = {New York, NY, USA}, 18 | url = {https://doi.org/10.1145/3318464.3389739}, 19 | doi = {10.1145/3318464.3389739}, 20 | booktitle = {Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data}, 21 | pages = {2487–2503}, 22 | numpages = {17}, 23 | location = {Portland, OR, USA}, 24 | series = {SIGMOD ’20} 25 | } 26 | ``` 27 | 28 | ## Features of this prototype: 29 | 30 | - Adaptive code generation, with online data profiling. 31 | - Filter, Map, Select, and Window Operators. 32 | - Tumbling and sliding processing time windows. 33 | - Sum, Count, Min, Max, Avg aggregation functions. 34 | 35 | 36 | 37 | ## How to build and run 38 | 39 | ### Dependencies 40 | - Boost > 1.49 41 | - Clang 42 | - TBB 43 | 44 | ### Build the source code 45 | 1. Create a directory for your build system. 46 | 2. Call CMake to create a build system. 47 | 3. Use the generated Makefile to build the source code. 48 | ```` 49 | # Debug Build 50 | mkdir debug-build && cd debug-build 51 | cmake -DCMAKE_BUILD_TYPE=Debug .. 52 | make -j 53 | 54 | # Release Build 55 | mkdir release-build && cd release-build 56 | cmake -DCMAKE_BUILD_TYPE=Release .. 57 | make -j 58 | ```` 59 | 60 | ### Generate data 61 | 1. Customize dataGenerator.cpp to the input schema of your query. 62 | Default schema: 63 | ```C++ 64 | Schema::create() 65 | /** Id of auction this bid is for. */ 66 | .addFixSizeField("auction", DataType::Long, Stream) 67 | /** Id of person bidding in auction. */ 68 | .addFixSizeField("bidder", DataType::Long, Stream) 69 | /** Price of bid, in cents.
*/ 70 | .addFixSizeField("price", DataType::Long, Stream) 71 | /**Time at which bid was made (ms since epoch)*/ 72 | .addFixSizeField("dateTime", DataType::Long, Stream); 73 | ``` 74 | 2. Build the data generator with: 75 | ```sh 76 | cd data-generator && make 77 | ``` 78 | 3. Generate input data: 79 | ``` 80 | ./dataGenerator $NumberOfTuple $NumberOfPersons $NumberOfAuctions 81 | ``` 82 | 83 | ### Run a query 84 | Queries are defined in start.cpp. Thus, after changing the query, you have to build the project again. 85 | To start a query, execute the following command: 86 | ```shell 87 | ./grizzly $parallelism $numberOfInputTuple $experimentDuration $inputFilePath 88 | 89 | For instance: 90 | ./grizzly 4 100000 60 ../data-generator/nexmark_test_data.bin 91 | ``` 92 | 93 | ### Inspect generated code 94 | Grizzly generates C++ code, which is stored under `$build_folder/jit-generated-code`. 95 | Furthermore, we can differentiate between three types of code artifacts. 96 | The last number in the file name indicates the execution stage.
/// Converts a NUL-terminated string of decimal digits to an int.
///
/// There is no sign handling, no validation and no overflow check: every character
/// is treated as a digit in '0'..'9'. An empty string yields 0.
///
/// @param str NUL-terminated digit string (must not be null).
/// @return the decimal value of the digits.
static inline int fast_atoi(const char *str) {
  int val = 0;
  while (*str) {
    // Plain multiplication instead of (val << 4) - (val << 2) - (val << 1): the
    // intermediate val << 4 (16 * val) overflows int for values that still fit
    // once the digit is appended (e.g. "1500000000"), which is undefined
    // behaviour. Compilers emit equally cheap code for val * 10.
    val = val * 10 + (*str++ - '0');
  }
  return val;
}
54 | auto tuple = (processed * dispatcher->runLength); 55 | auto milliSec = std::chrono::duration_cast(div).count(); 56 | double thorughput = (((double)tuple) / (double)milliSec) * 1000; 57 | std::cout << "Buffer Processed: " << tuple << " in: " << milliSec << "ms" 58 | << " is " << thorughput << " tps" << std::endl; 59 | file << dispatcher->seconds << ";" << ((int64_t)thorughput) << "\n"; 60 | file.flush(); 61 | dispatcher->seconds++; 62 | dispatcher->tick(dispatcher->seconds); 63 | lastOutput = current; 64 | } 65 | 66 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 67 | } 68 | file.close(); 69 | } 70 | 71 | void *getWork(int thread_id, int run) { 72 | buffersProcessed++; 73 | void *t = buffer[thread_id][run]; 74 | return t; 75 | } 76 | 77 | bool hasWork() { 78 | // TODO: add if file is empty 79 | // TODO: add if file was read entirely 80 | return true; 81 | } 82 | 83 | void stop() { stopped = true; } 84 | 85 | void cleanup() { 86 | for (unsigned int i = 0; i < parallelism; i++) { 87 | // delete[] buffer[i]; 88 | } 89 | } 90 | 91 | unsigned int runLength; 92 | unsigned int bufferRuns; 93 | unsigned int tupleSize; 94 | unsigned int bufferSize; 95 | unsigned int parallelism; 96 | unsigned int numa; 97 | int *numa_relation; 98 | std::string file; 99 | tbb::atomic buffersProcessed = 0; 100 | 101 | protected: 102 | void readBinaryFileIntoBuffer(size_t thread_id) { 103 | std::ifstream ifp(path.c_str(), std::ios::in | std::ios::binary); 104 | assert(ifp.is_open()); 105 | ifp.read(reinterpret_cast(buffer[thread_id]), bufferSize * tupleSize); 106 | ifp.close(); 107 | } 108 | 109 | unsigned int runs; 110 | bool stopped; 111 | 112 | void ***buffer; 113 | std::string path; 114 | InputType type; 115 | }; 116 | 117 | #endif // RUNTIME_DISPATCHER_H 118 | -------------------------------------------------------------------------------- /include/jit/runtime/Profiling.h: -------------------------------------------------------------------------------- 1 | #ifndef 
GRIZZLY_PROFILING_H 2 | #define GRIZZLY_PROFILING_H 3 | 4 | #include "atomic" 5 | #include "string" 6 | #include "tbb/concurrent_unordered_map.h" 7 | #include "vector" 8 | #include // std::find 9 | #include 10 | 11 | class ProfilingHandler { 12 | public: 13 | ProfilingHandler(); 14 | }; 15 | 16 | class MinProfilingHandler : ProfilingHandler { 17 | public: 18 | MinProfilingHandler(); 19 | 20 | void update(int64_t i) { 21 | if (i < value) 22 | value = i; 23 | } 24 | 25 | int getValue() { return value; } 26 | 27 | private: 28 | std::atomic_int value; 29 | }; 30 | 31 | class DistributionProfilingHandler : ProfilingHandler { 32 | public: 33 | DistributionProfilingHandler() { 34 | k = 10; 35 | top = std::vector(k + 1); 36 | for (int i = 0; i < k + 1; i++) { 37 | this->top[i] = 0; 38 | } 39 | } 40 | 41 | std::vector top; 42 | tbb::concurrent_unordered_map freq; 43 | int k; 44 | 45 | void update(int value) { 46 | 47 | // increase the frequency 48 | freq[value]++; 49 | top[k] = value; 50 | 51 | auto it = std::find(top.begin(), top.end() - 1, value); 52 | for (int i = distance(top.begin(), it) - 1; i >= 0; --i) { 53 | // compare the frequency and swap if higher 54 | // frequency element is stored next to it 55 | if (freq[top[i]] < freq[top[i + 1]]) 56 | std::swap(top[i], top[i + 1]); 57 | 58 | // if frequency is same compare the elements 59 | // and swap if next element is high 60 | else if ((freq[top[i]] == freq[top[i + 1]]) && (top[i] > top[i + 1])) 61 | std::swap(top[i], top[i + 1]); 62 | else 63 | break; 64 | } 65 | } 66 | 67 | int getValue() { return value; } 68 | 69 | private: 70 | std::atomic_int value; 71 | }; 72 | 73 | class SelectivityHandler : ProfilingHandler { 74 | public: 75 | SelectivityHandler(unsigned long s) : ProfilingHandler() { 76 | this->values = new std::atomic_uint_fast64_t[s]; 77 | this->counter = 0; 78 | for (int i = 0; i < s; i++) { 79 | this->values[i] = 0; 80 | } 81 | }; 82 | 83 | void update(int64_t predicate, int64_t outcome) { 
this->values[predicate] += outcome; } 84 | 85 | int operator++() { return counter++; } 86 | 87 | std::atomic_uint_fast64_t *getValue() { return this->values; } 88 | 89 | std::atomic_uint_fast64_t counter; 90 | 91 | private: 92 | std::atomic_uint_fast64_t *values; 93 | }; 94 | 95 | class MaxProfilingHandler : ProfilingHandler { 96 | public: 97 | MaxProfilingHandler(); 98 | 99 | void update(int64_t i) { 100 | if (i > value) 101 | value = i; 102 | } 103 | 104 | int getValue() { return value; } 105 | 106 | private: 107 | std::atomic_int value; 108 | }; 109 | 110 | class ProfilingDataManager { 111 | public: 112 | ProfilingDataManager(); 113 | 114 | ProfilingHandler *getHandler(std::string handlerName) { return handlers[handlerName]; } 115 | 116 | MaxProfilingHandler *getMaxHandler(std::string name) { return (MaxProfilingHandler *)getHandler(name); } 117 | 118 | MinProfilingHandler *getMinHandler(std::string name) { return (MinProfilingHandler *)getHandler(name); } 119 | 120 | DistributionProfilingHandler *getDistributionProfilingHandler(std::string name) { 121 | return (DistributionProfilingHandler *)getHandler(name); 122 | } 123 | 124 | SelectivityHandler *getSelectivityHandler(std::string name) { return (SelectivityHandler *)getHandler(name); } 125 | 126 | void registerMinHandler(std::string name); 127 | 128 | void registerMaxHandler(std::string name); 129 | 130 | void registerSelectivityHandler(std::string name, unsigned long i); 131 | 132 | void registerDistributionHandler(std::string name); 133 | 134 | private: 135 | tbb::concurrent_unordered_map handlers; 136 | }; 137 | 138 | #endif // GRIZZLY_PROFILING_H 139 | -------------------------------------------------------------------------------- /include/jit/CodeCompiler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef C_CODE_COMPILER_HPP 2 | #define C_CODE_COMPILER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace llvm { 9 | class LLVMContext; 10 | class 
/// Abstract handle to a piece of compiled generated code.
///
/// Subclasses implement the symbol lookup (getFunctionPointerImpl); this class
/// converts the resulting raw pointer into a typed function pointer and records
/// the compile time.
class CompiledCCode {
public:
  virtual ~CompiledCCode() {}

  /// Looks up `name` and returns it as a pointer of type `Function`.
  ///
  /// @tparam Function function-pointer type of the requested symbol.
  /// @param name symbol name handed to getFunctionPointerImpl.
  template <typename Function> Function getFunctionPointer(const std::string &name) {
    // INFO
    // http://www.trilithium.com/johan/2004/12/problem-with-dlsym/
    // No real solution in 2016.
    static_assert(sizeof(void *) == sizeof(Function), "Void pointer to function pointer conversion will not work!"
                                                      " If you encounter this, run!");

    // reinterpret_cast (as in SharedLibrary::getFunction) instead of reading the
    // inactive member of a union, which is undefined behaviour in C++.
    return reinterpret_cast<Function>(getFunctionPointerImpl(name));
  }

  /// @return the compile time passed to the constructor (nanoseconds), in seconds.
  double getCompileTimeInSeconds() const { return compile_time_in_ns_ / double(1e9); }

protected:
  /// @param compile_time compile time in nanoseconds.
  CompiledCCode(long compile_time) : compile_time_in_ns_(compile_time) {}

  /// Resolves `name` to the raw address of the compiled symbol.
  virtual void *getFunctionPointerImpl(const std::string &name) = 0;

private:
  long compile_time_in_ns_;
};
116 | std::string PrecompiledHeaderName; 117 | }; 118 | 119 | void exportSourceToFile(const std::string &filename, const std::string &source); 120 | void pretty_print_code(const std::string &source); 121 | 122 | #endif // C_CODE_COMPILER_HPP 123 | -------------------------------------------------------------------------------- /include/api/Mapper.h: -------------------------------------------------------------------------------- 1 | #ifndef API_MAPPER_H 2 | #define API_MAPPER_H 3 | 4 | #include "api/Field.h" 5 | #include "operator/Operator.h" 6 | 7 | class Mapper { 8 | public: 9 | // string value 10 | Mapper(std::string fieldId, std::string value, std::string outputField) 11 | : fieldId(fieldId), value("\"" + value + "\""), outputField(outputField) {} 12 | Mapper(std::string fieldId, std::string value) : Mapper(fieldId, value, fieldId) {} 13 | // long value 14 | Mapper(std::string fieldId, long value, std::string outputField) 15 | : fieldId(fieldId), value(std::to_string(value)), outputField(outputField) {} 16 | Mapper(std::string fieldId, long value) : Mapper(fieldId, value, fieldId) {} 17 | // field value 18 | Mapper(std::string fieldId, Field &value, std::string outputField) 19 | : fieldId(fieldId), value("record." 
+ value.name), outputField(outputField) {} 20 | Mapper(std::string fieldId, Field &value) : Mapper(fieldId, value, fieldId) {} 21 | 22 | std::string to_string() { return "Map Field: " + fieldId + " on " + value + " with " + outputField; } 23 | 24 | virtual void produce(CodeGenerator &cg, Operator *input) { 25 | pipeline = cg.currentPipeline(); 26 | input->produce(cg); 27 | }; 28 | virtual void consume(CodeGenerator &cg, Operator *parent) = 0; 29 | size_t pipeline; 30 | 31 | protected: 32 | std::string fieldId; 33 | std::string value; 34 | std::string outputField; 35 | }; 36 | 37 | // adds a value to a field or adds two fields 38 | class Add : public Mapper { 39 | public: 40 | Add(std::string fieldId, std::string value) : Mapper(fieldId, value) {} 41 | Add(std::string fieldId, std::string value, std::string outputField) : Mapper(fieldId, value, outputField) {} 42 | Add(std::string fieldId, long value) : Mapper(fieldId, value) {} 43 | Add(std::string fieldId, long value, std::string outputField) : Mapper(fieldId, value, outputField) {} 44 | Add(std::string fieldId, Field &value) : Mapper(fieldId, value) {} 45 | Add(std::string fieldId, Field &value, std::string outputField) : Mapper(fieldId, value, outputField) {} 46 | 47 | void consume(CodeGenerator &cg, Operator *parent) {} 48 | }; 49 | 50 | // subtract a value from a field or subtract one field from another 51 | class Subtract : public Mapper { 52 | public: 53 | Subtract(std::string fieldId, std::string value) : Mapper(fieldId, value) {} 54 | Subtract(std::string fieldId, std::string value, std::string outputField) : Mapper(fieldId, value, outputField) {} 55 | Subtract(std::string fieldId, long value) : Mapper(fieldId, value) {} 56 | Subtract(std::string fieldId, long value, std::string outputField) : Mapper(fieldId, value, outputField) {} 57 | Subtract(std::string fieldId, Field &value) : Mapper(fieldId, value) {} 58 | Subtract(std::string fieldId, Field &value, std::string outputField) : Mapper(fieldId, value, 
outputField) {} 59 | 60 | void consume(CodeGenerator &cg, Operator *parent) {} 61 | }; 62 | 63 | // divides two fields or divides a field by a value 64 | class Divide : public Mapper { 65 | public: 66 | Divide(std::string fieldId, std::string value) : Mapper(fieldId, value) {} 67 | Divide(std::string fieldId, std::string value, std::string outputField) : Mapper(fieldId, value, outputField) {} 68 | Divide(std::string fieldId, long value) : Mapper(fieldId, value) {} 69 | Divide(std::string fieldId, long value, std::string outputField) : Mapper(fieldId, value, outputField) {} 70 | Divide(std::string fieldId, Field &value) : Mapper(fieldId, value) {} 71 | Divide(std::string fieldId, Field &value, std::string outputField) : Mapper(fieldId, value, outputField) {} 72 | 73 | void consume(CodeGenerator &cg, Operator *parent) {} 74 | }; 75 | 76 | // concatinates two (string) fields 77 | class Concat : public Mapper { 78 | public: 79 | Concat(std::string fieldId, std::string value) : Mapper(fieldId, value) {} 80 | Concat(std::string fieldId, std::string value, std::string outputField) : Mapper(fieldId, value, outputField) {} 81 | Concat(std::string fieldId, Field &value) : Mapper(fieldId, value) {} 82 | Concat(std::string fieldId, Field &value, std::string outputField) : Mapper(fieldId, value, outputField) {} 83 | 84 | void consume(CodeGenerator &cg, Operator *parent) {} 85 | }; 86 | 87 | #endif // API_MAPPER_H 88 | -------------------------------------------------------------------------------- /include/api/Assigner.h: -------------------------------------------------------------------------------- 1 | #ifndef API_ASSIGNER_H 2 | #define API_ASSIGNER_H 3 | 4 | #include 5 | 6 | #include "api/Time.h" 7 | #include "code_generation/CodeGenerator.h" 8 | 9 | class Assigner { 10 | 11 | public: 12 | virtual void produce(CodeGenerator &cg) = 0; 13 | virtual void consume(CodeGenerator &cg) = 0; 14 | virtual std::string to_string() { return "Assigner"; }; 15 | 16 | protected: 17 | 
size_t pipeline; 18 | }; 19 | 20 | class TumblingProcessingTimeAssigner : public Assigner { 21 | public: 22 | TumblingProcessingTimeAssigner(Time size) : size(size) {} 23 | void produce(CodeGenerator &cg) override; 24 | void consume(CodeGenerator &cg) override; 25 | std::string to_string() override { return "TumblingProcessingTimeAssigner"; } 26 | 27 | private: 28 | Time size; 29 | }; 30 | 31 | class SlidingProcessingTimeAssigner : public Assigner { 32 | public: 33 | SlidingProcessingTimeAssigner(Time size, Time slide) : size(size), slide(slide) { 34 | if (size.time % slide.time != 0) 35 | throw std::invalid_argument("Invalid window: size of window must be a multiple of slide"); 36 | numWindows = (size.time / slide.time) * 2; 37 | } 38 | void produce(CodeGenerator &cg) override; 39 | void consume(CodeGenerator &cg) override; 40 | std::string to_string() override { return "SlidingProcessingTimeAssigner"; } 41 | 42 | private: 43 | Time size; 44 | Time slide; 45 | size_t numWindows; 46 | }; 47 | 48 | class SessionProcessingTimeAssigner : public Assigner { 49 | public: 50 | SessionProcessingTimeAssigner(Time timeout) : timeout(timeout) {} 51 | void produce(CodeGenerator &cg) override; 52 | void consume(CodeGenerator &cg) override; 53 | std::string to_string() override { return "SessionProcessingTimeAssigner"; } 54 | 55 | private: 56 | Time timeout; 57 | }; 58 | 59 | class TumblingEventTimeAssigner : public Assigner { 60 | public: 61 | TumblingEventTimeAssigner(Time size, std::string tsFieldId, Time al) 62 | : size(size), timestampFieldId(tsFieldId), allowedLateness(al) { 63 | if (allowedLateness.time % size.time != 0) 64 | throw std::invalid_argument("Invalid window: allowed lateness must be a multiple of size"); 65 | numWindows = (allowedLateness.time / size.time) + 2; 66 | } 67 | void produce(CodeGenerator &cg) override; 68 | void consume(CodeGenerator &cg) override; 69 | std::string to_string() override { return "TumblingEventTimeAssigner"; } 70 | 71 | private: 72 | 
Time size; 73 | std::string timestampFieldId; 74 | Time allowedLateness; 75 | size_t numWindows; 76 | }; 77 | 78 | class SlidingEventTimeAssigner : public Assigner { 79 | public: 80 | SlidingEventTimeAssigner(Time size, Time slide, std::string tsFieldId, Time al) 81 | : size(size), slide(slide), timestampFieldId(tsFieldId), allowedLateness(al) { 82 | if (size.time % slide.time != 0) 83 | throw std::invalid_argument("Invalid window: size of window must be a multiple of slide"); 84 | numWindows = (allowedLateness.time / slide.time) + 2; 85 | } 86 | void produce(CodeGenerator &cg) override; 87 | void consume(CodeGenerator &cg) override; 88 | std::string to_string() override { return "SlidingEventTimeAssigner"; } 89 | 90 | private: 91 | Time size; 92 | Time slide; 93 | std::string timestampFieldId; 94 | Time allowedLateness; 95 | size_t numWindows; 96 | }; 97 | 98 | class SessionEventTimeAssigner : public Assigner { 99 | public: 100 | SessionEventTimeAssigner(Time timeout, std::string tsFieldId, Time al) 101 | : timeout(timeout), timestampFieldId(tsFieldId), allowedLateness(al) { 102 | if (allowedLateness.time <= timeout.time) { 103 | numWindows = 2; 104 | } else { 105 | if (allowedLateness.time % timeout.time != 0) 106 | throw std::invalid_argument("Invalid window: allowed lateness must be a multiple of timeout"); 107 | numWindows = (allowedLateness.time / timeout.time) + 2; 108 | } 109 | } 110 | std::string to_string() override { return "SessionEventTimeAssigner"; } 111 | void produce(CodeGenerator &cg) override; 112 | void consume(CodeGenerator &cg) override; 113 | 114 | private: 115 | Time timeout; 116 | std::string timestampFieldId; 117 | Time allowedLateness; 118 | size_t numWindows; 119 | }; 120 | 121 | #endif // API_ASSIGNER_H 122 | -------------------------------------------------------------------------------- /src/jit/JITCodeGenerator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 
4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "jit/JITCodeGenerator.h" 10 | 11 | JITCodeGenerator::JITCodeGenerator(Config &config, Schema &schema, ProfilingDataManager *profilingDataManager, 12 | CompileMode compileMode) 13 | : CodeGenerator(config, schema, compileMode) { 14 | this->profilingDataManager = profilingDataManager; 15 | open = CMethod::builder() 16 | .returns("void") 17 | .addParameter("GlobalState * g") 18 | .addParameter("Dispatcher * d") 19 | .addParameter("Variant * v") 20 | .withName("open"); 21 | 22 | init = CMethod::builder() 23 | .returns("void") 24 | .addParameter("GlobalState * g") 25 | .addParameter("Dispatcher * d") 26 | .withName("init"); 27 | 28 | execute = 29 | CMethod::builder().returns("void").addParameter("int threadID").addParameter("int numaNode").withName("execute"); 30 | 31 | migrateFrom = CMethod::builder().returns("void").addParameter("void ** inputStates").withName("migrateFrom"); 32 | 33 | migrateTo = CMethod::builder().returns("void").addParameter("void ** outputStates").withName("migrateTo"); 34 | 35 | getState = CMethod::builder().returns("void **").addParameter("").withName("getState"); 36 | 37 | open.addInstruction(CMethod::Instruction(INSTRUCTION_SYSTEM, "globalState = g;\n")); 38 | open.addInstruction(CMethod::Instruction(INSTRUCTION_SYSTEM, "dispatcher = d;\n")); 39 | open.addInstruction(CMethod::Instruction(INSTRUCTION_SYSTEM, "variant = v;\n")); 40 | getState.addInstruction( 41 | CMethod::Instruction(INSTRUCTION_SYSTEM, "void** statePtr = (void**)malloc(sizeof(void*)*2);\n")); 42 | } 43 | 44 | void JITCodeGenerator::generateStructFile(std::string path) { 45 | // Generate data_types.h file 46 | CFile::Builder dataTypesBuilder = CFile::builder().withName("data_types.h").include("tbb/atomic.h"); 47 | 48 | // add schema structs 49 | size_t i = 0; 50 | for (auto &context : queryContexts) { 51 | if (context.stateStrategy == QueryContext::SHARED) { 52 | generateStruct(context.schema, "record", i, 
/**
 * Assembles the generated query file ("query.cpp").
 *
 * Builds one method per pipeline, emits the worker loop into execute, builds the
 * open/init/close/migrateFrom/migrateTo/execute/getState methods, and adds the
 * includes and global statements of the file.
 *
 * @param type not read in this function.
 * @param path not read in this function.
 * @return the built CFile.
 */
CFile JITCodeGenerator::generate(std::string type, std::string path) {

  // Pipeline Permutation: permute longest pipeline if needed
  if (config.getPipelinePermutation() != 0) {
    CMethod::PipelineEnumerator enumerator = CMethod::PipelineEnumerator(pipelines[longestPipeline()]);
    enumerator.getPermutation(pipelines[longestPipeline()], config.getPipelinePermutation());
  }

  // Generate pipelines: pipeline<i>(..., int thread_id, int numa_node)
  auto i = 0;
  for (auto &pipeline : pipelines) {
    CMethod pipelineMethod = pipeline.withName("pipeline" + std::to_string(i))
                                 .addParameter("int thread_id")
                                 .addParameter("int numa_node")
                                 .returns("void")
                                 .build();
    file.addMethod(pipelineMethod);
    i++;
  }

  // Worker loop of execute(): fetch a buffer from the dispatcher and feed it to
  // the pipeline with the highest index.
  std::stringstream executeFunction;
  auto startPipeline = this->pipelines.size() - 1;
  executeFunction << "while(dispatcher->hasWork() && variant->isValid()){\n"
                     " void *records = dispatcher->getWork(threadID, 0);\n"

                     " pipeline"
                  << startPipeline << "((record0 *) records, dispatcher->runLength, threadID, numaNode);\n";
  // The monitor call is only emitted in CM_OPTIMIZE mode.
  if (this->compileMode == CM_OPTIMIZE)
    executeFunction << " variant->runtime->monitor(threadID);\n";

  executeFunction << "}\n";

  execute.addInstruction(CMethod::Instruction(INSTRUCTION_SYSTEM, executeFunction.str()));

  // Build the remaining methods and add them to the file in this order:
  // open, init, close, migrateFrom, migrateTo, execute, getState.
  auto b = open.build();
  file.addMethod(b);

  auto in = init.build();
  file.addMethod(in);

  // close() is generated with an empty body.
  auto closeMethod = CMethod::builder().returns("void").withName("close");

  auto c = closeMethod.build();
  file.addMethod(c);

  auto m = migrateFrom.build();
  file.addMethod(m);

  auto t = migrateTo.build();
  file.addMethod(t);

  auto e = execute.build();
  file.addMethod(e);

  // getState() returns the statePtr array its first instruction (added in the
  // constructor) allocates.
  getState.addInstruction(CMethod::Instruction(INSTRUCTION_SYSTEM, "return statePtr;\n"));
  auto g = getState.build();
  file.addMethod(g);

  // Includes and file-level variables of the generated query file.
  CFile queryFile = file.withName("query.cpp")
                        .include("data_types.h")
                        .include("runtime/JitDispatcher.h")
                        .include("runtime/jit_global_state.hpp")
                        .include("runtime/JitRuntime.h")
                        .include("runtime/Variant.hpp")
                        .include("tbb/atomic.h")
                        .addStatement("GlobalState * globalState;")
                        .addStatement("Variant * variant;")
                        .build();
  return queryFile;
}
+ cg.ctx(pipeline).keyBy->name + "]"; 24 | } 25 | } else { 26 | cg.file.addStatement("tbb::atomic meta[" + numWindows + "];"); 27 | cg.file.addStatement("tbb::atomic triggerCount;"); 28 | } 29 | 30 | // trigger condition 31 | statements << "size_t count = meta[window]" + key + ".fetch_and_increment();" << std::endl; 32 | statements << "if(count == " + maxCountS + ") {" << std::endl; 33 | // statements << "triggerCount" + key + "++;" << std::endl; 34 | statements << "pipeline" + std::to_string(pipeline - 1) + "(state[window]" + key + ");" << std::endl; 35 | 36 | if (this->purge) { 37 | if (cg.ctx(pipeline).hasGroupBy && !cg.ctx(pipeline).hasKeyBy) { 38 | statements << "state[window].clear();" << std::endl; 39 | } else { 40 | statements << "state[window]" + key + " = {};" << std::endl; 41 | } 42 | } 43 | statements << "meta[window]" + key + " = 0;" << std::endl; 44 | statements << "}" << std::endl; 45 | 46 | // re-run loop, if window triggered 47 | statements << "if(count >= " + maxCountS + ") {" << std::endl; 48 | statements << "i--; continue;" << std::endl; 49 | statements << "}" << std::endl; 50 | 51 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_TRIGGER, statements.str())); 52 | } 53 | 54 | void ProcessingTimeTrigger::onBeforeAssign(CodeGenerator &cg, size_t pipeline) { 55 | 56 | std::stringstream statements; 57 | std::stringstream statements_main; 58 | std::string interval = std::to_string(every.time); 59 | std::string numWindows = std::to_string(cg.ctx(pipeline).numWindows); 60 | 61 | // trigger meta-data (timestamp, when each window needs to be triggered) 62 | std::string key = ""; 63 | 64 | statements << "int64_t ts = time(NULL);\n" << std::endl; 65 | statements << "if (ts >= thread_local_state->windowEnds[thread_local_state->current_window]) {" 66 | << "size_t old_window = thread_local_state->current_window;\n" 67 | << "// change the window state of this thread -> so from now on it will put tuple to the next window \n" 68 | << 
"thread_local_state->windowEnds[old_window] += (window_size" << pipeline << " * window_buffers" 69 | << pipeline << ");\n" 70 | << "thread_local_state->current_window = (old_window + 1) % window_buffers" << pipeline << ";\n" 71 | << "int64_t oldCount = window_state->global_tigger_counter.fetch_and_increment();\n"; 72 | 73 | statements << "if (oldCount == dispatcher->parallelism-1) {" 74 | << "window_state->global_tigger_counter = 0;\n"; 75 | cg.ctx(pipeline); 76 | 77 | if (cg.ctx(pipeline).hasGroupBy) { 78 | if (cg.config.getNuma()) { 79 | statements << "//merge local states \n"; 80 | statements << " for (int b = 0; b < " << cg.ctx(pipeline).maxKeyValue 81 | << "; b++) {\n" 82 | " state" 83 | << (pipeline) << "[old_window][b].count += state" << (pipeline) 84 | << "[old_window + 2][b].count;\n" 85 | "}"; 86 | } 87 | 88 | if (cg.ctx(pipeline).maxKeyValue != -1) { 89 | 90 | statements << "pipeline" << (pipeline - 1) << "(old_window,thread_id, numa_node);\n"; 91 | statements << "record" << (pipeline) << " t;"; 92 | statements << "std::fill(state" << pipeline << "[old_window]," 93 | << "state" << pipeline << "[old_window] + " << cg.ctx(pipeline).maxKeyValue << ", t);"; 94 | } else { 95 | statements << "pipeline" << (pipeline - 1) << "(state" << pipeline << "[old_window],thread_id, numa_node);\n"; 96 | 97 | statements << "state" << pipeline << "[old_window].clear();\n"; 98 | } 99 | } else { 100 | statements << "pipeline" << (pipeline - 1) << "(state" << (pipeline) << "[old_window],thread_id, numa_node);\n"; 101 | statements << "state" << pipeline << "[old_window] = {};\n"; 102 | } 103 | 104 | statements << "}}"; 105 | 106 | cg.main.addInstruction(CMethod::Instruction(INSTRUCTION_TRIGGER, statements_main.str())); 107 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_TRIGGER, statements.str())); 108 | } 109 | 110 | void PurgingTrigger::onBeforeElement(CodeGenerator &cg, size_t pipeline) { 111 | trigger->purge = true; 112 | trigger->onBeforeElement(cg, 
pipeline); 113 | } 114 | 115 | void PurgingTrigger::onBeforeAssign(CodeGenerator &cg, size_t pipeline) { 116 | trigger->purge = true; 117 | trigger->onBeforeAssign(cg, pipeline); 118 | } 119 | -------------------------------------------------------------------------------- /src/api/Predicate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "api/Predicate.h" 6 | 7 | void Predicate::produce(CodeGenerator &cg, Operator *input) { 8 | // get current pipeline id and save it for the consume-function 9 | pipeline = cg.currentPipeline(); 10 | 11 | // get the field to the field id 12 | if (pipeline == 0) 13 | field = &cg.ctx(pipeline).schema.get(fieldId); 14 | else 15 | field = &cg.ctx(pipeline - 1).schema.get(fieldId); 16 | // call produce for all and/or predicates 17 | for (Predicate *a : ands) { 18 | a->produce(cg, NULL); 19 | } 20 | for (Predicate *o : ors) { 21 | o->produce(cg, NULL); 22 | } 23 | 24 | if (input) 25 | input->produce(cg); 26 | } 27 | 28 | void Predicate::consume(CodeGenerator &cg, Operator *parent) { 29 | cg.file.include("string.h"); 30 | 31 | std::stringstream front_statements; 32 | 33 | if (cg.config.filterOpt() && cg.compileMode == CM_INSTRUMENT) { 34 | auto code = generatProfilingCode(cg); 35 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_FILTER, code)); 36 | } 37 | 38 | if (cg.config.filterOpt() && cg.compileMode == CM_OPTIMIZE) { 39 | front_statements << "if(" << generateAllOptimized(cg) << ") {" << std::endl; 40 | } else { 41 | front_statements << "if(" << generateAll(cg) << ") {" << std::endl; 42 | } 43 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_FILTER, front_statements.str())); 44 | 45 | if (parent != nullptr) { 46 | parent->consume(cg); 47 | } 48 | 49 | std::stringstream back_statements; 50 | back_statements << "}" << std::endl; 51 | 
cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_CLOSE, back_statements.str())); 52 | } 53 | 54 | std::string Equal::generate(CodeGenerator &c) { 55 | std::string fieldId = "record." + field->name; 56 | 57 | switch (field->dataType.t_) { 58 | case DataType::String: 59 | case DataType::Char: { 60 | if (c.compileMode == CM_OPTIMIZE) { 61 | c.file.addStatement("static const char viewChar[] = " + value + ";"); 62 | return "memcmp(" + fieldId + ", viewChar, 4) == 0"; 63 | } else { 64 | return "strcmp(" + fieldId + ", " + value + ") == 0"; 65 | } 66 | } 67 | case DataType::Boolean: 68 | case DataType::Int: 69 | case DataType::Long: 70 | case DataType::Double: 71 | return fieldId + " == " + value; 72 | default: 73 | throw std::invalid_argument("unsupported data type"); 74 | } 75 | } 76 | 77 | std::string Equal::to_string() { return fieldId + " = " + value; } 78 | 79 | std::string NotEqual::generate(CodeGenerator &c) { 80 | std::string fieldId = "record." + field->name; 81 | 82 | switch (field->dataType.t_) { 83 | case DataType::String: 84 | case DataType::Char: 85 | return "strcmp(" + fieldId + ", " + value + ") != 0"; 86 | case DataType::Boolean: 87 | case DataType::Int: 88 | case DataType::Long: 89 | case DataType::Double: 90 | return fieldId + " != " + value; 91 | default: 92 | throw std::invalid_argument("unsupported data type"); 93 | } 94 | } 95 | 96 | std::string NotEqual::to_string() { return fieldId + " != " + value; } 97 | 98 | std::string Greater::generate(CodeGenerator &c) { 99 | std::string fieldId = "record." 
+ field->name; 100 | 101 | switch (field->dataType.t_) { 102 | case DataType::String: 103 | case DataType::Char: 104 | return "strcmp(" + fieldId + ", " + value + ") > 0"; 105 | case DataType::Boolean: 106 | case DataType::Int: 107 | case DataType::Long: 108 | case DataType::Double: 109 | return fieldId + " > " + value; 110 | default: 111 | throw std::invalid_argument("unsupported data type"); 112 | } 113 | } 114 | 115 | std::string Greater::to_string() { return fieldId + " > " + value; } 116 | 117 | std::string GreaterEqual::generate(CodeGenerator &c) { 118 | std::string fieldId = "record." + field->name; 119 | 120 | switch (field->dataType.t_) { 121 | case DataType::String: 122 | case DataType::Char: 123 | return "strcmp(" + fieldId + ", " + value + ") >= 0"; 124 | case DataType::Boolean: 125 | case DataType::Int: 126 | case DataType::Long: 127 | case DataType::Double: 128 | return fieldId + " >= " + value; 129 | default: 130 | throw std::invalid_argument("unsupported data type"); 131 | } 132 | } 133 | 134 | std::string GreaterEqual::to_string() { return fieldId + " >= " + value; } 135 | 136 | std::string Less::generate(CodeGenerator &c) { 137 | std::string fieldId = "record." + field->name; 138 | 139 | switch (field->dataType.t_) { 140 | case DataType::String: 141 | case DataType::Char: 142 | return "strcmp(" + fieldId + ", " + value + ") < 0"; 143 | case DataType::Boolean: 144 | case DataType::Int: 145 | case DataType::Long: 146 | case DataType::Double: 147 | return fieldId + " < " + value; 148 | default: 149 | throw std::invalid_argument("unsupported data type"); 150 | } 151 | } 152 | 153 | std::string Less::to_string() { return fieldId + " < " + value; } 154 | 155 | std::string LessEqual::generate(CodeGenerator &c) { 156 | std::string fieldId = "record." 
+ field->name; 157 | 158 | switch (field->dataType.t_) { 159 | case DataType::String: 160 | case DataType::Char: 161 | return "strcmp(" + fieldId + ", " + value + ") <= 0"; 162 | case DataType::Boolean: 163 | case DataType::Int: 164 | case DataType::Long: 165 | case DataType::Double: 166 | return fieldId + " <= " + value; 167 | default: 168 | throw std::invalid_argument("unsupported data type"); 169 | } 170 | } 171 | 172 | std::string LessEqual::to_string() { return fieldId + " < " + value; } 173 | 174 | std::string Like::generate(CodeGenerator &c) { 175 | std::string fieldId = "record." + field->name; 176 | 177 | switch (field->dataType.t_) { 178 | case DataType::String: 179 | case DataType::Char: 180 | return "strstr(" + fieldId + ", " + value + ") != NULL"; 181 | default: 182 | throw std::invalid_argument("unsupported data type"); 183 | } 184 | } 185 | 186 | std::string Like::to_string() { return fieldId + " like " + value; } 187 | -------------------------------------------------------------------------------- /src/code_generation/CMethod.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "code_generation/CMethod.h" 8 | 9 | /* 10 | * CMethod Definition 11 | */ 12 | CMethod::CMethod(CMethod::Builder &builder) { 13 | std::stringstream method; 14 | 15 | // build method signature 16 | method << std::endl << builder.returnType << " " << builder.name << "("; 17 | for (size_t i = 0; i < builder.parameters.size(); ++i) { 18 | if (i != 0) 19 | method << ","; 20 | method << builder.parameters[i]; 21 | } 22 | method << ") {" << std::endl; 23 | 24 | // build statements 25 | std::stringstream statements; 26 | std::stringstream init_statements; 27 | std::stringstream final_statements; 28 | 29 | for (auto const &instruction : builder.instructions) { 30 | statements << instruction.statements; 31 | init_statements << instruction.init_statements; 32 | 
final_statements << instruction.final_statements; 33 | } 34 | method << init_statements.str() << statements.str() << final_statements.str() << "}" << std::endl; 35 | output = method.str(); 36 | } 37 | 38 | CMethod::Builder CMethod::builder() { return CMethod::Builder(); } 39 | 40 | /* 41 | * CMethod Instruction Definition 42 | */ 43 | CMethod::Instruction::Instruction(INSTRUCTION type, std::string init, std::string statements, std::string final) 44 | : type(type), init_statements(init), statements(statements), final_statements(final) {} 45 | CMethod::Instruction::Instruction(INSTRUCTION type, std::string statements, std::string final) 46 | : type(type), init_statements(std::string()), statements(statements), final_statements(final){}; 47 | CMethod::Instruction::Instruction(INSTRUCTION type, std::string statements) 48 | : type(type), init_statements(std::string()), statements(statements), final_statements(std::string()){}; 49 | 50 | const std::string CMethod::Instruction::to_string() const { 51 | switch (type) { 52 | case INSTRUCTION_FILTER: 53 | return "FILTER[" + statements.substr(3, statements.size() - 6) + "]\n"; 54 | case INSTRUCTION_GROUPBY: 55 | return "GROUPBY"; 56 | case INSTRUCTION_ORDERBY: 57 | return "ORDERBY"; 58 | case INSTRUCTION_AGGREGATE: 59 | return "AGGREGATE"; 60 | case INSTRUCTION_JOIN_BUILD: 61 | return "JOIN_BUILD"; 62 | case INSTRUCTION_JOIN_PROBE: 63 | return "JOIN_PROBE"; 64 | case INSTRUCTION_READ: 65 | return "READ"; 66 | case INSTRUCTION_WRITE: 67 | return "WRITE"; 68 | case INSTRUCTION_PRINT: 69 | return "PRINT"; 70 | case INSTRUCTION_TRIGGER: 71 | return "TRIGGER"; 72 | case INSTRUCTION_ASSIGNER: 73 | return "ASSIGNER"; 74 | case INSTRUCTION_CLOSE: 75 | return "CLOSE"; 76 | default: 77 | return "UNKNOWN_INSTRUCTION"; 78 | } 79 | } 80 | 81 | /* 82 | * CMethod Builder Definition 83 | */ 84 | CMethod::Builder::Builder() {} 85 | 86 | CMethod::Builder &CMethod::Builder::withName(const std::string &name_) { 87 | name = name_; 88 | return 
*this; 89 | } 90 | 91 | CMethod::Builder &CMethod::Builder::returns(const std::string &returnType_) { 92 | returnType = returnType_; 93 | return *this; 94 | } 95 | 96 | CMethod::Builder &CMethod::Builder::addParameter(const std::string ¶meter) { 97 | parameters.push_back(parameter); 98 | return *this; 99 | } 100 | 101 | CMethod::Builder &CMethod::Builder::prependInstruction(const Instruction &instruction) { 102 | instructions.insert(instructions.begin(), instruction); 103 | return *this; 104 | } 105 | 106 | CMethod::Builder &CMethod::Builder::addInstruction(const Instruction &instruction) { 107 | instructions.push_back(instruction); 108 | return *this; 109 | } 110 | 111 | CMethod CMethod::Builder::build() { return CMethod(*this); } 112 | 113 | /* 114 | * CMethod PlanEnumerator Definition 115 | */ 116 | CMethod::PipelineEnumerator::PipelineEnumerator(std::vector pipeline_instructions) { 117 | 118 | /* Count permutable instructions and copy them into vector. */ 119 | unsigned int i = 0; 120 | for (auto const &instruction : pipeline_instructions) { 121 | /* Currently only filter instructions are supported. */ 122 | if (instruction.type == INSTRUCTION_FILTER) { 123 | permutable_positions.push_back(i); 124 | current_permutation.push_back(instruction); 125 | } 126 | i++; 127 | } 128 | 129 | /* Managing starting point of permutation enumeration. */ 130 | number_of_instructions = current_permutation.size(); 131 | number_of_permutations = factorial(current_permutation.size()); 132 | number_of_current_permutation = 0; 133 | } 134 | 135 | CMethod::PipelineEnumerator::PipelineEnumerator(CMethod::Builder &builder) 136 | : CMethod::PipelineEnumerator(builder.instructions) {} 137 | 138 | void CMethod::PipelineEnumerator::getPermutation(CMethod::Builder &builder, unsigned int number_of_permutation) { 139 | 140 | assert(number_of_permutation < number_of_permutations); 141 | 142 | /* Permute the order of statements till it meets the given number. 
*/ 143 | while (number_of_current_permutation != number_of_permutation) { 144 | getNext(); 145 | } 146 | 147 | /* Push permuted instructions into corresponding positions of instruction vector. */ 148 | for (unsigned int i = 0; i != permutable_positions.size(); ++i) { 149 | const size_t position = permutable_positions[i]; 150 | builder.instructions[position] = current_permutation[i]; 151 | } 152 | } 153 | 154 | void CMethod::PipelineEnumerator::printPermutations() { 155 | std::cout << "Pipeline consists of " << number_of_instructions << " permutable instructions." << std::endl; 156 | std::cout << "There are " << number_of_permutations << " permuations of this pipeline." << std::endl; 157 | 158 | /* Print permutation. */ 159 | for (unsigned int i = 0; i != number_of_permutations; i++) { 160 | std::cout << "Permutation " << i << ":" << std::endl; 161 | for (auto const &instruction : current_permutation) { 162 | std::cout << "\t" << instruction.to_string(); 163 | } 164 | getNext(); 165 | } 166 | std::cout << std::endl; 167 | } 168 | 169 | void CMethod::PipelineEnumerator::getNext() { 170 | std::next_permutation(current_permutation.begin(), current_permutation.end()); 171 | number_of_current_permutation = (number_of_current_permutation + 1) % number_of_permutations; 172 | } 173 | 174 | unsigned int CMethod::PipelineEnumerator::factorial(unsigned int n) { 175 | return (n == 1 || n == 0) ? 
1 : factorial(n - 1) * n; 176 | } 177 | -------------------------------------------------------------------------------- /src/api/Query.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "api/Query.h" 16 | #include "code_generation/CodeGenerator.h" 17 | #include "jit/CodeCompiler.hpp" 18 | #include "jit/JITExecutionRuntime.h" 19 | #include "operator/AggregateOperator.h" 20 | #include "operator/FilterOperator.h" 21 | #include "operator/GroupByOperator.h" 22 | #include "operator/InputOperator.h" 23 | #include "operator/KeyOperator.h" 24 | #include "operator/MapOperator.h" 25 | #include "operator/PrintOperator.h" 26 | #include "operator/ReadOperator.h" 27 | #include "operator/ReadWindowOperator.h" 28 | #include "operator/SelectOperator.h" 29 | #include "operator/WindowOperator.h" 30 | #include "operator/WriteOperator.h" 31 | #include "tbb/concurrent_unordered_map.h" 32 | 33 | Query::Query(Config &config, Schema &schema) : schema(schema), config(config) { current = NULL; } 34 | 35 | Query::~Query() {} 36 | 37 | Query Query::generate(Config &config, Schema &schema, std::string path) { 38 | Query *q = new Query(config, schema); 39 | config.setSourceFile(path); 40 | q->current = new InputOperator(BinaryFile, path, new ReadOperator(schema)); 41 | // q->current = new InputOperator(type, path); 42 | q->root = q->current; 43 | // ReadOperator* readOp = new ReadOperator(schema); 44 | // readOp->parent = q->root; 45 | return *q; 46 | } 47 | 48 | void Query::generate() { 49 | CodeGenerator codeGenerator = CodeGenerator(config, schema, CM_DEFAULT); 50 | QueryContext queryContext = QueryContext(schema); 51 | codeGenerator.addQueryContext(queryContext); 52 | current->produce(codeGenerator); 53 | Operator *input = root; 54 | while (input->rightChild) { 55 | 
input = input->rightChild; 56 | } 57 | InputOperator *inputOp = (InputOperator *)input; 58 | auto file = codeGenerator.generate(inputOp->getInputTypeAsString(), inputOp->getPath()); 59 | codeGenerator.writeToFile(file); 60 | codeGenerator.compileCode(); 61 | } 62 | 63 | void Query::execute() { 64 | auto jitExecutionRuntime = new JITExecutionRuntime(); 65 | jitExecutionRuntime->execute(this); 66 | } 67 | 68 | /* 69 | * Relational Operators 70 | */ 71 | Query &Query::filter(Predicate &predicate) { 72 | Operator *newOp = new FilterOperator(predicate, current); 73 | if (current) 74 | newOp->rightChild = current; 75 | root = newOp; 76 | current = newOp; 77 | return *this; 78 | } 79 | 80 | Query &Query::select(std::vector fields) { 81 | Operator *newOp = new SelectOperator(current, fields); 82 | if (current) 83 | newOp->rightChild = current; 84 | root = newOp; 85 | current = newOp; 86 | return *this; 87 | } 88 | 89 | Query &Query::groupBy(std::string fieldId, int maxValue) { 90 | Operator *newOp = new GroupByOperator(schema.get(fieldId), current, maxValue); 91 | if (current) 92 | newOp->rightChild = current; 93 | root = newOp; 94 | current = newOp; 95 | return *this; 96 | } 97 | 98 | Query &Query::groupBy(std::string fieldId) { 99 | Operator *newOp = new GroupByOperator(schema.get(fieldId), current); 100 | if (current) 101 | newOp->rightChild = current; 102 | root = newOp; 103 | current = newOp; 104 | return *this; 105 | } 106 | 107 | Query &Query::aggregate(Aggregation &&aggregation) { 108 | // TODO: diff between window and batch 109 | Operator *newOp = new ReadWindowOperator(schema, new AggregateOperator(aggregation, current)); 110 | if (current) 111 | newOp->rightChild = current; 112 | root = newOp; 113 | current = newOp; 114 | if (aggregation.hasFinalAggregation()) { 115 | Operator *newOp2 = new FinalWindowAggOperator(&aggregation, current); 116 | newOp2->rightChild = current; 117 | root = newOp2; 118 | current = newOp2; 119 | } 120 | 121 | return *this; 122 | } 123 
| 124 | /* 125 | * Streaming Operators 126 | */ 127 | Query &Query::window(Window &&window) { 128 | Operator *newOp = new WindowOperator(window.assigner, window.trigger, current); 129 | if (current) 130 | newOp->rightChild = current; 131 | root = newOp; 132 | current = newOp; 133 | return *this; 134 | } 135 | 136 | Query &Query::map(Mapper &&mapper) { 137 | Operator *newOp = new MapOperator(mapper, current); 138 | if (current) 139 | newOp->rightChild = current; 140 | root = newOp; 141 | current = newOp; 142 | return *this; 143 | } 144 | 145 | /* 146 | * Input Operators 147 | */ 148 | Query &Query::input(InputType type, std::string path) { 149 | assert(0); 150 | Operator *newOp = new InputOperator(type, path, current); 151 | if (current) 152 | newOp->rightChild = current; 153 | root = newOp; 154 | current = newOp; 155 | return *this; 156 | } 157 | 158 | /* 159 | * Output Operators 160 | */ 161 | Query &Query::write(std::string fileName) { 162 | Operator *newOp = new WriteOperator(fileName, current); 163 | if (current) 164 | newOp->rightChild = current; 165 | root = newOp; 166 | current = newOp; 167 | return *this; 168 | } 169 | 170 | Query &Query::print() { 171 | Operator *newOp = new PrintOperator(current); 172 | if (current) 173 | newOp->rightChild = current; 174 | root = newOp; 175 | current = newOp; 176 | return *this; 177 | } 178 | 179 | Query &Query::toOutputBuffer() { 180 | Operator *newOp = new WriteToMemOperator(current); 181 | if (current) 182 | newOp->rightChild = current; 183 | root = newOp; 184 | current = newOp; 185 | return *this; 186 | } 187 | 188 | void Query::printQueryPlan(Operator *p, int indent) { 189 | // Taken from https://stackoverflow.com/questions/13484943/print-a-binary-tree-in-a-pretty-way 190 | 191 | if (p != NULL) { 192 | if (p->rightChild) { 193 | printQueryPlan(p->rightChild, indent + 4); 194 | } 195 | if (indent) { 196 | std::cout << std::setw(indent) << ' '; 197 | } 198 | if (p->rightChild) 199 | std::cout << " /\n" << 
std::setw(indent) << ' '; 200 | std::cout << p->to_string() << "\n "; 201 | if (p->leftChild) { 202 | std::cout << std::setw(indent) << ' ' << " \\\n"; 203 | printQueryPlan(p->leftChild, indent + 4); 204 | } 205 | } 206 | } 207 | 208 | void Query::printQueryPlan(Query query) { 209 | std::cout << "Query Plan " << std::string(69, '-') << std::endl; 210 | 211 | if (query.root == NULL) { 212 | printf("No root node; cant print queryplan\n"); 213 | } else { 214 | printQueryPlan(query.current, 0); 215 | printf("\n"); 216 | } 217 | } 218 | 219 | void Query::printPipelinePermutations(Query query) { 220 | std::cout << "Query Plan - Permutations of the longest Pipeline " << std::string(30, '-') << std::endl; 221 | 222 | /* Produce Code Generator */ 223 | CodeGenerator code_generator = CodeGenerator(query.config, query.schema, CM_DEFAULT); 224 | QueryContext query_context = QueryContext(query.schema); 225 | code_generator.addQueryContext(query_context); 226 | query.current->produce(code_generator); 227 | 228 | /* Choose longest Pipeline and get enumerator. */ 229 | CMethod::Builder longest_pipeline = code_generator.pipeline(code_generator.longestPipeline()); 230 | CMethod::PipelineEnumerator enumerator = CMethod::PipelineEnumerator(longest_pipeline); 231 | 232 | /* Print all permutations. 
*/ 233 | enumerator.printPermutations(); 234 | std::cout << std::endl; 235 | } -------------------------------------------------------------------------------- /include/api/Predicate.h: -------------------------------------------------------------------------------- 1 | #ifndef API_PREDICATE_H 2 | #define API_PREDICATE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "api/Schema.h" 10 | #include "code_generation/CodeGenerator.h" 11 | #include "operator/Operator.h" 12 | 13 | class Predicate { 14 | public: 15 | Predicate(std::string fieldId, std::string value) : fieldId(fieldId), value("\"" + value + "\""){}; 16 | 17 | Predicate(std::string fieldId, long value) : fieldId(fieldId), value(std::to_string(value)){}; 18 | 19 | Predicate(std::string fieldId, Field &field2) : fieldId(fieldId), value("record." + field2.name){}; 20 | 21 | virtual ~Predicate() {} 22 | 23 | std::string fieldId; 24 | std::string value; 25 | Field *field; 26 | std::vector ands; 27 | std::vector ors; 28 | 29 | virtual std::string to_string() { return "predicate"; }; 30 | 31 | virtual void produce(CodeGenerator &cg, Operator *parent); 32 | 33 | virtual void consume(CodeGenerator &cg, Operator *parent); 34 | 35 | Predicate *And(Predicate *And) { 36 | ands.push_back(And); 37 | return this; 38 | } 39 | 40 | virtual Predicate &And(Predicate &&And) { 41 | ands.push_back(&And); 42 | return *this; 43 | } 44 | 45 | Predicate *Or(Predicate *Or) { 46 | ors.push_back(Or); 47 | return this; 48 | } 49 | 50 | virtual Predicate &Or(Predicate &&Or) { 51 | ors.push_back(&Or); 52 | return *this; 53 | } 54 | 55 | protected: 56 | size_t pipeline; 57 | 58 | virtual std::string generate(CodeGenerator &c) = 0; 59 | 60 | virtual std::string generatProfilingCode(CodeGenerator &c) { 61 | c.profilingDataManager->registerSelectivityHandler("select", 1 + ands.size() + ors.size()); 62 | std::stringstream buffer; 63 | buffer << "if(thread_id==0){\n"; 64 | buffer << "auto sel = 
variant->profilingDataManager->getSelectivityHandler(\"select\");\n"; 65 | int i = 0; 66 | buffer << "sel->update(" << i++ << "," << generate(c) << ");\n"; 67 | 68 | for (Predicate *a : ands) { 69 | buffer << "sel->update(" << i++ << "," << a->generateAll(c) << ");\n"; 70 | } 71 | 72 | buffer << "sel->operator++();}\n"; 73 | 74 | return buffer.str(); 75 | } 76 | 77 | virtual std::string generateAllOptimized(CodeGenerator &c) { 78 | SelectivityHandler *handler = c.profilingDataManager->getSelectivityHandler("select"); 79 | auto values = handler->getValue(); 80 | 81 | std::vector v; 82 | std::vector predicates; 83 | predicates.push_back(generate(c)); 84 | for (Predicate *a : ands) { 85 | predicates.push_back(a->generateAll(c)); 86 | } 87 | 88 | std::cout << "Counted Selectivity across " << handler->counter << " Tuple" << std::endl; 89 | 90 | for (uint64_t i = 0; i < (1 + ands.size() + ors.size()); i++) { 91 | v.push_back(values[i]); 92 | std::cout << "Selectivity of predicate: " << i << " is " << ((double)values[i] / (double)handler->counter) 93 | << std::endl; 94 | } 95 | 96 | int s = v.size(); 97 | std::string predicate; 98 | for (int i = 0; i < s; i++) { 99 | uint64_t minI = 0; 100 | for (int b = 1; b < s; b++) { 101 | if (v[minI] > v[b]) { 102 | minI = b; 103 | } 104 | } 105 | v[minI] = INT64_MAX; 106 | if (i == 0) 107 | predicate = predicates[minI]; 108 | else 109 | predicate += " && (" + predicates[minI] + ")"; 110 | } 111 | 112 | /* 113 | 114 | std::string predicate = generate(c); 115 | 116 | // add AND-predicates 117 | if (ands.size() > 0) { 118 | predicate += " && ("; 119 | int i = 0; 120 | for (Predicate *a : ands) { 121 | predicate += a->generateAll(c); 122 | i++; 123 | if (i < ands.size()) { 124 | predicate += " && "; 125 | } 126 | } 127 | predicate += ")"; 128 | } 129 | 130 | // add OR-predicates 131 | if (ors.size() > 0) { 132 | predicate += " || ("; 133 | for (Predicate *o : ors) { 134 | predicate += o->generateAll(c); 135 | } 136 | predicate += ")"; 
137 | } 138 | */ 139 | return predicate; 140 | } 141 | 142 | virtual std::string generateAll(CodeGenerator &c) { 143 | 144 | std::string predicate = generate(c); 145 | 146 | // add AND-predicates 147 | if (ands.size() > 0) { 148 | predicate += " && ("; 149 | int i = 0; 150 | for (Predicate *a : ands) { 151 | predicate += a->generateAll(c); 152 | i++; 153 | if (i < ands.size()) { 154 | predicate += " && "; 155 | } 156 | } 157 | predicate += ")"; 158 | } 159 | 160 | // add OR-predicates 161 | if (ors.size() > 0) { 162 | predicate += " || ("; 163 | for (Predicate *o : ors) { 164 | predicate += o->generateAll(c); 165 | } 166 | predicate += ")"; 167 | } 168 | return predicate; 169 | } 170 | }; 171 | 172 | class Equal : public Predicate { 173 | public: 174 | Equal(std::string fieldId, std::string value) : Predicate(fieldId, value) {} 175 | 176 | Equal(std::string fieldId, long value) : Predicate(fieldId, value) {} 177 | 178 | Equal(std::string fieldId, Field &field2) : Predicate(fieldId, field2){}; 179 | 180 | std::string to_string() override; 181 | 182 | protected: 183 | std::string generate(CodeGenerator &c) override; 184 | }; 185 | 186 | class NotEqual : public Predicate { 187 | public: 188 | NotEqual(std::string fieldId, std::string value) : Predicate(fieldId, value) {} 189 | 190 | NotEqual(std::string fieldId, long value) : Predicate(fieldId, value) {} 191 | 192 | NotEqual(std::string fieldId, Field &field2) : Predicate(fieldId, field2){}; 193 | 194 | std::string to_string() override; 195 | 196 | protected: 197 | std::string generate(CodeGenerator &c) override; 198 | }; 199 | 200 | class Greater : public Predicate { 201 | public: 202 | Greater(std::string fieldId, std::string value) : Predicate(fieldId, value) {} 203 | 204 | Greater(std::string fieldId, long value) : Predicate(fieldId, value) {} 205 | 206 | Greater(std::string fieldId, Field &field2) : Predicate(fieldId, field2){}; 207 | 208 | std::string to_string() override; 209 | 210 | protected: 211 | 
std::string generate(CodeGenerator &c) override; 212 | }; 213 | 214 | class GreaterEqual : public Predicate { 215 | public: 216 | GreaterEqual(std::string fieldId, std::string value) : Predicate(fieldId, value) {} 217 | 218 | GreaterEqual(std::string fieldId, long value) : Predicate(fieldId, value) {} 219 | 220 | GreaterEqual(std::string fieldId, Field &field2) : Predicate(fieldId, field2){}; 221 | 222 | std::string to_string() override; 223 | 224 | protected: 225 | std::string generate(CodeGenerator &c) override; 226 | }; 227 | 228 | class Less : public Predicate { 229 | public: 230 | Less(std::string fieldId, std::string value) : Predicate(fieldId, value) {} 231 | 232 | Less(std::string fieldId, long value) : Predicate(fieldId, value) {} 233 | 234 | Less(std::string fieldId, Field &field2) : Predicate(fieldId, field2){}; 235 | 236 | std::string to_string() override; 237 | 238 | protected: 239 | std::string generate(CodeGenerator &c) override; 240 | }; 241 | 242 | class LessEqual : public Predicate { 243 | public: 244 | LessEqual(std::string fieldId, std::string value) : Predicate(fieldId, value) {} 245 | 246 | LessEqual(std::string fieldId, long value) : Predicate(fieldId, value) {} 247 | 248 | LessEqual(std::string fieldId, Field &field2) : Predicate(fieldId, field2){}; 249 | 250 | std::string to_string() override; 251 | 252 | protected: 253 | std::string generate(CodeGenerator &c) override; 254 | }; 255 | 256 | class Like : public Predicate { 257 | public: 258 | Like(std::string fieldId, std::string value) : Predicate(fieldId, value) {} 259 | 260 | Like(std::string fieldId, long value) : Predicate(fieldId, value) {} 261 | 262 | Like(std::string fieldId, Field &field2) : Predicate(fieldId, field2){}; 263 | 264 | std::string to_string() override; 265 | 266 | protected: 267 | std::string generate(CodeGenerator &c) override; 268 | }; 269 | 270 | #endif // API_PREDICATE_H 271 | -------------------------------------------------------------------------------- 
/src/code_generation/CodeGenerator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "code_generation/CodeGenerator.h" 10 | 11 | #define GetCurrentDir getcwd 12 | 13 | std::string GetCurrentWorkingDir(void) { 14 | char buff[FILENAME_MAX]; 15 | GetCurrentDir(buff, FILENAME_MAX); 16 | std::string current_working_dir(buff); 17 | return current_working_dir; 18 | } 19 | 20 | std::string GENERATED_PATH; 21 | 22 | CodeGenerator::CodeGenerator(Config &config, Schema &schema, CompileMode mode) : config(config), compileMode(mode) { 23 | 24 | std::string parallelism = std::to_string(config.getParallelism()); 25 | std::string bufferSize = std::to_string(config.getBufferSize()); 26 | schemaStructs = {}; 27 | 28 | GENERATED_PATH = GetCurrentWorkingDir() + "/jit-generated-code/"; 29 | std::cout << "Current working directory: " << GENERATED_PATH << std::endl; 30 | // initialize dispatcher 31 | // TODO:make type and path dynamic 32 | 33 | file.addStatement("static Dispatcher *dispatcher;"); 34 | // start initial pipeline 35 | startPipeline(); 36 | } 37 | 38 | CFile CodeGenerator::generate(std::string type, std::string path) { 39 | 40 | std::string parallelism = std::to_string(config.getParallelism()); 41 | std::string bufferSize = std::to_string(config.getBufferSize()); 42 | 43 | // Generate data_types.h file 44 | CFile::Builder dataTypesBuilder = CFile::builder().withName("data_types.h").include("tbb/atomic.h"); 45 | 46 | // add schema structs 47 | size_t i = 0; 48 | for (auto &context : queryContexts) { 49 | generateStruct(context.schema, "record", i, context.isAggregation); 50 | i++; 51 | } 52 | 53 | for (auto code : this->schemaStructs) { 54 | dataTypesBuilder.addCode(code); 55 | } 56 | 57 | // Generate data_types.h file 58 | CFile dataTypes = dataTypesBuilder.build(); 59 | writeToFile(dataTypes); 60 | 61 | // Generate code to run the 
query 62 | file.include("runtime/runtime.h"); 63 | auto code = std::string(""); 64 | // code += "auto file_path = std::string(argv[5]);\n"; 65 | // code += "std::cout << \"runLength=\" << runLength << \" bufferSize=\" << buffer_size << \" dop=\" << parallelism 66 | // << \" runs=\" << runs << std::endl;\n"; code += "dispatcher= new Dispatcher(runLength, parallelism, buffer_size, 67 | // runs, 78, true);\n runtime::init(dispatcher);\n"; code += "dispatcher->setInput(" + type + ", file_path);\n"; code 68 | // += "runtime::run(query);\n"; 69 | main.addInstruction(CMethod::Instruction(INSTRUCTION_SYSTEM, code)); 70 | 71 | // Generate query method 72 | std::string maxPipeline = std::to_string(pipelines.size() - 1); 73 | 74 | std::stringstream statements; 75 | statements << "while(dispatcher->hasWork()) {" << std::endl; 76 | statements << "for(unsigned int currentBuffer = 0; currentBuffer < dispatcher->bufferRuns; currentBuffer++ ){" 77 | << std::endl; 78 | statements << "void* records = dispatcher->getWork(threadId, currentBuffer);" << std::endl; 79 | if (config.getNuma()) { 80 | statements << "pipeline" << maxPipeline << "((record0 *) records, " 81 | << "dispatcher->runLength" 82 | << ", threadId, dispatcher->numa_relation[threadId]);" << std::endl; 83 | } else { 84 | statements << "pipeline" << maxPipeline << "((record0 *) records, " 85 | << "dispatcher->runLength" 86 | << ", threadId, 1);" << std::endl; 87 | } 88 | statements << "}" << std::endl; 89 | statements << "runtime::passes++;" << std::endl; 90 | statements << "}" << std::endl; 91 | 92 | CMethod::Builder query = CMethod::builder() 93 | .withName("query") 94 | .returns("void") 95 | .addParameter("int threadId") 96 | .addInstruction(CMethod::Instruction(INSTRUCTION_SYSTEM, statements.str())); 97 | 98 | CMethod queryMethod = query.build(); 99 | 100 | // Generate main method 101 | CMethod mainMethod = 102 | main.withName("open").returns("void").addParameter("int buffer_size").addParameter("int 
parallelism").build(); 103 | 104 | // Pipeline Permutation: permute longest pipeline if needed 105 | if (config.getPipelinePermutation() != 0) { 106 | CMethod::PipelineEnumerator enumerator = CMethod::PipelineEnumerator(pipelines[longestPipeline()]); 107 | enumerator.getPermutation(pipelines[longestPipeline()], config.getPipelinePermutation()); 108 | } 109 | 110 | // Generate pipelines 111 | i = 0; 112 | for (auto &pipeline : pipelines) { 113 | CMethod pipelineMethod = pipeline.withName("pipeline" + std::to_string(i)) 114 | .addParameter("int thread_id") 115 | .addParameter("int numa_node") 116 | .returns("void") 117 | .build(); 118 | file.addMethod(pipelineMethod); 119 | i++; 120 | } 121 | 122 | CFile mainFile = 123 | file.withName("query.cpp").include("data_types.h").addMethod(queryMethod).addMethod(mainMethod).build(); 124 | // writeToFile(mainFile); 125 | return mainFile; 126 | } 127 | 128 | void CodeGenerator::startPipeline() { pipelines.push_back(CMethod::builder()); } 129 | 130 | size_t CodeGenerator::currentPipeline() { return pipelines.size() - 1; } 131 | 132 | size_t CodeGenerator::longestPipeline() { 133 | size_t longest_pipeline = 0; 134 | for (size_t i = 0; i != pipelines.size(); ++i) { 135 | if (pipelines[i].instructions.size() > longest_pipeline) { 136 | longest_pipeline = i; 137 | } 138 | } 139 | return longest_pipeline; 140 | } 141 | 142 | CMethod::Builder &CodeGenerator::pipeline(size_t id) { return pipelines[id]; } 143 | 144 | void CodeGenerator::addQueryContext(QueryContext ctx) { 145 | queryContexts.push_back(ctx); 146 | // queryContexts.push_back(ctx); 147 | } 148 | 149 | QueryContext &CodeGenerator::ctx(size_t id) { return queryContexts[id]; } 150 | 151 | void CodeGenerator::compileCode() { 152 | // compile query 153 | std::cout << "Start compiling query" << std::endl; 154 | std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); 155 | std::string compile("clang++ -o0 -g -std=c++11 -I " + GENERATED_PATH + " " + 
GENERATED_PATH + "engine.cpp -o " + 156 | GENERATED_PATH + "query " + " -pthread -ltbb -lnuma"); 157 | std::cout << "run cmpString: " << compile << std::endl; 158 | system(compile.c_str()); 159 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 160 | double elapsed = std::chrono::duration_cast(end - begin).count() / 1000000.0; 161 | std::cout << "Compiled query in " + std::to_string(elapsed) + "s" << std::endl; 162 | } 163 | 164 | /* 165 | * compiles and runs the query 166 | */ 167 | void CodeGenerator::run() { 168 | // run query 169 | std::cout << "Running query" << std::endl << "-----------------" << std::endl; 170 | std::string run(GENERATED_PATH + "query"); 171 | system(run.c_str()); 172 | } 173 | 174 | /* 175 | * Generates a struct for the given schema 176 | */ 177 | CCode CodeGenerator::generateStruct(Schema &schema, std::string name, size_t id, bool useAtomic) { 178 | 179 | CCode::Builder builder = 180 | CCode::builder().addStatement("struct __attribute__((packed)) " + name + std::to_string(id) + "{"); 181 | 182 | for (auto it = schema.fields.begin(); it != schema.fields.end(); ++it) { 183 | // for (auto &mapEntry: schema.fields) { 184 | const Field &field = *it; 185 | std::string type = field.dataType.cType(); 186 | if (useAtomic) { 187 | type = "tbb::atomic<" + type + ">"; 188 | } 189 | 190 | if (field.size > 1) { 191 | builder.addStatement(type + " " + field.name + "[" + std::to_string(field.size) + "];"); 192 | } else { 193 | builder.addStatement(type + " " + field.name + ";"); 194 | } 195 | } 196 | 197 | builder.addStatement("};"); 198 | auto code = builder.build(); 199 | this->schemaStructs.push_back(code); 200 | return code; 201 | } 202 | 203 | void CodeGenerator::writeToFile(CFile &file) { 204 | std::string path = GENERATED_PATH + file.name; 205 | std::ofstream generated_file(path.c_str(), std::ios::trunc | std::ios::out); 206 | generated_file << file.output; 207 | // std::cout << file.output << std::endl; 208 | // 
std::cout << "writeToFile: " << path.c_str() << std::endl; 209 | generated_file.close(); 210 | } 211 | -------------------------------------------------------------------------------- /cmake/FindLLVM.cmake: -------------------------------------------------------------------------------- 1 | # Find LLVM headers and libraries. 2 | # Source: https://github.com/ldc-developers/ldc/blob/master/cmake/Modules/FindLLVM.cmake 3 | # 4 | # This module locates LLVM and adapts the llvm-config output for use with 5 | # CMake. 6 | # 7 | # A given list of COMPONENTS is passed to llvm-config. 8 | # 9 | # The following variables are defined: 10 | # LLVM_FOUND - true if LLVM was found 11 | # LLVM_CXXFLAGS - C++ compiler flags for files that include LLVM headers. 12 | # LLVM_HOST_TARGET - Target triple used to configure LLVM. 13 | # LLVM_INCLUDE_DIRS - Directory containing LLVM include files. 14 | # LLVM_LDFLAGS - Linker flags to add when linking against LLVM 15 | # (includes -LLLVM_LIBRARY_DIRS). 16 | # LLVM_LIBRARIES - Full paths to the library files to link against. 17 | # LLVM_LIBRARY_DIRS - Directory containing LLVM libraries. 18 | # LLVM_ROOT_DIR - The root directory of the LLVM installation. 19 | # llvm-config is searched for in ${LLVM_ROOT_DIR}/bin. 20 | # LLVM_VERSION_MAJOR - Major version of LLVM. 21 | # LLVM_VERSION_MINOR - Minor version of LLVM. 22 | # LLVM_VERSION_STRING - Full LLVM version string (e.g. 6.0.0svn). 23 | # LLVM_VERSION_BASE_STRING - Base LLVM version string without git/svn suffix (e.g. 6.0.0). 24 | # 25 | # Note: The variable names were chosen in conformance with the offical CMake 26 | # guidelines, see ${CMAKE_ROOT}/Modules/readme.txt. 27 | 28 | # Try suffixed versions to pick up the newest LLVM install available on Debian 29 | # derivatives. 30 | # We also want an user-specified LLVM_ROOT_DIR to take precedence over the 31 | # system default locations such as /usr/local/bin. 
Executing find_program() 32 | # multiples times is the approach recommended in the docs. 33 | 34 | set(llvm_config_names llvm-config-8.0 llvm-config80 35 | llvm-config-7.0 llvm-config70 36 | llvm-config-6.0 llvm-config60 37 | llvm-config-5.0 llvm-config50 38 | llvm-config-4.0 llvm-config40 39 | llvm-config-3.9 llvm-config39 40 | llvm-config-3.8 llvm-config38 41 | llvm-config-3.7 llvm-config37 42 | llvm-config-3.6 llvm-config36 43 | llvm-config) 44 | find_program(LLVM_CONFIG 45 | NAMES ${llvm_config_names} 46 | PATHS ${LLVM_ROOT_DIR}/bin NO_DEFAULT_PATH 47 | DOC "Path to llvm-config tool.") 48 | find_program(LLVM_CONFIG NAMES ${llvm_config_names}) 49 | 50 | # Prints a warning/failure message depending on the required/quiet flags. Copied 51 | # from FindPackageHandleStandardArgs.cmake because it doesn't seem to be exposed. 52 | macro(_LLVM_FAIL _msg) 53 | if (LLVM_FIND_REQUIRED) 54 | message(FATAL_ERROR "${_msg}") 55 | else () 56 | if (NOT LLVM_FIND_QUIETLY) 57 | message(STATUS "${_msg}") 58 | endif () 59 | endif () 60 | endmacro() 61 | 62 | 63 | if (NOT LLVM_CONFIG) 64 | if (NOT LLVM_FIND_QUIETLY) 65 | message(WARNING "Could not find llvm-config (LLVM >= ${LLVM_FIND_VERSION}). 
Try manually setting LLVM_CONFIG to the llvm-config executable of the installation to use.") 66 | endif () 67 | else () 68 | macro(llvm_set var flag) 69 | if (LLVM_FIND_QUIETLY) 70 | set(_quiet_arg ERROR_QUIET) 71 | endif () 72 | set(result_code) 73 | execute_process( 74 | COMMAND ${LLVM_CONFIG} --${flag} 75 | RESULT_VARIABLE result_code 76 | OUTPUT_VARIABLE LLVM_${var} 77 | OUTPUT_STRIP_TRAILING_WHITESPACE 78 | ${_quiet_arg} 79 | ) 80 | if (result_code) 81 | _LLVM_FAIL("Failed to execute llvm-config ('${LLVM_CONFIG}', result code: '${result_code})'") 82 | else () 83 | if (${ARGV2}) 84 | file(TO_CMAKE_PATH "${LLVM_${var}}" LLVM_${var}) 85 | endif () 86 | endif () 87 | endmacro() 88 | macro(llvm_set_libs var flag) 89 | if (LLVM_FIND_QUIETLY) 90 | set(_quiet_arg ERROR_QUIET) 91 | endif () 92 | set(result_code) 93 | execute_process( 94 | COMMAND ${LLVM_CONFIG} --${flag} ${LLVM_FIND_COMPONENTS} 95 | RESULT_VARIABLE result_code 96 | OUTPUT_VARIABLE tmplibs 97 | OUTPUT_STRIP_TRAILING_WHITESPACE 98 | ${_quiet_arg} 99 | ) 100 | if (result_code) 101 | _LLVM_FAIL("Failed to execute llvm-config ('${LLVM_CONFIG}', result code: '${result_code})'") 102 | else () 103 | file(TO_CMAKE_PATH "${tmplibs}" tmplibs) 104 | string(REGEX MATCHALL "${pattern}[^ ]+" LLVM_${var} ${tmplibs}) 105 | endif () 106 | endmacro() 107 | 108 | llvm_set(VERSION_STRING version) 109 | llvm_set(CXXFLAGS cxxflags) 110 | llvm_set(HOST_TARGET host-target) 111 | llvm_set(INCLUDE_DIRS includedir true) 112 | llvm_set(ROOT_DIR prefix true) 113 | llvm_set(ENABLE_ASSERTIONS assertion-mode) 114 | 115 | # The LLVM version string _may_ contain a git/svn suffix, so cut that off 116 | string(SUBSTRING "${LLVM_VERSION_STRING}" 0 5 LLVM_VERSION_BASE_STRING) 117 | 118 | # Versions below 3.9 do not support components debuginfocodeview, globalisel, ipa 119 | list(REMOVE_ITEM LLVM_FIND_COMPONENTS "debuginfocodeview" index) 120 | list(REMOVE_ITEM LLVM_FIND_COMPONENTS "globalisel" index) 121 | list(REMOVE_ITEM 
LLVM_FIND_COMPONENTS "ipa" index) 122 | if (${LLVM_VERSION_STRING} MATCHES "^3\\.[0-9][\\.0-9A-Za-z]*") 123 | # Versions below 4.0 do not support component debuginfomsf 124 | list(REMOVE_ITEM LLVM_FIND_COMPONENTS "debuginfomsf" index) 125 | endif () 126 | if (${LLVM_VERSION_STRING} MATCHES "^[3-5]\\..*") 127 | # Versions below 6.0 do not support component windowsmanifest 128 | list(REMOVE_ITEM LLVM_FIND_COMPONENTS "windowsmanifest" index) 129 | endif () 130 | 131 | llvm_set(LDFLAGS ldflags) 132 | # In LLVM 3.5+, the system library dependencies (e.g. "-lz") are accessed 133 | # using the separate "--system-libs" flag. 134 | llvm_set(SYSTEM_LIBS system-libs) 135 | string(REPLACE "\n" " " LLVM_LDFLAGS "${LLVM_LDFLAGS} ${LLVM_SYSTEM_LIBS}") 136 | llvm_set(LIBRARY_DIRS libdir true) 137 | llvm_set_libs(LIBRARIES libs) 138 | # LLVM bug: llvm-config --libs tablegen returns -lLLVM-3.8.0 139 | # but code for it is not in shared library 140 | if ("${LLVM_FIND_COMPONENTS}" MATCHES "tablegen") 141 | if (NOT "${LLVM_LIBRARIES}" MATCHES "LLVMTableGen") 142 | set(LLVM_LIBRARIES "${LLVM_LIBRARIES};-lLLVMTableGen") 143 | endif () 144 | endif () 145 | 146 | if (${LLVM_VERSION_STRING} MATCHES "^3\\.[0-9][\\.0-9A-Za-z]*") 147 | # Versions below 4.0 do not support llvm-config --cmakedir 148 | set(LLVM_CMAKEDIR ${LLVM_LIBRARY_DIRS}/cmake/llvm) 149 | else () 150 | llvm_set(CMAKEDIR cmakedir) 151 | endif () 152 | 153 | llvm_set(TARGETS_TO_BUILD targets-built) 154 | string(REGEX MATCHALL "${pattern}[^ ]+" LLVM_TARGETS_TO_BUILD ${LLVM_TARGETS_TO_BUILD}) 155 | endif () 156 | 157 | # On CMake builds of LLVM, the output of llvm-config --cxxflags does not 158 | # include -fno-rtti, leading to linker errors. Be sure to add it. 
# Post-process the values reported by llvm-config.
#
# This code runs even when llvm-config was not found, so every variable
# expansion below is quoted: an empty, unquoted expansion would drop an
# argument and turn the if()/string() call into a hard configure error before
# find_package_handle_standard_args() gets a chance to report "not found".
if (NOT MSVC AND (CMAKE_COMPILER_IS_GNUCXX OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")))
    if (NOT "${LLVM_CXXFLAGS}" MATCHES "-fno-rtti")
        set(LLVM_CXXFLAGS "${LLVM_CXXFLAGS} -fno-rtti")
    endif ()
endif ()

# Remove some clang-specific flags for gcc.
if (CMAKE_COMPILER_IS_GNUCXX)
    string(REPLACE "-Wcovered-switch-default " "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")
    string(REPLACE "-Wstring-conversion " "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")
    string(REPLACE "-fcolor-diagnostics " "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")
    # this requires more recent gcc versions (not supported by 4.9)
    string(REPLACE "-Werror=unguarded-availability-new " "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")
endif ()

# Remove gcc-specific flags for clang.
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(REPLACE "-Wno-maybe-uninitialized " "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")
endif ()

string(REGEX REPLACE "([0-9]+).*" "\\1" LLVM_VERSION_MAJOR "${LLVM_VERSION_STRING}")
string(REGEX REPLACE "[0-9]+\\.([0-9]+).*[A-Za-z]*" "\\1" LLVM_VERSION_MINOR "${LLVM_VERSION_STRING}")

# Only compare versions when both a detected and a requested version exist.
if (NOT "${LLVM_VERSION_STRING}" STREQUAL "" AND NOT "${LLVM_FIND_VERSION}" STREQUAL "")
    if ("${LLVM_VERSION_STRING}" VERSION_LESS "${LLVM_FIND_VERSION}")
        message(FATAL_ERROR "Unsupported LLVM version found ${LLVM_VERSION_STRING}. At least version ${LLVM_FIND_VERSION} is required.")
    endif ()
endif ()

# Use the default CMake facilities for handling QUIET/REQUIRED.
187 | include(FindPackageHandleStandardArgs) 188 | 189 | find_package_handle_standard_args(LLVM 190 | REQUIRED_VARS LLVM_ROOT_DIR LLVM_HOST_TARGET 191 | VERSION_VAR LLVM_VERSION_STRING) 192 | -------------------------------------------------------------------------------- /src/jit/JITExecutionRuntime.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "jit/JITExecutionRuntime.h" 7 | #include 8 | #include 9 | #include 10 | 11 | JitRuntime::JitRuntime() {} 12 | 13 | JITExecutionRuntime::JITExecutionRuntime() { 14 | currentState = DEFAULT; 15 | this->basename = std::to_string(std::rand()); 16 | this->running = true; 17 | } 18 | 19 | bool JITExecutionRuntime::isRunning() { return running; } 20 | 21 | Variant *JITExecutionRuntime::compileVariant(Query *query, ProfilingDataManager *profilingDataManager, 22 | CompileMode mode) { 23 | 24 | JITCodeGenerator codeGenerator = JITCodeGenerator(query->config, query->schema, profilingDataManager, mode); 25 | QueryContext queryContext = QueryContext(query->schema); 26 | codeGenerator.addQueryContext(queryContext); 27 | query->current->produce(codeGenerator); 28 | Operator *input = query->root; 29 | 30 | while (input->rightChild) { 31 | input = input->rightChild; 32 | } 33 | 34 | InputOperator *inputOp = (InputOperator *)input; 35 | codeGenerator.generateStructFile(inputOp->getPath()); 36 | auto file = codeGenerator.generate(inputOp->getInputTypeAsString(), inputOp->getPath()); 37 | 38 | CCodeCompiler codeCompiler = CCodeCompiler(); 39 | const CompiledCCodePtr &code = codeCompiler.compile( 40 | file.output, this->basename + "_" + std::to_string((this->variantNr++)) + "_" + std::to_string(mode)); 41 | 42 | return new Variant(code, codeGenerator.profilingDataManager, this); 43 | } 44 | 45 | void JITExecutionRuntime::runWorker(JITExecutionRuntime *runtime, int threadId) { 46 | 47 | while (runtime->isRunning()) { 48 | 49 | std::cout << 
"Thread: " << threadId << " start executing pipeline." << std::endl; 50 | Variant *currentVariant = runtime->currentlyExecutingVariant; 51 | 52 | currentVariant->activeThreads++; 53 | try { 54 | currentVariant->execute(threadId, 0); 55 | } catch (DeoptimizeException &e) { 56 | std::cerr << "Thread: " << threadId << " deoptimize: in pipeline: " << e.pipeline 57 | << " buffer position: " << e.position << std::endl; 58 | 59 | { 60 | std::unique_lock lk(runtime->waitMutex); 61 | if (runtime->currentState == OPTIMIZED) { 62 | runtime->compileCondition.notify_all(); 63 | } 64 | runtime->compilationFinish.wait(lk, [runtime] { return runtime->currentState == DEFAULT; }); 65 | std::cout << "Thread: " << threadId << " restarts again" << std::endl; 66 | } 67 | } 68 | 69 | std::cout << "Thread: " << threadId << " returned from variant" << std::endl; 70 | auto oldVariant = currentVariant; 71 | // check if current thread is the last who left the variant 72 | auto oldValue = oldVariant->activeThreads.fetch_sub(1) - 1; 73 | if (oldValue == 0) { 74 | std::cout << "Thread: " << threadId << " returned last. 
So he has to migrate" << std::endl; 75 | if (runtime->currentState == OPTIMIZED) { 76 | void **state = oldVariant->getState(); 77 | runtime->currentlyExecutingVariant->migrateFrom(state); 78 | } else { 79 | void **state = runtime->currentlyExecutingVariant->getState(); 80 | oldVariant->migrateTo(state); 81 | } 82 | } 83 | } 84 | } 85 | 86 | void JITExecutionRuntime::compilationLoop(JITExecutionRuntime *jitExecutionRuntime) { 87 | int runs = 0; 88 | while (runs < 10) { 89 | std::this_thread::sleep_for(std::chrono::milliseconds(jitExecutionRuntime->delay)); 90 | if (jitExecutionRuntime->currentState == DEFAULT) { 91 | std::cerr << "------------- Deploy Profiling Code NOW ---------- " << std::endl; 92 | jitExecutionRuntime->deployInstrumented(); 93 | 94 | } else if (jitExecutionRuntime->currentState == INSTRUMENTED) { 95 | 96 | std::cerr << "------------- Deploy Optimized Code NOW ---------- " << std::endl; 97 | jitExecutionRuntime->deployOptimized(); 98 | 99 | { 100 | std::cerr << "-------------Main thread Waits ---------- " << std::endl; 101 | std::unique_lock lk(jitExecutionRuntime->compilationMutex); 102 | jitExecutionRuntime->compileCondition.wait(lk); 103 | std::cerr << "-------------Main thread has to deoptimize ---------- " << std::endl; 104 | jitExecutionRuntime->deployInstrumented(); 105 | jitExecutionRuntime->compilationFinish.notify_all(); 106 | } 107 | } 108 | 109 | runs++; 110 | } 111 | } 112 | 113 | void JITExecutionRuntime::monitor(int threadID) {} 114 | 115 | void JITExecutionRuntime::execute(Query *query) { 116 | std::string papi_conf_file = "papi_conf_global.cfg"; 117 | std::string config = "Branch_Benchmark"; 118 | delay = query->config.getCompilationDelay(); 119 | dispatcher = 120 | new SimpleDispatcher(query->config.getRunLength(), query->config.getParallelism(), query->config.getBufferSize(), 121 | 1, query->schema.getInputSize(), query->config.getSourceFile(), 0); 122 | globalState = new GlobalState(); 123 | globalState->window_state = new 
WindowState *[5]; 124 | 125 | this->query = query; 126 | this->currentState = DEFAULT; 127 | auto profilingDataManager = new ProfilingDataManager(); 128 | std::cerr << "------------- Deploy Default Code NOW ---------- " << std::endl; 129 | auto variant = compileVariant(query, nullptr, CM_DEFAULT); 130 | variant->open(globalState, dispatcher); 131 | 132 | variant->init(globalState, dispatcher); 133 | 134 | defaultVariant = variant; 135 | 136 | currentlyExecutingVariant = variant; 137 | /* Launch a group of threads. */ 138 | auto *t = new std::thread[dispatcher->parallelism]; 139 | for (size_t i = 0; i < dispatcher->parallelism; i++) { 140 | t[i] = std::thread(JITExecutionRuntime::runWorker, this, i); 141 | } 142 | 143 | auto compilationThread = new std::thread(JITExecutionRuntime::compilationLoop, this); 144 | 145 | std::this_thread::sleep_for(std::chrono::seconds(query->config.getBenchmarkRunDuration())); 146 | this->running = false; 147 | 148 | /* Wait for all threads. */ 149 | for (size_t i = 0; i < dispatcher->parallelism; i++) { 150 | t[i].join(); 151 | } 152 | compilationThread->join(); 153 | } 154 | 155 | void JITExecutionRuntime::deployOptimized() { 156 | // In the state machine we only deploy OPTIMIZED when we are INSTRUMENTED 157 | assert(this->currentState == INSTRUMENTED); 158 | auto newVariant = compileVariant(query, this->currentlyExecutingVariant->profilingDataManager, CM_OPTIMIZE); 159 | newVariant->open(globalState, dispatcher); 160 | std::cout << "we optimized to the default code" << std::endl; 161 | auto oldVariant = currentlyExecutingVariant; 162 | currentlyExecutingVariant = newVariant; 163 | this->currentState = OPTIMIZED; 164 | oldVariant->invalidate(); 165 | } 166 | 167 | void JITExecutionRuntime::deployInstrumented() { 168 | // In the state machine we only deploy instrumented when we are Default 169 | // assert(this->currentState == DEFAULT); 170 | auto profilingDataManager = new ProfilingDataManager(); 171 | auto newVariant = 
compileVariant(query, profilingDataManager, CM_INSTRUMENT); 172 | newVariant->open(globalState, dispatcher); 173 | std::cout << "we instrumented the default code" << std::endl; 174 | auto oldVariant = currentlyExecutingVariant; 175 | currentlyExecutingVariant = newVariant; 176 | this->currentState = INSTRUMENTED; 177 | oldVariant->invalidate(); 178 | } 179 | 180 | void JITExecutionRuntime::deployDefault() { 181 | // In the state machine we only deploy Default when we are optimized 182 | assert(this->currentState == OPTIMIZED); 183 | auto newVariant = compileVariant(query, nullptr, CM_DEFAULT); 184 | newVariant->open(globalState, dispatcher); 185 | std::cout << "we deoptimized to the default code" << std::endl; 186 | auto oldVariant = currentlyExecutingVariant; 187 | currentlyExecutingVariant = newVariant; 188 | this->currentState = DEFAULT; 189 | 190 | oldVariant->invalidate(); 191 | } 192 | 193 | void JITExecutionRuntime::deoptimize(Variant *currentVariant, void *buffer, int position) { 194 | // we can only deoptimize when we are currently optimized 195 | assert(this->currentState == OPTIMIZED); 196 | redeploy.lock(); 197 | if (currentVariant == currentlyExecutingVariant) { 198 | std::this_thread::sleep_for(std::chrono::seconds(5)); 199 | // if (currentState == DEFAULT) 200 | // JitExecutionRuntime::deployInstrumented(); 201 | // else if (currentState == INSTRUMENTED) 202 | // JitExecutionRuntime::deployOptimized(); 203 | // else if (currentState == OPTIMIZED) 204 | JITExecutionRuntime::deployDefault(); 205 | // invalidate current variant 206 | currentVariant->invalidate(); 207 | 208 | } else { 209 | std::cout << "the pipeline was already redeployed" << std::endl; 210 | } 211 | redeploy.unlock(); 212 | } 213 | -------------------------------------------------------------------------------- /src/jit/CodeCompiler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | 
#include 8 | #include 9 | #include 10 | 11 | #pragma GCC diagnostic push 12 | #pragma GCC diagnostic ignored "-Wstrict-aliasing" 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #pragma GCC diagnostic pop 19 | 20 | const std::string CCodeCompiler::IncludePath = "jit-generated-code/"; 21 | const std::string CLANG_EXECUTABLE = "/usr/bin/clang++"; 22 | 23 | const std::string CCodeCompiler::MinimalApiHeaderPath = "/include/CodeGen/MinimalApi.hpp"; 24 | 25 | CCodeCompiler::CCodeCompiler() { init(); } 26 | 27 | CompiledCCodePtr CCodeCompiler::compile(const std::string &source, const std::string name) { 28 | // handleDebugging(source); 29 | // auto pch_time = createPrecompiledHeader(); 30 | 31 | return compileWithSystemCompiler(source, 0, name); 32 | } 33 | 34 | void CCodeCompiler::init() { 35 | use_clang_jit_ = false; 36 | 37 | show_generated_code_ = true; 38 | 39 | debug_code_generator_ = true; 40 | 41 | keep_last_generated_query_code_ = false; 42 | 43 | #ifndef NDEBUG 44 | PrecompiledHeaderName = ".debug.hpp.pch"; 45 | #else 46 | PrecompiledHeaderName = ".release.hpp.pch"; 47 | #endif 48 | initCompilerArgs(); 49 | } 50 | 51 | void CCodeCompiler::initCompilerArgs() { 52 | compiler_args_ = {"-std=c++11", "-O3", "-ltbb", "-lnuma", 53 | "-fno-trigraphs", "-fpic", "-Werror", "-Wparentheses-equality", 54 | #ifdef SSE41_FOUND 55 | "-msse4.1", 56 | #endif 57 | #ifdef SSE42_FOUND 58 | "-msse4.2", 59 | #endif 60 | #ifdef AVX_FOUND 61 | "-mavx", 62 | #endif 63 | #ifdef AVX2_FOUND 64 | "-mavx2", 65 | #endif 66 | "-I" + IncludePath}; 67 | 68 | #ifndef NDEBUG 69 | compiler_args_.push_back("-g"); 70 | #else 71 | compiler_args_.push_back("-O3"); 72 | compiler_args_.push_back("-g"); 73 | #endif 74 | } 75 | 76 | long CCodeCompiler::createPrecompiledHeader() { 77 | if (!rebuildPrecompiledHeader()) { 78 | return 0; 79 | } 80 | 81 | auto start = 0; 82 | callSystemCompiler(getPrecompiledHeaderCompilerArgs()); 83 | return 0 - start; 84 | } 85 | 86 | bool 
CCodeCompiler::rebuildPrecompiledHeader() { 87 | if (!boost::filesystem::exists(PrecompiledHeaderName)) { 88 | return true; 89 | } else { 90 | auto last_access_pch = boost::filesystem::last_write_time(PrecompiledHeaderName); 91 | auto last_access_header = boost::filesystem::last_write_time(MinimalApiHeaderPath); 92 | 93 | /* pre-compiled header outdated? */ 94 | return last_access_header > last_access_pch; 95 | } 96 | } 97 | 98 | std::vector CCodeCompiler::getPrecompiledHeaderCompilerArgs() { 99 | auto args = compiler_args_; 100 | 101 | std::stringstream pch_option; 102 | pch_option << "-o" << PrecompiledHeaderName; 103 | args.push_back(MinimalApiHeaderPath); 104 | args.push_back(pch_option.str()); 105 | args.push_back("-xc++-header"); 106 | 107 | return args; 108 | } 109 | 110 | std::vector CCodeCompiler::getCompilerArgs() { 111 | auto args = compiler_args_; 112 | 113 | args.push_back("-xc++"); 114 | #ifndef NDEBUG 115 | // args.push_back("-include.debug.hpp"); 116 | #else 117 | // args.push_back("-include.release.hpp"); 118 | #endif 119 | 120 | #ifdef __APPLE__ 121 | args.push_back("-framework OpenCL"); 122 | args.push_back("-undefined dynamic_lookup"); 123 | #endif 124 | 125 | return args; 126 | } 127 | 128 | void CCodeCompiler::callSystemCompiler(const std::vector &args) { 129 | std::stringstream compiler_call; 130 | compiler_call << CLANG_EXECUTABLE << " "; 131 | 132 | for (const auto &arg : args) { 133 | compiler_call << arg << " "; 134 | } 135 | std::cout << "system '" << compiler_call.str() << "'" << std::endl; 136 | auto ret = system(compiler_call.str().c_str()); 137 | 138 | if (ret != 0) { 139 | std::cout << "PrecompiledHeader compilation failed!"; 140 | throw "PrecompiledHeader compilation failed!"; 141 | } 142 | } 143 | 144 | void pretty_print_code(const std::string &source) { 145 | int ret = system("which clang-format > /dev/null"); 146 | if (ret != 0) { 147 | std::cout << "Did not find external tool 'clang-format'. 
" 148 | "Please install 'clang-format' and try again." 149 | "If 'clang-format-X' is installed, try to create a " 150 | "symbolic link."; 151 | return; 152 | } 153 | const std::string filename = "temporary_file.c"; 154 | 155 | exportSourceToFile(filename, source); 156 | 157 | std::string format_command = std::string("clang-format ") + filename; 158 | /* try a syntax highlighted output first */ 159 | /* command highlight available? */ 160 | ret = system("which highlight > /dev/null"); 161 | if (ret == 0) { 162 | format_command += " | highlight --src-lang=c -O ansi"; 163 | } 164 | ret = system(format_command.c_str()); 165 | std::string cleanup_command = std::string("rm ") + filename; 166 | ret = system(cleanup_command.c_str()); 167 | } 168 | 169 | void CCodeCompiler::handleDebugging(const std::string &source) { 170 | if (!show_generated_code_ && !debug_code_generator_ && !keep_last_generated_query_code_) { 171 | return; 172 | } 173 | 174 | if (keep_last_generated_query_code_ || debug_code_generator_) { 175 | exportSourceToFile("last_generated_query.c", source); 176 | } 177 | 178 | if (show_generated_code_ || debug_code_generator_) { 179 | std::cout << std::string(80, '=') << std::endl; 180 | std::cout << "<<< Generated Host Code:" << std::endl; 181 | pretty_print_code(source); 182 | std::cout << ">>> Generated Host Code" << std::endl; 183 | std::cout << std::string(80, '=') << std::endl; 184 | } 185 | } 186 | 187 | void exportSourceToFile(const std::string &filename, const std::string &source) { 188 | std::ofstream result_file(filename, std::ios::trunc | std::ios::out); 189 | result_file << source; 190 | } 191 | 192 | class SystemCompilerCompiledCCode : public CompiledCCode { 193 | public: 194 | SystemCompilerCompiledCCode(long compile_time, SharedLibraryPtr library, const std::string &base_name) 195 | : CompiledCCode(compile_time), library_(library), base_file_name_(base_name) {} 196 | 197 | ~SystemCompilerCompiledCCode() { cleanUp(); } 198 | 199 | protected: 200 | 
void *getFunctionPointerImpl(const std::string &name) override final { return library_->getSymbol(name); } 201 | 202 | private: 203 | void cleanUp() { 204 | if (boost::filesystem::exists(base_file_name_ + ".cpp")) { 205 | boost::filesystem::remove(base_file_name_ + ".cpp"); 206 | } 207 | 208 | if (boost::filesystem::exists(base_file_name_ + ".o")) { 209 | boost::filesystem::remove(base_file_name_ + ".o"); 210 | } 211 | 212 | if (boost::filesystem::exists(base_file_name_ + ".so")) { 213 | boost::filesystem::remove(base_file_name_ + ".so"); 214 | } 215 | 216 | if (boost::filesystem::exists(base_file_name_ + ".c.orig")) { 217 | boost::filesystem::remove(base_file_name_ + ".c.orig"); 218 | } 219 | } 220 | 221 | SharedLibraryPtr library_; 222 | std::string base_file_name_; 223 | }; 224 | 225 | CompiledCCodePtr CCodeCompiler::compileWithSystemCompiler(const std::string &source, const long pch_time, 226 | const std::string name) { 227 | auto start = 0; 228 | 229 | boost::uuids::uuid uuid = boost::uuids::random_generator()(); 230 | std::string basename = "jit-generated-code/gen_query_" + name; 231 | std::string filename = basename + ".cpp"; 232 | std::string library_name = basename + ".so"; 233 | exportSourceToFile(filename, source); 234 | 235 | auto args = getCompilerArgs(); 236 | args.push_back("--shared"); 237 | args.push_back("-o" + library_name); 238 | args.push_back(filename); 239 | 240 | callSystemCompiler(args); 241 | 242 | auto shared_library = SharedLibrary::load("./" + library_name); 243 | 244 | auto end = 0; 245 | 246 | auto compile_time = end - start + pch_time; 247 | return std::make_shared(compile_time, shared_library, basename); 248 | } 249 | 250 | SharedLibrary::SharedLibrary(void *_shared_lib) : shared_lib_(_shared_lib) { assert(shared_lib_ != NULL); } 251 | 252 | SharedLibrary::~SharedLibrary() { 253 | // if (!VariableManager::instance().getVariableValueBoolean( 254 | // "profiling.keep_shared_libraries_loaded")) { 255 | dlclose(shared_lib_); 256 | //} 
257 | } 258 | 259 | void *SharedLibrary::getSymbol(const std::string &mangeled_symbol_name) const { 260 | auto symbol = dlsym(shared_lib_, mangeled_symbol_name.c_str()); 261 | auto error = dlerror(); 262 | 263 | if (error) { 264 | std::cout << "Could not load symbol: " << mangeled_symbol_name << std::endl << "Error:" << std::endl << error; 265 | } 266 | 267 | return symbol; 268 | } 269 | 270 | SharedLibraryPtr SharedLibrary::load(const std::string &file_path) { 271 | auto myso = dlopen(file_path.c_str(), RTLD_NOW); 272 | 273 | auto error = dlerror(); 274 | if (error) { 275 | std::cout << "Could not load shared library: " << file_path << std::endl << "Error:" << std::endl << error; 276 | } else if (!myso) { 277 | std::cout << "Could not load shared library: " << file_path << std::endl << "Error unknown!"; 278 | } 279 | 280 | return SharedLibraryPtr(new SharedLibrary(myso)); 281 | } 282 | -------------------------------------------------------------------------------- /src/api/Assigner.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "api/Assigner.h" 6 | 7 | /* 8 | * Record can only be in exactly one window based on processing time 9 | */ 10 | void TumblingProcessingTimeAssigner::produce(CodeGenerator &cg) { 11 | pipeline = cg.currentPipeline(); 12 | cg.file.addStatement("const int64_t window_size" + std::to_string(pipeline) + " = " + size.to_string() + ";"); 13 | cg.file.addStatement("const int64_t window_buffers" + std::to_string(pipeline) + " = 2;"); 14 | // save number of windows to query context 15 | cg.ctx(pipeline).numWindows = 2; 16 | } 17 | 18 | void TumblingProcessingTimeAssigner::consume(CodeGenerator &cg) { 19 | 20 | auto strPipeline = std::to_string(pipeline); 21 | std::string resultType = "record" + std::to_string(pipeline); 22 | 23 | // init state buffer for non-grouped query (resultType[2]) 24 | if (!cg.ctx(pipeline).hasGroupBy) { 25 | 
cg.file.addStatement(resultType + "* state" + std::to_string(pipeline) + " = new " + resultType + "[2];"); 26 | } 27 | 28 | std::stringstream statementsInit; 29 | // get key for keyed query 30 | std::string key = ""; 31 | if (cg.ctx(pipeline).hasKeyBy) 32 | key = "[record." + cg.ctx(pipeline).keyBy->name + "]"; 33 | 34 | statementsInit << "{g->window_state[" + strPipeline + "] = new WindowState{};\n" 35 | << "auto window_state = g->window_state[" + strPipeline + "];\n" 36 | << "window_state->thread_local_state = new ThreadLocalState*[dispatcher->parallelism];\n" 37 | << "size_t ts = time(NULL);\n" 38 | << "for (size_t thread_ID = 0; thread_ID < dispatcher->parallelism; thread_ID++) {\n"; 39 | if (cg.config.getNuma()) { 40 | statementsInit << "int node = dispatcher->numa_relation[thread_ID];\n"; 41 | statementsInit << "void* blob = numa_alloc_onnode((sizeof(runtime::ThreadLocalState)), node);\n"; 42 | statementsInit << "window_state.thread_local_state[thread_ID] = new(blob) runtime::ThreadLocalState{};\n"; 43 | statementsInit << "void* blob2 = numa_alloc_onnode((sizeof(int64_t)*window_buffers" << pipeline << "), node);\n"; 44 | statementsInit << "window_state->thread_local_state[thread_ID]->windowEnds = new(blob2) int64_t[window_buffers" 45 | << pipeline << "];\n"; 46 | } else { 47 | statementsInit << "window_state->thread_local_state[thread_ID] = new ThreadLocalState{};\n"; 48 | statementsInit << "window_state->thread_local_state[thread_ID]->windowEnds = new int64_t[window_buffers" << pipeline 49 | << "];\n"; 50 | } 51 | statementsInit << "for (size_t w = 0; w < window_buffers" << pipeline << "; w++) {;\n" 52 | << "window_state->thread_local_state[thread_ID]->windowEnds[w] = ts + ( " << size.to_string() 53 | << " * w) + " << size.to_string() << ";\n" 54 | << "}" 55 | << "}"; 56 | statementsInit << "}"; 57 | 58 | cg.init.addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statementsInit.str())); 59 | // assign window 60 | std::string statements = "size_t 
window_index = thread_local_state->current_window;"; 61 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statements)); 62 | } 63 | 64 | /* 65 | * Record can belong to up to numWindows windows 66 | */ 67 | void SlidingProcessingTimeAssigner::produce(CodeGenerator &cg) { 68 | pipeline = cg.currentPipeline(); 69 | 70 | cg.file.addStatement("const int64_t window_size" + std::to_string(pipeline) + " = " + size.to_string() + ";"); 71 | cg.file.addStatement("const int64_t window_buffers" + std::to_string(pipeline) + " = " + std::to_string(numWindows) + 72 | ";"); 73 | 74 | // save number of windows to query context 75 | cg.ctx(pipeline).numWindows = numWindows; 76 | } 77 | 78 | void SlidingProcessingTimeAssigner::consume(CodeGenerator &cg) { 79 | std::stringstream statements_main; 80 | std::string resultType = "record" + std::to_string(pipeline); 81 | 82 | // init state-buffer for non-grouped query (resultType[1], size_t[1]) 83 | if (!cg.ctx(pipeline).hasGroupBy) { 84 | 85 | cg.file.addStatement(resultType + "* state" + std::to_string(pipeline) + " = new " + resultType + "[" + 86 | std::to_string(numWindows) + "];"); 87 | 88 | } else { 89 | auto keyRange = cg.ctx(pipeline).maxKeyValue; 90 | if (keyRange != -1) { 91 | // cg.file.addStatement("record" + std::to_string(pipeline) + " **state" + std::to_string(pipeline) + ";"); 92 | statements_main << "{" 93 | " state" 94 | << std::to_string(pipeline) << " = new record" + std::to_string(pipeline) + " *[window_buffers" 95 | << std::to_string(pipeline) << " * " << cg.config.getNumaNodes() << "];" 96 | << "for (size_t w = 0; w < (window_buffers" << std::to_string(pipeline) << " * " 97 | << cg.config.getNumaNodes() << " ) ; w++) {"; 98 | statements_main << "state" << std::to_string(pipeline) << "[w] = new record" + std::to_string(pipeline) + "[" 99 | << keyRange << " + 1];"; 100 | statements_main << " for (size_t i = 0; i < " << keyRange << " + 1; i++) {"; 101 | statements_main << "state" << 
std::to_string(pipeline) << "[w][i] = {};"; 102 | statements_main << " }" 103 | " }" 104 | << "}"; 105 | } else { 106 | // init state-buffer buffer for grouped-query (Map[1], size_t[1]) 107 | /* 108 | std::string keyType = cg.ctx(pipeline).groupBy->dataType.keyType(); 109 | cg.file.addStatement("tbb::concurrent_unordered_map<" + keyType + ", " + resultType + ">* state" + 110 | std::to_string(pipeline) + " = new tbb::concurrent_unordered_map<" + keyType + ", " + 111 | resultType + ">[" + std::to_string(numWindows) + "];"); 112 | */ 113 | } 114 | } 115 | 116 | std::stringstream statements; 117 | std::stringstream statements_final; 118 | statements << "// ASSIGNER \n // place tuple in correct window and hash key bucket \n" 119 | << "for(size_t w=0;w<" << std::to_string(numWindows) << ";w++) { \n" 120 | << "size_t window_index = (thread_local_state->current_window + w) % " << std::to_string(numWindows) 121 | << "; \n" 122 | << "if ((thread_local_state->windowEnds[window_index] - window_size" << std::to_string(pipeline) 123 | << ") <= ts) {"; 124 | // cout << "add to windows window_index " << window_index << " window_start " << window_start << " ts " << ts << 125 | // endl; 126 | 127 | statements_final << "}" << std::endl; 128 | statements_final << "}" << std::endl; 129 | cg.pipeline(pipeline).addInstruction( 130 | CMethod::Instruction(INSTRUCTION_ASSIGNER, statements.str(), statements_final.str())); 131 | 132 | statements_main << "{g->window_state[" << pipeline << "] = new WindowState{};\n" 133 | << "auto window_state = g->window_state[" << pipeline << "];\n" 134 | << "window_state->thread_local_state = new ThreadLocalState*[dispatcher->parallelism];\n" 135 | << "size_t ts = time(NULL);\n" 136 | << "for (size_t thread_ID = 0; thread_ID < dispatcher->parallelism; thread_ID++) {\n"; 137 | statements_main << "window_state->thread_local_state[thread_ID] = new ThreadLocalState{};\n"; 138 | statements_main << "window_state->thread_local_state[thread_ID]->windowEnds = new 
int64_t[window_buffers" << pipeline 139 | << "];\n"; 140 | statements_main << "for (size_t w = 0; w < window_buffers" << pipeline << "; w++) {;\n" 141 | << "window_state->thread_local_state[thread_ID]->windowEnds[w] = ts + ( " << slide.to_string() 142 | << " * w) + " << size.to_string() << ";\n" 143 | << "}" 144 | << "}"; 145 | statements_main << "}"; 146 | 147 | cg.init.addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statements_main.str())); 148 | } 149 | 150 | /* 151 | * Record can only be in exactly one window based on processing time 152 | */ 153 | void SessionProcessingTimeAssigner::produce(CodeGenerator &cg) { 154 | pipeline = cg.currentPipeline(); 155 | 156 | // save number of windows to query context 157 | cg.ctx(pipeline).numWindows = 2; 158 | } 159 | 160 | void SessionProcessingTimeAssigner::consume(CodeGenerator &cg) { 161 | 162 | std::string resultType = "record" + std::to_string(pipeline - 1); 163 | 164 | // init state buffer for non-grouped query (resultType[2]) 165 | if (!cg.ctx(pipeline).hasGroupBy) { 166 | cg.file.addStatement(resultType + "* state = new " + resultType + "[2];"); 167 | } 168 | 169 | // init state-buffer buffer for grouped-query (Map[2]) 170 | if (cg.ctx(pipeline).hasGroupBy) { 171 | std::string keyType = cg.ctx(pipeline).groupBy->dataType.keyType(); 172 | cg.file.addStatement("tbb::concurrent_unordered_map<" + keyType + ", " + resultType + 173 | ">* state = new tbb::concurrent_unordered_map<" + keyType + ", " + resultType + ">[2];"); 174 | } 175 | 176 | // get key for keyed query 177 | std::string key = ""; 178 | if (cg.ctx(pipeline).hasKeyBy) 179 | key = "[record." 
+ cg.ctx(pipeline).keyBy->name + "]"; 180 | 181 | std::stringstream statements; 182 | 183 | // assign window 184 | statements << "size_t window = triggerCount" << key << " % 2;" << std::endl; 185 | 186 | // update timeout 187 | statements << "meta[window]" << key << " = ts + " << std::to_string(timeout.time) << ";" << std::endl; 188 | 189 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statements.str())); 190 | } 191 | -------------------------------------------------------------------------------- /src/api/Aggregation.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "api/Aggregation.h" 7 | 8 | void Aggregation::produce_(CodeGenerator &cg, Operator *input, Schema &schema) { 9 | 10 | // start new pipeline 11 | QueryContext context = QueryContext(schema); 12 | cg.addQueryContext(context); 13 | cg.startPipeline(); 14 | 15 | // get current pipeline id 16 | pipeline = cg.currentPipeline(); 17 | 18 | // set aggregation in query context 19 | cg.ctx(pipeline).isAggregation = true; 20 | input->produce(cg); 21 | } 22 | 23 | void Aggregation::consume_(CodeGenerator &cg, Operator *input) { 24 | std::stringstream statements; 25 | std::string key; 26 | 27 | if (cg.ctx(pipeline).hasGroupBy) { 28 | statements << "auto keyField = record." 
+ cg.ctx(pipeline).groupBy->name << ";" << std::endl; 29 | if (cg.compileMode == CM_DEFAULT || cg.compileMode == CM_INSTRUMENT) { 30 | statements << "auto key = keyField;\n"; 31 | } else if (cg.compileMode == CM_OPTIMIZE) { 32 | auto profiledMax = cg.profilingDataManager->getMaxHandler("agg_max")->getValue(); 33 | auto profiledMin = cg.profilingDataManager->getMaxHandler("agg_min")->getValue(); 34 | auto top = cg.profilingDataManager->getDistributionProfilingHandler("dist")->top; 35 | std::string keyType = cg.ctx(pipeline).groupBy->dataType.keyType(); 36 | if (keyType == "std::string") { 37 | statements << "uint64_t value = (uint64_t) fast_atoi(keyField);\n"; 38 | } else { 39 | statements << "uint64_t value = (uint64_t) keyField;\n"; 40 | } 41 | statements << "if(value< " << profiledMin << " || value>" << profiledMax << "){throw DeoptimizeException(" 42 | << pipeline 43 | << ",i, records);}\n" 44 | "uint64_t key = value % " 45 | << profiledMax << ";\n"; 46 | } 47 | 48 | if (cg.compileMode == CM_INSTRUMENT) { 49 | cg.profilingDataManager->registerMinHandler("agg_min"); 50 | cg.profilingDataManager->registerMaxHandler("agg_max"); 51 | cg.profilingDataManager->registerDistributionHandler("dist"); 52 | statements << "if(thread_id==0){"; 53 | std::string keyType = cg.ctx(pipeline).groupBy->dataType.keyType(); 54 | if (keyType == "std::string") { 55 | statements << "uint64_t intKey = (uint64_t) atoi(keyField);\n"; 56 | } else { 57 | statements << "uint64_t intKey = (uint64_t) keyField;\n"; 58 | } 59 | statements << "variant->profilingDataManager->getMinHandler(\"agg_min\")->update(intKey);\n" 60 | "variant->profilingDataManager->getMaxHandler(\"agg_max\")->update(intKey);\n" 61 | "variant->profilingDataManager->getDistributionProfilingHandler(\"dist\")->update(intKey);}\n"; 62 | } 63 | } 64 | 65 | // get window buffer 66 | 67 | auto strPipeline = std::to_string(pipeline); 68 | 69 | if (cg.compileMode == CM_OPTIMIZE) { 70 | auto &context = cg.ctx(pipeline); 71 | if 
(context.stateStrategy == QueryContext::INDEPENDENT && !cg.config.getNuma()) { 72 | statements << "auto bufferIndex = window_index + (thread_id * window_buffers" << strPipeline << ");"; 73 | } else if (context.stateStrategy == QueryContext::SHARED && cg.config.getNuma()) { 74 | statements << "auto bufferIndex = window_index + (numa_node * window_buffers" << strPipeline << ");"; 75 | } else { 76 | statements << "auto bufferIndex = window_index;"; 77 | } 78 | } else { 79 | statements << "auto bufferIndex = window_index;"; 80 | } 81 | 82 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_AGGREGATE, statements.str())); 83 | } 84 | 85 | void Aggregation::createState(CodeGenerator &cg, Operator *input, Schema &schema) { 86 | auto strPipeline = std::to_string(pipeline); 87 | auto &contex = cg.ctx(pipeline); 88 | auto resultType = "record" + std::to_string(pipeline); 89 | std::stringstream statementsOpen; 90 | statementsOpen << "{"; 91 | // init state-buffer buffer for grouped-query (Map[2]) 92 | if (cg.ctx(pipeline).hasGroupBy) { 93 | auto keyRange = cg.ctx(pipeline).maxKeyValue; 94 | if (cg.compileMode == CM_OPTIMIZE && keyRange != -1) { 95 | cg.file.addStatement("record" + strPipeline + " **state" + strPipeline + ";"); 96 | 97 | if (contex.stateStrategy == QueryContext::INDEPENDENT) { 98 | statementsOpen << "// init state buffer for independent aggregation\n" 99 | " auto stateBuffers = window_buffers" 100 | << strPipeline << " * dispatcher->parallelism * " << cg.config.getNumaNodes() << ";\n"; 101 | } else { 102 | statementsOpen << "// init state buffer for shared aggregation\n" 103 | " auto stateBuffers = window_buffers" 104 | << strPipeline << " * " << cg.config.getNumaNodes() << ";\n"; 105 | } 106 | 107 | statementsOpen << " state" << strPipeline << " = new " + resultType + " *[stateBuffers];" 108 | << "for (size_t w = 0; w < (stateBuffers) ; w++) {\n"; 109 | 110 | if (cg.config.getNuma()) { 111 | statementsOpen << "void *blob = 
numa_alloc_onnode((sizeof(record" + strPipeline + ") * " << keyRange 112 | << "+1) , w / " << cg.config.getNumaNodes() << ");\n" 113 | << "state" << strPipeline << "[w] = new(blob) record" + strPipeline + "[" << keyRange 114 | << " + 1];"; 115 | } else { 116 | statementsOpen << "state" << strPipeline 117 | << "[w] = " 118 | "new record" + 119 | strPipeline + "[" 120 | << keyRange << " + 1];"; 121 | } 122 | 123 | statementsOpen << " for (size_t i = 0; i < " << keyRange << " + 1; i++) {"; 124 | statementsOpen << "state" << strPipeline << "[w][i] = {};"; 125 | statementsOpen << " }" 126 | " }"; 127 | 128 | } else { 129 | std::string keyType = cg.ctx(pipeline).groupBy->dataType.keyType(); 130 | cg.file.addStatement("tbb::concurrent_unordered_map<" + keyType + ", " + resultType + ">* state" + strPipeline + 131 | ";\n"); 132 | statementsOpen << "state" << strPipeline 133 | << " = new tbb::concurrent_unordered_map<" + keyType + ", " + resultType + ">[" + 134 | std::to_string(2 * cg.config.getNumaNodes() * cg.ctx(pipeline).numWindows) + "];"; 135 | } 136 | } 137 | statementsOpen << "}"; 138 | 139 | cg.open.addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statementsOpen.str())); 140 | } 141 | 142 | void Aggregation::migrateFrom(CodeGenerator &cg, Operator *input, Schema &schema) { 143 | 144 | auto strPipeline = std::to_string(pipeline); 145 | std::string resultType = "record" + std::to_string(pipeline); 146 | std::stringstream statementsMigrateFrom; 147 | statementsMigrateFrom << "{"; 148 | 149 | // init state-buffer buffer for grouped-query (Map[2]) 150 | if (cg.ctx(pipeline).hasGroupBy) { 151 | std::string keyType = cg.ctx(pipeline).groupBy->dataType.keyType(); 152 | statementsMigrateFrom << "tbb::concurrent_unordered_map<" << keyType 153 | << ", record1>* input = ((tbb::concurrent_unordered_map<" << keyType 154 | << ", record1>*)inputStates[" << strPipeline << "]);\n"; 155 | statementsMigrateFrom << "for (size_t w = 0; w < (window_buffers" << strPipeline << " ) 
; w++) {\n" 156 | << " for (size_t n = 0; n < " << cg.config.getNumaNodes() << " ; n++){\n" 157 | << " for (auto const &it : input[w]) {\n"; 158 | statementsMigrateFrom << keyType << " key = it.first;" << std::endl; 159 | statementsMigrateFrom << resultType << " record = it.second;" << std::endl; 160 | if (cg.ctx(pipeline).maxKeyValue != -1) { 161 | if (keyType == "string") { 162 | statementsMigrateFrom << "auto keyValue = ((uint64_t) key.data());" << std::endl; 163 | } else { 164 | statementsMigrateFrom << " auto keyValue = ((uint64_t) key);" << std::endl; 165 | } 166 | statementsMigrateFrom << "uint64_t keyInt = keyValue % " << cg.ctx(pipeline).maxKeyValue << ";\n"; 167 | for (auto field : schema.fields) { 168 | auto state = "state" + strPipeline + "[w+n][keyInt]." + field.name; 169 | statementsMigrateFrom << state << " = " + state + " + record." << field.name << ";\n"; 170 | } 171 | 172 | } else { 173 | for (auto field : schema.fields) { 174 | auto state = "state" + strPipeline + "[w+n][key]." + field.name; 175 | statementsMigrateFrom << state << " = " + state + " + record." 
<< field.name << ";\n"; 176 | } 177 | } 178 | statementsMigrateFrom << " }}}"; 179 | } 180 | 181 | statementsMigrateFrom << "}"; 182 | 183 | cg.migrateFrom.addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statementsMigrateFrom.str())); 184 | } 185 | 186 | void Aggregation::migrateTo(CodeGenerator &cg, Operator *input, Schema &schema) { 187 | 188 | auto strPipeline = std::to_string(pipeline); 189 | auto contex = cg.ctx(pipeline); 190 | std::string resultType = "record" + std::to_string(pipeline); 191 | std::stringstream statementsMigrateFrom; 192 | statementsMigrateFrom << "{"; 193 | 194 | // init state-buffer buffer for grouped-query (Map[2]) 195 | if (cg.ctx(pipeline).hasGroupBy) { 196 | std::string keyType = cg.ctx(pipeline).groupBy->dataType.keyType(); 197 | statementsMigrateFrom << "tbb::concurrent_unordered_map<" << keyType 198 | << ", record1>* output = ((tbb::concurrent_unordered_map<" << keyType 199 | << ", record1>*)outputStates[" << strPipeline << "]);\n"; 200 | statementsMigrateFrom << "for (size_t w = 0; w < (window_buffers" << strPipeline << " ) ; w++) {\n" 201 | << " for (size_t n = 0; n < " << cg.config.getNumaNodes() << " ; n++){\n"; 202 | 203 | if (contex.maxKeyValue != -1) { 204 | statementsMigrateFrom << "for (int i = 0; i< " << cg.ctx(pipeline).maxKeyValue << ";i++ ) {\n"; 205 | if (keyType == "string") { 206 | statementsMigrateFrom << keyType << " key = std::to_string(i);" << std::endl; 207 | } else { 208 | statementsMigrateFrom << keyType << " key = i;" << std::endl; 209 | } 210 | statementsMigrateFrom << resultType << " record = state" << strPipeline << "[w*n][i];" << std::endl; 211 | for (auto field : schema.fields) { 212 | auto state = "output[w][key]." + field.name; 213 | statementsMigrateFrom << state << " = " << state << " + record." 
<< field.name << ";\n"; 214 | } 215 | statementsMigrateFrom << "}"; 216 | } else { 217 | statementsMigrateFrom << " for (auto const &it : state" << strPipeline << "[w*n]) {\n"; 218 | statementsMigrateFrom << keyType << " key = it.first;" << std::endl; 219 | statementsMigrateFrom << resultType << " record = it.second;" << std::endl; 220 | for (auto field : schema.fields) { 221 | auto state = "output[w][key]." + field.name; 222 | statementsMigrateFrom << state << " = " << state << " + record." << field.name << ";\n"; 223 | } 224 | statementsMigrateFrom << " }"; 225 | } 226 | statementsMigrateFrom << " }}"; 227 | } 228 | 229 | statementsMigrateFrom << "}"; 230 | 231 | cg.migrateTo.addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statementsMigrateFrom.str())); 232 | } 233 | 234 | void Aggregation::addStatePtr(CodeGenerator &cg, Operator *input, Schema &schema) { 235 | auto strPipeline = std::to_string(pipeline); 236 | std::string resultType = "record" + std::to_string(pipeline); 237 | std::stringstream statementsMigrateFrom; 238 | if (cg.ctx(pipeline).maxKeyValue == -1) { 239 | statementsMigrateFrom << "auto output = state" << strPipeline << ";"; 240 | statementsMigrateFrom << "statePtr[" << strPipeline << "] = state" << strPipeline << ";\n"; 241 | } 242 | cg.getState.addInstruction(CMethod::Instruction(INSTRUCTION_ASSIGNER, statementsMigrateFrom.str())); 243 | } 244 | 245 | void Aggregation::consumeFinalAggregation(CodeGenerator &generator, Operator *pOperator) {} 246 | void Aggregation::produceFinalAggregation(CodeGenerator &generator, Operator *pOperator) {} 247 | 248 | /* 249 | * SUM 250 | */ 251 | void Sum::produce(CodeGenerator &cg, Operator *input) { 252 | // add field to schema 253 | Schema schema = Schema::create().addFixSizeField(fieldId + "_sum", DataType::Long, Stream); 254 | produce_(cg, input, schema); 255 | createState(cg, input, schema); 256 | migrateFrom(cg, input, schema); 257 | migrateTo(cg, input, schema); 258 | addStatePtr(cg, input, 
schema); 259 | } 260 | 261 | void Sum::consume(CodeGenerator &cg, Operator *parent) { 262 | 263 | std::stringstream statements; 264 | 265 | // get key 266 | consume_(cg, parent); 267 | 268 | // calculate sum 269 | auto sumField = fieldId + "_sum"; 270 | if (cg.config.getNuma()) { 271 | statements << "state" << std::to_string(pipeline) << "[bufferIndex][key]." << sumField << " += record." << fieldId 272 | << ";"; 273 | } else { 274 | if (cg.ctx(pipeline).hasGroupBy) { 275 | statements << "state" << std::to_string(pipeline) << "[bufferIndex][key]." << sumField << " += record." << fieldId 276 | << ";"; 277 | } else { 278 | statements << "state" << std::to_string(pipeline) << "[bufferIndex]." << sumField << " += record." << fieldId 279 | << ";"; 280 | } 281 | } 282 | // calculate count 283 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_AGGREGATE, statements.str())); 284 | 285 | if (parent != nullptr) { 286 | parent->consume(cg); 287 | } 288 | } 289 | 290 | /* 291 | * COUNT 292 | */ 293 | void Count::produce(CodeGenerator &cg, Operator *input) { 294 | // add field to schema 295 | Schema schema = Schema::create().addFixSizeField("count", DataType::Long, Stream); 296 | 297 | produce_(cg, input, schema); 298 | createState(cg, input, schema); 299 | migrateFrom(cg, input, schema); 300 | migrateTo(cg, input, schema); 301 | addStatePtr(cg, input, schema); 302 | } 303 | 304 | void Count::consume(CodeGenerator &cg, Operator *parent) { 305 | 306 | std::stringstream statements; 307 | std::string key; 308 | 309 | // get key 310 | consume_(cg, parent); 311 | 312 | // increment 313 | if (cg.config.getNuma()) { 314 | statements << "state" << std::to_string(pipeline) << "[bufferIndex][key].count++;"; 315 | } else { 316 | if (cg.ctx(pipeline).hasGroupBy) { 317 | statements << "state" + std::to_string(pipeline) + "[bufferIndex][key].count++;" << std::endl; 318 | } else { 319 | statements << "state" + std::to_string(pipeline) + "[bufferIndex].count++;" << 
std::endl; 320 | } 321 | } 322 | // calculate count 323 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_AGGREGATE, statements.str())); 324 | 325 | if (parent != nullptr) { 326 | parent->consume(cg); 327 | } 328 | } 329 | 330 | /* 331 | * Min 332 | */ 333 | void Min::produce(CodeGenerator &cg, Operator *input) { 334 | // add field to schema 335 | Schema schema = Schema::create().addFixSizeField(fieldId + "_min", DataType::Long, Stream); 336 | produce_(cg, input, schema); 337 | createState(cg, input, schema); 338 | migrateFrom(cg, input, schema); 339 | migrateTo(cg, input, schema); 340 | addStatePtr(cg, input, schema); 341 | } 342 | 343 | void Min::consume(CodeGenerator &cg, Operator *parent) { 344 | 345 | std::string key; 346 | /* if (cg.ctx(pipeline).hasGroupBy) 347 | key = "[record." + cg.ctx(pipeline).groupBy->name + "]"; 348 | */ 349 | // calculate min 350 | std::stringstream statements; 351 | 352 | consume_(cg, parent); 353 | 354 | statements << "auto recordValue = record." << fieldId << "; "; 355 | 356 | std::stringstream oldValue; 357 | if (cg.config.getNuma()) { 358 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex][key]." << fieldId << "_min"; 359 | } else { 360 | if (cg.ctx(pipeline).hasGroupBy) { 361 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex][key]." << fieldId << "_min"; 362 | } else { 363 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex]." 
<< fieldId << "_min"; 364 | } 365 | } 366 | statements << "long old; do {\n" 367 | "// Take a snapshot\n" 368 | "old = " 369 | << oldValue.str() << ";" 370 | << ";\n" 371 | "// Quit if snapshot meets condition.\n" 372 | "if( old<=recordValue ) break;\n" 373 | "// Attempt to install new value.\n" 374 | "} while( " 375 | << oldValue.str() << ".compare_and_swap(recordValue,old)!=old);"; 376 | 377 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_AGGREGATE, statements.str())); 378 | if (parent != nullptr) { 379 | parent->consume(cg); 380 | } 381 | } 382 | 383 | /* 384 | * Max 385 | */ 386 | void Max::produce(CodeGenerator &cg, Operator *input) { 387 | // add field to schema 388 | Schema schema = Schema::create().addFixSizeField(fieldId + "_max", DataType::Long, Stream); 389 | produce_(cg, input, schema); 390 | createState(cg, input, schema); 391 | migrateFrom(cg, input, schema); 392 | migrateTo(cg, input, schema); 393 | addStatePtr(cg, input, schema); 394 | } 395 | 396 | void Max::consume(CodeGenerator &cg, Operator *parent) { 397 | 398 | // calculate max 399 | std::stringstream statements; 400 | consume_(cg, parent); 401 | 402 | statements << "auto recordValue = record." << fieldId << "; "; 403 | 404 | std::stringstream oldValue; 405 | if (cg.config.getNuma()) { 406 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex][key]." << fieldId << "_max"; 407 | } else { 408 | if (cg.ctx(pipeline).hasGroupBy) { 409 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex][key]." << fieldId << "_max"; 410 | } else { 411 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex]." 
<< fieldId << "_max"; 412 | } 413 | } 414 | statements << "long old; do {\n" 415 | "// Take a snapshot\n" 416 | "old = " 417 | << oldValue.str() << ";" 418 | << ";\n" 419 | "// Quit if snapshot meets condition.\n" 420 | "if( old>=recordValue ) break;\n" 421 | "// Attempt to install new value.\n" 422 | "} while( " 423 | << oldValue.str() << ".compare_and_swap(recordValue,old)!=old);"; 424 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_AGGREGATE, statements.str())); 425 | 426 | if (parent != nullptr) { 427 | parent->consume(cg); 428 | } 429 | } 430 | 431 | bool Avg::hasFinalAggregation() { return true; } 432 | 433 | /* 434 | * Avg 435 | */ 436 | void Avg::produce(CodeGenerator &cg, Operator *input) { 437 | // add three fields to schema (save count and sum to calculate avg later) 438 | Schema schema = Schema::create() 439 | .addFixSizeField(fieldId + "_avg", DataType::Double, Stream) 440 | .addFixSizeField(fieldId + "_sum", DataType::Long, Stream) 441 | .addFixSizeField("count", DataType::Long, Stream); 442 | 443 | produce_(cg, input, schema); 444 | createState(cg, input, schema); 445 | migrateFrom(cg, input, schema); 446 | migrateTo(cg, input, schema); 447 | addStatePtr(cg, input, schema); 448 | } 449 | 450 | void Avg::consume(CodeGenerator &cg, Operator *parent) { 451 | 452 | consume_(cg, parent); 453 | std::stringstream statements; 454 | statements << "auto recordValue = record." << fieldId << "; "; 455 | 456 | std::stringstream oldValue; 457 | if (cg.config.getNuma()) { 458 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex][key]"; 459 | } else { 460 | if (cg.ctx(pipeline).hasGroupBy) { 461 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex][key]"; 462 | } else { 463 | oldValue << "state" << std::to_string(pipeline) << "[bufferIndex]"; 464 | } 465 | } 466 | 467 | statements << oldValue.str() << ".count++;" << std::endl; 468 | statements << oldValue.str() << "." 
<< fieldId << "_sum+= recordValue;" << std::endl; 469 | cg.pipeline(pipeline).addInstruction(CMethod::Instruction(INSTRUCTION_AGGREGATE, statements.str())); 470 | if (parent != nullptr) { 471 | parent->consume(cg); 472 | } 473 | } 474 | 475 | void Avg::consumeFinalAggregation(CodeGenerator &cg, Operator *pOperator) { 476 | std::stringstream statements; 477 | statements << "if(record.count != 0)"; 478 | statements << "record." << fieldId << "_avg" 479 | << " = " 480 | << "((double)record." << fieldId << "_sum) / ((double)record.count);"; 481 | cg.pipeline(pipeline - 1).addInstruction(CMethod::Instruction(INSTRUCTION_AGGREGATE, statements.str())); 482 | } 483 | --------------------------------------------------------------------------------