├── .clang-format ├── .clang-tidy ├── .cmake-format ├── .github ├── CODEOWNERS └── workflows │ └── build_test.yml ├── .gitignore ├── .gitmodules ├── .licenserc.yaml ├── CMakeLists.txt ├── Makefile ├── README.md ├── cmake_modules └── BuildUtils.cmake ├── docker ├── Dockerfile └── README.md ├── export ├── CMakeLists.txt └── planloader │ ├── CMakeLists.txt │ ├── planloader.cpp │ ├── planloader.h │ └── tests │ ├── CMakeLists.txt │ └── PlanLoaderTest.cpp ├── include └── substrait │ ├── common │ ├── Exceptions.h │ └── Io.h │ ├── expression │ └── DecimalLiteral.h │ ├── function │ ├── Extension.h │ ├── Function.h │ ├── FunctionLookup.h │ └── FunctionSignature.h │ └── type │ └── Type.h ├── scripts ├── find_vs.ps1 ├── run-clang-format.sh ├── run-clang-tidy.py ├── run-clang-tidy.sh ├── run-cmake-format.sh └── setup-ubuntu.sh ├── src └── substrait │ ├── CMakeLists.txt │ ├── common │ ├── CMakeLists.txt │ ├── Exceptions.cpp │ ├── Io.cpp │ ├── NumberUtils.h │ ├── PlanTransformerTool.cpp │ ├── StringUtils.h │ └── tests │ │ ├── CMakeLists.txt │ │ ├── IoTest.cpp │ │ ├── NumberUtilsTest.cpp │ │ └── StringUtilsTest.cpp │ ├── expression │ ├── CMakeLists.txt │ ├── DecimalLiteral.cpp │ └── tests │ │ ├── CMakeLists.txt │ │ └── DecimalTest.cpp │ ├── function │ ├── CMakeLists.txt │ ├── Extension.cpp │ ├── Function.cpp │ ├── FunctionLookup.cpp │ └── tests │ │ ├── CMakeLists.txt │ │ └── FunctionLookupTest.cpp │ ├── proto │ ├── CMakeLists.txt │ ├── ProtoUtils.cpp │ ├── ProtoUtils.h │ └── update_proto_package.pl │ ├── textplan │ ├── Any.h │ ├── CMakeLists.txt │ ├── Finally.h │ ├── Location.cpp │ ├── Location.h │ ├── ParseResult.cpp │ ├── ParseResult.h │ ├── PlanPrinterVisitor.cpp │ ├── PlanPrinterVisitor.h │ ├── README.md │ ├── StringManipulation.cpp │ ├── StringManipulation.h │ ├── StructuredSymbolData.h │ ├── SubstraitErrorListener.cpp │ ├── SubstraitErrorListener.h │ ├── SymbolTable.cpp │ ├── SymbolTable.h │ ├── SymbolTablePrinter.cpp │ ├── SymbolTablePrinter.h │ ├── converter │ │ ├── 
BasePlanProtoVisitor.cpp │ │ ├── BasePlanProtoVisitor.h │ │ ├── CMakeLists.txt │ │ ├── InitialPlanProtoVisitor.cpp │ │ ├── InitialPlanProtoVisitor.h │ │ ├── LoadBinary.cpp │ │ ├── LoadBinary.h │ │ ├── ParseBinary.cpp │ │ ├── ParseBinary.h │ │ ├── PipelineVisitor.cpp │ │ ├── PipelineVisitor.h │ │ ├── README.md │ │ ├── ReferenceNormalizer.cpp │ │ ├── ReferenceNormalizer.h │ │ ├── SaveBinary.cpp │ │ ├── SaveBinary.h │ │ ├── Tool.cpp │ │ ├── data │ │ │ ├── q6_first_stage.golden.splan │ │ │ └── q6_first_stage.json │ │ └── tests │ │ │ ├── BinaryToTextPlanConversionTest.cpp │ │ │ └── CMakeLists.txt │ ├── data │ │ ├── set-comparision-any.json │ │ ├── tpch-plan01.json │ │ ├── tpch-plan02.json │ │ ├── tpch-plan03.json │ │ ├── tpch-plan04.json │ │ ├── tpch-plan05.json │ │ ├── tpch-plan06.json │ │ ├── tpch-plan07.json │ │ ├── tpch-plan09.json │ │ ├── tpch-plan10.json │ │ ├── tpch-plan11.json │ │ ├── tpch-plan13.json │ │ ├── tpch-plan14.json │ │ ├── tpch-plan16.json │ │ ├── tpch-plan17.json │ │ ├── tpch-plan18.json │ │ ├── tpch-plan19.json │ │ ├── tpch-plan20.json │ │ ├── tpch-plan21.json │ │ └── tpch-plan22.json │ ├── parser │ │ ├── CMakeLists.txt │ │ ├── LoadText.cpp │ │ ├── LoadText.h │ │ ├── ParseText.cpp │ │ ├── ParseText.h │ │ ├── README.md │ │ ├── SubstraitParserErrorListener.cpp │ │ ├── SubstraitParserErrorListener.h │ │ ├── SubstraitPlanPipelineVisitor.cpp │ │ ├── SubstraitPlanPipelineVisitor.h │ │ ├── SubstraitPlanRelationVisitor.cpp │ │ ├── SubstraitPlanRelationVisitor.h │ │ ├── SubstraitPlanSubqueryRelationVisitor.cpp │ │ ├── SubstraitPlanSubqueryRelationVisitor.h │ │ ├── SubstraitPlanTypeVisitor.cpp │ │ ├── SubstraitPlanTypeVisitor.h │ │ ├── SubstraitPlanVisitor.cpp │ │ ├── SubstraitPlanVisitor.h │ │ ├── Tool.cpp │ │ ├── data │ │ │ ├── provided_sample1.json │ │ │ └── provided_sample1.splan │ │ ├── grammar │ │ │ ├── CMakeLists.txt │ │ │ ├── SubstraitPlanLexer.g4 │ │ │ └── SubstraitPlanParser.g4 │ │ └── tests │ │ │ ├── CMakeLists.txt │ │ │ └── TextPlanParserTest.cpp 
│ └── tests │ │ ├── CMakeLists.txt │ │ ├── ParseResultMatchers.cpp │ │ ├── ParseResultMatchers.h │ │ ├── RoundtripTest.cpp │ │ └── SymbolTableTest.cpp │ └── type │ ├── CMakeLists.txt │ ├── Type.cpp │ └── tests │ ├── CMakeLists.txt │ └── TypeTest.cpp └── third_party ├── .clang-tidy ├── CMakeLists.txt ├── antlr4 └── cmake │ ├── ExternalAntlr4Cpp.cmake │ └── FindANTLR.cmake ├── datetime.cmake └── protobuf.cmake /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveAssignments: None 5 | AlignConsecutiveDeclarations: None 6 | SeparateDefinitionBlocks: Always 7 | AlignOperands: false 8 | AlignTrailingComments: false 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: Never 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: Empty 13 | AllowShortIfStatementsOnASingleLine: false 14 | AllowShortLoopsOnASingleLine: false 15 | AlwaysBreakAfterReturnType: None 16 | AlwaysBreakBeforeMultilineStrings: true 17 | AlwaysBreakTemplateDeclarations: Yes 18 | BinPackArguments: false 19 | BinPackParameters: false 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: Never 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: false 26 | AfterObjCDeclaration: false 27 | AfterStruct: false 28 | AfterUnion: false 29 | BeforeCatch: false 30 | BeforeElse: false 31 | IndentBraces: false 32 | BreakBeforeBinaryOperators: None 33 | BreakBeforeBraces: Attach 34 | BreakBeforeTernaryOperators: true 35 | BreakConstructorInitializersBeforeComma: false 36 | BreakAfterJavaFieldAnnotations: false 37 | BreakStringLiterals: false 38 | ColumnLimit: 80 39 | CommentPragmas: '^ IWYU pragma:' 40 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 41 | ConstructorInitializerIndentWidth: 4 42 | ContinuationIndentWidth: 4 43 | Cpp11BracedListStyle: true 44 | 
DerivePointerAlignment: false 45 | DisableFormat: false 46 | ForEachMacros: [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ] 47 | IncludeCategories: 48 | - Regex: '^<.*\.h(pp)?>' 49 | Priority: 1 50 | - Regex: '^<.*' 51 | Priority: 2 52 | - Regex: '.*' 53 | Priority: 3 54 | IndentCaseLabels: true 55 | IndentWidth: 2 56 | IndentWrappedFunctionNames: false 57 | KeepEmptyLinesAtTheStartOfBlocks: false 58 | MacroBlockBegin: '' 59 | MacroBlockEnd: '' 60 | MaxEmptyLinesToKeep: 1 61 | NamespaceIndentation: None 62 | ObjCBlockIndentWidth: 2 63 | ObjCSpaceAfterProperty: false 64 | ObjCSpaceBeforeProtocolList: false 65 | PenaltyBreakBeforeFirstCallParameter: 1 66 | PenaltyBreakComment: 300 67 | PenaltyBreakFirstLessLess: 120 68 | PenaltyBreakString: 1000 69 | PenaltyExcessCharacter: 1000000 70 | PenaltyReturnTypeOnItsOwnLine: 200 71 | PointerAlignment: Left 72 | ReflowComments: true 73 | SortIncludes: CaseSensitive 74 | SpaceAfterCStyleCast: false 75 | SpaceBeforeAssignmentOperators: true 76 | SpaceBeforeParens: ControlStatements 77 | SpaceInEmptyParentheses: false 78 | SpacesBeforeTrailingComments: 1 79 | SpacesInAngles: false 80 | SpacesInContainerLiterals: true 81 | SpacesInCStyleCastParentheses: false 82 | SpacesInParentheses: false 83 | SpacesInSquareBrackets: false 84 | Standard: c++11 85 | TabWidth: 8 86 | UseTab: Never 87 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | --- 3 | # Configure clang-tidy for this project. 4 | 5 | # Here is an explanation for why some of the checks are disabled: 6 | # 7 | # 8 | # -modernize-use-trailing-return-type: clang-tidy recommends using 9 | # `auto Foo() -> std::string { return ...; }`, we think the code is less 10 | # readable in this form. 
11 | # 12 | # -modernize-return-braced-init-list: We think removing typenames and using 13 | # only braced-init can hurt readability. 14 | # 15 | # -modernize-avoid-c-arrays: We only use C arrays when they seem to be the 16 | # right tool for the job, such as `char foo[] = "hello"`. In these cases, 17 | # avoiding C arrays often makes the code less readable, and std::array is 18 | # not a drop-in replacement because it doesn't deduce the size. 19 | # 20 | # -performance-move-const-arg: This warning requires the developer to 21 | # know/care more about the implementation details of types/functions than 22 | # should be necessary. For example, `A a; F(std::move(a));` will trigger a 23 | # warning IFF `A` is a trivial type (and therefore the move is 24 | # meaningless). It would also warn if `F` accepts by `const&`, which is 25 | # another detail that the caller need not care about. 26 | # 27 | # -readability-redundant-declaration: A friend declaration inside a class 28 | # counts as a declaration, so if we also declare that friend outside the 29 | # class in order to document it as part of the public API, that will 30 | # trigger a redundant declaration warning from this check. 31 | # 32 | # -readability-function-cognitive-complexity: too many false positives with 33 | # clang-tidy-12. We need to disable this check in macros, and that setting 34 | # only appears in clang-tidy-13. 35 | # 36 | # -bugprone-narrowing-conversions: too many false positives around 37 | # `std::size_t` vs. `*::difference_type`. 38 | # 39 | # -bugprone-easily-swappable-parameters: too many false positives. 40 | # 41 | # -bugprone-implicit-widening-of-multiplication-result: too many false positives. 42 | # Almost any expression of the form `2 * variable` or `long x = a_int * b_int;` 43 | # generates an error. 44 | # 45 | # -bugprone-unchecked-optional-access: too many false positives in tests. 
46 | # Despite what the documentation says, this warning appears after 47 | # `ASSERT_TRUE(variable)` or `ASSERT_TRUE(variable.has_value())`. 48 | Checks: > 49 | -*, 50 | bugprone-*, 51 | google-*, 52 | modernize-*, 53 | performance-*, 54 | portability-*, 55 | readability-*, 56 | -google-runtime-references, 57 | -google-readability-todo, 58 | -google-build-using-namespace, 59 | -google-readability-casting, 60 | -modernize-return-braced-init-list, 61 | -modernize-use-trailing-return-type, 62 | -modernize-avoid-c-arrays, 63 | -performance-move-const-arg, 64 | -performance-inefficient-vector-operation, 65 | -readability-identifier-length, 66 | -readability-qualified-auto, 67 | -readability-inconsistent-declaration-parameter-name, 68 | -readability-magic-numbers, 69 | -readability-named-parameter, 70 | -readability-else-after-return, 71 | -readability-redundant-declaration, 72 | -readability-function-cognitive-complexity, 73 | -readability-implicit-bool-conversion, 74 | -bugprone-branch-clone, 75 | -bugprone-exception-escape, 76 | -bugprone-easily-swappable-parameters, 77 | -bugprone-implicit-widening-of-multiplication-result, 78 | -bugprone-unchecked-optional-access 79 | WarningsAsErrors: '*' 80 | HeaderFilterRegex: 'io/substrait/*.\\.h$' 81 | AnalyzeTemporaryDtors: false 82 | FormatStyle: none 83 | CheckOptions: 84 | - { key: readability-identifier-naming.NamespaceCase, value: lower_case} 85 | - { key: readability-identifier-naming.ClassCase , value: CamelCase } 86 | - { key: readability-identifier-naming.StructCase, value: CamelCase } 87 | - { key: readability-identifier-naming.EnumCase, value: CamelCase } 88 | - { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase } 89 | - { key: readability-identifier-naming.FunctionCase, value: camelBack } 90 | - { key: readability-identifier-naming.VariableCase, value: camelBack } 91 | - { key: readability-identifier-naming.PublicMemberCase, value: camelBack } 92 | - { key: 
readability-identifier-naming.PublicMethodCase, value: camelBack } 93 | - { key: readability-identifier-naming.ProtectedMemberCase, value: camelBack } 94 | - { key: readability-identifier-naming.ProtectedMemberSuffix, value: _ } 95 | - { key: readability-identifier-naming.ProtectedMethodCase, value: camelBack } 96 | - { key: readability-identifier-naming.PrivateMemberSuffix, value: _ } 97 | - { key: readability-identifier-naming.PrivateMemberCase, value: camelBack } 98 | - { key: readability-identifier-naming.PrivateMethodCase, value: camelBack } 99 | - { key: readability-identifier-naming.EnumConstantCase, value: CamelCase } 100 | - { key: readability-identifier-naming.EnumConstantPrefix, value: k } 101 | - { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase } 102 | - { key: readability-identifier-naming.ConstexprVariablePrefix, value: k } 103 | - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase } 104 | - { key: readability-identifier-naming.GlobalConstantPrefix, value: k } 105 | - { key: readability-identifier-naming.MemberConstantCase, value: CamelCase } 106 | - { key: readability-identifier-naming.MemberConstantPrefix, value: k } 107 | - { key: readability-identifier-naming.StaticConstantCase, value: CamelCase } 108 | - { key: readability-identifier-naming.StaticConstantPrefix, value: k } 109 | - { key: readability-implicit-bool-conversion.AllowIntegerConditions, value: 1 } 110 | - { key: readability-implicit-bool-conversion.AllowPointerConditions, value: 1 } 111 | - { key: readability-function-cognitive-complexity.IgnoreMacros, value: 1 } 112 | -------------------------------------------------------------------------------- /.cmake-format: -------------------------------------------------------------------------------- 1 | # How many spaces to tab for indent 2 | tab_size: 2 3 | # Format command names consistently as 'lower' or 'upper' case 4 | command_case: "lower" 5 | first_comment_is_literal: False 6 | # 
enable comment markup parsing and reflow 7 | enable_markup: False 8 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | * @westonpace 4 | /src/substrait/textplan @EpsilonPrime 5 | -------------------------------------------------------------------------------- /.github/workflows/build_test.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | name: Build & Test 3 | 4 | on: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: [main] 9 | 10 | jobs: 11 | check: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | submodules: recursive 18 | - name: Setup Ubuntu 19 | run: ./scripts/setup-ubuntu.sh 20 | - name: Check License Header 21 | uses: apache/skywalking-eyes/header@v0.4.0 22 | - name: Formatting Cmake files 23 | run: ./scripts/run-cmake-format.sh 24 | - name: Formatting Clang files 25 | run: ./scripts/run-clang-format.sh 26 | - name: Checking formatting 27 | run: git diff --exit-code 28 | - name: Checking code style 29 | run: ./scripts/run-clang-tidy.sh 30 | 31 | ubuntu-build-and-test: 32 | runs-on: ubuntu-latest 33 | 34 | steps: 35 | - uses: actions/checkout@v3 36 | with: 37 | submodules: recursive 38 | 39 | - name: Setup Ubuntu 40 | run: ./scripts/setup-ubuntu.sh 41 | 42 | - name: Build 43 | run: | 44 | mkdir build 45 | cmake --version 46 | cmake -Bbuild -GNinja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TZ_LIB=ON 47 | ninja -C build 48 | 49 | - name: Test 50 | run: ctest --test-dir build --output-on-failure --timeout 30 51 | 52 | windows-build-and-test: 53 | runs-on: windows-latest 54 | 55 | steps: 56 | - uses: actions/checkout@v3 57 | with: 58 | submodules: recursive 59 | 60 | - name: Set up JDK 11 61 | uses: actions/setup-java@v3 62 | with: 63 | distribution: 
'temurin' 64 | java-version: '11' 65 | 66 | - name: Build 67 | run: | 68 | ./scripts/find_vs.ps1 69 | mkdir build 70 | cmake --version 71 | cmake -Bbuild -GNinja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TZ_LIB=ON 72 | ninja -C build 73 | 74 | - name: Test 75 | run: ctest --test-dir build --output-on-failure --timeout 30 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # IDE files 35 | *~ 36 | .vscode/ 37 | 38 | # Common CMake Build directory 39 | build/ 40 | 41 | # Generated code 42 | src/proto/substrait 43 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/yaml-cpp"] 2 | path = third_party/yaml-cpp 3 | url = https://github.com/jbeder/yaml-cpp.git 4 | [submodule "third_party/substrait"] 5 | path = third_party/substrait 6 | url = https://github.com/substrait-io/substrait.git 7 | [submodule "third_party/fmt"] 8 | path = third_party/fmt 9 | url = https://github.com/fmtlib/fmt 10 | [submodule "third_party/abseil-cpp"] 11 | path = third_party/abseil-cpp 12 | url = https://github.com/abseil/abseil-cpp.git 13 | [submodule "third_party/datetime"] 14 | path = third_party/datetime 15 | url = https://github.com/HowardHinnant/date.git 16 | [submodule "third_party/protobuf-matchers"] 17 | path = third_party/protobuf-matchers 18 | url = https://github.com/EpsilonPrime/protobuf-matchers.git 
19 | -------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | header: 2 | license: 3 | spdx-id: Apache-2.0 4 | content: | 5 | SPDX-License-Identifier: Apache-2.0 6 | 7 | paths-ignore: 8 | - '.github' 9 | - '.gitignore' 10 | - '.gitmodules' 11 | - '.clang-format' 12 | - '.clang-tidy' 13 | - '.cmake-format' 14 | - '.licenserc.yaml' 15 | - 'third_party/abseil-cpp' 16 | - 'third_party/antlr4' 17 | - 'third_party/datetime' 18 | - 'third_party/fmt' 19 | - 'third_party/googletest' 20 | - 'third_party/protobuf-matchers' 21 | - 'third_party/substrait' 22 | - 'third_party/yaml-cpp' 23 | - '**/*.md' 24 | - '**/*.json' 25 | - '**/*.splan' 26 | - '**/*.log' 27 | 28 | comment: never 29 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | cmake_minimum_required(VERSION 3.24) 4 | 5 | # set the project name 6 | project(substrait-cpp) 7 | 8 | message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") 9 | 10 | set(CMAKE_CXX_STANDARD 17) 11 | set(CMAKE_CXX_STANDARD_REQUIRED True) 12 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 13 | set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) 14 | 15 | option(SUBSTRAIT_CPP_SANITIZE_DEBUG_BUILD 16 | "Turns on address and undefined memory sanitization runtime checking." 17 | OFF) 18 | 19 | if(${SUBSTRAIT_CPP_SANITIZE_DEBUG_BUILD}) 20 | add_compile_options($<$:-fsanitize=undefined>) 21 | add_link_options($<$:-fsanitize=undefined>) 22 | 23 | add_compile_options($<$:-fsanitize=address>) 24 | add_link_options($<$:-fsanitize=address>) 25 | endif() 26 | 27 | option( 28 | SUBSTRAIT_CPP_BUILD_TESTING 29 | "Enable substrait-cpp tests. This will enable all other build options automatically." 
30 | ON) 31 | 32 | set(SUBSTRAIT_CPP_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) 33 | 34 | # Setup ANTLR4 dependency. It's important that this is done at the 35 | # top-level CMakeLists.txt, given that the ANTLR4 build may generate deeply nested 36 | # paths - which may be a problem for MSVC, with its 260 character path limit. 37 | list(APPEND CMAKE_MODULE_PATH 38 | "${CMAKE_CURRENT_SOURCE_DIR}/third_party/antlr4/cmake") 39 | 40 | set(CMAKE_CXX_STANDARD 17) 41 | add_definitions(-DANTLR4CPP_STATIC) 42 | # using /MD flag for antlr4_runtime (for Visual C++ compilers only) 43 | set(ANTLR4_WITH_STATIC_CRT OFF) 44 | set(ANTLR4_BUILD_CPP_TESTS OFF) 45 | # Note: df4d68c adds a fix for MSVC compilers. No release has been made since; 46 | # latest release was 4.13.2. Revert back to a tag once 4.13.3 is released. 47 | set(ANTLR4_TAG df4d68c09cdef73e023b8838a8bc7ca4dff1d1de) 48 | include(ExternalAntlr4Cpp) 49 | include_directories(${ANTLR4_INCLUDE_DIRS}) 50 | set(ANTLR_EXECUTABLE_DIR ${CMAKE_CURRENT_BINARY_DIR}) 51 | file(DOWNLOAD https://www.antlr.org/download/antlr-4.13.2-complete.jar 52 | "${ANTLR_EXECUTABLE_DIR}/antlr.jar") 53 | set(ANTLR_EXECUTABLE "${ANTLR_EXECUTABLE_DIR}/antlr.jar") 54 | 55 | # Local files come first. 56 | include_directories(include) 57 | include_directories(src) 58 | 59 | # TODO: Simplify once we can require cmake 3.27 (where CONFIG is default). 60 | 61 | # Due to packaging changes we use the combined protobuf/absl packaging if 62 | # available otherwise we fallback to the older protobuf method. 
63 | find_package(Protobuf QUIET CONFIG) 64 | if(${Protobuf_FOUND}) 65 | message(STATUS "Modern protobuf library located.") 66 | set(ABSL_INCLUDED_WITH_PROTOBUF ON) 67 | else() 68 | find_package(Protobuf QUIET) 69 | if(${Protobuf_FOUND}) 70 | message(STATUS "Legacy protobuf library located.") 71 | include_directories(${Protobuf_INCLUDE_DIRS}) 72 | set(ABSL_INCLUDED_WITH_PROTOBUF OFF) 73 | else() 74 | message(STATUS "Fetching external protobuf library.") 75 | include(third_party/protobuf.cmake) 76 | set(ABSL_INCLUDED_WITH_PROTOBUF ON) 77 | endif() 78 | endif() 79 | 80 | add_subdirectory(third_party) 81 | 82 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") 83 | include(BuildUtils) 84 | 85 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 86 | enable_testing() 87 | endif() 88 | 89 | install(EXPORT SubstraitTargets DESTINATION lib/cmake/Substrait) 90 | 91 | add_subdirectory(src/substrait) 92 | add_subdirectory(export) 93 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | .PHONY: all clean build debug release 4 | 5 | BUILD_TYPE := Release 6 | 7 | all: debug 8 | 9 | clean: 10 | @rm -rf build-* 11 | 12 | build-common: 13 | @mkdir -p build-${BUILD_TYPE} 14 | @cd build-${BUILD_TYPE} && \ 15 | cmake -Wno-dev \ 16 | -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ 17 | -DPREFER_STATIC_LIBS=OFF \ 18 | $(FORCE_COLOR) \ 19 | .. 
20 | 21 | build: 22 | VERBOSE=1 cmake --build build-${BUILD_TYPE} -j $${CPU_COUNT:-`nproc`} || \ 23 | cmake --build build-${BUILD_TYPE} 24 | 25 | format: 26 | set -f 27 | bash scripts/run-cmake-format.sh && bash scripts/run-clang-format.sh 28 | set +f 29 | 30 | tidy: 31 | set -f 32 | bash scripts/run-clang-tidy.sh 33 | set +f 34 | 35 | tidy-fix: 36 | set -f 37 | bash scripts/run-clang-tidy.sh fix 38 | set +f 39 | 40 | debug: 41 | @$(MAKE) build-common BUILD_TYPE=Debug 42 | @$(MAKE) build BUILD_TYPE=Debug 43 | 44 | release: 45 | @$(MAKE) build-common BUILD_TYPE=Release 46 | @$(MAKE) build BUILD_TYPE=Release 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # substrait-cpp 3 | 4 | Planned home for CPP libraries to help build/consume Substrait query plans. 5 | 6 | ## Getting Started 7 | 8 | We provide scripts to help developers set up and install substrait-cpp dependencies. 9 | 10 | ### Get the substrait-cpp Source 11 | ``` 12 | git clone --recursive https://github.com/substrait-io/substrait-cpp.git 13 | cd substrait-cpp 14 | # if you are updating an existing checkout 15 | git submodule sync --recursive 16 | git submodule update --init --recursive 17 | ``` 18 | 19 | ### Setting up on Linux (Ubuntu 20.04 or later) 20 | 21 | Once you have checked out substrait-cpp, you can set up and build like so: 22 | 23 | ```shell 24 | $ ./scripts/setup-ubuntu.sh 25 | $ make 26 | ``` 27 | 28 | ### Coding style 29 | The coding style is based on the Google C++ Style Guide, with the following naming changes: 30 | - Function names use 'camelBack' 31 | - Variable names use 'camelBack' 32 | - Class member names use 'camelBack' with '_' as a suffix 33 | 34 | For more detailed information, please refer to the .clang-tidy file in the root directory. 
35 | 36 | 37 | Run `make format` to format the source code, `make tidy` to check the coding style, and `make tidy-fix` to fix coding style issues automatically. 38 | 39 | ## License 40 | 41 | substrait-cpp is licensed under the Apache 2.0 License. A copy of the license 42 | [can be found here.](https://www.apache.org/licenses/LICENSE-2.0.html) 43 | -------------------------------------------------------------------------------- /cmake_modules/BuildUtils.cmake: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | # This file contains various cmake helper functions that are used throughout the 4 | # project. 5 | 6 | # Add a new test case together with the executable that runs it. 7 | # 8 | # TEST_NAME is the name of the test. 9 | # 10 | # SOURCES is the list of C++ source files to compile into the test executable. 11 | function(ADD_TEST_CASE TEST_NAME) 12 | set(multi_value_args SOURCES EXTRA_LINK_LIBS EXTRA_INCLUDES 13 | EXTRA_DEPENDENCIES) 14 | cmake_parse_arguments(ARG "${options}" "${one_value_args}" 15 | "${multi_value_args}" ${ARGN}) 16 | if(ARG_UNPARSED_ARGUMENTS) 17 | message( 18 | SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") 19 | endif() 20 | 21 | if(ARG_SOURCES) 22 | set(SOURCES ${ARG_SOURCES}) 23 | else() 24 | message(SEND_ERROR "Error: SOURCES is a required argument to add_test_case") 25 | endif() 26 | 27 | add_executable(${TEST_NAME} ${SOURCES}) 28 | set_target_properties( 29 | ${TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY 30 | ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests) 31 | 32 | if(ARG_EXTRA_LINK_LIBS) 33 | target_link_libraries(${TEST_NAME} PRIVATE ${ARG_EXTRA_LINK_LIBS}) 34 | endif() 35 | 36 | if(ARG_EXTRA_INCLUDES) 37 | target_include_directories(${TEST_NAME} SYSTEM PUBLIC ${ARG_EXTRA_INCLUDES}) 38 | endif() 39 | 40 | if(ARG_EXTRA_DEPENDENCIES) 41 | add_dependencies(${TEST_NAME} ${ARG_EXTRA_DEPENDENCIES}) 42 | endif() 
43 | 44 | add_test( 45 | NAME ${TEST_NAME} 46 | COMMAND $ 47 | WORKING_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests") 48 | endfunction() 49 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | FROM ubuntu:20.04 3 | 4 | SHELL ["/bin/bash", "-o", "pipefail", "-c"] 5 | 6 | WORKDIR /substrait 7 | 8 | RUN DEBIAN_FRONTEND=noninteractive TZ=America/New_York apt-get update -y && apt-get upgrade -y \ 9 | && apt-get install -y sudo apt-utils tzdata 10 | RUN dpkg-reconfigure tzdata 11 | 12 | RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && apt-get install -y git build-essential cmake 13 | 14 | RUN git clone https://github.com/substrait-io/substrait-cpp.git \ 15 | && cd substrait-cpp \ 16 | && git submodule sync --recursive \ 17 | && git submodule update --init --recursive 18 | 19 | RUN cd substrait-cpp && ./scripts/setup-ubuntu.sh 20 | 21 | RUN cd substrait-cpp && make 22 | 23 | ENTRYPOINT ["/bin/bash"] 24 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Setup Docker Container 2 | 3 | ## Build 4 | 5 | ```bash 6 | docker build -t substrait-cpp . 
7 | ``` 8 | 9 | ## Run 10 | 11 | ```bash 12 | docker run -it substrait-cpp 13 | ``` 14 | 15 | ## Evaluate 16 | 17 | Run function tests 18 | 19 | ```bash 20 | ./build-Debug/substrait/function/tests/substrait_function_test 21 | ``` 22 | -------------------------------------------------------------------------------- /export/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_subdirectory(planloader) 4 | -------------------------------------------------------------------------------- /export/planloader/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) 4 | add_library(planloader SHARED planloader.cpp) 5 | 6 | add_dependencies(planloader substrait_io) 7 | target_link_libraries(planloader substrait_io) 8 | 9 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 10 | add_subdirectory(tests) 11 | endif() 12 | 13 | install(TARGETS planloader EXPORT SubstraitTargets) 14 | -------------------------------------------------------------------------------- /export/planloader/planloader.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "planloader.h" 4 | 5 | #include 6 | #include 7 | 8 | extern "C" { 9 | 10 | // Load a Substrait plan (in any format) from disk. 11 | // Stores the Substrait plan in planBuffer in serialized form. 12 | // Returns a SerializedPlan structure containing either the serialized plan or 13 | // an error message. error_message is nullptr upon success. 
14 | SerializedPlan* load_substrait_plan(const char* filename) { 15 | auto newPlan = new SerializedPlan(); 16 | newPlan->buffer = nullptr; 17 | newPlan->size = 0; 18 | newPlan->error_message = nullptr; 19 | 20 | auto planOrError = io::substrait::loadPlan(filename); 21 | if (!planOrError.ok()) { 22 | auto errMsg = planOrError.status().message(); 23 | newPlan->error_message = new char[errMsg.length() + 1]; 24 | strncpy(newPlan->error_message, errMsg.data(), errMsg.length() + 1); 25 | return newPlan; 26 | } 27 | ::substrait::proto::Plan plan = *planOrError; 28 | std::string text = plan.SerializeAsString(); 29 | newPlan->buffer = new unsigned char[text.length() + 1]; 30 | memcpy(newPlan->buffer, text.data(), text.length() + 1); 31 | newPlan->size = 32 | static_cast(text.length() & std::numeric_limits::max()); 33 | return newPlan; 34 | } 35 | 36 | void free_substrait_plan(SerializedPlan* plan) { 37 | delete[] plan->buffer; 38 | delete[] plan->error_message; 39 | delete plan; 40 | } 41 | 42 | // Write a serialized Substrait plan to disk in the specified format. 43 | // On error returns a non-empty error message. 44 | // On success a nullptr is returned. 
45 | const char* save_substrait_plan( 46 | const unsigned char* plan_data, 47 | int32_t plan_data_length, 48 | const char* filename, 49 | io::substrait::PlanFileFormat format) { 50 | ::substrait::proto::Plan plan; 51 | std::string data((const char*)plan_data, plan_data_length); 52 | plan.ParseFromString(data); 53 | auto result = io::substrait::savePlan(plan, filename, format); 54 | if (!result.ok()) { 55 | return result.message().data(); 56 | } 57 | return nullptr; 58 | } 59 | 60 | } // extern "C" 61 | -------------------------------------------------------------------------------- /export/planloader/planloader.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | 5 | extern "C" { 6 | 7 | // Since this is actually C code, stick to C style names for exporting. 8 | // NOLINTBEGIN(readability-identifier-naming) 9 | 10 | using SerializedPlan = struct { 11 | // If set, contains a serialized ::substrait::proto::Plan object. 12 | unsigned char* buffer; 13 | // If buffer is set, this is the size of the buffer. 14 | int32_t size; 15 | // If null the buffer is valid, otherwise this points to a null terminated 16 | // error string. 17 | char* error_message; 18 | }; 19 | 20 | // Load a Substrait plan (in any format) from disk. 21 | // 22 | // Accepts filename as a null-terminated C string. 23 | // Returns a SerializedPlan structure containing either the serialized plan or 24 | // an error message. This SerializedPlan should be freed using 25 | // free_substrait_plan. 26 | SerializedPlan* load_substrait_plan(const char* filename); 27 | 28 | // Frees a SerializedPlan that was returned from load_substrait_plan. 29 | void free_substrait_plan(SerializedPlan* plan); 30 | 31 | // Write a serialized Substrait plan to disk in the specified format. 32 | // 33 | // plan_data is a Substrait Plan serialized into a byte array with length 34 | // plan_data_length. 
// Filename is a null-terminated C string.
// On error returns a non-empty error message.
// On success a nullptr is returned.
const char* save_substrait_plan(
    const unsigned char* plan_data,
    int32_t plan_data_length,
    const char* filename,
    io::substrait::PlanFileFormat format);

// NOLINTEND(readability-identifier-naming)

} // extern "C"

--------------------------------------------------------------------------------
/export/planloader/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0

# Place the test executable in the parent of this directory's binary dir so
# that the relative "tests/data" layout below lines up with it.
cmake_path(GET CMAKE_CURRENT_BINARY_DIR PARENT_PATH
           CMAKE_CURRENT_BINARY_PARENT_DIR)

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_PARENT_DIR}")

add_test_case(
  planloader_test
  SOURCES
  PlanLoaderTest.cpp
  EXTRA_LINK_LIBS
  planloader
  gmock
  gtest
  gtest_main)

set(TEXTPLAN_SOURCE_DIR
    "${CMAKE_CURRENT_SOURCE_DIR}/../../../src/substrait/textplan")

# Copy the plan used by PlanLoaderTest next to the test executable after every
# build.  VERBATIM keeps argument escaping identical across platforms.
add_custom_command(
  TARGET planloader_test
  POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data.."
  COMMAND ${CMAKE_COMMAND} -E make_directory
          "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data"
  COMMAND
    ${CMAKE_COMMAND} -E copy
    "${TEXTPLAN_SOURCE_DIR}/converter/data/q6_first_stage.json"
    "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json"
  VERBATIM)

message(
  STATUS "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data"
)

# For Windows: copy planloader dll to the test executable directory so that it
# can be found during test execution.
38 | if(WIN32) 39 | add_custom_command( 40 | TARGET planloader_test 41 | POST_BUILD 42 | COMMAND ${CMAKE_COMMAND} -E copy $ 43 | "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests") 44 | endif() 45 | -------------------------------------------------------------------------------- /export/planloader/tests/PlanLoaderTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../planloader.h" 8 | #include "substrait/proto/plan.pb.h" 9 | 10 | namespace io::substrait::textplan { 11 | namespace { 12 | 13 | TEST(PlanLoaderTest, LoadAndSave) { 14 | auto serializedPlan = load_substrait_plan("data/q6_first_stage.json"); 15 | ASSERT_EQ(serializedPlan->error_message, nullptr); 16 | 17 | ::substrait::proto::Plan plan; 18 | bool parseStatus = 19 | plan.ParseFromArray(serializedPlan->buffer, serializedPlan->size); 20 | ASSERT_TRUE(parseStatus) << "Failed to parse the plan."; 21 | 22 | const char* saveStatus = save_substrait_plan( 23 | serializedPlan->buffer, 24 | serializedPlan->size, 25 | "outfile.splan", 26 | PlanFileFormat::kText); 27 | ASSERT_EQ(saveStatus, nullptr); 28 | 29 | free_substrait_plan(serializedPlan); 30 | } 31 | 32 | TEST(PlanLoaderTest, LoadMissingFile) { 33 | auto serializedPlan = load_substrait_plan("no_such_file.json"); 34 | ASSERT_THAT( 35 | serializedPlan->error_message, 36 | ::testing::StartsWith("Failed to open file no_such_file.json")); 37 | 38 | free_substrait_plan(serializedPlan); 39 | } 40 | 41 | } // namespace 42 | } // namespace io::substrait::textplan 43 | -------------------------------------------------------------------------------- /include/substrait/common/Exceptions.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace io::substrait::common { 10 | namespace 
error_code { 11 | 12 | //====================== User Error Codes ======================: 13 | 14 | // An error raised when an argument verification fails 15 | inline constexpr const char* kInvalidArgument = "INVALID_ARGUMENT"; 16 | 17 | // An error raised when a requested operation is not supported. 18 | inline constexpr const char* kUnsupported = "UNSUPPORTED"; 19 | 20 | //====================== Runtime Error Codes ======================: 21 | 22 | // An error raised when the current state of a component is invalid. 23 | inline constexpr const char* kInvalidState = "INVALID_STATE"; 24 | 25 | // An error raised when unreachable code point was executed. 26 | inline constexpr const char* kUnreachableCode = "UNREACHABLE_CODE"; 27 | 28 | // An error raised when a requested operation is not implemented. 29 | inline constexpr const char* kNotImplemented = "NOT_IMPLEMENTED"; 30 | 31 | } // namespace error_code 32 | 33 | class SubstraitException : public std::exception { 34 | public: 35 | enum class Type { 36 | // Errors where the root cause of the problem is either because of bad input 37 | // or an unsupported pattern of use are classified with USER. Examples 38 | // of errors in this category include syntax errors, unavailable names or 39 | // objects. 40 | kUser = 0, 41 | 42 | // Errors where the root cause of the problem is some unreliable aspect of 43 | // the system are classified with SYSTEM. 
44 | kSystem = 1 45 | }; 46 | SubstraitException( 47 | const char* file, 48 | size_t line, 49 | const char* function, 50 | const std::string& exceptionCode, 51 | const std::string& exceptionMessage, 52 | Type exceptionType = Type::kSystem, 53 | const std::string& exceptionName = "SubstraitException"); 54 | 55 | // Inherited 56 | [[nodiscard]] const char* what() const noexcept override { 57 | return msg_.c_str(); 58 | } 59 | 60 | private: 61 | const std::string msg_; 62 | }; 63 | 64 | class SubstraitUserError : public SubstraitException { 65 | public: 66 | SubstraitUserError( 67 | const char* file, 68 | size_t line, 69 | const char* function, 70 | const std::string& exceptionCode, 71 | const std::string& exceptionMessage, 72 | const std::string& exceptionName = "SubstraitUserError") 73 | : SubstraitException( 74 | file, 75 | line, 76 | function, 77 | exceptionCode, 78 | exceptionMessage, 79 | Type::kUser, 80 | exceptionName) {} 81 | }; 82 | 83 | class SubstraitRuntimeError final : public SubstraitException { 84 | public: 85 | SubstraitRuntimeError( 86 | const char* file, 87 | size_t line, 88 | const char* function, 89 | const std::string& exceptionCode, 90 | const std::string& exceptionMessage, 91 | const std::string& exceptionName = "SubstraitRuntimeError") 92 | : SubstraitException( 93 | file, 94 | line, 95 | function, 96 | exceptionCode, 97 | exceptionMessage, 98 | Type::kSystem, 99 | exceptionName) {} 100 | }; 101 | 102 | template 103 | std::string errorMessage(fmt::string_view fmt, const Args&... args) { 104 | return fmt::vformat(fmt, fmt::make_format_args(args...)); 105 | } 106 | 107 | #define SUBSTRAIT_THROW(exception, errorCode, ...) \ 108 | { \ 109 | auto message = ::io::substrait::common::errorMessage(__VA_ARGS__); \ 110 | throw exception(__FILE__, __LINE__, __FUNCTION__, errorCode, message); \ 111 | } 112 | 113 | #define SUBSTRAIT_UNSUPPORTED(...) 
\ 114 | SUBSTRAIT_THROW( \ 115 | ::io::substrait::common::SubstraitUserError, \ 116 | ::io::substrait::common::error_code::kUnsupported, \ 117 | ##__VA_ARGS__) 118 | 119 | #define SUBSTRAIT_UNREACHABLE(...) \ 120 | SUBSTRAIT_THROW( \ 121 | ::io::substrait::common::SubstraitRuntimeError, \ 122 | ::io::substrait::common::error_code::kUnreachableCode, \ 123 | ##__VA_ARGS__) 124 | 125 | #define SUBSTRAIT_FAIL(...) \ 126 | SUBSTRAIT_THROW( \ 127 | ::io::substrait::common::SubstraitRuntimeError, \ 128 | ::io::substrait::common::error_code::kInvalidState, \ 129 | ##__VA_ARGS__) 130 | 131 | #define SUBSTRAIT_USER_FAIL(...) \ 132 | SUBSTRAIT_THROW( \ 133 | ::io::substrait::common::SubstraitUserError, \ 134 | ::io::substrait::common::error_code::kInvalidState, \ 135 | ##__VA_ARGS__) 136 | 137 | #define SUBSTRAIT_NYI(...) \ 138 | SUBSTRAIT_THROW( \ 139 | ::io::substrait::common::SubstraitRuntimeError, \ 140 | ::io::substrait::common::error_code::kNotImplemented, \ 141 | ##__VA_ARGS__) 142 | 143 | #define SUBSTRAIT_IVALID_ARGUMENT(...) \ 144 | SUBSTRAIT_THROW( \ 145 | ::io::substrait::common::SubstraitUserError, \ 146 | ::io::substrait::common::error_code::kInvalidArgument, \ 147 | ##__VA_ARGS__) 148 | 149 | } // namespace io::substrait::common 150 | -------------------------------------------------------------------------------- /include/substrait/common/Io.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "absl/status/statusor.h" 8 | #include "substrait/proto/plan.pb.h" 9 | 10 | namespace io::substrait { 11 | 12 | /* 13 | * \brief The four different ways plans can be represented on disk. 14 | */ 15 | enum class PlanFileFormat { 16 | kBinary = 0, 17 | kJson = 1, 18 | kProtoText = 2, 19 | kText = 3, 20 | }; 21 | 22 | /* 23 | * \brief Loads a Substrait plan of any format from the given file. 
24 | * 25 | * loadPlan determines which file type the specified file is and then calls 26 | * the appropriate load/parse method to consume it preserving any error 27 | * messages. 28 | * 29 | * This will load the plan into memory and then convert it consuming twice the 30 | * amount of memory that it consumed on disk. 31 | * 32 | * \param input_filename The filename containing the plan to convert. 33 | * \return If loading was successful, returns a plan. If loading was not 34 | * successful this is a status containing a list of parse errors in the status's 35 | * message. 36 | */ 37 | absl::StatusOr<::substrait::proto::Plan> loadPlan( 38 | std::string_view input_filename); 39 | 40 | /* 41 | * \brief Writes the provided plan to disk. 42 | * 43 | * savePlan writes the provided plan in the specified format to the specified 44 | * location. 45 | * 46 | * This routine will consume more memory during the conversion to the text 47 | * format as the original plan as well as the annotated parse tree will need to 48 | * reside in memory during the process. 
49 | * 50 | * \param plan 51 | * \param output_filename 52 | * \param format 53 | * \return 54 | */ 55 | absl::Status savePlan( 56 | const ::substrait::proto::Plan& plan, 57 | std::string_view output_filename, 58 | PlanFileFormat format); 59 | 60 | } // namespace io::substrait 61 | -------------------------------------------------------------------------------- /include/substrait/expression/DecimalLiteral.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | namespace substrait::proto { 9 | class Expression_Literal_Decimal; 10 | } 11 | 12 | namespace io::substrait::expression { 13 | 14 | class DecimalLiteral { 15 | DecimalLiteral(std::string v, int32_t p, int32_t s) 16 | : value_(std::move(v)), precision_(p), scale_(s) {} 17 | 18 | public: 19 | static DecimalLiteral fromProto( 20 | const ::substrait::proto::Expression_Literal_Decimal& proto); 21 | 22 | static DecimalLiteral 23 | fromString(const std::string& str, int32_t precision, int32_t scale); 24 | 25 | // Validates that the constructed decimal has an exactly 16 byte value with 26 | // a stated precision between 1 and 38. 27 | bool isValid(); 28 | 29 | // Converts the value portion of the decimal to a string only. 30 | std::string toBaseString(); 31 | 32 | // Converts the entirety of the decimal (including precision and scale) to 33 | // a string. 34 | std::string toString(); 35 | 36 | // Emits a proto version of the corresponding decimal. 37 | ::substrait::proto::Expression_Literal_Decimal toProto(); 38 | 39 | [[nodiscard]] int32_t precision() const { 40 | return precision_; 41 | } 42 | 43 | [[nodiscard]] int32_t scale() const { 44 | return scale_; 45 | } 46 | 47 | private: 48 | // Little-endian twos-complement integer representation of complete value 49 | // (ignoring precision). Always 16 bytes in length. 
50 | std::string value_; 51 | // The maximum number of digits allowed in the value. 52 | // the maximum precision is 38. 53 | int32_t precision_; 54 | // The number of digits after the decimal point (may be negative). 55 | int32_t scale_; 56 | }; 57 | 58 | } // namespace io::substrait::expression 59 | -------------------------------------------------------------------------------- /include/substrait/function/Extension.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "substrait/function/Function.h" 11 | #include "substrait/function/FunctionSignature.h" 12 | #include "substrait/type/Type.h" 13 | 14 | namespace io::substrait { 15 | 16 | struct TypeVariant { 17 | std::string name; 18 | std::string uri; 19 | }; 20 | 21 | using TypeVariantPtr = std::shared_ptr; 22 | 23 | using FunctionImplMap = 24 | std::unordered_map>; 25 | 26 | using TypeVariantMap = std::unordered_map; 27 | 28 | class Extension { 29 | public: 30 | /// Deserialize default substrait extension by given basePath 31 | /// @throws exception if file not found 32 | static std::shared_ptr load(const std::string& basePath); 33 | 34 | /// Deserialize substrait extension by given basePath and extensionFiles. 35 | static std::shared_ptr load( 36 | const std::string& basePath, 37 | const std::vector& extensionFiles); 38 | 39 | /// Deserialize substrait extension by given extensionFiles. 40 | static std::shared_ptr load( 41 | const std::vector& extensionFiles); 42 | 43 | /// Add a scalar function implementation. 44 | void addScalarFunctionImpl(const FunctionImplementationPtr& functionImpl); 45 | 46 | /// Add an aggregate function implementation. 47 | void addAggregateFunctionImpl(const FunctionImplementationPtr& functionImpl); 48 | 49 | /// Add a window function implementation. 
50 | void addWindowFunctionImpl(const FunctionImplementationPtr& functionImpl); 51 | 52 | /// Add a type variant. 53 | void addTypeVariant(const TypeVariantPtr& typeVariant); 54 | 55 | /// Lookup type variant by given type name. 56 | /// @return matched type variant 57 | TypeVariantPtr lookupType(const std::string& typeName) const; 58 | 59 | const FunctionImplMap& scalaFunctionImplMap() const { 60 | return scalarFunctionImplMap_; 61 | } 62 | 63 | const FunctionImplMap& windowFunctionImplMap() const { 64 | return windowFunctionImplMap_; 65 | } 66 | 67 | const FunctionImplMap& aggregateFunctionImplMap() const { 68 | return aggregateFunctionImplMap_; 69 | } 70 | 71 | private: 72 | FunctionImplMap scalarFunctionImplMap_; 73 | 74 | FunctionImplMap aggregateFunctionImplMap_; 75 | 76 | FunctionImplMap windowFunctionImplMap_; 77 | 78 | TypeVariantMap typeVariantMap_; 79 | }; 80 | 81 | using ExtensionPtr = std::shared_ptr; 82 | 83 | } // namespace io::substrait 84 | -------------------------------------------------------------------------------- /include/substrait/function/Function.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "substrait/function/FunctionSignature.h" 8 | #include "substrait/type/Type.h" 9 | 10 | namespace io::substrait { 11 | 12 | struct FunctionArgument { 13 | [[nodiscard]] virtual bool isRequired() const = 0; 14 | 15 | /// Convert argument type to short type string based on 16 | /// https://substrait.io/extensions/#function-signature-compound-names 17 | [[nodiscard]] virtual std::string toTypeString() const = 0; 18 | 19 | [[nodiscard]] virtual bool isWildcardType() const { 20 | return false; 21 | }; 22 | 23 | [[nodiscard]] virtual bool isValueArgument() const { 24 | return false; 25 | } 26 | 27 | [[nodiscard]] virtual bool isEnumArgument() const { 28 | return false; 29 | } 30 | 31 | [[nodiscard]] virtual bool 
isTypeArgument() const { 32 | return false; 33 | } 34 | }; 35 | 36 | using FunctionArgumentPtr = std::shared_ptr; 37 | 38 | struct EnumArgument : public FunctionArgument { 39 | bool required{}; 40 | 41 | [[nodiscard]] bool isRequired() const override { 42 | return required; 43 | } 44 | 45 | [[nodiscard]] std::string toTypeString() const override { 46 | return required ? "req" : "opt"; 47 | } 48 | 49 | [[nodiscard]] bool isEnumArgument() const override { 50 | return true; 51 | } 52 | }; 53 | 54 | struct TypeArgument : public FunctionArgument { 55 | [[nodiscard]] std::string toTypeString() const override { 56 | return "type"; 57 | } 58 | 59 | [[nodiscard]] bool isRequired() const override { 60 | return true; 61 | } 62 | 63 | [[nodiscard]] bool isTypeArgument() const override { 64 | return true; 65 | } 66 | }; 67 | 68 | struct ValueArgument : public FunctionArgument { 69 | ParameterizedTypePtr type; 70 | 71 | [[nodiscard]] std::string toTypeString() const override { 72 | return type->signature(); 73 | } 74 | 75 | [[nodiscard]] bool isRequired() const override { 76 | return true; 77 | } 78 | 79 | [[nodiscard]] bool isWildcardType() const override { 80 | return type->isWildcard(); 81 | } 82 | 83 | [[nodiscard]] bool isValueArgument() const override { 84 | return true; 85 | } 86 | }; 87 | 88 | struct FunctionVariadic { 89 | int min; 90 | std::optional max; 91 | }; 92 | 93 | struct FunctionImplementation { 94 | std::string name; 95 | std::string uri; 96 | std::vector arguments; 97 | ParameterizedTypePtr returnType; 98 | std::optional variadic; 99 | 100 | /// Test if the actual types matched with this function's implementation. 101 | virtual bool tryMatch(const FunctionSignature& signature); 102 | 103 | /// Create function signature by function name and arguments. 
104 | [[nodiscard]] std::string signature() const; 105 | }; 106 | 107 | using FunctionImplementationPtr = std::shared_ptr; 108 | 109 | struct ScalarFunctionImplementation : public FunctionImplementation {}; 110 | 111 | struct AggregateFunctionImplementation : public FunctionImplementation { 112 | ParameterizedTypePtr intermediate; 113 | bool deterministic; 114 | 115 | bool tryMatch(const FunctionSignature& signature) override; 116 | }; 117 | 118 | } // namespace io::substrait 119 | -------------------------------------------------------------------------------- /include/substrait/function/FunctionLookup.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include "substrait/function/Extension.h" 6 | #include "substrait/function/FunctionSignature.h" 7 | 8 | namespace io::substrait { 9 | 10 | class FunctionLookup { 11 | public: 12 | explicit FunctionLookup(ExtensionPtr extension) 13 | : extension_(std::move(extension)) {} 14 | 15 | [[nodiscard]] virtual FunctionImplementationPtr lookupFunction( 16 | const FunctionSignature& signature) const; 17 | 18 | virtual ~FunctionLookup() = default; 19 | 20 | protected: 21 | [[nodiscard]] virtual FunctionImplMap getFunctionImpls() const = 0; 22 | 23 | ExtensionPtr extension_{}; 24 | }; 25 | 26 | using FunctionLookupPtr = std::shared_ptr; 27 | 28 | class ScalarFunctionLookup : public FunctionLookup { 29 | public: 30 | explicit ScalarFunctionLookup(const ExtensionPtr& extension) 31 | : FunctionLookup(extension) {} 32 | 33 | protected: 34 | [[nodiscard]] FunctionImplMap getFunctionImpls() const override { 35 | return extension_->scalaFunctionImplMap(); 36 | } 37 | }; 38 | 39 | class AggregateFunctionLookup : public FunctionLookup { 40 | public: 41 | explicit AggregateFunctionLookup(const ExtensionPtr& extension) 42 | : FunctionLookup(extension) {} 43 | 44 | protected: 45 | [[nodiscard]] FunctionImplMap getFunctionImpls() const 
override { 46 | return extension_->aggregateFunctionImplMap(); 47 | } 48 | }; 49 | 50 | class WindowFunctionLookup : public FunctionLookup { 51 | public: 52 | explicit WindowFunctionLookup(const ExtensionPtr& extension) 53 | : FunctionLookup(extension) {} 54 | 55 | protected: 56 | [[nodiscard]] FunctionImplMap getFunctionImpls() const override { 57 | return extension_->windowFunctionImplMap(); 58 | } 59 | }; 60 | 61 | } // namespace io::substrait 62 | -------------------------------------------------------------------------------- /include/substrait/function/FunctionSignature.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include "substrait/type/Type.h" 6 | 7 | namespace io::substrait { 8 | 9 | struct FunctionSignature { 10 | std::string name; 11 | std::vector arguments; 12 | TypePtr returnType; 13 | }; 14 | 15 | } // namespace io::substrait 16 | -------------------------------------------------------------------------------- /scripts/find_vs.ps1: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Find and enter a Visual Studio development environment. 3 | # Required to use Ninja instead of msbuild on our build agents. 
# Locates the newest Visual Studio installation that ships the VC++ x86/x64
# tools and imports its developer shell into the current session.
#
# -Prerelease    also consider preview installations.
# -architecture  target architecture passed to the developer shell (default x64).
function Enter-VsDevEnv {
    [CmdletBinding()]
    param(
        [Parameter()]
        [switch]$Prerelease,
        [Parameter()]
        [string]$architecture = "x64"
    )

    $ErrorActionPreference = 'Stop'

    if ($null -eq (Get-InstalledModule -name 'VSSetup' -ErrorAction SilentlyContinue)) {
        Install-Module -Name 'VSSetup' -Scope CurrentUser -SkipPublisherCheck -Force
    }
    Import-Module -Name 'VSSetup'

    Write-Verbose 'Searching for VC++ instances'
    $vsinfo = Get-VSSetupInstance -All -Prerelease:$Prerelease |
        Select-VSSetupInstance `
            -Latest -Product * `
            -Require 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64'

    # Fail here with a clear message rather than later with a confusing
    # module-path error built from an empty installation path.
    if ($null -eq $vsinfo) {
        throw "No Visual Studio instance with the VC++ x86/x64 tools was found."
    }

    $vspath = $vsinfo.InstallationPath

    switch ($env:PROCESSOR_ARCHITECTURE) {
        "amd64" { $hostarch = "x64" }
        "x86" { $hostarch = "x86" }
        "arm64" { $hostarch = "arm64" }
        # $_ is the value being matched; $switch is the enumerator object and
        # does not print the architecture name.
        default { throw "Unknown architecture: $_" }
    }

    $devShellModule = "$vspath\Common7\Tools\Microsoft.VisualStudio.DevShell.dll"

    Import-Module -Global -Name $devShellModule

    Write-Verbose 'Setting up environment variables'
    Enter-VsDevShell -VsInstanceId $vsinfo.InstanceId -SkipAutomaticLocation `
        -devCmdArguments "-arch=$architecture -host_arch=$hostarch"

    Set-Item -Force -path "Env:\Platform" -Value $architecture

    remove-Module Microsoft.VisualStudio.DevShell, VSSetup
}

Enter-VsDevEnv

--------------------------------------------------------------------------------
/scripts/run-clang-format.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
# Formats every .h and .cpp file under src/ and include/ in place.
SCRIPTDIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
WORKDIR="$( cd "$SCRIPTDIR/.." && pwd )"

# Paths are quoted so that a checkout located in a directory containing spaces
# still works.
find "$WORKDIR/src" "$WORKDIR/include" \( -name '*.h' -o -name '*.cpp' \) -exec clang-format-15 -style=file -i {} \;

--------------------------------------------------------------------------------
/scripts/run-clang-tidy.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# SPDX-License-Identifier: Apache-2.0

# Run clang-tidy recursively and parallel on directory
# Usage: run-clang-tidy sourcedir builddir excludedirs extensions options
# extensions and excludedirs are specified as comma-separated
# string without dot, e.g. 'c,cpp'
# e.g. run-clang-tidy . build test,other c,cpp "--quiet"

import multiprocessing
import os
import shlex
import subprocess
import sys

manager = multiprocessing.Manager()
failedfiles = manager.list()

print("Arguments: " + str(sys.argv))
# Get the source dir after removing trailing separators from input.
sourcedir = sys.argv[1].rstrip(os.sep)
print("Source directory: " + sourcedir)
builddir = sourcedir + os.sep + sys.argv[2].rstrip(os.sep)
print("Build directory: " + builddir)
# Split exclude dirs into a tuple
excludedirs = tuple([(sourcedir + os.sep + s).rstrip(os.sep) for s in sys.argv[3].split(',')])
# If the build directory is not the same as the source directory, exclude it
if not sourcedir == builddir:
    excludedirs = excludedirs + (builddir,)
print("Exclude directories: " + str(excludedirs))
# Split extensions into a tuple
extensions = tuple([("." + s) for s in sys.argv[4].split(',')])
print("Extensions: " + str(extensions))

clang_tidy_options = sys.argv[5]
print("clang-tidy options: " + str(clang_tidy_options))


def runclangtidy(filepath):
    """Run clang-tidy on one file and record the file if it reports errors."""
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; the option string is split the way a shell would.
    command = ["clang-tidy"] + shlex.split(clang_tidy_options)
    command += ["-p=" + builddir, filepath]
    if subprocess.call(command) != 0:
        print("Error file: " + filepath)
        failedfiles.append(filepath)


def collectfiles(dir, exclude, exts):
    """Return every file under dir that has one of exts and is not excluded."""
    collectedfiles = []
    for root, dirs, files in os.walk(dir):
        for file in files:
            filepath = root + os.sep + file
            if (len(exclude) == 0 or not filepath.startswith(exclude)) and filepath.endswith(exts):
                collectedfiles.append(filepath)
    return collectedfiles


# Define the pool AFTER the global variables and subprocess function because multiprocessing
# has stricter requirements on member ordering
# See: https://stackoverflow.com/questions/41385708/multiprocessing-example-giving-attributeerror
pool = multiprocessing.Pool()
pool.map(runclangtidy, collectfiles(sourcedir, excludedirs, extensions))
pool.close()
pool.join()
if len(failedfiles) > 0:
    print("Errors in " + str(len(failedfiles)) + " files")
    sys.exit(1)
print("No errors found")
sys.exit(0)

--------------------------------------------------------------------------------
/scripts/run-clang-tidy.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
SCRIPTDIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
WORKDIR="$( cd "$SCRIPTDIR/.." && pwd )"

# Make compile_command.json.  Stop if the configure step fails instead of
# running make against a missing or half-written build tree.
rm -rf tmp && mkdir tmp && cmake -Btmp -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DSUBSTRAIT_CPP_ROUNDTRIP_TESTING=ON || exit
# Build substrait protobuf
pushd tmp/src/substrait/proto && make -j 2 && popd || exit
# Build textplan grammar
pushd tmp/ && make -j antlr4_runtime textplan_grammar_headers && popd || exit
# Run clang-tidy
if [ "$1" == "fix" ]; then
  python3 scripts/run-clang-tidy.py "$WORKDIR" "tmp" "third_party" "h,hpp,cc,cpp" "--quiet --fix"
else
  python3 scripts/run-clang-tidy.py "$WORKDIR" "tmp" "third_party" "h,hpp,cc,cpp" "--quiet"
fi

--------------------------------------------------------------------------------
/scripts/run-cmake-format.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
SCRIPTDIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
WORKDIR="$( cd "$SCRIPTDIR/.." && pwd )"

# The prune pattern has to carry the same prefix as the search root; a bare
# ./third_party never matches when find is started from an absolute path.
find "$WORKDIR" -type d -path "$WORKDIR/third_party" -prune -o \( -name '*.cmake' -o -name 'CMakeLists.txt' \) -print | grep -v "third_party" | grep -vF "/.cmake" | xargs cmake-format -i
cmake-format -i "$WORKDIR/third_party/CMakeLists.txt"

--------------------------------------------------------------------------------
/scripts/setup-ubuntu.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0

# Minimal setup for Ubuntu 20.04.
set -eufx -o pipefail

# Update the list of packages so we're running the latest.
sudo apt update

# Install all dependencies.
# curl, unzip and python3-pip are listed because the steps below invoke them.
sudo --preserve-env apt install -y \
  wget \
  curl \
  unzip \
  g++ \
  cmake \
  ccache \
  ninja-build \
  checkinstall \
  clang-tidy \
  git \
  clang-format-15 \
  uuid-dev \
  default-jre \
  python3-pip \
  libcurl4-openssl-dev

# Install the currently supported version of protobuf:
PB_REL="https://github.com/protocolbuffers/protobuf/releases"
PB_VER="28.2"
curl -LO "$PB_REL/download/v$PB_VER/protoc-$PB_VER-linux-x86_64.zip"
unzip "protoc-$PB_VER-linux-x86_64.zip" -d "$HOME/.local"
export PATH="$PATH:$HOME/.local/bin"

pip install cmake-format

--------------------------------------------------------------------------------
/src/substrait/CMakeLists.txt:
--------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0

# Name of the per-configuration output subdirectory; defaults to "release"
# when no build type was given.
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "")
  string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_SUBDIR_NAME)
else()
  set(BUILD_SUBDIR_NAME "release")
endif()

cmake_path(GET CMAKE_CURRENT_BINARY_DIR PARENT_PATH
           CMAKE_CURRENT_BINARY_PARENT_DIR)
cmake_path(GET CMAKE_CURRENT_BINARY_PARENT_DIR PARENT_PATH
           CMAKE_CURRENT_BINARY_TOPLEVEL_DIR)

set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY
    "${CMAKE_CURRENT_BINARY_TOPLEVEL_DIR}/${BUILD_SUBDIR_NAME}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY
    "${CMAKE_CURRENT_BINARY_TOPLEVEL_DIR}/${BUILD_SUBDIR_NAME}")
set(ADDITIONAL_CLEAN_FILES
    "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY};${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
# ADDITIONAL_CLEAN_FILES only takes effect as a directory (or target)
# property; the variable above is kept for any code that reads it.
set_property(
  DIRECTORY
  APPEND
  PROPERTY ADDITIONAL_CLEAN_FILES "${ADDITIONAL_CLEAN_FILES}")

add_subdirectory(common)
add_subdirectory(type)
add_subdirectory(expression)
add_subdirectory(function)
add_subdirectory(proto)
add_subdirectory(textplan)

--------------------------------------------------------------------------------
/src/substrait/common/CMakeLists.txt:
--------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0

add_library(substrait_common Exceptions.cpp) 4 | target_sources( 5 | substrait_common PUBLIC FILE_SET HEADERS BASE_DIRS ../../../include/ FILES 6 | ../../../include/substrait/common/Exceptions.h) 7 | target_link_libraries(substrait_common fmt::fmt-header-only) 8 | 9 | add_library(substrait_io STATIC Io.cpp) 10 | target_sources(substrait_io PUBLIC FILE_SET HEADERS BASE_DIRS ../../../include/ 11 | FILES ../../../include/substrait/common/Io.h) 12 | 13 | add_dependencies( 14 | substrait_io 15 | substrait_proto 16 | substrait_textplan_converter 17 | substrait_textplan_loader 18 | fmt::fmt-header-only 19 | absl::status 20 | absl::statusor) 21 | target_include_directories( 22 | substrait_io INTERFACE $ 23 | $) 24 | target_link_libraries(substrait_io substrait_proto substrait_textplan_converter 25 | substrait_textplan_loader absl::status absl::statusor) 26 | 27 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 28 | add_subdirectory(tests) 29 | endif() 30 | 31 | add_executable(plantransformer PlanTransformerTool.cpp) 32 | 33 | target_link_libraries(plantransformer substrait_io) 34 | 35 | install( 36 | TARGETS substrait_common substrait_io plantransformer 37 | EXPORT SubstraitTargets 38 | LIBRARY FILE_SET HEADERS) 39 | -------------------------------------------------------------------------------- /src/substrait/common/Exceptions.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/common/Exceptions.h" 4 | 5 | #include 6 | 7 | namespace io::substrait::common { 8 | 9 | SubstraitException::SubstraitException( 10 | const char* file, 11 | size_t line, 12 | const char* function, 13 | const std::string& exceptionCode, 14 | const std::string& exceptionMessage, 15 | Type exceptionType, 16 | const std::string& exceptionName) 17 | : msg_(fmt::format( 18 | "Exception: {}\nError Code: {}\nError Type: {}\nReason: {}\n" 19 | "Function: {}\nLocation: {}(Line:{})\n", 20 | exceptionName, 21 | 
exceptionCode, 22 | exceptionType == Type::kSystem ? "system" : "user", 23 | exceptionMessage, 24 | function, 25 | file, 26 | std::to_string(line))) {} 27 | 28 | } // namespace io::substrait::common 29 | -------------------------------------------------------------------------------- /src/substrait/common/Io.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/common/Io.h" 4 | 5 | #include 6 | #include 7 | 8 | #include "substrait/proto/plan.pb.h" 9 | #include "substrait/textplan/converter/LoadBinary.h" 10 | #include "substrait/textplan/converter/SaveBinary.h" 11 | #include "substrait/textplan/parser/LoadText.h" 12 | 13 | namespace io::substrait { 14 | 15 | namespace { 16 | 17 | const std::regex kIsJson( 18 | R"(("extensionUris"|"extension_uris"|"extensions"|"relations"))"); 19 | const std::regex kIsProtoText( 20 | R"((^|\n)((relations|extensions|extension_uris|expected_type_urls) \{))"); 21 | const std::regex kIsText( 22 | R"((^|\n) *(pipelines|[a-z]+ *relation|schema|source|extension_space) *)"); 23 | 24 | PlanFileFormat detectFormat(std::string_view content) { 25 | if (std::regex_search(content.begin(), content.end(), kIsJson)) { 26 | return PlanFileFormat::kJson; 27 | } 28 | if (std::regex_search(content.begin(), content.end(), kIsProtoText)) { 29 | return PlanFileFormat::kProtoText; 30 | } 31 | if (std::regex_search(content.begin(), content.end(), kIsText)) { 32 | return PlanFileFormat::kText; 33 | } 34 | return PlanFileFormat::kBinary; 35 | } 36 | 37 | } // namespace 38 | 39 | absl::StatusOr<::substrait::proto::Plan> loadPlan( 40 | std::string_view input_filename) { 41 | auto contentOrError = textplan::readFromFile(input_filename.data()); 42 | if (!contentOrError.ok()) { 43 | return contentOrError.status(); 44 | } 45 | 46 | auto encoding = detectFormat(*contentOrError); 47 | absl::StatusOr<::substrait::proto::Plan> planOrError; 48 | switch (encoding) { 49 
| case PlanFileFormat::kBinary: 50 | return textplan::loadFromBinary(*contentOrError); 51 | case PlanFileFormat::kJson: 52 | return textplan::loadFromJson(*contentOrError); 53 | case PlanFileFormat::kProtoText: 54 | return textplan::loadFromProtoText(*contentOrError); 55 | case PlanFileFormat::kText: 56 | return textplan::loadFromText(*contentOrError); 57 | } 58 | // There are no other possibilities so this can't happen. 59 | return absl::UnimplementedError("Unexpected format encountered."); 60 | } 61 | 62 | absl::Status savePlan( 63 | const ::substrait::proto::Plan& plan, 64 | std::string_view output_filename, 65 | PlanFileFormat format) { 66 | switch (format) { 67 | case PlanFileFormat::kBinary: 68 | return textplan::savePlanToBinary(plan, output_filename); 69 | case PlanFileFormat::kJson: 70 | return textplan::savePlanToJson(plan, output_filename); 71 | case PlanFileFormat::kProtoText: 72 | return textplan::savePlanToProtoText(plan, output_filename); 73 | case PlanFileFormat::kText: 74 | return textplan::savePlanToText(plan, output_filename); 75 | } 76 | return absl::UnimplementedError("Unexpected format requested."); 77 | } 78 | 79 | } // namespace io::substrait 80 | -------------------------------------------------------------------------------- /src/substrait/common/NumberUtils.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | namespace io::substrait::common { 9 | 10 | class NumberUtils { 11 | public: 12 | static bool isInteger(std::string_view s) { 13 | return isNonNegativeInteger(s) || isNegativeInteger(s); 14 | } 15 | 16 | static bool isNonNegativeInteger(std::string_view s) { 17 | return !s.empty() && std::all_of(s.begin(), s.end(), [](unsigned char d) { 18 | return std::isdigit(d); 19 | }); 20 | } 21 | 22 | static bool isNegativeInteger(std::string_view s) { 23 | return s.size() >= 2 && s[0] == '-' && 
isNonNegativeInteger(s.substr(1)); 24 | } 25 | }; 26 | 27 | } // namespace io::substrait::common 28 | -------------------------------------------------------------------------------- /src/substrait/common/PlanTransformerTool.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | 5 | #include "substrait/common/Io.h" 6 | 7 | namespace io::substrait { 8 | namespace { 9 | 10 | PlanFileFormat planFileFormatFromText(std::string_view str) { 11 | std::string foo; 12 | foo.resize(str.size()); 13 | std::transform(str.begin(), str.end(), foo.begin(), [](unsigned char c) { 14 | return std::tolower(c); 15 | }); 16 | if (foo == "binary") { 17 | return PlanFileFormat::kBinary; 18 | } else if (foo == "json") { 19 | return PlanFileFormat::kJson; 20 | } else if (foo == "prototext") { 21 | return PlanFileFormat::kProtoText; 22 | } else if (foo == "text") { 23 | return PlanFileFormat::kText; 24 | } 25 | // If the format can't be understood, default to text. 
26 | return PlanFileFormat::kText; 27 | } 28 | 29 | } // namespace 30 | } // namespace io::substrait 31 | 32 | int main(int argc, char* argv[]) { 33 | if (argc <= 3) { 34 | printf( 35 | "Usage: plantransformer [BINARY|JSON|PROTOTEXT|TEXT]\n"); 36 | return EXIT_FAILURE; 37 | } 38 | 39 | auto planOrError = io::substrait::loadPlan(argv[1]); 40 | if (!planOrError.ok()) { 41 | std::cerr << planOrError.status() << std::endl; 42 | return EXIT_FAILURE; 43 | } 44 | 45 | auto format = io::substrait::planFileFormatFromText(argv[3]); 46 | 47 | auto result = io::substrait::savePlan(*planOrError, argv[2], format); 48 | if (!result.ok()) { 49 | std::cerr << result << std::endl; 50 | return EXIT_FAILURE; 51 | } 52 | 53 | return EXIT_SUCCESS; 54 | } 55 | -------------------------------------------------------------------------------- /src/substrait/common/StringUtils.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace io::substrait::common { 10 | class StringUtils final { 11 | public: 12 | static std::string_view ltrim(std::string_view s) { 13 | size_t start = s.find_first_not_of(kWhitespace); 14 | return (start == std::string::npos) ? "" : s.substr(start); 15 | } 16 | 17 | static std::string_view rtrim(std::string_view s) { 18 | size_t end = s.find_last_not_of(kWhitespace); 19 | return (end == std::string::npos) ? 
"" : s.substr(0, end + 1); 20 | } 21 | 22 | static std::string_view trim(std::string_view s) { 23 | return rtrim(ltrim(s)); 24 | } 25 | 26 | static constexpr std::string_view kWhitespace = " \n\r\t\f\v"; 27 | }; 28 | 29 | } // namespace io::substrait::common 30 | -------------------------------------------------------------------------------- /src/substrait/common/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_test_case( 4 | substrait_common_test 5 | SOURCES 6 | NumberUtilsTest.cpp 7 | StringUtilsTest.cpp 8 | EXTRA_LINK_LIBS 9 | substrait_common 10 | gtest 11 | gtest_main) 12 | 13 | add_test_case( 14 | substrait_io_test 15 | SOURCES 16 | IoTest.cpp 17 | EXTRA_LINK_LIBS 18 | substrait_io 19 | protobuf-matchers 20 | gtest 21 | gtest_main) 22 | -------------------------------------------------------------------------------- /src/substrait/common/tests/IoTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/common/Io.h" 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #ifndef _WIN32 12 | #include 13 | #endif 14 | 15 | using ::protobuf_matchers::EqualsProto; 16 | using ::protobuf_matchers::Partially; 17 | 18 | namespace io::substrait { 19 | 20 | namespace { 21 | 22 | constexpr const char* planFileEncodingToString(PlanFileFormat e) noexcept { 23 | switch (e) { 24 | case PlanFileFormat::kBinary: 25 | return "kBinary"; 26 | case PlanFileFormat::kJson: 27 | return "kJson"; 28 | case PlanFileFormat::kProtoText: 29 | return "kProtoText"; 30 | case PlanFileFormat::kText: 31 | return "kText"; 32 | } 33 | return "IMPOSSIBLE"; 34 | } 35 | 36 | } // namespace 37 | 38 | class IoTest : public ::testing::Test {}; 39 | 40 | TEST_F(IoTest, LoadMissingFile) { 41 | auto result = ::io::substrait::loadPlan("non-existent-file"); 42 | 
ASSERT_FALSE(result.ok()); 43 | ASSERT_THAT( 44 | result.status().message(), 45 | ::testing::ContainsRegex("Failed to open file non-existent-file")); 46 | } 47 | 48 | class SaveAndLoadTestFixture : public ::testing::TestWithParam { 49 | public: 50 | void SetUp() override { 51 | testFileDirectory_ = (std::filesystem::temp_directory_path() / 52 | std::filesystem::path("my_temp_dir")) 53 | .string(); 54 | 55 | std::filesystem::create_directory(testFileDirectory_); 56 | if (!std::filesystem::exists(testFileDirectory_)) { 57 | ASSERT_TRUE(false) << "Failed to create temporary directory."; 58 | testFileDirectory_.clear(); 59 | } 60 | } 61 | 62 | void TearDown() override { 63 | if (!testFileDirectory_.empty()) { 64 | std::error_code err; 65 | std::filesystem::remove_all(testFileDirectory_, err); 66 | ASSERT_FALSE(err) << err.message(); 67 | } 68 | } 69 | 70 | static std::string makeTempFileName() { 71 | static int tempFileNum = 0; 72 | return "testfile" + std::to_string(++tempFileNum); 73 | } 74 | 75 | protected: 76 | std::string testFileDirectory_; 77 | }; 78 | 79 | TEST_P(SaveAndLoadTestFixture, SaveAndLoad) { 80 | auto tempFilename = testFileDirectory_ + "/" + makeTempFileName(); 81 | PlanFileFormat encoding = GetParam(); 82 | 83 | ::substrait::proto::Plan plan; 84 | auto root = plan.add_relations()->mutable_root(); 85 | auto read = root->mutable_input()->mutable_read(); 86 | read->mutable_common()->mutable_direct(); 87 | read->mutable_named_table()->add_names("table_name"); 88 | auto status = ::io::substrait::savePlan(plan, tempFilename, encoding); 89 | ASSERT_TRUE(status.ok()) << "Save failed.\n" << status; 90 | auto result = ::io::substrait::loadPlan(tempFilename); 91 | ASSERT_TRUE(result.ok()) << "Load failed.\n" << result.status(); 92 | ASSERT_THAT( 93 | *result, 94 | Partially(EqualsProto<::substrait::proto::Plan>( 95 | R"(relations { 96 | root { 97 | input { 98 | read { 99 | common { 100 | direct { 101 | } 102 | } 103 | named_table { 104 | names: "table_name" 
105 | } 106 | } 107 | } 108 | } 109 | })"))); 110 | } 111 | 112 | static auto getFormats() { 113 | return testing::Values( 114 | PlanFileFormat::kBinary, 115 | PlanFileFormat::kJson, 116 | PlanFileFormat::kProtoText 117 | 118 | #ifndef _WIN32 119 | // Text format is currently not supported on Windows 120 | , 121 | PlanFileFormat::kText 122 | #endif 123 | ); 124 | } 125 | 126 | INSTANTIATE_TEST_SUITE_P( 127 | SaveAndLoadTests, 128 | SaveAndLoadTestFixture, 129 | getFormats(), 130 | [](const testing::TestParamInfo& info) { 131 | return planFileEncodingToString(info.param); 132 | }); 133 | 134 | } // namespace io::substrait 135 | -------------------------------------------------------------------------------- /src/substrait/common/tests/NumberUtilsTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | 5 | #include "substrait/common/NumberUtils.h" 6 | 7 | using io::substrait::common::NumberUtils; 8 | 9 | class NumberUtilsTest : public ::testing::Test {}; 10 | 11 | TEST_F(NumberUtilsTest, isNonNegativeInteger) { 12 | ASSERT_TRUE(NumberUtils::isNonNegativeInteger("1")); 13 | ASSERT_TRUE(NumberUtils::isNonNegativeInteger("0")); 14 | ASSERT_FALSE(NumberUtils::isNonNegativeInteger("-1")); 15 | ASSERT_FALSE(NumberUtils::isNonNegativeInteger("L1")); 16 | ASSERT_FALSE(NumberUtils::isNonNegativeInteger("1L")); 17 | ASSERT_FALSE(NumberUtils::isNonNegativeInteger("")); 18 | } 19 | 20 | TEST_F(NumberUtilsTest, isNegativeInteger) { 21 | ASSERT_TRUE(NumberUtils::isNegativeInteger("-1")); 22 | ASSERT_FALSE(NumberUtils::isNegativeInteger("0")); 23 | ASSERT_FALSE(NumberUtils::isNegativeInteger("-1L")); 24 | ASSERT_FALSE(NumberUtils::isNegativeInteger("1")); 25 | ASSERT_FALSE(NumberUtils::isNegativeInteger("1L")); 26 | ASSERT_FALSE(NumberUtils::isNegativeInteger("")); 27 | } 28 | 29 | TEST_F(NumberUtilsTest, isInteger) { 30 | ASSERT_TRUE(NumberUtils::isInteger("1")); 31 | 
ASSERT_TRUE(NumberUtils::isInteger("0")); 32 | ASSERT_TRUE(NumberUtils::isInteger("-1")); 33 | ASSERT_FALSE(NumberUtils::isInteger("L1")); 34 | ASSERT_FALSE(NumberUtils::isInteger("1L")); 35 | ASSERT_FALSE(NumberUtils::isInteger("-L1")); 36 | ASSERT_FALSE(NumberUtils::isInteger("-1L")); 37 | ASSERT_FALSE(NumberUtils::isInteger("")); 38 | } 39 | -------------------------------------------------------------------------------- /src/substrait/common/tests/StringUtilsTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | 5 | #include "substrait/common/StringUtils.h" 6 | 7 | using io::substrait::common::StringUtils; 8 | 9 | class StringUtilsTest : public ::testing::Test {}; 10 | 11 | TEST_F(StringUtilsTest, ltrim) { 12 | ASSERT_EQ(StringUtils::ltrim(" 1"), "1"); 13 | ASSERT_EQ(StringUtils::ltrim(" 1"), "1"); 14 | ASSERT_EQ(StringUtils::ltrim(" 1 "), "1 "); 15 | ASSERT_EQ(StringUtils::ltrim(" 1 1 "), "1 1 "); 16 | ASSERT_EQ(StringUtils::ltrim(" "), ""); 17 | } 18 | 19 | TEST_F(StringUtilsTest, rtrim) { 20 | ASSERT_EQ(StringUtils::rtrim("1 "), "1"); 21 | ASSERT_EQ(StringUtils::rtrim("1 "), "1"); 22 | ASSERT_EQ(StringUtils::rtrim("1 1 "), "1 1"); 23 | ASSERT_EQ(StringUtils::rtrim(" "), ""); 24 | } 25 | 26 | TEST_F(StringUtilsTest, trim) { 27 | ASSERT_EQ(StringUtils::trim(" 1"), "1"); 28 | ASSERT_EQ(StringUtils::trim("1 "), "1"); 29 | ASSERT_EQ(StringUtils::trim(" 1 "), "1"); 30 | ASSERT_EQ(StringUtils::trim(" 1 1 "), "1 1"); 31 | ASSERT_EQ(StringUtils::trim(" "), ""); 32 | } 33 | -------------------------------------------------------------------------------- /src/substrait/expression/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_library(substrait_expression DecimalLiteral.cpp) 4 | target_sources( 5 | substrait_expression 6 | PUBLIC FILE_SET HEADERS BASE_DIRS 
../../../include/ FILES 7 | ../../../include/substrait/expression/DecimalLiteral.h) 8 | 9 | target_link_libraries(substrait_expression substrait_proto absl::numeric 10 | absl::strings) 11 | 12 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 13 | add_subdirectory(tests) 14 | endif() 15 | 16 | install( 17 | TARGETS substrait_expression 18 | EXPORT SubstraitTargets 19 | FILE_SET HEADERS) 20 | -------------------------------------------------------------------------------- /src/substrait/expression/DecimalLiteral.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/expression/DecimalLiteral.h" 4 | 5 | #include 6 | 7 | #include "absl/numeric/int128.h" 8 | #include "absl/strings/numbers.h" 9 | #include "substrait/proto/algebra.pb.h" 10 | 11 | namespace io::substrait::expression { 12 | 13 | namespace { 14 | 15 | // negate flips the sign of a two-complements value. 16 | std::string negate(const std::string& value) { 17 | std::string newValue = value; 18 | // Flip all the bits and add one. 19 | bool carryover = true; 20 | for (char& b : newValue) { 21 | uint8_t newB = ~(static_cast(b)); 22 | if (carryover) { 23 | newB++; 24 | } 25 | carryover = carryover && (newB == 0); 26 | b = static_cast(newB & 0xff); 27 | } 28 | return newValue; 29 | } 30 | 31 | void uint128ToBytes(const absl::uint128& value, std::uint8_t* bytes) { 32 | // Copy the low 64 bits of the uint128 value into the first 8 bytes of the 33 | // output buffer. 34 | std::memcpy(bytes, &value, 8); 35 | 36 | // Copy the high 64 bits of the uint128 value into the next 8 bytes of the 37 | // output buffer. 
38 | std::memcpy(bytes + 8, reinterpret_cast(&value) + 8, 8); 39 | } 40 | 41 | } // namespace 42 | 43 | DecimalLiteral DecimalLiteral::fromProto( 44 | const ::substrait::proto::Expression_Literal_Decimal& proto) { 45 | return {proto.value(), proto.precision(), proto.scale()}; 46 | } 47 | 48 | DecimalLiteral DecimalLiteral::fromString( 49 | const std::string& str, 50 | int32_t precision, 51 | int32_t scale) { 52 | absl::uint128 v; 53 | if (!absl::SimpleAtoi(str, &v)) { 54 | // TODO -- Store the parse errors so that they can be examined later. 55 | return {"", 0, 0}; 56 | } 57 | std::uint8_t valueBytes[16]; 58 | uint128ToBytes(v, valueBytes); 59 | return {std::string((const char*)valueBytes, 16), precision, scale}; 60 | } 61 | 62 | bool DecimalLiteral::isValid() { 63 | return value_.size() == 16 && precision_ >= 1 && precision_ <= 38; 64 | } 65 | 66 | std::string DecimalLiteral::toBaseString() { 67 | std::stringstream decimalString; 68 | if (value_.empty()) { 69 | return "0"; 70 | } 71 | 72 | std::string processingValue = value_; 73 | 74 | // Determine the sign of the value. 75 | size_t numBytes = value_.size(); 76 | bool isNegative = ((value_[numBytes - 1] & 0x80) != 0); 77 | if (isNegative) { 78 | decimalString << "-"; 79 | processingValue = negate(value_); 80 | } 81 | 82 | // Collect the bytes into an unsigned integer. 83 | absl::uint128 value = 0; // Will only hold 16 hex chars. 84 | for (size_t i = numBytes; i > 0; i--) { 85 | value = (value << 8) | static_cast(processingValue[i - 1]); 86 | } 87 | 88 | // Pull off the digits backwards. 89 | std::stringstream valueString; 90 | while (value >= 10) { 91 | valueString << static_cast('0' + (value % 10)); 92 | value /= 10; 93 | } 94 | valueString << static_cast('0' + value); 95 | 96 | // Reverse the digits. 
97 | std::string v = valueString.str(); 98 | for (auto c = v.rbegin(); c != v.rend(); c++) { 99 | decimalString << *c; 100 | } 101 | 102 | return decimalString.str(); 103 | } 104 | 105 | std::string DecimalLiteral::toString() { 106 | std::stringstream decimalString; 107 | 108 | decimalString << toBaseString(); 109 | 110 | if (scale_ > 0) { 111 | decimalString << "E-" << scale_; 112 | } else if (scale_ < 0) { 113 | decimalString << "E+" << -scale_; 114 | } 115 | decimalString << "@precision=" << precision_; 116 | 117 | return decimalString.str(); 118 | } 119 | 120 | ::substrait::proto::Expression_Literal_Decimal DecimalLiteral::toProto() { 121 | ::substrait::proto::Expression_Literal_Decimal result; 122 | result.set_value(value_); 123 | result.set_precision(precision_); 124 | result.set_scale(scale_); 125 | return result; 126 | }; 127 | 128 | } // namespace io::substrait::expression 129 | -------------------------------------------------------------------------------- /src/substrait/expression/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_test_case( 4 | substrait_expression_test 5 | SOURCES 6 | DecimalTest.cpp 7 | EXTRA_LINK_LIBS 8 | substrait_expression 9 | gmock 10 | gtest 11 | gtest_main) 12 | -------------------------------------------------------------------------------- /src/substrait/expression/tests/DecimalTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/expression/DecimalLiteral.h" 4 | 5 | #include 6 | #include 7 | 8 | #include "substrait/proto/algebra.pb.h" 9 | 10 | namespace io::substrait::expression { 11 | 12 | class TestCase { 13 | public: 14 | std::string name; 15 | DecimalLiteral input; 16 | 17 | bool expectedValidity; 18 | std::string expectedValue; 19 | }; 20 | 21 | static DecimalLiteral 22 | createDecimal(const std::string& 
value, int32_t scale, int32_t precision) { 23 | ::substrait::proto::Expression_Literal_Decimal proto; 24 | proto.set_value(value); 25 | proto.set_precision(precision); 26 | proto.set_scale(scale); 27 | return DecimalLiteral::fromProto(proto); 28 | } 29 | 30 | class DecimalTest : public ::testing::TestWithParam {}; 31 | 32 | std::vector getTestCases() { 33 | static std::vector cases = { 34 | {"42noscale", 35 | createDecimal("\x2A", 0, 3), // NOLINT 36 | false, 37 | "42@precision=3"}, 38 | {"42positivescale", 39 | createDecimal("\x2A", +2, 3), // NOLINT 40 | false, 41 | "42E-2@precision=3"}, 42 | {"42negativescale", 43 | createDecimal("\x2A", -2, 3), // NOLINT 44 | false, 45 | "42E+2@precision=3"}, 46 | 47 | {"8bit, 0", createDecimal({"\x00", 1}, 0, 4), false, "0@precision=4"}, 48 | {"8bit, 1", createDecimal("\x01", 0, 4), false, "1@precision=4"}, 49 | {"8bit, 2", createDecimal("\x02", 0, 4), false, "2@precision=4"}, 50 | {"8bit, 126", 51 | createDecimal("\x7E", 0, 4), // NOLINT 52 | false, 53 | "126@precision=4"}, 54 | {"8bit, 127", createDecimal("\x7F", 0, 4), false, "127@precision=4"}, 55 | {"8bit, 128", createDecimal("\x80", 0, 4), false, "-128@precision=4"}, 56 | {"8bit, 129", createDecimal("\x81", 0, 4), false, "-127@precision=4"}, 57 | {"8bit, 130", createDecimal("\x82", 0, 4), false, "-126@precision=4"}, 58 | {"8bit, 254", createDecimal("\xFE", 0, 4), false, "-2@precision=4"}, 59 | {"8bit, 255", createDecimal("\xFF", 0, 4), false, "-1@precision=4"}, 60 | 61 | {"16bit, 2", createDecimal("\x02", 0, 4), false, "2@precision=4"}, 62 | {"16bit, 127", 63 | createDecimal({"\x7F\x00", 2}, 0, 4), 64 | false, 65 | "127@precision=4"}, 66 | {"16bit, 128", 67 | createDecimal({"\x80\x00", 2}, 0, 4), 68 | false, 69 | "128@precision=4"}, 70 | {"16bit, 33333", 71 | createDecimal("\x39\x30", 0, 4), // NOLINT 72 | false, 73 | "12345@precision=4"}, 74 | {"16bit, 53191", 75 | createDecimal("\xC7\xCF", 0, 4), 76 | false, 77 | "-12345@precision=4"}, 78 | {"16bit, 65534", 79 | 
createDecimal("\xFE\xFF", 0, 4), 80 | false, 81 | "-2@precision=4"}, 82 | {"16bit, 65535", 83 | createDecimal("\xFF\xFF", 0, 4), 84 | false, 85 | "-1@precision=4"}, 86 | 87 | {"propersize", 88 | createDecimal( 89 | "\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78", 90 | -2, 91 | 31), 92 | true, 93 | "159954953172672629770948536149615195154E+2@precision=31"}, 94 | {"zeroprecision", 95 | createDecimal( 96 | "\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78", 97 | -2, 98 | 0), 99 | false, 100 | "159954953172672629770948536149615195154E+2@precision=0"}, 101 | {"negativeprecision", 102 | createDecimal( 103 | "\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78", 104 | -2, 105 | -2), 106 | false, 107 | "159954953172672629770948536149615195154E+2@precision=-2"}, 108 | {"largeprecision", 109 | createDecimal( 110 | "\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78\x12\x34\x56\x78", 111 | -2, 112 | 42), 113 | false, 114 | "159954953172672629770948536149615195154E+2@precision=42"}, 115 | 116 | {"zero", 117 | createDecimal( 118 | {"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 119 | 16}, 120 | 2, 121 | 31), 122 | true, 123 | "0E-2@precision=31"}, 124 | {"minint", 125 | createDecimal( 126 | {"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80", 127 | 16}, 128 | 0, 129 | 38), 130 | true, 131 | "-170141183460469231731687303715884105728@precision=38"}, 132 | {"maxint", 133 | createDecimal( 134 | {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f", 135 | 16}, 136 | 0, 137 | 38), 138 | true, 139 | "170141183460469231731687303715884105727@precision=38"}, 140 | {"negativeone", 141 | createDecimal( 142 | {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", 143 | 16}, 144 | 2, 145 | 31), 146 | true, 147 | "-1E-2@precision=31"}, 148 | }; 149 | return cases; 150 | } 151 | 152 | TEST_P(DecimalTest, isValid) { 153 | auto [name, decimal, expectedValidity, _] = GetParam(); 154 | 
ASSERT_THAT(decimal.isValid(), ::testing::Eq(expectedValidity)); 155 | } 156 | 157 | TEST_P(DecimalTest, toString) { 158 | auto [name, decimal, _, expectedValue] = GetParam(); 159 | ASSERT_THAT(decimal.toString(), ::testing::Eq(expectedValue)); 160 | } 161 | 162 | INSTANTIATE_TEST_SUITE_P( 163 | DecimalTests, 164 | DecimalTest, 165 | ::testing::ValuesIn(getTestCases()), 166 | [](const testing::TestParamInfo& info) { 167 | std::string identifier = info.param.name; 168 | // Remove non-alphanumeric characters to make the test framework happy. 169 | identifier.erase( 170 | std::remove_if( 171 | identifier.begin(), 172 | identifier.end(), 173 | [](auto const& c) -> bool { return !std::isalnum(c); }), 174 | identifier.end()); 175 | return identifier; 176 | }); 177 | 178 | } // namespace io::substrait::expression 179 | -------------------------------------------------------------------------------- /src/substrait/function/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | set(FUNCTION_SRCS Function.cpp Extension.cpp FunctionLookup.cpp) 4 | 5 | add_library(substrait_function ${FUNCTION_SRCS}) 6 | target_sources( 7 | substrait_function 8 | PUBLIC FILE_SET 9 | HEADERS 10 | BASE_DIRS 11 | ../../../include/ 12 | FILES 13 | ../../../include/substrait/function/Extension.h 14 | ../../../include/substrait/function/Function.h 15 | ../../../include/substrait/function/FunctionLookup.h 16 | ../../../include/substrait/function/FunctionSignature.h) 17 | 18 | target_link_libraries(substrait_function substrait_type yaml-cpp) 19 | 20 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 21 | add_subdirectory(tests) 22 | endif() 23 | 24 | install( 25 | TARGETS substrait_function 26 | EXPORT SubstraitTargets 27 | FILE_SET HEADERS) 28 | -------------------------------------------------------------------------------- /src/substrait/function/Function.cpp: 
-------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/function/Function.h" 4 | #include 5 | 6 | namespace io::substrait { 7 | 8 | bool FunctionImplementation::tryMatch(const FunctionSignature& signature) { 9 | const auto& actualTypes = signature.arguments; 10 | if (variadic.has_value()) { 11 | // return false if actual types length less than min of variadic 12 | const auto max = variadic->max; 13 | if ((actualTypes.size() < variadic->min) || 14 | (max.has_value() && actualTypes.size() > max.value())) { 15 | return false; 16 | } 17 | 18 | const auto& variadicArgument = arguments[0]; 19 | // actual type must same as the variadicArgument 20 | if (const auto& variadicValueArgument = 21 | std::dynamic_pointer_cast(variadicArgument)) { 22 | for (const auto& actualType : actualTypes) { 23 | if (!variadicValueArgument->type->isMatch(actualType)) { 24 | return false; 25 | } 26 | } 27 | } 28 | } else { 29 | std::vector> valueArguments; 30 | for (const auto& argument : arguments) { 31 | if (const auto& variadicValueArgument = 32 | std::dynamic_pointer_cast(argument)) { 33 | valueArguments.emplace_back(variadicValueArgument); 34 | } 35 | } 36 | // return false if size of actual types not equal to size of value 37 | // arguments. 
38 | if (valueArguments.size() != actualTypes.size()) { 39 | return false; 40 | } 41 | 42 | for (auto i = 0; i < actualTypes.size(); i++) { 43 | const auto& valueArgument = valueArguments[i]; 44 | if (!valueArgument->type->isMatch(actualTypes[i])) { 45 | return false; 46 | } 47 | } 48 | } 49 | const auto& sigReturnType = signature.returnType; 50 | if (this->returnType && sigReturnType) { 51 | return returnType->isMatch(sigReturnType); 52 | } else { 53 | return true; 54 | } 55 | } 56 | 57 | std::string FunctionImplementation::signature() const { 58 | std::stringstream ss; 59 | ss << name; 60 | if (!arguments.empty()) { 61 | ss << ":"; 62 | for (auto it = arguments.begin(); it != arguments.end(); ++it) { 63 | const auto& typeSign = (*it)->toTypeString(); 64 | if (it == arguments.end() - 1) { 65 | ss << typeSign; 66 | } else { 67 | ss << typeSign << "_"; 68 | } 69 | } 70 | } 71 | 72 | return ss.str(); 73 | } 74 | 75 | bool AggregateFunctionImplementation::tryMatch( 76 | const FunctionSignature& signature) { 77 | bool matched = FunctionImplementation::tryMatch(signature); 78 | if (!matched && intermediate) { 79 | const auto& actualTypes = signature.arguments; 80 | if (actualTypes.size() == 1) { 81 | return intermediate->isMatch(actualTypes[0]); 82 | } 83 | } 84 | return matched; 85 | } 86 | 87 | } // namespace io::substrait 88 | -------------------------------------------------------------------------------- /src/substrait/function/FunctionLookup.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/function/FunctionLookup.h" 4 | 5 | namespace io::substrait { 6 | 7 | FunctionImplementationPtr FunctionLookup::lookupFunction( 8 | const FunctionSignature& signature) const { 9 | const auto& functionImpls = getFunctionImpls(); 10 | auto functionImplsIter = functionImpls.find(signature.name); 11 | if (functionImplsIter != functionImpls.end()) { 12 | for (const auto& 
candidateFunctionImpl : functionImplsIter->second) { 13 | if (candidateFunctionImpl->tryMatch(signature)) { 14 | return candidateFunctionImpl; 15 | } 16 | } 17 | } 18 | return nullptr; 19 | } 20 | 21 | } // namespace io::substrait 22 | -------------------------------------------------------------------------------- /src/substrait/function/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_test_case( 4 | substrait_function_test 5 | SOURCES 6 | FunctionLookupTest.cpp 7 | EXTRA_LINK_LIBS 8 | substrait_function 9 | gtest 10 | gtest_main) 11 | -------------------------------------------------------------------------------- /src/substrait/function/tests/FunctionLookupTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "substrait/function/FunctionLookup.h" 9 | 10 | using namespace io::substrait; 11 | 12 | class FunctionLookupTest : public ::testing::Test { 13 | protected: 14 | static std::string getExtensionAbsolutePath() { 15 | const std::string absolutePath = __FILE__; 16 | std::filesystem::path path(absolutePath); 17 | std::filesystem::path parentDir = path.parent_path(); 18 | return (parentDir / "../../../../third_party/substrait/extensions/") 19 | .string(); 20 | } 21 | 22 | void SetUp() override { 23 | ExtensionPtr extension = Extension::load(getExtensionAbsolutePath()); 24 | scalarFunctionLookup_ = std::make_shared(extension); 25 | aggregateFunctionLookup_ = 26 | std::make_shared(extension); 27 | } 28 | 29 | void testScalarFunctionLookup( 30 | const FunctionSignature& inputSignature, 31 | const std::string& outputSignature) { 32 | const auto& functionImpl = 33 | scalarFunctionLookup_->lookupFunction(inputSignature); 34 | 35 | ASSERT_TRUE(functionImpl != nullptr); 36 | ASSERT_EQ(functionImpl->signature(), 
outputSignature); 37 | } 38 | 39 | void testAggregateFunctionLookup( 40 | const FunctionSignature& inputSignature, 41 | const std::string& outputSignature) { 42 | const auto& functionImpl = 43 | aggregateFunctionLookup_->lookupFunction(inputSignature); 44 | 45 | ASSERT_TRUE(functionImpl != nullptr); 46 | ASSERT_EQ(functionImpl->signature(), outputSignature); 47 | } 48 | 49 | private: 50 | FunctionLookupPtr scalarFunctionLookup_; 51 | FunctionLookupPtr aggregateFunctionLookup_; 52 | }; 53 | 54 | TEST_F(FunctionLookupTest, compareFunction) { 55 | testScalarFunctionLookup( 56 | {"lt", {tinyint(), tinyint()}, boolean()}, "lt:any1_any1"); 57 | 58 | testScalarFunctionLookup( 59 | {"lt", {smallint(), smallint()}, boolean()}, "lt:any1_any1"); 60 | 61 | testScalarFunctionLookup( 62 | {"lt", {integer(), integer()}, boolean()}, "lt:any1_any1"); 63 | 64 | testScalarFunctionLookup( 65 | {"lt", {bigint(), bigint()}, boolean()}, "lt:any1_any1"); 66 | 67 | testScalarFunctionLookup( 68 | {"lt", {float4(), float4()}, boolean()}, "lt:any1_any1"); 69 | 70 | testScalarFunctionLookup( 71 | {"lt", {float8(), float8()}, boolean()}, "lt:any1_any1"); 72 | testScalarFunctionLookup( 73 | {"between", {tinyint(), tinyint(), tinyint()}, boolean()}, 74 | "between:any1_any1_any1"); 75 | } 76 | 77 | TEST_F(FunctionLookupTest, arithmeticFunction) { 78 | testScalarFunctionLookup( 79 | {"add", {tinyint(), tinyint()}, tinyint()}, "add:i8_i8"); 80 | 81 | testScalarFunctionLookup( 82 | {"divide", 83 | { 84 | float4(), 85 | float4(), 86 | }, 87 | float4()}, 88 | "divide:fp32_fp32"); 89 | } 90 | 91 | TEST_F(FunctionLookupTest, aggregate) { 92 | // for intermediate type 93 | testAggregateFunctionLookup( 94 | {"avg", {row({float8(), bigint()})}, float4()}, "avg:fp32"); 95 | } 96 | 97 | TEST_F(FunctionLookupTest, logical) { 98 | testScalarFunctionLookup({"and", {}, boolean()}, "and:bool"); 99 | testScalarFunctionLookup({"and", {boolean()}, boolean()}, "and:bool"); 100 | testScalarFunctionLookup( 101 | {"and", 
{boolean(), boolean()}, boolean()}, "and:bool"); 102 | 103 | testScalarFunctionLookup( 104 | {"or", {boolean(), boolean()}, boolean()}, "or:bool"); 105 | testScalarFunctionLookup({"not", {boolean()}, boolean()}, "not:bool"); 106 | testScalarFunctionLookup( 107 | {"xor", {boolean(), boolean()}, boolean()}, "xor:bool_bool"); 108 | } 109 | 110 | TEST_F(FunctionLookupTest, stringFunction) { 111 | testScalarFunctionLookup( 112 | {"like", {string(), string()}, boolean()}, "like:str_str"); 113 | testScalarFunctionLookup( 114 | {"like", {varchar(3), varchar(4)}, boolean()}, 115 | "like:vchar_vchar"); 116 | testScalarFunctionLookup( 117 | {"substring", {string(), integer(), integer()}, string()}, 118 | "substring:str_i32_i32"); 119 | } 120 | -------------------------------------------------------------------------------- /src/substrait/proto/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | find_package(Protobuf REQUIRED) 3 | 4 | find_package(Perl REQUIRED) 5 | set(UPDATE_PROTO_PACKAGE_TOOL 6 | "${CMAKE_CURRENT_SOURCE_DIR}/update_proto_package.pl") 7 | 8 | set(RAW_PROTO_INCLUDE_DIR 9 | "${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/substrait/proto/substrait" 10 | ) 11 | cmake_path(SET PROTO_INCLUDE_DIR NORMALIZE "${RAW_PROTO_INCLUDE_DIR}") 12 | cmake_path(GET PROTO_INCLUDE_DIR PARENT_PATH PROTO_INCLUDE_TOPLEVEL_DIR) 13 | set(PROTOBUF_IMPORT_DIRS ${PROTO_INCLUDE_DIR}/extensions) 14 | 15 | file(GLOB PROTOBUF_FILELIST ${PROTO_INCLUDE_DIR}/*.proto 16 | ${PROTO_INCLUDE_DIR}/extensions/*.proto) 17 | 18 | # Create a copy of all of the input proto files with an updated package 19 | # (substrait.proto). 
20 | set(GENERATED_PROTO_TOPLEVEL_DIR 21 | "${CMAKE_BINARY_DIR}/substrait/proto/generated-protos") 22 | set(GENERATED_PROTO_MIDLEVEL_DIR 23 | "${GENERATED_PROTO_TOPLEVEL_DIR}/substrait/proto") 24 | set(GENERATED_PROTOBUF_LIST) 25 | foreach(PROTO_FILE IN LISTS PROTOBUF_FILELIST) 26 | file(RELATIVE_PATH RELATIVE_PROTO_PATH "${PROTO_INCLUDE_DIR}" "${PROTO_FILE}") 27 | set(GENERATED_PROTO_FILE 28 | ${GENERATED_PROTO_MIDLEVEL_DIR}/${RELATIVE_PROTO_PATH}) 29 | 30 | cmake_path(GET GENERATED_PROTO_FILE PARENT_PATH GENERATED_PROTO_DIR) 31 | file(MAKE_DIRECTORY ${GENERATED_PROTO_DIR}) 32 | add_custom_command( 33 | OUTPUT ${GENERATED_PROTO_FILE} 34 | COMMAND ${PERL_EXECUTABLE} ${UPDATE_PROTO_PACKAGE_TOOL} ${PROTO_FILE} > 35 | ${GENERATED_PROTO_FILE} 36 | DEPENDS ${PROTO_FILE} ${UPDATE_PROTO_PACKAGE_TOOL} 37 | COMMENT "Modified package name in protobuf definition for ${PROTO_FILE}." 38 | VERBATIM) 39 | list(APPEND GENERATED_PROTOBUF_LIST ${GENERATED_PROTO_FILE}) 40 | endforeach() 41 | 42 | # Generate cpp sources for the protobufs in 43 | # third_party/substrait/proto/substrait. 
44 | set(PROTO_HDRS) 45 | set(PROTO_SRCS) 46 | 47 | set(PROTO_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}") 48 | 49 | cmake_path(GET PROTO_OUTPUT_DIR PARENT_PATH PROTO_OUTPUT_PARENT_DIR) 50 | cmake_path(GET PROTO_OUTPUT_PARENT_DIR PARENT_PATH PROTO_OUTPUT_MIDLEVEL_DIR) 51 | cmake_path(GET PROTO_OUTPUT_MIDLEVEL_DIR PARENT_PATH PROTO_OUTPUT_TOPLEVEL_DIR) 52 | 53 | foreach(PROTO_FILE IN LISTS PROTOBUF_FILELIST) 54 | file(RELATIVE_PATH RELATIVE_PROTO_PATH "${PROTO_INCLUDE_DIR}" "${PROTO_FILE}") 55 | set(GENERATED_PROTO_FILE 56 | ${GENERATED_PROTO_MIDLEVEL_DIR}/${RELATIVE_PROTO_PATH}) 57 | message( 58 | STATUS "Defined C++ source/header build rule for ${RELATIVE_PROTO_PATH}") 59 | 60 | cmake_path(REMOVE_EXTENSION RELATIVE_PROTO_PATH) 61 | 62 | set(PROTO_HDR ${PROTO_OUTPUT_PARENT_DIR}/proto/${RELATIVE_PROTO_PATH}.pb.h) 63 | set(PROTO_SRC ${PROTO_OUTPUT_PARENT_DIR}/proto/${RELATIVE_PROTO_PATH}.pb.cc) 64 | add_custom_command( 65 | OUTPUT ${PROTO_SRC} ${PROTO_HDR} 66 | COMMAND 67 | protobuf::protoc "--proto_path=${GENERATED_PROTO_TOPLEVEL_DIR}" 68 | "--proto_path=${protobuf_SOURCE_DIR}/src" 69 | "--cpp_out=${PROTO_OUTPUT_MIDLEVEL_DIR}" ${GENERATED_PROTO_FILE} 70 | DEPENDS ${GENERATED_PROTOBUF_LIST} protobuf::protoc 71 | COMMENT "Generated C++ protobuf module for ${PROTO_FILE}" 72 | VERBATIM) 73 | list(APPEND PROTO_HDRS ${PROTO_HDR}) 74 | list(APPEND PROTO_SRCS ${PROTO_SRC}) 75 | endforeach() 76 | 77 | # Add the generated protobuf C++ files to our exported library. 78 | add_library(substrait_proto ${PROTO_SRCS} ${PROTO_HDRS} ProtoUtils.cpp) 79 | target_sources( 80 | substrait_proto 81 | PUBLIC FILE_SET 82 | HEADERS 83 | BASE_DIRS 84 | ${PROTO_OUTPUT_TOPLEVEL_DIR}/src 85 | ../.. 86 | FILES 87 | ${PROTO_HDRS} 88 | ProtoUtils.h) 89 | 90 | # Include the protobuf library as a dependency to use this class. 91 | target_link_libraries(substrait_proto protobuf::libprotobuf) 92 | 93 | # Make sure we can see our own generated include files. 
94 | target_include_directories( 95 | substrait_proto SYSTEM 96 | PUBLIC $ 97 | $) 98 | 99 | install( 100 | TARGETS substrait_proto 101 | EXPORT SubstraitTargets 102 | LIBRARY FILE_SET HEADERS) 103 | -------------------------------------------------------------------------------- /src/substrait/proto/ProtoUtils.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/proto/ProtoUtils.h" 4 | 5 | namespace substrait::proto { 6 | 7 | std::string planRelTypeCaseName(::substrait::proto::PlanRel::RelTypeCase num) { 8 | static std::vector caseNames = { 9 | "unknown", 10 | "rel", 11 | "root", 12 | }; 13 | 14 | if (num >= caseNames.size()) { 15 | return "unknown"; 16 | } 17 | 18 | return caseNames[num]; 19 | } 20 | 21 | std::string relTypeCaseName(::substrait::proto::Rel::RelTypeCase num) { 22 | static std::vector caseNames = { 23 | "unknown", 24 | "read", 25 | "filter", 26 | "fetch", 27 | "aggregate", 28 | "sort", 29 | "join", 30 | "project", 31 | "set", 32 | "extensionsingle", 33 | "extensionmulti", 34 | "extensionleaf", 35 | "cross", 36 | "hashjoin", 37 | "mergejoin", 38 | }; 39 | 40 | if (num >= caseNames.size()) { 41 | return "unknown"; 42 | } 43 | 44 | return caseNames[num]; 45 | } 46 | 47 | } // namespace substrait::proto 48 | -------------------------------------------------------------------------------- /src/substrait/proto/ProtoUtils.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "substrait/proto/algebra.pb.h" 8 | #include "substrait/proto/plan.pb.h" 9 | 10 | namespace substrait::proto { 11 | 12 | std::string planRelTypeCaseName(::substrait::proto::PlanRel::RelTypeCase num); 13 | 14 | std::string relTypeCaseName(::substrait::proto::Rel::RelTypeCase num); 15 | 16 | } // namespace substrait::proto 17 | 
-------------------------------------------------------------------------------- /src/substrait/proto/update_proto_package.pl: -------------------------------------------------------------------------------- 1 | #!/bin/perl -w 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # Renames package declarations for protobuffers from substrait to substrait.proto. 5 | # This allows us to modify where the generated C++ have their definitions without 6 | # affecting existing customers of the protobuf. 7 | 8 | while (<>) { 9 | s|^(package substrait)|$1.proto|; 10 | s!^(import "substrait/)(.*proto\")!$1proto/$2!; 11 | s|substrait\.extensions|substrait.proto.extensions|g; 12 | print; 13 | } 14 | -------------------------------------------------------------------------------- /src/substrait/textplan/Any.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "fmt/format.h" 8 | 9 | namespace io::substrait::textplan { 10 | 11 | template 12 | inline ValueType 13 | any_cast(const std::any& value, const char* file, int line) { // NOLINT 14 | try { 15 | return std::any_cast(value); 16 | } catch (const std::bad_any_cast&) { 17 | throw std::invalid_argument( 18 | fmt::format("{}:{} - {}", file, line, "bad any cast")); 19 | } 20 | } 21 | 22 | // A wrapper around std::any_cast that provides exceptions with line numbers. 23 | #define ANY_CAST(ValueType, Value) \ 24 | ::io::substrait::textplan::any_cast(Value, __FILE__, __LINE__) 25 | 26 | // Casts the any if it matches the given type otherwise it returns nullopt. The expansion and its argument are parenthesized so the macro composes safely inside larger expressions. 27 | #define ANY_CAST_IF(ValueType, value) \ 28 | ((value).type() != typeid(ValueType) \ 29 | ? 
::std::nullopt \ 30 | : ::std::make_optional(ANY_CAST(ValueType, (value)))) 31 | 32 | } // namespace io::substrait::textplan 33 | -------------------------------------------------------------------------------- /src/substrait/textplan/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_subdirectory(converter) 4 | add_subdirectory(parser) 5 | 6 | add_library( 7 | symbol_table 8 | Location.cpp 9 | Location.h 10 | PlanPrinterVisitor.cpp 11 | PlanPrinterVisitor.h 12 | StringManipulation.cpp 13 | StringManipulation.h 14 | SymbolTable.cpp 15 | SymbolTable.h 16 | SymbolTablePrinter.cpp 17 | SymbolTablePrinter.h 18 | StructuredSymbolData.h 19 | Any.h) 20 | 21 | add_library(error_listener SubstraitErrorListener.cpp SubstraitErrorListener.h) 22 | 23 | add_library(parse_result ParseResult.cpp ParseResult.h) 24 | 25 | add_dependencies(symbol_table substrait_proto substrait_common absl::strings 26 | fmt::fmt-header-only) 27 | 28 | target_link_libraries( 29 | symbol_table 30 | substrait_base_proto_visitor 31 | substrait_proto 32 | substrait_common 33 | substrait_expression 34 | absl::strings 35 | fmt::fmt-header-only 36 | date::date) 37 | 38 | # Provide access to the generated protobuffer headers hierarchy. 
39 | target_include_directories( 40 | symbol_table PUBLIC $) 41 | 42 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 43 | add_subdirectory(tests) 44 | endif() 45 | 46 | install(TARGETS error_listener parse_result symbol_table 47 | EXPORT SubstraitTargets) 48 | -------------------------------------------------------------------------------- /src/substrait/textplan/Finally.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | namespace io::substrait::textplan { 6 | 7 | template 8 | struct FinalAction { 9 | explicit FinalAction(F f) : clean_{f} {} 10 | 11 | ~FinalAction() { 12 | clean_(); 13 | } 14 | 15 | private: 16 | F clean_; 17 | }; 18 | 19 | template 20 | FinalAction finally(F f) { 21 | return FinalAction(f); 22 | } 23 | 24 | } // namespace io::substrait::textplan 25 | -------------------------------------------------------------------------------- /src/substrait/textplan/Location.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/textplan/Location.h" 4 | 5 | #include 6 | 7 | namespace io::substrait::textplan { 8 | 9 | constexpr Location Location::kUnknownLocation( 10 | static_cast(nullptr)); 11 | 12 | bool operator==(const Location& c1, const Location& c2) { 13 | // Test only one side since we only store one kind of content per table. 14 | if (std::holds_alternative(c1.loc_)) { 15 | if (!std::holds_alternative(c2.loc_)) { 16 | return false; 17 | } 18 | auto a1 = std::get(c1.loc_); 19 | auto a2 = std::get(c2.loc_); 20 | return a1 == a2; 21 | } else if (std::holds_alternative( 22 | c1.loc_)) { 23 | if (!std::holds_alternative(c2.loc_)) { 24 | return false; 25 | } 26 | auto a1 = std::get(c1.loc_); 27 | auto a2 = std::get(c2.loc_); 28 | return a1 == a2; 29 | } 30 | // Should not be reached. 
31 | return false; 32 | } 33 | 34 | } // namespace io::substrait::textplan 35 | 36 | std::size_t std::hash<::io::substrait::textplan::Location>::operator()( 37 | const ::io::substrait::textplan::Location& loc) const noexcept { 38 | if (std::holds_alternative(loc.loc_)) { 39 | return std::hash()( 40 | std::get(loc.loc_)); 41 | } else if (std::holds_alternative( 42 | loc.loc_)) { 43 | return std::hash()( 44 | std::get(loc.loc_)); 45 | } 46 | // Should not be reached. 47 | return 0; 48 | } 49 | 50 | bool std::less<::io::substrait::textplan::Location>::operator()( 51 | const ::io::substrait::textplan::Location& lhs, 52 | const ::io::substrait::textplan::Location& rhs) const noexcept { 53 | if (std::holds_alternative(lhs.loc_)) { 54 | if (!std::holds_alternative(rhs.loc_)) { 55 | // This alternative is always less than the other location types. 56 | return true; 57 | } 58 | return std::get(lhs.loc_) < 59 | std::get(rhs.loc_); 60 | } else if (std::holds_alternative( 61 | lhs.loc_)) { 62 | if (!std::holds_alternative(rhs.loc_)) { 63 | // This alternative is always more than the other location types. 64 | return false; 65 | } 66 | return std::get(lhs.loc_) < 67 | std::get(rhs.loc_); 68 | } 69 | // Should not be reached. 70 | return false; 71 | } 72 | -------------------------------------------------------------------------------- /src/substrait/textplan/Location.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | namespace antlr4 { 9 | class ParserRuleContext; 10 | } 11 | 12 | namespace google::protobuf { 13 | class Message; 14 | }; 15 | 16 | namespace io::substrait::textplan { 17 | 18 | // Location is used for keeping track of where a symbol is within a parse tree. 19 | // Since SymbolTable supports both antlr4 and protobuf messages there are 20 | // essentially two flavors of location. 
It is expected that only one type of 21 | // location would be used in any SymbolTable instance. 22 | class Location { 23 | public: 24 | constexpr explicit Location(antlr4::ParserRuleContext* node) : loc_(node) {} 25 | 26 | constexpr explicit Location(const google::protobuf::Message* msg) 27 | : loc_(msg) {} 28 | 29 | static const Location kUnknownLocation; 30 | 31 | protected: 32 | friend bool operator==(const Location& c1, const Location& c2); 33 | 34 | friend bool operator!=(const Location& c1, const Location& c2) { 35 | return !(c1 == c2); 36 | } 37 | 38 | private: 39 | friend std::hash; 40 | friend std::less; 41 | 42 | std::variant 43 | loc_; 44 | }; 45 | 46 | } // namespace io::substrait::textplan 47 | 48 | template <> 49 | struct std::hash<::io::substrait::textplan::Location> { 50 | std::size_t operator()( 51 | const ::io::substrait::textplan::Location& loc) const noexcept; 52 | }; 53 | 54 | template <> 55 | struct std::less<::io::substrait::textplan::Location> { 56 | bool operator()( 57 | const ::io::substrait::textplan::Location& lhs, 58 | const ::io::substrait::textplan::Location& rhs) const noexcept; 59 | }; 60 | 61 | // Convenience macro useful for constructing parser based locations when the 62 | // type needs recasting. 63 | #define PARSER_LOCATION(ctx) \ 64 | ::io::substrait::textplan::Location( \ 65 | dynamic_cast(ctx)) 66 | 67 | // Convenience macro useful for constructing protobuffer based locations. 
68 | #define PROTO_LOCATION(proto) \ 69 | ::io::substrait::textplan::Location( \ 70 | dynamic_cast(&(proto))) 71 | -------------------------------------------------------------------------------- /src/substrait/textplan/ParseResult.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/textplan/ParseResult.h" 4 | 5 | #include 6 | 7 | namespace io::substrait::textplan { 8 | 9 | std::ostream& operator<<(std::ostream& os, const ParseResult& result) { 10 | if (result.successful()) { 11 | os << *result.symbolTable_; 12 | } 13 | auto msgs = result.getSyntaxErrors(); 14 | if (!msgs.empty()) { 15 | os << "{" << std::endl; 16 | for (const std::string& msg : msgs) { 17 | os << " \"" << msg << "\"," << std::endl; 18 | } 19 | os << "}"; 20 | } 21 | msgs = result.getSemanticErrors(); 22 | if (!msgs.empty()) { 23 | os << "{" << std::endl; 24 | for (const std::string& msg : msgs) { 25 | os << " \"" << msg << "\"," << std::endl; 26 | } 27 | os << "}"; 28 | } 29 | return os; 30 | } 31 | 32 | } // namespace io::substrait::textplan 33 | -------------------------------------------------------------------------------- /src/substrait/textplan/ParseResult.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | #include "substrait/textplan/SymbolTable.h" 9 | 10 | namespace io::substrait::textplan { 11 | 12 | // ParseResult contains the result of a parse (from text to binary) or the 13 | // conversion (from binary to text). The symbol table contains nearly 14 | // all the information necessary to reconstruct either form. 
15 | class ParseResult { 16 | public: 17 | ParseResult( 18 | SymbolTable symbolTable, 19 | std::vector syntaxErrors, 20 | std::vector semanticErrors) { 21 | symbolTable_ = std::make_shared(std::move(symbolTable)); 22 | syntaxErrors_ = std::move(syntaxErrors); 23 | semanticErrors_ = std::move(semanticErrors); 24 | } 25 | 26 | [[nodiscard]] bool successful() const { 27 | return syntaxErrors_.empty() && semanticErrors_.empty(); 28 | } 29 | 30 | [[nodiscard]] const SymbolTable& getSymbolTable() const { 31 | return *symbolTable_; 32 | } 33 | 34 | [[nodiscard]] const std::vector& getSyntaxErrors() const { 35 | return syntaxErrors_; 36 | } 37 | 38 | [[nodiscard]] const std::vector& getSemanticErrors() const { 39 | return semanticErrors_; 40 | } 41 | 42 | [[nodiscard]] std::vector getAllErrors() const { 43 | std::vector errors; 44 | errors.insert(errors.end(), syntaxErrors_.begin(), syntaxErrors_.end()); 45 | errors.insert(errors.end(), semanticErrors_.begin(), semanticErrors_.end()); 46 | return errors; 47 | } 48 | 49 | void addErrors(const std::vector& errors) { 50 | syntaxErrors_.insert(syntaxErrors_.end(), errors.begin(), errors.end()); 51 | } 52 | 53 | // Add the capability for ::testing::PrintToString to print ParseResult. 
54 | friend std::ostream& operator<<(std::ostream& os, const ParseResult& result); 55 | 56 | private: 57 | std::shared_ptr symbolTable_; 58 | std::vector syntaxErrors_; 59 | std::vector semanticErrors_; 60 | }; 61 | 62 | } // namespace io::substrait::textplan 63 | -------------------------------------------------------------------------------- /src/substrait/textplan/PlanPrinterVisitor.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "substrait/proto/plan.pb.h" 8 | #include "substrait/textplan/SubstraitErrorListener.h" 9 | #include "substrait/textplan/SymbolTable.h" 10 | #include "substrait/textplan/converter/BasePlanProtoVisitor.h" 11 | 12 | namespace io::substrait::textplan { 13 | 14 | class PlanPrinterVisitor : public BasePlanProtoVisitor { 15 | public: 16 | // PlanPrinterVisitor takes ownership of the provided symbol table. 17 | explicit PlanPrinterVisitor(const SymbolTable& symbolTable) { 18 | symbolTable_ = std::make_shared(symbolTable); 19 | errorListener_ = std::make_shared(); 20 | currentScope_ = &SymbolInfo::kUnknown; 21 | functionDepth_ = 0; 22 | }; 23 | 24 | [[nodiscard]] std::shared_ptr getSymbolTable() const { 25 | return symbolTable_; 26 | }; 27 | 28 | [[nodiscard]] std::shared_ptr getErrorListener() 29 | const { 30 | return errorListener_; 31 | }; 32 | 33 | std::string printRelation(const SymbolInfo& symbol); 34 | std::string typeToText(const ::substrait::proto::Type& type); 35 | 36 | private: 37 | std::string lookupFieldReference( 38 | uint32_t fieldReference, 39 | const SymbolInfo* currentScope, 40 | uint32_t stepsOut, 41 | bool needFullyQualified); 42 | 43 | std::string lookupFieldReferenceForEmit( 44 | uint32_t fieldReference, 45 | const SymbolInfo* currentScope, 46 | uint32_t stepsOut, 47 | bool needFullyQualified); 48 | 49 | std::string lookupFunctionReference(uint32_t function_reference); 50 | 51 | std::any 
visitSubqueryScalar( 52 | const ::substrait::proto::Expression_Subquery_Scalar& query) override; 53 | std::any visitSubqueryInPredicate( 54 | const ::substrait::proto::Expression_Subquery_InPredicate& query) 55 | override; 56 | std::any visitSubquerySetPredicate( 57 | const ::substrait::proto::Expression_Subquery_SetPredicate& query) 58 | override; 59 | std::any visitSubquerySetComparison( 60 | const ::substrait::proto::Expression_Subquery_SetComparison& query) 61 | override; 62 | std::any visitSelect( 63 | const ::substrait::proto::Expression_MaskExpression_Select& select) 64 | override; 65 | std::any visitType(const ::substrait::proto::Type& type) override; 66 | std::any visitStruct( 67 | const ::substrait::proto::Type_Struct& structure) override; 68 | std::any visitLiteral( 69 | const ::substrait::proto::Expression::Literal& literal) override; 70 | std::any visitFieldReference( 71 | const ::substrait::proto::Expression::FieldReference& ref) override; 72 | std::any visitScalarFunction( 73 | const ::substrait::proto::Expression::ScalarFunction& function) override; 74 | std::any visitWindowFunction( 75 | const ::substrait::proto::Expression::WindowFunction& function) override; 76 | std::any visitIfThen( 77 | const ::substrait::proto::Expression::IfThen& ifthen) override; 78 | std::any visitSwitchExpression( 79 | const ::substrait::proto::Expression::SwitchExpression& expression) 80 | override; 81 | std::any visitSingularOrList( 82 | const ::substrait::proto::Expression::SingularOrList& expression) 83 | override; 84 | std::any visitMultiOrList( 85 | const ::substrait::proto::Expression::MultiOrList& expression) override; 86 | std::any visitCast(const ::substrait::proto::Expression::Cast& cast) override; 87 | std::any visitNested( 88 | const ::substrait::proto::Expression_Nested& structure) override; 89 | std::any visitEnum(const ::substrait::proto::Expression_Enum& value) override; 90 | std::any visitStructSelect( 91 | const 
::substrait::proto::Expression_MaskExpression_StructSelect& 92 | structure) override; 93 | std::any visitListSelect( 94 | const ::substrait::proto::Expression_MaskExpression_ListSelect& select) 95 | override; 96 | std::any visitListSelectItem( 97 | const ::substrait::proto:: 98 | Expression_MaskExpression_ListSelect_ListSelectItem& item) override; 99 | std::any visitMapSelect( 100 | const ::substrait::proto::Expression_MaskExpression_MapSelect& select) 101 | override; 102 | std::any visitExpressionLiteralStruct( 103 | const ::substrait::proto::Expression_Literal_Struct& structure) override; 104 | std::any visitFileOrFiles( 105 | const ::substrait::proto::ReadRel_LocalFiles_FileOrFiles& structure) 106 | override; 107 | std::any visitReferenceSegment( 108 | const ::substrait::proto::Expression_ReferenceSegment& segment) override; 109 | 110 | std::any visitRelationCommon( 111 | const ::substrait::proto::RelCommon& common) override; 112 | std::any visitAggregateFunction( 113 | const ::substrait::proto::AggregateFunction& function) override; 114 | std::any visitExpression( 115 | const ::substrait::proto::Expression& expression) override; 116 | std::any visitMaskExpression( 117 | const ::substrait::proto::Expression::MaskExpression& expression) 118 | override; 119 | 120 | std::any visitRelation(const ::substrait::proto::Rel& relation) override; 121 | 122 | std::any visitReadRelation( 123 | const ::substrait::proto::ReadRel& relation) override; 124 | std::any visitFilterRelation( 125 | const ::substrait::proto::FilterRel& relation) override; 126 | std::any visitFetchRelation( 127 | const ::substrait::proto::FetchRel& relation) override; 128 | std::any visitAggregateRelation( 129 | const ::substrait::proto::AggregateRel& relation) override; 130 | std::any visitSortRelation( 131 | const ::substrait::proto::SortRel& relation) override; 132 | std::any visitProjectRelation( 133 | const ::substrait::proto::ProjectRel& relation) override; 134 | std::any visitJoinRelation( 135 | 
const ::substrait::proto::JoinRel& relation) override; 136 | std::any visitCrossRelation( 137 | const ::substrait::proto::CrossRel& relation) override; 138 | 139 | std::shared_ptr symbolTable_; 140 | std::shared_ptr errorListener_; 141 | const SymbolInfo* currentScope_; /* not owned */ 142 | int currentScopeIndex_{-1}; 143 | int functionDepth_; 144 | }; 145 | 146 | } // namespace io::substrait::textplan 147 | -------------------------------------------------------------------------------- /src/substrait/textplan/StringManipulation.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "StringManipulation.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace io::substrait::textplan { 11 | // Returns true when 'haystack' begins with 'needle'; an exact match and an empty needle both count. 12 | bool startsWith(std::string_view haystack, std::string_view needle) { 13 | return haystack.size() >= needle.size() && // >= so equal-length strings can match 14 | haystack.substr(0, needle.size()) == needle; 15 | } 16 | // Returns true when 'haystack' finishes with 'needle'; an exact match and an empty needle both count. 17 | bool endsWith(std::string_view haystack, std::string_view needle) { 18 | return haystack.size() >= needle.size() && // size check first keeps the subtraction below from underflowing 19 | haystack.substr(haystack.size() - needle.size(), needle.size()) == needle; 20 | } 21 | 22 | } // namespace io::substrait::textplan 23 | -------------------------------------------------------------------------------- /src/substrait/textplan/StringManipulation.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace io::substrait::textplan { 10 | 11 | // Yields true if the string 'haystack' starts with the string 'needle'. 12 | bool startsWith(std::string_view haystack, std::string_view needle); 13 | 14 | // Returns true if the string 'haystack' ends with the string 'needle'. 
15 | bool endsWith(std::string_view haystack, std::string_view needle); 16 | 17 | } // namespace io::substrait::textplan 18 | -------------------------------------------------------------------------------- /src/substrait/textplan/StructuredSymbolData.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | #include "substrait/proto/algebra.pb.h" 9 | 10 | namespace io::substrait::textplan { 11 | 12 | struct SymbolInfo; 13 | 14 | // Used by the PlanRelation and Relation concepts to track connectivity. 15 | struct RelationData { 16 | // Keeps track of the first node in a pipeline. For relations starting a 17 | // pipeline this will not be a self-reference -- it will be nullptr unless 18 | // it is in another pipeline (in which case the value will be the node 19 | // that starts that pipeline). As such this will only have nullptr as a value 20 | // when it is a root node. 21 | const SymbolInfo* pipelineStart{nullptr}; 22 | 23 | // The next node in the pipeline that this node is part of. 24 | const SymbolInfo* continuingPipeline{nullptr}; 25 | // The next nodes in the pipelines that this node starts. 26 | std::vector newPipelines; 27 | // Expressions in this relation consume subqueries with these symbols. 28 | std::vector subQueryPipelines; 29 | 30 | // The information corresponding to the relation without any references to 31 | // other relations or inputs. 32 | ::substrait::proto::Rel relation; 33 | 34 | // Source stores the input symbol of a read relation. 35 | const SymbolInfo* source{nullptr}; 36 | // Schema keeps track of the schema used in this relation. 37 | const SymbolInfo* schema{nullptr}; 38 | 39 | // Column name for each field known to this relation (in field order). Used 40 | // to determine what fields are coming in as well as what fields are going out. 
41 | std::vector fieldReferences; 42 | 43 | // Each field reference here was generated within the current relation. 44 | std::vector generatedFieldReferences; 45 | 46 | // Local aliases for field references in this relation. Used to replace the 47 | // normal form symbols would take for this relation's use only. (Later 48 | // references to the symbol would use the alias.) 49 | std::map generatedFieldReferenceAlternativeExpression; 50 | 51 | // Temporary storage for global aliases for expressions. Used during the 52 | // construction of a relation. 53 | std::map generatedFieldReferenceAliases; 54 | 55 | // If populated, supersedes the combination of fieldReferences and 56 | // generatedFieldReferences for the field symbols exposed by this relation. 57 | std::vector outputFieldReferences; 58 | 59 | // Contains the field reference names seen so far while processing this 60 | // relation along with the id of the first occurrence. Used to detect when 61 | // fully qualified references are necessary. 62 | std::map seenFieldReferenceNames; 63 | }; 64 | 65 | // Used by Schema symbols to keep track of assigned values. 66 | struct SchemaData { 67 | explicit SchemaData(uint32_t anchor_reference) 68 | : anchorReference(anchor_reference){}; 69 | 70 | uint32_t anchorReference; 71 | }; 72 | 73 | // Used by Extension Space symbols to keep track of assigned values. 74 | struct ExtensionSpaceData { 75 | explicit ExtensionSpaceData(uint32_t anchorReference) 76 | : anchorReference(anchorReference){}; 77 | 78 | uint32_t anchorReference; 79 | }; 80 | 81 | // Used by Function symbols to keep track of the name and assigned anchors. 
82 | struct FunctionData { 83 | FunctionData( 84 | std::string name, 85 | std::optional extensionUriReference, 86 | uint32_t anchor) 87 | : name(std::move(name)), 88 | extensionUriReference(extensionUriReference), 89 | anchor(anchor){}; 90 | 91 | std::string name; 92 | std::optional extensionUriReference; 93 | uint32_t anchor; 94 | }; 95 | 96 | } // namespace io::substrait::textplan 97 | -------------------------------------------------------------------------------- /src/substrait/textplan/SubstraitErrorListener.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "SubstraitErrorListener.h" 4 | 5 | #include 6 | #include 7 | 8 | namespace io::substrait::textplan { 9 | 10 | void SubstraitErrorListener::addError( 11 | size_t linenum, 12 | size_t charnum, 13 | const std::string& msg) { 14 | errors_.push_back({{linenum, charnum}, msg}); 15 | } 16 | 17 | std::vector SubstraitErrorListener::getErrorMessages() { 18 | std::vector messages; 19 | for (const auto& instance : getErrors()) { 20 | if (instance.location.line == -1 || 21 | instance.location.charPositionInLine == -1) { 22 | messages.push_back(instance.message); 23 | continue; 24 | } 25 | messages.push_back( 26 | std::to_string(instance.location.line) + ":" + 27 | std::to_string(instance.location.charPositionInLine) + " → " + 28 | instance.message); 29 | } 30 | return messages; 31 | } 32 | 33 | } // namespace io::substrait::textplan 34 | -------------------------------------------------------------------------------- /src/substrait/textplan/SubstraitErrorListener.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | namespace io::substrait::textplan { 9 | 10 | struct ErrorLocation { 11 | size_t line; 12 | size_t charPositionInLine; 13 | }; 14 | 15 | struct ErrorInstance { 16 | 
ErrorLocation location; 17 | std::string message; 18 | }; 19 | 20 | // SubstraitErrorListener is similar in behavior to an antlr4::ErrorListener to 21 | // provide a similar error collection methodology regardless of how the input 22 | // data is obtained. 23 | class SubstraitErrorListener { 24 | public: 25 | SubstraitErrorListener() = default; 26 | 27 | void addError(size_t linenum, size_t charnum, const std::string& msg); 28 | 29 | void addError(const std::string& msg) { 30 | addError(-1, -1, msg); 31 | }; 32 | 33 | void addErrorInstances(const std::vector& errors) { 34 | errors_.insert(errors_.end(), errors.begin(), errors.end()); 35 | } 36 | 37 | const std::vector& getErrors() { 38 | return errors_; 39 | }; 40 | 41 | bool hasErrors() { 42 | return !errors_.empty(); 43 | } 44 | 45 | std::vector getErrorMessages(); 46 | 47 | private: 48 | std::vector errors_; 49 | }; 50 | 51 | } // namespace io::substrait::textplan 52 | -------------------------------------------------------------------------------- /src/substrait/textplan/SymbolTablePrinter.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "SubstraitErrorListener.h" 8 | #include "SymbolTable.h" 9 | 10 | namespace substrait::proto { 11 | class Expression; 12 | class Plan; 13 | class Rel; 14 | } // namespace substrait::proto 15 | 16 | namespace io::substrait::textplan { 17 | 18 | class SymbolTablePrinter { 19 | public: 20 | static std::string outputToText( 21 | const SymbolTable& symbolTable, 22 | SubstraitErrorListener* errorListener); 23 | 24 | static ::substrait::proto::Plan outputToBinaryPlan( 25 | const SymbolTable& symbolTable); 26 | 27 | private: 28 | static void addInputsToRelation( 29 | const SymbolTable& symbolTable, 30 | const SymbolInfo& symbolInfo, 31 | ::substrait::proto::Rel* relation); 32 | 33 | static void addInputsToExpression( 34 | const SymbolTable& symbolTable, 
35 | const std::vector& symbolInfos, 36 | ::substrait::proto::Expression* expression, 37 | int* consumedPipelines); 38 | }; 39 | 40 | } // namespace io::substrait::textplan 41 | -------------------------------------------------------------------------------- /src/substrait/textplan/converter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | include(../../../../third_party/datetime.cmake) 4 | 5 | set(TEXTPLAN_SRCS 6 | InitialPlanProtoVisitor.cpp 7 | InitialPlanProtoVisitor.h 8 | PipelineVisitor.cpp 9 | PipelineVisitor.h 10 | LoadBinary.cpp 11 | LoadBinary.h 12 | SaveBinary.cpp 13 | SaveBinary.h 14 | ParseBinary.cpp 15 | ParseBinary.h) 16 | 17 | add_library(substrait_base_proto_visitor BasePlanProtoVisitor.cpp 18 | BasePlanProtoVisitor.h) 19 | 20 | target_link_libraries( 21 | substrait_base_proto_visitor 22 | substrait_common 23 | substrait_proto 24 | error_listener 25 | fmt::fmt-header-only 26 | absl::status 27 | absl::statusor) 28 | 29 | add_library(substrait_textplan_converter ${TEXTPLAN_SRCS}) 30 | 31 | target_link_libraries( 32 | substrait_textplan_converter 33 | substrait_base_proto_visitor 34 | substrait_common 35 | substrait_proto 36 | symbol_table 37 | error_listener 38 | fmt::fmt-header-only 39 | absl::status 40 | absl::statusor) 41 | 42 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 43 | add_subdirectory(tests) 44 | endif() 45 | 46 | add_executable(planconverter Tool.cpp) 47 | 48 | target_link_libraries(planconverter substrait_textplan_converter substrait_io) 49 | 50 | set(NORMALIZER_SRCS ReferenceNormalizer.cpp ReferenceNormalizer.h) 51 | 52 | add_library(substrait_textplan_normalizer ${NORMALIZER_SRCS}) 53 | 54 | target_link_libraries(substrait_textplan_normalizer 55 | substrait_textplan_converter) 56 | 57 | install(TARGETS planconverter substrait_textplan_converter 58 | substrait_base_proto_visitor substrait_textplan_normalizer 59 | EXPORT SubstraitTargets) 60 | 
-------------------------------------------------------------------------------- /src/substrait/textplan/converter/InitialPlanProtoVisitor.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "substrait/proto/plan.pb.h" 8 | #include "substrait/textplan/StructuredSymbolData.h" 9 | #include "substrait/textplan/SubstraitErrorListener.h" 10 | #include "substrait/textplan/SymbolTable.h" 11 | #include "substrait/textplan/converter/BasePlanProtoVisitor.h" 12 | 13 | namespace io::substrait::textplan { 14 | 15 | // InitialPlanProtoVisitor is the first part of the binary to text conversion 16 | // process which identifies the prominent symbols and gives them names. 17 | class InitialPlanProtoVisitor : public BasePlanProtoVisitor { 18 | public: 19 | explicit InitialPlanProtoVisitor() { 20 | symbolTable_ = std::make_shared(); 21 | errorListener_ = std::make_shared(); 22 | }; 23 | 24 | [[nodiscard]] std::shared_ptr getSymbolTable() const { 25 | return symbolTable_; 26 | }; 27 | 28 | [[nodiscard]] std::shared_ptr getErrorListener() 29 | const { 30 | return errorListener_; 31 | }; 32 | 33 | private: 34 | std::any visitExpression( 35 | const ::substrait::proto::Expression& expression) override; 36 | 37 | std::any visitExtensionUri( 38 | const ::substrait::proto::extensions::SimpleExtensionURI& uri) override; 39 | std::any visitExtension( 40 | const ::substrait::proto::extensions::SimpleExtensionDeclaration& 41 | extension) override; 42 | 43 | std::any visitPlanRelation( 44 | const ::substrait::proto::PlanRel& relation) override; 45 | std::any visitRelation(const ::substrait::proto::Rel& relation) override; 46 | std::any visitRelationRoot( 47 | const ::substrait::proto::RelRoot& relation) override; 48 | std::any visitReadRelation( 49 | const ::substrait::proto::ReadRel& relation) override; 50 | 51 | std::any visitVirtualTable( 52 | const 
::substrait::proto::ReadRel_VirtualTable& table) override; 53 | std::any visitLocalFiles( 54 | const ::substrait::proto::ReadRel_LocalFiles& local) override; 55 | std::any visitNamedTable( 56 | const ::substrait::proto::ReadRel_NamedTable& table) override; 57 | std::any visitExtensionTable( 58 | const ::substrait::proto::ReadRel_ExtensionTable& table) override; 59 | 60 | std::any visitNamedStruct( 61 | const ::substrait::proto::NamedStruct& named) override; 62 | 63 | // Populates the input schema from the relations that come before. 64 | void updateLocalSchema( 65 | const std::shared_ptr& relationData, 66 | const ::substrait::proto::Rel& relation, 67 | const ::substrait::proto::Rel& internalRelation); 68 | 69 | static void addFieldToRelation( 70 | const std::shared_ptr& relationData, 71 | const SymbolInfo* field); 72 | 73 | void addFieldsToRelation( 74 | const std::shared_ptr& relationData, 75 | const ::substrait::proto::Rel& relation); 76 | 77 | void addFieldsToRelation( 78 | const std::shared_ptr& relationData, 79 | const ::substrait::proto::Rel& left, 80 | const ::substrait::proto::Rel& right); 81 | 82 | template 83 | void addFieldsToRelation( 84 | const std::shared_ptr& relationData, 85 | const T& relations) { 86 | for (const auto& rel : relations) { 87 | addFieldsToRelation(relationData, rel); 88 | } 89 | }; 90 | 91 | void addGroupingToRelation( 92 | const std::shared_ptr& relationData, 93 | const ::substrait::proto::AggregateRel_Grouping& grouping); 94 | 95 | std::shared_ptr symbolTable_; 96 | std::shared_ptr errorListener_; 97 | 98 | const ::substrait::proto::Rel* currentRelationScope_{nullptr}; // Not owned. 
99 | std::vector outerRelations_; 100 | 101 | std::map 102 | readRelationSources_; 103 | std::map 104 | readRelationSchemas_; 105 | }; 106 | 107 | } // namespace io::substrait::textplan 108 | -------------------------------------------------------------------------------- /src/substrait/textplan/converter/LoadBinary.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/textplan/converter/LoadBinary.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "substrait/proto/plan.pb.h" 18 | #include "substrait/textplan/StringManipulation.h" 19 | 20 | namespace io::substrait::textplan { 21 | 22 | namespace { 23 | 24 | class StringErrorCollector : public google::protobuf::io::ErrorCollector { 25 | public: 26 | void RecordError( 27 | int line, 28 | google::protobuf::io::ColumnNumber column, 29 | absl::string_view message) override { 30 | errors_.push_back( 31 | std::to_string(line + 1) + ":" + std::to_string(column + 1) + " → " + 32 | std::string(message)); 33 | } 34 | 35 | [[nodiscard]] std::vector getErrors() const { 36 | return errors_; 37 | } 38 | 39 | private: 40 | std::vector errors_; 41 | }; 42 | 43 | } // namespace 44 | 45 | absl::StatusOr readFromFile(std::string_view msgPath) { 46 | std::ifstream file(std::string{msgPath}, std::ios::binary); 47 | if (file.fail()) { 48 | auto currDir = std::filesystem::current_path().string(); 49 | return absl::ErrnoToStatus( 50 | errno, 51 | fmt::format( 52 | "Failed to open file {} when running in {}", msgPath, currDir)); 53 | } 54 | std::stringstream buffer; 55 | buffer << file.rdbuf(); 56 | return buffer.str(); 57 | } 58 | 59 | absl::StatusOr<::substrait::proto::Plan> loadFromJson(const std::string& json) { 60 | if (json.empty()) { 61 | return absl::InternalError("Provided JSON string was empty."); 62 | } 
63 | std::string_view usableJson = json; 64 | if (json[0] == '#') { 65 | int idx = 0; 66 | while (idx < json.size() && json[idx] != '\n') { 67 | idx++; 68 | } 69 | usableJson.remove_prefix(idx); 70 | } 71 | ::substrait::proto::Plan plan; 72 | auto status = google::protobuf::util::JsonStringToMessage( 73 | std::string{usableJson}, &plan); 74 | if (!status.ok()) { 75 | std::string msg{status.message()}; 76 | return absl::InternalError( 77 | fmt::format("Failed to parse Substrait JSON: {}", msg)); 78 | } 79 | return plan; 80 | } 81 | 82 | absl::StatusOr<::substrait::proto::Plan> loadFromProtoText( 83 | const std::string& text) { 84 | ::substrait::proto::Plan plan; 85 | ::google::protobuf::TextFormat::Parser parser; 86 | StringErrorCollector collector; 87 | parser.RecordErrorsTo(&collector); 88 | if (!parser.ParseFromString(text, &plan)) { 89 | auto errors = collector.getErrors(); 90 | return absl::InternalError(absl::StrJoin(errors, "")); 91 | } 92 | return plan; 93 | } 94 | 95 | absl::StatusOr<::substrait::proto::Plan> loadFromBinary( 96 | const std::string& bytes) { 97 | ::substrait::proto::Plan plan; 98 | if (!plan.ParseFromString(bytes)) { 99 | return absl::InternalError("Failed to parse as a binary Substrait plan."); 100 | } 101 | return plan; 102 | } 103 | 104 | } // namespace io::substrait::textplan 105 | -------------------------------------------------------------------------------- /src/substrait/textplan/converter/LoadBinary.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace substrait::proto { 10 | class Plan; 11 | } 12 | 13 | namespace io::substrait::textplan { 14 | 15 | // Read the contents of a file from disk. 16 | absl::StatusOr readFromFile(std::string_view msgPath); 17 | 18 | // Reads a plan from a json-encoded text proto. 19 | // Returns a list of errors if the file cannot be parsed. 
20 | absl::StatusOr<::substrait::proto::Plan> loadFromJson(const std::string& json); 21 | 22 | // Reads a plan encoded as a text protobuf. 23 | // Returns a list of errors if the file cannot be parsed. 24 | absl::StatusOr<::substrait::proto::Plan> loadFromProtoText( 25 | const std::string& text); 26 | 27 | // Reads a plan serialized as a binary protobuf. 28 | // Returns a list of errors if the file cannot be parsed. 29 | absl::StatusOr<::substrait::proto::Plan> loadFromBinary( 30 | const std::string& bytes); 31 | 32 | } // namespace io::substrait::textplan 33 | -------------------------------------------------------------------------------- /src/substrait/textplan/converter/ParseBinary.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/textplan/converter/ParseBinary.h" 4 | 5 | #include "substrait/proto/plan.pb.h" 6 | #include "substrait/textplan/PlanPrinterVisitor.h" 7 | #include "substrait/textplan/converter/InitialPlanProtoVisitor.h" 8 | #include "substrait/textplan/converter/PipelineVisitor.h" 9 | 10 | namespace io::substrait::textplan { 11 | 12 | ParseResult parseBinaryPlan(const ::substrait::proto::Plan& plan) { 13 | InitialPlanProtoVisitor visitor; 14 | visitor.visit(plan); 15 | auto symbols = visitor.getSymbolTable(); 16 | auto syntaxErrors = visitor.getErrorListener()->getErrorMessages(); 17 | std::vector semanticErrors; 18 | 19 | PipelineVisitor pipeliner(*symbols); 20 | pipeliner.visit(plan); 21 | 22 | PlanPrinterVisitor printer(*pipeliner.getSymbolTable()); 23 | printer.visit(plan); 24 | auto moreErrors = printer.getErrorListener()->getErrorMessages(); 25 | semanticErrors.insert( 26 | semanticErrors.end(), moreErrors.begin(), moreErrors.end()); 27 | 28 | return {*printer.getSymbolTable(), syntaxErrors, semanticErrors}; 29 | } 30 | 31 | } // namespace io::substrait::textplan 32 | 
// PipelineVisitor walks a plan starting from an existing symbol table. The
// constructor stores its own shared copy of the table it is given, so the
// caller's table is not modified by this visitor.
// NOTE(review): presumably this pass records how relations connect into
// pipelines -- confirm in PipelineVisitor.cpp.
class PipelineVisitor : public BasePlanProtoVisitor {
 public:
  // Copies `symbolTable` into a newly allocated, shared table.
  explicit PipelineVisitor(const SymbolTable& symbolTable) {
    symbolTable_ = std::make_shared(symbolTable);
  }

  // Returns the shared table owned by this visitor.
  [[nodiscard]] std::shared_ptr getSymbolTable() const {
    return symbolTable_;
  };

 private:
  // Overrides of the base visitor hooks; all are defined out of line.
  std::any visitExpression(
      const ::substrait::proto::Expression& expression) override;

  std::any visitRelation(const ::substrait::proto::Rel& relation) override;

  std::any visitPlanRelation(
      const ::substrait::proto::PlanRel& relation) override;

  // Working copy of the symbol table handed to the constructor.
  std::shared_ptr symbolTable_;

  // Starts as null.
  // NOTE(review): presumably the symbol of the relation currently being
  // visited -- confirm in PipelineVisitor.cpp.
  const SymbolInfo* currentRelationScope_{nullptr};
};
// ReferenceNormalizer renumbers the extension space uri references
// and function references in a consistent manner. This makes it easier
// for differencing tools to compare two similar binary plans. The behavior
// of this tool is undefined on invalid plans.
//
// Every operation is a static member, so no instance is needed to use it.
class ReferenceNormalizer {
 public:
  ReferenceNormalizer() = default;

  // Entry point; takes the plan by pointer and does not return a value.
  // NOTE(review): presumably rewrites `plan` in place by running both private
  // steps below -- confirm in ReferenceNormalizer.cpp.
  static void normalize(::substrait::proto::Plan* plan);

 private:
  // NOTE(review): presumably renumbers the extension space (URI) references
  // -- confirm in ReferenceNormalizer.cpp.
  static void normalizeSpaces(::substrait::proto::Plan* plan);
  // NOTE(review): presumably renumbers the function references -- confirm in
  // ReferenceNormalizer.cpp.
  static void normalizeFunctions(::substrait::proto::Plan* plan);
};
23 | #include "substrait/textplan/converter/ParseBinary.h" 24 | 25 | namespace io::substrait::textplan { 26 | 27 | absl::Status savePlanToBinary( 28 | const ::substrait::proto::Plan& plan, 29 | std::string_view output_filename) { 30 | // Open file in binary mode and get its file descriptor 31 | std::ofstream of(std::string{output_filename}, std::ios::binary); 32 | if (!of) { 33 | return absl::InternalError( 34 | fmt::format("Failed to open file {} for writing", output_filename)); 35 | } 36 | 37 | if (!plan.SerializeToOstream(&of)) { 38 | return ::absl::UnknownError("Failed to write plan to stream."); 39 | } 40 | 41 | of.close(); 42 | return absl::OkStatus(); 43 | } 44 | 45 | absl::Status savePlanToJson( 46 | const ::substrait::proto::Plan& plan, 47 | std::string_view output_filename) { 48 | std::ofstream stream(std::string{output_filename}); 49 | if ((stream.fail())) { 50 | return absl::UnavailableError( 51 | fmt::format("Failed to open file {} for writing", output_filename)); 52 | } 53 | 54 | std::string output; 55 | auto status = ::google::protobuf::util::MessageToJsonString(plan, &output); 56 | if (!status.ok()) { 57 | return absl::UnknownError("Failed to save plan as a JSON protobuf."); 58 | } 59 | stream << output; 60 | stream.close(); 61 | if (stream.fail()) { 62 | return absl::UnknownError("Failed to write the plan as a JSON protobuf."); 63 | } 64 | return absl::OkStatus(); 65 | } 66 | 67 | absl::Status savePlanToText( 68 | const ::substrait::proto::Plan& plan, 69 | std::string_view output_filename) { 70 | std::ofstream stream(std::string{output_filename}); 71 | if ((stream.fail())) { 72 | return absl::UnavailableError( 73 | fmt::format("Failed to open file {} for writing", output_filename)); 74 | } 75 | 76 | auto result = parseBinaryPlan(plan); 77 | auto errors = result.getAllErrors(); 78 | if (!errors.empty()) { 79 | return absl::UnknownError(absl::StrJoin(errors, "")); 80 | } 81 | SubstraitErrorListener errorListener; 82 | stream << 
SymbolTablePrinter::outputToText( 83 | result.getSymbolTable(), &errorListener); 84 | stream.close(); 85 | if (stream.fail()) { 86 | return absl::UnknownError("Failed to write the plan as text."); 87 | } 88 | if (!errorListener.getErrorMessages().empty()) { 89 | return absl::UnknownError(fmt::format( 90 | "Errors while writing to text: {}", 91 | absl::StrJoin(errorListener.getErrorMessages(), "\n"))); 92 | } 93 | return absl::OkStatus(); 94 | } 95 | 96 | absl::Status savePlanToProtoText( 97 | const ::substrait::proto::Plan& plan, 98 | std::string_view output_filename) { 99 | int outputFileDescriptor = 100 | creat(std::string{output_filename}.c_str(), S_IREAD | S_IWRITE); 101 | if (outputFileDescriptor == -1) { 102 | return absl::ErrnoToStatus( 103 | errno, 104 | fmt::format("Failed to open file {} for writing", output_filename)); 105 | } 106 | auto stream = 107 | new google::protobuf::io::FileOutputStream(outputFileDescriptor); 108 | 109 | if (!::google::protobuf::TextFormat::Print(plan, stream)) { 110 | return absl::UnknownError("Failed to save plan as a text protobuf."); 111 | } 112 | 113 | if (!stream->Close()) { 114 | return absl::AbortedError("Failed to close file descriptor."); 115 | } 116 | delete stream; 117 | return absl::OkStatus(); 118 | } 119 | 120 | } // namespace io::substrait::textplan 121 | -------------------------------------------------------------------------------- /src/substrait/textplan/converter/SaveBinary.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include "absl/status/status.h" 6 | 7 | namespace substrait::proto { 8 | class Plan; 9 | } 10 | 11 | namespace io::substrait::textplan { 12 | 13 | // Serializes a plan to disk as a binary protobuf. 14 | absl::Status savePlanToBinary( 15 | const ::substrait::proto::Plan& plan, 16 | std::string_view output_filename); 17 | 18 | // Serializes a plan to disk as a JSON-encoded protobuf. 
19 | absl::Status savePlanToJson( 20 | const ::substrait::proto::Plan& plan, 21 | std::string_view output_filename); 22 | 23 | // Calls the converter to store a plan on disk as a text-based substrait plan. 24 | absl::Status savePlanToText( 25 | const ::substrait::proto::Plan& plan, 26 | std::string_view output_filename); 27 | 28 | // Serializes a plan to disk as a text-encoded protobuf. 29 | absl::Status savePlanToProtoText( 30 | const ::substrait::proto::Plan& plan, 31 | std::string_view output_filename); 32 | 33 | } // namespace io::substrait::textplan 34 | -------------------------------------------------------------------------------- /src/substrait/textplan/converter/Tool.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #ifndef _WIN32 4 | #include 5 | #endif 6 | 7 | #include 8 | 9 | #include "substrait/common/Io.h" 10 | #include "substrait/textplan/SymbolTablePrinter.h" 11 | #include "substrait/textplan/converter/LoadBinary.h" 12 | #include "substrait/textplan/converter/ParseBinary.h" 13 | 14 | namespace io::substrait::textplan { 15 | namespace { 16 | 17 | void convertPlanToText(const char* filename) { 18 | auto planOrError = loadPlan(filename); 19 | if (!planOrError.ok()) { 20 | std::cerr << planOrError.status() << std::endl; 21 | return; 22 | } 23 | 24 | auto result = parseBinaryPlan(*planOrError); 25 | SubstraitErrorListener errorListener; 26 | auto textResult = 27 | SymbolTablePrinter::outputToText(result.getSymbolTable(), &errorListener); 28 | result.addErrors(errorListener.getErrorMessages()); 29 | auto errors = result.getAllErrors(); 30 | if (!errors.empty()) { 31 | for (const auto& err : errors) { 32 | std::cerr << err << std::endl; 33 | } 34 | } 35 | std::cout << textResult; 36 | } 37 | 38 | } // namespace 39 | } // namespace io::substrait::textplan 40 | 41 | int main(int argc, char* argv[]) { 42 | if (argc <= 1) { 43 | printf("Usage: planconverter ...\n"); 44 | 
return EXIT_FAILURE; 45 | } 46 | 47 | #ifdef _WIN32 48 | for (int currArg = 1; currArg < argc; currArg++) { 49 | printf("===== %s =====\n", argv[currArg]); 50 | io::substrait::textplan::convertPlanToText(argv[currArg]); 51 | } 52 | #else 53 | for (int currArg = 1; currArg < argc; currArg++) { 54 | glob_t globResult; 55 | glob(argv[currArg], GLOB_TILDE, nullptr, &globResult); 56 | for (size_t i = 0; i < globResult.gl_pathc; i++) { 57 | printf("===== %s =====\n", globResult.gl_pathv[i]); 58 | io::substrait::textplan::convertPlanToText(globResult.gl_pathv[i]); 59 | } 60 | } 61 | #endif 62 | 63 | return EXIT_SUCCESS; 64 | } 65 | -------------------------------------------------------------------------------- /src/substrait/textplan/converter/data/q6_first_stage.golden.splan: -------------------------------------------------------------------------------- 1 | pipelines { 2 | read -> filter -> project -> aggregate -> root; 3 | } 4 | 5 | read relation read { 6 | source local; 7 | base_schema schema; 8 | filter and( 9 | and( 10 | and( 11 | and( 12 | and( 13 | and( 14 | and( 15 | is_not_null(schema.l_shipdate_new)->bool?, 16 | is_not_null(schema.l_discount)->bool?)->bool?, 17 | is_not_null(schema.l_quantity)->bool?)->bool?, 18 | gte(schema.l_shipdate_new, 8766_fp64)->bool?)->bool?, 19 | lt(schema.l_shipdate_new, 9131_fp64)->bool?)->bool?, 20 | gte(schema.l_discount, 0.05_fp64)->bool?)->bool?, 21 | lte(schema.l_discount, 0.07_fp64)->bool?)->bool?, 22 | lt(schema.l_quantity, 24_fp64)->bool?)->bool?; 23 | } 24 | 25 | filter relation filter { 26 | filter and( 27 | and( 28 | and( 29 | and( 30 | gte(schema.l_shipdate_new, 8766_fp64)->bool?, 31 | lt(schema.l_shipdate_new, 9131_fp64)->bool?)->bool?, 32 | gte(schema.l_discount, 0.05_fp64)->bool?)->bool?, 33 | lte(schema.l_discount, 0.07_fp64)->bool?)->bool?, 34 | lt(schema.l_quantity, 24_fp64)->bool?)->bool?; 35 | } 36 | 37 | project relation project { 38 | expression schema.l_extendedprice; 39 | expression schema.l_discount; 40 | 
# SPDX-License-Identifier: Apache-2.0

# Unit test for the binary -> text plan conversion.
add_test_case(
  textplan_conversion_test
  SOURCES
  BinaryToTextPlanConversionTest.cpp
  EXTRA_LINK_LIBS
  substrait_textplan_converter
  substrait_common
  parse_result_matchers
  protobuf-matchers
  gmock
  gtest
  gtest_main)

# The test data lives in the parent (converter) directory.
cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH TEXTPLAN_SOURCE_DIR)

# Stage the test data next to the test binary after every build of the test.
# VERBATIM keeps argument escaping identical across generators and platforms
# (paths containing spaces, for instance).
add_custom_command(
  TARGET textplan_conversion_test
  POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data.."
  COMMAND ${CMAKE_COMMAND} -E make_directory
          "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data"
  COMMAND
    ${CMAKE_COMMAND} -E copy "${TEXTPLAN_SOURCE_DIR}/data/q6_first_stage.json"
    "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json"
  COMMAND
    ${CMAKE_COMMAND} -E copy
    "${TEXTPLAN_SOURCE_DIR}/data/q6_first_stage.golden.splan"
    "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.golden.splan"
  VERBATIM)

message(
  STATUS
    "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json"
)
# SPDX-License-Identifier: Apache-2.0

# Build rules for the text plan parser:
#   * substrait_textplan_loader - parser passes built on the ANTLR grammar
#   * planparser                - command line front end (UNIX only)

include(../../../../third_party/datetime.cmake)

add_subdirectory(grammar)

add_library(
  substrait_textplan_loader
  SubstraitPlanVisitor.cpp
  SubstraitPlanVisitor.h
  SubstraitPlanPipelineVisitor.cpp
  SubstraitPlanPipelineVisitor.h
  SubstraitPlanRelationVisitor.cpp
  SubstraitPlanRelationVisitor.h
  SubstraitPlanSubqueryRelationVisitor.cpp
  SubstraitPlanSubqueryRelationVisitor.h
  SubstraitPlanTypeVisitor.cpp
  SubstraitPlanTypeVisitor.h
  LoadText.cpp
  LoadText.h
  ParseText.cpp
  ParseText.h
  SubstraitParserErrorListener.cpp
  SubstraitParserErrorListener.h)

target_link_libraries(
  substrait_textplan_loader
  symbol_table
  error_listener
  substrait_proto
  substrait_type
  substrait_expression
  textplan_grammar
  fmt::fmt-header-only
  date::date
  date::date-tz
  absl::status
  absl::statusor)

if(UNIX)
  add_executable(planparser Tool.cpp)
  target_link_libraries(planparser substrait_textplan_loader error_listener)
  install(TARGETS planparser EXPORT SubstraitTargets)
endif()
install(TARGETS substrait_textplan_loader EXPORT SubstraitTargets)

# Pass the variable name rather than its expansion: if() dereferences the name
# itself, so an unset or oddly valued option can neither produce an empty
# condition nor be dereferenced a second time.
if(SUBSTRAIT_CPP_BUILD_TESTING)
  add_subdirectory(tests)
endif()
encoded as a text protobuf. 14 | // Returns a list of errors if the text cannot be parsed. 15 | absl::StatusOr<::substrait::proto::Plan> loadFromText(const std::string& text); 16 | 17 | } // namespace io::substrait::textplan 18 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/ParseText.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/textplan/parser/ParseText.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "SubstraitPlanLexer/SubstraitPlanLexer.h" 12 | #include "SubstraitPlanParser/SubstraitPlanParser.h" 13 | #include "substrait/textplan/StructuredSymbolData.h" 14 | #include "substrait/textplan/parser/SubstraitParserErrorListener.h" 15 | #include "substrait/textplan/parser/SubstraitPlanPipelineVisitor.h" 16 | #include "substrait/textplan/parser/SubstraitPlanRelationVisitor.h" 17 | #include "substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.h" 18 | #include "substrait/textplan/parser/SubstraitPlanVisitor.h" 19 | 20 | namespace io::substrait::textplan { 21 | 22 | std::optional loadTextFile( 23 | std::string_view filename) { 24 | std::ifstream stream(std::string{filename}); 25 | if (stream.bad() || stream.fail()) { 26 | std::cout << "Bad stream." << std::endl; 27 | return std::nullopt; 28 | } 29 | if (!stream.is_open()) { 30 | std::cout << "Stream is not open." 
<< std::endl; 31 | return std::nullopt; 32 | } 33 | return {stream}; 34 | } 35 | 36 | antlr4::ANTLRInputStream loadTextString(std::string_view text) { 37 | return {text}; 38 | } 39 | 40 | ParseResult parseStream(antlr4::ANTLRInputStream* stream) { 41 | SubstraitParserErrorListener errorListener; 42 | 43 | SubstraitPlanLexer lexer(stream); 44 | lexer.removeErrorListeners(); 45 | lexer.addErrorListener(&errorListener); 46 | antlr4::CommonTokenStream tokens(&lexer); 47 | 48 | tokens.fill(); 49 | 50 | SubstraitPlanParser parser(&tokens); 51 | parser.removeErrorListeners(); 52 | parser.addErrorListener(&errorListener); 53 | auto* tree = parser.plan(); 54 | 55 | SymbolTable visitorSymbolTable; 56 | auto visitorErrorListener = std::make_shared(); 57 | auto visitor = std::make_shared( 58 | visitorSymbolTable, visitorErrorListener); 59 | try { 60 | visitor->visitPlan(tree); 61 | } catch (std::invalid_argument& ex) { 62 | // Catches the any_cast exception and logs a useful error message. 63 | errorListener.syntaxError( 64 | &parser, 65 | nullptr, 66 | /*line=*/1, 67 | /*charPositionInLine=*/1, 68 | ex.what(), 69 | std::current_exception()); 70 | } catch (...) { 71 | errorListener.syntaxError( 72 | &parser, 73 | nullptr, 74 | /*line=*/1, 75 | /*charPositionInLine=*/1, 76 | "uncaught parser exception encountered", 77 | std::current_exception()); 78 | } 79 | 80 | auto pipelineVisitor = std::make_shared( 81 | *visitor->getSymbolTable(), visitor->getErrorListener()); 82 | try { 83 | pipelineVisitor->visitPlan(tree); 84 | } catch (std::invalid_argument& ex) { 85 | // Catches the any_cast exception and logs a useful error message. 86 | errorListener.syntaxError( 87 | &parser, 88 | nullptr, 89 | /*line=*/1, 90 | /*charPositionInLine=*/1, 91 | ex.what(), 92 | std::current_exception()); 93 | } catch (...) 
{ 94 | errorListener.syntaxError( 95 | &parser, 96 | nullptr, 97 | /*line=*/1, 98 | /*charPositionInLine=*/1, 99 | "uncaught parser exception encountered", 100 | std::current_exception()); 101 | } 102 | 103 | auto relationVisitor = std::make_shared( 104 | *pipelineVisitor->getSymbolTable(), pipelineVisitor->getErrorListener()); 105 | try { 106 | relationVisitor->visitPlan(tree); 107 | } catch (std::invalid_argument& ex) { 108 | // Catches the any_cast exception and logs a useful error message. 109 | errorListener.syntaxError( 110 | &parser, 111 | nullptr, 112 | /*line=*/1, 113 | /*charPositionInLine=*/1, 114 | ex.what(), 115 | std::current_exception()); 116 | } catch (...) { 117 | errorListener.syntaxError( 118 | &parser, 119 | nullptr, 120 | /*line=*/1, 121 | /*charPositionInLine=*/1, 122 | "uncaught parser relation exception encountered", 123 | std::current_exception()); 124 | } 125 | 126 | if (relationVisitor->getErrorListener()->hasErrors()) { 127 | // We have enough errors that proceeding to the final step isn't useful. 128 | return { 129 | *relationVisitor->getSymbolTable(), 130 | errorListener.getErrorMessages(), 131 | relationVisitor->getErrorListener()->getErrorMessages()}; 132 | } 133 | 134 | auto subQueryRelationVisitor = 135 | std::make_shared( 136 | *relationVisitor->getSymbolTable(), 137 | relationVisitor->getErrorListener()); 138 | try { 139 | subQueryRelationVisitor->visitPlan(tree); 140 | } catch (std::invalid_argument& ex) { 141 | // Catches the any_cast exception and logs a useful error message. 142 | errorListener.syntaxError( 143 | &parser, 144 | nullptr, 145 | /*line=*/1, 146 | /*charPositionInLine=*/1, 147 | ex.what(), 148 | std::current_exception()); 149 | } catch (...) 
{ 150 | errorListener.syntaxError( 151 | &parser, 152 | nullptr, 153 | /*line=*/1, 154 | /*charPositionInLine=*/1, 155 | "uncaught parser relation exception encountered", 156 | std::current_exception()); 157 | } 158 | 159 | auto finalSymbolTable = subQueryRelationVisitor->getSymbolTable(); 160 | return { 161 | *finalSymbolTable, 162 | errorListener.getErrorMessages(), 163 | subQueryRelationVisitor->getErrorListener()->getErrorMessages()}; 164 | } 165 | 166 | } // namespace io::substrait::textplan 167 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/ParseText.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "substrait/textplan/ParseResult.h" 8 | #include "substrait/textplan/SymbolTable.h" 9 | #include "substrait/textplan/parser/SubstraitPlanVisitor.h" 10 | 11 | namespace antlr4 { 12 | class ANTLRInputStream; 13 | } // namespace antlr4 14 | 15 | namespace io::substrait::textplan { 16 | 17 | std::optional loadTextFile(std::string_view filename); 18 | antlr4::ANTLRInputStream loadTextString(std::string_view text); 19 | 20 | ParseResult parseStream(antlr4::ANTLRInputStream* stream); 21 | 22 | } // namespace io::substrait::textplan 23 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/README.md: -------------------------------------------------------------------------------- 1 | # Parser Phases 2 | 3 | There are four phases that the parser goes through to parse a textplan into its 4 | internal representation (stored inside a ```SymbolTable```). 5 | 6 | * ```SubstraitPlanVisitor``` 7 | * Finds the relations, processes functions, data files, and schemas. 8 | * ```SubstraitPlanPipelineVisitor``` 9 | * Stores the connections between the relations as determined by the pipelines 10 | section. 
11 | * ```SubstraitPlanRelationVisitor``` 12 | * Determines the input fields for all of the trivially reachable relations. 13 | * ```SubstraitPlanSubqueryRelationVisitor``` 14 | * Determines the input fields for subqueries. 15 | 16 | ```SubstraitPlanTypeVisitor``` is a common visitor used by all but 17 | ```SubstraitPlanPipelineVisitor``` to do type lookups. 18 | 19 | The phase order is implemented in ```ParseText.cpp```. 20 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/SubstraitParserErrorListener.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include "substrait/textplan/parser/SubstraitParserErrorListener.h" 4 | 5 | #include 6 | #include 7 | 8 | namespace io::substrait::textplan { 9 | 10 | void SubstraitParserErrorListener::syntaxError( 11 | antlr4::Recognizer* recognizer, 12 | antlr4::Token* offendingSymbol, 13 | size_t line, 14 | size_t charPositionInLine, 15 | const std::string& msg, 16 | std::exception_ptr e) { 17 | SubstraitErrorListener::addError(line, charPositionInLine, msg); 18 | } 19 | 20 | void SubstraitParserErrorListener::addError( 21 | const antlr4::Token* offendingSymbol, 22 | const std::string& msg) { 23 | SubstraitErrorListener::addError( 24 | offendingSymbol->getLine(), 25 | offendingSymbol->getCharPositionInLine(), 26 | msg); 27 | } 28 | 29 | } // namespace io::substrait::textplan 30 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/SubstraitParserErrorListener.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "substrait/textplan/SubstraitErrorListener.h" 8 | 9 | namespace io::substrait::textplan { 10 | 11 | class SubstraitParserErrorListener : public antlr4::BaseErrorListener, 12 | public 
SubstraitErrorListener { 13 | public: 14 | SubstraitParserErrorListener() = default; 15 | 16 | void syntaxError( 17 | antlr4::Recognizer* recognizer, 18 | antlr4::Token* offendingSymbol, 19 | size_t line, 20 | size_t charPositionInLine, 21 | const std::string& msg, 22 | std::exception_ptr e) override; 23 | 24 | void addError(const antlr4::Token* offendingSymbol, const std::string& msg); 25 | }; 26 | 27 | } // namespace io::substrait::textplan 28 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include "SubstraitPlanParser/SubstraitPlanParser.h" 6 | #include "SubstraitPlanParser/SubstraitPlanParserBaseVisitor.h" 7 | #include "substrait/textplan/SymbolTable.h" 8 | #include "substrait/textplan/parser/SubstraitParserErrorListener.h" 9 | 10 | namespace io::substrait::textplan { 11 | 12 | class SubstraitPlanPipelineVisitor : public SubstraitPlanParserBaseVisitor { 13 | public: 14 | SubstraitPlanPipelineVisitor( 15 | const SymbolTable& symbolTable, 16 | std::shared_ptr errorListener) { 17 | symbolTable_ = std::make_shared(symbolTable); 18 | errorListener_ = std::move(errorListener); 19 | } 20 | 21 | [[nodiscard]] std::shared_ptr getSymbolTable() const { 22 | return symbolTable_; 23 | }; 24 | 25 | [[nodiscard]] std::shared_ptr getErrorListener() 26 | const { 27 | return errorListener_; 28 | }; 29 | 30 | std::any visitPipelines(SubstraitPlanParser::PipelinesContext* ctx) override; 31 | std::any visitPipeline(SubstraitPlanParser::PipelineContext* ctx) override; 32 | 33 | std::any visitRelation(SubstraitPlanParser::RelationContext* ctx) override; 34 | std::any visitExpressionScalarSubquery( 35 | SubstraitPlanParser::ExpressionScalarSubqueryContext* ctx) override; 36 | std::any visitExpressionInPredicateSubquery( 37 | 
SubstraitPlanParser::ExpressionInPredicateSubqueryContext* ctx) override; 38 | std::any visitExpressionSetPredicateSubquery( 39 | SubstraitPlanParser::ExpressionSetPredicateSubqueryContext* ctx) override; 40 | std::any visitExpressionSetComparisonSubquery( 41 | SubstraitPlanParser::ExpressionSetComparisonSubqueryContext* ctx) 42 | override; 43 | 44 | private: 45 | // Creates a symbol table entry if we don't already have one, then adds the 46 | // current location. 47 | void updateRelationSymbol( 48 | SubstraitPlanParser::PipelineContext* ctx, 49 | const std::string& relationName); 50 | 51 | std::shared_ptr symbolTable_; 52 | std::shared_ptr errorListener_; 53 | 54 | const SymbolInfo* currentRelationScope_{nullptr}; 55 | Location currentRelationScopeLocation_{Location::kUnknownLocation}; 56 | }; 57 | 58 | } // namespace io::substrait::textplan 59 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include "SubstraitPlanParser/SubstraitPlanParser.h" 6 | #include "SubstraitPlanParser/SubstraitPlanParserBaseVisitor.h" 7 | #include "substrait/textplan/SymbolTable.h" 8 | #include "substrait/textplan/parser/SubstraitParserErrorListener.h" 9 | #include "substrait/type/Type.h" 10 | 11 | namespace substrait::proto { 12 | class Type; 13 | } 14 | 15 | namespace io::substrait::textplan { 16 | 17 | class SubstraitPlanTypeVisitor : public SubstraitPlanParserBaseVisitor { 18 | public: 19 | SubstraitPlanTypeVisitor( 20 | const SymbolTable& symbolTable, 21 | std::shared_ptr errorListener) { 22 | symbolTable_ = std::make_shared(symbolTable); 23 | errorListener_ = std::move(errorListener); 24 | } 25 | 26 | std::any visitLiteral_basic_type( 27 | SubstraitPlanParser::Literal_basic_typeContext* ctx) override; 28 | std::any 
visitLiteral_complex_type( 29 | SubstraitPlanParser::Literal_complex_typeContext* ctx) override; 30 | 31 | protected: 32 | ::substrait::proto::Type textToTypeProto( 33 | const antlr4::ParserRuleContext* ctx, 34 | const std::string& typeText); 35 | 36 | ::substrait::proto::Type typeToProto( 37 | const antlr4::ParserRuleContext* ctx, 38 | const ParameterizedType& decodedType); 39 | 40 | // Identifies whether the given context has a parent node of a constant 41 | // including a struct. This allows {3years, 1month, 42 | // 2days}_interval_year_month_day to have the optional label tags which are 43 | // not real types. 44 | bool insideStructLiteralWithExternalType(const antlr4::RuleContext* ctx); 45 | 46 | std::shared_ptr symbolTable_; 47 | std::shared_ptr errorListener_; 48 | }; 49 | 50 | } // namespace io::substrait::textplan 51 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/SubstraitPlanVisitor.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include "SubstraitPlanParser/SubstraitPlanParser.h" 6 | #include "substrait/textplan/SymbolTable.h" 7 | #include "substrait/textplan/parser/SubstraitParserErrorListener.h" 8 | #include "substrait/textplan/parser/SubstraitPlanTypeVisitor.h" 9 | 10 | namespace io::substrait::textplan { 11 | 12 | class SubstraitPlanVisitor : public SubstraitPlanTypeVisitor { 13 | public: 14 | SubstraitPlanVisitor( 15 | const SymbolTable& symbolTable, 16 | std::shared_ptr errorListener) 17 | : SubstraitPlanTypeVisitor(symbolTable, std::move(errorListener)) {} 18 | 19 | [[nodiscard]] std::shared_ptr getSymbolTable() const { 20 | return symbolTable_; 21 | }; 22 | 23 | [[nodiscard]] std::shared_ptr getErrorListener() 24 | const { 25 | return errorListener_; 26 | }; 27 | 28 | std::any visitPlan(SubstraitPlanParser::PlanContext* context) override; 29 | std::any visitPlan_detail( 
30 | SubstraitPlanParser::Plan_detailContext* ctx) override; 31 | std::any visitPipelines(SubstraitPlanParser::PipelinesContext* ctx) override; 32 | std::any visitPipeline(SubstraitPlanParser::PipelineContext* ctx) override; 33 | std::any visitExtensionspace( 34 | SubstraitPlanParser::ExtensionspaceContext* ctx) override; 35 | std::any visitFunction(SubstraitPlanParser::FunctionContext* ctx) override; 36 | std::any visitName(SubstraitPlanParser::NameContext* ctx) override; 37 | std::any visitSignature(SubstraitPlanParser::SignatureContext* ctx) override; 38 | std::any visitSchema_definition( 39 | SubstraitPlanParser::Schema_definitionContext* ctx) override; 40 | std::any visitSchema_item( 41 | SubstraitPlanParser::Schema_itemContext* ctx) override; 42 | std::any visitRelation(SubstraitPlanParser::RelationContext* ctx) override; 43 | std::any visitRoot_relation( 44 | SubstraitPlanParser::Root_relationContext* ctx) override; 45 | std::any visitRelation_type( 46 | SubstraitPlanParser::Relation_typeContext* ctx) override; 47 | std::any visitSource_definition( 48 | SubstraitPlanParser::Source_definitionContext* ctx) override; 49 | std::any visitLiteral_specifier( 50 | SubstraitPlanParser::Literal_specifierContext* ctx) override; 51 | std::any visitMap_literal_value( 52 | SubstraitPlanParser::Map_literal_valueContext* ctx) override; 53 | std::any visitMap_literal( 54 | SubstraitPlanParser::Map_literalContext* ctx) override; 55 | std::any visitStruct_literal( 56 | SubstraitPlanParser::Struct_literalContext* ctx) override; 57 | std::any visitConstant(SubstraitPlanParser::ConstantContext* ctx) override; 58 | std::any visitColumn_name( 59 | SubstraitPlanParser::Column_nameContext* ctx) override; 60 | std::any visitSource_reference( 61 | SubstraitPlanParser::Source_referenceContext* ctx) override; 62 | std::any visitExpressionFunctionUse( 63 | SubstraitPlanParser::ExpressionFunctionUseContext* ctx) override; 64 | std::any visitExpressionConstant( 65 | 
SubstraitPlanParser::ExpressionConstantContext* ctx) override; 66 | std::any visitExpressionCast( 67 | SubstraitPlanParser::ExpressionCastContext* ctx) override; 68 | std::any visitExpressionColumn( 69 | SubstraitPlanParser::ExpressionColumnContext* ctx) override; 70 | std::any visitRelationCommon( 71 | SubstraitPlanParser::RelationCommonContext* ctx) override; 72 | std::any visitRelationUsesSchema( 73 | SubstraitPlanParser::RelationUsesSchemaContext* ctx) override; 74 | std::any visitRelation_filter_behavior( 75 | SubstraitPlanParser::Relation_filter_behaviorContext* ctx) override; 76 | std::any visitMeasure_detail( 77 | SubstraitPlanParser::Measure_detailContext* ctx) override; 78 | std::any visitRelationFilter( 79 | SubstraitPlanParser::RelationFilterContext* ctx) override; 80 | std::any visitRelationExpression( 81 | SubstraitPlanParser::RelationExpressionContext* ctx) override; 82 | std::any visitRelationAdvancedExtension( 83 | SubstraitPlanParser::RelationAdvancedExtensionContext* ctx) override; 84 | std::any visitRelationSourceReference( 85 | SubstraitPlanParser::RelationSourceReferenceContext* ctx) override; 86 | std::any visitRelationGrouping( 87 | SubstraitPlanParser::RelationGroupingContext* ctx) override; 88 | std::any visitRelationMeasure( 89 | SubstraitPlanParser::RelationMeasureContext* ctx) override; 90 | std::any visitRelationSort( 91 | SubstraitPlanParser::RelationSortContext* ctx) override; 92 | std::any visitRelationCount( 93 | SubstraitPlanParser::RelationCountContext* ctx) override; 94 | std::any visitRelationJoinType( 95 | SubstraitPlanParser::RelationJoinTypeContext* ctx) override; 96 | std::any visitFile_location( 97 | SubstraitPlanParser::File_locationContext* ctx) override; 98 | std::any visitFile_detail( 99 | SubstraitPlanParser::File_detailContext* ctx) override; 100 | std::any visitFile(SubstraitPlanParser::FileContext* ctx) override; 101 | std::any visitLocal_files_detail( 102 | SubstraitPlanParser::Local_files_detailContext* ctx) 
override; 103 | std::any visitLocalFiles( 104 | SubstraitPlanParser::LocalFilesContext* ctx) override; 105 | std::any visitVirtualTable( 106 | SubstraitPlanParser::VirtualTableContext* ctx) override; 107 | std::any visitNamedTable( 108 | SubstraitPlanParser::NamedTableContext* ctx) override; 109 | std::any visitExtensionTable( 110 | SubstraitPlanParser::ExtensionTableContext* ctx) override; 111 | std::any visitNamed_table_detail( 112 | SubstraitPlanParser::Named_table_detailContext* ctx) override; 113 | std::any visitRelation_ref( 114 | SubstraitPlanParser::Relation_refContext* ctx) override; 115 | std::any visitSort_field( 116 | SubstraitPlanParser::Sort_fieldContext* ctx) override; 117 | std::any visitId(SubstraitPlanParser::IdContext* ctx) override; 118 | std::any visitSimple_id(SubstraitPlanParser::Simple_idContext* ctx) override; 119 | 120 | private: 121 | const SymbolInfo* currentRelationScope_{nullptr}; // Not owned. 122 | 123 | int numSpacesSeen_{0}; 124 | int numFunctionsSeen_{0}; 125 | }; 126 | 127 | } // namespace io::substrait::textplan 128 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/Tool.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | #include 5 | 6 | #include "substrait/textplan/SymbolTablePrinter.h" 7 | #include "substrait/textplan/parser/ParseText.h" 8 | 9 | namespace io::substrait::textplan { 10 | namespace { 11 | 12 | void readText(const char* filename) { 13 | auto stream = io::substrait::textplan::loadTextFile(filename); 14 | if (!stream.has_value()) { 15 | std::cerr << "An error occurred while reading: " << filename << std::endl; 16 | return; 17 | } 18 | auto parseResult = io::substrait::textplan::parseStream(&*stream); 19 | if (!parseResult.successful()) { 20 | for (const std::string& msg : parseResult.getAllErrors()) { 21 | std::cout << msg << std::endl; 22 | } 23 | 
return; 24 | } 25 | 26 | SubstraitErrorListener errorListener; 27 | auto text = SymbolTablePrinter::outputToText( 28 | parseResult.getSymbolTable(), &errorListener); 29 | if (errorListener.hasErrors()) { 30 | for (const std::string& msg : errorListener.getErrorMessages()) { 31 | std::cout << msg << std::endl; 32 | } 33 | return; 34 | } 35 | 36 | std::cout << text; 37 | } 38 | 39 | } // namespace 40 | } // namespace io::substrait::textplan 41 | 42 | int main(int argc, char* argv[]) { 43 | while (true) { 44 | int optionIndex = 0; 45 | static struct option longOptions[] = {{nullptr, 0, nullptr, 0}}; 46 | 47 | int c = getopt_long(argc, argv, "", longOptions, &optionIndex); 48 | if (c == -1) { 49 | break; 50 | } 51 | } 52 | 53 | if (optind >= argc) { 54 | printf("Usage: planparser ...\n"); 55 | return EXIT_FAILURE; 56 | } 57 | 58 | int currArg = optind; 59 | for (; currArg < argc; currArg++) { 60 | printf("===== %s =====\n", argv[currArg]); 61 | io::substrait::textplan::readText(argv[currArg]); 62 | } 63 | 64 | return EXIT_SUCCESS; 65 | } 66 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/data/provided_sample1.json: -------------------------------------------------------------------------------- 1 | { 2 | "extensionUris": [{ 3 | "extensionUriAnchor": 1, 4 | "uri": "/functions_arithmetic.yaml" 5 | }, { 6 | "extensionUriAnchor": 2, 7 | "uri": "/functions_string.yaml" 8 | }], 9 | "extensions": [{ 10 | "extensionFunction": { 11 | "extensionUriReference": 1, 12 | "name": "add:opt_i32_i32" 13 | } 14 | }, { 15 | "extensionFunction": { 16 | "extensionUriReference": 1, 17 | "functionAnchor": 1, 18 | "name": "subtract:opt_i32_i32" 19 | } 20 | }, { 21 | "extensionFunction": { 22 | "extensionUriReference": 2, 23 | "functionAnchor": 2, 24 | "name": "concat:str" 25 | } 26 | }], 27 | "relations": [{ 28 | "root": { 29 | "input": { 30 | "project": { 31 | "common": { 32 | "direct": { 33 | } 34 | }, 35 | "input": { 36 | 
"read": { 37 | "common": { 38 | "direct": { 39 | } 40 | }, 41 | "baseSchema": { 42 | "names": ["r_regionkey", "r_name", "r_comment"], 43 | "struct": { 44 | "types": [{ 45 | "i32": { 46 | "nullability": "NULLABILITY_REQUIRED" 47 | } 48 | }, { 49 | "string": { 50 | "nullability": "NULLABILITY_REQUIRED" 51 | } 52 | }, { 53 | "string": { 54 | "nullability": "NULLABILITY_NULLABLE" 55 | } 56 | }], 57 | "nullability": "NULLABILITY_REQUIRED" 58 | } 59 | }, 60 | "namedTable": { 61 | "names": ["#2"] 62 | } 63 | } 64 | }, 65 | "expressions": [{ 66 | "selection": { 67 | "directReference": { 68 | "structField": { 69 | } 70 | }, 71 | "rootReference": { 72 | } 73 | } 74 | }, { 75 | "selection": { 76 | "directReference": { 77 | "structField": { 78 | "field": 1 79 | } 80 | }, 81 | "rootReference": { 82 | } 83 | } 84 | }, { 85 | "selection": { 86 | "directReference": { 87 | "structField": { 88 | "field": 2 89 | } 90 | }, 91 | "rootReference": { 92 | } 93 | } 94 | }, { 95 | "scalarFunction": { 96 | "outputType": { 97 | "i32": { 98 | "nullability": "NULLABILITY_REQUIRED" 99 | } 100 | }, 101 | "arguments": [{ 102 | "value": { 103 | "selection": { 104 | "directReference": { 105 | "structField": { 106 | } 107 | }, 108 | "rootReference": { 109 | } 110 | } 111 | } 112 | }, { 113 | "value": { 114 | "literal": { 115 | "i32": 1 116 | } 117 | } 118 | }] 119 | } 120 | }, { 121 | "scalarFunction": { 122 | "functionReference": 1, 123 | "outputType": { 124 | "i32": { 125 | "nullability": "NULLABILITY_REQUIRED" 126 | } 127 | }, 128 | "arguments": [{ 129 | "value": { 130 | "selection": { 131 | "directReference": { 132 | "structField": { 133 | } 134 | }, 135 | "rootReference": { 136 | } 137 | } 138 | } 139 | }, { 140 | "value": { 141 | "literal": { 142 | "i32": 1 143 | } 144 | } 145 | }] 146 | } 147 | }, { 148 | "scalarFunction": { 149 | "functionReference": 2, 150 | "outputType": { 151 | "string": { 152 | "nullability": "NULLABILITY_REQUIRED" 153 | } 154 | }, 155 | "arguments": [{ 156 | "value": { 
157 | "selection": { 158 | "directReference": { 159 | "structField": { 160 | "field": 1 161 | } 162 | }, 163 | "rootReference": { 164 | } 165 | } 166 | } 167 | }, { 168 | "value": { 169 | "selection": { 170 | "directReference": { 171 | "structField": { 172 | "field": 1 173 | } 174 | }, 175 | "rootReference": { 176 | } 177 | } 178 | } 179 | }] 180 | } 181 | }] 182 | } 183 | }, 184 | "names": ["r_regionkey", "r_name", "r_comment", "plus", "subtract", "r_name2"] 185 | } 186 | }] 187 | } 188 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/data/provided_sample1.splan: -------------------------------------------------------------------------------- 1 | pipelines { 2 | read -> project -> root; 3 | } 4 | 5 | read relation read { 6 | base_schema schema; 7 | source named; 8 | } 9 | 10 | project relation project { 11 | expression r_regionkey; 12 | expression r_name; 13 | expression r_comment; 14 | expression add(r_regionkey, 1_i32); 15 | expression subtract(r_regionkey, 1_i32); 16 | expression concat(r_name, r_name); 17 | } 18 | 19 | schema schema { 20 | r_regionkey i32; 21 | r_name string; 22 | r_comment string?; 23 | } 24 | 25 | source named_table named { 26 | names = [ 27 | "#2", 28 | ] 29 | } 30 | 31 | extension_space https://place.com/boolean.yaml { 32 | function add:opt_i32_i32 as add; 33 | function subtract:opt_i32_i32 as subtract; 34 | function concat:str as concat; 35 | } 36 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/grammar/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | find_package(ANTLR REQUIRED) 4 | 5 | antlr_target(SubstraitPlanLexer SubstraitPlanLexer.g4 LEXER PACKAGE 6 | io::substrait::textplan) 7 | antlr_target( 8 | SubstraitPlanParser 9 | SubstraitPlanParser.g4 10 | PARSER 11 | PACKAGE 12 | io::substrait::textplan 13 | 
DEPENDS_ANTLR 14 | SubstraitPlanLexer 15 | COMPILE_FLAGS 16 | -lib 17 | ${ANTLR_SubstraitPlanLexer_OUTPUT_DIR} 18 | VISITOR) 19 | 20 | include_directories(${ANTLR_SubstraitPlanLexer_OUTPUT_DIR}) 21 | include_directories(${ANTLR_SubstraitPlanParser_OUTPUT_DIR}) 22 | 23 | add_custom_target(textplan_grammar_headers 24 | DEPENDS ${ANTLR_SubstraitPlanParser_OUTPUTS}) 25 | 26 | message( 27 | STATUS 28 | "ANTLR4 generated files: ${ANTLR_SubstraitPlanLexer_CXX_OUTPUTS} ${ANTLR_SubstraitPlanParser_CXX_OUTPUTS}" 29 | ) 30 | add_library(textplan_grammar ${ANTLR_SubstraitPlanLexer_CXX_OUTPUTS} 31 | ${ANTLR_SubstraitPlanParser_CXX_OUTPUTS}) 32 | 33 | set(GRAMMAR_DIR ${CMAKE_CURRENT_BINARY_DIR}) 34 | message(STATUS "generated dir: ${GRAMMAR_DIR}/antlr4cpp_generated_src") 35 | 36 | target_include_directories( 37 | textplan_grammar 38 | PUBLIC $) 39 | 40 | target_link_libraries(textplan_grammar antlr4_static) 41 | 42 | # Things which link against textplan_grammar (and pull in antlr4 headers) should 43 | # always set 'ANTLR4CPP_STATIC', to avoid declaring things in the antlr4 44 | # headers as dllexport'ed. 
45 | target_compile_definitions(textplan_grammar PUBLIC ANTLR4CPP_STATIC) 46 | 47 | install(TARGETS textplan_grammar EXPORT SubstraitTargets) 48 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | // $antlr-format alignTrailingComments on, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments off, useTab off 4 | // $antlr-format allowShortRulesOnASingleLine on, alignSemicolons ownLine 5 | 6 | lexer grammar SubstraitPlanLexer; 7 | 8 | options { 9 | caseInsensitive = true; 10 | } 11 | 12 | @lexer::header { 13 | // SPDX-License-Identifier: Apache-2.0 14 | } 15 | 16 | @lexer::postinclude { 17 | #ifndef _WIN32 18 | #pragma GCC diagnostic ignored "-Wunused-parameter" 19 | #endif 20 | } 21 | 22 | channels { CommentsChannel, DirectiveChannel } 23 | 24 | tokens { SPACES } 25 | 26 | EXTENSION_SPACE: 'EXTENSION_SPACE' -> mode(EXTENSIONS); 27 | FUNCTION: 'FUNCTION'; 28 | AS: 'AS'; 29 | NAMED: 'NAMED'; 30 | SCHEMA: 'SCHEMA'; 31 | RELATION: 'RELATION'; 32 | PIPELINES: 'PIPELINES'; 33 | 34 | COMMON: 'COMMON'; 35 | BASE_SCHEMA: 'BASE_SCHEMA'; 36 | FILTER: 'FILTER'; 37 | PROJECTION: 'PROJECTION'; 38 | EXPRESSION: 'EXPRESSION'; 39 | ADVANCED_EXTENSION: 'ADVANCED_EXTENSION'; 40 | GROUPING: 'GROUPING'; 41 | MEASURE: 'MEASURE'; 42 | INVOCATION: 'INVOCATION'; 43 | SORT: 'SORT'; 44 | BY: 'BY'; 45 | COUNT: 'COUNT'; 46 | TYPE: 'TYPE'; 47 | EMIT: 'EMIT'; 48 | 49 | SUBQUERY: 'SUBQUERY'; 50 | EXISTS: 'EXISTS'; 51 | UNIQUE: 'UNIQUE'; 52 | IN: 'IN'; 53 | ALL: 'ALL'; 54 | ANY: 'ANY'; 55 | COMPARISON: 'EQ'|'NE'|'LT'|'GT'|'LE'|'GE'; 56 | 57 | VIRTUAL_TABLE: 'VIRTUAL_TABLE'; 58 | LOCAL_FILES: 'LOCAL_FILES'; 59 | NAMED_TABLE: 'NAMED_TABLE'; 60 | EXTENSION_TABLE: 'EXTENSION_TABLE'; 61 | 62 | SOURCE: 'SOURCE'; 63 | ROOT: 'ROOT'; 64 | ITEMS: 'ITEMS'; 65 | NAMES: 'NAMES'; 66 | 
URI_FILE: 'URI_FILE'; 67 | URI_PATH: 'URI_PATH'; 68 | URI_PATH_GLOB: 'URI_PATH_GLOB'; 69 | URI_FOLDER: 'URI_FOLDER'; 70 | PARTITION_INDEX: 'PARTITION_INDEX'; 71 | START: 'START'; 72 | LENGTH: 'LENGTH'; 73 | ORC: 'ORC'; 74 | PARQUET: 'PARQUET'; 75 | NULLVAL: 'NULL'; 76 | TRUEVAL: 'TRUE'; 77 | FALSEVAL: 'FALSE'; 78 | 79 | LIST: 'LIST'; 80 | MAP: 'MAP'; 81 | STRUCT: 'STRUCT'; 82 | 83 | ARROW: '->'; 84 | COLON: ':'; 85 | SEMICOLON: ';'; 86 | LEFTBRACE: '{'; 87 | RIGHTBRACE: '}'; 88 | LEFTPAREN: '('; 89 | RIGHTPAREN: ')'; 90 | fragment QUOTE: '"'; 91 | COMMA: ','; 92 | PERIOD: '.'; 93 | EQUAL: '='; 94 | LEFTBRACKET: '['; 95 | RIGHTBRACKET: ']'; 96 | UNDERSCORE: '_'; 97 | MINUS: '-'; 98 | LEFTANGLEBRACKET: '<'; 99 | RIGHTANGLEBRACKET: '>'; 100 | QUESTIONMARK: '?'; 101 | ATSIGN: '@'; 102 | 103 | IDENTIFIER 104 | : [A-Z][A-Z0-9$]* 105 | ; 106 | 107 | NUMBER 108 | : MINUS? [0-9]+ ( PERIOD [0-9]+ )? 109 | | MINUS? [0-9]+ ( PERIOD [0-9]+ )? 'E' ('+' | MINUS) [0-9]+ 110 | ; 111 | 112 | STRING : '"' (ESCAPEDQUOTE | ~["])* '"' ; 113 | fragment ESCAPEDQUOTE : '\\' '"' ; 114 | fragment HEX : [0-9A-F] ; 115 | fragment DIGIT : [0-9] ; 116 | 117 | RAW_LITERAL_SINGLE_BACKTICK : '`' ~[`]+? '`' -> type(STRING) ; 118 | RAW_LITERAL_DOUBLE_BACKTICK : '``' .+? '``' -> type(STRING) ; 119 | RAW_LITERAL_TRIPLE_BACKTICK : '```' .+? '```' -> type(STRING) ; 120 | 121 | SINGLE_LINE_COMMENT: '//' ~[\r\n]* (('\r'? '\n') | EOF) -> channel(HIDDEN); 122 | 123 | SPACES: [ \u000B\t\r\n] -> channel(HIDDEN); 124 | 125 | mode EXTENSIONS; 126 | fragment SCHEME: [A-Z]+ ; 127 | fragment HOSTNAME: [A-Z0-9-.]+ ; 128 | fragment FILENAME: [A-Z0-9-._]+; 129 | fragment PATH: FILENAME ( '/' FILENAME )*; 130 | 131 | URI 132 | : SCHEME ':' ( '//' HOSTNAME '/' )? PATH 133 | | '/'? 
PATH 134 | ; 135 | 136 | EXTENSIONS_LEFTBRACE: '{' -> mode(DEFAULT_MODE), type(LEFTBRACE); 137 | 138 | EXTENSIONS_SPACES: [ \u000B\t\r\n] -> channel(HIDDEN), type(SPACES); 139 | -------------------------------------------------------------------------------- /src/substrait/textplan/parser/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_test_case( 4 | textplan_parser_test 5 | SOURCES 6 | TextPlanParserTest.cpp 7 | EXTRA_LINK_LIBS 8 | substrait_textplan_loader 9 | substrait_common 10 | parse_result_matchers 11 | protobuf-matchers 12 | gmock 13 | gtest 14 | gtest_main) 15 | 16 | cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH TEXTPLAN_SOURCE_DIR) 17 | 18 | add_custom_command( 19 | TARGET textplan_parser_test 20 | POST_BUILD 21 | COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data..." 22 | COMMAND ${CMAKE_COMMAND} -E make_directory 23 | "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" 24 | COMMAND 25 | ${CMAKE_COMMAND} -E copy 26 | "${TEXTPLAN_SOURCE_DIR}/data/provided_sample1.splan" 27 | "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/provided_sample1.splan") 28 | -------------------------------------------------------------------------------- /src/substrait/textplan/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_library(parse_result_matchers ParseResultMatchers.cpp ParseResultMatchers.h) 4 | 5 | add_dependencies(parse_result_matchers parse_result) 6 | 7 | target_link_libraries(parse_result_matchers parse_result symbol_table 8 | substrait_proto gmock) 9 | 10 | add_test_case( 11 | symbol_table_test 12 | SOURCES 13 | SymbolTableTest.cpp 14 | EXTRA_LINK_LIBS 15 | symbol_table 16 | substrait_common 17 | substrait_proto 18 | fmt::fmt-header-only 19 | gmock 20 | gtest 21 | gtest_main) 22 | 23 | add_test_case( 24 | substrait_textplan_round_trip_test 25 | 
SOURCES 26 | RoundtripTest.cpp 27 | EXTRA_LINK_LIBS 28 | substrait_textplan_converter 29 | substrait_textplan_loader 30 | substrait_textplan_normalizer 31 | substrait_common 32 | substrait_proto 33 | parse_result_matchers 34 | protobuf-matchers 35 | fmt::fmt-header-only 36 | gmock 37 | gtest 38 | gtest_main) 39 | 40 | cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH TEXTPLAN_SOURCE_DIR) 41 | 42 | # Collect every JSON file in the textplan data directory into TEXTPLAN_JSON_FILES, the list the foreach() below iterates over. 43 | file(GLOB TEXTPLAN_JSON_FILES "${TEXTPLAN_SOURCE_DIR}/data/*.json") 44 | 45 | add_custom_command( 46 | TARGET substrait_textplan_round_trip_test 47 | POST_BUILD 48 | COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data.." 49 | COMMAND ${CMAKE_COMMAND} -E make_directory 50 | "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" 51 | COMMAND 52 | ${CMAKE_COMMAND} -E copy 53 | "${TEXTPLAN_SOURCE_DIR}/converter/data/q6_first_stage.json" 54 | "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json") 55 | # Copy each globbed JSON file into the tests/data directory after the test target is built. 56 | foreach(json_file ${TEXTPLAN_JSON_FILES}) 57 | add_custom_command( 58 | TARGET substrait_textplan_round_trip_test 59 | POST_BUILD 60 | COMMAND ${CMAKE_COMMAND} -E copy "${json_file}" 61 | "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/") 62 | endforeach() 63 | 64 | message( 65 | STATUS "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" 66 | ) 67 | -------------------------------------------------------------------------------- /src/substrait/textplan/tests/ParseResultMatchers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include "substrait/proto/plan.pb.h" 12 | #include "substrait/textplan/ParseResult.h" 13 | 14 | // NOLINTBEGIN(readability-identifier-naming) 15 | 16 | namespace io::substrait::textplan { 17 | 18 | [[maybe_unused]] ::testing::Matcher ParsesOk(); 19 | 20 | [[maybe_unused]] ::testing::Matcher HasSymbols( 21 | const 
std::vector& expectedSymbols); 22 | 23 | [[maybe_unused]] ::testing::Matcher HasSymbolsWithTypes( 24 | const std::vector& expected_symbols, 25 | const std::vector& interesting_types); 26 | 27 | [[maybe_unused]] ::testing::Matcher WhenSerialized( 28 | ::testing::Matcher stringMatcher); 29 | 30 | [[maybe_unused]] ::testing::Matcher AsBinaryPlan( 31 | ::testing::Matcher protoMatcher); 32 | 33 | [[maybe_unused]] ::testing::Matcher HasErrors( 34 | const std::vector& expectedErrors); 35 | 36 | // Matches strings ignoring differences in kinds of whitespace (as long as they 37 | // are present) and ignoring trailing whitespace as well. 38 | [[maybe_unused]] ::testing::Matcher EqSquashingWhitespace( 39 | const std::string& expectedString); 40 | 41 | // NOLINTEND(readability-identifier-naming) 42 | 43 | } // namespace io::substrait::textplan 44 | -------------------------------------------------------------------------------- /src/substrait/textplan/tests/RoundtripTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "substrait/textplan/SymbolTablePrinter.h" 17 | #include "substrait/textplan/converter/LoadBinary.h" 18 | #include "substrait/textplan/converter/ParseBinary.h" 19 | #include "substrait/textplan/converter/ReferenceNormalizer.h" 20 | #include "substrait/textplan/parser/ParseText.h" 21 | #include "substrait/textplan/tests/ParseResultMatchers.h" 22 | 23 | using ::protobuf_matchers::EqualsProto; 24 | using ::protobuf_matchers::IgnoringFields; 25 | using ::testing::AllOf; 26 | 27 | namespace io::substrait::textplan { 28 | namespace { 29 | 30 | bool endsWith(const std::string& haystack, const std::string& needle) { 31 | return haystack.size() > needle.size() && 32 | haystack.substr(haystack.size() - needle.size()) == needle; 
33 | } 34 | 35 | std::string addLineNumbers(const std::string& text) { 36 | std::stringstream input{text}; 37 | std::stringstream result; 38 | int lineNum = 0; 39 | std::string line; 40 | while (std::getline(input, line, '\n')) { 41 | result << std::setw(4) << ++lineNum << " " << line << std::endl; 42 | } 43 | return result.str(); 44 | } 45 | 46 | ::substrait::proto::Plan normalizePlan(const ::substrait::proto::Plan& plan) { 47 | ::substrait::proto::Plan newPlan = plan; 48 | ReferenceNormalizer::normalize(&newPlan); 49 | return newPlan; 50 | } 51 | 52 | class RoundTripBinaryToTextFixture 53 | : public ::testing::TestWithParam {}; 54 | 55 | std::vector getTestCases() { 56 | const std::filesystem::path currPath = std::filesystem::current_path(); 57 | std::vector filenames{}; 58 | std::filesystem::path testDataPath = currPath; 59 | testDataPath.append("data"); 60 | for (auto const& dirEntry : 61 | std::filesystem::recursive_directory_iterator{testDataPath}) { 62 | std::string pathName = dirEntry.path().string(); 63 | if (endsWith(pathName, ".json") && 64 | !endsWith(pathName, "q6_first_stage.json")) { 65 | filenames.push_back(pathName); 66 | } 67 | } 68 | std::sort(filenames.begin(), filenames.end()); 69 | return filenames; 70 | } 71 | 72 | GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(RoundTripBinaryToTextFixture); 73 | 74 | TEST_P(RoundTripBinaryToTextFixture, RoundTrip) { 75 | auto filename = GetParam(); 76 | auto jsonOrError = readFromFile(filename); 77 | ASSERT_TRUE(jsonOrError.ok()); 78 | auto planOrError = loadFromJson(*jsonOrError); 79 | ASSERT_TRUE(planOrError.ok()); 80 | 81 | auto plan = *planOrError; 82 | 83 | auto textResult = parseBinaryPlan(plan); 84 | auto textSymbols = textResult.getSymbolTable().getSymbols(); 85 | 86 | SubstraitErrorListener errorListener; 87 | std::string outputText = SymbolTablePrinter::outputToText( 88 | textResult.getSymbolTable(), &errorListener); 89 | textResult.addErrors(errorListener.getErrorMessages()); 90 | 91 | 
ASSERT_THAT(textResult, ParsesOk()) 92 | << std::endl 93 | << "Initial result:" << std::endl 94 | << addLineNumbers(outputText) << std::endl 95 | << textResult.getSymbolTable().toDebugString() << std::endl; 96 | 97 | auto stream = loadTextString(outputText); 98 | auto result = parseStream(&stream); 99 | ASSERT_NO_THROW(auto outputBinary = SymbolTablePrinter::outputToBinaryPlan( 100 | result.getSymbolTable());); 101 | 102 | auto normalizedPlan = normalizePlan(plan); 103 | ASSERT_THAT( 104 | result, 105 | ::testing::AllOf( 106 | ParsesOk(), 107 | HasErrors({}), 108 | AsBinaryPlan(IgnoringFields( 109 | {"substrait.proto.RelCommon.Emit.output_mapping"}, 110 | EqualsProto(normalizedPlan))))) 111 | << std::endl 112 | << "Intermediate result:" << std::endl 113 | << addLineNumbers(outputText) << std::endl 114 | << result.getSymbolTable().toDebugString() << std::endl; 115 | } 116 | 117 | INSTANTIATE_TEST_SUITE_P( 118 | RoundTripBinaryToTextTests, 119 | RoundTripBinaryToTextFixture, 120 | ::testing::ValuesIn(getTestCases()), 121 | [](const testing::TestParamInfo& info) { 122 | std::string identifier = info.param; 123 | auto lastSlash = identifier.find_last_of('/'); 124 | if (lastSlash != std::string::npos) { 125 | identifier = identifier.substr(lastSlash); 126 | } 127 | if (endsWith(identifier, ".json")) { 128 | identifier = identifier.substr(0, identifier.length() - 5); 129 | } 130 | 131 | // Remove non-alphanumeric characters to make the test framework happy. 
132 | identifier.erase( 133 | std::remove_if( 134 | identifier.begin(), 135 | identifier.end(), 136 | [](auto const& c) -> bool { return !std::isalnum(c); }), 137 | identifier.end()); 138 | return identifier; 139 | }); 140 | 141 | } // namespace 142 | } // namespace io::substrait::textplan 143 | -------------------------------------------------------------------------------- /src/substrait/textplan/tests/SymbolTableTest.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: Apache-2.0 */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "substrait/proto/plan.pb.h" 8 | #include "substrait/textplan/Any.h" 9 | #include "substrait/textplan/Location.h" 10 | #include "substrait/textplan/SymbolTable.h" 11 | 12 | namespace io::substrait::textplan { 13 | namespace { 14 | 15 | class SymbolTableTest : public ::testing::Test { 16 | public: 17 | SymbolTableTest() : unspecifiedLocation_(Location::kUnknownLocation){}; 18 | 19 | protected: 20 | static std::vector symbolNames( 21 | const std::vector>& symbols) { 22 | std::vector names; 23 | for (const auto& symbol : symbols) { 24 | names.push_back(symbol->name); 25 | } 26 | return names; 27 | } 28 | 29 | static SymbolTable createSimpleTable(::substrait::proto::Plan* plan) { 30 | auto* ptr1 = plan->add_relations(); 31 | auto* ptr2 = plan->add_extension_uris(); 32 | auto* ptr3 = plan->add_extensions(); 33 | 34 | SymbolTable table; 35 | table.defineSymbol( 36 | "symbol1", 37 | Location(ptr1), 38 | SymbolType::kUnknown, 39 | RelationType::kUnknown, 40 | ptr1); 41 | table.defineSymbol( 42 | "symbol2", 43 | Location(ptr2), 44 | SymbolType::kUnknown, 45 | RelationType::kUnknown, 46 | ptr2); 47 | table.defineSymbol( 48 | "symbol3", 49 | Location(ptr3), 50 | SymbolType::kUnknown, 51 | RelationType::kUnknown, 52 | ptr3); 53 | return table; 54 | } 55 | 56 | const Location unspecifiedLocation_; 57 | }; 58 | 59 | TEST_F(SymbolTableTest, DuplicateSymbolsNotDetected) { 60 | 
SymbolTable table; 61 | table.defineSymbol( 62 | "a", 63 | unspecifiedLocation_, 64 | SymbolType::kUnknown, 65 | RelationType::kUnknown, 66 | nullptr); 67 | table.defineSymbol( 68 | "a", 69 | unspecifiedLocation_, 70 | SymbolType::kUnknown, 71 | RelationType::kUnknown, 72 | nullptr); 73 | 74 | ASSERT_THAT( 75 | symbolNames(table.getSymbols()), ::testing::ElementsAre("a", "a")); 76 | } 77 | 78 | TEST_F(SymbolTableTest, DuplicateSymbolsHandledByUnique) { 79 | SymbolTable table; 80 | table.defineUniqueSymbol( 81 | "a", 82 | unspecifiedLocation_, 83 | SymbolType::kUnknown, 84 | RelationType::kUnknown, 85 | nullptr); 86 | table.defineUniqueSymbol( 87 | "a", 88 | unspecifiedLocation_, 89 | SymbolType::kUnknown, 90 | RelationType::kUnknown, 91 | nullptr); 92 | 93 | ASSERT_THAT( 94 | symbolNames(table.getSymbols()), ::testing::ElementsAre("a", "a2")); 95 | } 96 | 97 | TEST_F(SymbolTableTest, LocationsUnchangedAfterCopy) { 98 | ::substrait::proto::Plan plan; 99 | SymbolTable table = createSimpleTable(&plan); 100 | auto* ptr1 = &plan.relations(0); 101 | auto* ptr2 = plan.mutable_extension_uris(0); 102 | auto* ptr3 = &plan.extensions(0); 103 | 104 | const SymbolTable& table2 = table; 105 | auto symbols = table2.getSymbols(); 106 | ASSERT_THAT( 107 | symbolNames(symbols), 108 | ::testing::ElementsAre("symbol1", "symbol2", "symbol3")); 109 | 110 | ASSERT_THAT( 111 | ANY_CAST(::substrait::proto::PlanRel*, symbols[0]->blob), 112 | ::testing::Eq(ptr1)); 113 | ASSERT_THAT( 114 | ANY_CAST( 115 | ::substrait::proto::extensions::SimpleExtensionURI*, 116 | symbols[1]->blob), 117 | ::testing::Eq(ptr2)); 118 | ASSERT_THAT( 119 | ANY_CAST( 120 | ::substrait::proto::extensions::SimpleExtensionDeclaration*, 121 | symbols[2]->blob), 122 | ::testing::Eq(ptr3)); 123 | 124 | ASSERT_THAT( 125 | symbols[0]->sourceLocation, ::testing::Eq(symbols[0]->sourceLocation)); 126 | ASSERT_THAT( 127 | symbols[0]->sourceLocation, 128 | ::testing::Not(::testing::Eq(symbols[1]->sourceLocation))); 129 | 
ASSERT_THAT( 130 | symbols[0]->sourceLocation, 131 | ::testing::Not(::testing::Eq(symbols[2]->sourceLocation))); 132 | ASSERT_THAT( 133 | symbols[1]->sourceLocation, 134 | ::testing::Not(::testing::Eq(symbols[2]->sourceLocation))); 135 | 136 | ASSERT_THAT( 137 | table.getSymbols()[0]->sourceLocation, 138 | ::testing::Eq(symbols[0]->sourceLocation)); 139 | ASSERT_THAT( 140 | table.getSymbols()[1]->sourceLocation, 141 | ::testing::Eq(symbols[1]->sourceLocation)); 142 | ASSERT_THAT( 143 | table.getSymbols()[2]->sourceLocation, 144 | ::testing::Eq(symbols[2]->sourceLocation)); 145 | } 146 | 147 | } // namespace 148 | } // namespace io::substrait::textplan 149 | -------------------------------------------------------------------------------- /src/substrait/type/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | set(TYPE_SRCS Type.cpp) 4 | 5 | add_library(substrait_type ${TYPE_SRCS}) 6 | target_sources( 7 | substrait_type PUBLIC FILE_SET HEADERS BASE_DIRS ../../../include/ FILES 8 | ../../../include/substrait/type/Type.h) 9 | 10 | target_link_libraries(substrait_type substrait_common) 11 | 12 | if(${SUBSTRAIT_CPP_BUILD_TESTING}) 13 | add_subdirectory(tests) 14 | endif() 15 | 16 | install( 17 | TARGETS substrait_type 18 | EXPORT SubstraitTargets 19 | FILE_SET HEADERS) 20 | -------------------------------------------------------------------------------- /src/substrait/type/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | add_test_case( 4 | substrait_type_test 5 | EXTRA_LINK_LIBS 6 | substrait_type 7 | gtest 8 | gtest_main 9 | SOURCES 10 | TypeTest.cpp) 11 | -------------------------------------------------------------------------------- /third_party/.clang-tidy: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: 
Apache-2.0 2 | --- 3 | Checks: "-*" 4 | -------------------------------------------------------------------------------- /third_party/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | # Ensure `option()` in subdirectories honors normal variables set here. 4 | set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) 5 | 6 | set(ABSL_ENABLE_INSTALL ON) 7 | if(NOT ${ABSL_INCLUDED_WITH_PROTOBUF}) 8 | set(ABSL_PROPAGATE_CXX_STD ON) 9 | add_subdirectory(abseil-cpp) 10 | endif() 11 | 12 | include(datetime.cmake) 13 | 14 | add_subdirectory(fmt) 15 | 16 | if(WIN32) 17 | # For Windows: Prevent overriding the parent project's compiler/linker settings 18 | set(gtest_force_shared_crt 19 | ON 20 | CACHE BOOL "" FORCE) 21 | endif() 22 | 23 | find_package(GTest QUIET) 24 | if(NOT ${GTEST_FOUND}) 25 | message(STATUS "Retrieving external GoogleTest library.") 26 | include(FetchContent) 27 | fetchcontent_declare( 28 | GTest 29 | GIT_REPOSITORY https://github.com/google/googletest.git 30 | GIT_TAG v1.14.0 31 | OVERRIDE_FIND_PACKAGE) 32 | fetchcontent_makeavailable(GTest) 33 | endif() 34 | if(MSVC) 35 | # ------------------------------------------------------------------------------ 36 | # gtest MSVC fix 37 | # ------------------------------------------------------------------------------ 38 | # For some reason, googletest has include path issues when built with MSVC. 39 | # Specifically, this seems like some incorrect assumptions about include paths 40 | # inside the gmock project. 41 | # We can fix this by injecting the include paths here. 
42 | function(fix_gtest_include TARGET) 43 | target_include_directories( 44 | ${TARGET} 45 | PUBLIC $ 46 | $ 47 | $ 48 | $ 49 | $) 50 | endfunction() 51 | set(gtest_erroneous_targets gmock gmock_main) 52 | foreach(target ${gtest_erroneous_targets}) 53 | fix_gtest_include(${target}) 54 | endforeach() 55 | endif() 56 | 57 | set(PREVIOUS_BUILD_TESTING ${BUILD_TESTING}) 58 | set(BUILD_TESTING OFF) 59 | add_subdirectory(protobuf-matchers) 60 | set(BUILD_TESTING ${PREVIOUS_BUILD_TESTING}) 61 | 62 | set(YAML_CPP_INSTALL ON) 63 | set(YAML_CPP_BUILD_TESTS 64 | OFF 65 | CACHE BOOL "Enable testing") 66 | include_directories(yaml-cpp/include) 67 | add_subdirectory(yaml-cpp) 68 | -------------------------------------------------------------------------------- /third_party/antlr4/cmake/FindANTLR.cmake: -------------------------------------------------------------------------------- 1 | find_package(Java QUIET COMPONENTS Runtime) 2 | 3 | if(NOT ANTLR_EXECUTABLE) 4 | find_program(ANTLR_EXECUTABLE 5 | NAMES antlr.jar antlr4.jar antlr-4.jar antlr-4.13.2-complete.jar) 6 | endif() 7 | 8 | if(ANTLR_EXECUTABLE AND Java_JAVA_EXECUTABLE) 9 | execute_process( 10 | COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE} 11 | OUTPUT_VARIABLE ANTLR_COMMAND_OUTPUT 12 | ERROR_VARIABLE ANTLR_COMMAND_ERROR 13 | RESULT_VARIABLE ANTLR_COMMAND_RESULT 14 | OUTPUT_STRIP_TRAILING_WHITESPACE) 15 | 16 | if(ANTLR_COMMAND_RESULT EQUAL 0) 17 | string(REGEX MATCH "Version [0-9]+(\\.[0-9]+)*" ANTLR_VERSION ${ANTLR_COMMAND_OUTPUT}) 18 | string(REPLACE "Version " "" ANTLR_VERSION ${ANTLR_VERSION}) 19 | else() 20 | message( 21 | SEND_ERROR 22 | "Command '${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}' " 23 | "failed with the output '${ANTLR_COMMAND_ERROR}'") 24 | endif() 25 | 26 | macro(ANTLR_TARGET Name InputFile) 27 | set(ANTLR_OPTIONS LEXER PARSER LISTENER VISITOR) 28 | set(ANTLR_ONE_VALUE_ARGS PACKAGE OUTPUT_DIRECTORY DEPENDS_ANTLR) 29 | set(ANTLR_MULTI_VALUE_ARGS COMPILE_FLAGS DEPENDS) 30 | 
cmake_parse_arguments(ANTLR_TARGET 31 | "${ANTLR_OPTIONS}" 32 | "${ANTLR_ONE_VALUE_ARGS}" 33 | "${ANTLR_MULTI_VALUE_ARGS}" 34 | ${ARGN}) 35 | 36 | set(ANTLR_${Name}_INPUT ${InputFile}) 37 | 38 | get_filename_component(ANTLR_INPUT ${InputFile} NAME_WE) 39 | 40 | if(ANTLR_TARGET_OUTPUT_DIRECTORY) 41 | set(ANTLR_${Name}_OUTPUT_DIR ${ANTLR_TARGET_OUTPUT_DIRECTORY}) 42 | else() 43 | set(ANTLR_${Name}_OUTPUT_DIR 44 | ${CMAKE_CURRENT_BINARY_DIR}/antlr4cpp_generated_src/${ANTLR_INPUT}) 45 | endif() 46 | 47 | unset(ANTLR_${Name}_CXX_OUTPUTS) 48 | 49 | if((ANTLR_TARGET_LEXER AND NOT ANTLR_TARGET_PARSER) OR 50 | (ANTLR_TARGET_PARSER AND NOT ANTLR_TARGET_LEXER)) 51 | list(APPEND ANTLR_${Name}_CXX_OUTPUTS 52 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.h 53 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.cpp) 54 | set(ANTLR_${Name}_OUTPUTS 55 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.interp 56 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.tokens) 57 | else() 58 | list(APPEND ANTLR_${Name}_CXX_OUTPUTS 59 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.h 60 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.cpp 61 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.h 62 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.cpp) 63 | list(APPEND ANTLR_${Name}_OUTPUTS 64 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.interp 65 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.tokens) 66 | endif() 67 | 68 | if(ANTLR_TARGET_LISTENER) 69 | list(APPEND ANTLR_${Name}_CXX_OUTPUTS 70 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.h 71 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.cpp 72 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.h 73 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.cpp) 74 | list(APPEND ANTLR_TARGET_COMPILE_FLAGS -listener) 75 | endif() 76 | 77 | if(ANTLR_TARGET_VISITOR) 78 | list(APPEND ANTLR_${Name}_CXX_OUTPUTS 79 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.h 80 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.cpp 81 
| ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.h 82 | ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.cpp) 83 | list(APPEND ANTLR_TARGET_COMPILE_FLAGS -visitor) 84 | endif() 85 | 86 | if(ANTLR_TARGET_PACKAGE) 87 | list(APPEND ANTLR_TARGET_COMPILE_FLAGS -package ${ANTLR_TARGET_PACKAGE}) 88 | endif() 89 | 90 | list(APPEND ANTLR_${Name}_OUTPUTS ${ANTLR_${Name}_CXX_OUTPUTS}) 91 | 92 | if(ANTLR_TARGET_DEPENDS_ANTLR) 93 | if(ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT) 94 | list(APPEND ANTLR_TARGET_DEPENDS 95 | ${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT}) 96 | list(APPEND ANTLR_TARGET_DEPENDS 97 | ${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_OUTPUTS}) 98 | else() 99 | message(SEND_ERROR 100 | "ANTLR target '${ANTLR_TARGET_DEPENDS_ANTLR}' not found") 101 | endif() 102 | endif() 103 | 104 | add_custom_command( 105 | OUTPUT ${ANTLR_${Name}_OUTPUTS} 106 | COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE} 107 | ${InputFile} 108 | -o ${ANTLR_${Name}_OUTPUT_DIR} 109 | -no-listener 110 | -Dlanguage=Cpp 111 | ${ANTLR_TARGET_COMPILE_FLAGS} 112 | DEPENDS ${InputFile} 113 | ${ANTLR_TARGET_DEPENDS} 114 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 115 | COMMENT "Building ${Name} with ANTLR ${ANTLR_VERSION}") 116 | endmacro(ANTLR_TARGET) 117 | 118 | endif(ANTLR_EXECUTABLE AND Java_JAVA_EXECUTABLE) 119 | 120 | include(FindPackageHandleStandardArgs) 121 | find_package_handle_standard_args( 122 | ANTLR 123 | REQUIRED_VARS ANTLR_EXECUTABLE Java_JAVA_EXECUTABLE 124 | VERSION_VAR ANTLR_VERSION) 125 | -------------------------------------------------------------------------------- /third_party/datetime.cmake: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | include_guard(GLOBAL) 4 | 5 | set (BUILD_TZ_LIB ON CACHE BOOL "timezone library is a dependency" FORCE) 6 | set (USE_SYSTEM_TZ_DB ON CACHE BOOL "use OS compiled timezone database" FORCE) 7 | include(FetchContent) 8 | FetchContent_Declare(date_src 9 | 
GIT_REPOSITORY https://github.com/HowardHinnant/date.git 10 | GIT_TAG v3.0.1 11 | ) 12 | FetchContent_MakeAvailable(date_src) 13 | -------------------------------------------------------------------------------- /third_party/protobuf.cmake: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | include_guard(GLOBAL) 4 | 5 | set(ABSL_PROPAGATE_CXX_STD ON) 6 | 7 | include(FetchContent) 8 | FetchContent_Declare(GTest 9 | GIT_REPOSITORY https://github.com/google/googletest.git 10 | GIT_TAG v1.14.0 11 | OVERRIDE_FIND_PACKAGE 12 | ) 13 | FetchContent_Declare(Protobuf 14 | GIT_REPOSITORY https://github.com/protocolbuffers/protobuf.git 15 | GIT_TAG v29.3 16 | SYSTEM 17 | OVERRIDE_FIND_PACKAGE 18 | ) 19 | 20 | # Disable warnings for dependency targets. 21 | set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "") 22 | if(MSVC) 23 | set(protobuf_MSVC_STATIC_RUNTIME OFF) 24 | set(gtest_force_shared_crt ON) 25 | add_compile_options("/W0") 26 | else() 27 | add_compile_options("-w") 28 | endif() 29 | FetchContent_MakeAvailable(Protobuf GTest) 30 | --------------------------------------------------------------------------------