├── tests ├── __init__.py ├── coverage │ ├── __init__.py │ ├── nodes.py │ └── case_file_parser.py ├── cases │ ├── rounding_decimal │ │ ├── floor.test │ │ ├── ceil.test │ │ └── round.test │ ├── comparison │ │ ├── is_true.test │ │ ├── is_false.test │ │ ├── is_not_true.test │ │ ├── is_not_false.test │ │ ├── is_null.test │ │ ├── is_finite.test │ │ ├── is_not_null.test │ │ ├── is_infinite.test │ │ ├── is_nan.test │ │ ├── is_not_distinct_from.test │ │ ├── equal.test │ │ ├── coalesce.test │ │ ├── nullif.test │ │ ├── lt.test │ │ ├── gt.test │ │ ├── not_equal.test │ │ ├── gte.test │ │ ├── lte.test │ │ └── between.test │ ├── rounding │ │ ├── ceil.test │ │ ├── floor.test │ │ └── round.test │ ├── boolean │ │ ├── not.test │ │ ├── bool_or.test │ │ ├── bool_and.test │ │ ├── or.test │ │ ├── and.test │ │ ├── xor.test │ │ └── and_not.test │ ├── arithmetic │ │ ├── acos.test │ │ ├── asinh.test │ │ ├── atan2.test │ │ ├── atanh.test │ │ ├── tan.test │ │ ├── atan.test │ │ ├── asin.test │ │ ├── cos.test │ │ ├── sin.test │ │ ├── sinh.test │ │ ├── cosh.test │ │ ├── tanh.test │ │ ├── factorial.test │ │ ├── exp.test │ │ ├── power.test │ │ ├── acosh.test │ │ ├── sqrt.test │ │ ├── shift_right.test │ │ ├── bitwise_not.test │ │ ├── shift_left.test │ │ ├── abs.test │ │ ├── bitwise_or.test │ │ ├── bitwise_and.test │ │ ├── negate.test │ │ ├── shift_right_unsigned.test │ │ ├── bitwise_xor.test │ │ ├── divide.test │ │ ├── modulus.test │ │ ├── max.test │ │ ├── min.test │ │ ├── subtract.test │ │ ├── sum.test │ │ ├── multiply.test │ │ └── add.test │ ├── aggregate_generic │ │ └── count.test │ ├── string │ │ ├── repeat.test │ │ ├── string_split.test │ │ ├── like.test │ │ ├── lower.test │ │ ├── upper.test │ │ ├── reverse.test │ │ ├── concat_ws.test │ │ ├── replace.test │ │ ├── left.test │ │ ├── bit_length.test │ │ ├── right.test │ │ ├── char_length.test │ │ ├── octet_length.test │ │ ├── concat.test │ │ ├── lpad.test │ │ ├── rpad.test │ │ ├── ltrim.test │ │ ├── trim.test │ │ ├── rtrim.test │ │ ├── 
ends_with.test │ │ ├── starts_with.test │ │ ├── contains.test │ │ ├── substring.test │ │ └── regexp_string_split.test │ ├── datetime │ │ ├── add_intervals.test │ │ ├── subtract_datetime.test │ │ ├── add_datetime.test │ │ ├── gt_datetime.test │ │ ├── lt_datetime.test │ │ ├── gte_datetime.test │ │ ├── lte_datetime.test │ │ └── extract.test │ ├── list │ │ ├── transform.test │ │ └── filter.test │ ├── arithmetic_decimal │ │ ├── factorial_decimal.test │ │ ├── sum_decimal.test │ │ ├── power.test │ │ ├── power_decimal.test │ │ ├── sqrt_decimal.test │ │ ├── min_decimal.test │ │ ├── max_decimal.test │ │ ├── bitwise_and.test │ │ ├── bitwise_xor.test │ │ └── bitwise_or.test │ ├── logarithmic │ │ ├── ln.test │ │ ├── log10.test │ │ ├── log2.test │ │ └── logb.test │ └── aggregate_approx │ │ └── approx_count_distinct.test ├── baseline.json ├── test_proto_example_validator.py └── test_extensions.py ├── .python-version ├── site ├── .gitignore ├── docs │ ├── extensions │ │ └── .gitignore │ ├── relations │ │ ├── user_defined_relations.md │ │ ├── _config │ │ ├── embedded_relations.md │ │ └── common_fields.md │ ├── serialization │ │ ├── _config │ │ ├── text_serialization.md │ │ └── basics.md │ ├── tools │ │ ├── _config │ │ ├── substrait_validator.md │ │ ├── producer_tools.md │ │ └── third_party_tools.md │ ├── spec │ │ ├── _config │ │ ├── technology_principles.md │ │ ├── versioning.md │ │ └── extending.md │ ├── stylesheets │ │ └── extra.css │ ├── types │ │ ├── _config │ │ ├── type_variations.md │ │ ├── type_system.md │ │ └── type_aliases.md │ ├── _config │ ├── expressions │ │ ├── table_functions.md │ │ ├── _config │ │ ├── user_defined_functions.md │ │ ├── dynamic_parameters.md │ │ ├── extended_expression.md │ │ ├── subqueries.md │ │ ├── window_functions.md │ │ ├── aggregate_functions.md │ │ └── embedded_functions.md │ ├── tutorial │ │ └── examples.md │ ├── img │ │ └── logo.svg │ └── index.md ├── examples │ ├── types │ │ ├── point_with_nstruct.yaml │ │ ├── point_with_structure.yaml │ │ 
├── union_variadic.yaml │ │ ├── point_with_two_params.yaml │ │ ├── tuple_optional_variadic.yaml │ │ ├── point_with_enum_param.yaml │ │ ├── vector_with_constraints.yaml │ │ ├── point_with_datatype_param.yaml │ │ └── user_defined_point.yaml │ ├── extensions │ │ ├── any_type_function.yaml │ │ ├── any1_type_function.yaml │ │ ├── distance_functions.yaml │ │ ├── double_function.yaml │ │ └── lambda_function_example.yaml │ ├── proto-textformat │ │ ├── README.md │ │ ├── lambda │ │ │ ├── nested_lambda_capture.textproto │ │ │ └── simple_multiply.textproto │ │ ├── field_reference │ │ │ └── lambda_param_nested_struct.textproto │ │ └── lambda_invocation │ │ │ └── inline_invocation.textproto │ └── README.md ├── data │ ├── smc.yaml │ └── committers.yaml ├── requirements.txt └── README.md ├── buf.work.yaml ├── proto ├── buf.lock ├── buf.yaml └── substrait │ ├── capabilities.proto │ └── extended_expression.proto ├── .gitignore ├── .github ├── CODEOWNERS ├── dependabot.yml ├── workflows │ ├── licence_check.yml │ ├── stale.yml │ ├── pr_breaking.yml │ ├── site.yml │ ├── release.yml │ └── pr_title.yml └── pull_request_template.md ├── .licenserc.yaml ├── requirements.txt ├── ci └── release │ ├── verify.sh │ ├── prepare.sh │ ├── publish.sh │ ├── run.sh │ └── dry_run.sh ├── .gitattributes ├── grammar ├── prepend_license.sh ├── README.md ├── Makefile ├── SubstraitLexer.g4 └── FuncTestCaseLexer.g4 ├── .yamllint.yaml ├── extensions ├── extension_types.yaml ├── type_variations.yaml ├── functions_aggregate_approx.yaml ├── functions_set.yaml ├── functions_aggregate_generic.yaml ├── functions_list.yaml ├── unknown.yaml └── functions_aggregate_decimal_output.yaml ├── go.mod ├── .flake8 ├── dialects └── tests │ ├── expressions_test.yaml │ ├── relations_test.yaml │ ├── types_test.yaml │ └── functions_test.yaml ├── README.md ├── .editorconfig ├── buf.gen.yaml ├── pyproject.toml ├── CITATION.cff ├── core.go ├── .pre-commit-config.yaml ├── go.sum ├── .releaserc.json ├── core_test.go └── CONTRIBUTING.md 
/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.13 -------------------------------------------------------------------------------- /tests/coverage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /site/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | site 3 | -------------------------------------------------------------------------------- /site/docs/extensions/.gitignore: -------------------------------------------------------------------------------- 1 | *.md 2 | !index.md 3 | -------------------------------------------------------------------------------- /buf.work.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | directories: 3 | - proto 4 | -------------------------------------------------------------------------------- /proto/buf.lock: -------------------------------------------------------------------------------- 1 | # Generated by buf. DO NOT EDIT. 
2 | version: v1 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target 2 | **/.gradle 3 | **/.idea 4 | **/build 5 | gen 6 | -------------------------------------------------------------------------------- /site/docs/relations/user_defined_relations.md: -------------------------------------------------------------------------------- 1 | # User Defined Relations 2 | 3 | Pending 4 | 5 | -------------------------------------------------------------------------------- /site/docs/serialization/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | 3 | - basics.md 4 | - binary_serialization.md 5 | - text_serialization.md 6 | -------------------------------------------------------------------------------- /site/docs/tools/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - producer_tools.md 3 | - substrait_validator.md 4 | - third_party_tools.md 5 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | * @jacques-n @cpcloud @westonpace @epsilonprime @vbarua @yongchul 4 | -------------------------------------------------------------------------------- /site/docs/spec/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - versioning.md 3 | - specification.md 4 | - technology_principles.md 5 | - extending.md 6 | -------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | header: 2 | license: 3 | spdx-id: Apache-2.0 4 | 5 | paths: 6 | - 'proto/substrait/**' 7 | 8 | comment: never 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | antlr4-python3-runtime==4.13.2 2 | black==24.8.0 3 | flake8==7.0.0 4 | protobuf==6.33.1 5 | pytest==8.3.4 6 | pyyaml==6.0.2 7 | -------------------------------------------------------------------------------- /ci/release/verify.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | buf lint 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | proto/buf.lock linguist-generated=true 2 | tests/coverage/antlr_parser/** linguist-generated=true 3 | tests/type/antlr_parser/** linguist-generated=true 4 | -------------------------------------------------------------------------------- /site/docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | img.bordered { 2 | height: auto; 3 | width: auto; 4 | border: 1px solid #9f9f9f; 5 | transition: transform ease-in-out 0.3s; 6 | } -------------------------------------------------------------------------------- /site/docs/types/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - type_system.md 3 | - type_classes.md 4 | - type_variations.md 5 | - type_parsing.md 6 | - named_structs.md 7 | - type_aliases.md 8 | -------------------------------------------------------------------------------- /grammar/prepend_license.sh: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | for f in $1/*.py; do 4 | echo '# SPDX-License-Identifier: Apache-2.0' | cat - $f > temp && mv temp $f 5 | 
done -------------------------------------------------------------------------------- /site/docs/relations/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - basics.md 3 | - common_fields.md 4 | - logical_relations.md 5 | - physical_relations.md 6 | - user_defined_relations.md 7 | - embedded_relations.md 8 | -------------------------------------------------------------------------------- /ci/release/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | # build artifacts 8 | buf build 9 | buf generate 10 | -------------------------------------------------------------------------------- /.yamllint.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | line-length: 3 | max: 120 4 | brackets: 5 | forbid: false 6 | min-spaces-inside: 0 7 | max-spaces-inside: 1 8 | min-spaces-inside-empty: 0 9 | max-spaces-inside-empty: 0 10 | -------------------------------------------------------------------------------- /proto/buf.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | name: buf.build/substrait/substrait 3 | lint: 4 | use: 5 | - DEFAULT 6 | ignore_only: 7 | PACKAGE_VERSION_SUFFIX: 8 | - substrait 9 | breaking: 10 | use: 11 | - FILE 12 | -------------------------------------------------------------------------------- /site/examples/types/point_with_nstruct.yaml: -------------------------------------------------------------------------------- 1 | # Alternative way to define point structure using NSTRUCT syntax 2 | urn: extension:example:point_nstruct 3 | types: 4 | - name: point 5 | structure: "NSTRUCT" 6 | -------------------------------------------------------------------------------- /site/examples/types/point_with_structure.yaml: 
-------------------------------------------------------------------------------- 1 | # User-defined point type with structure information 2 | urn: extension:example:point_with_structure 3 | types: 4 | - name: point 5 | structure: 6 | longitude: i32 7 | latitude: i32 8 | -------------------------------------------------------------------------------- /site/examples/types/union_variadic.yaml: -------------------------------------------------------------------------------- 1 | # Union type with variadic parameter (one or more types) 2 | urn: extension:example:union_variadic 3 | types: 4 | - name: union 5 | parameters: 6 | - name: T 7 | type: dataType 8 | variadic: true 9 | -------------------------------------------------------------------------------- /site/docs/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - index.md 3 | - spec 4 | - types 5 | - expressions 6 | - relations 7 | - serialization 8 | - extensions 9 | - community 10 | - governance.md 11 | - about.md 12 | - tools 13 | - tutorial 14 | - faq.md 15 | -------------------------------------------------------------------------------- /site/docs/tools/substrait_validator.md: -------------------------------------------------------------------------------- 1 | # Substrait Validator 2 | 3 | The [Substrait Validator](https://github.com/substrait-io/substrait-validator) is a tool 4 | used to validate Substrait plans and to print diagnostic information about a plan's validity. 
5 | -------------------------------------------------------------------------------- /ci/release/publish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | cd "$(git rev-parse --show-toplevel)"/proto || exit 1 8 | 9 | buf push --tag "v${1}" --tag "$(git rev-parse HEAD)" 10 | -------------------------------------------------------------------------------- /extensions/extension_types.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | urn: extension:io.substrait:extension_types 3 | types: 4 | - name: point 5 | structure: 6 | latitude: i32 7 | longitude: i32 8 | - name: line 9 | structure: 10 | start: point 11 | end: point 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | 8 | - package-ecosystem: "pip" 9 | directory: "/site" 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /tests/cases/rounding_decimal/floor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | floor(2.25::dec<3,2>) = 2::dec<2,0> 6 | floor(-65.5::dec<3,1>) = -66::dec<3,0> 7 | -------------------------------------------------------------------------------- /site/docs/expressions/table_functions.md: -------------------------------------------------------------------------------- 1 | # Table Functions 2 | 3 | Table functions produce zero or more records for 
each input record. Table functions use a signature similar to scalar functions. However, they are not allowed in the same contexts. 4 | 5 | 6 | 7 | to be completed... 8 | 9 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/substrait-io/substrait 2 | 3 | go 1.22.0 4 | 5 | require github.com/stretchr/testify v1.9.0 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /site/examples/types/point_with_two_params.yaml: -------------------------------------------------------------------------------- 1 | # Compound user-defined type with two data type parameters 2 | urn: extension:example:point_two_params 3 | types: 4 | - name: point 5 | parameters: 6 | - name: LONG 7 | type: dataType 8 | - name: LAT 9 | type: dataType 10 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_true.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_true(true::bool) = true::bool 6 | is_true(false::bool) = false::bool 7 | is_true(null::bool) = false::bool 8 | -------------------------------------------------------------------------------- /site/docs/tools/producer_tools.md: -------------------------------------------------------------------------------- 1 | # Producer Tools 2 | 3 | ## Isthmus 4 | 5 | [Isthmus](https://github.com/substrait-io/substrait-java/tree/main/isthmus) is an application 6 | that serializes SQL to [Substrait Protobuf](https://substrait.io/serialization/binary_serialization/) 7 
| via the Calcite SQL compiler. 8 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_false.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_false(true::bool) = false::bool 6 | is_false(false::bool) = true::bool 7 | is_false(null::bool) = false::bool 8 | -------------------------------------------------------------------------------- /site/examples/types/tuple_optional_variadic.yaml: -------------------------------------------------------------------------------- 1 | # Tuple type with optional variadic parameter (zero or more types) 2 | urn: extension:example:tuple_variadic 3 | types: 4 | - name: tuple 5 | parameters: 6 | - name: T 7 | type: dataType 8 | optional: true 9 | variadic: true 10 | -------------------------------------------------------------------------------- /tests/cases/rounding/ceil.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ceil(2.25::fp32) = 3::fp32 6 | ceil(2.0000007152557373046875::fp64) = 3::fp64 7 | ceil(-65.500000001223334444::fp64) = -65::fp64 8 | -------------------------------------------------------------------------------- /site/examples/extensions/any_type_function.yaml: -------------------------------------------------------------------------------- 1 | # Example showing the 'any' type - arguments can be of any type 2 | urn: extension:example:any_type 3 | scalar_functions: 4 | - name: foo 5 | impls: 6 | - args: 7 | - name: a 8 | value: any 9 | - name: b 10 | value: any 11 | return: int64 12 | -------------------------------------------------------------------------------- 
/tests/cases/comparison/is_not_true.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_true(true::bool) = false::bool 6 | is_not_true(false::bool) = true::bool 7 | is_not_true(null::bool) = true::bool 8 | -------------------------------------------------------------------------------- /tests/cases/rounding/floor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | floor(2.25::fp32) = 2::fp32 6 | floor(2.0000007152557373046875::fp64) = 2::fp64 7 | floor(-65.490000001223334444::fp64) = -66::fp64 8 | -------------------------------------------------------------------------------- /tests/cases/rounding_decimal/ceil.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ceil(2.25::dec<3,2>) = 3::dec<2,0> 6 | ceil(-65.5::dec<3,1>) = -65::dec<3,0> 7 | ceil(9.9::dec<2,1>) = 10::dec<2,0> 8 | -------------------------------------------------------------------------------- /site/docs/expressions/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - field_references.md 3 | - scalar_functions.md 4 | - aggregate_functions.md 5 | - specialized_record_expressions.md 6 | - window_functions.md 7 | - table_functions.md 8 | - user_defined_functions.md 9 | - embedded_functions.md 10 | - dynamic_parameters.md 11 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_not_false.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_false(true::bool) = true::bool 6 | is_not_false(false::bool) = false::bool 7 | is_not_false(null::bool) = true::bool 8 | -------------------------------------------------------------------------------- /site/examples/extensions/any1_type_function.yaml: -------------------------------------------------------------------------------- 1 | # Example showing the 'any1' type - arguments must be of the same type 2 | urn: extension:example:any1_type 3 | scalar_functions: 4 | - name: bar 5 | impls: 6 | - args: 7 | - name: a 8 | value: any1 9 | - name: b 10 | value: any1 11 | return: int64 12 | -------------------------------------------------------------------------------- /site/examples/types/point_with_enum_param.yaml: -------------------------------------------------------------------------------- 1 | # Compound user-defined type with an enumeration parameter 2 | urn: extension:example:point_enum_param 3 | types: 4 | - name: point 5 | parameters: 6 | - name: coordinate_type 7 | type: enumeration 8 | options: 9 | - integer 10 | - double 11 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, W503, F403, F401 3 | max-line-length = 88 4 | select = B,C,E,F,W,T4,B9 5 | exclude = 6 | # exclude generated test parser 7 | tests/coverage/antlr_parser/*.py, 8 | # exclude generated type parser 9 | tests/type/antlr_parser/*.py, 10 | # exclude generated files 11 | gen/ -------------------------------------------------------------------------------- /tests/cases/boolean/not.test: -------------------------------------------------------------------------------- 1 | ### 
SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | not(true::bool) = false::bool 6 | not(false::bool) = true::bool 7 | 8 | # null_input: Examples with null as input 9 | not(null::bool) = null::bool 10 | -------------------------------------------------------------------------------- /site/data/smc.yaml: -------------------------------------------------------------------------------- 1 | - Name: Phillip Cloud 2 | Association: Voltron Data 3 | - Name: Weston Pace 4 | Association: LanceDB 5 | - Name: Jacques Nadeau 6 | Association: Sundeck 7 | - Name: Victor Barua 8 | Association: Datadog 9 | - Name: David Sisson 10 | Association: Voltron Data 11 | - Name: Yongchul Kwon 12 | Association: Microsoft 13 | -------------------------------------------------------------------------------- /site/docs/relations/embedded_relations.md: -------------------------------------------------------------------------------- 1 | # Embedded Relations 2 | 3 | Pending. 4 | 5 | Embedded relations allow a Substrait producer to define a set operation that will be embedded in the plan. 6 | 7 | TODO: define the details of the supported interfaces, languages, formats, etc. This should reasonably be an extension of embedded user-defined table functions. 
8 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/acos.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | acos(0.00::fp32) = 1.5707963267948966::fp32 6 | acos(1.0::fp64) = 0.0::fp64 7 | acos(-0.0000009::fp64) = 1.5707972267948966::fp64 8 | acos(null::fp64) = null::fp64 9 | -------------------------------------------------------------------------------- /site/examples/types/vector_with_constraints.yaml: -------------------------------------------------------------------------------- 1 | # Vector type with integer parameter constrained to 2 or 3 dimensions 2 | urn: extension:example:vector_constrained 3 | types: 4 | - name: vector 5 | parameters: 6 | - name: T 7 | type: dataType 8 | - name: dimensions 9 | type: integer 10 | min: 2 11 | max: 3 12 | -------------------------------------------------------------------------------- /.github/workflows/licence_check.yml: -------------------------------------------------------------------------------- 1 | name: License check 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | license: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v6 10 | 11 | - name: Check License Header 12 | uses: enarx/spdx@master 13 | with: 14 | licenses: |- 15 | Apache-2.0 16 | MIT 17 | -------------------------------------------------------------------------------- /dialects/tests/expressions_test.yaml: -------------------------------------------------------------------------------- 1 | name: "dialect test file for expressions" 2 | 3 | supported_expressions: 4 | - LITERAL 5 | - SELECTION 6 | - SCALAR_FUNCTION 7 | - expression: CAST 8 | failure_options: [RETURN_NULL] 9 | - expression: SUBQUERY 10 | subquery_types: [SCALAR] 11 | - expression: NESTED 12 | nested_types: 13 | - LIST 14 | 
-------------------------------------------------------------------------------- /tests/cases/arithmetic/asinh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | asinh(0.01::fp32) = 0.009999833340832886::fp32 6 | asinh(1.0::fp64) = 0.881373587019543::fp64 7 | asinh(0.0009::fp64) = 0.0008999998785000443::fp64 8 | asinh(null::fp64) = null::fp64 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Substrait 2 | 3 | Substrait is a new project focused on producing an independent description of data compute operations. It is composed primarily of: 4 | 5 | 1. A formal specification 6 | 2. A human readable text representation 7 | 3. A compact cross-language binary representation 8 | 9 | For more details, please go to [substrait.io](https://substrait.io) 10 | 11 | -------------------------------------------------------------------------------- /site/examples/types/point_with_datatype_param.yaml: -------------------------------------------------------------------------------- 1 | # Compound user-defined type with a data type parameter 2 | urn: extension:example:point_parameterized 3 | types: 4 | - name: point 5 | parameters: 6 | - name: T 7 | description: | 8 | The type used for the longitude and latitude 9 | components of the point. 
10 | type: dataType 11 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | indent_style = space 8 | trim_trailing_whitespace = true 9 | 10 | [site/**] 11 | charset = unset 12 | end_of_line = unset 13 | insert_final_newline = unset 14 | indent_style = unset 15 | trim_trailing_whitespace = unset 16 | 17 | [*.{proto,yaml,yml}] 18 | indent_size = 2 19 | -------------------------------------------------------------------------------- /site/examples/extensions/distance_functions.yaml: -------------------------------------------------------------------------------- 1 | urn: extension:example:distance_functions 2 | dependencies: 3 | ext: extension:io.substrait:extension_types 4 | scalar_functions: 5 | - name: distance 6 | description: The distance between two points. 
7 | impls: 8 | - args: 9 | - name: a 10 | value: ext.point 11 | - name: b 12 | value: ext.point 13 | return: f64 14 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_null.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_null(25::i16) = false::bool 6 | is_null(false::bool) = false::bool 7 | is_null(7.823::dec<38, 3>) = false::bool 8 | is_null(null::i16) = true::bool 9 | is_null(null::dec<38, 3>) = true::bool 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/atan2.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | atan2(0.0::fp32, 0.0::fp32) = 0.0::fp32 6 | atan2(1.0::fp64, 1.0::fp64) = 0.7853981633974483::fp64 7 | atan2(0.009::fp64, 0.0008::fp64) = 1.482140444927459::fp64 8 | atan2(null::fp64, 0.0008::fp64) = null::fp64 9 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/atanh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | atanh(0.0::fp32) = 0.0::fp32 6 | atanh(1.0::fp64) = inf::fp64 7 | atanh(0.009::fp64) = 0.009000243011810481::fp64 8 | atanh(-0.009::fp64) = -0.009000243011810481::fp64 9 | atanh(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/tan.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | tan(0.0::fp32) = 0.0::fp32 6 | tan(0.5::fp64) = 0.5463024898437905::fp64 7 | tan(7.01::fp64) = 0.8891974677731088::fp64 8 | tan(-7.01::fp64) = -0.8891974677731088::fp64 9 | tan(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/atan.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | atan(0.0::fp32) = 0.0::fp32 6 | atan(1.0::fp64) = 0.7853981633974483::fp64 7 | atan(7.01::fp64) = 1.4290989925795292::fp64 8 | atan(-7.01::fp64) = -1.4290989925795292::fp64 9 | atan(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/asin.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | asin(0.0::fp32) = 0.0::fp32 6 | asin(1.0::fp64) = 1.5707963267948966::fp64 7 | asin(0.009::fp64) = 0.009000121504428887::fp64 8 | asin(-0.009::fp64) = -0.009000121504428887::fp64 9 | asin(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/cos.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | cos(0.00::fp32) = 1.0::fp32 6 | 
cos(1.0::fp64) = 0.5403023058681398::fp64 7 | cos(7.0000009::fp64) = 0.7539016630550606::fp64 8 | cos(-7.00000095::fp64) = 0.7539016302056953::fp64 9 | cos(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sin.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sin(0.0::fp32) = 0.0::fp32 6 | sin(1.0::fp64) = 0.8414709848078965::fp64 7 | sin(7.0000009::fp64) = 0.6569872772305518::fp64 8 | sin(-7.0000009::fp64) = -0.6569872772305518::fp64 9 | sin(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sinh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sinh(0.0::fp32) = 0.0::fp32 6 | sinh(1.0::fp64) = 1.1752011936438014::fp64 7 | sinh(7.0000009::fp64) = 548.3166167588001::fp64 8 | sinh(-7.0000009::fp64) = -548.3166167588001::fp64 9 | sinh(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/cosh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | cosh(0.00::fp32) = 1.0::fp32 6 | cosh(1.0::fp64) = 1.5430806348152437::fp64 7 | cosh(7.0000009::fp64) = 548.3175286399451::fp64 8 | cosh(-7.00000095::fp64) = 548.3175560557769::fp64 9 | cosh(null::fp64) = null::fp64 10 | 
-------------------------------------------------------------------------------- /tests/cases/arithmetic/tanh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | tanh(0.0::fp32) = 0.0::fp32 6 | tanh(1.0::fp64) = 0.7615941559557649::fp64 7 | tanh(7.0000009::fp64) = 0.9999983369469382::fp64 8 | tanh(-7.0000009::fp64) = -0.9999983369469382::fp64 9 | tanh(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /site/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs>=1.4.2,<2 2 | mkdocs-material>=9.1.5 3 | mkdocs-minify-plugin>=0.6.1,<1 4 | mkdocs-redirects>=1.2.0,<2 5 | pymdown-extensions>=9.9.1,<11 6 | mkdocs-awesome-pages-plugin>=2.8.0,<3 7 | mkdocs-gen-files>=0.4.0,<1 8 | mkdocs-markdownextradata-plugin>=0.2.5,<1 9 | mkdocs-protobuf>=0.1.0,<1 10 | mkdocs-table-reader-plugin>=2,<4 11 | pygments>=2.14,<3 12 | oyaml>=1.0,<2 13 | mdutils>=1.4.0,<2 14 | -------------------------------------------------------------------------------- /tests/cases/aggregate_generic/count.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_aggregate_generic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | count((100, -200, 300, -400, 5, 6)::i16) = 6::i64 6 | count((1000)::i16) = 1::i64 7 | count(()::i16) = 0::i64 8 | count((Null, Null, Null)::i16) = 0::i64 9 | count((Null, Null, Null, 1000)::i16) = 1::i64 10 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_finite.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: 
v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_finite(0.0::fp32) = true::bool 6 | is_finite(0.55::fp32) = true::bool 7 | is_finite(1000.000000000001::fp64) = true::bool 8 | is_finite(-inf::fp64) = false::bool 9 | is_finite(inf::fp64) = false::bool 10 | is_finite(null::fp64) = null::bool 11 | -------------------------------------------------------------------------------- /tests/baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "registry": { 3 | "extension_count": 15, 4 | "dependency_count": 15, 5 | "function_count": 170, 6 | "num_aggregate_functions": 29, 7 | "num_scalar_functions": 166, 8 | "num_window_functions": 11, 9 | "num_function_overloads": 529 10 | }, 11 | "coverage": { 12 | "total_test_count": 1136, 13 | "num_function_variants": 529, 14 | "num_covered_function_variants": 241 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_not_null.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_null(25::i16) = true::bool 6 | is_not_null(true::bool) = true::bool 7 | is_not_null(7.25::fp32) = true::bool 8 | is_not_null(7.25::dec<38, 3>) = true::bool 9 | is_not_null(null::i8) = false::bool 10 | is_not_null(null::dec<38, 3>) = false::bool 11 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_infinite.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_infinite(0.0::fp32) = false::bool 6 | 
is_infinite(0.55::fp32) = false::bool 7 | is_infinite(1000.000000000001::fp64) = false::bool 8 | is_infinite(-inf::fp64) = true::bool 9 | is_infinite(inf::fp64) = true::bool 10 | is_infinite(null::fp64) = null::bool 11 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/factorial.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | factorial(0::i32) = 1::i32 6 | factorial(1::i32) = 1::i32 7 | factorial(20::i64) = 2432902008176640000::i64 8 | factorial(null::i32) = null::i32 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | factorial(1000000::i32) [overflow:ERROR] = 12 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_nan.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_nan(0.0::fp32) = false::bool 6 | is_nan(0.55::fp32) = false::bool 7 | is_nan(1000.000000000001::fp64) = false::bool 8 | is_nan(-inf::fp64) = false::bool 9 | is_nan(inf::fp64) = false::bool 10 | is_nan(null::fp64) = null::bool 11 | is_nan(nan::fp64) = true::bool 12 | -------------------------------------------------------------------------------- /dialects/tests/relations_test.yaml: -------------------------------------------------------------------------------- 1 | name: "dialect test file for relations" 2 | 3 | supported_relations: 4 | - AGGREGATE 5 | - FETCH 6 | - FILTER 7 | - PROJECT 8 | - SORT 9 | - relation: JOIN 10 | join_types: 11 | - INNER 12 | - LEFT 13 | - RIGHT 14 | - relation: READ 15 | read_types: [ICEBERG_TABLE, VIRTUAL_TABLE] 16 | - relation: EXTENSION_LEAF 
17 | message_types: 18 | - type.googleapis.com/google.profile.Person 19 | 20 | -------------------------------------------------------------------------------- /site/examples/types/user_defined_point.yaml: -------------------------------------------------------------------------------- 1 | # User-defined type example: a point type with two scalar functions 2 | urn: extension:example:point_type 3 | types: 4 | - name: "point" 5 | 6 | scalar_functions: 7 | - name: "lat" 8 | impls: 9 | - args: 10 | - name: p 11 | value: u!point 12 | return: fp64 13 | - name: "lon" 14 | impls: 15 | - args: 16 | - name: p 17 | value: u!point 18 | return: fp64 19 | -------------------------------------------------------------------------------- /dialects/tests/types_test.yaml: -------------------------------------------------------------------------------- 1 | name: "dialect test file for types" 2 | 3 | dependencies: 4 | geo: extension:io.substrait:functions_geometry 5 | 6 | supported_types: 7 | - I8 8 | - I16 9 | - type: I32 10 | system_metadata: 11 | name: INTEGER 12 | supported_as_column: true 13 | - type: I64 14 | system_metadata: 15 | name: BIGINT 16 | supported_as_column: true 17 | - type: USER_DEFINED 18 | source: geo 19 | name: geometry 20 | 21 | -------------------------------------------------------------------------------- /buf.gen.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | plugins: 3 | - plugin: buf.build/protocolbuffers/cpp:v23.0 4 | out: gen/proto/cpp 5 | - plugin: buf.build/protocolbuffers/csharp:v23.0 6 | out: gen/proto/csharp 7 | - plugin: buf.build/protocolbuffers/java:v23.0 8 | out: gen/proto/java 9 | - plugin: buf.build/protocolbuffers/python:v23.0 10 | out: gen/proto/python 11 | - plugin: buf.build/protocolbuffers/go:v1.30.0 12 | out: gen/proto/go 13 | opt: 14 | - paths=source_relative 15 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [tool.black] 2 | # exclude filters out files found by Black itself during discovery 3 | exclude = ''' 4 | ( 5 | .*/antlr_parser/.*\.py 6 | | gen/.* 7 | ) 8 | ''' 9 | # pre-commit passes files into Black, rather than letting it discover files 10 | # force-exclude can be used to filter out these files from formatting 11 | force-exclude = ''' 12 | ( 13 | .*/antlr_parser/.*\.py 14 | | gen/.* 15 | ) 16 | ''' 17 | 18 | [tool.pytest.ini_options] 19 | pythonpath = ["gen/proto/python"] -------------------------------------------------------------------------------- /tests/cases/arithmetic/exp.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | exp(100::i64) = 2.6881171418161356e+43::fp64 6 | exp(0.25::fp32) = 1.2840254166877414::fp32 7 | exp(0.693::fp64) = 1.9997056605411638::fp64 8 | exp(2.0000007152557373046875::fp64) = 7.3890613839973085::fp64 9 | exp(0.0::fp64) = 1.0::fp64 10 | exp(null::fp64) = null::fp64 11 | exp(1000::i64) = inf::fp64 12 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/power.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | power(8::i64, 2::i64) = 64::i64 6 | power(1.0::fp32, -1.0::fp32) = 1.0::fp32 7 | power(2.0::fp64, -2.0::fp64) = 0.25::fp64 8 | power(13::i64, 10::i64) = 137858491849::i64 9 | 10 | # floating_exception: Examples demonstrating exceptional floating point cases 11 | power(1.5e+100::fp64, 1.5e+208::fp64) = inf::fp64 12 | -------------------------------------------------------------------------------- 
/tests/cases/string/repeat.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | repeat('abc'::str, 2::i64) = 'abcabc'::str 6 | repeat('aBc'::str, 0::i64) = ''::str 7 | repeat(' abd'::str, 3::i64) = ' abd abd abd'::str 8 | repeat(' '::str, 5::i64) = ' '::str 9 | repeat(''::str, 2::i64) = ''::str 10 | 11 | # null_input: Examples with null as input 12 | repeat(null::str, 2::i64) = null::str 13 | -------------------------------------------------------------------------------- /tests/cases/rounding_decimal/round.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | round(2.0::dec<2,1>, 2::i32) = 2::dec<3,1> 6 | round(2.75::dec<3,2>, 1::i32) = 2.8::dec<4,2> 7 | 8 | # negative_rounding: Examples with negative rounding 9 | round(2.0::dec<2,1>, -2::i32) = 0::dec<3,1> 10 | round(123::dec<3,0>, -2::i32) = 100::dec<4,0> 11 | round(8793.5::dec<5,1>, -2::i32) = 8800::dec<6,1> 12 | -------------------------------------------------------------------------------- /tests/cases/string/string_split.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | string_split('abc'::str, ' '::str) = ['abc']::list 6 | string_split('abc abc'::str, ' '::str) = ['abc', 'abc']::list 7 | string_split('bacad'::str, 'a'::str) = ['b', 'c', 'd']::list 8 | string_split('a b c d'::str, ' '::str) = ['a', 'b', 'c', 'd']::list 9 | string_split('a b c d'::str, null::str) = ['a b c d']::list 10 | 
-------------------------------------------------------------------------------- /ci/release/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | npx --yes \ 8 | -p "semantic-release@24.1.2" \ 9 | -p "@semantic-release/commit-analyzer" \ 10 | -p "@semantic-release/release-notes-generator" \ 11 | -p "@semantic-release/changelog" \ 12 | -p "@semantic-release/github" \ 13 | -p "@semantic-release/exec" \ 14 | -p "@semantic-release/git" \ 15 | -p "conventional-changelog-conventionalcommits@8.0.0" \ 16 | semantic-release --ci 17 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: >- 3 | Substrait: Cross-Language Serialization for Relational 4 | Algebra 5 | message: >- 6 | If you use this software, please cite it using the 7 | metadata from this file. 
8 | type: software 9 | authors: 10 | - given-names: substrait-io 11 | identifiers: 12 | - type: url 13 | value: 'https://github.com/substrait-io/substrait' 14 | repository-code: 'https://github.com/substrait-io/substrait' 15 | url: 'https://substrait.io/' 16 | license: Apache-2.0 17 | date-released: '2021-09-01' 18 | -------------------------------------------------------------------------------- /tests/cases/string/like.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | like('abcdefg'::str, 'abcdefg'::str) = true::bool 6 | like('abcdefg'::str, 'abc'::str) = false::bool 7 | 8 | # wildcard: Examples using wildcards 9 | like('abcdefg'::str, 'abc%'::str) = true::bool 10 | like('abcdefg'::str, '%efg'::str) = true::bool 11 | like('abcdefg'::str, '_bcdefg'::str) = true::bool 12 | like('abcdefg'::str, 'abc_efg'::str) = true::bool 13 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/acosh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | acosh(1.0::fp64) = 0.0::fp64 6 | acosh(10.0005::fp64) = 2.9932730967481995::fp64 7 | acosh(null::fp64) = null::fp64 8 | 9 | # On_domain_error: Examples demonstrating On_domain_error behavior 10 | acosh(0.01::fp32) [on_domain_error:ERROR] = 11 | acosh(0.5::fp64) [on_domain_error:NAN] = nan::fp64 12 | acosh(0.5::fp32) [on_domain_error:NONE] = null::fp32 13 | -------------------------------------------------------------------------------- /tests/cases/string/lower.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### 
SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lower('ABC'::str) = 'abc'::str 6 | lower('aBc'::str) = 'abc'::str 7 | lower('abc'::str) = 'abc'::str 8 | lower(''::str) = ''::str 9 | 10 | # null_input: Examples with null as input 11 | lower(null::str) = null::str 12 | 13 | # unicode: Examples with unicode characters as input 14 | lower('ÆÆÃÃA'::str) [full_unicode:TRUE] = 'ææããa'::str 15 | lower('😄'::str) = '😄'::str 16 | -------------------------------------------------------------------------------- /tests/cases/string/upper.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | upper('abc'::str) = 'ABC'::str 6 | upper('aBc'::str) = 'ABC'::str 7 | upper('ABC'::str) = 'ABC'::str 8 | upper(''::str) = ''::str 9 | 10 | # null_input: Examples with null as input 11 | upper(null::str) = null::str 12 | 13 | # unicode: Examples with unicode characters as input 14 | upper('ææããa'::str) [full_unicode:TRUE] = 'ÆÆÃÃA'::str 15 | upper('😄'::str) = '😄'::str 16 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sqrt.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sqrt(25::i64) = 5::fp64 6 | sqrt(0::i64) = 0::fp64 7 | sqrt(-1::i64) [on_domain_error:NAN] = null::fp64 8 | sqrt(-9223372036854775800::i64) [on_domain_error:NAN] = null::fp64 9 | sqrt(9223372036854775800::i64) = 3037000499.97605::fp64 10 | sqrt(null::i64) = null::fp64 11 | sqrt(6.25::fp32) = 2.5::fp32 12 | sqrt(2.0000007152557373046875::fp64) = 1.4142138152541635::fp64 13 | 
-------------------------------------------------------------------------------- /tests/cases/string/reverse.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | reverse('abc'::str) = 'cba'::str 6 | reverse('aBc'::str) = 'cBa'::str 7 | reverse(' 123'::str) = '321 '::str 8 | reverse(''::str) = ''::str 9 | 10 | # null_input: Examples with null as input 11 | reverse(null::str) = null::str 12 | 13 | # unicode: Examples with unicode characters as input 14 | reverse('ææããa'::str) = 'aããææ'::str 15 | reverse('😔😄'::str) = '😄😔'::str 16 | -------------------------------------------------------------------------------- /grammar/README.md: -------------------------------------------------------------------------------- 1 | # Grammar 2 | This file defines the grammars for: 3 | 1. The Substrait Type language used in the YAML extensions. 4 | 2. The test grammar language used to unit test functions. 5 | 6 | ## Regenerating 7 | To regenerate all of the parsers, use the following command: 8 | ```sh 9 | make all 10 | ``` 11 | 12 | ### Requirements 13 | You will need [ANTLR](https://www.antlr.org/index.html) available on your machine to regenerate the parser. 
14 | 15 | #### MacOS 16 | ``` 17 | brew install antlr 18 | ``` 19 | 20 | #### Ubuntu 21 | ``` 22 | sudo apt-get install antlr4 23 | ``` -------------------------------------------------------------------------------- /tests/cases/boolean/bool_or.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bool_or((true, true)::bool) = true::bool 6 | bool_or((false, false)::bool) = false::bool 7 | bool_or((true, false)::bool) = true::bool 8 | bool_or((false)::bool) = false::bool 9 | bool_or((true)::bool) = true::bool 10 | bool_or((true, null)::bool) = true::bool 11 | bool_or((null, null)::bool) = null::bool 12 | bool_or((false, null)::bool) = false::bool 13 | bool_or(()::bool) = null::bool 14 | -------------------------------------------------------------------------------- /tests/cases/datetime/add_intervals.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | add_intervals('PT10H'::iday, 'PT5H'::iday) = 'P0DT15H0M0S'::iday 6 | add_intervals('P10D'::iday, 'P5D'::iday) = 'P15D'::iday 7 | add_intervals('P1D'::iday, 'PT10H'::iday) = 'P1DT10H0M0S'::iday 8 | 9 | # null_input: Basic examples where the input args or return is null 10 | add_intervals(null::iyear, 'P1Y'::iyear) = null::iyear 11 | add_intervals(null::iday, 'P1D'::iday) = null::iday 12 | -------------------------------------------------------------------------------- /tests/cases/rounding/round.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml' 3 | 4 | # basic: Basic examples without any 
special cases 5 | round(2::i8, 2::i32) = 2::i8 6 | round(2.75::fp32, 1::i32) = 2.8::fp32 7 | round(2.0000007152457373046875::fp64, 10::i32) = 2.0000007152::fp64 8 | round(2.0000007152457373046875::fp64, 10::i32) = 2.0000007152::fp64 9 | 10 | # negative_rounding: Examples with negative rounding 11 | round(2::i8, -2::i32) = 0::i8 12 | round(123::i8, -2::i32) = 100::i8 13 | round(8793::i16, -2::i32) = 8800::i16 14 | -------------------------------------------------------------------------------- /site/examples/extensions/double_function.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:example:double_function 4 | scalar_functions: 5 | - 6 | name: "double" 7 | description: "Double the value" 8 | impls: 9 | - args: 10 | - name: x 11 | value: fp32 12 | options: 13 | on_domain_error: 14 | values: [ NAN, ERROR ] 15 | return: fp32 16 | - args: 17 | - name: x 18 | value: i32 19 | options: 20 | on_domain_error: 21 | values: [ NAN, ERROR ] 22 | return: i32 23 | -------------------------------------------------------------------------------- /tests/cases/boolean/bool_and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bool_and((true, true)::bool) = true::bool 6 | bool_and((true, false)::bool) = false::bool 7 | bool_and((false, false)::bool) = false::bool 8 | bool_and((false)::bool) = false::bool 9 | bool_and((true)::bool) = true::bool 10 | bool_and((true, null)::bool) = true::bool 11 | bool_and((null, null)::bool) = null::bool 12 | bool_and((false, null)::bool) = false::bool 13 | bool_and(()::bool) = null::bool 14 | -------------------------------------------------------------------------------- /tests/cases/boolean/or.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | or(true::bool, true::bool) = true::bool 6 | or(true::bool, false::bool) = true::bool 7 | or(false::bool, false::bool) = false::bool 8 | 9 | # null_input: Examples with null as input 10 | or(true::bool, null::bool) = true::bool 11 | or(null::bool, true::bool) = true::bool 12 | or(false::bool, null::bool) = null::bool 13 | or(null::bool, false::bool) = null::bool 14 | or(null::bool, null::bool) = null::bool 15 | -------------------------------------------------------------------------------- /tests/cases/string/concat_ws.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | concat_ws(','::str, 'Banana'::str, 'Apple'::str, 'Melon'::str) = 'Banana,Apple,Melon'::str 6 | concat_ws(''::str, 'Banana'::str, 'Apple'::str) = 'BananaApple'::str 7 | concat_ws(null::str, 'Banana'::str, 'Apple'::str, 'Melon'::str) = null::str 8 | concat_ws(','::str, null::str, 'Apple'::str, 'Melon'::str) = 'Apple,Melon'::str 9 | concat_ws(','::str, 'Apple'::str, null::str, 'Melon'::str) = 'Apple,Melon'::str 10 | -------------------------------------------------------------------------------- /tests/cases/boolean/and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | and(true::bool, true::bool) = true::bool 6 | and(true::bool, false::bool) = false::bool 7 | and(false::bool, false::bool) = false::bool 8 | 9 | # null_input: Examples with null as input 10 | and(true::bool, 
null::bool) = null::bool 11 | and(null::bool, true::bool) = null::bool 12 | and(false::bool, null::bool) = false::bool 13 | and(null::bool, false::bool) = false::bool 14 | and(null::bool, null::bool) = null::bool 15 | -------------------------------------------------------------------------------- /tests/cases/boolean/xor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | xor(true::bool, false::bool) = true::bool 6 | xor(true::bool, true::bool) = false::bool 7 | xor(false::bool, false::bool) = false::bool 8 | xor(false::bool, true::bool) = true::bool 9 | 10 | # null_input: Examples with null as input 11 | xor(true::bool, null::bool) = null::bool 12 | xor(null::bool, true::bool) = null::bool 13 | xor(false::bool, null::bool) = null::bool 14 | xor(null::bool, false::bool) = null::bool 15 | -------------------------------------------------------------------------------- /site/docs/tools/third_party_tools.md: -------------------------------------------------------------------------------- 1 | # Third Party Tools 2 | 3 | ## Substrait-tools 4 | The [substrait-tools](https://pypi.org/project/substrait-tools/) python package provides 5 | a command line interface for producing/consuming substrait plans by leveraging the APIs 6 | from different producers and consumers. 7 | 8 | ## Substrait Fiddle 9 | [Substrait Fiddle](https://substrait-fiddle.com) is an online tool to share, debug, and prototype Substrait plans. 10 | 11 | The [Substrait Fiddle Source](https://github.com/voltrondata/substrait-fiddle) is available allowing it to be run in any environment. 
12 | 13 | -------------------------------------------------------------------------------- /core.go: -------------------------------------------------------------------------------- 1 | // Package substrait provides access to Substrait artifacts via embed.FS. 2 | // Use substrait.GetSubstraitFS() to retrieve the embed.FS object. 3 | package substrait 4 | 5 | import "embed" 6 | 7 | //go:embed extensions/* 8 | var substraitExtensionsFS embed.FS 9 | 10 | func GetSubstraitFS() embed.FS { 11 | return substraitExtensionsFS 12 | } 13 | 14 | func GetSubstraitExtensionsFS() embed.FS { 15 | return substraitExtensionsFS 16 | } 17 | 18 | //go:embed tests/cases/*/*.test 19 | var substraitTestsFS embed.FS 20 | 21 | func GetSubstraitTestsFS() embed.FS { 22 | return substraitTestsFS 23 | } 24 | -------------------------------------------------------------------------------- /tests/cases/string/replace.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | replace('abcabcabc'::str, 'bc'::str, 'dd'::str) = 'addaddadd'::str 6 | replace('abcabcabc'::str, ' '::str, 'dd'::str) = 'abcabcabc'::str 7 | replace('abc def ghi'::str, ' '::str, ','::str) = 'abc,def,ghi'::str 8 | 9 | # null_input: Examples with null as input 10 | replace('abcd'::str, null::str, ','::str) = null::str 11 | replace('abcd'::str, ' '::str, null::str) = null::str 12 | replace(null::str, ' '::str, ','::str) = null::str 13 | -------------------------------------------------------------------------------- /tests/cases/string/left.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | left('abcdef'::str, 2::i32) = 'ab'::str 6 | 
left('abcdef'::str, 6::i32) = 'abcdef'::str 7 | left('abcdef'::str, 10::i32) = 'abcdef'::str 8 | left(' abcdef abcdef'::str, 10::i32) = ' abcdef '::str 9 | left(null::str, 10::i32) = null::str 10 | left('abcdef'::str, null::i32) = null::str 11 | 12 | # unicode: Examples with unicode characters as input 13 | left('ææããa'::str, 2::i32) = 'ææ'::str 14 | left('😔😄😔😄'::str, 2::i32) = '😔😄'::str 15 | -------------------------------------------------------------------------------- /tests/cases/string/bit_length.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bit_length('abc'::str) = 24::i64 6 | bit_length(''::str) = 0::i64 7 | bit_length(' '::str) = 8::i64 8 | bit_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 384::i64 9 | bit_length(' 456'::str) = 48::i64 10 | 11 | # null_input: Examples with null as input 12 | bit_length(null::str) = null::i64 13 | 14 | # unicode: Examples with unicode characters as input 15 | bit_length('à'::str) = 16::i64 16 | bit_length('😄'::str) = 32::i64 17 | -------------------------------------------------------------------------------- /tests/cases/string/right.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | right('abcdef'::str, 2::i32) = 'ef'::str 6 | right('abcdef'::str, 6::i32) = 'abcdef'::str 7 | right('abcdef'::str, 10::i32) = 'abcdef'::str 8 | right(' abcdef abcdef'::str, 10::i32) = 'ef abcdef'::str 9 | right(null::str, 10::i32) = null::str 10 | right('abcdef'::str, null::i32) = null::str 11 | 12 | # unicode: Examples with unicode characters as input 13 | right('ææããa'::str, 2::i32) = 'ãa'::str 14 | right('😔😄😔😄'::str, 2::i32) = 
'😔😄'::str 15 | -------------------------------------------------------------------------------- /tests/cases/string/char_length.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | char_length('abc'::str) = 3::i64 6 | char_length(''::str) = 0::i64 7 | char_length(' '::str) = 1::i64 8 | char_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 48::i64 9 | char_length(' 456'::str) = 6::i64 10 | 11 | # null_input: Examples with null as input 12 | char_length(null::str) = null::i64 13 | 14 | # unicode: Examples with unicode characters as input 15 | char_length('à'::str) = 1::i64 16 | char_length('😄'::str) = 1::i64 17 | -------------------------------------------------------------------------------- /tests/cases/string/octet_length.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | octet_length('abc'::str) = 3::i64 6 | octet_length(''::str) = 0::i64 7 | octet_length(' '::str) = 1::i64 8 | octet_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 48::i64 9 | octet_length(' 456'::str) = 6::i64 10 | 11 | # null_input: Examples with null as input 12 | octet_length(null::str) = null::i64 13 | 14 | # unicode: Examples with unicode characters as input 15 | octet_length('à'::str) = 2::i64 16 | octet_length('😄'::str) = 4::i64 17 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/shift_right.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples 
without any special cases 5 | shift_right(2::i32, 1::i32) = 1::i32 6 | shift_right(1024::i32, 8::i32) = 4::i32 7 | shift_right(301989888::i64, 3::i32) = 37748736::i64 8 | shift_right(301989888::i64, 16::i32) = 4608::i64 9 | shift_right(-3::i32, 1::i32) = -2::i32 10 | shift_right(-3::i32, 2::i32) = -1::i32 11 | shift_right(-3::i64, 1::i32) = -2::i64 12 | shift_right(-3::i64, 2::i32) = -1::i64 13 | shift_right(null::i64, 2::i32) = null::i64 14 | shift_right(127::i64, null::i32) = null::i64 15 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_not.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_not(0::i8) = -1::i8 6 | bitwise_not(1::i8) = -2::i8 7 | bitwise_not(-127::i8) = 126::i8 8 | bitwise_not(31766::i16) = -31767::i16 9 | bitwise_not(-31766::i16) = 31765::i16 10 | bitwise_not(2147483647::i32) = -2147483648::i32 11 | bitwise_not(2147483647::i32) = -2147483648::i32 12 | bitwise_not(9223372036854775807::i64) = -9223372036854775808::i64 13 | bitwise_not(-9223372036854775807::i64) = 9223372036854775806::i64 14 | bitwise_not(null::i64) = null::i64 15 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/shift_left.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | shift_left(2::i32, 1::i32) = 4::i32 6 | shift_left(511::i32, 8::i32) = 130816::i32 7 | shift_left(301989888::i64, 3::i32) = 2415919104::i64 8 | shift_left(301989888::i64, 8::i32) = 19791209299968::i64 9 | shift_left(-3::i32, 1::i32) = -6::i32 10 | shift_left(-3::i32, 2::i32) = -12::i32 11 | 
shift_left(-3::i64, 1::i32) = -6::i64 12 | shift_left(-3::i64, 2::i32) = -12::i64 13 | shift_left(null::i64, 2::i32) = null::i64 14 | shift_left(127::i64, null::i32) = null::i64 15 | -------------------------------------------------------------------------------- /site/docs/tutorial/examples.md: -------------------------------------------------------------------------------- 1 | # Code samples and examples 2 | 3 | It's very useful to have examples of how APIs are used, both to learn the best practices for using them and to get ideas of how they can be used. 4 | 5 | Each language binding is intended to contain examples that are relevant to that language. New contributions are always welcome. 6 | 7 | ## Java 8 | 9 | - [Substrait-Spark](https://github.com/substrait-io/substrait-java/tree/main/examples/substrait-spark): demonstrates how Substrait plans can be created and consumed within Apache Spark. The examples run within a simple Spark cluster composed of a couple of Docker containers.
10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /tests/cases/boolean/and_not.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | and_not(true::bool, false::bool) = true::bool 6 | and_not(true::bool, true::bool) = false::bool 7 | and_not(false::bool, true::bool) = false::bool 8 | and_not(false::bool, false::bool) = false::bool 9 | 10 | # null_input: Examples with null as input 11 | and_not(true::bool, null::bool) = null::bool 12 | and_not(null::bool, false::bool) = null::bool 13 | and_not(false::bool, null::bool) = false::bool 14 | and_not(null::bool, true::bool) = false::bool 15 | and_not(null::bool, null::bool) = null::bool 16 | -------------------------------------------------------------------------------- /tests/cases/list/transform.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_list.yaml' 3 | ### SUBSTRAIT_DEPENDENCY: '/extensions/functions_arithmetic.yaml' 4 | 5 | # basic: Basic list transforming examples 6 | transform([1, 2, 3]::list, (x -> multiply(x, 2::i32))::func i32>) = [2, 4, 6]::list 7 | transform([5, 10, 15]::list, (x -> add(x, 1::i32))::func i32>) = [6, 11, 16]::list 8 | transform([]::list, (x -> multiply(x, 2::i32))::func i32>) = []::list 9 | 10 | # nullable_elements: Handling nullable elements 11 | transform([1, Null, 3]::list, (x -> multiply(x, 2::i32))::func i32?>) = [2, Null, 6]::list 12 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/abs.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: 
'/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | abs(25::i8) = 25::i8 6 | abs(-200::i16) = 200::i16 7 | abs(30000::i32) = 30000::i32 8 | abs(-9223372036854775800::i64) = 9223372036854775800::i64 9 | abs(2.55::fp32) = 2.55::fp32 10 | abs(-2.0000007152557373046875::fp64) = 2.0000007152557373046875::fp64 11 | 12 | # null_input: Examples with null as input 13 | abs(null::i8) = null::i8 14 | 15 | # overflow: Examples demonstrating overflow behavior 16 | abs(-128::i8) [overflow:ERROR] = 17 | abs(-128::i8) [overflow:SATURATE] = 127::i8 18 | abs(-128::i8) [overflow:SILENT] = 19 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/factorial_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | factorial(0::dec<1, 0>) = 1::dec<38, 0> 6 | factorial(1::dec<1, 0>) = 1::dec<38, 0> 7 | factorial(20::dec<2, 0>) = 2432902008176640000::dec<38, 0> 8 | 9 | # overflow: Examples demonstrating overflow behavior 10 | factorial(34::dec<2, 0>) = 11 | 12 | # negative_value: Examples demonstrating behavior on negative value 13 | factorial(-1::dec<1, 0>) = 14 | 15 | # null_values: test with null values 16 | factorial(null::dec<38, 0>) = null::dec<38, 0> 17 | factorial(null::dec<1, 0>) = null::dec<38, 0> 18 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_or.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_or(0::i8, 1::i8) = 1::i8 6 | bitwise_or(127::i8, 127::i8) = 127::i8 7 | bitwise_or(-127::i8, 
-10::i8) = -9::i8 8 | bitwise_or(31766::i16, 900::i16) = 32662::i16 9 | bitwise_or(-31766::i16, 900::i16) = -31762::i16 10 | bitwise_or(2147483647::i32, 123456789::i32) = 2147483647::i32 11 | bitwise_or(9223372036854775807::i64, 127::i64) = 9223372036854775807::i64 12 | bitwise_or(-9223372036854775807::i64, 127::i64) = -9223372036854775681::i64 13 | bitwise_or(null::i64, 127::i64) = null::i64 14 | bitwise_or(127::i64, null::i64) = null::i64 15 | -------------------------------------------------------------------------------- /tests/cases/string/concat.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | concat('abcd'::str, 'efg'::str) = 'abcdefg'::str 6 | 7 | # null_input: Examples with null as input 8 | concat('abcd'::str, null::str) [null_handling:ACCEPT_NULLS] = null::str 9 | concat('abcd'::str, null::str) [null_handling:IGNORE_NULLS] = 'abcd'::str 10 | concat(null::str, 'abcd'::str) [null_handling:ACCEPT_NULLS] = null::str 11 | concat(null::str, 'abcd'::str) [null_handling:IGNORE_NULLS] = 'abcd'::str 12 | concat(null::str, null::str) [null_handling:ACCEPT_NULLS] = null::str 13 | concat(null::str, null::str) [null_handling:IGNORE_NULLS] = ''::str 14 | -------------------------------------------------------------------------------- /tests/cases/string/lpad.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lpad('abcdef'::str, 10::i32, ' '::str) = ' abcdef'::str 6 | lpad('abcdef '::str, 20::i32, '1'::str) = '1111111111abcdef '::str 7 | lpad(' abcdef'::str, 20::i32, '1'::str) = '1111111111 abcdef'::str 8 | lpad('abcdef'::str, 6::i32, ' '::str) = 'abcdef'::str 9 | 
lpad('abcdef'::str, 20::i32, 'aabb'::str) = 'aabbaabbaabbaaabcdef'::str 10 | lpad('abcdef'::str, 4::i32, ' '::str) = 'abcd'::str 11 | lpad('abcdef'::str, -1::i32, ' '::str) = ''::str 12 | lpad(null::str, 4::i32, ' '::str) = null::str 13 | lpad('abcdef'::str, 10::i32, null::str) = null::str 14 | -------------------------------------------------------------------------------- /tests/cases/string/rpad.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | rpad('abcdef'::str, 10::i32, ' '::str) = 'abcdef '::str 6 | rpad('abcdef '::str, 20::i32, '1'::str) = 'abcdef 1111111111'::str 7 | rpad(' abcdef'::str, 20::i32, '1'::str) = ' abcdef1111111111'::str 8 | rpad('abcdef'::str, 6::i32, ' '::str) = 'abcdef'::str 9 | rpad('abcdef'::str, 20::i32, 'aabb'::str) = 'abcdefaabbaabbaabbaa'::str 10 | rpad('abcdef'::str, 4::i32, ' '::str) = 'abcd'::str 11 | rpad('abcdef'::str, -1::i32, ' '::str) = ''::str 12 | rpad(null::str, 4::i32, ' '::str) = null::str 13 | rpad('abcdef'::str, 10::i32, null::str) = null::str 14 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/ln.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ln(100000::i64) = 11.512925464970229::fp64 6 | ln(1.0::fp32) = 0::fp32 7 | ln(2.015::fp64) = 0.7006191953986464::fp64 8 | 9 | # infinity: Examples with infinity as input 10 | ln(-inf::fp64) [on_domain_error:ERROR] = 11 | ln(-inf::fp64) [on_domain_error:NAN] = nan::fp64 12 | ln(-inf::fp64) [on_domain_error:NONE] = null::fp64 13 | ln(inf::fp64) = inf::fp64 14 | 15 | # log_zero: Examples with log zero 16 | ln(0.0::fp64) 
[on_log_zero:ERROR] = 17 | ln(0.0::fp64) [on_log_zero:NAN] = null::fp64 18 | ln(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_and(0::i8, 1::i8) = 0::i8 6 | bitwise_and(127::i8, 127::i8) = 127::i8 7 | bitwise_and(-127::i8, -10::i8) = -128::i8 8 | bitwise_and(31766::i16, 900::i16) = 4::i16 9 | bitwise_and(-31766::i16, 900::i16) = 896::i16 10 | bitwise_and(2147483647::i32, 1234567::i32) = 1234567::i32 11 | bitwise_and(2147483647::i32, 1234567::i32) = 1234567::i32 12 | bitwise_and(9223372036854775807::i64, 127::i64) = 127::i64 13 | bitwise_and(-9223372036854775807::i64, 127::i64) = 1::i64 14 | bitwise_and(null::i64, 127::i64) = null::i64 15 | bitwise_and(127::i64, null::i64) = null::i64 16 | -------------------------------------------------------------------------------- /tests/cases/list/filter.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_list.yaml' 3 | ### SUBSTRAIT_DEPENDENCY: '/extensions/functions_comparison.yaml' 4 | 5 | # basic: Basic filtering examples 6 | filter([1, 2, 3, 4, 5]::list, (x -> gt(x, 2::i32))::func bool>) = [3, 4, 5]::list 7 | filter([1, 2, 3, 4, 5]::list, (x -> lt(x, 3::i32))::func bool>) = [1, 2]::list 8 | filter([]::list, (x -> gt(x, 0::i32))::func bool>) = []::list 9 | 10 | # nullable_elements: Handling nullable elements 11 | filter([1, null, 3]::list, (n -> gt(n, 0::i32))::func bool>) = [1, 3]::list 12 | filter([1, null, 3]::list, (e -> is_null(e))::func bool>) = [null]::list 13 | 
-------------------------------------------------------------------------------- /tests/cases/comparison/is_not_distinct_from.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_distinct_from(1::i16, 1::i16) = true::bool 6 | is_not_distinct_from(2::i16, 1::i16) = false::bool 7 | is_not_distinct_from(1.75::dec<38, 2>, 1.75::dec<38, 2>) = true::bool 8 | is_not_distinct_from(1.75::dec<38, 2>, 1.1::dec<38, 2>) = false::bool 9 | 10 | # null_input: Examples with null as input 11 | is_not_distinct_from(null::i16, 1::i16) = false::bool 12 | is_not_distinct_from(null::i16, null::i16) = true::bool 13 | is_not_distinct_from(10::dec<38, 0>, null::dec<38, 0>) = false::bool 14 | is_not_distinct_from(null::dec<38, 0>, null::dec<38, 0>) = true::bool 15 | -------------------------------------------------------------------------------- /site/docs/img/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cases/aggregate_approx/approx_count_distinct.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_aggregate_approx.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | approx_count_distinct((1, -2, 3, -4, 5, 6)::i8) = 6::i64 6 | approx_count_distinct((-32767, -20000, 30000, 5, 32767)::i16) = 5::i64 7 | approx_count_distinct((-2147483648, -10000000, 30000000, 2147483647)::i32) = 4::i64 8 | approx_count_distinct((-214748364800000, -1000000000, 0, 922337203685477580)::i64) = 4::i64 9 | approx_count_distinct((1)::i8) = 1::i64 10 | approx_count_distinct(()::i8) = 0::i64 11 | approx_count_distinct((Null, Null, Null)::i8) = 
0::i64 12 | approx_count_distinct((Null, Null, 4, 3, Null, 922337203685477580, 12833888)::i64) = 4::i64 13 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/log10.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | log10(100000::i64) = 5.0::fp64 6 | log10(1.0::fp32) = 0::fp32 7 | log10(2.015::fp64) = 0.3042750504771283::fp64 8 | 9 | # infinity: Examples with infinity as input 10 | log10(-inf::fp64) [on_domain_error:ERROR] = 11 | log10(-inf::fp64) [on_domain_error:NAN] = nan::fp64 12 | log10(-inf::fp64) [on_domain_error:NONE] = null::fp64 13 | log10(inf::fp64) = inf::fp64 14 | 15 | # log_zero: Examples with log zero 16 | log10(0.0::fp64) [on_log_zero:ERROR] = 17 | log10(0.0::fp64) [on_log_zero:NAN] = null::fp64 18 | log10(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/string/ltrim.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ltrim('abc'::str, ' '::str) = 'abc'::str 6 | ltrim(' abc'::str, ' '::str) = 'abc'::str 7 | ltrim('abc '::str, ' '::str) = 'abc '::str 8 | ltrim(' abc '::str, ' '::str) = 'abc '::str 9 | ltrim(''::str, ' '::str) = ''::str 10 | ltrim(' '::str, ' '::str) = ''::str 11 | ltrim(null::str, ' '::str) = null::str 12 | 13 | # two_inputs: Examples with character input to trim off 14 | ltrim('aaaaabc'::str, 'a'::str) [spaces_only:FALSE] = 'bc'::str 15 | ltrim('abcabcdef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str 16 | ltrim('abccbadef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str 17 | 
-------------------------------------------------------------------------------- /tests/cases/string/trim.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | trim('abc'::str, ' '::str) = 'abc'::str 6 | trim(' abc'::str, ' '::str) = 'abc'::str 7 | trim('abc '::str, ' '::str) = 'abc'::str 8 | trim(' abc '::str, ' '::str) = 'abc'::str 9 | trim(''::str, ' '::str) = ''::str 10 | trim(' '::str, ' '::str) = ''::str 11 | trim(null::str, ' '::str) = null::str 12 | 13 | # two_inputs: Examples with character input to trim off 14 | trim('aaaaabcccccaaa'::str, 'a'::str) [spaces_only:False] = 'bccccc'::str 15 | trim('defabcabcdef'::str, 'def'::str) [spaces_only:False] = 'abcabc'::str 16 | trim('abcdefcbaa'::str, 'abc'::str) [spaces_only:False] = 'def'::str 17 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/negate.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | negate(25::i8) = -25::i8 6 | negate(-200::i16) = 200::i16 7 | negate(30000::i32) = -30000::i32 8 | negate(9223372036854775800::i64) = -9223372036854775800::i64 9 | negate(2.50::fp32) = -2.50::fp32 10 | negate(2.000002861022949::fp64) = -2.000002861022949::fp64 11 | negate(inf::fp64) = -inf::fp64 12 | 13 | # null_input: Examples with null as input 14 | negate(null::i8) = null::i8 15 | 16 | # overflow: Examples demonstrating overflow behavior 17 | negate(-128::i8) [overflow:ERROR] = 18 | negate(-128::i8) [overflow:SATURATE] = 127::i8 19 | negate(-128::i8) [overflow:SILENT] = 20 | -------------------------------------------------------------------------------- 
/tests/cases/string/rtrim.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | rtrim('abc'::str, ' '::str) = 'abc'::str 6 | rtrim(' abc'::str, ' '::str) = ' abc'::str 7 | rtrim('abc '::str, ' '::str) = 'abc'::str 8 | rtrim(' abc '::str, ' '::str) = ' abc'::str 9 | rtrim(''::str, ' '::str) = ''::str 10 | rtrim(' '::str, ' '::str) = ''::str 11 | rtrim(null::str, ' '::str) = null::str 12 | 13 | # two_inputs: Examples with character input to trim off 14 | rtrim('aaaaabccccc'::str, 'c'::str) [spaces_only:FALSE] = 'aaaaab'::str 15 | rtrim('abcabcdef'::str, 'def'::str) [spaces_only:FALSE] = 'abcabc'::str 16 | rtrim('defabccba'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str 17 | -------------------------------------------------------------------------------- /tests/cases/datetime/subtract_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | subtract('2016-12-31T13:30:15'::ts, 'P5D'::iday) = '2016-12-26T13:30:15'::ts 6 | subtract('2016-12-01T13:30:15'::ts, 'P5Y'::iyear) = '2011-12-01T13:30:15'::ts 7 | subtract('2016-12-01T13:30:15'::ts, 'PT5H'::iday) = '2016-12-01T08:30:15'::ts 8 | 9 | # date: examples using the date type 10 | subtract('2020-12-31'::date, 'P5D'::iday) = '2020-12-26'::date 11 | subtract('2020-12-31'::date, 'P5Y'::iyear) = '2015-12-31'::date 12 | subtract('2020-12-31'::date, 'P5M'::iyear) = '2020-07-31'::date 13 | 14 | # null_input: examples with null args or return 15 | subtract(null::date, 'P5D'::iday) = null::date 16 | -------------------------------------------------------------------------------- /site/examples/extensions/lambda_function_example.yaml: 
-------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:functions_list 4 | scalar_functions: 5 | - name: "transform" 6 | description: >- 7 | Transforms each element of a list using a lambda function. 8 | Also known as "map" in functional programming. 9 | 10 | Returns a new list where each element is the result of applying 11 | the transformer function to the corresponding element in the input list. 12 | 13 | The lambda receives one parameter (the current element) and must return 14 | the transformed value. 15 | impls: 16 | - args: 17 | - name: input 18 | value: list 19 | - name: transformer 20 | value: func any2> 21 | nullability: MIRROR 22 | return: list 23 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/shift_right_unsigned.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | shift_right_unsigned(2::i32, 1::i32) = 1::i32 6 | shift_right_unsigned(1024::i32, 8::i32) = 4::i32 7 | shift_right_unsigned(301989888::i64, 3::i32) = 37748736::i64 8 | shift_right_unsigned(301989888::i64, 16::i32) = 4608::i64 9 | shift_right_unsigned(-3::i32, 1::i32) = 2147483646::i32 10 | shift_right_unsigned(-3::i32, 2::i32) = 1073741823::i32 11 | shift_right_unsigned(-3::i64, 1::i32) = 9223372036854775806::i64 12 | shift_right_unsigned(-3::i64, 2::i32) = 4611686018427387903::i64 13 | shift_right_unsigned(null::i64, 2::i32) = null::i64 14 | shift_right_unsigned(127::i64, null::i32) = null::i64 15 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_xor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### 
SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_xor(0::i8, 1::i8) = 1::i8 6 | bitwise_xor(127::i8, 127::i8) = 0::i8 7 | bitwise_xor(-127::i8, -10::i8) = 119::i8 8 | bitwise_xor(31766::i16, 900::i16) = 32658::i16 9 | bitwise_xor(-31766::i16, 900::i16) = -32658::i16 10 | bitwise_xor(2147483647::i32, 123456789::i32) = 2024026858::i32 11 | bitwise_xor(2147483647::i32, 123456789::i32) = 2024026858::i32 12 | bitwise_xor(9223372036854775807::i64, 127::i64) = 9223372036854775680::i64 13 | bitwise_xor(-9223372036854775807::i64, 127::i64) = -9223372036854775682::i64 14 | bitwise_xor(null::i64, 127::i64) = null::i64 15 | bitwise_xor(127::i64, null::i64) = null::i64 16 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/log2.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | log2(100000::i64) = 16.609640474436812::fp64 6 | log2(1.0::fp32) = 0::fp32 7 | log2(8.0::fp64) = 3.0::fp64 8 | log2(2.015::fp64) = 1.0107798387532427::fp64 9 | 10 | # infinity: Examples with infinity as input 11 | log2(-inf::fp64) [on_domain_error:ERROR] = 12 | log2(-inf::fp64) [on_domain_error:NAN] = nan::fp64 13 | log2(-inf::fp64) [on_domain_error:NONE] = null::fp64 14 | log2(inf::fp64) = inf::fp64 15 | 16 | # log_zero: Examples with log zero 17 | log2(0.0::fp64) [on_log_zero:ERROR] = 18 | log2(0.0::fp64) [on_log_zero:NAN] = null::fp64 19 | log2(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 20 | -------------------------------------------------------------------------------- /tests/cases/datetime/add_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: 
'/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp types 5 | add('2016-12-31T13:30:15'::ts, 'P5D'::iday) = '2017-01-05T13:30:15'::ts 6 | add('2016-12-01T13:30:15'::ts, 'P5Y'::iyear) = '2021-12-01T13:30:15'::ts 7 | add('2016-12-01T13:30:15'::ts, 'PT5H'::iday) = '2016-12-01T18:30:15'::ts 8 | 9 | # date_to_timestamp: examples using the date types and resulting in a timestamp 10 | add('2020-12-31'::date, 'P5D'::iday) = '2021-01-05T00:00:00'::ts 11 | add('2020-12-31'::date, 'P5Y'::iyear) = '2025-12-31T00:00:00'::ts 12 | add('2020-12-31'::date, 'P5M'::iyear) = '2021-05-31T00:00:00'::ts 13 | 14 | # null_input: examples with null args or return 15 | add(null::date, 'P5D'::iday) = null::ts 16 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale PRs' 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' 5 | workflow_dispatch: 6 | 7 | permissions: 8 | pull-requests: write 9 | 10 | jobs: 11 | stale: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/stale@v10 15 | with: 16 | repo-token: ${{ secrets.GITHUB_TOKEN }} 17 | days-before-issue-stale: -1 # don't operate on issues 18 | stale-pr-message: | 19 | This PR has been automatically marked as stale because it has not had 20 | recent activity. It will be closed in 7 days if no further activity occurs. 21 | 22 | close-pr-message: | 23 | This PR has been automatically closed due to inactivity. 24 | If you believe this was closed in error, please reopen it. 25 | -------------------------------------------------------------------------------- /site/examples/proto-textformat/README.md: -------------------------------------------------------------------------------- 1 | # Protobuf Text Format Examples 2 | 3 | Each subdirectory contains examples of different protobuf message types in text format (textproto). 
These examples are embedded in the documentation and validated in CI/CD to ensure they remain valid as the proto schema evolves. 4 | 5 | We use protobuf text format (textproto) rather than JSON for these examples because textproto supports comments. This allows us to annotate examples inline with explanatory notes about what each field does. JSON does not support comments, which would make the examples less instructive. 6 | 7 | ## Directories 8 | 9 | - `lambda/` - Examples of `Expression.Lambda` messages 10 | - `lambda_invocation/` - Examples of `Expression.LambdaInvocation` messages 11 | - `field_reference/` - Examples of `Expression.FieldReference` messages 12 | -------------------------------------------------------------------------------- /tests/cases/string/ends_with.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ends_with('abcd'::str, 'd'::str) = true::bool 6 | ends_with('abcd'::str, 'a'::str) = false::bool 7 | ends_with('abcd'::str, 'CD'::str) = false::bool 8 | 9 | # case_insensitivity: character comparison with case insensitivity 10 | ends_with('abcd'::str, 'CD'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 11 | 12 | # multi_byte_characters: multi byte character comparison 13 | ends_with('😊a😊b😊😊'::str, 'b😊😊'::str) = true::bool 14 | 15 | # multi_byte_characters case insensitivity: multi byte character comparison with case insensitivity 16 | ends_with('😊a😊b😊😊'::str, 'B😊😊'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 17 | -------------------------------------------------------------------------------- /tests/cases/string/starts_with.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic:
Basic examples without any special cases 5 | starts_with('abcd'::str, 'a'::str) = true::bool 6 | starts_with('abcd'::str, 'z'::str) = false::bool 7 | starts_with('abcd'::str, 'AB'::str) = false::bool 8 | 9 | # case_insensitivity: character comparison with case insensitivity 10 | starts_with('abcd'::str, 'AB'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 11 | 12 | # multi_byte_characters: multi byte character comparison 13 | starts_with('😊a😊b😊😊'::str, '😊a'::str) = true::bool 14 | 15 | # multi_byte_characters case insensitivity: multi byte character comparison with case insensitivity 16 | starts_with('😊a😊b😊😊'::str, '😊A'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 17 | -------------------------------------------------------------------------------- /tests/cases/comparison/equal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | equal(1::i8, 1::i8) = true::bool 6 | equal(300::i16, 200::i16) = false::bool 7 | equal(-2147483648::i32, -2147483648::i32) = true::bool 8 | equal(9223372036854775807::i64, 9223372036854775804::i64) = false::bool 9 | equal(inf::fp64, inf::fp64) = true::bool 10 | equal(inf::fp64, 1.5e+308::fp64) = false::bool 11 | equal(10::dec<38, 0>, 10::dec<38, 0>) = true::bool 12 | equal(inf::fp64, -inf::fp64) = false::bool 13 | 14 | # null_input: Examples with null as input 15 | equal(null::i16, 1::i16) = null::bool 16 | equal(null::i16, null::i16) = null::bool 17 | equal(7::dec<38, 0>, null::dec<38, 0>) = null::bool 18 | equal(null::dec<38, 0>, null::dec<38, 0>) = null::bool 19 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/logb.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ###
SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | logb(10::i64, 100000::i64) = 5.0::fp64 6 | logb(7::fp64, 1.0::fp64) = 0::fp64 7 | logb(2::fp64, 7::fp64) = 2.8073549220576041::fp64 8 | 9 | # infinity: Examples with infinity as input 10 | logb(2.34::fp64, inf::fp64) = inf::fp64 11 | logb(10::fp64, -inf::fp64) [on_domain_error:ERROR] = 12 | logb(10::fp64, -inf::fp64) [on_domain_error:NAN] = nan::fp64 13 | logb(10::fp64, -inf::fp64) [on_domain_error:NONE] = null::fp64 14 | 15 | # log_zero: Examples with log zero 16 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:ERROR] = 17 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:NAN] = null::fp64 18 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 19 | -------------------------------------------------------------------------------- /extensions/type_variations.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:type_variations 4 | type_variations: 5 | - parent: string 6 | name: dict4 7 | description: a four-byte dictionary encoded string 8 | functions: INHERITS 9 | - parent: string 10 | name: bigoffset 11 | description: >- 12 | The arrow large string representation of strings, still restricted to the default string size defined in 13 | Substrait. 14 | functions: SEPARATE 15 | - parent: struct 16 | name: avro 17 | description: an avro encoded struct 18 | functions: SEPARATE 19 | - parent: struct 20 | name: cstruct 21 | description: a cstruct representation of the struct 22 | functions: SEPARATE 23 | - parent: struct 24 | name: dict2 25 | description: a 2-byte dictionary encoded string. 
26 | functions: INHERITS 27 | -------------------------------------------------------------------------------- /tests/cases/string/contains.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples contains as prefix 5 | contains('abcdefg'::str, 'abc'::str) = true::bool 6 | contains('abcdefg'::str, 'CdE'::str) = false::bool 7 | contains('abcdefg'::str, 'CdE'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 8 | contains('abcdefg'::str, 'cde'::str) = true::bool 9 | contains('abcdefg'::str, 'fg'::str) = true::bool 10 | contains('abcdefg'::str, 'aef'::str) = false::bool 11 | 12 | # multi_byte_characters: multi byte characters exists in the string 13 | contains('😊a😊b😊😊'::str, 'a😊b'::str) = true::bool 14 | contains('😊a😊b😊😊'::str, 'A😊B'::str) = false::bool 15 | contains('😊a😊b😊😊'::str, 'A😊B'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 16 | contains('😊a😊b😊😊'::str, 'a😊c'::str) = false::bool 17 | -------------------------------------------------------------------------------- /tests/cases/comparison/coalesce.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | coalesce(1::i8, 2::i8) = 1::i8 6 | coalesce(null::i8, 2::i8) = 2::i8 7 | coalesce(null::i16, null::i16) = null::i16 8 | coalesce(2000000::i32, null::i32) = 2000000::i32 9 | coalesce(null::i64, 9223372036854775807::i64) = 9223372036854775807::i64 10 | coalesce(null::fp32, -65.500000::fp32) = -65.500000::fp32 11 | coalesce(inf::fp64, -inf::fp64) = inf::fp64 12 | coalesce(7::dec<38, 0>, 4::dec<38, 0>) = 7::dec<38, 0> 13 | coalesce(null::dec<38, 0>, 2::dec<38, 0>) = 2::dec<38, 0> 14 | coalesce(null::dec<38, 0>, null::dec<38, 0>) = null::dec<38, 0> 15 | 
coalesce(2000000::dec<38, 0>, null::dec<38, 0>) = 2000000::dec<38, 0> 16 | coalesce(null::dec<38, 0>, 2000000::dec<38, 0>) = 2000000::dec<38, 0> 17 | -------------------------------------------------------------------------------- /extensions/functions_aggregate_approx.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:functions_aggregate_approx 4 | aggregate_functions: 5 | - name: "approx_count_distinct" 6 | description: >- 7 | Calculates the approximate number of rows that contain distinct values of the expression argument using 8 | HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which 9 | returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT 10 | processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact 11 | result. 12 | impls: 13 | - args: 14 | - name: x 15 | value: any 16 | nullability: DECLARED_OUTPUT 17 | decomposable: MANY 18 | intermediate: binary 19 | return: i64 20 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/nametake/pre-commit-buf 3 | rev: v2.0.0 4 | hooks: 5 | - id: buf-lint 6 | - repo: https://github.com/adrienverge/yamllint.git 7 | rev: v1.35.1 8 | hooks: 9 | - id: yamllint 10 | args: [-c=.yamllint.yaml] 11 | - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook 12 | rev: v9.20.0 13 | hooks: 14 | - id: commitlint 15 | stages: [commit-msg] 16 | - repo: https://github.com/psf/black 17 | rev: 24.8.0 18 | hooks: 19 | - id: black 20 | - repo: https://github.com/pycqa/flake8 21 | rev: 7.0.0 22 | hooks: 23 | - id: flake8 24 | - repo: local 25 | hooks: 26 | - id: check-substrait-extensions_coverage 27 | name: Check Substrait 
extensions and test coverage 28 | entry: pytest tests/test_extensions.py::test_substrait_extension_coverage 29 | language: python 30 | pass_filenames: false 31 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 6 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /site/examples/README.md: -------------------------------------------------------------------------------- 1 | # Documentation Examples 2 | 3 | This directory contains example files that are included in the Substrait documentation. 4 | 5 | By storing examples as separate files instead of inline in markdown, we can easily validate against schemas via CI/CD. 
6 | 7 | ## Directory Structure 8 | 9 | ``` 10 | examples/ 11 | ├── extensions/ # Extension function examples (e.g., any types) 12 | ├── types/ # User-defined type examples 13 | └── README.md # This file 14 | ``` 15 | 16 | All examples are validated against `text/simple_extensions_schema.yaml` in CI/CD. 17 | 18 | ## Including Examples in Markdown 19 | 20 | Use the pymdownx.snippets syntax to include example files: 21 | 22 | ````markdown 23 | ```yaml 24 | --8<-- "examples/extensions/distance_functions.yaml" 25 | ``` 26 | ```` 27 | 28 | The snippet will be rendered with syntax highlighting and the actual file content. 29 | -------------------------------------------------------------------------------- /site/examples/proto-textformat/lambda/nested_lambda_capture.textproto: -------------------------------------------------------------------------------- 1 | # Represents: `(outer_x: i32) -> ((inner_y: i32) -> add(outer_x, inner_y))` 2 | # Demonstrates steps_out: 3 | # - steps_out: 1 with struct_field: 0 -> outer_x 4 | # - steps_out: 0 with struct_field: 0 -> inner_y 5 | # 6 | # message Expression.Lambda 7 | 8 | parameters: {types: [{i32: {nullability: NULLABILITY_REQUIRED}}]} 9 | body: { 10 | lambda: { 11 | parameters: {types: [{i32: {nullability: NULLABILITY_REQUIRED}}]} 12 | body: { 13 | scalar_function: { 14 | function_reference: 1 # reference to add 15 | arguments: [ 16 | {value: {selection: {lambda_parameter_reference: {steps_out: 1}, direct_reference: {struct_field: {field: 0}}}}}, # outer_x 17 | {value: {selection: {lambda_parameter_reference: {steps_out: 0}, direct_reference: {struct_field: {field: 0}}}}} # inner_y 18 | ] 19 | } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /tests/cases/comparison/nullif.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # 
basic: Basic examples without any special cases 5 | nullif(10::i8, 2::i8) = 10::i8 6 | nullif(3::i8, 3::i8) = null::i8 7 | nullif(1::i16, 5::i16) = 1::i16 8 | nullif(1::i16, 1::i16) = null::i16 9 | nullif(7.25::fp32, 1.00::fp32) = 7.25::fp32 10 | nullif(1.11::fp32, 1.11::fp32) = null::fp32 11 | nullif(false::bool, true::bool) = false::bool 12 | nullif(true::bool, false::bool) = true::bool 13 | nullif(false::bool, false::bool) = null::bool 14 | nullif(true::bool, true::bool) = null::bool 15 | 16 | # null_input: Examples with null as input 17 | nullif(null::bool, true::bool) = null::bool 18 | nullif(true::bool, null::bool) = true::bool 19 | nullif(null::bool, null::bool) = null::bool 20 | nullif(10::dec<38, 0>, null::dec<38, 0>) = 10::dec<38, 0> 21 | nullif(null::dec<38, 0>, null::dec<38, 0>) = null::bool 22 | -------------------------------------------------------------------------------- /extensions/functions_set.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:functions_set 4 | scalar_functions: 5 | - 6 | name: "index_in" 7 | description: > 8 | Checks the membership of a value in a list of values 9 | 10 | Returns the first 0-based index value of some input `needle` if `needle` is equal to 11 | any element in `haystack`. Returns `NULL` if not found. 12 | 13 | If `needle` is `NULL`, returns `NULL`. 14 | 15 | If `needle` is `NaN`: 16 | - Returns 0-based index of `NaN` in `haystack` (default) 17 | - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified) 18 | impls: 19 | - args: 20 | - name: needle 21 | value: any1 22 | - name: haystack 23 | value: list 24 | options: 25 | nan_equality: 26 | values: [ NAN_IS_NAN, NAN_IS_NOT_NAN ] 27 | nullability: DECLARED_OUTPUT 28 | return: i64?
29 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/divide.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | divide(25::i8, 5::i8) = 5::i8 6 | divide(200::i16, -100::i16) = -2::i16 7 | divide(60000::i32, 200::i32) = 300::i32 8 | divide(4000000000::i64, -5000::i64) = -800000::i64 9 | 10 | # division_by_zero: Examples demonstrating division by zero 11 | divide(5::i8, 0::i8) [on_division_by_zero:NAN] = null::i8 12 | divide(5::i8, 0::i8) [on_division_by_zero:ERROR] = 13 | 14 | # overflow: Examples demonstrating overflow behavior 15 | divide(-9223372036854775808::i64, -1::i64) [overflow:ERROR] = 16 | divide(-128::i8, -1::i8) [overflow:SATURATE] = 127::i8 17 | 18 | # floating_exception: Examples demonstrating exceptional floating point cases 19 | divide(1.5e+208::fp64, 1.5e-200::fp64) = inf::fp64 20 | divide(1.5e+200::fp64, -1.5e-208::fp64) = -inf::fp64 21 | -------------------------------------------------------------------------------- /site/docs/expressions/user_defined_functions.md: -------------------------------------------------------------------------------- 1 | # User-Defined Functions 2 | 3 | Substrait supports the creation of custom functions using [simple extensions](../extensions/index.md#simple-extensions), using the facilities described in [scalar functions](scalar_functions.md). The functions defined by Substrait use the same mechanism. The extension files for standard functions can be found [here](https://github.com/substrait-io/substrait/tree/main/extensions). 4 | 5 | Here's an example function that doubles its input: 6 | 7 | !!! 
info inline end "Implementation Note" 8 | This implementation is only defined on 32-bit floats and integers but could be defined on all numbers (and even lists and strings). The user of the implementation can specify what happens when the resulting value falls outside of the valid range for a 32-bit float (either return NAN or raise an error). 9 | 10 | ```yaml 11 | --8<-- "examples/extensions/double_function.yaml" 12 | ``` 13 | -------------------------------------------------------------------------------- /tests/cases/comparison/lt.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lt(1::i8, 2::i8) = true::bool 6 | lt(200::i16, 100::i16) = false::bool 7 | lt(1000::i16, 1000::i16) = false::bool 8 | lt(2000000000::i32, 1000000000::i32) = false::bool 9 | lt(-922337203685775808::i64, -922337203685775807::i64) = true::bool 10 | lt(7.25::fp32, 2.50::fp32) = false::bool 11 | lt(7.25::dec<38, 2>, 7.25::dec<38, 2>) = false::bool 12 | lt(2.49::dec<38, 2>, 2.50::dec<38, 2>) = true::bool 13 | lt(1.5e+308::fp64, inf::fp64) = true::bool 14 | lt(-1.5e+308::fp64, -inf::fp64) = false::bool 15 | 16 | # null_input: Examples with null as input 17 | lt(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool 18 | lt(null::dec<38, 2>, null::dec<38, 2>) = null::bool 19 | lt(null::i16, 1::i16) = null::bool 20 | lt(2::i16, null::i16) = null::bool 21 | lt(null::i16, null::i16) = null::bool 22 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/modulus.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | modulus(9::i8, 3::i8) = 0::i8 6 | 
modulus(10::i8, -3::i8) = 1::i8 7 | modulus(32767::i16, 1000::i16) = 767::i16 8 | modulus(-2147483647::i32, 300000000::i32) = -47483647::i32 9 | modulus(-9223372036854775800::i64, -80000000000000::i64) = -12036854775800::i64 10 | modulus(5::i8, null::i8) = null::i8 11 | modulus(null::i64, 1::i64) = null::i64 12 | modulus(null::i64, null::i64) = null::i64 13 | 14 | # on_domain_error: Examples demonstrating operation when the divisor is 0 15 | modulus(5::i8, 0::i8) [on_domain_error:NULL] = null::i8 16 | modulus(5::i8, 0::i8) [on_domain_error:ERROR] = 17 | 18 | # division_type: Examples demonstrating truncate and floor division types 19 | modulus(8::i8, -3::i8) [division_type:TRUNCATE] = 2::i8 20 | modulus(8::i8, -3::i8) [division_type:FLOOR] = -1::i8 21 | -------------------------------------------------------------------------------- /tests/cases/comparison/gt.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | gt(1::i8, 2::i8) = false::bool 6 | gt(200::i16, 199::i16) = true::bool 7 | gt(200::i16, 200::i16) = false::bool 8 | gt(2000000000::i32, 1000000000::i32) = true::bool 9 | gt(-922337203685775808::i64, -922337203685775807::i64) = false::bool 10 | gt(7.25::fp32, 2.50::fp32) = true::bool 11 | gt(-922337203685775808::dec<38, 0>, -922337203685775807::dec<38, 0>) = false::bool 12 | gt(7.25::dec<38, 2>, 2.50::dec<38, 2>) = true::bool 13 | gt(-1.5e+308::fp64, -inf::fp64) = true::bool 14 | gt(inf::fp64, 1.5e+308::fp64) = true::bool 15 | 16 | # null_input: Examples with null as input 17 | gt(null::i16, 100::i16) = null::bool 18 | gt(2::i16, null::i16) = null::bool 19 | gt(null::i16, null::i16) = null::bool 20 | gt(2::dec<38, 2>, null::dec<38, 2>) = null::bool 21 | gt(null::dec<38, 2>, null::dec<38, 2>) = null::bool 22 | 
-------------------------------------------------------------------------------- /tests/cases/arithmetic/max.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | max((20, -3, 1, -10, 0, 5)::i8) = 20::i8 6 | max((-32768, 32767, 20000, -30000)::i16) = 32767::i16 7 | max((-214748648, 214748647, 21470048, 4000000)::i32) = 214748647::i32 8 | max((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::i64) = 2000000000::i64 9 | max((2.5, 0, 5.0, -2.5, -7.5)::fp32) = 5.0::fp32 10 | max((1.5e+308, 1.5e+10, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = 1.5e+308::fp64 11 | 12 | # null_handling: Examples with null as input or output 13 | max((Null, Null, Null)::i16) = Null::i16 14 | max(()::i16) = Null::i16 15 | max((2000000000, Null, 629000000, -100000000, Null, 987654321)::i64) = 2000000000::i64 16 | max((Null, inf)::fp64) = inf::fp64 17 | max((Null, -inf, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -1.5e+7::fp64 18 | max((1.5e+308, 1.5e+10, Null, -1.5e+7, Null)::fp64) = 1.5e+308::fp64 19 | -------------------------------------------------------------------------------- /site/docs/expressions/dynamic_parameters.md: -------------------------------------------------------------------------------- 1 | # Dynamic Parameter Expression 2 | 3 | The dynamic parameter expression represents a placeholder within an expression whose value is determined at runtime. 4 | This is particularly useful for parameterized queries where certain values are not known until execution. 5 | Additionally, using dynamic parameters can enable other use cases, such as sharing execution plans without embedding sensitive information. 
6 | 7 | A dynamic parameter expression includes the following properties: 8 | 9 | | Property | Description | Required | 10 | |-----------------------|-------------------------------------------------------------------------------|----------| 11 | | `type` | Specifies the expected data type of the dynamic parameter. | Yes | 12 | | `parameter_reference` | A surrogate key used within a plan to reference a specific parameter binding. | Yes | 13 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/min.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | min((20, -3, 1, -10, 0, 5)::i8) = -10::i8 6 | min((-32768, 32767, 20000, -30000)::i16) = -32768::i16 7 | min((-214748648, 214748647, 21470048, 4000000)::i32) = -214748648::i32 8 | min((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::i64) = -3217908979::i64 9 | min((2.5, 0, 5.0, -2.5, -7.5)::fp32) = -7.5::fp32 10 | min((1.5e+308, 1.5e+10, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -1.5e+70::fp64 11 | 12 | # null_handling: Examples with null as input or output 13 | min((Null, inf)::fp64) = inf::fp64 14 | min((Null, Null, Null)::i16) = Null::i16 15 | min(()::i16) = Null::i16 16 | min((2000000000, Null, 629000000, -100000000, Null, 987654321)::i64) = -100000000::i64 17 | min((Null, -inf, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -inf::fp64 18 | min((1.5e+308, 1.5e+10, Null, -1.5e+7, Null)::fp64) = -1.5e+7::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/sum_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples 
without any special cases 5 | sum((0, -1, 2, 20)::dec<2, 0>) = 21::dec<38, 0> 6 | sum((2000000, -3217908, 629000, -100000, 0, 987654)::dec<7, 0>) = 298746::dec<38, 0> 7 | sum((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = -2.5::dec<38, 2> 8 | sum((2.5000007152557373046875, 7.0000007152557373046875, 0, 7.0000007152557373046875)::dec<23, 22>) = 16.5000021457672119140625::dec<38, 22> 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | sum((99999999999999999999999999999999999999, 1, 1, 1, 1, 99999999999999999999999999999999999999)::dec<38, 0>) [overflow:ERROR] = 12 | 13 | # null_handling: Examples with null as input or output 14 | sum((Null, Null, Null)::dec<1, 0>) = Null::dec<38, 0> 15 | sum(()::dec<1, 0>) = Null::dec<38, 0> 16 | sum((200000, Null, 629000, -10000, 0, 987621)::dec<6, 0>) = 1806621::dec<38, 0> 17 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/power.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | power(8::dec<38, 0>, 2::dec<38, 0>) = 64::fp64 6 | power(1.0::dec<38, 0>, -1.0::dec<38, 0>) = 1.0::fp64 7 | power(2.0::dec<38, 0>, -2.0::dec<38, 0>) = 0.25::fp64 8 | power(13::dec<38, 0>, 10::dec<38, 0>) = 137858491849::fp64 9 | 10 | # result_more_than_input_precision: Examples demonstrating result with more precision than input 11 | power(16::dec<2, 0>, 4::dec<38, 0>) = 65536::fp64 12 | 13 | # floating_exception: Examples demonstrating exceptional floating point cases 14 | power(1.5e+10::dec<38, 0>, 1.5e+20::dec<38, 0>) = inf::fp64 15 | power(-16::dec<4, 0>, 1001::dec<4, 0>) = -inf::fp64 16 | 17 | # complex_number: Examples demonstrating complex number output 18 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:NAN] = nan::fp64 19 | power(-1::dec, 0.5::dec<38, 1>) 
[complex_number_result:ERROR] = 20 | -------------------------------------------------------------------------------- /tests/cases/comparison/not_equal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | not_equal(1::i8, 1::i8) = false::bool 6 | not_equal(300::i16, 200::i16) = true::bool 7 | not_equal(-2147483648::i32, -2147483648::i32) = false::bool 8 | not_equal(9223372036854775807::i64, 9223372036854775804::i64) = true::bool 9 | not_equal(9223372036854775807::dec<38, 0>, 9223372036854775804::dec<38, 0>) = true::bool 10 | not_equal(9223372036854775804::dec<38, 0>, 9223372036854775804::dec<38, 0>) = false::bool 11 | not_equal(inf::fp64, inf::fp64) = false::bool 12 | not_equal(inf::fp64, 1.5e+308::fp64) = true::bool 13 | not_equal(inf::fp64, -inf::fp64) = true::bool 14 | 15 | # null_input: Examples with null as input 16 | not_equal(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool 17 | not_equal(null::dec<38, 2>, null::dec<38, 2>) = null::bool 18 | not_equal(null::i16, 1::i16) = null::bool 19 | not_equal(null::i16, null::i16) = null::bool 20 | -------------------------------------------------------------------------------- /proto/substrait/capabilities.proto: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | syntax = "proto3"; 3 | 4 | package substrait; 5 | 6 | option csharp_namespace = "Substrait.Protobuf"; 7 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb"; 8 | option java_multiple_files = true; 9 | option java_package = "io.substrait.proto"; 10 | 11 | // Defines a set of Capabilities that a system (producer or consumer) supports. 
12 | message Capabilities { 13 | // List of Substrait versions this system supports 14 | repeated string substrait_versions = 1; 15 | 16 | // List of google.protobuf.Any message types this system supports for advanced 17 | // extensions. 18 | repeated string advanced_extension_type_urls = 2; 19 | 20 | // List of simple extensions this system supports. 21 | repeated SimpleExtension simple_extensions = 3; 22 | 23 | message SimpleExtension { 24 | string uri = 1; 25 | repeated string function_keys = 2; 26 | repeated string type_keys = 3; 27 | repeated string type_variation_keys = 4; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /site/examples/proto-textformat/field_reference/lambda_param_nested_struct.textproto: -------------------------------------------------------------------------------- 1 | # Access nested field within a struct parameter 2 | # Example: lambda parameter is a Person struct with fields: 3 | # field 0: name (string) 4 | # field 1: age (i32) 5 | # field 2: address (struct with fields:) 6 | # field 0: street (string) 7 | # field 1: city (string) 8 | # field 2: zip (string) 9 | # 10 | # This demonstrates accessing: person.address.city 11 | # Equivalent to: lambda_parameter[0].struct_field[2].struct_field[1] 12 | # 13 | # message Expression.FieldReference 14 | 15 | lambda_parameter_reference: { 16 | steps_out: 0 # Current lambda's parameters 17 | } 18 | direct_reference: { 19 | struct_field: { 20 | field: 0 # First parameter (person) 21 | child: { 22 | struct_field: { 23 | field: 2 # Third field of person (address) 24 | child: { 25 | struct_field: { 26 | field: 1 # Second field of address (city) 27 | } 28 | } 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /tests/cases/comparison/gte.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE:
'/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | gte(1::i8, 2::i8) = false::bool 6 | gte(2::i8, 2::i8) = true::bool 7 | gte(200::i16, 199::i16) = true::bool 8 | gte(2000000000::i32, 1000000000::i32) = true::bool 9 | gte(-922337203685775808::i64, -922337203685775807::i64) = false::bool 10 | gte(7.25::fp32, 2.50::fp32) = true::bool 11 | gte(7.25::fp32, 7.25::fp32) = true::bool 12 | gte(7.25::dec<38, 2>, 7.25::dec<38, 2>) = true::bool 13 | gte(7.25::dec<38, 2>, 7.27::dec<38, 2>) = false::bool 14 | gte(inf::fp64, 1.5e+308::fp64) = true::bool 15 | gte(inf::fp64, inf::fp64) = true::bool 16 | gte(-inf::fp64, -1.5e+308::fp64) = false::bool 17 | 18 | # null_input: Examples with null as input 19 | gte(null::dec<38, 2>, 7.25::dec<38, 2>) = null::bool 20 | gte(null::dec<38, 2>, null::dec<38, 2>) = null::bool 21 | gte(null::i16, 1::i16) = null::bool 22 | gte(2::i16, null::i16) = null::bool 23 | gte(null::i16, null::i16) = null::bool 24 | -------------------------------------------------------------------------------- /tests/cases/comparison/lte.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lte(1::i8, 2::i8) = true::bool 6 | lte(2::i8, 2::i8) = true::bool 7 | lte(200::i16, 199::i16) = false::bool 8 | lte(2000000000::i32, 1000000000::i32) = false::bool 9 | lte(-922337203685775808::i64, -922337203685775807::i64) = true::bool 10 | lte(7.00::fp32, 2.50::fp32) = false::bool 11 | lte(7.25::fp32, 7.25::fp32) = true::bool 12 | lte(7.25::dec<38, 2>, 7.25::dec<38, 2>) = true::bool 13 | lte(2.59::dec<38, 2>, 2.50::dec<38, 2>) = false::bool 14 | lte(1.5e+308::fp64, inf::fp64) = true::bool 15 | lte(inf::fp64, inf::fp64) = true::bool 16 | lte(-1.5e+308::fp64, -inf::fp64) = false::bool 17 | 18 | # null_input: Examples with null as input 19 
| lte(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool 20 | lte(null::dec<38, 2>, null::dec<38, 2>) = null::bool 21 | lte(null::i16, 1::i16) = null::bool 22 | lte(2::i16, null::i16) = null::bool 23 | lte(null::i16, null::i16) = null::bool 24 | -------------------------------------------------------------------------------- /grammar/Makefile: -------------------------------------------------------------------------------- 1 | TYPE_GRAMMAR=SubstraitLexer.g4 SubstraitType.g4 2 | TYPE_OUTPUT_DIR=../tests/type/antlr_parser 3 | TESTCASE_GRAMMAR=FuncTestCaseLexer.g4 FuncTestCaseParser.g4 4 | TESTCASE_OUTPUT_DIR=../tests/coverage/antlr_parser 5 | 6 | all: generate_testcase_parser generate_type_parser 7 | 8 | generate_testcase_parser: 9 | @echo "\nGenerating Test Case Parser" 10 | antlr -visitor -Dlanguage=Python3 -o $(TESTCASE_OUTPUT_DIR) $(TESTCASE_GRAMMAR) 11 | rm -rf $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp 12 | ./prepend_license.sh $(TESTCASE_OUTPUT_DIR) 13 | 14 | generate_type_parser: 15 | @echo "\nGenerating Substrait Type" 16 | antlr -visitor -Dlanguage=Python3 -o $(TYPE_OUTPUT_DIR) $(TYPE_GRAMMAR) 17 | rm -rf $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp 18 | ./prepend_license.sh $(TYPE_OUTPUT_DIR) 19 | 20 | clean: 21 | rm -rf $(TYPE_OUTPUT_DIR)/*.py $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp 22 | rm -rf $(TESTCASE_OUTPUT_DIR)/*.py $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp 23 | rm -rf ./*.tokens 24 | -------------------------------------------------------------------------------- /.releaserc.json: -------------------------------------------------------------------------------- 1 | { 2 | "branches": ["main"], 3 | "preset": "conventionalcommits", 4 | "plugins": [ 5 | [ 6 | "@semantic-release/commit-analyzer", 7 | { 8 | "releaseRules": [ 9 | {"breaking": true, "release": "minor"} 10 | ] 11 | } 12 | ], 13 | "@semantic-release/release-notes-generator", 14 | [ 15 | "@semantic-release/changelog", 16 | { 
17 | "changelogTitle": "Release Notes\n---", 18 | "changelogFile": "CHANGELOG.md" 19 | } 20 | ], 21 | [ 22 | "@semantic-release/exec", 23 | { 24 | "verifyConditionsCmd": "ci/release/verify.sh", 25 | "prepareCmd": "ci/release/prepare.sh", 26 | "publishCmd": "ci/release/publish.sh ${nextRelease.version}" 27 | } 28 | ], 29 | [ 30 | "@semantic-release/github", 31 | { 32 | "successComment": false 33 | } 34 | ], 35 | [ 36 | "@semantic-release/git", 37 | { 38 | "assets": ["CHANGELOG.md"], 39 | "message": "chore(release): ${nextRelease.version}" 40 | } 41 | ] 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /site/examples/proto-textformat/lambda/simple_multiply.textproto: -------------------------------------------------------------------------------- 1 | # Represents: `(x: i32) -> x * 2` 2 | # 3 | # message Expression.Lambda 4 | 5 | parameters: { 6 | types: [ 7 | { 8 | i32: { 9 | nullability: NULLABILITY_REQUIRED 10 | } 11 | } 12 | ] 13 | } 14 | 15 | body: { 16 | scalar_function: { 17 | function_reference: 1 # Reference to multiply function 18 | arguments: [ 19 | { 20 | # First argument: lambda parameter x 21 | value: { 22 | selection: { 23 | lambda_parameter_reference: { 24 | steps_out: 0 # 0 = current lambda 25 | } 26 | direct_reference: { 27 | struct_field: { 28 | field: 0 # 0 = first parameter (x) 29 | } 30 | } 31 | } 32 | } 33 | }, 34 | { 35 | value: { 36 | literal: { 37 | i32: 2 38 | } 39 | } 40 | } 41 | ] 42 | 43 | output_type: { 44 | i32: { 45 | nullability: NULLABILITY_REQUIRED 46 | } 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/subtract.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | subtract(120::i8, 5::i8) = 115::i8 6 | 
subtract(-100::i16, 100::i16) = -200::i16 7 | subtract(-30000::i32, 30000::i32) = -60000::i32 8 | subtract(-2000000000::i64, 2000000000::i64) = -4000000000::i64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | subtract(-120::i8, 10::i8) [overflow:ERROR] = 12 | subtract(-30000::i16, 30000::i16) [overflow:ERROR] = 13 | subtract(-2000000000::i32, 2000000000::i32) [overflow:ERROR] = 14 | subtract(-9223372036854775808::i64, 1::i64) [overflow:ERROR] = 15 | subtract(-120::i8, 10::i8) [overflow:SATURATE] = -128::i8 16 | subtract(120::i8, -10::i8) [overflow:SATURATE] = 127::i8 17 | subtract(-120::i8, 10::i8) [overflow:SILENT] = 18 | 19 | # floating_exception: Examples demonstrating exceptional floating point cases 20 | subtract(-1.5e+308::fp64, 1.5e+308::fp64) = -inf::fp64 21 | subtract(1.5e+308::fp64, -1.5e+308::fp64) = inf::fp64 22 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sum.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sum((0, -1, 2, 20)::i8) = 21::i64 6 | sum((2000000, -3217908, 629000, -100000, 0, 987654)::i32) = 298746::i64 7 | sum((2.5, 0, 5.0, -2.5, -7.5)::fp32) = -2.5::fp64 8 | sum((2.5000007152557373046875, 7.0000007152557373046875, 0, 7.0000007152557373046875)::fp64) = 16.500002145767212::fp64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | sum((9223372036854775806, 1, 1, 1, 1, 10000000000)::i64) [overflow:ERROR] = 12 | 13 | # floating_exception: Examples demonstrating exceptional floating point cases 14 | sum((1.5e+308, 1.5e+308, 1.5e+308)::fp64) = inf::fp64 15 | sum((-1.5e+308, -1.5e+308, -1.5e+308)::fp64) = -inf::fp64 16 | sum((2.500000715, inf, 2.500000715)::fp64) = inf::fp64 17 | sum((2.5000007, -inf, 2.5000007, 10.0)::fp64) = -inf::fp64 18 | 19 | # 
null_handling: Examples with null as input or output 20 | sum((Null, Null, Null)::i16) = Null::i64 21 | sum(()::i16) = Null::i64 22 | sum((200000, Null, 629000, -10000, 0, 987621)::i32) = 1806621::i64 23 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/power_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | power(8::dec, 2::dec<38, 0>) = 64::fp64 6 | power(1.0::dec, -1.0::dec<38, 0>) = 1.0::fp64 7 | power(2.0::dec<38, 0>, -2.0::dec<38, 0>) = 0.25::fp64 8 | power(13::dec<38, 0>, 10::dec<38, 0>) = 137858491849::fp64 9 | 10 | # result_more_than_input_precision: Examples demonstrating result with more precision than input 11 | power(16::dec<2, 0>, 4::dec<38, 0>) = 65536::fp64 12 | 13 | # floating_exception: Examples demonstrating exceptional floating point cases 14 | power(1.5e+10::dec<38, 0>, 1.5e+20::dec<38, 0>) = inf::fp64 15 | power(-16::dec<4, 0>, 1001::dec<4, 0>) = -inf::fp64 16 | 17 | # complex_number: Examples demonstrating complex number output 18 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:NAN] = nan::fp64 19 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:ERROR] = 20 | 21 | # null_values: test with null values 22 | power(null::dec<38, 0>, 127::dec<38, 0>) = null::fp64 23 | power(null::dec<38, 0>, null::dec<38, 0>) = null::fp64 24 | -------------------------------------------------------------------------------- /tests/cases/string/substring.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | substring('abcdefg'::str, 1::i32, 5::i32) = 'abcde'::str 6 | 
substring('abcdefg'::str, 1::i32, 5::i32) = 'abcde'::str 7 | 8 | # start_greater_than_length: Example where start argument greater than the length of the string 9 | substring('abcdefg'::str, 10::i32, 2::i32) = ''::str 10 | substring('abcdefg'::str, 10::i32, 2::i32) = ''::str 11 | 12 | # multi_byte_characters: Example where multi byte characters exist in the string 13 | substring('😊a😊b😊😊'::str, 1::i32, 3::i32) = '😊a😊'::str 14 | substring('😊a😊b😊😊'::str, 1::i32, 3::i32) = '😊a😊'::str 15 | 16 | # negative_start: Example where start argument is a negative integer 17 | substring('abcdefg'::str, -1::i32, 2::i32) [negative_start:WRAP_FROM_END] = 'g'::str 18 | substring('abcdefg'::str, -2::i32, 1::i32) [negative_start:WRAP_FROM_END] = 'f'::str 19 | substring('abcdefg'::str, -1::i32, 2::i32) [negative_start:LEFT_OF_BEGINNING] = ''::str 20 | substring('abcdefg'::str, -1::i32, 3::i32) [negative_start:LEFT_OF_BEGINNING] = 'a'::str 21 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/sqrt_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sqrt(25::dec<2, 0>) = 5::fp64 6 | sqrt(0::dec<1, 0>) = 0::fp64 7 | 8 | # max_input: max allowed input returns correct result 9 | sqrt(99999999999999999999999999999999999999::dec<38, 0>) = 1e+19::fp64 10 | 11 | # real_number: real number as input 12 | sqrt(6.25::dec<3, 2>) = 2.5::fp64 13 | sqrt(2.0000007152557373046875::dec<23, 22>) = 1.4142138152541635::fp64 14 | 15 | # verify_real_number: verify real number operation are different and doesnt behave as nearby int 16 | sqrt(9::dec<1, 0>) = 3::fp64 17 | sqrt(8.3::dec<2, 1>) = 2.8809720581775866::fp64 18 | sqrt(8.5::dec<2, 1>) = 2.9154759474226504::fp64 19 | sqrt(8.7::dec<2, 1>) = 2.949576240750525::fp64 20 | 
sqrt(9.2::dec<2, 1>) = 3.03315017762062::fp64 21 | 22 | # negative_input: negative input returns error 23 | sqrt(-9223372036854775800::dec<19, 0>) = 24 | sqrt(-2.5::dec<2, 1>) = 25 | 26 | # null_values: test with null values 27 | sqrt(null::dec<38, 0>) = null::fp64 28 | sqrt(null::dec<1, 0>) = null::fp64 29 | -------------------------------------------------------------------------------- /tests/cases/datetime/gt_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | gt('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 6 | gt('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 7 | 8 | # timestamp_tz: examples using the timestamp_tz type 9 | gt('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool 10 | gt('1999-01-08T01:05:07-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool 11 | 12 | # date: examples using the date type 13 | gt('2020-12-30'::date, '2020-12-31'::date) = false::bool 14 | gt('2020-12-31'::date, '2020-12-30'::date) = true::bool 15 | 16 | # interval: examples using the interval type 17 | gt('P7D'::iday, 'P6D'::iday) = true::bool 18 | gt('P5D'::iday, 'P6D'::iday) = false::bool 19 | gt('P5Y'::iyear, 'P6Y'::iyear) = false::bool 20 | gt('P7Y'::iyear, 'P6Y'::iyear) = true::bool 21 | 22 | # null_input: examples with null args 23 | gt(null::iday, 'P5D'::iday) = null::bool 24 | gt(null::date, '2020-12-30'::date) = null::bool 25 | gt(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 26 | -------------------------------------------------------------------------------- /tests/cases/datetime/lt_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 
| # timestamps: examples using the timestamp type 5 | lt('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 6 | lt('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 7 | 8 | # timestamp_tz: examples using the timestamp_tz type 9 | lt('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool 10 | lt('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool 11 | 12 | # date: examples using the date type 13 | lt('2020-12-30'::date, '2020-12-31'::date) = true::bool 14 | lt('2020-12-31'::date, '2020-12-30'::date) = false::bool 15 | 16 | # interval: examples using the interval type 17 | lt('P7D'::iday, 'P6D'::iday) = false::bool 18 | lt('P5D'::iday, 'P6D'::iday) = true::bool 19 | lt('P5Y'::iyear, 'P6Y'::iyear) = true::bool 20 | lt('P7Y'::iyear, 'P6Y'::iyear) = false::bool 21 | 22 | # null_input: examples with null args or return 23 | lt(null::iday, 'P5D'::iday) = null::bool 24 | lt(null::date, '2020-12-30'::date) = null::bool 25 | lt(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 26 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Thank you for submitting a PR! 2 | 3 | Before you continue, please ensure that your PR title and description (this message!) follow [conventional commit syntax][1]. Substrait uses an automated release process that, among other things, uses PR titles & descriptions to build a changelog, so the syntax and format matter! 4 | 5 | The title of the PR should be a valid commit header. 6 | 7 | Some examples of proper commit message headers and PR titles: 8 | 9 | - `feat: add feature X` 10 | - `fix: X in case of Y` 11 | - `docs: improve documentation for X` 12 | 13 | Note the case and grammar conventions.
14 | 15 | Furthermore, the description of any PR that includes a breaking change should contain a paragraph that starts with `BREAKING CHANGE: ...`, where `...` explains what changed. The automated release process uses this to determine how it should bump the version number. Anything that changes the behavior of a plan that was previously legal is considered a breaking change; note that this includes behavior specifications that only exist in Substrait in the form of behavior descriptions on the website or in comments. 16 | 17 | [1]: https://www.conventionalcommits.org/en/v1.0.0/ 18 | -------------------------------------------------------------------------------- /.github/workflows/pr_breaking.yml: -------------------------------------------------------------------------------- 1 | name: Breaking Changes Check 2 | 3 | on: 4 | pull_request: 5 | types: [opened, edited, synchronize, reopened] 6 | jobs: 7 | breaking: 8 | name: Ensure breaking changes are labeled in description 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v6 12 | - uses: bufbuild/buf-setup-action@v1.50.0 13 | with: 14 | github_token: ${{ github.token }} 15 | - name: check for breaking changes 16 | id: check-breaking 17 | run: | 18 | 19 | if ! 
buf breaking --against 'https://github.com/substrait-io/substrait.git#branch=main'; then 20 | breaking="true" 21 | else 22 | breaking="false" 23 | fi 24 | 25 | echo "breaking=${breaking}" >> "$GITHUB_OUTPUT" 26 | - name: check whether the PR description includes a breaking change footer 27 | if: ${{ fromJson(steps.check-breaking.outputs.breaking) }} 28 | run: | 29 | # check PR description for a BREAKING CHANGE section if any breaking changes occurred 30 | grep '^BREAKING CHANGE: ' <<< "$COMMIT_DESC" 31 | env: 32 | COMMIT_DESC: ${{ github.event.pull_request.body }} 33 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/multiply.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | multiply(25::i8, 5::i8) = 125::i8 6 | multiply(2::i16, -100::i16) = -200::i16 7 | multiply(300::i32, 200::i32) = 60000::i32 8 | multiply(80000::i64, -5000::i64) = -400000000::i64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | multiply(13::i8, 10::i8) [overflow:ERROR] = 12 | multiply(11::i16, 3000::i16) [overflow:ERROR] = 13 | multiply(3::i32, 1000000000::i32) [overflow:ERROR] = 14 | multiply(1000000000000000000::i64, 10::i64) [overflow:ERROR] = 15 | multiply(13::i8, 10::i8) [overflow:SATURATE] = 127::i8 16 | multiply(-13::i8, 10::i8) [overflow:SATURATE] = -128::i8 17 | multiply(13::i8, 10::i8) [overflow:SILENT] = 18 | 19 | # floating_exception: Examples demonstrating exceptional floating point cases 20 | multiply(1.5e+100::fp64, 1.5e+208::fp64) = inf::fp64 21 | multiply(1.5e+100::fp64, -1.5e+208::fp64) = -inf::fp64 22 | 23 | # types: Examples demonstrating behavior of different data types 24 | multiply(4.5::fp64, 2.5000007152557373046875::fp64) = 11.250003218650818::fp64 25 |
-------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/min_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | min((20, -3, 1, -10, 0, 5)::dec<2, 0>) = -10::dec<2, 0> 6 | min((-32768, 32767, 20000, -30000)::dec<5, 0>) = -32768::dec<5, 0> 7 | min((-214748648, 214748647, 21470048, 4000000)::dec<9, 0>) = -214748648::dec<9, 0> 8 | min((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::dec<10, 0>) = -3217908979::dec<10, 0> 9 | min((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = -7.5::dec<2, 1> 10 | min((99999999999999999999999999999999999999, -99999999999999999999999999999999999998, -99999999999999999999999999999999999997, 0, 1111)::dec<38, 0>) = -99999999999999999999999999999999999998::dec<38, 0> 11 | 12 | # null_handling: Examples with null as input or output 13 | min((Null, Null, Null)::dec<1, 0>) = Null::dec<1, 0> 14 | min(()::dec<1, 0>) = Null::dec<1, 0> 15 | min((2000000000, Null, 629000000, -100000000, Null, 987654321)::dec<10, 0>) = -100000000::dec<10, 0> 16 | min((-99999999999999999999999999999999999998, Null, 99999999999999999999999999999999999999, Null)::dec<38, 0>) = -99999999999999999999999999999999999998::dec<38, 0> 17 | -------------------------------------------------------------------------------- /.github/workflows/site.yml: -------------------------------------------------------------------------------- 1 | name: Site 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - "site/**" 8 | - "extensions/**" 9 | - "text/**" 10 | 11 | jobs: 12 | site: 13 | name: Build & Deploy Website 14 | runs-on: ubuntu-latest 15 | if: ${{ github.repository == 'substrait-io/substrait' }} 16 | steps: 17 | - uses: actions/checkout@v6 18 | - uses: actions/setup-python@v6 19 | with: 20 | 
python-version: '3.13' 21 | cache: 'pip' 22 | cache-dependency-path: ./site/requirements.txt 23 | - run: pip install -r ./site/requirements.txt 24 | - name: Copy schema file for website 25 | run: | 26 | mkdir -p ./site/docs/schemas 27 | cp ./text/simple_extensions_schema.yaml ./site/docs/schemas/simple_extensions 28 | - name: Generate Static Site 29 | run: mkdocs build 30 | working-directory: ./site 31 | - name: Deploy Static Site to GitHub 32 | uses: peaceiris/actions-gh-pages@v4 33 | with: 34 | external_repository: substrait-io/substrait.io 35 | publish_branch: main 36 | deploy_key: ${{ secrets.SUBSTRAIT_SITE_DEPLOY_KEY }} 37 | publish_dir: ./site/site 38 | cname: substrait.io 39 | -------------------------------------------------------------------------------- /site/docs/spec/technology_principles.md: -------------------------------------------------------------------------------- 1 | # Technology Principles 2 | 3 | * Provide a good suite of well-specified common functionality in databases and data science applications. 4 | * Make it easy for users to privately or publicly extend the representation to support specialized/custom operations. 5 | * Produce something that is language agnostic and requires minimal work to start developing against in a new language. 6 | * Drive towards a common format that avoids specialization for a single favorite producer or consumer. 7 | * Establish a clear delineation between specifications that MUST be respected and those that can optionally be ignored. 8 | * Establish a forgiving compatibility approach and versioning scheme that supports cross-version compatibility in the maximum number of cases. 9 | * Minimize the need for consumer intelligence by excluding concepts like overloading, type coercion, implicit casting, field name handling, etc. (Note: this is weak and should be better stated.)
10 | * Decomposability/severability: A particular producer or consumer should be able to produce or consume only a subset of the specification and interact well with any other Substrait system as long as the specific operations requested fit within the subset of the specification supported by the counter system. 11 | 12 | 13 | -------------------------------------------------------------------------------- /tests/cases/comparison/between.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | between(5::i8, 0::i8, 127::i8) = true::bool 6 | between(20000::i16, 1::i16, 30000::i16) = true::bool 7 | between(1030000000::i32, 1000000000::i32, 2000000000::i32) = true::bool 8 | between(10300000000900::i64, 1000000000::i64, 9223372036854775807::i64) = true::bool 9 | between(2::i8, 1::i8, -120::i8) = false::bool 10 | between(2::i8, 2::i8, 3::i8) = true::bool 11 | between(2::i8, 1::i8, 2::i8) = true::bool 12 | between(-10000::i16, -20000::i16, -30000::i16) = false::bool 13 | between(-100000000::i32, -1000000000::i32, -2000000000::i32) = false::bool 14 | between(92233720368547758::i64, 1::i64, -9223372036854775807::i64) = false::bool 15 | between(14.01::fp32, 20.90::fp32, 88.00::fp32) = false::bool 16 | between(14.011::fp64, 0.00::fp64, inf::fp64) = true::bool 17 | between(inf::fp64, 0.00::fp64, 100.09::fp64) = false::bool 18 | between(-100.0011::fp64, -inf::fp64, 0.00::fp64) = true::bool 19 | 20 | # null_input: Examples with null as input 21 | between(null::i8, 1::i8, 10::i8) = null::bool 22 | between(1::i64, null::i64, 10::i64) = null::bool 23 | between(1::i64, 1::i64, null::i64) = null::bool 24 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/max_decimal.test:
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | max((20, -3, 1, -10, 0, 5)::dec<2, 0>) = 20::dec<2, 0> 6 | max((-32768, 32767, 20000, -30000)::dec<5, 0>) = 32767::dec<5, 0> 7 | max((-214748648, 214748647, 21470048, 4000000)::dec<9, 0>) = 214748647::dec<9, 0> 8 | max((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::dec<10, 0>) = 2000000000::dec<10, 0> 9 | max((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = 5.0::dec<2, 1> 10 | max((99999999999999999999999999999999999999, 0, -99999999999999999999999999999999999998, 111111111, -76)::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 11 | 12 | # null_handling: Examples with null as input or output 13 | max((Null, Null, Null)::dec<1, 0>) = null::dec<1, 0> 14 | max(()::dec<1, 0>) = null::dec<1, 0> 15 | max((2000000000, Null, 629000000, -100000000, Null, 987654321)::dec<10, 0>) = 2000000000::dec<10, 0> 16 | max((Null, Null)::dec<1, 0>) = null::dec<1, 0> 17 | max(()::dec<1, 0>) = null::dec<1, 0> 18 | max((99999999999999999999999999999999999999, -99999999999999999999999999999999999998, Null, 11111111111111111111111111111111111111, Null)::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 19 | -------------------------------------------------------------------------------- /site/data/committers.yaml: -------------------------------------------------------------------------------- 1 | - Name: Jeroen van Straten 2 | Association: Qblox 3 | - Name: Carlo Curino 4 | Association: Microsoft 5 | - Name: James Taylor 6 | Association: Sundeck 7 | - Name: Sutou Kouhei 8 | Association: Clearcode 9 | - Name: Micah Kornfeld 10 | Association: Google 11 | - Name: Jinfeng Ni 12 | Association: Sundeck 13 | - Name: Andy Grove 14 | Association: Nvidia 15 | - Name: Jesus Camacho Rodriguez 16 | Association: 
Microsoft 17 | - Name: Rich Tia 18 | Association: Voltron Data 19 | - Name: Vibhatha Abeykoon 20 | Association: Voltron Data 21 | - Name: Nic Crane 22 | Association: Recast 23 | - Name: Gil Forsyth 24 | Association: Voltron Data 25 | - Name: ChaoJun Zhang 26 | Association: Intel 27 | - Name: Matthijs Brobbel 28 | Association: Voltron Data 29 | - Name: Matt Topol 30 | Association: Voltron Data 31 | - Name: Ingo Müller 32 | Association: Google 33 | - Name: Arttu Voutilainen 34 | Association: Palantir Technologies 35 | - Name: Bruno Volpato 36 | Association: Datadog 37 | - Name: Anshul Data 38 | Association: Sundeck 39 | - Name: Chandra Sanapala 40 | Association: Sundeck 41 | - Name: Andrew Coleman 42 | Association: IBM 43 | - Name: Mark Lewis 44 | Association: IBM 45 | - Name: Niels Pardon 46 | Association: IBM 47 | - Name: Ben Bellick 48 | Association: Datadog 49 | -------------------------------------------------------------------------------- /extensions/functions_aggregate_generic.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:functions_aggregate_generic 4 | aggregate_functions: 5 | - name: "count" 6 | description: Count a set of values 7 | impls: 8 | - args: 9 | - name: x 10 | value: any 11 | options: 12 | overflow: 13 | values: [SILENT, SATURATE, ERROR] 14 | nullability: DECLARED_OUTPUT 15 | decomposable: MANY 16 | intermediate: i64 17 | return: i64 18 | - name: "count" 19 | description: "Count a set of records (not field referenced)" 20 | impls: 21 | - options: 22 | overflow: 23 | values: [SILENT, SATURATE, ERROR] 24 | nullability: DECLARED_OUTPUT 25 | decomposable: MANY 26 | intermediate: i64 27 | return: i64 28 | - name: "any_value" 29 | description: > 30 | Selects an arbitrary value from a group of values. 31 | 32 | If the input is empty, the function returns null. 
33 | impls: 34 | - args: 35 | - name: x 36 | value: any1 37 | options: 38 | ignore_nulls: 39 | values: [ "TRUE", "FALSE" ] 40 | nullability: DECLARED_OUTPUT 41 | decomposable: MANY 42 | intermediate: any1? 43 | return: any1? 44 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | schedule: 5 | # 2 AM on Sunday 6 | - cron: "0 2 * * 0" 7 | workflow_dispatch: 8 | 9 | # we do not want more than one release workflow executing at the same time, ever 10 | concurrency: 11 | group: release 12 | # cancelling in the middle of a release would create incomplete releases 13 | # so cancel-in-progress is false 14 | cancel-in-progress: false 15 | 16 | jobs: 17 | release: 18 | runs-on: ubuntu-latest 19 | if: github.repository == 'substrait-io/substrait' 20 | steps: 21 | - uses: tibdex/github-app-token@v2 22 | id: generate-token 23 | with: 24 | app_id: ${{ secrets.APP_ID }} 25 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 26 | 27 | - uses: actions/checkout@v6 28 | with: 29 | fetch-depth: 0 30 | token: ${{ steps.generate-token.outputs.token }} 31 | 32 | - uses: actions/setup-node@v6 33 | with: 34 | node-version: "20" 35 | 36 | - uses: bufbuild/buf-setup-action@v1.50.0 37 | with: 38 | github_token: ${{ github.token }} 39 | 40 | - name: run semantic-release 41 | run: ./ci/release/run.sh 42 | env: 43 | BUF_TOKEN: ${{ secrets.BUF_TOKEN }} 44 | GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }} 45 | -------------------------------------------------------------------------------- /ci/release/dry_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | curdir="$PWD" 8 | worktree="$(mktemp -d)" 9 | branch="$(basename "$worktree")" 10 | 11 | git worktree add 
"$worktree" 12 | 13 | function cleanup() { 14 | cd "$curdir" || exit 1 15 | git worktree remove "$worktree" 16 | git worktree prune 17 | git branch -D "$branch" 18 | } 19 | 20 | trap cleanup EXIT ERR 21 | 22 | cd "$worktree" || exit 1 23 | 24 | export GITHUB_REF="$branch" 25 | 26 | npx --yes \ 27 | -p "semantic-release@24.1.2" \ 28 | -p "@semantic-release/commit-analyzer" \ 29 | -p "@semantic-release/release-notes-generator" \ 30 | -p "@semantic-release/changelog" \ 31 | -p "@semantic-release/exec" \ 32 | -p "@semantic-release/git" \ 33 | -p "conventional-changelog-conventionalcommits@8.0.0" \ 34 | semantic-release \ 35 | --ci false \ 36 | --dry-run \ 37 | --preset conventionalcommits \ 38 | --plugins \ 39 | --analyze-commits "@semantic-release/commit-analyzer" \ 40 | --generate-notes "@semantic-release/release-notes-generator" \ 41 | --verify-conditions "@semantic-release/changelog,@semantic-release/exec,@semantic-release/git" \ 42 | --prepare "@semantic-release/changelog,@semantic-release/exec" \ 43 | --branches "$branch" \ 44 | --repository-url "file://$PWD" 45 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/add.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | add(120::i8, 5::i8) = 125::i8 6 | add(100::i16, 100::i16) = 200::i16 7 | add(30000::i32, 30000::i32) = 60000::i32 8 | add(2000000000::i64, 2000000000::i64) = 4000000000::i64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | add(120::i8, 10::i8) [overflow:ERROR] = 12 | add(30000::i16, 30000::i16) [overflow:ERROR] = 13 | add(2000000000::i32, 2000000000::i32) [overflow:ERROR] = 14 | add(9223372036854775807::i64, 1::i64) [overflow:ERROR] = 15 | add(120::i8, 10::i8) [overflow:SATURATE] = 127::i8 16 | add(-120::i8, -10::i8) 
[overflow:SATURATE] = -128::i8 17 | add(120::i8, 10::i8) [overflow:SILENT] = 18 | 19 | # floating_exception: Examples demonstrating exceptional floating point cases 20 | add(1.5e+308::fp64, 1.5e+308::fp64) = inf::fp64 21 | add(-1.5e+308::fp64, -1.5e+308::fp64) = -inf::fp64 22 | 23 | # rounding: Examples demonstrating floating point rounding behavior 24 | add(4.5::fp32, 2.5000007152557373046875::fp32) [rounding:TIE_TO_EVEN] = 7.00000095367431640625::fp32 25 | 26 | # types: Examples demonstrating behavior of different data types 27 | add(4.5::fp64, 2.5000007152557373046875::fp64) = 7.0000007152557373046875::fp64 28 | -------------------------------------------------------------------------------- /site/docs/relations/common_fields.md: -------------------------------------------------------------------------------- 1 | # Common Fields 2 | 3 | Every relation contains a common section containing optional hints and emit behavior. 4 | 5 | 6 | ## Emit 7 | 8 | A relation which has a direct emit kind outputs the relation's output without reordering or selection. A relation that specifies an emit output mapping can output its output columns in any order and may leave output columns out. 9 | 10 | ???+ info "Relation Output" 11 | 12 | * Many relations (such as Project) by default provide as their output the list of all their input columns plus any generated columns. Review each relation to understand its specific default output. 13 | 14 | 15 | ## Hints 16 | 17 | Hints provide information that can improve performance but cannot be used to control the behavior. Table statistics, runtime constraints, name hints, and saved computations all fall into this category. 18 | 19 | ???+ info "Hint Design" 20 | 21 | * If a hint is not present or has incorrect data, the consumer should be able to ignore it and still arrive at the correct result. 22 | 23 | 24 | ### Saved Computations 25 | 26 | Computations can be used to save a data structure to use elsewhere.
For instance, let's say we have a plan with a HashEquiJoin and an AggregateDistinct operation. The HashEquiJoin could save its hash table as part of saved computation id number 1 and the AggregateDistinct could read in computation id number 1. 27 | -------------------------------------------------------------------------------- /tests/cases/datetime/gte_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | gte('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 6 | gte('2017-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 7 | gte('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 8 | 9 | # timestamp_tz: examples using the timestamp_tz type 10 | gte('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool 11 | gte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T01:05:06-08:00'::tstz) = true::bool 12 | gte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:05-05:00'::tstz) = true::bool 13 | 14 | # date: examples using the date type 15 | gte('2020-12-30'::date, '2020-12-31'::date) = false::bool 16 | gte('2020-12-31'::date, '2020-12-31'::date) = true::bool 17 | gte('2020-12-31'::date, '2020-12-30'::date) = true::bool 18 | 19 | # interval: examples using the interval type 20 | gte('P7D'::iday, 'P7D'::iday) = true::bool 21 | gte('P7D'::iday, 'P6D'::iday) = true::bool 22 | gte('P5D'::iday, 'P6D'::iday) = false::bool 23 | gte('P5Y'::iyear, 'P6Y'::iyear) = false::bool 24 | gte('P7Y'::iyear, 'P7Y'::iyear) = true::bool 25 | gte('P7Y'::iyear, 'P6Y'::iyear) = true::bool 26 | 27 | # null_input: examples with null args or return 28 | gte(null::iday, 'P5D'::iday) = null::bool 29 | gte(null::date, '2020-12-30'::date) = null::bool 30 | gte(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 31 | 
-------------------------------------------------------------------------------- /tests/cases/datetime/lte_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | lte('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 6 | lte('2017-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 7 | lte('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 8 | 9 | # timestamp_tz: examples using the timestamp_tz type 10 | lte('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool 11 | lte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T01:05:06-08:00'::tstz) = true::bool 12 | lte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:05-05:00'::tstz) = false::bool 13 | 14 | # date: examples using the date type 15 | lte('2020-12-30'::date, '2020-12-31'::date) = true::bool 16 | lte('2020-12-31'::date, '2020-12-31'::date) = true::bool 17 | lte('2020-12-31'::date, '2020-12-30'::date) = false::bool 18 | 19 | # interval: examples using the interval type 20 | lte('P7D'::iday, 'P7D'::iday) = true::bool 21 | lte('P7D'::iday, 'P6D'::iday) = false::bool 22 | lte('P5D'::iday, 'P6D'::iday) = true::bool 23 | lte('P5Y'::iyear, 'P6Y'::iyear) = true::bool 24 | lte('P7Y'::iyear, 'P7Y'::iyear) = true::bool 25 | lte('P7Y'::iyear, 'P6Y'::iyear) = false::bool 26 | 27 | # null_input: examples with null args or return 28 | lte(null::iday, 'P5D'::iday) = null::bool 29 | lte(null::date, '2020-12-30'::date) = null::bool 30 | lte(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 31 | -------------------------------------------------------------------------------- /site/README.md: -------------------------------------------------------------------------------- 1 | ## Substrait Site 2 | 3 | This directory contains the source for the Substrait site. 
4 | 5 | * Site structure is maintained in mkdocs.yml 6 | * Pages are maintained in markdown in the `docs/` folder 7 | * Links use bare page names: `[link text](target-page)` 8 | 9 | ### Installation 10 | 11 | The site is built using mkdocs. To install mkdocs and the theme, run: 12 | 13 | ``` 14 | # Activate the virtual environment (if installed) 15 | cd site/ 16 | . venv/bin/activate 17 | # Install or update the dependencies 18 | pip install -r ./requirements.txt 19 | ``` 20 | 21 | It is easier to use `virtualenv` to keep the Python dependencies for `site/` 22 | separate from your other projects and/or distinct from system managed Python 23 | dependencies. 24 | 25 | * To use `virtualenv`, you need Python 3.7/3.8 installed locally. 26 | * For Ubuntu: `apt-get install python3 virtualenv` 27 | * For MacOS/brew: `brew install python pyenv-virtualenv` 28 | * Install the virtual environment: 29 | ``` 30 | # cd to the site/ directory 31 | cd site/ 32 | # setup the virtual environment (only needed once) 33 | virtualenv -p $(which python3) venv 34 | # activate the virtual environment 35 | . venv/bin/activate 36 | # Install or update the dependencies as usual 37 | pip install -r ./requirements.txt 38 | ``` 39 | 40 | ### Local Changes 41 | 42 | To see changes locally before committing, use mkdocs to run a local server from this directory. 
43 | 44 | ``` 45 | mkdocs serve 46 | ``` 47 | 48 | ### Publishing 49 | 50 | TBD 51 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/bitwise_and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_and(0::dec<1, 0>, 1::dec<1, 0>) = 0::dec<1, 0> 6 | bitwise_and(127::dec<3, 0>, 127::dec<3, 0>) = 127::dec<3, 0> 7 | bitwise_and(-127::dec<3, 0>, -10::dec<2, 0>) = -128::dec<3, 0> 8 | bitwise_and(31766::dec<5, 0>, 900::dec<3, 0>) = 4::dec<5, 0> 9 | bitwise_and(-31766::dec<5, 0>, 900::dec<3, 0>) = 896::dec<5, 0> 10 | bitwise_and(2147483647::dec<10, 0>, 1234567::dec<7, 0>) = 1234567::dec<10, 0> 11 | bitwise_and(-2147483647::dec<10, 0>, 1234567::dec<7, 0>) = 1::dec<10, 0> 12 | bitwise_and(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 127::dec<19, 0> 13 | bitwise_and(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 1::dec<19, 0> 14 | 15 | # max_values: test with max values 16 | bitwise_and(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 17 | bitwise_and(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 0::dec<38, 0> 18 | bitwise_and(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = -99999999999999999999999999999999999999::dec<38, 0> 19 | 20 | # null_values: test with null values 21 | bitwise_and(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0> 22 | bitwise_and(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0> 23 | -------------------------------------------------------------------------------- /extensions/functions_list.yaml: 
-------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:functions_list 4 | scalar_functions: 5 | - name: "transform" 6 | description: >- 7 | Transforms each element of a list using the provided function. 8 | Also known as "map" in functional programming. 9 | 10 | Returns a new list where each element is the result of applying 11 | the transformer to the corresponding element in the input list. 12 | 13 | The transformer receives one parameter (the current element) and must return 14 | the transformed value. 15 | impls: 16 | - args: 17 | - name: input 18 | value: list 19 | - name: transformer 20 | value: func any2> 21 | nullability: MIRROR 22 | return: list 23 | 24 | - name: "filter" 25 | description: >- 26 | Filters a list of elements based on a predicate function. 27 | 28 | Returns a new list containing only elements for which the predicate 29 | function returns true. 30 | 31 | The predicate receives one parameter (the current element) and must return a 32 | boolean. 33 | 34 | Elements for which the predicate returns true are included in the 35 | result. Elements for which the predicate returns false or null are excluded. 
36 | impls: 37 | - args: 38 | - name: input 39 | value: list 40 | - name: predicate 41 | value: func boolean> 42 | nullability: MIRROR 43 | return: list 44 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/bitwise_xor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_xor(0::dec<1, 0>, 1::dec<1, 0>) = 1::dec<1, 0> 6 | bitwise_xor(127::dec<3, 0>, 127::dec<3, 0>) = 0::dec<3, 0> 7 | bitwise_xor(-127::dec<3, 0>, -10::dec<2, 0>) = 119::dec<3, 0> 8 | bitwise_xor(31766::dec<5, 0>, 900::dec<3, 0>) = 32658::dec<5, 0> 9 | bitwise_xor(-31766::dec<5, 0>, 900::dec<3, 0>) = -32658::dec<5, 0> 10 | bitwise_xor(2147483647::dec<10, 0>, 123456789::dec<9, 0>) = 2024026858::dec<10, 0> 11 | bitwise_xor(-2147483647::dec<10, 0>, 123456789::dec<9, 0>) = -2024026860::dec<10, 0> 12 | bitwise_xor(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 9223372036854775680::dec<19, 0> 13 | bitwise_xor(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = -9223372036854775682::dec<19, 0> 14 | 15 | # max_values: test with max values 16 | bitwise_xor(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 0::dec<38, 0> 17 | bitwise_xor(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 18 | bitwise_xor(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = 0::dec<38, 0> 19 | 20 | # null_values: test with null values 21 | bitwise_xor(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0> 22 | bitwise_xor(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0> 23 | 
-------------------------------------------------------------------------------- /core_test.go: -------------------------------------------------------------------------------- 1 | package substrait 2 | 3 | import ( 4 | "embed" 5 | "io/fs" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestGetSubstraitExtensionsFS(t *testing.T) { 13 | fsArr := []embed.FS{GetSubstraitExtensionsFS(), GetSubstraitFS()} 14 | for _, got := range fsArr { 15 | filePaths, err := ListFiles(got, ".") 16 | require.NoError(t, err) 17 | assert.Greater(t, len(filePaths), 15) 18 | assert.Contains(t, filePaths, "extensions/functions_arithmetic.yaml") 19 | assert.Contains(t, filePaths, "extensions/functions_arithmetic_decimal.yaml") 20 | assert.Contains(t, filePaths, "extensions/functions_datetime.yaml") 21 | } 22 | } 23 | 24 | func TestGetSubstraitTestsFS(t *testing.T) { 25 | got := GetSubstraitTestsFS() 26 | filePaths, err := ListFiles(got, ".") 27 | require.NoError(t, err) 28 | assert.Greater(t, len(filePaths), 3) 29 | assert.Contains(t, filePaths, "tests/cases/arithmetic/add.test") 30 | assert.Contains(t, filePaths, "tests/cases/arithmetic/max.test") 31 | assert.Contains(t, filePaths, "tests/cases/arithmetic_decimal/power.test") 32 | assert.Contains(t, filePaths, "tests/cases/datetime/lt_datetime.test") 33 | } 34 | 35 | func ListFiles(embedFs embed.FS, root string) ([]string, error) { 36 | var files []string 37 | err := fs.WalkDir(embedFs, root, func(path string, d fs.DirEntry, err error) error { 38 | if err != nil { 39 | return err 40 | } 41 | if !d.IsDir() { 42 | files = append(files, path) 43 | } 44 | return nil 45 | }) 46 | return files, err 47 | } 48 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/bitwise_or.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### 
SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_or(0::dec<1, 0>, 1::dec<1, 0>) = 1::dec<1, 0> 6 | bitwise_or(127::dec<3, 0>, 127::dec<3, 0>) = 127::dec<3, 0> 7 | bitwise_or(-127::dec<3, 0>, -10::dec<2, 0>) = -9::dec<3, 0> 8 | bitwise_or(31766::dec<5, 0>, 900::dec<3, 0>) = 32662::dec<5, 0> 9 | bitwise_or(-31766::dec<5, 0>, 900::dec<3, 0>) = -31762::dec<5, 0> 10 | bitwise_or(2147483647::dec<10, 0>, 123456789::dec<9, 0>) = 2147483647::dec<10, 0> 11 | bitwise_or(-2147483647::dec<10, 0>, 123456789::dec<9, 0>) = -2024026859::dec<10, 0> 12 | bitwise_or(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 9223372036854775807::dec<19, 0> 13 | bitwise_or(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = -9223372036854775681::dec<19, 0> 14 | 15 | # max_values: test with max values 16 | bitwise_or(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 17 | bitwise_or(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 18 | bitwise_or(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = -99999999999999999999999999999999999999::dec<38, 0> 19 | 20 | # null_values: test with null values 21 | bitwise_or(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0> 22 | bitwise_or(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0> 23 | -------------------------------------------------------------------------------- /site/examples/proto-textformat/lambda_invocation/inline_invocation.textproto: -------------------------------------------------------------------------------- 1 | # Represents the invocation of the lambda `(x: i32) -> x * 2)` on parameter `5` 2 | # i.e. 
`(x: i32) -> x * 2)(5)` 3 | # 4 | # message Expression.LambdaInvocation 5 | 6 | lambda: { 7 | # Lambda parameter: x (type i32) 8 | parameters: { 9 | types: [ 10 | { 11 | i32: { 12 | nullability: NULLABILITY_REQUIRED 13 | } 14 | } 15 | ] 16 | } 17 | 18 | # Lambda body: x * 2 19 | body: { 20 | scalar_function: { 21 | function_reference: 1 # Reference to multiply function 22 | arguments: [ 23 | { 24 | # First argument: lambda parameter x 25 | value: { 26 | selection: { 27 | lambda_parameter_reference: { 28 | steps_out: 0 # 0 = current lambda 29 | } 30 | direct_reference: { 31 | struct_field: { 32 | field: 0 # 0 = first parameter (x) 33 | } 34 | } 35 | } 36 | } 37 | }, 38 | { 39 | # Second argument: literal 2 40 | value: { 41 | literal: { 42 | i32: 2 43 | } 44 | } 45 | } 46 | ] 47 | 48 | output_type: { 49 | i32: { 50 | nullability: NULLABILITY_REQUIRED 51 | } 52 | } 53 | } 54 | } 55 | } 56 | 57 | # Invocation arguments: struct with one field containing 5 58 | arguments: { 59 | fields: [ 60 | { 61 | literal: { 62 | i32: 5 63 | } 64 | } 65 | ] 66 | } 67 | -------------------------------------------------------------------------------- /extensions/unknown.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:unknown 4 | types: 5 | - name: unknown 6 | scalar_functions: 7 | - name: "add" 8 | impls: 9 | - args: 10 | - value: unknown 11 | - value: unknown 12 | return: unknown 13 | - name: "subtract" 14 | impls: 15 | - args: 16 | - value: unknown 17 | - value: unknown 18 | return: unknown 19 | - name: "multiply" 20 | impls: 21 | - args: 22 | - value: unknown 23 | - value: unknown 24 | return: unknown 25 | - name: "divide" 26 | impls: 27 | - args: 28 | - value: unknown 29 | - value: unknown 30 | return: unknown 31 | - name: "modulus" 32 | impls: 33 | - args: 34 | - value: unknown 35 | - value: unknown 36 | return: unknown 37 | aggregate_functions: 38 | - name: "sum" 39 | impls: 40 | - args: 
41 | - value: unknown 42 | intermediate: unknown 43 | return: unknown 44 | - name: "avg" 45 | impls: 46 | - args: 47 | - value: unknown 48 | intermediate: unknown 49 | return: unknown 50 | - name: "min" 51 | impls: 52 | - args: 53 | - value: unknown 54 | intermediate: unknown 55 | return: unknown 56 | - name: "max" 57 | impls: 58 | - args: 59 | - value: unknown 60 | intermediate: unknown 61 | return: unknown 62 | - name: "count" 63 | impls: 64 | - args: 65 | - value: unknown 66 | intermediate: unknown 67 | return: unknown 68 | -------------------------------------------------------------------------------- /site/docs/spec/versioning.md: -------------------------------------------------------------------------------- 1 | # Versioning 2 | 3 | As an interface specification, the goal of Substrait is to reach a point where (breaking) changes will never need to happen again, or at least be few and far between. 4 | By analogy, Apache Arrow's in-memory format specification has stayed functionally constant, despite many major library versions being released. 5 | However, we're not there yet. 6 | When we believe that we've reached this point, we will signal this by releasing version 1.0.0. 7 | Until then, we will remain in the 0.x.x version regime. 8 | 9 | Despite this, we strive to maintain backward compatibility for both the binary representation and the text representation by means of deprecation. 10 | When a breaking change cannot be reasonably avoided, we may remove previously deprecated fields. 11 | All deprecated fields will be removed for the 1.0.0 release. 12 | 13 | Substrait uses [semantic versioning](https://semver.org/) for its version numbers, with the addition that, during 0.x.y, we increment the x digit for breaking changes and new features, and the y digit for fixes and other nonfunctional changes. 14 | The release process is currently automated and makes a new release every week, provided something has changed on the main branch since the previous release. 
15 | This release cadence will likely be slowed down as stability increases over time. 16 | [Conventional commits](https://www.conventionalcommits.org/en/v1.0.0-beta.2/) are used to distinguish between breaking changes, new features, and fixes, 17 | and GitHub actions are used to verify that there are indeed no breaking protobuf changes in a commit, unless the commit message states this. 18 | -------------------------------------------------------------------------------- /site/docs/serialization/text_serialization.md: -------------------------------------------------------------------------------- 1 | # Text Serialization 2 | 3 | To maximize the new user experience, it is important for Substrait to have a text representation of plans. This allows people to experiment with basic tooling. Building simple CLI tools that do things like SQL > Plan and Plan > SQL or REPL plan construction can all be done relatively straightforwardly with a text representation. 4 | 5 | The recommended text serialization format is JSON. Since the text format is not designed for performance, the format can be produced to maximize readability. This also allows nice symmetry between the construction of plans and the configuration of various extensions such as function signatures and user defined types. 6 | 7 | To ensure the JSON is valid, the object will be defined using the [OpenApi 3.1 specification](https://spec.openapis.org/oas/latest.html). This not only allows strong validation, the OpenApi specification enables [code generators](https://github.com/OpenAPITools/openapi-generator) to be easily used to produce plans in many languages. 8 | 9 | While JSON will be used for much of the plan serialization, Substrait uses a custom simplistic grammar for record level expressions. 
While one can construct an equation such as `(10 + 5)/2` using a tree of function and literal objects, it is much more human-readable to consume a plan when the information is written similarly to the way one typically consumes scalar expressions. This grammar will be maintained in an ANTLR grammar (targetable to multiple programming languages) and is also planned to be supported via JSON schema definition format tag so that the grammar can be validated as part of the schema validation. 10 | 11 | -------------------------------------------------------------------------------- /site/docs/types/type_variations.md: -------------------------------------------------------------------------------- 1 | # Type Variations 2 | 3 | Type variations may be used to represent differences in representation between different consumers. For example, an engine might support dictionary encoding for a string, or could be using either a row-wise or columnar representation of a struct. All variations of a type are expected to have the same semantics when operated on by functions or other expressions. 4 | 5 | All variations except the "system-preferred" variation (a.k.a. `[0]`, see [Type Parsing](type_parsing.md)) must be defined using [simple extensions](../extensions/index.md#simple-extensions). The key properties of these variations are: 6 | 7 | | Property | Description | 8 | | ----------------- | ------------------------------------------------------------ | 9 | | Base Type Class | The type class that this variation belongs to. | 10 | | Name | The name used to reference this type. Should be unique within type variations for this parent type within a simple extension. | 11 | | Description | A human description of the purpose of this type variation. | 12 | | Function Behavior | **INHERITS** or **SEPARATE**: whether functions that support the system-preferred variation implicitly also support this variation, or whether functions should be resolved independently. 
For example, if one has the function `add(i8,i8)` defined and then defines an `i8` variation, this determines whether the `i8` variation can be bound to the base `add` operation (inherits) or whether a specialized version of `add` needs to be defined specifically for this variation (separate). Defaults to inherits. | 13 | -------------------------------------------------------------------------------- /tests/cases/datetime/extract.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp and timestamptz types 5 | extract('YEAR'::str, '2016-12-31T13:30:15'::ts) = 2016::i64 6 | extract('ISOYEAR'::str, '2016-01-01T13:30:15'::ts) = 2015::i64 7 | extract('QUARTER'::str, '2016-12-31T13:30:15'::ts) = 4::i64 8 | extract('MONTH'::str, '2016-12-31T13:30:15'::ts) = 12::i64 9 | extract('WEEK'::str, '2016-12-31T13:30:15'::ts) = 52::i64 10 | extract('DAY'::str, '2016-12-31T13:30:15'::ts) = 31::i64 11 | extract('ISODOW'::str, '2016-12-25T13:30:15'::ts) = 7::i64 12 | extract('DOW'::str, '2016-12-25T13:30:15'::ts) = 0::i64 13 | extract('DOY'::str, '2016-12-25T13:30:15'::ts) = 360::i64 14 | extract('HOUR'::str, '2016-12-31T13:30:15'::ts) = 13::i64 15 | extract('MINUTE'::str, '2016-12-31T13:30:15'::ts) = 30::i64 16 | extract('SECOND'::str, '2016-12-31T13:30:15'::ts) = 15::i64 17 | extract('MILLISECONDS'::str, '2016-12-31T13:30:15'::ts) = 15000::i64 18 | extract('MICROSECONDS'::str, '2016-12-31T13:30:15.220000'::ts) = 15220000::i64 19 | extract('EPOCH'::str, '2016-12-31T13:30:15'::ts) = 1483191015::i64 20 | 21 | # date: examples using the date type 22 | extract('YEAR'::str, '2020-12-31'::date) = 2020::i64 23 | extract('MONTH'::str, '2020-12-31'::date) = 12::i64 24 | extract('DAY'::str, '2020-12-31'::date) = 31::i64 25 | 26 | # time: examples using the time type 27 | extract('HOUR'::str, '01:02:03'::time) = 
1::i64 28 | extract('MINUTE'::str, '01:02:03'::time) = 2::i64 29 | extract('SECOND'::str, '01:02:03'::time) = 3::i64 30 | extract('MILLISECOND'::str, '01:02:03.155'::time) = 3155::i64 31 | extract('MICROSECOND'::str, '01:02:03.45'::time) = 3450000::i64 32 | -------------------------------------------------------------------------------- /extensions/functions_aggregate_decimal_output.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:functions_aggregate_decimal_output 4 | aggregate_functions: 5 | - name: "count" 6 | description: Count a set of values. Result is returned as a decimal instead of i64. 7 | impls: 8 | - args: 9 | - name: x 10 | value: any 11 | options: 12 | overflow: 13 | values: [SILENT, SATURATE, ERROR] 14 | nullability: DECLARED_OUTPUT 15 | decomposable: MANY 16 | intermediate: decimal<38,0> 17 | return: decimal<38,0> 18 | - name: "count" 19 | description: "Count a set of records (not field referenced). Result is returned as a decimal instead of i64." 20 | impls: 21 | - options: 22 | overflow: 23 | values: [SILENT, SATURATE, ERROR] 24 | nullability: DECLARED_OUTPUT 25 | decomposable: MANY 26 | intermediate: decimal<38,0> 27 | return: decimal<38,0> 28 | - name: "approx_count_distinct" 29 | description: >- 30 | Calculates the approximate number of rows that contain distinct values of the expression argument using 31 | HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which 32 | returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT 33 | processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact 34 | result. Result is returned as a decimal instead of i64. 
35 | impls: 36 | - args: 37 | - name: x 38 | value: any 39 | nullability: DECLARED_OUTPUT 40 | decomposable: MANY 41 | intermediate: binary 42 | return: decimal<38,0> 43 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Substrait 2 | 3 | Welcome! 4 | 5 | ## Dependencies 6 | 7 | There's no formal set of dependencies for Substrait, but here are some that are useful to have: 8 | 9 | * [`buf`](https://docs.buf.build/installation) for easy generation of proto serialization/deserialization code 10 | * [`protoc`](https://grpc.io/docs/protoc-installation/), used by `buf` and usable independent of `buf` 11 | * A Python environment with [the website's `requirements.txt`](https://github.com/substrait-io/substrait/blob/main/site/requirements.txt) dependencies installed if you want to see changes to the website locally 12 | 13 | ## Documentation Examples 14 | 15 | When adding examples to the documentation, please use external example files instead of inline code blocks. This ensures examples are validated against schemas in CI/CD and prevents documentation drift. 16 | 17 | See [`site/examples/README.md`](site/examples/README.md) for complete instructions on creating and including validated examples. 18 | 19 | Quick example: 20 | 21 | ```markdown 22 | ```yaml 23 | --8<-- "examples/extensions/my_example.yaml" 24 | ``` 25 | ``` 26 | 27 | ## Commit Conventions 28 | 29 | Substrait follows [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) for commit message structure. You can use [`pre-commit`](https://pre-commit.com/) to check your messages for you, but note that you must install pre-commit using `pre-commit install --hook-type commit-msg` for this to work. CI will also lint your commit messages. 
Please also ensure that your PR title and initial comment together form a valid commit message; that will save us some work formatting the merge commit message when we merge your PR. 30 | 31 | Examples of commit messages can be seen [here](https://www.conventionalcommits.org/en/v1.0.0/#examples). 32 | -------------------------------------------------------------------------------- /dialects/tests/functions_test.yaml: -------------------------------------------------------------------------------- 1 | name: "dialect test file for functions" 2 | 3 | dependencies: 4 | arithmetic: extension:io.substrait:functions_arithmetic 5 | boolean: extension:io.substrait:functions_boolean 6 | comparison: extension:io.substrait:functions_comparison 7 | string: extension:io.substrait:functions_string 8 | 9 | supported_scalar_functions: 10 | - source: arithmetic 11 | name: add 12 | system_metadata: 13 | name: + 14 | notation: INFIX 15 | required_options: 16 | overflow: ERROR 17 | supported_impls: 18 | - i32_i32 19 | - i64_i64 20 | - source: arithmetic 21 | name: add 22 | system_metadata: 23 | name: + 24 | notation: INFIX 25 | required_options: 26 | overflow: ERROR 27 | rounding: TIE_TO_EVEN 28 | supported_impls: 29 | - fp32_fp32 30 | - fp64_fp64 31 | - source: boolean 32 | name: and 33 | system_metadata: 34 | name: and 35 | notation: INFIX 36 | supported_impls: 37 | - bool 38 | - source: comparison 39 | name: is_null 40 | system_metadata: 41 | name: IS NULL 42 | notation: POSTFIX 43 | supported_impls: 44 | - any 45 | - source: string 46 | name: concat 47 | system_metadata: 48 | name: '||' 49 | required_options: 50 | null_handling: ACCEPT_NULLS 51 | notation: INFIX 52 | supported_impls: 53 | - vchar 54 | - str 55 | variadic: 56 | min: 1 57 | max: 5 58 | 59 | supported_aggregate_functions: 60 | - source: arithmetic 61 | name: max 62 | supported_impls: 63 | - i32 64 | - i64 65 | - source: arithmetic 66 | name: min 67 | supported_impls: 68 | - i32 69 | - i64 70 | 71 | 
supported_window_functions: 72 | - source: arithmetic 73 | name: ntile 74 | supported_impls: 75 | - i32 76 | - source: arithmetic 77 | name: rank 78 | supported_impls: 79 | - "" 80 | -------------------------------------------------------------------------------- /site/docs/serialization/basics.md: -------------------------------------------------------------------------------- 1 | # Basics 2 | 3 | Substrait is designed to be serialized into various different formats. Currently we support a binary serialization for 4 | transmission of plans between programs (e.g. IPC or network communication) and a text serialization for debugging and human readability. Other formats may be added in the future. 5 | 6 | These formats serialize a collection of plans. Substrait does not define how a collection of plans is to be interpreted. 7 | For example, the following scenarios are all valid uses of a collection of plans: 8 | 9 | - A query engine receives a plan and executes it. It receives a collection of plans with a single root plan. The 10 | top-level node of the root plan defines the output of the query. Non-root plans may be included as common subplans 11 | which are referenced from the root plan. 12 | - A transpiler may convert plans from one dialect to another. It could take, as input, a single root plan. Then 13 | it could output a serialized binary containing multiple root plans. Each root plan is a representation of the 14 | input plan in a different dialect. 15 | - A distributed scheduler might expect 1+ root plans. Each root plan describes a different stage of computation. 16 | 17 | Libraries should make sure to thoroughly describe the way plan collections will be produced or consumed. 18 | 19 | ## Root plans 20 | 21 | We often refer to query plans as a graph of nodes (typically a DAG unless the query is recursive). 
However, we 22 | encode this graph as a collection of trees with a single root tree that references other trees (which may also 23 | transitively reference other trees). Plan serializations all have some way to indicate which plan(s) are "root" 24 | plans. Any plan that is not a root plan and is not referenced (directly or transitively) by some root plan 25 | can safely be ignored. 26 | -------------------------------------------------------------------------------- /tests/coverage/nodes.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from dataclasses import dataclass 3 | from typing import List 4 | 5 | 6 | @dataclass 7 | class CaseGroup: 8 | name: str 9 | description: str 10 | 11 | 12 | @dataclass 13 | class SubstraitError: 14 | error: str 15 | 16 | 17 | @dataclass 18 | class CaseLiteral: 19 | value: str | int | float | list | None 20 | type: str 21 | 22 | def get_base_type(self): 23 | type_str = self.type 24 | if "<" in type_str: 25 | type_str = type_str[: type_str.find("<")] 26 | if type_str.endswith("?"): 27 | return type_str[:-1] 28 | return type_str 29 | 30 | 31 | @dataclass 32 | class AggregateArgument: 33 | column_name: str 34 | column_type: str 35 | table_name: str 36 | scalar_value: CaseLiteral | None 37 | 38 | 39 | @dataclass 40 | class TestCase: 41 | func_name: str 42 | base_uri: str 43 | group: CaseGroup | None 44 | options: dict 45 | rows: List[List] | None 46 | args: List[CaseLiteral] | List[AggregateArgument] 47 | result: CaseLiteral | str | SubstraitError 48 | comment: str 49 | 50 | def get_return_type(self): 51 | if isinstance(self.result, CaseLiteral): 52 | return self.result.type 53 | return self.result 54 | 55 | def is_return_type_error(self): 56 | return isinstance(self.result, SubstraitError) 57 | 58 | def get_arg_types(self): 59 | return [arg.get_base_type() for arg in self.args] 60 | 61 | def get_signature(self): 62 | return f"{self.func_name}({', 
'.join([arg.type for arg in self.args])}) = {self.get_return_type()}" 63 | 64 | 65 | @dataclass 66 | class TestFile: 67 | path: str 68 | version: str 69 | include: str # Primary extension being tested 70 | dependencies: List[str] # Additional extensions needed for tests 71 | testcases: List[TestCase] 72 | -------------------------------------------------------------------------------- /site/docs/expressions/extended_expression.md: -------------------------------------------------------------------------------- 1 | # Extended Expression 2 | 3 | Extended Expression messages are provided for expression-level protocols as an alternative to using a Plan. They mainly target expression-only evaluations, such as those computed in Filter/Project/Aggregation rels. Unlike the original Expression defined in the substrait protocol, Extended Expression messages require more information to completely describe the computation context including: input data schema, referred function signatures, and output schema. 4 | 5 | Since Extended Expression will be used separately from the Plan rel representation, it will need to include basic fields like Version. 6 | 7 | === "ExtendedExpression Message" 8 | 9 | ```proto 10 | %%% proto.message.ExtendedExpression %%% 11 | ``` 12 | 13 | ## Input and output data schema 14 | 15 | Similar to `base_schema` defined in [ReadRel](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L58), the input data schema describes the name/type/nullability and layout info of input data for the target expression evaluation. It also has a field `name` to define the name of the output data. 
16 | 17 | ## Referred expression 18 | 19 | An Extended Expression will have one or more referred expressions, which can be either [Expression](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto) or [AggregateFunction](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L1170). Additional types of expressions may be added in the future. 20 | 21 | For a message with multiple expressions, users may produce each Extended Expression in the same order as they occur in the original Plan rel. But, the consumer does NOT have to handle them in this order. A consumer needs only to ensure that the columns in the final output are organized in the same order as defined in the message. 22 | 23 | ## Function extensions 24 | 25 | Function extensions work the same for both Extended Expression and the original Expression defined in the Substrait protocol. 26 | -------------------------------------------------------------------------------- /site/docs/types/type_system.md: -------------------------------------------------------------------------------- 1 | # Type System 2 | 3 | Substrait tries to cover the most common types used in data manipulation. Types beyond this common core may be represented using [simple extensions](../extensions/index.md#simple-extensions). 4 | 5 | Substrait types fundamentally consist of four components: 6 | 7 | | Component | Condition | Examples | Description 8 | | ------------------------------- | ------------------- | ----------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 9 | | [Class](type_classes.md) | Always | `i8`, `string`, `STRUCT`, extensions | Together with the parameter pack, describes the set of non-null values supported by the type. 
Subdivided into simple and compound type classes. 10 | | Nullability | Always | Either `NULLABLE` (`?` suffix) or `REQUIRED` (no suffix) | Describes whether values of this type can be null. Note that null is considered to be a special value of a nullable type, rather than the only value of a special null type. 11 | | [Variation](type_variations.md) | Always | No suffix or explicitly `[0]` (system-preferred), or an extension | Allows different variations of the same type class to exist in a system at a time, usually distinguished by in-memory format. 12 | | Parameters | Compound types only | `<10, 2>` (for `DECIMAL`), `<i32, string>` (for `STRUCT`) | Some combination of zero or more data types or integers. The expected set of parameters and the significance of each parameter depends on the type class. 13 | 14 | Refer to [Type Parsing](type_parsing.md) for a description of the syntax used to describe types. 15 | 16 | !!! note "Note" 17 | Substrait employs a strict type system without any coercion rules. All changes in types must be made explicit via [cast expressions](../expressions/specialized_record_expressions.md). 18 | -------------------------------------------------------------------------------- /site/docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Home 3 | --- 4 | 5 | 6 | ## What is Substrait? 7 | 8 | Substrait is a format for describing compute operations on structured data. It is designed for interoperability across different languages and systems. 9 | 10 | 11 | 12 | ## How does it work? 13 | 14 | Substrait provides a well-defined, cross-language [specification](spec/specification.md) for data compute operations. This includes a consistent declaration of common operations, custom operations and one or more serialized representations of this specification. The spec focuses on the semantics of each operation.
In addition to the specification the Substrait ecosystem also includes a number of libraries and [useful tools](tools/producer_tools.md). 15 | 16 | We highly recommend the [tutorial](tutorial/sql_to_substrait.md) to learn how a Substrait plan is constructed. 17 | 18 | 19 | 20 | ## Benefits 21 | 22 | * Avoids every system needing to create a communication method between every other system -- each system merely supports ingesting and producing Substrait and it instantly becomes a part of the greater ecosystem. 23 | * Makes every part of the system upgradable. There's a new query engine that's ten times faster? Just plug it in! 24 | * Enables heterogeneous environments -- run on a cluster of an unknown set of execution engines! 25 | * The text version of the Substrait plan allows you to quickly see how a plan functions without needing a visualizer (although there are Substrait visualizers as well!). 26 | 27 | 28 | 29 | ## Example Use Cases 30 | 31 | * Communicate a compute plan between a SQL parser and an execution engine (e.g. Calcite SQL parsing to Arrow C++ compute kernel) 32 | * Serialize a plan that represents a SQL view for consistent use in multiple systems (e.g. Iceberg views in Spark and Trino) 33 | * Submit a plan to different execution engines (e.g. Datafusion and Postgres) and get a consistent interpretation of the semantics. 34 | * Create an alternative plan generation implementation that can connect an existing end-user compute expression system to an existing end-user processing engine (e.g. Pandas operations executed inside SingleStore) 35 | * Build a pluggable plan visualization tool (e.g. D3 based plan visualizer) 36 | 37 | -------------------------------------------------------------------------------- /site/docs/types/type_aliases.md: -------------------------------------------------------------------------------- 1 | # Type Aliases 2 | 3 | In a Substrait plan, types are spelled out whenever and wherever they are needed. 
For parameterized types, all type parameters are spelled out per type reference. For a parameterized type with a large number of parameters, complex nested type parameters, or string parameters, this can significantly bloat the size of the plan proportional to the number of such type references in the plan as such type parameters are repeatedly spelled out. 4 | 5 | To alleviate the problem, Substrait offers a type alias mechanism. 6 | 7 | Type aliases allow a plan to declare a type once and reference it multiple times within a plan. A type alias can be used wherever a type is expected. 8 | 9 | ## Type Alias 10 | 11 | A type alias is a mapping from an anchor to a concrete Substrait type. A valid type alias is described below. 12 | 13 | * All type parameters must be specified. 14 | * Cannot directly be another alias. 15 | * Type parameters can reference other aliased types as long as no circular dependencies are introduced. 16 | * Nullability of aliased type is **ignored**. Nullability must be specified when the aliased type is referenced. 17 | * Type variation may be specified in the aliased type. 18 | 19 | ## Type Alias Reference 20 | 21 | A type alias reference is a Substrait type and can appear wherever a Substrait type is expected. The reference must specify the nullability of the aliased type. 22 | 23 | ## Examples 24 | 25 | ``` 26 | type alias 1 --> VARCHAR<100> // OK to alias Substrait VARCHAR. 27 | type alias 2 --> UserDefined // OK to reference other type alias 1. UserDefined?>. 28 | type alias 3 --> UserDefined // OK to reference other type alias 1. UserDefined>. 29 | 30 | type alias 4 --> type alias ref (1, NULLABLE) // NOT OK to alias another alias directly. 31 | type alias 5 --> STRUCT // NOT OK to reference an undefined type alias 0. 32 | type alias 6 --> STRUCT STRUCT 34 | type alias 8 --> STRUCT // NOT OK because type alias 7 and 8 have a circular dependency. 
35 | ``` -------------------------------------------------------------------------------- /tests/cases/string/regexp_string_split.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # lazy_matching: Examples with lazy matching 5 | regexp_string_split('Hello'::str, 'Hel+?'::str) = ['', 'lo']::list 6 | regexp_string_split('Hello'::str, 'Hel+'::str) = ['', 'o']::list 7 | 8 | # greedy_matching: Examples with greedy matching 9 | regexp_string_split('HHHelloooo'::str, 'Hel+'::str) = ['HH', 'oooo']::list 10 | 11 | # position_anchors: Examples with position anchors 12 | regexp_string_split('abcdefg'::str, '\Aabc'::str) = ['', 'defg']::list 13 | regexp_string_split('abcdefg'::str, 'efg$'::str) = ['abcd', '']::list 14 | 15 | # metacharacters: Examples with metacharacters 16 | regexp_string_split('abc1abc'::str, '\d'::str) = ['abc', 'abc']::list 17 | regexp_string_split('111a111'::str, '\D'::str) = ['111', '111']::list 18 | regexp_string_split('abc def'::str, '\s'::str) = ['abc', 'def']::list 19 | regexp_string_split('a bcdef'::str, '\S'::str) = ['', ' ', '', '', '', '', '']::list 20 | regexp_string_split(' abcdef'::str, '\w'::str) = [' ', '', '', '', '', '', '']::list 21 | regexp_string_split('a bcdef'::str, '\W'::str) = ['a', 'bcdef']::list 22 | 23 | # occurrence_indicator: Examples with occurrence indicators 24 | regexp_string_split('abc123abc'::str, '[0-9]+'::str) = ['abc', 'abc']::list 25 | regexp_string_split('abc123abc'::str, '[bc]'::str) = ['a', '', '123a', '', '']::list 26 | regexp_string_split('abcde'::str, '(.*)c'::str) = ['', 'de']::list 27 | regexp_string_split('abbbbc'::str, '[b]{2,3}'::str) = ['a', 'bc']::list 28 | 29 | # lookahead: Examples with lookahead 30 | regexp_string_split('100 dollars'::str, '\d+(?= dollars)'::str) [lookaround:TRUE] = ['', ' dollars']::list 31 | 32 | # negative_lookahead: Examples with negative 
lookahead 33 | regexp_string_split('100 pesos'::str, '\d+(?!\d| dollars)'::str) [lookaround:TRUE] = ['', ' pesos']::list 34 | 35 | # lookbehind: Examples with lookbehind 36 | regexp_string_split('USD100'::str, '(?<=USD)\d{3}'::str) [lookaround:TRUE] = ['USD', '']::list 37 | 38 | # negative_lookbehind: Examples with negative lookbehind 39 | regexp_string_split('JPY100'::str, '\d{3}(? 40 | -------------------------------------------------------------------------------- /proto/substrait/extended_expression.proto: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | syntax = "proto3"; 3 | 4 | package substrait; 5 | 6 | import "substrait/algebra.proto"; 7 | import "substrait/extensions/extensions.proto"; 8 | import "substrait/plan.proto"; 9 | import "substrait/type.proto"; 10 | 11 | option csharp_namespace = "Substrait.Protobuf"; 12 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb"; 13 | option java_multiple_files = true; 14 | option java_package = "io.substrait.proto"; 15 | 16 | message ExpressionReference { 17 | oneof expr_type { 18 | Expression expression = 1; 19 | AggregateFunction measure = 2; 20 | } 21 | // Field names in depth-first order 22 | repeated string output_names = 3; 23 | } 24 | 25 | // Describe a set of operations to complete. 26 | // For compactness sake, identifiers are normalized at the plan level. 27 | message ExtendedExpression { 28 | // Substrait version of the expression. Optional up to 0.17.0, required for later 29 | // versions. 30 | Version version = 7; 31 | 32 | // a list of yaml specifications this expression may depend on 33 | // this is now deprecated and extension_urns should be used instead. 
34 | repeated substrait.extensions.SimpleExtensionURI extension_uris = 1 [deprecated = true]; 35 | 36 | // a list of extension specifications this expression may depend on, 37 | // referenced by Extension URN 38 | repeated substrait.extensions.SimpleExtensionURN extension_urns = 8; 39 | 40 | // a list of extensions this expression may depend on 41 | repeated substrait.extensions.SimpleExtensionDeclaration extensions = 2; 42 | 43 | // one or more expression trees with same order in plan rel 44 | repeated ExpressionReference referred_expr = 3; 45 | 46 | NamedStruct base_schema = 4; 47 | // additional extensions associated with this expression. 48 | substrait.extensions.AdvancedExtension advanced_extensions = 5; 49 | 50 | // A list of com.google.Any entities that this plan may use. Can be used to 51 | // warn if some embedded message types are unknown. Note that this list may 52 | // include message types that are ignorable (optimizations) or that are 53 | // unused. In many cases, a consumer may be able to work with a plan even if 54 | // one or more message types defined here are unknown. 55 | repeated string expected_type_urls = 6; 56 | } 57 | -------------------------------------------------------------------------------- /site/docs/expressions/subqueries.md: -------------------------------------------------------------------------------- 1 | # Subqueries 2 | 3 | Subqueries are scalar expressions comprised of another query. 4 | 5 | ## Forms 6 | 7 | ### Scalar 8 | 9 | Scalar subqueries are subqueries that return one row and one column. 10 | 11 | | Property | Description | Required | 12 | | -------- | -------------- | -------- | 13 | | Input | Input relation | Yes | 14 | 15 | ### `IN` predicate 16 | 17 | An `IN` subquery predicate checks that the left expression is contained in the 18 | right subquery. 
19 | 20 | #### Examples 21 | 22 | ```sql 23 | SELECT * 24 | FROM t1 25 | WHERE x IN (SELECT * FROM t2) 26 | ``` 27 | 28 | ```sql 29 | SELECT * 30 | FROM t1 31 | WHERE (x, y) IN (SELECT a, b FROM t2) 32 | ``` 33 | 34 | | Property | Description | Required | 35 | | -------- | ------------------------------------------- | -------- | 36 | | Needles | Expressions whose existence will be checked | Yes | 37 | | Haystack | Subquery to check | Yes | 38 | 39 | ### Set predicates 40 | 41 | A set predicate is a predicate over a set of rows in the form of a subquery. 42 | 43 | `EXISTS` and `UNIQUE` are common SQL spellings of these kinds of predicates. 44 | 45 | | Property | Description | Required | 46 | | --------- | ------------------------------------------ | -------- | 47 | | Operation | The operation to perform over the set | Yes | 48 | | Tuples | Set of tuples to check using the operation | Yes | 49 | 50 | ### Set comparisons 51 | 52 | A set comparison subquery is a subquery comparison using `ANY` or `ALL` operations. 
53 | 54 | #### Examples 55 | 56 | ```sql 57 | SELECT * 58 | FROM t1 59 | WHERE x < ANY(SELECT y from t2) 60 | ``` 61 | 62 | | Property | Description | Required | 63 | | --------------------- | ---------------------------------------------- | -------- | 64 | | Reduction operation | The kind of reduction to use over the subquery | Yes | 65 | | Comparison operation | The kind of comparison operation to use | Yes | 66 | | Expression | Left-hand side expression to check | Yes | 67 | | Subquery | Subquery to check | Yes | 68 | 69 | 70 | 71 | === "Protobuf Representation" 72 | 73 | ```proto 74 | %%% proto.message.Expression.Subquery %%% 75 | ``` 76 | -------------------------------------------------------------------------------- /site/docs/expressions/window_functions.md: -------------------------------------------------------------------------------- 1 | # Window Functions 2 | 3 | Window functions are functions which consume values from multiple records to produce a single output. They are similar to aggregate functions, but also have a focused window of analysis to compare to their partition window. Window functions are similar to scalar values to an end user, producing a single value for each input record. However, the consumption visibility for the production of each single record can be many records. 4 | 5 | 6 | 7 | Window function signatures contain all the properties defined for [aggregate functions](aggregate_functions.md). Additionally, they contain the properties below 8 | 9 | | Property | Description | Required | 10 | | ----------- | ------------------------------------------------------------ | ------------------------------- | 11 | | Inherits | All properties defined for aggregate functions. | N/A | 12 | | Window Type | STREAMING or PARTITION. Describes whether the function needs to see all data for the specific partition operation simultaneously. Operations like SUM can produce values in a streaming manner with no complete visibility of the partition. 
NTILE requires visibility of the entire partition before it can start producing values. | Optional, defaults to PARTITION | 13 | 14 | 15 | 16 | When binding a window function, the binding must include the following additional properties beyond the standard aggregate binding properties: 17 | 18 | | Property | Description | Required | 19 | | ----------- | ------------------------------------------------------------ | ------------------------------------------------------------ | 20 | | Partition | A list of partitioning expressions. | False, defaults to a single partition for the entire dataset | 21 | | Lower Bound | Bound Following(int64), Bound Trailing(int64) or CurrentRow. | False, defaults to start of partition | 22 | | Upper Bound | Bound Following(int64), Bound Trailing(int64) or CurrentRow. | False, defaults to end of partition | 23 | 24 | ## Aggregate Functions as Window Functions 25 | 26 | Aggregate functions can be treated as window functions with Window Type set to STREAMING. 27 | 28 | AVG, COUNT, MAX, MIN and SUM are examples of aggregate functions that are commonly allowed in window contexts.
29 | -------------------------------------------------------------------------------- /.github/workflows/pr_title.yml: -------------------------------------------------------------------------------- 1 | name: PR Title Check 2 | 3 | on: 4 | pull_request_target: 5 | types: [opened, edited, synchronize, reopened] 6 | jobs: 7 | commitlint: 8 | name: PR title / description conforms to semantic-release 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/setup-node@v6 12 | with: 13 | node-version: "20" 14 | - run: npm install @commitlint/config-conventional 15 | - run: > 16 | echo 'module.exports = { 17 | // Workaround for https://github.com/dependabot/dependabot-core/issues/5923 18 | "ignores": [(message) => /^Bumps \[.+]\(.+\) from .+ to .+\.$/m.test(message)], 19 | "rules": { 20 | "body-max-line-length": [0, "always", Infinity], 21 | "footer-max-line-length": [0, "always", Infinity], 22 | "body-leading-blank": [0, "always"] 23 | } 24 | }' > .commitlintrc.js 25 | - run: npx commitlint --extends @commitlint/config-conventional --verbose <<< $COMMIT_MSG 26 | env: 27 | COMMIT_MSG: > 28 | ${{ github.event.pull_request.title }} 29 | 30 | ${{ github.event.pull_request.body }} 31 | - if: failure() 32 | uses: actions/github-script@v8 33 | with: 34 | script: | 35 | const message = `**ACTION NEEDED** 36 | 37 | Substrait follows the [Conventional Commits 38 | specification](https://www.conventionalcommits.org/en/v1.0.0/) for 39 | release automation. 40 | 41 | The PR title and description are used as the merge commit message.\ 42 | Please update your PR title and description to match the specification. 
43 | ` 44 | // Get list of current comments 45 | const comments = await github.paginate(github.rest.issues.listComments, { 46 | owner: context.repo.owner, 47 | repo: context.repo.repo, 48 | issue_number: context.issue.number 49 | }); 50 | // Check if this job already commented 51 | for (const comment of comments) { 52 | if (comment.body === message) { 53 | return // Already commented 54 | } 55 | } 56 | // Post the comment about Conventional Commits 57 | github.rest.issues.createComment({ 58 | owner: context.repo.owner, 59 | repo: context.repo.repo, 60 | issue_number: context.issue.number, 61 | body: message 62 | }) 63 | core.setFailed(message) 64 | -------------------------------------------------------------------------------- /tests/coverage/case_file_parser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import os 3 | 4 | from antlr4 import CommonTokenStream, FileStream 5 | from antlr4.error.ErrorListener import ErrorListener 6 | 7 | from tests.coverage.antlr_parser.FuncTestCaseLexer import FuncTestCaseLexer 8 | from tests.coverage.antlr_parser.FuncTestCaseParser import FuncTestCaseParser 9 | from tests.coverage.visitor import TestCaseVisitor, ParseError 10 | 11 | 12 | class ParseErrorListener(ErrorListener): 13 | def __init__(self): 14 | super(ParseErrorListener, self).__init__() 15 | self.errors = [] 16 | 17 | def syntaxError(self, recognizer, offending_symbol, line, column, msg, e): 18 | error_message = f"Syntax error at line {line}, column {column}: {msg}" 19 | self.errors.append(error_message) 20 | 21 | 22 | def parse_stream(input_stream, file_path): 23 | # Create a lexer and parser 24 | lexer = FuncTestCaseLexer(input_stream) 25 | token_stream = CommonTokenStream(lexer) 26 | parser = FuncTestCaseParser(token_stream) 27 | 28 | # Add custom error listener 29 | error_listener = ParseErrorListener() 30 | parser.removeErrorListeners() 31 | parser.addErrorListener(error_listener) 
32 | 33 | tree = parser.doc() # This is the entry point of testfile parser 34 | if parser.getNumberOfSyntaxErrors() > 0: 35 | print(tree.toStringTree(recog=parser)) 36 | print(f"{parser.getNumberOfSyntaxErrors()} Syntax errors found, exiting") 37 | raise ParseError(f"Syntax errors: {error_listener.errors}") 38 | 39 | # uncomment below line to see the parse tree for debugging 40 | # print(tree.toStringTree(recog=parser)) 41 | 42 | visitor = TestCaseVisitor(file_path) 43 | test_file = visitor.visit(tree) 44 | return test_file 45 | 46 | 47 | def parse_one_file(file_path): 48 | return parse_stream(FileStream(file_path, "UTF-8"), file_path) 49 | 50 | 51 | def parse_testcase_directory_recursively(dir_path): 52 | # for each file in directory call parse_one_file 53 | test_files = [] 54 | for child in os.listdir(dir_path): 55 | child_path = os.path.join(dir_path, child) 56 | if os.path.isfile(child_path) and child.endswith(".test"): 57 | test_file = parse_one_file(child_path) 58 | test_files.append(test_file) 59 | elif os.path.isdir(child_path): 60 | test_files_in_a_dir = parse_testcase_directory_recursively(child_path) 61 | test_files.extend(test_files_in_a_dir) 62 | return test_files 63 | 64 | 65 | def load_all_testcases(dir_path) -> list: 66 | return parse_testcase_directory_recursively(dir_path) 67 | -------------------------------------------------------------------------------- /tests/test_proto_example_validator.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | """Proto Example Validator: Validates protobuf textformat examples. 3 | 4 | Ensures examples are valid and use no unknown fields. 5 | """ 6 | from pathlib import Path 7 | from google.protobuf import text_format 8 | from google.protobuf.message import Message 9 | import pytest 10 | 11 | try: 12 | from substrait import algebra_pb2 13 | except ImportError: 14 | raise ImportError( 15 | "Protobuf bindings not found. 
Run 'buf generate' to generate them." 16 | ) 17 | 18 | 19 | def validate_example(textproto: str, message_class: type[Message]) -> None: 20 | """Parse and validate a textproto string with strict field checking.""" 21 | message = message_class() 22 | text_format.Parse(textproto, message, allow_unknown_field=False) 23 | assert message.ListFields(), "Message has no fields populated" 24 | 25 | 26 | def test_validation_rejects_unknown_fields(): 27 | """Test that validation rejects proto text with unknown fields.""" 28 | invalid_textproto = """ 29 | parameters: {types: [{i32: {nullability: NULLABILITY_REQUIRED}}]} 30 | body: {literal: {i32: 42}} 31 | unknown_field: "should fail" 32 | """ 33 | with pytest.raises(text_format.ParseError, match="unknown_field"): 34 | validate_example(invalid_textproto, algebra_pb2.Expression.Lambda) 35 | 36 | 37 | def test_validation_rejects_empty_messages(): 38 | """Test that validation rejects empty messages.""" 39 | with pytest.raises(AssertionError, match="no fields populated"): 40 | validate_example("", algebra_pb2.Expression.Lambda) 41 | 42 | 43 | def test_validate_lambdas(): 44 | """Validate lambda expression examples.""" 45 | examples_dir = Path("site/examples/proto-textformat/lambda") 46 | for textproto_file in examples_dir.glob("*.textproto"): 47 | validate_example(textproto_file.read_text(), algebra_pb2.Expression.Lambda) 48 | 49 | 50 | def test_validate_lambda_invocations(): 51 | """Validate lambda invocation examples.""" 52 | examples_dir = Path("site/examples/proto-textformat/lambda_invocation") 53 | for textproto_file in examples_dir.glob("*.textproto"): 54 | validate_example( 55 | textproto_file.read_text(), algebra_pb2.Expression.LambdaInvocation 56 | ) 57 | 58 | 59 | def test_validate_field_references(): 60 | """Validate field reference examples.""" 61 | examples_dir = Path("site/examples/proto-textformat/field_reference") 62 | for textproto_file in examples_dir.glob("*.textproto"): 63 | validate_example( 64 | 
textproto_file.read_text(), algebra_pb2.Expression.FieldReference 65 | ) 66 | -------------------------------------------------------------------------------- /grammar/SubstraitLexer.g4: -------------------------------------------------------------------------------- 1 | lexer grammar SubstraitLexer; 2 | 3 | options { 4 | caseInsensitive = true; 5 | } 6 | 7 | // Whitespace and comment handling 8 | LineComment : '//' ~[\r\n]* -> channel(HIDDEN) ; 9 | BlockComment : ( '/*' ( ~'*' | '*'+ ~[*/] ) '*'* '*/' ) -> channel(HIDDEN) ; 10 | Whitespace : [ \t\r]+ -> channel(HIDDEN) ; 11 | 12 | fragment DIGIT: [0-9]; 13 | 14 | // Syntactic keywords. 15 | If : 'IF'; 16 | Then : 'THEN'; 17 | Else : 'ELSE'; 18 | Func : 'FUNC'; 19 | 20 | // TYPES 21 | Boolean : 'BOOLEAN'; 22 | I8 : 'I8'; 23 | I16 : 'I16'; 24 | I32 : 'I32'; 25 | I64 : 'I64'; 26 | FP32 : 'FP32'; 27 | FP64 : 'FP64'; 28 | String : 'STRING'; 29 | Binary : 'BINARY'; 30 | Timestamp: 'TIMESTAMP'; 31 | Timestamp_TZ: 'TIMESTAMP_TZ'; 32 | Date : 'DATE'; 33 | Time : 'TIME'; 34 | Interval_Year: 'INTERVAL_YEAR'; 35 | Interval_Day: 'INTERVAL_DAY'; 36 | UUID : 'UUID'; 37 | Decimal : 'DECIMAL'; 38 | Precision_Time: 'PRECISION_TIME'; 39 | Precision_Timestamp: 'PRECISION_TIMESTAMP'; 40 | Precision_Timestamp_TZ: 'PRECISION_TIMESTAMP_TZ'; 41 | FixedChar: 'FIXEDCHAR'; 42 | VarChar : 'VARCHAR'; 43 | FixedBinary: 'FIXEDBINARY'; 44 | Struct : 'STRUCT'; 45 | NStruct : 'NSTRUCT'; 46 | List : 'LIST'; 47 | Map : 'MAP'; 48 | UserDefined: 'U!'; 49 | 50 | // short names for types 51 | Bool: 'BOOL'; 52 | Str: 'STR'; 53 | VBin: 'VBIN'; 54 | Ts: 'TS'; 55 | TsTZ: 'TSTZ'; 56 | IYear: 'IYEAR'; 57 | IDay: 'IDAY'; 58 | Dec: 'DEC'; 59 | PT: 'PT'; 60 | PTs: 'PTS'; 61 | PTsTZ: 'PTSTZ'; 62 | FChar: 'FCHAR'; 63 | VChar: 'VCHAR'; 64 | FBin: 'FBIN'; 65 | 66 | Any: 'ANY'; 67 | AnyVar: Any [0-9]; 68 | 69 | DoubleColon: '::'; 70 | 71 | // MATH 72 | Plus : '+'; 73 | Minus : '-'; 74 | Asterisk : '*'; 75 | ForwardSlash : '/'; 76 | Percent : '%'; 77 | 78 | // COMPARE 
79 | Eq : '='; 80 | Ne : '!='; 81 | Gte : '>='; 82 | Lte : '<='; 83 | Gt : '>'; 84 | Lt : '<'; 85 | Bang : '!'; 86 | 87 | // ORGANIZE 88 | OAngleBracket: Lt; 89 | CAngleBracket: Gt; 90 | OParen: '('; 91 | CParen: ')'; 92 | OBracket: '['; 93 | CBracket: ']'; 94 | Comma: ','; 95 | Colon: ':'; 96 | QMark: '?'; 97 | Hash: '#'; 98 | Dot: '.'; 99 | 100 | 101 | // OPERATIONS 102 | And : 'AND'; 103 | Or : 'OR'; 104 | Assign : ':='; 105 | Arrow : '->'; 106 | 107 | 108 | 109 | fragment Int 110 | : '1'..'9' Digit* 111 | | '0' 112 | ; 113 | 114 | fragment Digit 115 | : '0'..'9' 116 | ; 117 | 118 | Number 119 | : '-'? Int 120 | ; 121 | 122 | Identifier 123 | : ('A'..'Z' | '_' | '$') ('A'..'Z' | '_' | '$' | Digit)* 124 | ; 125 | 126 | Newline 127 | : ( '\r' '\n'? 128 | | '\n' 129 | ) 130 | ; 131 | -------------------------------------------------------------------------------- /site/docs/expressions/aggregate_functions.md: -------------------------------------------------------------------------------- 1 | # Aggregate Functions 2 | 3 | Aggregate functions are functions that define an operation which consumes values from multiple records to produce a single output. Aggregate functions in SQL are typically used in GROUP BY functions. Aggregate functions are similar to scalar functions and function signatures with a small set of different properties. 4 | 5 | Aggregate function signatures contain all the properties defined for [scalar functions](scalar_functions.md). Additionally, they contain the properties below: 6 | 7 | | Property | Description | Required | 8 | | ------------------------ | --------------------------------------------------------------- | ------------------------------- | 9 | | Inherits | All properties defined for scalar function. | N/A | 10 | | Ordered | Whether the result of this function is sensitive to sort order. | Optional, defaults to false | 11 | | Maximum set size | Maximum allowed set size as an unsigned integer.
| Optional, defaults to unlimited | 12 | | Decomposable | Whether the function can be executed in one or more intermediate steps. Valid options are: `NONE`, `ONE`, `MANY`, describing how intermediate steps can be taken. | Optional, defaults to `NONE` | 13 | | Intermediate Output Type | If the function is decomposable, represents the intermediate output type that is used, if the function is defined as either `ONE` or `MANY` decomposable. Will be a struct in many cases. | Required for `ONE` and `MANY`. | 14 | | Invocation | Whether the function uses all or only distinct values in the aggregation calculation. Valid options are: `ALL`, `DISTINCT`. | Optional, defaults to `ALL` | 15 | 16 | 17 | 18 | ## Aggregate Binding 19 | 20 | When binding an aggregate function, the binding must include the following additional properties beyond the standard scalar binding properties: 21 | 22 | | Property | Description | 23 | | -------- | ------------------------------------------------------------ | 24 | | Phase | Describes the input type of the data: [INITIAL_TO_INTERMEDIATE, INTERMEDIATE_TO_INTERMEDIATE, INITIAL_TO_RESULT, INTERMEDIATE_TO_RESULT] describing what portion of the operation is required. For functions that are NOT decomposable, the only valid option will be INITIAL_TO_RESULT. | 25 | | Ordering | Zero or more ordering keys along with key order (ASC\|DESC\|NULL FIRST, etc.), declared similar to the sort keys in an `ORDER BY` relational operation. If no sorts are specified, the records are not sorted prior to being passed to the aggregate function. 
| 26 | 27 | -------------------------------------------------------------------------------- /tests/test_extensions.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import json 3 | import os 4 | from dataclasses import asdict 5 | 6 | from tests.baseline import read_baseline_file, generate_baseline 7 | from tests.coverage.case_file_parser import load_all_testcases 8 | from tests.coverage.coverage import get_test_coverage 9 | from tests.coverage.extensions import build_type_to_short_type 10 | from tests.coverage.extensions import Extension 11 | 12 | 13 | # NOTE: this test is run as part of pre-commit hook 14 | def test_substrait_extension_coverage(): 15 | script_dir = os.path.dirname(os.path.abspath(__file__)) 16 | baseline = read_baseline_file(os.path.join(script_dir, "baseline.json")) 17 | extensions_path = os.path.join(script_dir, "../extensions") 18 | registry = Extension.read_substrait_extensions(extensions_path) 19 | 20 | test_case_dir = os.path.join(script_dir, "./cases") 21 | all_test_files = load_all_testcases(test_case_dir) 22 | coverage = get_test_coverage(all_test_files, registry) 23 | 24 | assert ( 25 | coverage.num_tests_with_no_matching_function == 0 26 | ), f"{coverage.num_tests_with_no_matching_function} tests with no matching function" 27 | 28 | actual_baseline = generate_baseline(registry, coverage) 29 | errors = actual_baseline.validate_against(baseline) 30 | assert not errors, ( 31 | "\n".join(errors) 32 | + f"The baseline file does not match the current test coverage. 
" 33 | f"Please update the file at tests/baseline.json to align with the current baseline" 34 | f"{json.dumps(asdict(actual_baseline), indent=2)}" 35 | ) 36 | 37 | if baseline != actual_baseline: 38 | print("\nBaseline has changed, updating tests/baseline.json") 39 | print(json.dumps(asdict(actual_baseline), indent=2)) 40 | 41 | 42 | def test_build_type_to_short_type(): 43 | long_to_short = build_type_to_short_type() 44 | assert long_to_short["i64"] == "i64" 45 | assert long_to_short["fp64"] == "fp64" 46 | assert long_to_short["timestamp"] == "ts" 47 | assert long_to_short["timestamp_tz"] == "tstz" 48 | assert long_to_short["precision_timestamp"] == "pts" 49 | assert long_to_short["precision_timestamp_tz"] == "ptstz" 50 | assert long_to_short["interval_year"] == "iyear" 51 | assert long_to_short["interval_day"] == "iday" 52 | assert long_to_short["decimal"] == "dec" 53 | assert long_to_short["boolean"] == "bool" 54 | assert long_to_short["string"] == "str" 55 | assert long_to_short["binary"] == "vbin" 56 | assert long_to_short["fixedbinary"] == "fbin" 57 | assert long_to_short["fixedchar"] == "fchar" 58 | assert long_to_short["varchar"] == "vchar" 59 | assert long_to_short["list"] == "list" 60 | assert long_to_short["map"] == "map" 61 | assert long_to_short["struct"] == "struct" 62 | -------------------------------------------------------------------------------- /site/docs/spec/extending.md: -------------------------------------------------------------------------------- 1 | # Extending 2 | 3 | Substrait is a community project and requires consensus about new additions to the specification in order to maintain consistency. The best way to get consensus is to discuss ideas. The main ways to communicate are: 4 | 5 | * Substrait Mailing List 6 | * Substrait Slack 7 | * Community Meeting 8 | 9 | ## Minor changes 10 | 11 | Simple changes like typos and bug fixes do not require as much effort. 
[File an issue](https://github.com/substrait-io/substrait/issues) or [send a PR](https://github.com/substrait-io/substrait/pulls) and we can discuss it there. 12 | 13 | ## Complex changes 14 | 15 | For complex features it is useful to discuss the change first. It will be useful to gather some background information to help get everyone on the same page. 16 | 17 | ### Outline the issue 18 | 19 | #### Language 20 | 21 | Every engine has its own terminology. Every Spark user probably knows what an "attribute" is. Velox users will know what a "RowVector" means. Etc. However, Substrait is used by people that come from a variety of backgrounds and you should generally assume that its users do not know anything about your own implementation. As a result, all PRs and discussion should endeavor to use Substrait terminology wherever possible. 22 | 23 | #### Motivation 24 | 25 | What problems does this relation solve? If it is a more logical relation then how does it allow users to express new capabilities? If it is more of an internal relation then how does it map to existing logical relations? How is it different from other existing relations? Why do we need this? 26 | 27 | #### Examples 28 | 29 | Provide example input and output for the relation. Show example plans. Try to motivate your examples, as best as possible, with something that looks like a real-world problem. These will go a long way towards helping others understand the purpose of a relation. 30 | 31 | #### Alternatives 32 | 33 | Discuss what alternatives are out there. Are there other ways to achieve similar results? Do some systems handle this problem differently? 34 | 35 | ### Survey existing implementation 36 | 37 | It's unlikely that this is the first time that this has been done. Figuring out how existing systems have already solved the problem is a valuable first step. 38 | 39 | ### Prototype the feature 40 | 41 | Novel approaches should be implemented as an extension first. 
42 | 43 | ### Substrait design principles 44 | 45 | Substrait is designed around interoperability so a feature only used by a single system may not be accepted. But don't despair! Substrait has a highly developed extension system for this express purpose. 46 | 47 | ### You don't have to do it alone 48 | 49 | If you are hoping to add a feature and these criteria seem intimidating then feel free to start a mailing list discussion before you have all the information and ask for help. Investigating other implementations, in particular, is something that can be quite difficult to do on your own. 50 | -------------------------------------------------------------------------------- /grammar/FuncTestCaseLexer.g4: -------------------------------------------------------------------------------- 1 | lexer grammar FuncTestCaseLexer; 2 | 3 | import SubstraitLexer; 4 | 5 | options { 6 | caseInsensitive = true; 7 | } 8 | 9 | Whitespace : [ \t\n\r]+ -> channel(HIDDEN) ; 10 | 11 | TripleHash: '###'; 12 | SubstraitScalarTest: 'SUBSTRAIT_SCALAR_TEST'; 13 | SubstraitAggregateTest: 'SUBSTRAIT_AGGREGATE_TEST'; 14 | SubstraitInclude: 'SUBSTRAIT_INCLUDE'; 15 | SubstraitDependency: 'SUBSTRAIT_DEPENDENCY'; 16 | 17 | FormatVersion 18 | : 'v' DIGIT+ ('.' DIGIT+)? 19 | ; 20 | 21 | DescriptionLine 22 | : '# ' ~[\r\n]* '\r'? '\n' 23 | ; 24 | 25 | Define: 'DEFINE'; 26 | ErrorResult: '<!ERROR>'; 27 | UndefineResult: '<!UNDEFINED>'; 28 | Overflow: 'OVERFLOW'; 29 | Rounding: 'ROUNDING'; 30 | Error: 'ERROR'; 31 | Saturate: 'SATURATE'; 32 | Silent: 'SILENT'; 33 | TieToEven: 'TIE_TO_EVEN'; 34 | NaN: 'NAN'; 35 | AcceptNulls: 'ACCEPT_NULLS'; 36 | IgnoreNulls: 'IGNORE_NULLS'; 37 | NullHandling: 'NULL_HANDLING'; 38 | SpacesOnly: 'SPACES_ONLY'; 39 | Truncate: 'TRUNCATE'; 40 | 41 | IntegerLiteral 42 | : [+-]? Int 43 | ; 44 | 45 | DecimalLiteral 46 | : [+-]? [0-9]+ ('.' [0-9]+)? 47 | ; 48 | 49 | FloatLiteral 50 | : [+-]? [0-9]+ ('.' [0-9]*)? ( 'E' [+-]? [0-9]+ )? 51 | | [+-]? 
'inf' 52 | | 'snan' 53 | ; 54 | 55 | BooleanLiteral 56 | : 'true' | 'false' 57 | ; 58 | 59 | fragment FourDigits: [0-9][0-9][0-9][0-9]; 60 | fragment TwoDigits: [0-9][0-9]; 61 | 62 | TimestampTzLiteral 63 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? 64 | [+-] TwoDigits ':' TwoDigits '\'' 65 | ; 66 | 67 | TimestampLiteral 68 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\'' 69 | ; 70 | 71 | TimeLiteral 72 | : '\'' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\'' 73 | ; 74 | 75 | DateLiteral 76 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits '\'' 77 | ; 78 | 79 | PeriodPrefix: 'P'; 80 | TimePrefix: 'T'; 81 | YearPrefix: 'Y'; 82 | MSuffix: 'M'; // used for both months (IntervalYearLiteral) and minutes (TimeInterval) 83 | DaySuffix: 'D'; 84 | HourSuffix: 'H'; 85 | SecondSuffix: 'S'; 86 | FractionalSecondSuffix: 'F'; 87 | OAngleBracket: Lt; 88 | CAngleBracket: Gt; 89 | 90 | IntervalYearLiteral 91 | : '\'' PeriodPrefix IntegerLiteral YearPrefix (IntegerLiteral MSuffix)? '\'' 92 | | '\'' PeriodPrefix IntegerLiteral MSuffix '\'' 93 | ; 94 | 95 | IntervalDayLiteral 96 | : '\'' PeriodPrefix IntegerLiteral DaySuffix (TimePrefix TimeInterval)? '\'' 97 | | '\'' PeriodPrefix TimePrefix TimeInterval '\'' 98 | ; 99 | 100 | fragment TimeInterval 101 | : IntegerLiteral HourSuffix (IntegerLiteral MSuffix)? (DecimalLiteral SecondSuffix)? 102 | | IntegerLiteral MSuffix (DecimalLiteral SecondSuffix)? 103 | | DecimalLiteral SecondSuffix 104 | ; 105 | 106 | NullLiteral: 'null'; 107 | 108 | StringLiteral 109 | : '\'' ('\\' . 
| '\'\'' | ~['\\])* '\'' 110 | ; 111 | 112 | ColumnName 113 | : 'COL' Int 114 | ; 115 | -------------------------------------------------------------------------------- /site/docs/expressions/embedded_functions.md: -------------------------------------------------------------------------------- 1 | # Embedded Functions 2 | 3 | Embedded functions are a special kind of function where the implementation is embedded within the actual plan. They are commonly used in tools where a user intersperses business logic within a data pipeline. This is more common in data science workflows than traditional SQL workflows. 4 | 5 | Embedded functions are not pre-registered. Embedded functions require that data be consumed and produced with a standard API, may require memory allocation and have determinate error reporting behavior. They may also have specific runtime dependencies. For example, a Python pickle function may depend on pyarrow 5.0 and pynessie 1.0. 6 | 7 | Properties for an embedded function include: 8 | 9 | | Property | Description | Required | 10 | | ------------------- | ---------------------------------------------------------- | -------- | 11 | | Function Type | The type of embedded function presented. | Required | 12 | | Function Properties | Function properties, one of those items defined below. | Required | 13 | | Output Type | The fully resolved output type for this embedded function. | Required | 14 | 15 | The binary representation of an embedded function is: 16 | 17 | 18 | === "Binary Representation" 19 | ```proto 20 | %%% proto.message.Expression.EmbeddedFunction %%% 21 | ``` 22 | 23 | === "Human Readable Representation" 24 | As the bytes are opaque to Substrait there is no equivalent human readable form. 25 | 26 | 27 | ## Function Details 28 | 29 | There are many types of possible stored functions. For each, Substrait works to expose the function in as descriptive a way as possible to support the largest number of consumers. 
30 | 31 | 32 | 33 | ## Python Pickle Function Type 34 | 35 | | Property | Description | Required | 36 | | ----------- | ------------------------------------------------------------ | -------------------------- | 37 | | Pickle Body | Binary pickle-encoded function using [TBD] API representation to access arguments. | True | 38 | | Prereqs | A list of specific Python conda packages that are prerequisites for access (a structured version of a requirements.txt file). | Optional, defaults to none | 39 | 40 | 41 | 42 | ## WebAssembly Function Type 43 | 44 | | Property | Description | Required | 45 | | -------- | ------------------------------------------------------------ | -------------------------- | 46 | | Script | WebAssembly function | True | 47 | | Prereqs | A list of AssemblyScript prerequisites required to compile the AssemblyScript function using NPM coordinates. | Optional, defaults to none | 48 | 49 | 50 | 51 | ???+ question "Discussion Points" 52 | 53 | * What are the common embedded function formats? 54 | * How do we expose the data for a function? 55 | * How do we express batching capabilities? 56 | * How do we ensure/declare containerization? 57 | --------------------------------------------------------------------------------