├── .editorconfig ├── .flake8 ├── .gitattributes ├── .github ├── CODEOWNERS ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── licence_check.yml │ ├── pr.yml │ ├── pr_breaking.yml │ ├── pr_title.yml │ ├── release.yml │ └── site.yml ├── .gitignore ├── .licenserc.yaml ├── .pre-commit-config.yaml ├── .python-version ├── .releaserc.json ├── .yamllint.yaml ├── CHANGELOG.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── buf.gen.yaml ├── buf.work.yaml ├── ci └── release │ ├── dry_run.sh │ ├── prepare.sh │ ├── publish.sh │ ├── run.sh │ └── verify.sh ├── core.go ├── core_test.go ├── extensions ├── extension_types.yaml ├── functions_aggregate_approx.yaml ├── functions_aggregate_decimal_output.yaml ├── functions_aggregate_generic.yaml ├── functions_arithmetic.yaml ├── functions_arithmetic_decimal.yaml ├── functions_boolean.yaml ├── functions_comparison.yaml ├── functions_datetime.yaml ├── functions_geometry.yaml ├── functions_logarithmic.yaml ├── functions_rounding.yaml ├── functions_rounding_decimal.yaml ├── functions_set.yaml ├── functions_string.yaml ├── type_variations.yaml └── unknown.yaml ├── go.mod ├── go.sum ├── grammar ├── FuncTestCaseLexer.g4 ├── FuncTestCaseParser.g4 ├── Makefile ├── README.md ├── SubstraitLexer.g4 ├── SubstraitType.g4 └── prepend_license.sh ├── proto ├── buf.lock ├── buf.yaml └── substrait │ ├── algebra.proto │ ├── capabilities.proto │ ├── extended_expression.proto │ ├── extensions │ └── extensions.proto │ ├── function.proto │ ├── parameterized_types.proto │ ├── plan.proto │ ├── type.proto │ └── type_expressions.proto ├── pyproject.toml ├── requirements.txt ├── site ├── .gitignore ├── README.md ├── data │ ├── committers.yaml │ └── smc.yaml ├── docs │ ├── _config │ ├── about.md │ ├── community │ │ ├── index.md │ │ └── powered_by.md │ ├── expressions │ │ ├── _config │ │ ├── aggregate_functions.md │ │ ├── dynamic_parameters.md │ │ ├── embedded_functions.md │ │ ├── extended_expression.md │ │ 
├── field_references.md │ │ ├── scalar_functions.md │ │ ├── specialized_record_expressions.md │ │ ├── subqueries.md │ │ ├── table_functions.md │ │ ├── user_defined_functions.md │ │ └── window_functions.md │ ├── extensions │ │ ├── .gitignore │ │ ├── generate_function_docs.py │ │ └── index.md │ ├── faq.md │ ├── governance.md │ ├── img │ │ └── logo.svg │ ├── index.md │ ├── relations │ │ ├── _config │ │ ├── basics.md │ │ ├── common_fields.md │ │ ├── embedded_relations.md │ │ ├── logical_relations.md │ │ ├── physical_relations.md │ │ └── user_defined_relations.md │ ├── serialization │ │ ├── _config │ │ ├── basics.md │ │ ├── binary_serialization.md │ │ └── text_serialization.md │ ├── spec │ │ ├── _config │ │ ├── extending.md │ │ ├── specification.md │ │ ├── technology_principles.md │ │ └── versioning.md │ ├── stylesheets │ │ └── extra.css │ ├── tools │ │ ├── _config │ │ ├── producer_tools.md │ │ ├── substrait_validator.md │ │ └── third_party_tools.md │ ├── tutorial │ │ ├── examples.md │ │ ├── expression_trees.svg │ │ ├── field_indices_layout.svg │ │ ├── final_plan.json │ │ ├── plan_tree_versus_expression.svg │ │ ├── sql_to_substrait.md │ │ └── substrait_components.svg │ └── types │ │ ├── _config │ │ ├── named_structs.md │ │ ├── type_classes.md │ │ ├── type_parsing.md │ │ ├── type_system.md │ │ └── type_variations.md ├── mkdocs.yml ├── overrides │ └── partials │ │ └── footer.html └── requirements.txt ├── tests ├── README.md ├── __init__.py ├── baseline.json ├── baseline.py ├── cases │ ├── aggregate_approx │ │ └── approx_count_distinct.test │ ├── aggregate_generic │ │ └── count.test │ ├── arithmetic │ │ ├── abs.test │ │ ├── acos.test │ │ ├── acosh.test │ │ ├── add.test │ │ ├── asin.test │ │ ├── asinh.test │ │ ├── atan.test │ │ ├── atan2.test │ │ ├── atanh.test │ │ ├── bitwise_and.test │ │ ├── bitwise_not.test │ │ ├── bitwise_or.test │ │ ├── bitwise_xor.test │ │ ├── cos.test │ │ ├── cosh.test │ │ ├── divide.test │ │ ├── exp.test │ │ ├── factorial.test │ │ ├── max.test │ │ 
├── min.test │ │ ├── modulus.test │ │ ├── multiply.test │ │ ├── negate.test │ │ ├── power.test │ │ ├── sin.test │ │ ├── sinh.test │ │ ├── sqrt.test │ │ ├── subtract.test │ │ ├── sum.test │ │ ├── tan.test │ │ └── tanh.test │ ├── arithmetic_decimal │ │ ├── bitwise_and.test │ │ ├── bitwise_or.test │ │ ├── bitwise_xor.test │ │ ├── factorial_decimal.test │ │ ├── max_decimal.test │ │ ├── min_decimal.test │ │ ├── power.test │ │ ├── power_decimal.test │ │ ├── sqrt_decimal.test │ │ └── sum_decimal.test │ ├── boolean │ │ ├── and.test │ │ ├── and_not.test │ │ ├── bool_and.test │ │ ├── bool_or.test │ │ ├── not.test │ │ ├── or.test │ │ └── xor.test │ ├── comparison │ │ ├── between.test │ │ ├── coalesce.test │ │ ├── equal.test │ │ ├── gt.test │ │ ├── gte.test │ │ ├── is_false.test │ │ ├── is_finite.test │ │ ├── is_infinite.test │ │ ├── is_nan.test │ │ ├── is_not_distinct_from.test │ │ ├── is_not_false.test │ │ ├── is_not_null.test │ │ ├── is_not_true.test │ │ ├── is_null.test │ │ ├── is_true.test │ │ ├── lt.test │ │ ├── lte.test │ │ ├── not_equal.test │ │ └── nullif.test │ ├── datetime │ │ ├── add_datetime.test │ │ ├── add_intervals.test │ │ ├── extract.test │ │ ├── gt_datetime.test │ │ ├── gte_datetime.test │ │ ├── lt_datetime.test │ │ ├── lte_datetime.test │ │ └── subtract_datetime.test │ ├── logarithmic │ │ ├── ln.test │ │ ├── log10.test │ │ ├── log2.test │ │ └── logb.test │ ├── rounding │ │ ├── ceil.test │ │ ├── floor.test │ │ └── round.test │ ├── rounding_decimal │ │ ├── ceil.test │ │ ├── floor.test │ │ └── round.test │ └── string │ │ ├── bit_length.test │ │ ├── char_length.test │ │ ├── concat.test │ │ ├── concat_ws.test │ │ ├── contains.test │ │ ├── ends_with.test │ │ ├── left.test │ │ ├── like.test │ │ ├── lower.test │ │ ├── lpad.test │ │ ├── ltrim.test │ │ ├── octet_length.test │ │ ├── regexp_count_substring.test │ │ ├── regexp_match_substring.test │ │ ├── regexp_replace.test │ │ ├── regexp_string_split.test │ │ ├── repeat.test │ │ ├── replace.test │ │ ├── reverse.test │ 
│ ├── right.test │ │ ├── rpad.test │ │ ├── rtrim.test │ │ ├── starts_with.test │ │ ├── string_split.test │ │ ├── substring.test │ │ ├── trim.test │ │ └── upper.test ├── coverage │ ├── __init__.py │ ├── antlr_parser │ │ ├── FuncTestCaseLexer.py │ │ ├── FuncTestCaseParser.py │ │ ├── FuncTestCaseParserListener.py │ │ └── FuncTestCaseParserVisitor.py │ ├── case_file_parser.py │ ├── coverage.py │ ├── extensions.py │ ├── nodes.py │ ├── test_coverage.py │ └── visitor.py ├── test_extensions.py └── type │ └── antlr_parser │ ├── SubstraitLexer.py │ ├── SubstraitTypeLexer.py │ ├── SubstraitTypeListener.py │ ├── SubstraitTypeParser.py │ └── SubstraitTypeVisitor.py ├── text └── simple_extensions_schema.yaml └── tools └── proto_prefix.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | indent_style = space 8 | trim_trailing_whitespace = true 9 | 10 | [site/**] 11 | charset = unset 12 | end_of_line = unset 13 | insert_final_newline = unset 14 | indent_style = unset 15 | trim_trailing_whitespace = unset 16 | 17 | [*.{proto,yaml,yml}] 18 | indent_size = 2 19 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, W503, F403, F401 3 | max-line-length = 88 4 | select = B,C,E,F,W,T4,B9 5 | exclude = 6 | # exclude generated test parser 7 | tests/coverage/antlr_parser/*.py, 8 | # exclude generated type parser 9 | tests/type/antlr_parser/*.py -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | proto/buf.lock linguist-generated=true 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: 
-------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | * @jacques-n @cpcloud @westonpace @epsilonprime @vbarua 4 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | 8 | - package-ecosystem: "pip" 9 | directory: "/site" 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Thank you for submitting a PR! 2 | 3 | Before you continue, please ensure that your PR title and description (this message!) follow [conventional commit syntax][1]. Substrait uses an automated release process that, among other things, uses PR titles & descriptions to build a changelog, so the syntax and format matter! 4 | 5 | The title of the PR should be a valid commit header. 6 | 7 | Some examples of proper commit message headers and PR titles: 8 | 9 | - `feat: add feature X` 10 | - `fix: X in case of Y` 11 | - `docs: improve documentation for X` 12 | 13 | Note the case and grammar conventions. 14 | 15 | Furthermore, the description of any PR that includes a breaking change should contain a paragraph that starts with `BREAKING CHANGE: ...`, where `...` explains what changed. The automated release process uses this to determine how it should bump the version number. Anything that changes the behavior of a plan that was previously legal is considered a breaking change; note that this includes behavior specifications that only exist in Substrait in the form of behavior descriptions on the website or in comments.
16 | 17 | [1]: https://www.conventionalcommits.org/en/v1.0.0/ 18 | -------------------------------------------------------------------------------- /.github/workflows/licence_check.yml: -------------------------------------------------------------------------------- 1 | name: License check 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | license: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | 11 | - name: Check License Header 12 | uses: enarx/spdx@master 13 | with: 14 | licenses: |- 15 | Apache-2.0 16 | MIT 17 | -------------------------------------------------------------------------------- /.github/workflows/pr.yml: -------------------------------------------------------------------------------- 1 | name: PR Build Check 2 | 3 | on: 4 | pull_request: 5 | jobs: 6 | site: 7 | name: Build Website 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - uses: actions/setup-python@v5 12 | with: 13 | python-version: '3.13' 14 | cache: 'pip' 15 | cache-dependency-path: ./site/requirements.txt 16 | - run: pip install -r ./site/requirements.txt 17 | - name: Generate Static Site 18 | run: mkdocs build 19 | working-directory: ./site 20 | editorconfig-checker: 21 | name: Check editorconfig 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: editorconfig-checker/action-editorconfig-checker@v2 25 | proto-format-check: 26 | name: Check Protobuf Style 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v4 30 | - uses: bufbuild/buf-setup-action@v1.50.0 31 | with: 32 | github_token: ${{ github.token }} 33 | - run: buf format --diff --exit-code 34 | proto: 35 | name: Check Protobuf 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: bufbuild/buf-setup-action@v1.50.0 40 | with: 41 | github_token: ${{ github.token }} 42 | - uses: bufbuild/buf-lint-action@v1 43 | - name: Compile protobuf 44 | run: buf generate 45 | yamllint: 46 | name: Lint YAML extensions 47 | runs-on: ubuntu-latest 48 | 
steps: 49 | - uses: actions/checkout@v4 50 | - name: Run yamllint 51 | run: yamllint . 52 | yamlvalidate: 53 | name: Validate YAML extensions 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v4 57 | - uses: actions/setup-node@v4 58 | with: 59 | node-version: "20" 60 | - run: npm install -g ajv-cli 61 | - run: | 62 | set -euo pipefail 63 | for i in $(ls); 64 | do 65 | ajv validate -s ../text/simple_extensions_schema.yaml --strict=true --spec=draft2020 -d "$i" 66 | done 67 | working-directory: ./extensions 68 | dry_run_release: 69 | name: Dry-run release 70 | runs-on: ubuntu-latest 71 | steps: 72 | - uses: actions/checkout@v4 73 | with: 74 | fetch-depth: 0 75 | - uses: bufbuild/buf-setup-action@v1.50.0 76 | - uses: actions/setup-node@v4 77 | with: 78 | node-version: "20" 79 | - run: ./ci/release/dry_run.sh 80 | python-style: 81 | name: Style-check and lint Python files, and run tests 82 | runs-on: ubuntu-latest 83 | steps: 84 | - uses: actions/checkout@v4 85 | - name: Install dependencies 86 | run: python3 -m pip install -r requirements.txt 87 | - name: Black 88 | run: python3 -m black --diff --check . 89 | - name: Flake8 90 | run: python3 -m flake8 . 
91 | - name: Run tests including test_substrait_extension_coverage 92 | run: | 93 | pytest 94 | check-proto-prefix: 95 | name: Check proto-prefix.py 96 | runs-on: ubuntu-latest 97 | steps: 98 | - uses: actions/checkout@v4 99 | - uses: bufbuild/buf-setup-action@v1.50.0 100 | - name: Run proto-prefix.py 101 | run: tools/proto_prefix.py output test proto go_package=github.com/test/proto 102 | - name: Modify buf config to build rewritten proto files 103 | run: | 104 | echo "version: v1" > buf.work.yaml 105 | echo "directories:" >> buf.work.yaml 106 | echo " - output" >> buf.work.yaml 107 | - name: Compile rewritten proto files 108 | run: buf generate 109 | -------------------------------------------------------------------------------- /.github/workflows/pr_breaking.yml: -------------------------------------------------------------------------------- 1 | name: Breaking Changes Check 2 | 3 | on: 4 | pull_request: 5 | types: [opened, edited, synchronize, reopened] 6 | jobs: 7 | breaking: 8 | name: Ensure breaking changes are labeled in description 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - uses: bufbuild/buf-setup-action@v1.50.0 13 | with: 14 | github_token: ${{ github.token }} 15 | - name: check for breaking changes 16 | id: check-breaking 17 | run: | 18 | 19 | if ! 
buf breaking --against 'https://github.com/substrait-io/substrait.git#branch=main'; then 20 | breaking="true" 21 | else 22 | breaking="false" 23 | fi 24 | 25 | echo "breaking=${breaking}" >> $GITHUB_OUTPUT 26 | - name: check whether the PR description includes a breaking change footer 27 | if: ${{ fromJson(steps.check-breaking.outputs.breaking) }} 28 | run: | 29 | # check PR description for a BREAKING CHANGE section if any breaking changes occurred 30 | grep '^BREAKING CHANGE: ' <<< $COMMIT_DESC 31 | env: 32 | COMMIT_DESC: ${{ github.event.pull_request.body }} 33 | -------------------------------------------------------------------------------- /.github/workflows/pr_title.yml: -------------------------------------------------------------------------------- 1 | name: PR Title Check 2 | 3 | on: 4 | pull_request_target: 5 | types: [opened, edited, synchronize, reopened] 6 | jobs: 7 | commitlint: 8 | name: PR title / description conforms to semantic-release 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/setup-node@v4 12 | with: 13 | node-version: "20" 14 | - run: npm install @commitlint/config-conventional 15 | - run: > 16 | echo 'module.exports = { 17 | // Workaround for https://github.com/dependabot/dependabot-core/issues/5923 18 | "ignores": [(message) => /^Bumps \[.+]\(.+\) from .+ to .+\.$/m.test(message)], 19 | "rules": { 20 | "body-max-line-length": [0, "always", Infinity], 21 | "footer-max-line-length": [0, "always", Infinity], 22 | "body-leading-blank": [0, "always"] 23 | } 24 | }' > .commitlintrc.js 25 | - run: npx commitlint --extends @commitlint/config-conventional --verbose <<< $COMMIT_MSG 26 | env: 27 | COMMIT_MSG: > 28 | ${{ github.event.pull_request.title }} 29 | 30 | ${{ github.event.pull_request.body }} 31 | - if: failure() 32 | uses: actions/github-script@v7 33 | with: 34 | script: | 35 | const message = `**ACTION NEEDED** 36 | 37 | Substrait follows the [Conventional Commits 38 | 
specification](https://www.conventionalcommits.org/en/v1.0.0/) for 39 | release automation. 40 | 41 | The PR title and description are used as the merge commit message.\ 42 | Please update your PR title and description to match the specification. 43 | ` 44 | // Get list of current comments 45 | const comments = await github.paginate(github.rest.issues.listComments, { 46 | owner: context.repo.owner, 47 | repo: context.repo.repo, 48 | issue_number: context.issue.number 49 | }); 50 | // Check if this job already commented 51 | for (const comment of comments) { 52 | if (comment.body === message) { 53 | return // Already commented 54 | } 55 | } 56 | // Post the comment about Conventional Commits 57 | github.rest.issues.createComment({ 58 | owner: context.repo.owner, 59 | repo: context.repo.repo, 60 | issue_number: context.issue.number, 61 | body: message 62 | }) 63 | core.setFailed(message) 64 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | schedule: 5 | # 2 AM on Sunday 6 | - cron: "0 2 * * 0" 7 | workflow_dispatch: 8 | 9 | # we do not want more than one release workflow executing at the same time, ever 10 | concurrency: 11 | group: release 12 | # cancelling in the middle of a release would create incomplete releases 13 | # so cancel-in-progress is false 14 | cancel-in-progress: false 15 | 16 | jobs: 17 | release: 18 | runs-on: ubuntu-latest 19 | if: github.repository == 'substrait-io/substrait' 20 | steps: 21 | - uses: tibdex/github-app-token@v2 22 | id: generate-token 23 | with: 24 | app_id: ${{ secrets.APP_ID }} 25 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 26 | 27 | - uses: actions/checkout@v4 28 | with: 29 | fetch-depth: 0 30 | token: ${{ steps.generate-token.outputs.token }} 31 | 32 | - uses: actions/setup-node@v4 33 | with: 34 | node-version: "20" 35 | 36 | - uses: 
bufbuild/buf-setup-action@v1.50.0 37 | with: 38 | github_token: ${{ github.token }} 39 | 40 | - name: run semantic-release 41 | run: ./ci/release/run.sh 42 | env: 43 | BUF_TOKEN: ${{ secrets.BUF_TOKEN }} 44 | GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }} 45 | -------------------------------------------------------------------------------- /.github/workflows/site.yml: -------------------------------------------------------------------------------- 1 | name: Site 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - "site/**" 8 | - "extensions/**" 9 | 10 | jobs: 11 | site: 12 | name: Build & Deploy Website 13 | runs-on: ubuntu-latest 14 | if: ${{ github.repository == 'substrait-io/substrait' }} 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: actions/setup-python@v5 18 | with: 19 | python-version: '3.13' 20 | cache: 'pip' 21 | cache-dependency-path: ./site/requirements.txt 22 | - run: pip install -r ./site/requirements.txt 23 | - name: Generate Static Site 24 | run: mkdocs build 25 | working-directory: ./site 26 | - name: Deploy Static Site to GitHub 27 | uses: peaceiris/actions-gh-pages@v4 28 | with: 29 | external_repository: substrait-io/substrait.io 30 | publish_branch: main 31 | deploy_key: ${{ secrets.SUBSTRAIT_SITE_DEPLOY_KEY }} 32 | publish_dir: ./site/site 33 | cname: substrait.io 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target 2 | **/.gradle 3 | **/.idea 4 | **/build 5 | gen 6 | -------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | header: 2 | license: 3 | spdx-id: Apache-2.0 4 | 5 | paths: 6 | - 'proto/substrait/**' 7 | 8 | comment: never -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/nametake/pre-commit-buf 3 | rev: v2.0.0 4 | hooks: 5 | - id: buf-lint 6 | - repo: https://github.com/adrienverge/yamllint.git 7 | rev: v1.35.1 8 | hooks: 9 | - id: yamllint 10 | args: [-c=.yamllint.yaml] 11 | - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook 12 | rev: v9.20.0 13 | hooks: 14 | - id: commitlint 15 | stages: [commit-msg] 16 | - repo: https://github.com/psf/black 17 | rev: 24.8.0 18 | hooks: 19 | - id: black 20 | - repo: https://github.com/pycqa/flake8 21 | rev: 7.0.0 22 | hooks: 23 | - id: flake8 24 | - repo: local 25 | hooks: 26 | - id: check-substrait-extensions_coverage 27 | name: Check Substrait extensions and test coverage 28 | entry: pytest tests/test_extensions.py::test_substrait_extension_coverage 29 | language: python 30 | pass_filenames: false 31 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.13 -------------------------------------------------------------------------------- /.releaserc.json: -------------------------------------------------------------------------------- 1 | { 2 | "branches": ["main"], 3 | "preset": "conventionalcommits", 4 | "plugins": [ 5 | [ 6 | "@semantic-release/commit-analyzer", 7 | { 8 | "releaseRules": [ 9 | {"breaking": true, "release": "minor"} 10 | ] 11 | } 12 | ], 13 | "@semantic-release/release-notes-generator", 14 | [ 15 | "@semantic-release/changelog", 16 | { 17 | "changelogTitle": "Release Notes\n---", 18 | "changelogFile": "CHANGELOG.md" 19 | } 20 | ], 21 | [ 22 | "@semantic-release/exec", 23 | { 24 | "verifyConditionsCmd": "ci/release/verify.sh", 25 | "prepareCmd": "ci/release/prepare.sh", 26 | "publishCmd": "ci/release/publish.sh ${nextRelease.version}" 27 | } 28 | ], 29 | [ 30 | "@semantic-release/github", 31 | { 32 | 
"successComment": false 33 | } 34 | ], 35 | [ 36 | "@semantic-release/git", 37 | { 38 | "assets": ["CHANGELOG.md"], 39 | "message": "chore(release): ${nextRelease.version}" 40 | } 41 | ] 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /.yamllint.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | line-length: 3 | max: 120 4 | brackets: 5 | forbid: false 6 | min-spaces-inside: 0 7 | max-spaces-inside: 1 8 | min-spaces-inside-empty: 0 9 | max-spaces-inside-empty: 0 10 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: >- 3 | Substrait: Cross-Language Serialization for Relational 4 | Algebra 5 | message: >- 6 | If you use this software, please cite it using the 7 | metadata from this file. 8 | type: software 9 | authors: 10 | - given-names: substrait-io 11 | identifiers: 12 | - type: url 13 | value: 'https://github.com/substrait-io/substrait' 14 | repository-code: 'https://github.com/substrait-io/substrait' 15 | url: 'https://substrait.io/' 16 | license: Apache-2.0 17 | date-released: '2021-09-01' 18 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Substrait 2 | 3 | Welcome! 
4 | 5 | ## Dependencies 6 | 7 | There's no formal set of dependencies for Substrait, but here are some that are useful to have: 8 | 9 | * [`buf`](https://docs.buf.build/installation) for easy generation of proto serialization/deserialization code 10 | * [`protoc`](https://grpc.io/docs/protoc-installation/), used by `buf` and usable independent of `buf` 11 | * A Python environment with [the website's `requirements.txt`](https://github.com/substrait-io/substrait/blob/main/site/requirements.txt) dependencies installed if you want to see changes to the website locally 12 | 13 | ## Commit Conventions 14 | 15 | Substrait follows [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) for commit message structure. You can use [`pre-commit`](https://pre-commit.com/) to check your messages for you, but note that you must install pre-commit using `pre-commit install --hook-type commit-msg` for this to work. CI will also lint your commit messages. Please also ensure that your PR title and initial comment together form a valid commit message; that will save us some work formatting the merge commit message when we merge your PR. 16 | 17 | Examples of commit messages can be seen [here](https://www.conventionalcommits.org/en/v1.0.0/#examples). 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Substrait 2 | 3 | Substrait is a new project focused on producing an independent description of data compute operations. It is composed primarily of: 4 | 5 | 1. A formal specification 6 | 2. A human readable text representation 7 | 3. 
A compact cross-language binary representation 8 | 9 | For more details, please go to [substrait.io](https://substrait.io) 10 | 11 | -------------------------------------------------------------------------------- /buf.gen.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | plugins: 3 | - plugin: buf.build/protocolbuffers/cpp:v23.0 4 | out: gen/proto/cpp 5 | - plugin: buf.build/protocolbuffers/csharp:v23.0 6 | out: gen/proto/csharp 7 | - plugin: buf.build/protocolbuffers/java:v23.0 8 | out: gen/proto/java 9 | - plugin: buf.build/protocolbuffers/python:v23.0 10 | out: gen/proto/python 11 | - plugin: buf.build/protocolbuffers/go:v1.30.0 12 | out: gen/proto/go 13 | opt: 14 | - paths=source_relative 15 | -------------------------------------------------------------------------------- /buf.work.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | directories: 3 | - proto 4 | -------------------------------------------------------------------------------- /ci/release/dry_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | curdir="$PWD" 8 | worktree="$(mktemp -d)" 9 | branch="$(basename "$worktree")" 10 | 11 | git worktree add "$worktree" 12 | # Remove the temporary worktree and its branch after returning to the starting directory. 13 | function cleanup() { 14 | cd "$curdir" || exit 1 15 | git worktree remove "$worktree" 16 | git worktree prune 17 | git branch -D "$branch" 18 | } 19 | # Trap EXIT only: it also fires when `set -e` aborts the script, so adding ERR would run cleanup twice. 20 | trap cleanup EXIT 21 | 22 | cd "$worktree" || exit 1 23 | 24 | export GITHUB_REF="$branch" 25 | 26 | npx --yes \ 27 | -p "semantic-release@24.1.2" \ 28 | -p "@semantic-release/commit-analyzer" \ 29 | -p "@semantic-release/release-notes-generator" \ 30 | -p "@semantic-release/changelog" \ 31 | -p "@semantic-release/exec" \ 32 | -p "@semantic-release/git" \ 33 | -p
"conventional-changelog-conventionalcommits@8.0.0" \ 34 | semantic-release \ 35 | --ci false \ 36 | --dry-run \ 37 | --preset conventionalcommits \ 38 | --plugins \ 39 | --analyze-commits "@semantic-release/commit-analyzer" \ 40 | --generate-notes "@semantic-release/release-notes-generator" \ 41 | --verify-conditions "@semantic-release/changelog,@semantic-release/exec,@semantic-release/git" \ 42 | --prepare "@semantic-release/changelog,@semantic-release/exec" \ 43 | --branches "$branch" \ 44 | --repository-url "file://$PWD" 45 | -------------------------------------------------------------------------------- /ci/release/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | # build artifacts 8 | buf build 9 | buf generate 10 | -------------------------------------------------------------------------------- /ci/release/publish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | cd "$(git rev-parse --show-toplevel)"/proto || exit 1 8 | 9 | buf push --tag "v${1}" --tag "$(git rev-parse HEAD)" 10 | -------------------------------------------------------------------------------- /ci/release/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | npx --yes \ 8 | -p "semantic-release@24.1.2" \ 9 | -p "@semantic-release/commit-analyzer" \ 10 | -p "@semantic-release/release-notes-generator" \ 11 | -p "@semantic-release/changelog" \ 12 | -p "@semantic-release/github" \ 13 | -p "@semantic-release/exec" \ 14 | -p "@semantic-release/git" \ 15 | -p "conventional-changelog-conventionalcommits@8.0.0" \ 
16 | semantic-release --ci 17 | -------------------------------------------------------------------------------- /ci/release/verify.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | set -euo pipefail 6 | 7 | buf lint 8 | -------------------------------------------------------------------------------- /core.go: -------------------------------------------------------------------------------- 1 | // Package substrait provides access to Substrait artifacts via embed.FS. 2 | // Use substrait.GetSubstraitFS() to retrieve the embed.FS object. 3 | package substrait 4 | 5 | import "embed" 6 | 7 | //go:embed extensions/* 8 | var substraitExtensionsFS embed.FS 9 | // GetSubstraitFS returns the embedded file system holding the files matched by extensions/*. 10 | func GetSubstraitFS() embed.FS { 11 | return substraitExtensionsFS 12 | } 13 | // GetSubstraitExtensionsFS returns the same embedded extensions file system as GetSubstraitFS. 14 | func GetSubstraitExtensionsFS() embed.FS { 15 | return substraitExtensionsFS 16 | } 17 | 18 | //go:embed tests/cases/*/*.test 19 | var substraitTestsFS embed.FS 20 | // GetSubstraitTestsFS returns the embedded file system holding the files matched by tests/cases/*/*.test. 21 | func GetSubstraitTestsFS() embed.FS { 22 | return substraitTestsFS 23 | } 24 | -------------------------------------------------------------------------------- /core_test.go: -------------------------------------------------------------------------------- 1 | package substrait 2 | 3 | import ( 4 | "embed" 5 | "io/fs" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | // TestGetSubstraitExtensionsFS checks that both extension accessors expose the expected extension YAML files. 12 | func TestGetSubstraitExtensionsFS(t *testing.T) { 13 | fsArr := []embed.FS{GetSubstraitExtensionsFS(), GetSubstraitFS()} 14 | for _, got := range fsArr { 15 | filePaths, err := ListFiles(got, ".") 16 | require.NoError(t, err) 17 | assert.Greater(t, len(filePaths), 15) 18 | assert.Contains(t, filePaths, "extensions/functions_arithmetic.yaml") 19 | assert.Contains(t, filePaths, "extensions/functions_arithmetic_decimal.yaml") 20 | assert.Contains(t, filePaths, "extensions/functions_datetime.yaml") 21
| } 22 | } 23 | /* TestGetSubstraitTestsFS checks that the embedded test-case file system exposes the expected .test files. */ 24 | func TestGetSubstraitTestsFS(t *testing.T) { 25 | got := GetSubstraitTestsFS() 26 | filePaths, err := ListFiles(got, ".") 27 | require.NoError(t, err) 28 | assert.Greater(t, len(filePaths), 3) 29 | assert.Contains(t, filePaths, "tests/cases/arithmetic/add.test") 30 | assert.Contains(t, filePaths, "tests/cases/arithmetic/max.test") 31 | assert.Contains(t, filePaths, "tests/cases/arithmetic_decimal/power.test") 32 | assert.Contains(t, filePaths, "tests/cases/datetime/lt_datetime.test") 33 | } 34 | /* ListFiles walks embedFs starting at root and returns the path of every non-directory entry. A walk error stops the traversal and is returned together with the paths collected so far. */ 35 | func ListFiles(embedFs embed.FS, root string) ([]string, error) { 36 | var files []string 37 | err := fs.WalkDir(embedFs, root, func(path string, d fs.DirEntry, err error) error { 38 | if err != nil { 39 | return err 40 | } 41 | if !d.IsDir() { 42 | files = append(files, path) 43 | } 44 | return nil 45 | }) 46 | return files, err 47 | } 48 | -------------------------------------------------------------------------------- /extensions/extension_types.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | types: 3 | - name: point 4 | structure: 5 | latitude: i32 6 | longitude: i32 7 | - name: line 8 | structure: 9 | start: point 10 | end: point 11 | -------------------------------------------------------------------------------- /extensions/functions_aggregate_approx.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | aggregate_functions: 4 | - name: "approx_count_distinct" 5 | description: >- 6 | Calculates the approximate number of rows that contain distinct values of the expression argument using 7 | HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which 8 | returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT 9 | processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact 10 | result.
11 | impls: 12 | - args: 13 | - name: x 14 | value: any 15 | nullability: DECLARED_OUTPUT 16 | decomposable: MANY 17 | intermediate: binary 18 | return: i64 19 | -------------------------------------------------------------------------------- /extensions/functions_aggregate_decimal_output.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | aggregate_functions: 4 | - name: "count" 5 | description: Count a set of values. Result is returned as a decimal instead of i64. 6 | impls: 7 | - args: 8 | - name: x 9 | value: any 10 | options: 11 | overflow: 12 | values: [SILENT, SATURATE, ERROR] 13 | nullability: DECLARED_OUTPUT 14 | decomposable: MANY 15 | intermediate: decimal<38,0> 16 | return: decimal<38,0> 17 | - name: "count" 18 | description: "Count a set of records (not field referenced). Result is returned as a decimal instead of i64." 19 | impls: 20 | - options: 21 | overflow: 22 | values: [SILENT, SATURATE, ERROR] 23 | nullability: DECLARED_OUTPUT 24 | decomposable: MANY 25 | intermediate: decimal<38,0> 26 | return: decimal<38,0> 27 | - name: "approx_count_distinct" 28 | description: >- 29 | Calculates the approximate number of rows that contain distinct values of the expression argument using 30 | HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which 31 | returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT 32 | processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact 33 | result. Result is returned as a decimal instead of i64. 
34 | impls: 35 | - args: 36 | - name: x 37 | value: any 38 | nullability: DECLARED_OUTPUT 39 | decomposable: MANY 40 | intermediate: binary 41 | return: decimal<38,0> 42 | -------------------------------------------------------------------------------- /extensions/functions_aggregate_generic.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | aggregate_functions: 4 | - name: "count" 5 | description: Count a set of values 6 | impls: 7 | - args: 8 | - name: x 9 | value: any 10 | options: 11 | overflow: 12 | values: [SILENT, SATURATE, ERROR] 13 | nullability: DECLARED_OUTPUT 14 | decomposable: MANY 15 | intermediate: i64 16 | return: i64 17 | - name: "count" 18 | description: "Count a set of records (not field referenced)" 19 | impls: 20 | - options: 21 | overflow: 22 | values: [SILENT, SATURATE, ERROR] 23 | nullability: DECLARED_OUTPUT 24 | decomposable: MANY 25 | intermediate: i64 26 | return: i64 27 | - name: "any_value" 28 | description: > 29 | Selects an arbitrary value from a group of values. 30 | 31 | If the input is empty, the function returns null. 32 | impls: 33 | - args: 34 | - name: x 35 | value: any1 36 | options: 37 | ignore_nulls: 38 | values: [ "TRUE", "FALSE" ] 39 | nullability: DECLARED_OUTPUT 40 | decomposable: MANY 41 | intermediate: any1? 42 | return: any1? 43 | -------------------------------------------------------------------------------- /extensions/functions_rounding_decimal.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | scalar_functions: 4 | - 5 | name: "ceil" 6 | description: > 7 | Rounding to the ceiling of the value `x`. 8 | impls: 9 | - args: 10 | - value: decimal<P,S> 11 | name: x 12 | return: |- 13 | integral_least_num_digits = P - S + 1 14 | precision = min(integral_least_num_digits, 38) 15 | decimal?<precision, 0> 16 | - 17 | name: "floor" 18 | description: > 19 | Rounding to the floor of the value `x`.
20 | impls: 21 | - args: 22 | - value: decimal<P,S> 23 | name: x 24 | return: |- 25 | integral_least_num_digits = P - S + 1 26 | precision = min(integral_least_num_digits, 38) 27 | decimal?<precision, 0> 28 | - 29 | name: "round" 30 | description: > 31 | Rounding the value `x` to `s` decimal places. 32 | impls: 33 | - args: 34 | - value: decimal<P,S> 35 | name: x 36 | description: > 37 | Numerical expression to be rounded. 38 | - value: i32 39 | name: s 40 | description: > 41 | Number of decimal places to be rounded to. 42 | 43 | When `s` is a positive number, the rounding 44 | is performed to a `s` number of decimal places. 45 | 46 | When `s` is a negative number, the rounding is 47 | performed to the left side of the decimal point 48 | as specified by `s`. 49 | 50 | The precision of the resultant decimal type is one 51 | more than the precision of the input decimal type to 52 | allow for numbers that round up or down to the next 53 | decimal magnitude. 54 | E.g. `round(9.9, 0)` -> `10.0`. 55 | The scale of the resultant decimal type cannot be 56 | larger than the scale of the input decimal type. 57 | options: 58 | rounding: 59 | description: > 60 | When a boundary is computed to lie somewhere between two values, 61 | and this value cannot be exactly represented, this specifies how 62 | to round it. 63 | 64 | - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie 65 | to the even option. 66 | - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly 67 | halfway, tie away from zero. 68 | - TRUNCATE: always round toward zero. 69 | - CEILING: always round toward positive infinity. 70 | - FLOOR: always round toward negative infinity. 71 | - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule 72 | - TIE_DOWN: round ties with FLOOR rule 73 | - TIE_UP: round ties with CEILING rule 74 | - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule 75 | - TIE_TO_ODD: round to nearest value; if exactly halfway, tie 76 | to the odd option.
77 | values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, 78 | AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] 79 | nullability: DECLARED_OUTPUT 80 | return: |- 81 | precision = min(P + 1, 38) 82 | decimal?<precision, S> 83 | -------------------------------------------------------------------------------- /extensions/functions_set.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | scalar_functions: 4 | - 5 | name: "index_in" 6 | description: > 7 | Checks the membership of a value in a list of values 8 | 9 | Returns the first 0-based index value of some input `needle` if `needle` is equal to 10 | any element in `haystack`. Returns `NULL` if not found. 11 | 12 | If `needle` is `NULL`, returns `NULL`. 13 | 14 | If `needle` is `NaN`: 15 | - Returns 0-based index of `NaN` in `input` (default) 16 | - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified) 17 | impls: 18 | - args: 19 | - name: needle 20 | value: any1 21 | - name: haystack 22 | value: list<any1> 23 | options: 24 | nan_equality: 25 | values: [ NAN_IS_NAN, NAN_IS_NOT_NAN ] 26 | nullability: DECLARED_OUTPUT 27 | return: i64? 28 | -------------------------------------------------------------------------------- /extensions/type_variations.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | type_variations: 4 | - parent: string 5 | name: dict4 6 | description: a four-byte dictionary encoded string 7 | functions: INHERITS 8 | - parent: string 9 | name: bigoffset 10 | description: >- 11 | The arrow large string representation of strings, still restricted to the default string size defined in 12 | Substrait.
13 | functions: SEPARATE 14 | - parent: struct 15 | name: avro 16 | description: an avro encoded struct 17 | functions: SEPARATE 18 | - parent: struct 19 | name: cstruct 20 | description: a cstruct representation of the struct 21 | functions: SEPARATE 22 | - parent: struct 23 | name: dict2 24 | description: a 2-byte dictionary encoded string. 25 | functions: INHERITS 26 | -------------------------------------------------------------------------------- /extensions/unknown.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | types: 4 | - name: unknown 5 | scalar_functions: 6 | - name: "add" 7 | impls: 8 | - args: 9 | - value: unknown 10 | - value: unknown 11 | return: unknown 12 | - name: "subtract" 13 | impls: 14 | - args: 15 | - value: unknown 16 | - value: unknown 17 | return: unknown 18 | - name: "multiply" 19 | impls: 20 | - args: 21 | - value: unknown 22 | - value: unknown 23 | return: unknown 24 | - name: "divide" 25 | impls: 26 | - args: 27 | - value: unknown 28 | - value: unknown 29 | return: unknown 30 | - name: "modulus" 31 | impls: 32 | - args: 33 | - value: unknown 34 | - value: unknown 35 | return: unknown 36 | aggregate_functions: 37 | - name: "sum" 38 | impls: 39 | - args: 40 | - value: unknown 41 | intermediate: unknown 42 | return: unknown 43 | - name: "avg" 44 | impls: 45 | - args: 46 | - value: unknown 47 | intermediate: unknown 48 | return: unknown 49 | - name: "min" 50 | impls: 51 | - args: 52 | - value: unknown 53 | intermediate: unknown 54 | return: unknown 55 | - name: "max" 56 | impls: 57 | - args: 58 | - value: unknown 59 | intermediate: unknown 60 | return: unknown 61 | - name: "count" 62 | impls: 63 | - args: 64 | - value: unknown 65 | intermediate: unknown 66 | return: unknown 67 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module 
github.com/substrait-io/substrait 2 | 3 | go 1.22.0 4 | 5 | require github.com/stretchr/testify v1.9.0 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 6 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 7 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /grammar/FuncTestCaseLexer.g4: -------------------------------------------------------------------------------- 1 | lexer grammar FuncTestCaseLexer; 2 | 3 | import SubstraitLexer; 4 | 5 | options { 6 | caseInsensitive = true; 7 | } 8 | 9 | Whitespace : [ \t\n\r]+ -> channel(HIDDEN) ; 10 | 11 | TripleHash: '###'; 12 | SubstraitScalarTest: 'SUBSTRAIT_SCALAR_TEST'; 13 | SubstraitAggregateTest: 'SUBSTRAIT_AGGREGATE_TEST'; 14 | SubstraitInclude: 'SUBSTRAIT_INCLUDE'; 15 | 16 | FormatVersion 17 | : 'v' DIGIT+ ('.' DIGIT+)? 
18 | ; 19 | 20 | DescriptionLine 21 | : '# ' ~[\r\n]* '\r'? '\n' 22 | ; 23 | 24 | Define: 'DEFINE'; 25 | ErrorResult: '<!ERROR>'; 26 | UndefineResult: '<!UNDEFINED>'; 27 | Overflow: 'OVERFLOW'; 28 | Rounding: 'ROUNDING'; 29 | Error: 'ERROR'; 30 | Saturate: 'SATURATE'; 31 | Silent: 'SILENT'; 32 | TieToEven: 'TIE_TO_EVEN'; 33 | NaN: 'NAN'; 34 | AcceptNulls: 'ACCEPT_NULLS'; 35 | IgnoreNulls: 'IGNORE_NULLS'; 36 | NullHandling: 'NULL_HANDLING'; 37 | SpacesOnly: 'SPACES_ONLY'; 38 | Truncate: 'TRUNCATE'; 39 | 40 | IntegerLiteral 41 | : [+-]? Int 42 | ; 43 | 44 | DecimalLiteral 45 | : [+-]? [0-9]+ ('.' [0-9]+)? 46 | ; 47 | 48 | FloatLiteral 49 | : [+-]? [0-9]+ ('.' [0-9]*)? ( 'E' [+-]? [0-9]+ )? 50 | | [+-]? 'inf' 51 | | 'snan' 52 | ; 53 | 54 | BooleanLiteral 55 | : 'true' | 'false' 56 | ; 57 | 58 | fragment FourDigits: [0-9][0-9][0-9][0-9]; 59 | fragment TwoDigits: [0-9][0-9]; 60 | 61 | TimestampTzLiteral 62 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? 63 | [+-] TwoDigits ':' TwoDigits '\'' 64 | ; 65 | 66 | TimestampLiteral 67 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\'' 68 | ; 69 | 70 | TimeLiteral 71 | : '\'' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\'' 72 | ; 73 | 74 | DateLiteral 75 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits '\'' 76 | ; 77 | 78 | PeriodPrefix: 'P'; 79 | TimePrefix: 'T'; 80 | YearPrefix: 'Y'; 81 | MSuffix: 'M'; // used for both months and minutes 82 | DaySuffix: 'D'; 83 | HourSuffix: 'H'; 84 | SecondSuffix: 'S'; 85 | FractionalSecondSuffix: 'F'; 86 | OAngleBracket: Lt; 87 | CAngleBracket: Gt; 88 | 89 | IntervalYearLiteral 90 | : '\'' PeriodPrefix IntegerLiteral YearPrefix (IntegerLiteral MSuffix)? '\'' 91 | | '\'' PeriodPrefix IntegerLiteral MSuffix '\'' 92 | ; 93 | 94 | IntervalDayLiteral 95 | : '\'' PeriodPrefix IntegerLiteral DaySuffix (TimePrefix TimeInterval)?
'\'' 96 | | '\'' PeriodPrefix TimePrefix TimeInterval '\'' 97 | ; 98 | 99 | fragment TimeInterval 100 | : IntegerLiteral HourSuffix (IntegerLiteral MSuffix)? (DecimalLiteral SecondSuffix)? 101 | | IntegerLiteral MSuffix (DecimalLiteral SecondSuffix)? 102 | | DecimalLiteral SecondSuffix 103 | ; 104 | 105 | NullLiteral: 'null'; 106 | 107 | StringLiteral 108 | : '\'' ('\\' . | '\'\'' | ~['\\])* '\'' 109 | ; 110 | 111 | ColumnName 112 | : 'COL' Int 113 | ; 114 | -------------------------------------------------------------------------------- /grammar/Makefile: -------------------------------------------------------------------------------- 1 | TYPE_GRAMMAR=SubstraitLexer.g4 SubstraitType.g4 2 | TYPE_OUTPUT_DIR=../tests/type/antlr_parser 3 | TESTCASE_GRAMMAR=FuncTestCaseLexer.g4 FuncTestCaseParser.g4 4 | TESTCASE_OUTPUT_DIR=../tests/coverage/antlr_parser 5 | 6 | all: generate_testcase_parser generate_type_parser 7 | 8 | generate_testcase_parser: 9 | @echo "\nGenerating Test Case Parser" 10 | antlr -visitor -Dlanguage=Python3 -o $(TESTCASE_OUTPUT_DIR) $(TESTCASE_GRAMMAR) 11 | rm -rf $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp 12 | ./prepend_license.sh $(TESTCASE_OUTPUT_DIR) 13 | 14 | generate_type_parser: 15 | @echo "\nGenerating Substrait Type" 16 | antlr -visitor -Dlanguage=Python3 -o $(TYPE_OUTPUT_DIR) $(TYPE_GRAMMAR) 17 | rm -rf $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp 18 | ./prepend_license.sh $(TYPE_OUTPUT_DIR) 19 | 20 | clean: 21 | rm -rf $(TYPE_OUTPUT_DIR)/*.py $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp 22 | rm -rf $(TESTCASE_OUTPUT_DIR)/*.py $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp 23 | rm -rf ./*.tokens 24 | -------------------------------------------------------------------------------- /grammar/README.md: -------------------------------------------------------------------------------- 1 | # Grammar 2 | This file defines the grammars for: 3 | 1. 
The Substrait Type language used in the YAML extensions. 4 | 2. The test grammar language used to unit test functions. 5 | 6 | ## Regenerating 7 | To regenerate all of the parsers use the following command: 8 | ```sh 9 | make all 10 | ``` 11 | 12 | ### Requirements 13 | You will need [ANTLR](https://www.antlr.org/index.html) available on your machine to regenerate the parser. 14 | 15 | #### MacOS 16 | ``` 17 | brew install antlr 18 | ``` 19 | 20 | #### Ubuntu 21 | ``` 22 | sudo apt-get install antlr4 23 | ``` -------------------------------------------------------------------------------- /grammar/SubstraitLexer.g4: -------------------------------------------------------------------------------- 1 | lexer grammar SubstraitLexer; 2 | 3 | options { 4 | caseInsensitive = true; 5 | } 6 | 7 | // Whitespace and comment handling 8 | LineComment : '//' ~[\r\n]* -> channel(HIDDEN) ; 9 | BlockComment : ( '/*' ( ~'*' | '*'+ ~[*/] ) '*'* '*/' ) -> channel(HIDDEN) ; 10 | Whitespace : [ \t\r]+ -> channel(HIDDEN) ; 11 | 12 | fragment DIGIT: [0-9]; 13 | 14 | // Syntactic keywords.
15 | If : 'IF'; 16 | Then : 'THEN'; 17 | Else : 'ELSE'; 18 | 19 | // TYPES 20 | Boolean : 'BOOLEAN'; 21 | I8 : 'I8'; 22 | I16 : 'I16'; 23 | I32 : 'I32'; 24 | I64 : 'I64'; 25 | FP32 : 'FP32'; 26 | FP64 : 'FP64'; 27 | String : 'STRING'; 28 | Binary : 'BINARY'; 29 | Timestamp: 'TIMESTAMP'; 30 | Timestamp_TZ: 'TIMESTAMP_TZ'; 31 | Date : 'DATE'; 32 | Time : 'TIME'; 33 | Interval_Year: 'INTERVAL_YEAR'; 34 | Interval_Day: 'INTERVAL_DAY'; 35 | UUID : 'UUID'; 36 | Decimal : 'DECIMAL'; 37 | Precision_Time: 'PRECISION_TIME'; 38 | Precision_Timestamp: 'PRECISION_TIMESTAMP'; 39 | Precision_Timestamp_TZ: 'PRECISION_TIMESTAMP_TZ'; 40 | FixedChar: 'FIXEDCHAR'; 41 | VarChar : 'VARCHAR'; 42 | FixedBinary: 'FIXEDBINARY'; 43 | Struct : 'STRUCT'; 44 | NStruct : 'NSTRUCT'; 45 | List : 'LIST'; 46 | Map : 'MAP'; 47 | UserDefined: 'U!'; 48 | 49 | // short names for types 50 | Bool: 'BOOL'; 51 | Str: 'STR'; 52 | VBin: 'VBIN'; 53 | Ts: 'TS'; 54 | TsTZ: 'TSTZ'; 55 | IYear: 'IYEAR'; 56 | IDay: 'IDAY'; 57 | Dec: 'DEC'; 58 | PT: 'PT'; 59 | PTs: 'PTS'; 60 | PTsTZ: 'PTSTZ'; 61 | FChar: 'FCHAR'; 62 | VChar: 'VCHAR'; 63 | FBin: 'FBIN'; 64 | 65 | Any: 'ANY'; 66 | AnyVar: Any [0-9]; 67 | 68 | DoubleColon: '::'; 69 | 70 | // MATH 71 | Plus : '+'; 72 | Minus : '-'; 73 | Asterisk : '*'; 74 | ForwardSlash : '/'; 75 | Percent : '%'; 76 | 77 | // COMPARE 78 | Eq : '='; 79 | Ne : '!='; 80 | Gte : '>='; 81 | Lte : '<='; 82 | Gt : '>'; 83 | Lt : '<'; 84 | Bang : '!'; 85 | 86 | // ORGANIZE 87 | OAngleBracket: Lt; 88 | CAngleBracket: Gt; 89 | OParen: '('; 90 | CParen: ')'; 91 | OBracket: '['; 92 | CBracket: ']'; 93 | Comma: ','; 94 | Colon: ':'; 95 | QMark: '?'; 96 | Hash: '#'; 97 | Dot: '.'; 98 | 99 | 100 | // OPERATIONS 101 | And : 'AND'; 102 | Or : 'OR'; 103 | Assign : ':='; 104 | 105 | 106 | 107 | fragment Int 108 | : '1'..'9' Digit* 109 | | '0' 110 | ; 111 | 112 | fragment Digit 113 | : '0'..'9' 114 | ; 115 | 116 | Number 117 | : '-'? 
Int 118 | ; 119 | 120 | Identifier 121 | : ('A'..'Z' | '_' | '$') ('A'..'Z' | '_' | '$' | Digit)* 122 | ; 123 | 124 | Newline 125 | : ( '\r' '\n'? 126 | | '\n' 127 | ) 128 | ; 129 | -------------------------------------------------------------------------------- /grammar/prepend_license.sh: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | for f in $1/*.py; do 4 | echo '# SPDX-License-Identifier: Apache-2.0' | cat - $f > temp && mv temp $f 5 | done -------------------------------------------------------------------------------- /proto/buf.lock: -------------------------------------------------------------------------------- 1 | # Generated by buf. DO NOT EDIT. 2 | version: v1 3 | -------------------------------------------------------------------------------- /proto/buf.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | name: buf.build/substrait/substrait 3 | lint: 4 | use: 5 | - DEFAULT 6 | ignore_only: 7 | PACKAGE_VERSION_SUFFIX: 8 | - substrait 9 | breaking: 10 | use: 11 | - FILE 12 | -------------------------------------------------------------------------------- /proto/substrait/capabilities.proto: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | syntax = "proto3"; 3 | 4 | package substrait; 5 | 6 | option csharp_namespace = "Substrait.Protobuf"; 7 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb"; 8 | option java_multiple_files = true; 9 | option java_package = "io.substrait.proto"; 10 | 11 | // Defines a set of Capabilities that a system (producer or consumer) supports. 12 | message Capabilities { 13 | // List of Substrait versions this system supports 14 | repeated string substrait_versions = 1; 15 | 16 | // list of com.google.Any message types this system supports for advanced 17 | // extensions. 
18 | repeated string advanced_extension_type_urls = 2; 19 | 20 | // list of simple extensions this system supports. 21 | repeated SimpleExtension simple_extensions = 3; 22 | 23 | message SimpleExtension { 24 | string uri = 1; 25 | repeated string function_keys = 2; 26 | repeated string type_keys = 3; 27 | repeated string type_variation_keys = 4; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /proto/substrait/extended_expression.proto: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | syntax = "proto3"; 3 | 4 | package substrait; 5 | 6 | import "substrait/algebra.proto"; 7 | import "substrait/extensions/extensions.proto"; 8 | import "substrait/plan.proto"; 9 | import "substrait/type.proto"; 10 | 11 | option csharp_namespace = "Substrait.Protobuf"; 12 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb"; 13 | option java_multiple_files = true; 14 | option java_package = "io.substrait.proto"; 15 | 16 | message ExpressionReference { 17 | oneof expr_type { 18 | Expression expression = 1; 19 | AggregateFunction measure = 2; 20 | } 21 | // Field names in depth-first order 22 | repeated string output_names = 3; 23 | } 24 | 25 | // Describe a set of operations to complete. 26 | // For compactness sake, identifiers are normalized at the plan level. 27 | message ExtendedExpression { 28 | // Substrait version of the expression. Optional up to 0.17.0, required for later 29 | // versions. 
30 | Version version = 7; 31 | 32 | // a list of yaml specifications this expression may depend on 33 | repeated substrait.extensions.SimpleExtensionURI extension_uris = 1; 34 | 35 | // a list of extensions this expression may depend on 36 | repeated substrait.extensions.SimpleExtensionDeclaration extensions = 2; 37 | 38 | // one or more expression trees with same order in plan rel 39 | repeated ExpressionReference referred_expr = 3; 40 | 41 | NamedStruct base_schema = 4; 42 | // additional extensions associated with this expression. 43 | substrait.extensions.AdvancedExtension advanced_extensions = 5; 44 | 45 | // A list of com.google.Any entities that this plan may use. Can be used to 46 | // warn if some embedded message types are unknown. Note that this list may 47 | // include message types that are ignorable (optimizations) or that are 48 | // unused. In many cases, a consumer may be able to work with a plan even if 49 | // one or more message types defined here are unknown. 50 | repeated string expected_type_urls = 6; 51 | } 52 | -------------------------------------------------------------------------------- /proto/substrait/extensions/extensions.proto: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | syntax = "proto3"; 3 | 4 | package substrait.extensions; 5 | 6 | import "google/protobuf/any.proto"; 7 | 8 | option csharp_namespace = "Substrait.Protobuf"; 9 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb/extensions"; 10 | option java_multiple_files = true; 11 | option java_package = "io.substrait.proto"; 12 | 13 | message SimpleExtensionURI { 14 | // A surrogate key used in the context of a single plan used to reference the 15 | // URI associated with an extension. 16 | uint32 extension_uri_anchor = 1; 17 | 18 | // The URI where this extension YAML can be retrieved. This is the "namespace" 19 | // of this extension. 
20 | string uri = 2; 21 | } 22 | 23 | // Describes a mapping between a specific extension entity and the uri where 24 | // that extension can be found. 25 | message SimpleExtensionDeclaration { 26 | oneof mapping_type { 27 | ExtensionType extension_type = 1; 28 | ExtensionTypeVariation extension_type_variation = 2; 29 | ExtensionFunction extension_function = 3; 30 | } 31 | 32 | // Describes a Type 33 | message ExtensionType { 34 | // references the extension_uri_anchor defined for a specific extension URI. 35 | uint32 extension_uri_reference = 1; 36 | 37 | // A surrogate key used in the context of a single plan to reference a 38 | // specific extension type 39 | uint32 type_anchor = 2; 40 | 41 | // the name of the type in the defined extension YAML. 42 | string name = 3; 43 | } 44 | 45 | message ExtensionTypeVariation { 46 | // references the extension_uri_anchor defined for a specific extension URI. 47 | uint32 extension_uri_reference = 1; 48 | 49 | // A surrogate key used in the context of a single plan to reference a 50 | // specific type variation 51 | uint32 type_variation_anchor = 2; 52 | 53 | // the name of the type variation in the defined extension YAML. 54 | string name = 3; 55 | } 56 | 57 | message ExtensionFunction { 58 | // references the extension_uri_anchor defined for a specific extension URI. 59 | uint32 extension_uri_reference = 1; 60 | 61 | // A surrogate key used in the context of a single plan to reference a 62 | // specific function 63 | uint32 function_anchor = 2; 64 | 65 | // A function signature compound name 66 | string name = 3; 67 | } 68 | } 69 | 70 | // A generic object that can be used to embed additional extension information 71 | // into the serialized substrait plan. 72 | message AdvancedExtension { 73 | // An optimization is helpful information that doesn't influence semantics. May 74 | // be ignored by a consumer. 75 | repeated google.protobuf.Any optimization = 1; 76 | 77 | // An enhancement alters semantics. Cannot be ignored by a consumer.
78 | google.protobuf.Any enhancement = 2; 79 | } 80 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | # exclude filters out files found by Black itself during discovery 3 | exclude = ''' 4 | ( 5 | .*/antlr_parser/.*\.py 6 | ) 7 | ''' 8 | # pre-commit passes files into Black, rather than letting it discover files 9 | # force-exclude can be used to filter out these files from formatting 10 | force-exclude = ''' 11 | ( 12 | .*/antlr_parser/.*\.py 13 | ) 14 | ''' -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | antlr4-python3-runtime==4.13.2 2 | black==24.8.0 3 | flake8==7.0.0 4 | pytest==8.3.4 5 | pyyaml==6.0.2 -------------------------------------------------------------------------------- /site/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | site 3 | -------------------------------------------------------------------------------- /site/README.md: -------------------------------------------------------------------------------- 1 | ## Substrait Site 2 | 3 | This directory contains the source for the Substrait site. 4 | 5 | * Site structure is maintained in mkdocs.yml 6 | * Pages are maintained in markdown in the `docs/` folder 7 | * Links use bare page names: `[link text](target-page)` 8 | 9 | ### Installation 10 | 11 | The site is built using mkdocs. To install mkdocs and the theme, run: 12 | 13 | ``` 14 | # Activate the virtual environment (if installed) 15 | cd site/ 16 | . 
venv/bin/activate 17 | # Install or update the dependencies 18 | pip install -r ./requirements.txt 19 | ``` 20 | 21 | It is easier to use `virtualenv` to keep the Python dependencies for `site/` 22 | separate from your other projects and/or distinct from system managed Python 23 | dependencies. 24 | 25 | * To use `virtualenv`, you need Python 3.7/3.8 installed locally. 26 | * For Ubuntu: `apt-get install python3 virtualenv` 27 | * For MacOS/brew: `brew install python pyenv-virtualenv` 28 | * Install the virtual environment: 29 | ``` 30 | # cd to the site/ directory 31 | cd site/ 32 | # setup the virtual environment (only needed once) 33 | virtualenv -p $(which python3) venv 34 | # activate the virtual environment 35 | . venv/bin/activate 36 | # Install or update the dependencies as usual 37 | pip install -r ./requirements.txt 38 | ``` 39 | 40 | ### Local Changes 41 | 42 | To see changes locally before committing, use mkdocs to run a local server from this directory. 43 | 44 | ``` 45 | mkdocs serve 46 | ``` 47 | 48 | ### Publishing 49 | 50 | TBD 51 | -------------------------------------------------------------------------------- /site/data/committers.yaml: -------------------------------------------------------------------------------- 1 | - Name: Jeroen van Straten 2 | Association: Qblox 3 | - Name: Carlo Curino 4 | Association: Microsoft 5 | - Name: James Taylor 6 | Association: Sundeck 7 | - Name: Sutou Kouhei 8 | Association: Clearcode 9 | - Name: Micah Kornfeld 10 | Association: Google 11 | - Name: Jinfeng Ni 12 | Association: Sundeck 13 | - Name: Andy Grove 14 | Association: Nvidia 15 | - Name: Jesus Camacho Rodriguez 16 | Association: Microsoft 17 | - Name: Rich Tia 18 | Association: Voltron Data 19 | - Name: Vibhatha Abeykoon 20 | Association: Voltron Data 21 | - Name: Nic Crane 22 | Association: Recast 23 | - Name: Gil Forsyth 24 | Association: Voltron Data 25 | - Name: ChaoJun Zhang 26 | Association: Intel 27 | - Name: Matthijs Brobbel 28 | Association: 
Voltron Data 29 | - Name: Matt Topol 30 | Association: Voltron Data 31 | - Name: Ingo Müller 32 | Association: Google 33 | - Name: Arttu Voutilainen 34 | Association: Palantir Technologies 35 | - Name: Bruno Volpato 36 | Association: Datadog 37 | - Name: Anshul Data 38 | Association: Sundeck 39 | - Name: Chandra Sanapala 40 | Association: Sundeck 41 | -------------------------------------------------------------------------------- /site/data/smc.yaml: -------------------------------------------------------------------------------- 1 | - Name: Phillip Cloud 2 | Association: Voltron Data 3 | - Name: Weston Pace 4 | Association: LanceDB 5 | - Name: Jacques Nadeau 6 | Association: Sundeck 7 | - Name: Victor Barua 8 | Association: Datadog 9 | - Name: David Sisson 10 | Association: Voltron Data 11 | -------------------------------------------------------------------------------- /site/docs/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - index.md 3 | - spec 4 | - types 5 | - expressions 6 | - relations 7 | - serialization 8 | - extensions 9 | - community 10 | - governance.md 11 | - about.md 12 | - tools 13 | - tutorial 14 | - faq.md 15 | -------------------------------------------------------------------------------- /site/docs/community/index.md: -------------------------------------------------------------------------------- 1 | # Community 2 | 3 | Substrait is developed as a consensus-driven open source product under the Apache 2.0 4 | license. Development is done in the open leveraging GitHub issues and PRs. 5 | 6 | ## Get In Touch 7 | 8 | [Mailing List/Google Group](https://groups.google.com/g/substrait) 9 | : We use the mailing list to discuss questions, formulate plans and collaborate asynchronously. 10 | 11 | [Slack Channel]({{versions.slackinvitelink}}) 12 | : The developers of Substrait frequent the Slack channel. 
You can get an 13 | invite to the channel by following [this link]({{versions.slackinvitelink}}). 14 | 15 | [GitHub Issues](https://github.com/substrait-io/substrait/issues) 16 | : Substrait is developed via GitHub issues and pull requests. If you see a problem 17 | or want to enhance the product, we suggest you file a GitHub issue for developers to 18 | review. 19 | 20 | [Twitter](https://twitter.com/substrait_io) 21 | : The [@substrait_io](https://twitter.com/substrait_io) account on Twitter is our official account. Follow us to keep 22 | up to date on what is happening with Substrait! 23 | 24 | [Docs](https://github.com/substrait-io/substrait/tree/main/site/docs) 25 | : Our website is all maintained in our source repository. If there is something you think 26 | can be improved, feel free to fork our repository and post a pull request. 27 | 28 | Meetings 29 | : Our community meets every other week on Wednesday. 30 | 31 | 32 | 33 | ## Talks 34 | Want to learn more about Substrait? Try the following presentations and slide decks. 35 | 36 | * Substrait: A Common Representation for Data Compute Plans (Jacques Nadeau, April 2022) [[slides](https://docs.google.com/presentation/d/1H89iwnnmHZ2oMgGpFon9Rv_fJ60RWE0c9drHCZAL8Bw)] 37 | 38 | ## Citation 39 | 40 | If you use Substrait in your research, please cite it using the following BibTeX entry: 41 | 42 | ```bibtex 43 | @misc{substrait, 44 | author = {substrait-io}, 45 | title = {Substrait: Cross-Language Serialization for Relational Algebra}, 46 | year = {2021}, 47 | month = {8}, 48 | day = {31}, 49 | publisher = {GitHub}, 50 | journal = {GitHub repository}, 51 | howpublished = {\url{https://github.com/substrait-io/substrait}} 52 | } 53 | ``` 54 | 55 | ## Contribution 56 | 57 | All contributors are welcome to Substrait. If you want to join the project, open a PR or get in touch with us as [above](#get-in-touch). 58 | 59 | 60 | ## Principles 61 | 62 | * Be inclusive and open to all.
63 | * Ensure a diverse set of contributors that come from multiple data backgrounds to maximize general utility. 64 | * Build a specification based on open consensus. 65 | * Avoid over-reliance/coupling to any single technology. 66 | * Make the specification and all tools freely available on a permissive license (ApacheV2) 67 | -------------------------------------------------------------------------------- /site/docs/community/powered_by.md: -------------------------------------------------------------------------------- 1 | # Powered by Substrait 2 | 3 | In addition to the work maintained in repositories within the 4 | [substrait-io GitHub organization](https://github.com/substrait-io), a growing 5 | list of other open source projects have adopted Substrait. 6 | 7 | 8 | [//]: # (Maintain this list in alphabetical order) 9 | 10 | [Acero](https://arrow.apache.org/docs/cpp/streaming_execution.html) 11 | : Acero is a query execution engine implemented as a part of the Apache Arrow 12 | C++ library. Acero provides a Substrait consumer interface. 13 | 14 | [ADBC](https://arrow.apache.org/adbc/) 15 | : ADBC (Arrow Database Connectivity) is an API specification for Apache 16 | Arrow-based database access. ADBC allows applications to pass queries either 17 | as SQL strings or Substrait plans. 18 | 19 | [Arrow Flight SQL](https://arrow.apache.org/docs/format/FlightSql.html) 20 | : Arrow Flight SQL is a client-server protocol for interacting with databases 21 | and query engines using the Apache Arrow in-memory columnar format and the 22 | [Arrow Flight RPC](https://arrow.apache.org/docs/format/Flight.html) 23 | framework. Arrow Flight SQL allows clients to send queries as SQL strings or 24 | Substrait plans. 25 | 26 | [DataFusion](https://arrow.apache.org/datafusion/) 27 | : DataFusion is an extensible query planning, optimization, and execution 28 | framework, written in Rust, that uses Apache Arrow as its in-memory format. 
29 | DataFusion provides a Substrait producer and consumer that can convert 30 | DataFusion logical plans to and from Substrait plans. It can be used through the 31 | [DataFusion Python bindings](https://github.com/apache/arrow-datafusion-python#substrait-support). 32 | 33 | [DuckDB](https://duckdb.org) 34 | : DuckDB is an in-process SQL OLAP database management system. The [Substrait 35 | Community Extension](https://duckdb.org/community_extensions/extensions/substrait) 36 | allows users to produce and consume Substrait plans through DuckDB's 37 | SQL, Python, and R APIs. 38 | 39 | [Gluten](https://github.com/oap-project/gluten) 40 | : Gluten is a plugin for Apache Spark that allows computation to be offloaded 41 | to engines that have better performance or efficiency than Spark's built-in 42 | JVM-based engine. Gluten converts Spark physical plans to Substrait plans. 43 | 44 | [Ibis](https://ibis-project.org/) 45 | : Ibis is a Python library that provides a lightweight, universal interface 46 | for data wrangling. It includes a dataframe API for Python with support for 47 | more than 10 query execution engines, plus a 48 | [Substrait producer](https://github.com/ibis-project/ibis-substrait) 49 | to enable support for Substrait-consuming execution engines. 50 | 51 | [Substrait R Interface](https://github.com/voltrondata/substrait-r) 52 | : The Substrait R interface package allows users to construct Substrait plans 53 | from R for evaluation by Substrait-consuming execution engines. The package 54 | provides a [dplyr](https://dplyr.tidyverse.org) backend as well as 55 | lower-level interfaces for creating Substrait plans and integrations with 56 | Acero and DuckDB. 57 | 58 | [Velox](https://velox-lib.io) 59 | : Velox is a unified execution engine aimed at accelerating data management 60 | systems and streamlining their development. Velox provides a Substrait 61 | consumer interface. 
62 | 63 | 64 | To add your project to this list, please open a 65 | [pull request](https://github.com/substrait-io/substrait/edit/main/site/docs/community/powered_by.md). 66 | -------------------------------------------------------------------------------- /site/docs/expressions/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - field_references.md 3 | - scalar_functions.md 4 | - aggregate_functions.md 5 | - specialized_record_expressions.md 6 | - window_functions.md 7 | - table_functions.md 8 | - user_defined_functions.md 9 | - embedded_functions.md 10 | - dynamic_parameters.md 11 | -------------------------------------------------------------------------------- /site/docs/expressions/aggregate_functions.md: -------------------------------------------------------------------------------- 1 | # Aggregate Functions 2 | 3 | Aggregate functions are functions that define an operation which consumes values from multiple records to produce a single output. Aggregate functions in SQL are typically used in GROUP BY functions. Aggregate functions are similar to scalar functions and function signatures with a small set of different properties. 4 | 5 | Aggregate function signatures contain all the properties defined for [scalar functions](scalar_functions.md). Additionally, they contain the properties below: 6 | 7 | | Property | Description | Required | 8 | | ------------------------ | --------------------------------------------------------------- | ------------------------------- | 9 | | Inherits | All properties defined for scalar function. | N/A | 10 | | Ordered | Whether the result of this function is sensitive to sort order. | Optional, defaults to false | 11 | | Maximum set size | Maximum allowed set size as an unsigned integer. | Optional, defaults to unlimited | 12 | | Decomposable | Whether the function can be executed in one or more intermediate steps.
Valid options are: `NONE`, `ONE`, `MANY`, describing how intermediate steps can be taken. | Optional, defaults to `NONE` | 13 | | Intermediate Output Type | If the function is decomposable, represents the intermediate output type that is used, if the function is defined as either `ONE` or `MANY` decomposable. Will be a struct in many cases. | Required for `ONE` and `MANY`. | 14 | | Invocation | Whether the function uses all or only distinct values in the aggregation calculation. Valid options are: `ALL`, `DISTINCT`. | Optional, defaults to `ALL` | 15 | 16 | 17 | 18 | ## Aggregate Binding 19 | 20 | When binding an aggregate function, the binding must include the following additional properties beyond the standard scalar binding properties: 21 | 22 | | Property | Description | 23 | | -------- | ------------------------------------------------------------ | 24 | | Phase | Describes the input type of the data: [INITIAL_TO_INTERMEDIATE, INTERMEDIATE_TO_INTERMEDIATE, INITIAL_TO_RESULT, INTERMEDIATE_TO_RESULT] describing what portion of the operation is required. For functions that are NOT decomposable, the only valid option will be INITIAL_TO_RESULT. | 25 | | Ordering | Zero or more ordering keys along with key order (ASC\|DESC\|NULL FIRST, etc.), declared similar to the sort keys in an `ORDER BY` relational operation. If no sorts are specified, the records are not sorted prior to being passed to the aggregate function. | 26 | 27 | -------------------------------------------------------------------------------- /site/docs/expressions/dynamic_parameters.md: -------------------------------------------------------------------------------- 1 | # Dynamic Parameter Expression 2 | 3 | The dynamic parameter expression represents a placeholder within an expression whose value is determined at runtime. 4 | This is particularly useful for parameterized queries where certain values are not known until execution. 
5 | Additionally, using dynamic parameters can enable other use cases, such as sharing execution plans without embedding sensitive information. 6 | 7 | A dynamic parameter expression includes the following properties: 8 | 9 | | Property | Description | Required | 10 | |-----------------------|-------------------------------------------------------------------------------|----------| 11 | | `type` | Specifies the expected data type of the dynamic parameter. | Yes | 12 | | `parameter_reference` | A surrogate key used within a plan to reference a specific parameter binding. | Yes | 13 | -------------------------------------------------------------------------------- /site/docs/expressions/embedded_functions.md: -------------------------------------------------------------------------------- 1 | # Embedded Functions 2 | 3 | Embedded functions are a special kind of function where the implementation is embedded within the actual plan. They are commonly used in tools where a user intersperses business logic within a data pipeline. This is more common in data science workflows than traditional SQL workflows. 4 | 5 | Embedded functions are not pre-registered. Embedded functions require that data be consumed and produced with a standard API, may require memory allocation and have determinate error reporting behavior. They may also have specific runtime dependencies. For example, a Python pickle function may depend on pyarrow 5.0 and pynessie 1.0. 6 | 7 | Properties for an embedded function include: 8 | 9 | | Property | Description | Required | 10 | | ------------------- | ---------------------------------------------------------- | -------- | 11 | | Function Type | The type of embedded function presented. | Required | 12 | | Function Properties | Function properties, one of those items defined below. | Required | 13 | | Output Type | The fully resolved output type for this embedded function. 
| Required | 14 | 15 | The binary representation of an embedded function is: 16 | 17 | 18 | === "Binary Representation" 19 | ```proto 20 | %%% proto.message.Expression.EmbeddedFunction %%% 21 | ``` 22 | 23 | === "Human Readable Representation" 24 | As the bytes are opaque to Substrait there is no equivalent human readable form. 25 | 26 | 27 | ## Function Details 28 | 29 | There are many types of possible stored functions. For each, Substrait works to expose the function in as descriptive a way as possible to support the largest number of consumers. 30 | 31 | 32 | 33 | ## Python Pickle Function Type 34 | 35 | | Property | Description | Required | 36 | | ----------- | ------------------------------------------------------------ | -------------------------- | 37 | | Pickle Body | binary pickle encoded function using [TBD] API representation to access arguments. | True | 38 | | Prereqs | A list of specific Python conda packages that are prerequisites for access (a structured version of a requirements.txt file). | Optional, defaults to none | 39 | 40 | 41 | 42 | ## WebAssembly Function Type 43 | 44 | | Property | Description | Required | 45 | | -------- | ------------------------------------------------------------ | -------------------------- | 46 | | Script | WebAssembly function | True | 47 | | Prereqs | A list of AssemblyScript prerequisites required to compile the assemblyscript function using NPM coordinates. | Optional, defaults to none | 48 | 49 | 50 | 51 | ???+ question "Discussion Points" 52 | 53 | * What are the common embedded function formats? 54 | * How do we expose the data for a function? 55 | * How do we express batching capabilities? 56 | * How do we ensure/declare containerization? 
57 | -------------------------------------------------------------------------------- /site/docs/expressions/extended_expression.md: -------------------------------------------------------------------------------- 1 | # Extended Expression 2 | 3 | Extended Expression messages are provided for expression-level protocols as an alternative to using a Plan. They mainly target expression-only evaluations, such as those computed in Filter/Project/Aggregation rels. Unlike the original Expression defined in the Substrait protocol, Extended Expression messages require more information to completely describe the computation context including: input data schema, referred function signatures, and output schema. 4 | 5 | Since Extended Expression will be used separately from the Plan rel representation, it will need to include basic fields like Version. 6 | 7 | === "ExtendedExpression Message" 8 | 9 | ```proto 10 | %%% proto.message.ExtendedExpression %%% 11 | ``` 12 | 13 | ## Input and output data schema 14 | 15 | Similar to `base_schema` defined in [ReadRel](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L58), the input data schema describes the name/type/nullability and layout info of input data for the target expression evaluation. It also has a field `name` to define the name of the output data. 16 | 17 | ## Referred expression 18 | 19 | An Extended Expression will have one or more referred expressions, which can be either [Expression](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto) or [AggregateFunction](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L1170). Additional types of expressions may be added in the future.
20 | 21 | For a message with multiple expressions, users may produce each Extended Expression in the same order as they occur in the original Plan rel. But, the consumer does NOT have to handle them in this order. A consumer needs only to ensure that the columns in the final output are organized in the same order as defined in the message. 22 | 23 | ## Function extensions 24 | 25 | Function extensions work the same for both Extended Expression and the original Expression defined in the Substrait protocol. 26 | -------------------------------------------------------------------------------- /site/docs/expressions/subqueries.md: -------------------------------------------------------------------------------- 1 | # Subqueries 2 | 3 | Subqueries are scalar expressions comprised of another query. 4 | 5 | ## Forms 6 | 7 | ### Scalar 8 | 9 | Scalar subqueries are subqueries that return one row and one column. 10 | 11 | | Property | Description | Required | 12 | | -------- | -------------- | -------- | 13 | | Input | Input relation | Yes | 14 | 15 | ### `IN` predicate 16 | 17 | An `IN` subquery predicate checks that the left expression is contained in the 18 | right subquery. 19 | 20 | #### Examples 21 | 22 | ```sql 23 | SELECT * 24 | FROM t1 25 | WHERE x IN (SELECT * FROM t2) 26 | ``` 27 | 28 | ```sql 29 | SELECT * 30 | FROM t1 31 | WHERE (x, y) IN (SELECT a, b FROM t2) 32 | ``` 33 | 34 | | Property | Description | Required | 35 | | -------- | ------------------------------------------- | -------- | 36 | | Needles | Expressions whose existence will be checked | Yes | 37 | | Haystack | Subquery to check | Yes | 38 | 39 | ### Set predicates 40 | 41 | A set predicate is a predicate over a set of rows in the form of a subquery. 42 | 43 | `EXISTS` and `UNIQUE` are common SQL spellings of these kinds of predicates. 
44 | 45 | | Property | Description | Required | 46 | | --------- | ------------------------------------------ | -------- | 47 | | Operation | The operation to perform over the set | Yes | 48 | | Tuples | Set of tuples to check using the operation | Yes | 49 | 50 | ### Set comparisons 51 | 52 | A set comparison subquery is a subquery comparison using `ANY` or `ALL` operations. 53 | 54 | #### Examples 55 | 56 | ```sql 57 | SELECT * 58 | FROM t1 59 | WHERE x < ANY(SELECT y from t2) 60 | ``` 61 | 62 | | Property | Description | Required | 63 | | --------------------- | ---------------------------------------------- | -------- | 64 | | Reduction operation | The kind of reduction to use over the subquery | Yes | 65 | | Comparison operation | The kind of comparison operation to use | Yes | 66 | | Expression | Left-hand side expression to check | Yes | 67 | | Subquery | Subquery to check | Yes | 68 | 69 | 70 | 71 | === "Protobuf Representation" 72 | 73 | ```proto 74 | %%% proto.message.Expression.Subquery %%% 75 | ``` 76 | -------------------------------------------------------------------------------- /site/docs/expressions/table_functions.md: -------------------------------------------------------------------------------- 1 | # Table Functions 2 | 3 | Table functions produce zero or more records for each input record. Table functions use a signature similar to scalar functions. However, they are not allowed in the same contexts. 4 | 5 | 6 | 7 | to be completed... 8 | 9 | -------------------------------------------------------------------------------- /site/docs/expressions/user_defined_functions.md: -------------------------------------------------------------------------------- 1 | # User-Defined Functions 2 | 3 | Substrait supports the creation of custom functions using [simple extensions](../extensions/index.md#simple-extensions), using the facilities described in [scalar functions](scalar_functions.md). The functions defined by Substrait use the same mechanism. 
The extension files for standard functions can be found [here](https://github.com/substrait-io/substrait/tree/main/extensions). 4 | 5 | Here's an example function that doubles its input: 6 | 7 | !!! info inline end "Implementation Note" 8 | This implementation is only defined on 32-bit floats and integers but could be defined on all numbers (and even lists and strings). The user of the implementation can specify what happens when the resulting value falls outside of the valid range for a 32-bit float (either return NAN or raise an error). 9 | 10 | ``` yaml 11 | %YAML 1.2 12 | --- 13 | scalar_functions: 14 | - 15 | name: "double" 16 | description: "Double the value" 17 | impls: 18 | - args: 19 | - name: x 20 | value: fp32 21 | options: 22 | on_domain_error: 23 | values: [ NAN, ERROR ] 24 | return: fp32 25 | - args: 26 | - name: x 27 | value: i32 28 | options: 29 | on_domain_error: 30 | values: [ NAN, ERROR ] 31 | return: i32 32 | ``` 33 | -------------------------------------------------------------------------------- /site/docs/expressions/window_functions.md: -------------------------------------------------------------------------------- 1 | # Window Functions 2 | 3 | Window functions are functions which consume values from multiple records to produce a single output. They are similar to aggregate functions, but also have a focused window of analysis to compare to their partition window. Window functions are similar to scalar values to an end user, producing a single value for each input record. However, the consumption visibility for the production of each single record can be many records. 4 | 5 | 6 | 7 | Window function signatures contain all the properties defined for [aggregate functions](aggregate_functions.md). 
Additionally, they contain the properties below: 8 | 9 | | Property | Description | Required | 10 | | ----------- | ------------------------------------------------------------ | ------------------------------- | 11 | | Inherits | All properties defined for aggregate functions. | N/A | 12 | | Window Type | STREAMING or PARTITION. Describes whether the function needs to see all data for the specific partition operation simultaneously. Operations like SUM can produce values in a streaming manner with no complete visibility of the partition. NTILE requires visibility of the entire partition before it can start producing values. | Optional, defaults to PARTITION | 13 | 14 | 15 | 16 | When binding an aggregate function, the binding must include the following additional properties beyond the standard scalar binding properties: 17 | 18 | | Property | Description | Required | 19 | | ----------- | ------------------------------------------------------------ | ------------------------------------------------------------ | 20 | | Partition | A list of partitioning expressions. | False, defaults to a single partition for the entire dataset | 21 | | Lower Bound | Bound Following(int64), Bound Trailing(int64) or CurrentRow. | False, defaults to start of partition | 22 | | Upper Bound | Bound Following(int64), Bound Trailing(int64) or CurrentRow. | False, defaults to end of partition | 23 | 24 | ## Aggregate Functions as Window Functions 25 | 26 | Aggregate functions can be treated as window functions with Window Type set to STREAMING. 27 | 28 | AVG, COUNT, MAX, MIN and SUM are examples of aggregate functions that are commonly allowed in window contexts.
29 | -------------------------------------------------------------------------------- /site/docs/extensions/.gitignore: -------------------------------------------------------------------------------- 1 | *.md 2 | !index.md 3 | -------------------------------------------------------------------------------- /site/docs/faq.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: FAQ 3 | --- 4 | 5 | # Frequently Asked Questions 6 | 7 | ## What is the purpose of the post-join filter field on Join relations? 8 | 9 | The post-join filter on the various Join relations is not always equivalent to an explicit Filter relation AFTER the Join. 10 | 11 | See the example [here](https://facebookincubator.github.io/velox/develop/joins.html#hash-join-implementation) that highlights how the post-join filter behaves differently than a Filter relation in the case of a left join. 12 | 13 | ## Why does the project relation keep existing columns? 14 | 15 | In several relational algebra systems ([DuckDB](https://duckdb.org/), [Velox](https://velox-lib.io/), [Apache Spark](https://spark.apache.org/), [Apache DataFusion](https://datafusion.apache.org/), etc.) the project relation is used both 16 | to add new columns and remove existing columns. It is defined by a list of expressions and there is one output 17 | column for each expression. 18 | 19 | In Substrait, the project relation is only used to add new columns. Any relation can remove columns by using the 20 | `emit` property in `RelCommon`. This is because it is very common for optimized plans to discard columns once they 21 | are no longer needed and this can happen anywhere in a plan. If this discard required a project relation then 22 | optimized plans would be cluttered with project relations that only remove columns. 23 | 24 | As a result, Substrait's project relation is a little different. It is also defined by a list of expressions. 
25 | However, the output columns are a combination of the input columns and one column for each of the expressions. 26 | 27 | ## Where are field names represented? 28 | 29 | Some relational algebra systems, such as Spark, give names to the output fields of a relation. For example, in 30 | PySpark I might run `df.withColumn("num_chars", length("text")).filter("num_chars > 10")`. This creates a 31 | project relation, which calculates a new field named `num_chars`. This field is then referenced in the filter 32 | relation. Spark's logical plan maps closely to this and includes both the expression (`length("text")`) and the 33 | name of the output field (`num_chars`) in its project relation. 34 | 35 | Substrait does not name intermediate fields in a plan. This is because these field names have no effect on 36 | the computation that must be performed. In addition, it opens the door to name-based references, which Substrait 37 | also does not support, because these can be a source of errors and confusion. One of the goals of Substrait is 38 | to make it very easy for consumers to understand plans. All references in Substrait are done with ordinals. 39 | 40 | In order to allow plans that do use named fields to round-trip through Substrait there is a hint that can be 41 | used to add field names to a plan. This hint is called `output_names` and is located in `RelCommon`. Consumers 42 | should not rely on this hint being present in a plan but, if present, it can be used to provide field names to 43 | intermediate relations in a plan for round-trip or debugging purposes. 44 | 45 | There are a few places where Substrait DOES define field names: 46 | 47 | - Read relations have field names in the base schema. This is because it is quite common for reads to do a 48 | name-based lookup to determine the columns that need to be read from source files. 49 | - The root relation has field names. 
This is because the root relation is the final output of the plan and 50 | it is useful to have names for the fields in the final output. 51 | -------------------------------------------------------------------------------- /site/docs/img/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /site/docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Home 3 | --- 4 | 5 | 6 | ## What is Substrait? 7 | 8 | Substrait is a format for describing compute operations on structured data. It is designed for interoperability across different languages and systems. 9 | 10 | 11 | 12 | ## How does it work? 13 | 14 | Substrait provides a well-defined, cross-language [specification](spec/specification.md) for data compute operations. This includes a consistent declaration of common operations, custom operations and one or more serialized representations of this specification. The spec focuses on the semantics of each operation. In addition to the specification the Substrait ecosystem also includes a number of libraries and [useful tools](tools/producer_tools.md). 15 | 16 | We highly recommend the [tutorial](tutorial/sql_to_substrait.md) to learn how a Substrait plan is constructed. 17 | 18 | 19 | 20 | ## Benefits 21 | 22 | * Avoids every system needing to create a communication method between every other system -- each system merely supports ingesting and producing Substrait and it instantly becomes a part of the greater ecosystem. 23 | * Makes every part of the system upgradable. There's a new query engine that's ten times faster? Just plug it in! 24 | * Enables heterogeneous environments -- run on a cluster of an unknown set of execution engines! 
25 | * The text version of the Substrait plan allows you to quickly see how a plan functions without needing a visualizer (although there are Substrait visualizers as well!). 26 | 27 | 28 | 29 | ## Example Use Cases 30 | 31 | * Communicate a compute plan between a SQL parser and an execution engine (e.g. Calcite SQL parsing to Arrow C++ compute kernel) 32 | * Serialize a plan that represents a SQL view for consistent use in multiple systems (e.g. Iceberg views in Spark and Trino) 33 | * Submit a plan to different execution engines (e.g. Datafusion and Postgres) and get a consistent interpretation of the semantics. 34 | * Create an alternative plan generation implementation that can connect an existing end-user compute expression system to an existing end-user processing engine (e.g. Pandas operations executed inside SingleStore) 35 | * Build a pluggable plan visualization tool (e.g. D3 based plan visualizer) 36 | 37 | -------------------------------------------------------------------------------- /site/docs/relations/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - basics.md 3 | - common_fields.md 4 | - logical_relations.md 5 | - physical_relations.md 6 | - user_defined_relations.md 7 | - embedded_relations.md 8 | -------------------------------------------------------------------------------- /site/docs/relations/common_fields.md: -------------------------------------------------------------------------------- 1 | # Common Fields 2 | 3 | Every relation contains a common section containing optional hints and emit behavior. 4 | 5 | 6 | ## Emit 7 | 8 | A relation which has a direct emit kind outputs the relation's output without reordering or selection. A relation that specifies an emit output mapping can output its output columns in any order and may leave output columns out. 
9 | 10 | ???+ info "Relation Output" 11 | 12 | * Many relations (such as Project) by default provide as their output the list of all their input columns plus any generated columns as its output columns. Review each relation to understand its specific output default. 13 | 14 | 15 | ## Hints 16 | 17 | Hints provide information that can improve performance but cannot be used to control the behavior. Table statistics, runtime constraints, name hints, and saved computations all fall into this category. 18 | 19 | ???+ info "Hint Design" 20 | 21 | * If a hint is not present or has incorrect data the consumer should be able to ignore it and still arrive at the correct result. 22 | 23 | 24 | ### Saved Computations 25 | 26 | Computations can be used to save a data structure to use elsewhere. For instance, let's say we have a plan with a HashEquiJoin and an AggregateDistinct operation. The HashEquiJoin could save its hash table as part of saved computation id number 1 and the AggregateDistinct could read in computation id number 1. 27 | -------------------------------------------------------------------------------- /site/docs/relations/embedded_relations.md: -------------------------------------------------------------------------------- 1 | # Embedded Relations 2 | 3 | Pending. 4 | 5 | Embedded relations allow a Substrait producer to define a set operation that will be embedded in the plan. 6 | 7 | TODO: define lots of details about what interfaces, languages, formats, etc. Should reasonably be an extension of embedded user defined table functions. 
8 | -------------------------------------------------------------------------------- /site/docs/relations/user_defined_relations.md: -------------------------------------------------------------------------------- 1 | # User Defined Relations 2 | 3 | Pending 4 | 5 | -------------------------------------------------------------------------------- /site/docs/serialization/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | 3 | - basics.md 4 | - binary_serialization.md 5 | - text_serialization.md 6 | -------------------------------------------------------------------------------- /site/docs/serialization/basics.md: -------------------------------------------------------------------------------- 1 | # Basics 2 | 3 | Substrait is designed to be serialized into various different formats. Currently we support a binary serialization for 4 | transmission of plans between programs (e.g. IPC or network communication) and a text serialization for debugging and human readability. Other formats may be added in the future. 5 | 6 | These formats serialize a collection of plans. Substrait does not define how a collection of plans is to be interpreted. 7 | For example, the following scenarios are all valid uses of a collection of plans: 8 | 9 | - A query engine receives a plan and executes it. It receives a collection of plans with a single root plan. The 10 | top-level node of the root plan defines the output of the query. Non-root plans may be included as common subplans 11 | which are referenced from the root plan. 12 | - A transpiler may convert plans from one dialect to another. It could take, as input, a single root plan. Then 13 | it could output a serialized binary containing multiple root plans. Each root plan is a representation of the 14 | input plan in a different dialect. 15 | - A distributed scheduler might expect 1+ root plans. Each root plan describes a different stage of computation. 
16 | 17 | Libraries should make sure to thoroughly describe the way plan collections will be produced or consumed. 18 | 19 | ## Root plans 20 | 21 | We often refer to query plans as a graph of nodes (typically a DAG unless the query is recursive). However, we 22 | encode this graph as a collection of trees with a single root tree that references other trees (which may also 23 | transitively reference other trees). Plan serializations all have some way to indicate which plan(s) are "root" 24 | plans. Any plan that is not a root plan and is not referenced (directly or transitively) by some root plan 25 | can safely be ignored. 26 | -------------------------------------------------------------------------------- /site/docs/serialization/text_serialization.md: -------------------------------------------------------------------------------- 1 | # Text Serialization 2 | 3 | To maximize the new user experience, it is important for Substrait to have a text representation of plans. This allows people to experiment with basic tooling. Building simple CLI tools that do things like SQL > Plan and Plan > SQL or REPL plan construction can all be done relatively straightforwardly with a text representation. 4 | 5 | The recommended text serialization format is JSON. Since the text format is not designed for performance, the format can be produced to maximize readability. This also allows nice symmetry between the construction of plans and the configuration of various extensions such as function signatures and user defined types. 6 | 7 | To ensure the JSON is valid, the object will be defined using the [OpenApi 3.1 specification](https://spec.openapis.org/oas/latest.html). This not only allows strong validation, the OpenApi specification enables [code generators](https://github.com/OpenAPITools/openapi-generator) to be easily used to produce plans in many languages. 
8 | 9 | While JSON will be used for much of the plan serialization, Substrait uses a custom simplistic grammar for record level expressions. While one can construct an equation such as `(10 + 5)/2` using a tree of function and literal objects, it is much more human-readable to consume a plan when the information is written similarly to the way one typically consumes scalar expressions. This grammar will be maintained in an ANTLR grammar (targetable to multiple programming languages) and is also planned to be supported via JSON schema definition format tag so that the grammar can be validated as part of the schema validation. 10 | 11 | -------------------------------------------------------------------------------- /site/docs/spec/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - versioning.md 3 | - specification.md 4 | - technology_principles.md 5 | - extending.md 6 | -------------------------------------------------------------------------------- /site/docs/spec/extending.md: -------------------------------------------------------------------------------- 1 | # Extending 2 | 3 | Substrait is a community project and requires consensus about new additions to the specification in order to maintain consistency. The best way to get consensus is to discuss ideas. The main ways to communicate are: 4 | 5 | * Substrait Mailing List 6 | * Substrait Slack 7 | * Community Meeting 8 | 9 | ## Minor changes 10 | 11 | Simple changes like typos and bug fixes do not require as much effort. [File an issue](https://github.com/substrait-io/substrait/issues) or [send a PR](https://github.com/substrait-io/substrait/pulls) and we can discuss it there. 12 | 13 | ## Complex changes 14 | 15 | For complex features it is useful to discuss the change first. It will be useful to gather some background information to help get everyone on the same page. 
16 | 17 | ### Outline the issue 18 | 19 | #### Language 20 | 21 | Every engine has its own terminology. Every Spark user probably knows what an "attribute" is. Velox users will know what a "RowVector" means. Etc. However, Substrait is used by people that come from a variety of backgrounds and you should generally assume that its users do not know anything about your own implementation. As a result, all PRs and discussion should endeavor to use Substrait terminology wherever possible. 22 | 23 | #### Motivation 24 | 25 | What problems does this relation solve? If it is a more logical relation then how does it allow users to express new capabilities? If it is more of an internal relation then how does it map to existing logical relations? How is it different than other existing relations? Why do we need this? 26 | 27 | #### Examples 28 | 29 | Provide example input and output for the relation. Show example plans. Try and motivate your examples, as best as possible, with something that looks like a real world problem. These will go a long way towards helping others understand the purpose of a relation. 30 | 31 | #### Alternatives 32 | 33 | Discuss what alternatives are out there. Are there other ways to achieve similar results? Do some systems handle this problem differently? 34 | 35 | ### Survey existing implementation 36 | 37 | It's unlikely that this is the first time that this has been done. Figuring out how existing systems have approached the problem helps inform the design. 38 | 39 | ### Prototype the feature 40 | 41 | Novel approaches should be implemented as an extension first. 42 | 43 | ### Substrait design principles 44 | 45 | Substrait is designed around interoperability so a feature only used by a single system may not be accepted. But don't despair! Substrait has a highly developed extension system for this express purpose. 
46 | 47 | ### You don't have to do it alone 48 | 49 | If you are hoping to add a feature and these criteria seem intimidating then feel free to start a mailing list discussion before you have all the information and ask for help. Investigating other implementations, in particular, is something that can be quite difficult to do on your own. 50 | -------------------------------------------------------------------------------- /site/docs/spec/technology_principles.md: -------------------------------------------------------------------------------- 1 | # Technology Principles 2 | 3 | * Provide a good suite of well-specified common functionality in databases and data science applications. 4 | * Make it easy for users to privately or publicly extend the representation to support specialized/custom operations. 5 | * Produce something that is language agnostic and requires minimal work to start developing against in a new language. 6 | * Drive towards a common format that avoids specialization for a single favorite producer or consumer. 7 | * Establish clear delineation between specifications that MUST be respected and those that can be optionally ignored. 8 | * Establish a forgiving compatibility approach and versioning scheme that supports cross-version compatibility in the maximum number of cases. 9 | * Minimize the need for consumer intelligence by excluding concepts like overloading, type coercion, implicit casting, field name handling, etc. (Note: this is weak and should be better stated.) 10 | * Decomposability/severability: A particular producer or consumer should be able to produce or consume only a subset of the specification and interact well with any other Substrait system as long as the specific operations requested fit within the subset of the specification supported by the counter system. 
11 | 12 | 13 | -------------------------------------------------------------------------------- /site/docs/spec/versioning.md: -------------------------------------------------------------------------------- 1 | # Versioning 2 | 3 | As an interface specification, the goal of Substrait is to reach a point where (breaking) changes will never need to happen again, or at least be few and far between. 4 | By analogy, Apache Arrow's in-memory format specification has stayed functionally constant, despite many major library versions being released. 5 | However, we're not there yet. 6 | When we believe that we've reached this point, we will signal this by releasing version 1.0.0. 7 | Until then, we will remain in the 0.x.x version regime. 8 | 9 | Despite this, we strive to maintain backward compatibility for both the binary representation and the text representation by means of deprecation. 10 | When a breaking change cannot be reasonably avoided, we may remove previously deprecated fields. 11 | All deprecated fields will be removed for the 1.0.0 release. 12 | 13 | Substrait uses [semantic versioning](https://semver.org/) for its version numbers, with the addition that, during 0.x.y, we increment the x digit for breaking changes and new features, and the y digit for fixes and other nonfunctional changes. 14 | The release process is currently automated and makes a new release every week, provided something has changed on the main branch since the previous release. 15 | This release cadence will likely be slowed down as stability increases over time. 16 | [Conventional commits](https://www.conventionalcommits.org/en/v1.0.0-beta.2/) are used to distinguish between breaking changes, new features, and fixes, 17 | and GitHub actions are used to verify that there are indeed no breaking protobuf changes in a commit, unless the commit message states this. 
18 | -------------------------------------------------------------------------------- /site/docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | img.bordered { 2 | height: auto; 3 | width: auto; 4 | border: 1px solid #9f9f9f; 5 | transition: transform ease-in-out 0.3s; 6 | } -------------------------------------------------------------------------------- /site/docs/tools/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - producer_tools.md 3 | - substrait_validator.md 4 | - third_party_tools.md 5 | -------------------------------------------------------------------------------- /site/docs/tools/producer_tools.md: -------------------------------------------------------------------------------- 1 | # Producer Tools 2 | 3 | ## Isthmus 4 | 5 | [Isthmus](https://github.com/substrait-io/substrait-java/tree/main/isthmus) is an application 6 | that serializes SQL to [Substrait Protobuf](https://substrait.io/serialization/binary_serialization/) 7 | via the Calcite SQL compiler. 8 | -------------------------------------------------------------------------------- /site/docs/tools/substrait_validator.md: -------------------------------------------------------------------------------- 1 | # Substrait Validator 2 | 3 | The [Substrait Validator](https://github.com/substrait-io/substrait-validator) is a tool 4 | used to validate substrait plans as well as print diagnostics information regarding the plan validity. 
5 | -------------------------------------------------------------------------------- /site/docs/tools/third_party_tools.md: -------------------------------------------------------------------------------- 1 | # Third Party Tools 2 | 3 | ## Substrait-tools 4 | The [substrait-tools](https://pypi.org/project/substrait-tools/) Python package provides 5 | a command line interface for producing/consuming substrait plans by leveraging the APIs 6 | from different producers and consumers. 7 | 8 | ## Substrait Fiddle 9 | [Substrait Fiddle](https://substrait-fiddle.com) is an online tool to share, debug, and prototype Substrait plans. 10 | 11 | The [Substrait Fiddle Source](https://github.com/voltrondata/substrait-fiddle) is available allowing it to be run in any environment. 12 | 13 | -------------------------------------------------------------------------------- /site/docs/tutorial/examples.md: -------------------------------------------------------------------------------- 1 | # Code samples and examples 2 | 3 | It's very useful to have examples of how APIs are used; both to get information on the best practices for using APIs and ideas of how they can be used. 4 | 5 | Each language binding is intended to contain examples that are relevant to that language. New contributions are always welcome. 6 | 7 | ## Java 8 | 9 | - [Substrait-Spark](https://github.com/substrait-io/substrait-java/tree/main/examples/substrait-spark) this demonstrates how Substrait plans can be created and consumed within Apache Spark. The examples run within a simple Spark cluster, composed of a couple of Docker containers. 
10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /site/docs/tutorial/expression_trees.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | INDEX_IN( 14 | categories, 15 | "Computers" 16 | ) 17 | IS NULL 18 | 19 | 20 | 21 | 22 | 23 | Function 24 | is_null 25 | 26 | 27 | 28 | 29 | 30 | 31 | Function 32 | index_in 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | Field Ref 41 | categories 42 | 43 | 44 | 45 | 46 | 47 | Literal 48 | "Computers" 49 | 50 | 51 | 52 | 53 | 92 | -------------------------------------------------------------------------------- /site/docs/types/_config: -------------------------------------------------------------------------------- 1 | arrange: 2 | - type_system.md 3 | - type_classes.md 4 | - type_variations.md 5 | - type_parsing.md 6 | - named_structs.md 7 | -------------------------------------------------------------------------------- /site/docs/types/named_structs.md: -------------------------------------------------------------------------------- 1 | # Named Structs 2 | 3 | A Named Struct is a special type construct that combines: 4 | * A Struct type 5 | * A list of names for the fields in the Struct, in depth-first search order 6 | 7 | The depth-first search order for names arises from the ability to nest Structs within other types. All Struct fields must be named, even nested fields. 8 | 9 | Named Structs are most commonly used to model the schema of Read relations. 10 | 11 | ## Determining Names 12 | When producing/consuming names for a NamedStruct, some types require special handling: 13 | 14 | ### Struct 15 | A struct has names for each of its inner fields. 16 | 17 | For example, the following Struct 18 | ``` 19 | struct 20 | ↑ ↑ 21 | a b 22 | ``` 23 | has 2 names, one for each of its inner fields. 
24 | 25 | ### Structs within Compound Types 26 | Struct types nested in compound types must also be named. 27 | 28 | #### Structs within Maps 29 | If a Map contains Structs, either as keys or values or both, the Struct fields must be named. Keys are named before values. For example the following Map 30 | ``` 31 | map, struct> 32 | ↑ ↑ ↑ ↑ ↑ 33 | a b c d e 34 | 35 | ``` 36 | has 5 named fields 37 | * 2 names [a, b] for the struct fields used as a key 38 | * 3 names [c, d, e] for the struct fields used as a value 39 | 40 | #### Structs within List 41 | If a List contains Structs, the Struct fields must be named. For example the following List 42 | ``` 43 | list> 44 | ↑ ↑ 45 | a b 46 | ``` 47 | has 2 named fields [a, b] for the struct fields. 48 | 49 | #### Structs within Struct 50 | Structs can also be embedded within Structs. 51 | 52 | A Struct like 53 | ``` 54 | struct, struct> 55 | ↑ ↑ ↑ ↑ ↑ ↑ ↑ 56 | a b c d e f g 57 | ``` 58 | has 7 names 59 | * 1 name [a] for the 1st nested struct field 60 | * 2 names [b, c] for the fields within the 1st nested struct 61 | * 1 name [d] for the 2nd nested struct field 62 | * 3 names [e, f, g] for the fields within the 2nd nested struct 63 | 64 | ### Putting It All Together 65 | 66 | #### Simple Named Struct 67 | ``` 68 | NamedStruct { 69 | names: [a, b, c, d] 70 | struct: struct, map, i64> 71 | ↑ ↑ ↑ ↑ 72 | a b c d 73 | } 74 | ``` 75 | 76 | #### Structs in Compound Types 77 | ``` 78 | NamedStruct { 79 | names: [a, b, c, d, e, f, g, h] 80 | struct: struct>, map>, i64> 81 | ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ 82 | a b c d e f g h 83 | } 84 | ``` 85 | 86 | #### Structs in Structs 87 | ``` 88 | NamedStruct { 89 | names: [a, b, c, d, e, f, g, h, i, j] 90 | struct: struct, i64, struct>>> 91 | ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ 92 | a b c d e f g h i j 93 | } 94 | ``` 95 | 96 | -------------------------------------------------------------------------------- /site/docs/types/type_system.md: 
-------------------------------------------------------------------------------- 1 | # Type System 2 | 3 | Substrait tries to cover the most common types used in data manipulation. Types beyond this common core may be represented using [simple extensions](../extensions/index.md#simple-extensions). 4 | 5 | Substrait types fundamentally consist of four components: 6 | 7 | | Component | Condition | Examples | Description 8 | | ------------------------------- | ------------------- | ----------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 9 | | [Class](type_classes.md) | Always | `i8`, `string`, `STRUCT`, extensions | Together with the parameter pack, describes the set of non-null values supported by the type. Subdivided into simple and compound type classes. 10 | | Nullability | Always | Either `NULLABLE` (`?` suffix) or `REQUIRED` (no suffix) | Describes whether values of this type can be null. Note that null is considered to be a special value of a nullable type, rather than the only value of a special null type. 11 | | [Variation](type_variations.md) | Always | No suffix or explicitly `[0]` (system-preferred), or an extension | Allows different variations of the same type class to exist in a system at a time, usually distinguished by in-memory format. 12 | | Parameters | Compound types only | `<10, 2>` (for `DECIMAL`), `` (for `STRUCT`) | Some combination of zero or more data types or integers. The expected set of parameters and the significance of each parameter depends on the type class. 13 | 14 | Refer to [Type Parsing](type_parsing.md) for a description of the syntax used to describe types. 15 | 16 | !!! note "Note" 17 | Substrait employs a strict type system without any coercion rules. 
All changes in types must be made explicit via [cast expressions](../expressions/specialized_record_expressions.md). 18 | -------------------------------------------------------------------------------- /site/docs/types/type_variations.md: -------------------------------------------------------------------------------- 1 | # Type Variations 2 | 3 | Type variations may be used to represent differences in representation between different consumers. For example, an engine might support dictionary encoding for a string, or could be using either a row-wise or columnar representation of a struct. All variations of a type are expected to have the same semantics when operated on by functions or other expressions. 4 | 5 | All variations except the "system-preferred" variation (a.k.a. `[0]`, see [Type Parsing](type_parsing.md)) must be defined using [simple extensions](../extensions/index.md#simple-extensions). The key properties of these variations are: 6 | 7 | | Property | Description | 8 | | ----------------- | ------------------------------------------------------------ | 9 | | Base Type Class | The type class that this variation belongs to. | 10 | | Name | The name used to reference this type. Should be unique within type variations for this parent type within a simple extension. | 11 | | Description | A human description of the purpose of this type variation. | 12 | | Function Behavior | **INHERITS** or **SEPARATE**: whether functions that support the system-preferred variation implicitly also support this variation, or whether functions should be resolved independently. For example, if one has the function `add(i8,i8)` defined and then defines an `i8` variation, this determines whether the `i8` variation can be bound to the base `add` operation (inherits) or whether a specialized version of `add` needs to be defined specifically for this variation (separate). Defaults to inherits. 
| 13 | -------------------------------------------------------------------------------- /site/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: "Substrait: Cross-Language Serialization for Relational Algebra" 2 | site_description: >- 3 | Substrait is a new specification and set of tools that allow different systems to express clear data manipulation 4 | operations. 5 | site_url: "https://substrait.io" 6 | edit_uri: "" 7 | strict: true 8 | remote_name: origin 9 | remote_branch: gh-pages 10 | use_directory_urls: true 11 | #include_search_page: false 12 | #search_index_only: true 13 | theme: 14 | name: material 15 | custom_dir: overrides 16 | logo: img/logo.svg 17 | palette: 18 | primary: pink 19 | features: 20 | - navigation.tabs 21 | - navigation.tabs.sticky 22 | - navigation.expand 23 | - navigation.instant 24 | - toc.integrate 25 | extra_css: 26 | - stylesheets/extra.css 27 | 28 | extra: 29 | versions: 30 | slackinvitelink: https://join.slack.com/t/substrait/shared_invite/zt-10oeki45w-FARWnh4NMpXnm4x~hWyiGQ 31 | analytics: 32 | provider: google 33 | property: G-57ZX8S93Q5 34 | social: 35 | - icon: fontawesome/brands/twitter 36 | link: https://twitter.com/substrait_io 37 | repo_url: https://github.com/substrait-io/substrait 38 | plugins: 39 | - table-reader 40 | - markdownextradata 41 | - search 42 | - awesome-pages: 43 | filename: _config 44 | - minify: 45 | minify_html: true 46 | - mkdocs_protobuf: 47 | proto_dir: ../proto/substrait 48 | indent_depth: 4 # required to make superfences happy 49 | - redirects: 50 | redirect_maps: 51 | 'types/simple_logical_types.md': 'types/type_classes.md' 52 | 'types/compound_logical_types.md': 'types/type_classes.md' 53 | 'types/user_defined_types.md': 'types/type_classes.md' 54 | - gen-files: 55 | scripts: 56 | - docs/extensions/generate_function_docs.py 57 | watch: 58 | - ../extensions 59 | markdown_extensions: 60 | - smarty 61 | - sane_lists 62 | - extra 63 | - tables 
64 | - markdown.extensions.admonition 65 | - markdown.extensions.attr_list 66 | - markdown.extensions.def_list 67 | - markdown.extensions.footnotes 68 | - meta 69 | - markdown.extensions.toc: 70 | permalink: true 71 | - pymdownx.arithmatex: 72 | generic: true 73 | - pymdownx.betterem: 74 | smart_enable: all 75 | - pymdownx.caret 76 | - pymdownx.critic 77 | - pymdownx.details 78 | - pymdownx.emoji: 79 | emoji_index: !!python/name:material.extensions.emoji.twemoji 80 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 81 | - pymdownx.highlight 82 | - pymdownx.inlinehilite 83 | - pymdownx.keys 84 | - pymdownx.magiclink: 85 | repo_url_shorthand: true 86 | user: substrait-io 87 | repo: substrait 88 | - pymdownx.mark 89 | - pymdownx.smartsymbols 90 | - pymdownx.snippets: 91 | check_paths: true 92 | - pymdownx.superfences: 93 | custom_fences: 94 | - name: mermaid 95 | class: mermaid 96 | format: !!python/name:pymdownx.superfences.fence_code_format 97 | - pymdownx.tabbed: 98 | alternate_style: true 99 | - pymdownx.tasklist: 100 | custom_checkbox: true 101 | - pymdownx.tilde 102 | -------------------------------------------------------------------------------- /site/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs>=1.4.2,<2 2 | mkdocs-material>=9.1.5 3 | mkdocs-minify-plugin>=0.6.1,<1 4 | mkdocs-redirects>=1.2.0,<2 5 | pymdown-extensions>=9.9.1,<11 6 | mkdocs-awesome-pages-plugin>=2.8.0,<3 7 | mkdocs-gen-files>=0.4.0,<1 8 | mkdocs-markdownextradata-plugin>=0.2.5,<1 9 | mkdocs-protobuf>=0.1.0,<1 10 | mkdocs-table-reader-plugin>=2,<4 11 | pygments>=2.14,<3 12 | oyaml>=1.0,<2 13 | mdutils>=1.4.0,<2 14 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait/d430e521f203aec6a4e06731d4bfd68cdf61f443/tests/__init__.py 
-------------------------------------------------------------------------------- /tests/baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "registry": { 3 | "dependency_count": 13, 4 | "extension_count": 13, 5 | "function_count": 165, 6 | "num_aggregate_functions": 29, 7 | "num_scalar_functions": 158, 8 | "num_window_functions": 11, 9 | "num_function_overloads": 517 10 | }, 11 | "coverage": { 12 | "total_test_count": 1086, 13 | "num_function_variants": 517, 14 | "num_covered_function_variants": 229 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /tests/cases/aggregate_approx/approx_count_distinct.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_aggregate_approx.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | approx_count_distinct((1, -2, 3, -4, 5, 6)::i8) = 6::i64 6 | approx_count_distinct((-32767, -20000, 30000, 5, 32767)::i16) = 5::i64 7 | approx_count_distinct((-2147483648, -10000000, 30000000, 2147483647)::i32) = 4::i64 8 | approx_count_distinct((-214748364800000, -1000000000, 0, 922337203685477580)::i64) = 4::i64 9 | approx_count_distinct((1)::i8) = 1::i64 10 | approx_count_distinct(()::i8) = 0::i64 11 | approx_count_distinct((Null, Null, Null)::i8) = 0::i64 12 | approx_count_distinct((Null, Null, 4, 3, Null, 922337203685477580, 12833888)::i64) = 4::i64 13 | -------------------------------------------------------------------------------- /tests/cases/aggregate_generic/count.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_aggregate_generic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | count((100, -200, 300, -400, 5, 6)::i16) = 6::i64 6 | count((1000)::i16) = 1::i64 
7 | count(()::i16) = 0::i64 8 | count((Null, Null, Null)::i16) = 0::i64 9 | count((Null, Null, Null, 1000)::i16) = 1::i64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/abs.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | abs(25::i8) = 25::i8 6 | abs(-200::i16) = 200::i16 7 | abs(30000::i32) = 30000::i32 8 | abs(-9223372036854775800::i64) = 9223372036854775800::i64 9 | abs(2.55::fp32) = 2.55::fp32 10 | abs(-2.0000007152557373046875::fp64) = 2.0000007152557373046875::fp64 11 | 12 | # null_input: Examples with null as input 13 | abs(null::i8) = null::i8 14 | 15 | # overflow: Examples demonstrating overflow behavior 16 | abs(-128::i8) [overflow:ERROR] = 17 | abs(-128::i8) [overflow:SATURATE] = 127::i8 18 | abs(-128::i8) [overflow:SILENT] = 19 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/acos.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | acos(0.00::fp32) = 1.5707963267948966::fp32 6 | acos(1.0::fp64) = 0.0::fp64 7 | acos(-0.0000009::fp64) = 1.5707972267948966::fp64 8 | acos(null::fp64) = null::fp64 9 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/acosh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | acosh(1.0::fp64) = 0.0::fp64 6 | acosh(10.0005::fp64) = 2.9932730967481995::fp64 7 | 
acosh(null::fp64) = null::fp64 8 | 9 | # On_domain_error: Examples demonstrating On_domain_error behavior 10 | acosh(0.01::fp32) [on_domain_error:ERROR] = 11 | acosh(0.5::fp64) [on_domain_error:NAN] = nan::fp64 12 | acosh(0.5::fp32) [on_domain_error:NONE] = null::fp32 13 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/add.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | add(120::i8, 5::i8) = 125::i8 6 | add(100::i16, 100::i16) = 200::i16 7 | add(30000::i32, 30000::i32) = 60000::i32 8 | add(2000000000::i64, 2000000000::i64) = 4000000000::i64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | add(120::i8, 10::i8) [overflow:ERROR] = 12 | add(30000::i16, 30000::i16) [overflow:ERROR] = 13 | add(2000000000::i32, 2000000000::i32) [overflow:ERROR] = 14 | add(9223372036854775807::i64, 1::i64) [overflow:ERROR] = 15 | add(120::i8, 10::i8) [overflow:SATURATE] = 127::i8 16 | add(-120::i8, -10::i8) [overflow:SATURATE] = -128::i8 17 | add(120::i8, 10::i8) [overflow:SILENT] = 18 | 19 | # floating_exception: Examples demonstrating exceptional floating point cases 20 | add(1.5e+308::fp64, 1.5e+308::fp64) = inf::fp64 21 | add(-1.5e+308::fp64, -1.5e+308::fp64) = -inf::fp64 22 | 23 | # rounding: Examples demonstrating floating point rounding behavior 24 | add(4.5::fp32, 2.5000007152557373046875::fp32) [rounding:TIE_TO_EVEN] = 7.00000095367431640625::fp32 25 | 26 | # types: Examples demonstrating behavior of different data types 27 | add(4.5::fp64, 2.5000007152557373046875::fp64) = 7.0000007152557373046875::fp64 28 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/asin.test: -------------------------------------------------------------------------------- 1 | 
### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | asin(0.0::fp32) = 0.0::fp32 6 | asin(1.0::fp64) = 1.5707963267948966::fp64 7 | asin(0.009::fp64) = 0.009000121504428887::fp64 8 | asin(-0.009::fp64) = -0.009000121504428887::fp64 9 | asin(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/asinh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | asinh(0.01::fp32) = 0.009999833340832886::fp32 6 | asinh(1.0::fp64) = 0.881373587019543::fp64 7 | asinh(0.0009::fp64) = 0.0008999998785000443::fp64 8 | asinh(null::fp64) = null::fp64 9 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/atan.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | atan(0.0::fp32) = 0.0::fp32 6 | atan(1.0::fp64) = 0.7853981633974483::fp64 7 | atan(7.01::fp64) = 1.4290989925795292::fp64 8 | atan(-7.01::fp64) = -1.4290989925795292::fp64 9 | atan(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/atan2.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | atan2(0.0::fp32, 0.0::fp32) = 0.0::fp32 6 | atan2(1.0::fp64, 1.0::fp64) = 0.7853981633974483::fp64 7 | atan2(0.009::fp64, 
0.0008::fp64) = 1.482140444927459::fp64 8 | atan2(null::fp64, 0.0008::fp64) = null::fp64 9 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/atanh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | atanh(0.0::fp32) = 0.0::fp32 6 | atanh(1.0::fp64) = inf::fp64 7 | atanh(0.009::fp64) = 0.009000243011810481::fp64 8 | atanh(-0.009::fp64) = -0.009000243011810481::fp64 9 | atanh(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_and(0::i8, 1::i8) = 0::i8 6 | bitwise_and(127::i8, 127::i8) = 127::i8 7 | bitwise_and(-127::i8, -10::i8) = -128::i8 8 | bitwise_and(31766::i16, 900::i16) = 4::i16 9 | bitwise_and(-31766::i16, 900::i16) = 896::i16 10 | bitwise_and(2147483647::i32, 1234567::i32) = 1234567::i32 11 | bitwise_and(2147483647::i32, 1234567::i32) = 1234567::i32 12 | bitwise_and(9223372036854775807::i64, 127::i64) = 127::i64 13 | bitwise_and(-9223372036854775807::i64, 127::i64) = 1::i64 14 | bitwise_and(null::i64, 127::i64) = null::i64 15 | bitwise_and(127::i64, null::i64) = null::i64 16 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_not.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_not(0::i8) 
= -1::i8 6 | bitwise_not(1::i8) = -2::i8 7 | bitwise_not(-127::i8) = 126::i8 8 | bitwise_not(31766::i16) = -31767::i16 9 | bitwise_not(-31766::i16) = 31765::i16 10 | bitwise_not(2147483647::i32) = -2147483648::i32 11 | bitwise_not(2147483647::i32) = -2147483648::i32 12 | bitwise_not(9223372036854775807::i64) = -9223372036854775808::i64 13 | bitwise_not(-9223372036854775807::i64) = 9223372036854775806::i64 14 | bitwise_not(null::i64) = null::i64 15 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_or.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_or(0::i8, 1::i8) = 1::i8 6 | bitwise_or(127::i8, 127::i8) = 127::i8 7 | bitwise_or(-127::i8, -10::i8) = -9::i8 8 | bitwise_or(31766::i16, 900::i16) = 32662::i16 9 | bitwise_or(-31766::i16, 900::i16) = -31762::i16 10 | bitwise_or(2147483647::i32, 123456789::i32) = 2147483647::i32 11 | bitwise_or(9223372036854775807::i64, 127::i64) = 9223372036854775807::i64 12 | bitwise_or(-9223372036854775807::i64, 127::i64) = -9223372036854775681::i64 13 | bitwise_or(null::i64, 127::i64) = null::i64 14 | bitwise_or(127::i64, null::i64) = null::i64 15 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/bitwise_xor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_xor(0::i8, 1::i8) = 1::i8 6 | bitwise_xor(127::i8, 127::i8) = 0::i8 7 | bitwise_xor(-127::i8, -10::i8) = 119::i8 8 | bitwise_xor(31766::i16, 900::i16) = 32658::i16 9 | bitwise_xor(-31766::i16, 900::i16) = -32658::i16 10 | 
bitwise_xor(2147483647::i32, 123456789::i32) = 2024026858::i32 11 | bitwise_xor(2147483647::i32, 123456789::i32) = 2024026858::i32 12 | bitwise_xor(9223372036854775807::i64, 127::i64) = 9223372036854775680::i64 13 | bitwise_xor(-9223372036854775807::i64, 127::i64) = -9223372036854775682::i64 14 | bitwise_xor(null::i64, 127::i64) = null::i64 15 | bitwise_xor(127::i64, null::i64) = null::i64 16 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/cos.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | cos(0.00::fp32) = 1.0::fp32 6 | cos(1.0::fp64) = 0.5403023058681398::fp64 7 | cos(7.0000009::fp64) = 0.7539016630550606::fp64 8 | cos(-7.00000095::fp64) = 0.7539016302056953::fp64 9 | cos(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/cosh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | cosh(0.00::fp32) = 1.0::fp32 6 | cosh(1.0::fp64) = 1.5430806348152437::fp64 7 | cosh(7.0000009::fp64) = 548.3175286399451::fp64 8 | cosh(-7.00000095::fp64) = 548.3175560557769::fp64 9 | cosh(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/divide.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | divide(25::i8, 5::i8) = 5::i8 6 | divide(200::i16, -100::i16) = -2::i16 7 | 
divide(60000::i32, 200::i32) = 300::i32 8 | divide(4000000000::i64, -5000::i64) = -800000::i64 9 | 10 | # division_by_zero: Examples demonstrating division by zero 11 | divide(5::i8, 0::i8) [on_division_by_zero:NAN] = null::i8 12 | divide(5::i8, 0::i8) [on_division_by_zero:ERROR] = 13 | 14 | # overflow: Examples demonstrating overflow behavior 15 | divide(-9223372036854775808::i64, -1::i64) [overflow:ERROR] = 16 | divide(-128::i8, -1::i8) [overflow:SATURATE] = 127::i8 17 | 18 | # floating_exception: Examples demonstrating exceptional floating point cases 19 | divide(1.5e+208::fp64, 1.5e-200::fp64) = inf::fp64 20 | divide(1.5e+200::fp64, -1.5e-208::fp64) = -inf::fp64 21 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/exp.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | exp(100::i64) = 2.6881171418161356e+43::fp64 6 | exp(0.25::fp32) = 1.2840254166877414::fp32 7 | exp(0.693::fp64) = 1.9997056605411638::fp64 8 | exp(2.0000007152557373046875::fp64) = 7.3890613839973085::fp64 9 | exp(0.0::fp64) = 1.0::fp64 10 | exp(null::fp64) = null::fp64 11 | exp(1000::i64) = inf::fp64 12 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/factorial.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | factorial(0::i32) = 1::i32 6 | factorial(1::i32) = 1::i32 7 | factorial(20::i64) = 2432902008176640000::i64 8 | factorial(null::i32) = null::i32 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | factorial(1000000::i32) [overflow:ERROR] = 12 | 
-------------------------------------------------------------------------------- /tests/cases/arithmetic/max.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | max((20, -3, 1, -10, 0, 5)::i8) = 20::i8 6 | max((-32768, 32767, 20000, -30000)::i16) = 32767::i16 7 | max((-214748648, 214748647, 21470048, 4000000)::i32) = 214748647::i32 8 | max((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::i64) = 2000000000::i64 9 | max((2.5, 0, 5.0, -2.5, -7.5)::fp32) = 5.0::fp32 10 | max((1.5e+308, 1.5e+10, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = 1.5e+308::fp64 11 | 12 | # null_handling: Examples with null as input or output 13 | max((Null, Null, Null)::i16) = Null::i16 14 | max(()::i16) = Null::i16 15 | max((2000000000, Null, 629000000, -100000000, Null, 987654321)::i64) = 2000000000::i64 16 | max((Null, inf)::fp64) = inf::fp64 17 | max((Null, -inf, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -1.5e+7::fp64 18 | max((1.5e+308, 1.5e+10, Null, -1.5e+7, Null)::fp64) = 1.5e+308::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/min.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | min((20, -3, 1, -10, 0, 5)::i8) = -10::i8 6 | min((-32768, 32767, 20000, -30000)::i16) = -32768::i16 7 | min((-214748648, 214748647, 21470048, 4000000)::i32) = -214748648::i32 8 | min((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::i64) = -3217908979::i64 9 | min((2.5, 0, 5.0, -2.5, -7.5)::fp32) = -7.5::fp32 10 | min((1.5e+308, 1.5e+10, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -1.5e+70::fp64 11 | 12 | # null_handling: Examples 
with null as input or output 13 | min((Null, inf)::fp64) = inf::fp64 14 | min((Null, Null, Null)::i16) = Null::i16 15 | min(()::i16) = Null::i16 16 | min((2000000000, Null, 629000000, -100000000, Null, 987654321)::i64) = -100000000::i64 17 | min((Null, -inf, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -inf::fp64 18 | min((1.5e+308, 1.5e+10, Null, -1.5e+7, Null)::fp64) = -1.5e+7::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/modulus.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | modulus(9::i8, 3::i8) = 0::i8 6 | modulus(10::i8, -3::i8) = 1::i8 7 | modulus(32767::i16, 1000::i16) = 767::i16 8 | modulus(-2147483647::i32, 300000000::i32) = -47483647::i32 9 | modulus(-9223372036854775800::i64, -80000000000000::i64) = -12036854775800::i64 10 | modulus(5::i8, null::i8) = null::i8 11 | modulus(null::i64, 1::i64) = null::i64 12 | modulus(null::i64, null::i64) = null::i64 13 | 14 | # on_domain_error: Examples demonstrating operation when the divisor is 0 15 | modulus(5::i8, 0::i8) [on_domain_error:NULL] = null::i8 16 | modulus(5::i8, 0::i8) [on_domain_error:ERROR] = 17 | 18 | # division_type: Examples demonstrating truncate and floor division types 19 | modulus(8::i8, -3::i8) [division_type:TRUNCATE] = 2::i8 20 | modulus(8::i8, -3::i8) [division_type:FLOOR] = -1::i8 21 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/multiply.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | multiply(25::i8, 5::i8) = 125::i8 6 | multiply(2::i16, -100::i16) = -200::i16 7 | 
multiply(300::i32, 200::i32) = 60000::i32 8 | multiply(80000::i64, -5000::i64) = -400000000::i64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | multiply(13::i8, 10::i8) [overflow:ERROR] = 12 | multiply(11::i16, 3000::i16) [overflow:ERROR] = 13 | multiply(3::i32, 1000000000::i32) [overflow:ERROR] = 14 | multiply(1000000000000000000::i64, 10::i64) [overflow:ERROR] = 15 | multiply(13::i8, 10::i8) [overflow:SATURATE] = 127::i8 16 | multiply(-13::i8, 10::i8) [overflow:SATURATE] = -128::i8 17 | multiply(13::i8, 10::i8) [overflow:SILENT] = 18 | 19 | # floating_exception: Examples demonstrating exceptional floating point cases 20 | multiply(1.5e+100::fp64, 1.5e+208::fp64) = inf::fp64 21 | multiply(1.5e+100::fp64, -1.5e+208::fp64) = -inf::fp64 22 | 23 | # types: Examples demonstrating behavior of different data types 24 | multiply(4.5::fp64, 2.5000007152557373046875::fp64) = 11.250003218650818::fp64 25 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/negate.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | negate(25::i8) = -25::i8 6 | negate(-200::i16) = 200::i16 7 | negate(30000::i32) = -30000::i32 8 | negate(9223372036854775800::i64) = -9223372036854775800::i64 9 | negate(2.50::fp32) = -2.50::fp32 10 | negate(2.000002861022949::fp64) = -2.000002861022949::fp64 11 | negate(inf::fp64) = -inf::fp64 12 | 13 | # null_input: Examples with null as input 14 | negate(null::i8) = null::i8 15 | 16 | # overflow: Examples demonstrating overflow behavior 17 | negate(-128::i8) [overflow:ERROR] = 18 | negate(-128::i8) [overflow:SATURATE] = 127::i8 19 | negate(-128::i8) [overflow:SILENT] = 20 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/power.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | power(8::i64, 2::i64) = 64::i64 6 | power(1.0::fp32, -1.0::fp32) = 1.0::fp32 7 | power(2.0::fp64, -2.0::fp64) = 0.25::fp64 8 | power(13::i64, 10::i64) = 137858491849::i64 9 | 10 | # floating_exception: Examples demonstrating exceptional floating point cases 11 | power(1.5e+100::fp64, 1.5e+208::fp64) = inf::fp64 12 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sin.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sin(0.0::fp32) = 0.0::fp32 6 | sin(1.0::fp64) = 0.8414709848078965::fp64 7 | sin(7.0000009::fp64) = 0.6569872772305518::fp64 8 | sin(-7.0000009::fp64) = -0.6569872772305518::fp64 9 | sin(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sinh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sinh(0.0::fp32) = 0.0::fp32 6 | sinh(1.0::fp64) = 1.1752011936438014::fp64 7 | sinh(7.0000009::fp64) = 548.3166167588001::fp64 8 | sinh(-7.0000009::fp64) = -548.3166167588001::fp64 9 | sinh(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sqrt.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: 
'/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sqrt(25::i64) = 5::fp64 6 | sqrt(0::i64) = 0::fp64 7 | sqrt(-1::i64) [on_domain_error:NAN] = null::fp64 8 | sqrt(-9223372036854775800::i64) [on_domain_error:NAN] = null::fp64 9 | sqrt(9223372036854775800::i64) = 3037000499.97605::fp64 10 | sqrt(null::i64) = null::fp64 11 | sqrt(6.25::fp32) = 2.5::fp32 12 | sqrt(2.0000007152557373046875::fp64) = 1.4142138152541635::fp64 13 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/subtract.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | subtract(120::i8, 5::i8) = 115::i8 6 | subtract(-100::i16, 100::i16) = -200::i16 7 | subtract(-30000::i32, 30000::i32) = -60000::i32 8 | subtract(-2000000000::i64, 2000000000::i64) = -4000000000::i64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | subtract(-120::i8, 10::i8) [overflow:ERROR] = 12 | subtract(-30000::i16, 30000::i16) [overflow:ERROR] = 13 | subtract(-2000000000::i32, 2000000000::i32) [overflow:ERROR] = 14 | subtract(-9223372036854775808::i64, 1::i64) [overflow:ERROR] = 15 | subtract(-120::i8, 10::i8) [overflow:SATURATE] = -128::i8 16 | subtract(120::i8, -10::i8) [overflow:SATURATE] = 127::i8 17 | subtract(-120::i8, 10::i8) [overflow:SILENT] = 18 | 19 | # floating_exception: Examples demonstrating exceptional floating point cases 20 | subtract(-1.5e+308::fp64, 1.5e+308::fp64) = -inf::fp64 21 | subtract(1.5e+308::fp64, -1.5e+308::fp64) = inf::fp64 22 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/sum.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### 
SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sum((0, -1, 2, 20)::i8) = 21::i64 6 | sum((2000000, -3217908, 629000, -100000, 0, 987654)::i32) = 298746::i64 7 | sum((2.5, 0, 5.0, -2.5, -7.5)::fp32) = -2.5::fp64 8 | sum((2.5000007152557373046875, 7.0000007152557373046875, 0, 7.0000007152557373046875)::fp64) = 16.500002145767212::fp64 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | sum((9223372036854775806, 1, 1, 1, 1, 10000000000)::i64) [overflow:ERROR] = 12 | 13 | # floating_exception: Examples demonstrating exceptional floating point cases 14 | sum((1.5e+308, 1.5e+308, 1.5e+308)::fp64) = inf::fp64 15 | sum((-1.5e+308, -1.5e+308, -1.5e+308)::fp64) = -inf::fp64 16 | sum((2.500000715, inf, 2.500000715)::fp64) = inf::fp64 17 | sum((2.5000007, -inf, 2.5000007, 10.0)::fp64) = -inf::fp64 18 | 19 | # null_handling: Examples with null as input or output 20 | sum((Null, Null, Null)::i16) = Null::i64 21 | sum(()::i16) = Null::i64 22 | sum((200000, Null, 629000, -10000, 0, 987621)::i32) = 1806621::i64 23 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/tan.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | tan(0.0::fp32) = 0.0::fp32 6 | tan(0.5::fp64) = 0.5463024898437905::fp64 7 | tan(7.01::fp64) = 0.8891974677731088::fp64 8 | tan(-7.01::fp64) = -0.8891974677731088::fp64 9 | tan(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic/tanh.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml' 3 | 4 | # basic: Basic 
examples without any special cases 5 | tanh(0.0::fp32) = 0.0::fp32 6 | tanh(1.0::fp64) = 0.7615941559557649::fp64 7 | tanh(7.0000009::fp64) = 0.9999983369469382::fp64 8 | tanh(-7.0000009::fp64) = -0.9999983369469382::fp64 9 | tanh(null::fp64) = null::fp64 10 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/bitwise_and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_and(0::dec<1, 0>, 1::dec<1, 0>) = 0::dec<1, 0> 6 | bitwise_and(127::dec<3, 0>, 127::dec<3, 0>) = 127::dec<3, 0> 7 | bitwise_and(-127::dec<3, 0>, -10::dec<2, 0>) = -128::dec<3, 0> 8 | bitwise_and(31766::dec<5, 0>, 900::dec<3, 0>) = 4::dec<5, 0> 9 | bitwise_and(-31766::dec<5, 0>, 900::dec<3, 0>) = 896::dec<5, 0> 10 | bitwise_and(2147483647::dec<10, 0>, 1234567::dec<7, 0>) = 1234567::dec<10, 0> 11 | bitwise_and(-2147483647::dec<10, 0>, 1234567::dec<7, 0>) = 1::dec<10, 0> 12 | bitwise_and(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 127::dec<19, 0> 13 | bitwise_and(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 1::dec<19, 0> 14 | 15 | # max_values: test with max values 16 | bitwise_and(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 17 | bitwise_and(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 0::dec<38, 0> 18 | bitwise_and(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = -99999999999999999999999999999999999999::dec<38, 0> 19 | 20 | # null_values: test with null values 21 | bitwise_and(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0> 22 | bitwise_and(null::dec<1, 0>, null::dec<1, 0>) = 
null::dec<1, 0> 23 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/bitwise_or.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_or(0::dec<1, 0>, 1::dec<1, 0>) = 1::dec<1, 0> 6 | bitwise_or(127::dec<3, 0>, 127::dec<3, 0>) = 127::dec<3, 0> 7 | bitwise_or(-127::dec<3, 0>, -10::dec<2, 0>) = -9::dec<3, 0> 8 | bitwise_or(31766::dec<5, 0>, 900::dec<3, 0>) = 32662::dec<5, 0> 9 | bitwise_or(-31766::dec<5, 0>, 900::dec<3, 0>) = -31762::dec<5, 0> 10 | bitwise_or(2147483647::dec<10, 0>, 123456789::dec<9, 0>) = 2147483647::dec<10, 0> 11 | bitwise_or(-2147483647::dec<10, 0>, 123456789::dec<9, 0>) = -2024026859::dec<10, 0> 12 | bitwise_or(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 9223372036854775807::dec<19, 0> 13 | bitwise_or(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = -9223372036854775681::dec<19, 0> 14 | 15 | # max_values: test with max values 16 | bitwise_or(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 17 | bitwise_or(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 18 | bitwise_or(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = -99999999999999999999999999999999999999::dec<38, 0> 19 | 20 | # null_values: test with null values 21 | bitwise_or(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0> 22 | bitwise_or(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0> 23 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/bitwise_xor.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bitwise_xor(0::dec<1, 0>, 1::dec<1, 0>) = 1::dec<1, 0> 6 | bitwise_xor(127::dec<3, 0>, 127::dec<3, 0>) = 0::dec<3, 0> 7 | bitwise_xor(-127::dec<3, 0>, -10::dec<2, 0>) = 119::dec<3, 0> 8 | bitwise_xor(31766::dec<5, 0>, 900::dec<3, 0>) = 32658::dec<5, 0> 9 | bitwise_xor(-31766::dec<5, 0>, 900::dec<3, 0>) = -32658::dec<5, 0> 10 | bitwise_xor(2147483647::dec<10, 0>, 123456789::dec<9, 0>) = 2024026858::dec<10, 0> 11 | bitwise_xor(-2147483647::dec<10, 0>, 123456789::dec<9, 0>) = -2024026860::dec<10, 0> 12 | bitwise_xor(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 9223372036854775680::dec<19, 0> 13 | bitwise_xor(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = -9223372036854775682::dec<19, 0> 14 | 15 | # max_values: test with max values 16 | bitwise_xor(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 0::dec<38, 0> 17 | bitwise_xor(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 18 | bitwise_xor(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = 0::dec<38, 0> 19 | 20 | # null_values: test with null values 21 | bitwise_xor(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0> 22 | bitwise_xor(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0> 23 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/factorial_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic 
examples without any special cases 5 | factorial(0::dec<1, 0>) = 1::dec<38, 0> 6 | factorial(1::dec<1, 0>) = 1::dec<38, 0> 7 | factorial(20::dec<2, 0>) = 2432902008176640000::dec<38, 0> 8 | 9 | # overflow: Examples demonstrating overflow behavior 10 | factorial(34::dec<2, 0>) = 11 | 12 | # negative_value: Examples demonstrating behavior on negative value 13 | factorial(-1::dec<1, 0>) = 14 | 15 | # null_values: test with null values 16 | factorial(null::dec<38, 0>) = null::dec<38, 0> 17 | factorial(null::dec<1, 0>) = null::dec<38, 0> 18 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/max_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | max((20, -3, 1, -10, 0, 5)::dec<2, 0>) = 20::dec<2, 0> 6 | max((-32768, 32767, 20000, -30000)::dec<5, 0>) = 32767::dec<5, 0> 7 | max((-214748648, 214748647, 21470048, 4000000)::dec<9, 0>) = 214748647::dec<9, 0> 8 | max((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::dec<10, 0>) = 2000000000::dec<10, 0> 9 | max((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = 5.0::dec<2, 1> 10 | max((99999999999999999999999999999999999999, 0, -99999999999999999999999999999999999998, 111111111, -76)::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 11 | 12 | # null_handling: Examples with null as input or output 13 | max((Null, Null, Null)::dec<1, 0>) = null::dec<1, 0> 14 | max(()::dec<1, 0>) = null::dec<1, 0> 15 | max((2000000000, Null, 629000000, -100000000, Null, 987654321)::dec<10, 0>) = 2000000000::dec<10, 0> 16 | max((Null, Null)::dec<1, 0>) = null::dec<1, 0> 17 | max(()::dec<1, 0>) = null::dec<1, 0> 18 | max((99999999999999999999999999999999999999, -99999999999999999999999999999999999998, Null, 11111111111111111111111111111111111111, 
Null)::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0> 19 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/min_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | min((20, -3, 1, -10, 0, 5)::dec<2, 0>) = -10::dec<2, 0> 6 | min((-32768, 32767, 20000, -30000)::dec<5, 0>) = -32768::dec<5, 0> 7 | min((-214748648, 214748647, 21470048, 4000000)::dec<9, 0>) = -214748648::dec<9, 0> 8 | min((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::dec<10, 0>) = -3217908979::dec<10, 0> 9 | min((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = -7.5::dec<2, 1> 10 | min((99999999999999999999999999999999999999, -99999999999999999999999999999999999998, -99999999999999999999999999999999999997, 0, 1111)::dec<38, 0>) = -99999999999999999999999999999999999998::dec<38, 0> 11 | 12 | # null_handling: Examples with null as input or output 13 | min((Null, Null, Null)::dec<1, 0>) = Null::dec<1, 0> 14 | min(()::dec<1, 0>) = Null::dec<1, 0> 15 | min((2000000000, Null, 629000000, -100000000, Null, 987654321)::dec<10, 0>) = -100000000::dec<10, 0> 16 | min((-99999999999999999999999999999999999998, Null, 99999999999999999999999999999999999999, Null)::dec<38, 0>) = -99999999999999999999999999999999999998::dec<38, 0> 17 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/power.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | power(8::dec<38, 0>, 2::dec<38, 0>) = 64::fp64 6 | power(1.0::dec<38, 0>, -1.0::dec<38, 0>) = 1.0::fp64 7 | 
power(2.0::dec<38, 0>, -2.0::dec<38, 0>) = 0.25::fp64 8 | power(13::dec<38, 0>, 10::dec<38, 0>) = 137858491849::fp64 9 | 10 | # result_more_than_input_precision: Examples demonstrating result with more precision than input 11 | power(16::dec<2, 0>, 4::dec<38, 0>) = 65536::fp64 12 | 13 | # floating_exception: Examples demonstrating exceptional floating point cases 14 | power(1.5e+10::dec<38, 0>, 1.5e+20::dec<38, 0>) = inf::fp64 15 | power(-16::dec<4, 0>, 1001::dec<4, 0>) = -inf::fp64 16 | 17 | # complex_number: Examples demonstrating complex number output 18 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:NAN] = nan::fp64 19 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:ERROR] = 20 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/power_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | power(8::dec, 2::dec<38, 0>) = 64::fp64 6 | power(1.0::dec, -1.0::dec<38, 0>) = 1.0::fp64 7 | power(2.0::dec<38, 0>, -2.0::dec<38, 0>) = 0.25::fp64 8 | power(13::dec<38, 0>, 10::dec<38, 0>) = 137858491849::fp64 9 | 10 | # result_more_than_input_precision: Examples demonstrating result with more precision than input 11 | power(16::dec<2, 0>, 4::dec<38, 0>) = 65536::fp64 12 | 13 | # floating_exception: Examples demonstrating exceptional floating point cases 14 | power(1.5e+10::dec<38, 0>, 1.5e+20::dec<38, 0>) = inf::fp64 15 | power(-16::dec<4, 0>, 1001::dec<4, 0>) = -inf::fp64 16 | 17 | # complex_number: Examples demonstrating complex number output 18 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:NAN] = nan::fp64 19 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:ERROR] = 20 | 21 | # null_values: test with null values 22 | power(null::dec<38, 0>, 127::dec<38, 0>) = 
null::fp64 23 | power(null::dec<38, 0>, null::dec<38, 0>) = null::fp64 24 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/sqrt_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sqrt(25::dec<2, 0>) = 5::fp64 6 | sqrt(0::dec<1, 0>) = 0::fp64 7 | 8 | # max_input: max allowed input returns correct result 9 | sqrt(99999999999999999999999999999999999999::dec<38, 0>) = 1e+19::fp64 10 | 11 | # real_number: real number as input 12 | sqrt(6.25::dec<3, 2>) = 2.5::fp64 13 | sqrt(2.0000007152557373046875::dec<23, 22>) = 1.4142138152541635::fp64 14 | 15 | # verify_real_number: verify real number operations are distinct and do not behave as the nearby int 16 | sqrt(9::dec<1, 0>) = 3::fp64 17 | sqrt(8.3::dec<2, 1>) = 2.8809720581775866::fp64 18 | sqrt(8.5::dec<2, 1>) = 2.9154759474226504::fp64 19 | sqrt(8.7::dec<2, 1>) = 2.949576240750525::fp64 20 | sqrt(9.2::dec<2, 1>) = 3.03315017762062::fp64 21 | 22 | # negative_input: negative input returns error 23 | sqrt(-9223372036854775800::dec<19, 0>) = 24 | sqrt(-2.5::dec<2, 1>) = 25 | 26 | # null_values: test with null values 27 | sqrt(null::dec<38, 0>) = null::fp64 28 | sqrt(null::dec<1, 0>) = null::fp64 29 | -------------------------------------------------------------------------------- /tests/cases/arithmetic_decimal/sum_decimal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | sum((0, -1, 2, 20)::dec<2, 0>) = 21::dec<38, 0> 6 | sum((2000000, -3217908, 629000, -100000, 0, 987654)::dec<7, 0>) = 298746::dec<38, 0> 7 | sum((2.5, 0, 5.0, -2.5, 
-7.5)::dec<2, 1>) = -2.5::dec<38, 2> 8 | sum((2.5000007152557373046875, 7.0000007152557373046875, 0, 7.0000007152557373046875)::dec<23, 22>) = 16.5000021457672119140625::dec<38, 22> 9 | 10 | # overflow: Examples demonstrating overflow behavior 11 | sum((99999999999999999999999999999999999999, 1, 1, 1, 1, 99999999999999999999999999999999999999)::dec<38, 0>) [overflow:ERROR] = 12 | 13 | # null_handling: Examples with null as input or output 14 | sum((Null, Null, Null)::dec<1, 0>) = Null::dec<38, 0> 15 | sum(()::dec<1, 0>) = Null::dec<38, 0> 16 | sum((200000, Null, 629000, -10000, 0, 987621)::dec<6, 0>) = 1806621::dec<38, 0> 17 | -------------------------------------------------------------------------------- /tests/cases/boolean/and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | and(true::bool, true::bool) = true::bool 6 | and(true::bool, false::bool) = false::bool 7 | and(false::bool, false::bool) = false::bool 8 | 9 | # null_input: Examples with null as input 10 | and(true::bool, null::bool) = null::bool 11 | and(null::bool, true::bool) = null::bool 12 | and(false::bool, null::bool) = false::bool 13 | and(null::bool, false::bool) = false::bool 14 | and(null::bool, null::bool) = null::bool 15 | -------------------------------------------------------------------------------- /tests/cases/boolean/and_not.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | and_not(true::bool, false::bool) = true::bool 6 | and_not(true::bool, true::bool) = false::bool 7 | and_not(false::bool, true::bool) = false::bool 8 | and_not(false::bool, false::bool) = false::bool 9 | 10 | # null_input: 
Examples with null as input 11 | and_not(true::bool, null::bool) = null::bool 12 | and_not(null::bool, false::bool) = null::bool 13 | and_not(false::bool, null::bool) = false::bool 14 | and_not(null::bool, true::bool) = false::bool 15 | and_not(null::bool, null::bool) = null::bool 16 | -------------------------------------------------------------------------------- /tests/cases/boolean/bool_and.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bool_and((true, true)::bool) = true::bool 6 | bool_and((true, false)::bool) = false::bool 7 | bool_and((false, false)::bool) = false::bool 8 | bool_and((false)::bool) = false::bool 9 | bool_and((true)::bool) = true::bool 10 | bool_and((true, null)::bool) = true::bool 11 | bool_and((null, null)::bool) = null::bool 12 | bool_and((false, null)::bool) = false::bool 13 | bool_and(()::bool) = null::bool 14 | -------------------------------------------------------------------------------- /tests/cases/boolean/bool_or.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bool_or((true, true)::bool) = true::bool 6 | bool_or((false, false)::bool) = false::bool 7 | bool_or((true, false)::bool) = true::bool 8 | bool_or((false)::bool) = false::bool 9 | bool_or((true)::bool) = true::bool 10 | bool_or((true, null)::bool) = true::bool 11 | bool_or((null, null)::bool) = null::bool 12 | bool_or((false, null)::bool) = false::bool 13 | bool_or(()::bool) = null::bool 14 | -------------------------------------------------------------------------------- /tests/cases/boolean/not.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | not(true::bool) = false::bool 6 | not(false::bool) = true::bool 7 | 8 | # null_input: Examples with null as input 9 | not(null::bool) = null::bool 10 | -------------------------------------------------------------------------------- /tests/cases/boolean/or.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | or(true::bool, true::bool) = true::bool 6 | or(true::bool, false::bool) = true::bool 7 | or(false::bool, false::bool) = false::bool 8 | 9 | # null_input: Examples with null as input 10 | or(true::bool, null::bool) = true::bool 11 | or(null::bool, true::bool) = true::bool 12 | or(false::bool, null::bool) = null::bool 13 | or(null::bool, false::bool) = null::bool 14 | or(null::bool, null::bool) = null::bool 15 | -------------------------------------------------------------------------------- /tests/cases/boolean/xor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | xor(true::bool, false::bool) = true::bool 6 | xor(true::bool, true::bool) = false::bool 7 | xor(false::bool, false::bool) = false::bool 8 | xor(false::bool, true::bool) = true::bool 9 | 10 | # null_input: Examples with null as input 11 | xor(true::bool, null::bool) = null::bool 12 | xor(null::bool, true::bool) = null::bool 13 | xor(false::bool, null::bool) = null::bool 14 | xor(null::bool, false::bool) = null::bool 15 | 
-------------------------------------------------------------------------------- /tests/cases/comparison/between.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | between(5::i8, 0::i8, 127::i8) = true::bool 6 | between(20000::i16, 1::i16, 30000::i16) = true::bool 7 | between(1030000000::i32, 1000000000::i32, 2000000000::i32) = true::bool 8 | between(10300000000900::i64, 1000000000::i64, 9223372036854775807::i64) = true::bool 9 | between(2::i8, 1::i8, -120::i8) = false::bool 10 | between(2::i8, 2::i8, 3::i8) = true::bool 11 | between(2::i8, 1::i8, 2::i8) = true::bool 12 | between(-10000::i16, -20000::i16, -30000::i16) = false::bool 13 | between(-100000000::i32, -1000000000::i32, -2000000000::i32) = false::bool 14 | between(92233720368547758::i64, 1::i64, -9223372036854775807::i64) = false::bool 15 | between(14.01::fp32, 20.90::fp32, 88.00::fp32) = false::bool 16 | between(14.011::fp64, 0.00::fp64, inf::fp64) = true::bool 17 | between(inf::fp64, 0.00::fp64, 100.09::fp64) = false::bool 18 | between(-100.0011::fp64, -inf::fp64, 0.00::fp64) = true::bool 19 | 20 | # null_input: Examples with null as input 21 | between(null::i8, 1::i8, 10::i8) = null::bool 22 | between(1::i64, null::i64, 10::i64) = null::bool 23 | between(1::i64, 1::i64, null::i64) = null::bool 24 | -------------------------------------------------------------------------------- /tests/cases/comparison/coalesce.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | coalesce(1::i8, 2::i8) = 1::i8 6 | coalesce(null::i8, 2::i8) = 2::i8 7 | coalesce(null::i16, null::i16) = null::i16 8 | coalesce(2000000::i32, 
null::i32) = 2000000::i32 9 | coalesce(null::i64, 9223372036854775807::i64) = 9223372036854775807::i64 10 | coalesce(null::fp32, -65.500000::fp32) = -65.500000::fp32 11 | coalesce(inf::fp64, -inf::fp64) = inf::fp64 12 | coalesce(7::dec<38, 0>, 4::dec<38, 0>) = 7::dec<38, 0> 13 | coalesce(null::dec<38, 0>, 2::dec<38, 0>) = 2::dec<38, 0> 14 | coalesce(null::dec<38, 0>, null::dec<38, 0>) = null::dec<38, 0> 15 | coalesce(2000000::dec<38, 0>, null::dec<38, 0>) = 2000000::dec<38, 0> 16 | coalesce(null::dec<38, 0>, 2000000::dec<38, 0>) = 2000000::dec<38, 0> 17 | -------------------------------------------------------------------------------- /tests/cases/comparison/equal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | equal(1::i8, 1::i8) = true::bool 6 | equal(300::i16, 200::i16) = false::bool 7 | equal(-2147483648::i32, -2147483648::i32) = true::bool 8 | equal(9223372036854775807::i64, 9223372036854775804::i64) = false::bool 9 | equal(inf::fp64, inf::fp64) = true::bool 10 | equal(inf::fp64, 1.5e+308::fp64) = false::bool 11 | equal(10::dec<38, 0>, 10::dec<38, 0>) = true::bool 12 | equal(10::dec<38, 0>, 11.25::dec<38, 2>) = false::bool 13 | equal(inf::fp64, -inf::fp64) = false::bool 14 | 15 | # null_input: Examples with null as input 16 | equal(null::i16, 1::i16) = null::bool 17 | equal(null::i16, null::i16) = null::bool 18 | equal(7::dec<38, 0>, null::dec<38, 0>) = null::bool 19 | equal(null::dec<38, 0>, null::dec<38, 0>) = null::bool 20 | -------------------------------------------------------------------------------- /tests/cases/comparison/gt.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without 
any special cases 5 | gt(1::i8, 2::i8) = false::bool 6 | gt(200::i16, 199::i16) = true::bool 7 | gt(200::i16, 200::i16) = false::bool 8 | gt(2000000000::i32, 1000000000::i32) = true::bool 9 | gt(-922337203685775808::i64, -922337203685775807::i64) = false::bool 10 | gt(7.25::fp32, 2.50::fp32) = true::bool 11 | gt(-922337203685775808::dec<38, 0>, -922337203685775807::dec<38, 0>) = false::bool 12 | gt(7.25::dec<38, 2>, 2.50::dec<38, 2>) = true::bool 13 | gt(-1.5e+308::fp64, -inf::fp64) = true::bool 14 | gt(inf::fp64, 1.5e+308::fp64) = true::bool 15 | 16 | # null_input: Examples with null as input 17 | gt(null::i16, 100::i16) = null::bool 18 | gt(2::i16, null::i16) = null::bool 19 | gt(null::i16, null::i16) = null::bool 20 | gt(2::dec<38, 2>, null::dec<38, 2>) = null::bool 21 | gt(null::dec<38, 2>, null::dec<38, 2>) = null::bool 22 | -------------------------------------------------------------------------------- /tests/cases/comparison/gte.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | gte(1::i8, 2::i8) = false::bool 6 | gte(2::i8, 2::i8) = true::bool 7 | gte(200::i16, 199::i16) = true::bool 8 | gte(2000000000::i32, 1000000000::i32) = true::bool 9 | gte(-922337203685775808::i64, -922337203685775807::i64) = false::bool 10 | gte(7.25::fp32, 2.50::fp32) = true::bool 11 | gte(7.25::fp32, 7.25::fp32) = true::bool 12 | gte(7.25::dec<38, 2>, 7.25::dec<38, 2>) = true::bool 13 | gte(7.25::dec<38, 2>, 7.27::dec<38, 2>) = false::bool 14 | gte(inf::fp64, 1.5e+308::fp64) = true::bool 15 | gte(inf::fp64, inf::fp64) = true::bool 16 | gte(-inf::fp64, -1.5e+308::fp64) = false::bool 17 | 18 | # null_input: Examples with null as input 19 | gte(null::dec<38, 2>, 7.25::dec<38, 2>) = null::bool 20 | gte(null::dec<38, 2>, null::dec<38, 2>) = null::bool 21 | gte(null::i16, 1::i16) = 
null::bool 22 | gte(2::i16, null::i16) = null::bool 23 | gte(null::i16, null::i16) = null::bool 24 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_false.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_false(true::bool) = false::bool 6 | is_false(false::bool) = true::bool 7 | is_false(null::bool) = false::bool 8 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_finite.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_finite(0.0::fp32) = true::bool 6 | is_finite(0.55::fp32) = true::bool 7 | is_finite(1000.000000000001::fp64) = true::bool 8 | is_finite(-inf::fp64) = false::bool 9 | is_finite(inf::fp64) = false::bool 10 | is_finite(null::fp64) = null::bool 11 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_infinite.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_infinite(0.0::fp32) = false::bool 6 | is_infinite(0.55::fp32) = false::bool 7 | is_infinite(1000.000000000001::fp64) = false::bool 8 | is_infinite(-inf::fp64) = true::bool 9 | is_infinite(inf::fp64) = true::bool 10 | is_infinite(null::fp64) = null::bool 11 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_nan.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_nan(0.0::fp32) = false::bool 6 | is_nan(0.55::fp32) = false::bool 7 | is_nan(1000.000000000001::fp64) = false::bool 8 | is_nan(-inf::fp64) = false::bool 9 | is_nan(inf::fp64) = false::bool 10 | is_nan(null::fp64) = null::bool 11 | is_nan(nan::fp64) = true::bool 12 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_not_distinct_from.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_distinct_from(1::i16, 1::i16) = true::bool 6 | is_not_distinct_from(2::i16, 1::i16) = false::bool 7 | is_not_distinct_from(1.75::dec<38, 2>, 1.75::dec<38, 2>) = true::bool 8 | is_not_distinct_from(1.75::dec<38, 2>, 1.1::dec<38, 2>) = false::bool 9 | 10 | # null_input: Examples with null as input 11 | is_not_distinct_from(null::i16, 1::i16) = false::bool 12 | is_not_distinct_from(null::i16, null::i16) = true::bool 13 | is_not_distinct_from(10::dec<38, 0>, null::dec<38, 0>) = false::bool 14 | is_not_distinct_from(null::dec<38, 0>, null::dec<38, 0>) = true::bool 15 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_not_false.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_false(true::bool) = true::bool 6 | is_not_false(false::bool) = false::bool 7 | is_not_false(null::bool) = true::bool 8 | 
-------------------------------------------------------------------------------- /tests/cases/comparison/is_not_null.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_null(25::i16) = true::bool 6 | is_not_null(true::bool) = true::bool 7 | is_not_null(7.25::fp32) = true::bool 8 | is_not_null(7.25::dec<38, 3>) = true::bool 9 | is_not_null(null::i8) = false::bool 10 | is_not_null(null::dec<38, 3>) = false::bool 11 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_not_true.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_not_true(true::bool) = false::bool 6 | is_not_true(false::bool) = true::bool 7 | is_not_true(null::bool) = true::bool 8 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_null.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | is_null(25::i16) = false::bool 6 | is_null(false::bool) = false::bool 7 | is_null(7.823::dec<38, 3>) = false::bool 8 | is_null(null::i16) = true::bool 9 | is_null(null::dec<38, 3>) = true::bool 10 | -------------------------------------------------------------------------------- /tests/cases/comparison/is_true.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # 
basic: Basic examples without any special cases 5 | is_true(true::bool) = true::bool 6 | is_true(false::bool) = false::bool 7 | is_true(null::bool) = false::bool 8 | -------------------------------------------------------------------------------- /tests/cases/comparison/lt.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lt(1::i8, 2::i8) = true::bool 6 | lt(200::i16, 100::i16) = false::bool 7 | lt(1000::i16, 1000::i16) = false::bool 8 | lt(2000000000::i32, 1000000000::i32) = false::bool 9 | lt(-922337203685775808::i64, -922337203685775807::i64) = true::bool 10 | lt(7.25::fp32, 2.50::fp32) = false::bool 11 | lt(7.25::dec<38, 2>, 7.25::dec<38, 2>) = false::bool 12 | lt(2.49::dec<38, 2>, 2.50::dec<38, 2>) = true::bool 13 | lt(1.5e+308::fp64, inf::fp64) = true::bool 14 | lt(-1.5e+308::fp64, -inf::fp64) = false::bool 15 | 16 | # null_input: Examples with null as input 17 | lt(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool 18 | lt(null::dec<38, 2>, null::dec<38, 2>) = null::bool 19 | lt(null::i16, 1::i16) = null::bool 20 | lt(2::i16, null::i16) = null::bool 21 | lt(null::i16, null::i16) = null::bool 22 | -------------------------------------------------------------------------------- /tests/cases/comparison/lte.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lte(1::i8, 2::i8) = true::bool 6 | lte(2::i8, 2::i8) = true::bool 7 | lte(200::i16, 199::i16) = false::bool 8 | lte(2000000000::i32, 1000000000::i32) = false::bool 9 | lte(-922337203685775808::i64, -922337203685775807::i64) = true::bool 10 | lte(7.00::fp32, 2.50::fp32) = false::bool 11 | lte(7.25::fp32, 7.25::fp32) 
= true::bool 12 | lte(7.25::dec<38, 2>, 7.25::dec<38, 2>) = true::bool 13 | lte(2.59::dec<38, 2>, 2.50::dec<38, 2>) = false::bool 14 | lte(1.5e+308::fp64, inf::fp64) = true::bool 15 | lte(inf::fp64, inf::fp64) = true::bool 16 | lte(-1.5e+308::fp64, -inf::fp64) = false::bool 17 | 18 | # null_input: Examples with null as input 19 | lte(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool 20 | lte(null::dec<38, 2>, null::dec<38, 2>) = null::bool 21 | lte(null::i16, 1::i16) = null::bool 22 | lte(2::i16, null::i16) = null::bool 23 | lte(null::i16, null::i16) = null::bool 24 | -------------------------------------------------------------------------------- /tests/cases/comparison/not_equal.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | not_equal(1::i8, 1::i8) = false::bool 6 | not_equal(300::i16, 200::i16) = true::bool 7 | not_equal(-2147483648::i32, -2147483648::i32) = false::bool 8 | not_equal(9223372036854775807::i64, 9223372036854775804::i64) = true::bool 9 | not_equal(9223372036854775807::dec<38, 0>, 9223372036854775804::dec<38, 0>) = true::bool 10 | not_equal(9223372036854775804::dec<38, 0>, 9223372036854775804::dec<38, 0>) = false::bool 11 | not_equal(inf::fp64, inf::fp64) = false::bool 12 | not_equal(inf::fp64, 1.5e+308::fp64) = true::bool 13 | not_equal(inf::fp64, -inf::fp64) = true::bool 14 | 15 | # null_input: Examples with null as input 16 | not_equal(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool 17 | not_equal(null::dec<38, 2>, null::dec<38, 2>) = null::bool 18 | not_equal(null::i16, 1::i16) = null::bool 19 | not_equal(null::i16, null::i16) = null::bool 20 | -------------------------------------------------------------------------------- /tests/cases/comparison/nullif.test: -------------------------------------------------------------------------------- 1 | 
### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | nullif(1::i16, 5::i16) = 1::i16 6 | nullif(7.25::fp32, 1.00::fp32) = 7.25::fp32 7 | nullif(1.11::fp32, 1.11::fp32) = null::fp32 8 | nullif(false::bool, true::bool) = false::bool 9 | nullif(true::bool, false::bool) = true::bool 10 | nullif(false::bool, false::bool) = null::bool 11 | nullif(true::bool, true::bool) = null::bool 12 | 13 | # null_input: Examples with null as input 14 | nullif(null::bool, true::bool) = null::bool 15 | nullif(true::bool, null::bool) = true::bool 16 | nullif(null::bool, null::bool) = null::bool 17 | nullif(10::dec<38, 0>, null::dec<38, 0>) = 10::dec<38, 0> 18 | nullif(null::dec<38, 0>, null::dec<38, 0>) = null::dec<38, 0> 19 | -------------------------------------------------------------------------------- /tests/cases/datetime/add_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp types 5 | add('2016-12-31T13:30:15'::ts, 'P5D'::iday) = '2017-01-05T13:30:15'::ts 6 | add('2016-12-01T13:30:15'::ts, 'P5Y'::iyear) = '2021-12-01T13:30:15'::ts 7 | add('2016-12-01T13:30:15'::ts, 'PT5H'::iday) = '2016-12-01T18:30:15'::ts 8 | 9 | # date_to_timestamp: examples using the date types and resulting in a timestamp 10 | add('2020-12-31'::date, 'P5D'::iday) = '2021-01-05T00:00:00'::ts 11 | add('2020-12-31'::date, 'P5Y'::iyear) = '2025-12-31T00:00:00'::ts 12 | add('2020-12-31'::date, 'P5M'::iyear) = '2021-05-31T00:00:00'::ts 13 | 14 | # null_input: examples with null args or return 15 | add(null::date, 'P5D'::iday) = null::ts 16 | -------------------------------------------------------------------------------- /tests/cases/datetime/add_intervals.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | add_intervals('PT10H'::iday, 'PT5H'::iday) = 'P0DT15H0M0S'::iday 6 | add_intervals('P10D'::iday, 'P5D'::iday) = 'P15D'::iday 7 | add_intervals('P1D'::iday, 'PT10H'::iday) = 'P1DT10H0M0S'::iday 8 | 9 | # null_input: Basic examples where the input args or return is null 10 | add_intervals(null::iyear, 'P1Y'::iyear) = null::iyear 11 | add_intervals(null::iday, 'P1D'::iday) = null::iday 12 | -------------------------------------------------------------------------------- /tests/cases/datetime/extract.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp and timestamptz types 5 | extract('YEAR'::str, '2016-12-31T13:30:15'::ts) = 2016::i64 6 | extract('ISOYEAR'::str, '2016-01-01T13:30:15'::ts) = 2015::i64 7 | extract('QUARTER'::str, '2016-12-31T13:30:15'::ts) = 4::i64 8 | extract('MONTH'::str, '2016-12-31T13:30:15'::ts) = 12::i64 9 | extract('WEEK'::str, '2016-12-31T13:30:15'::ts) = 52::i64 10 | extract('DAY'::str, '2016-12-31T13:30:15'::ts) = 31::i64 11 | extract('ISODOW'::str, '2016-12-25T13:30:15'::ts) = 7::i64 12 | extract('DOW'::str, '2016-12-25T13:30:15'::ts) = 0::i64 13 | extract('DOY'::str, '2016-12-25T13:30:15'::ts) = 360::i64 14 | extract('HOUR'::str, '2016-12-31T13:30:15'::ts) = 13::i64 15 | extract('MINUTE'::str, '2016-12-31T13:30:15'::ts) = 30::i64 16 | extract('SECOND'::str, '2016-12-31T13:30:15'::ts) = 15::i64 17 | extract('MILLISECONDS'::str, '2016-12-31T13:30:15'::ts) = 15000::i64 18 | extract('MICROSECONDS'::str, '2016-12-31T13:30:15.220000'::ts) = 15220000::i64 19 | extract('EPOCH'::str, '2016-12-31T13:30:15'::ts) = 1483191015::i64 
20 | 21 | # date: examples using the date type 22 | extract('YEAR'::str, '2020-12-31'::date) = 2020::i64 23 | extract('MONTH'::str, '2020-12-31'::date) = 12::i64 24 | extract('DAY'::str, '2020-12-31'::date) = 31::i64 25 | 26 | # time: examples using the time type 27 | extract('HOUR'::str, '01:02:03'::time) = 1::i64 28 | extract('MINUTE'::str, '01:02:03'::time) = 2::i64 29 | extract('SECOND'::str, '01:02:03'::time) = 3::i64 30 | extract('MILLISECOND'::str, '01:02:03.155'::time) = 3155::i64 31 | extract('MICROSECOND'::str, '01:02:03.45'::time) = 3450000::i64 32 | -------------------------------------------------------------------------------- /tests/cases/datetime/gt_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | gt('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 6 | gt('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 7 | 8 | # timestamp_tz: examples using the timestamp_tz type 9 | gt('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool 10 | gt('1999-01-08T01:05:07-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool 11 | 12 | # date: examples using the date type 13 | gt('2020-12-30'::date, '2020-12-31'::date) = false::bool 14 | gt('2020-12-31'::date, '2020-12-30'::date) = true::bool 15 | 16 | # interval: examples using the interval type 17 | gt('P7D'::iday, 'P6D'::iday) = true::bool 18 | gt('P5D'::iday, 'P6D'::iday) = false::bool 19 | gt('P5Y'::iyear, 'P6Y'::iyear) = false::bool 20 | gt('P7Y'::iyear, 'P6Y'::iyear) = true::bool 21 | 22 | # null_input: examples with null args 23 | gt(null::iday, 'P5D'::iday) = null::bool 24 | gt(null::date, '2020-12-30'::date) = null::bool 25 | gt(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 26 | 
-------------------------------------------------------------------------------- /tests/cases/datetime/gte_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | gte('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 6 | gte('2017-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 7 | gte('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 8 | 9 | # timestamp_tz: examples using the timestamp_tz type 10 | gte('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool 11 | gte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T01:05:06-08:00'::tstz) = true::bool 12 | gte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:05-05:00'::tstz) = true::bool 13 | 14 | # date: examples using the date type 15 | gte('2020-12-30'::date, '2020-12-31'::date) = false::bool 16 | gte('2020-12-31'::date, '2020-12-31'::date) = true::bool 17 | gte('2020-12-31'::date, '2020-12-30'::date) = true::bool 18 | 19 | # interval: examples using the interval type 20 | gte('P7D'::iday, 'P7D'::iday) = true::bool 21 | gte('P7D'::iday, 'P6D'::iday) = true::bool 22 | gte('P5D'::iday, 'P6D'::iday) = false::bool 23 | gte('P5Y'::iyear, 'P6Y'::iyear) = false::bool 24 | gte('P7Y'::iyear, 'P7Y'::iyear) = true::bool 25 | gte('P7Y'::iyear, 'P6Y'::iyear) = true::bool 26 | 27 | # null_input: examples with null args or return 28 | gte(null::iday, 'P5D'::iday) = null::bool 29 | gte(null::date, '2020-12-30'::date) = null::bool 30 | gte(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 31 | -------------------------------------------------------------------------------- /tests/cases/datetime/lt_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: 
'/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | lt('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 6 | lt('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 7 | 8 | # timestamp_tz: examples using the timestamp_tz type 9 | lt('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool 10 | lt('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool 11 | 12 | # date: examples using the date type 13 | lt('2020-12-30'::date, '2020-12-31'::date) = true::bool 14 | lt('2020-12-31'::date, '2020-12-30'::date) = false::bool 15 | 16 | # interval: examples using the interval type 17 | lt('P7D'::iday, 'P6D'::iday) = false::bool 18 | lt('P5D'::iday, 'P6D'::iday) = true::bool 19 | lt('P5Y'::iyear, 'P6Y'::iyear) = true::bool 20 | lt('P7Y'::iyear, 'P6Y'::iyear) = false::bool 21 | 22 | # null_input: examples with null args or return 23 | lt(null::iday, 'P5D'::iday) = null::bool 24 | lt(null::date, '2020-12-30'::date) = null::bool 25 | lt(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 26 | -------------------------------------------------------------------------------- /tests/cases/datetime/lte_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | lte('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 6 | lte('2017-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool 7 | lte('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool 8 | 9 | # timestamp_tz: examples using the timestamp_tz type 10 | lte('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool 11 | lte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T01:05:06-08:00'::tstz) = true::bool 12 | lte('1999-01-08T01:05:06-08:00'::tstz, 
'1999-01-08T04:05:05-05:00'::tstz) = false::bool 13 | 14 | # date: examples using the date type 15 | lte('2020-12-30'::date, '2020-12-31'::date) = true::bool 16 | lte('2020-12-31'::date, '2020-12-31'::date) = true::bool 17 | lte('2020-12-31'::date, '2020-12-30'::date) = false::bool 18 | 19 | # interval: examples using the interval type 20 | lte('P7D'::iday, 'P7D'::iday) = true::bool 21 | lte('P7D'::iday, 'P6D'::iday) = false::bool 22 | lte('P5D'::iday, 'P6D'::iday) = true::bool 23 | lte('P5Y'::iyear, 'P6Y'::iyear) = true::bool 24 | lte('P7Y'::iyear, 'P7Y'::iyear) = true::bool 25 | lte('P7Y'::iyear, 'P6Y'::iyear) = false::bool 26 | 27 | # null_input: examples with null args or return 28 | lte(null::iday, 'P5D'::iday) = null::bool 29 | lte(null::date, '2020-12-30'::date) = null::bool 30 | lte(null::ts, '2018-12-31T13:30:15'::ts) = null::bool 31 | -------------------------------------------------------------------------------- /tests/cases/datetime/subtract_datetime.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml' 3 | 4 | # timestamps: examples using the timestamp type 5 | subtract('2016-12-31T13:30:15'::ts, 'P5D'::iday) = '2016-12-26T13:30:15'::ts 6 | subtract('2016-12-01T13:30:15'::ts, 'P5Y'::iyear) = '2011-12-01T13:30:15'::ts 7 | subtract('2016-12-01T13:30:15'::ts, 'PT5H'::iday) = '2016-12-01T08:30:15'::ts 8 | 9 | # date: examples using the date type 10 | subtract('2020-12-31'::date, 'P5D'::iday) = '2020-12-26'::date 11 | subtract('2020-12-31'::date, 'P5Y'::iyear) = '2015-12-31'::date 12 | subtract('2020-12-31'::date, 'P5M'::iyear) = '2020-07-31'::date 13 | 14 | # null_input: examples with null args or return 15 | subtract(null::date, 'P5D'::iday) = null::date 16 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/ln.test: 
-------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ln(100000::i64) = 11.512925464970229::fp64 6 | ln(1.0::fp32) = 0::fp32 7 | ln(2.015::fp64) = 0.7006191953986464::fp64 8 | 9 | # infinity: Examples with infinity as input 10 | ln(-inf::fp64) [on_domain_error:ERROR] = <!ERROR> 11 | ln(-inf::fp64) [on_domain_error:NAN] = nan::fp64 12 | ln(-inf::fp64) [on_domain_error:NONE] = null::fp64 13 | ln(inf::fp64) = inf::fp64 14 | 15 | # log_zero: Examples with log zero 16 | ln(0.0::fp64) [on_log_zero:ERROR] = <!ERROR> 17 | ln(0.0::fp64) [on_log_zero:NAN] = nan::fp64 18 | ln(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/log10.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | log10(100000::i64) = 5.0::fp64 6 | log10(1.0::fp32) = 0::fp32 7 | log10(2.015::fp64) = 0.3042750504771283::fp64 8 | 9 | # infinity: Examples with infinity as input 10 | log10(-inf::fp64) [on_domain_error:ERROR] = <!ERROR> 11 | log10(-inf::fp64) [on_domain_error:NAN] = nan::fp64 12 | log10(-inf::fp64) [on_domain_error:NONE] = null::fp64 13 | log10(inf::fp64) = inf::fp64 14 | 15 | # log_zero: Examples with log zero 16 | log10(0.0::fp64) [on_log_zero:ERROR] = <!ERROR> 17 | log10(0.0::fp64) [on_log_zero:NAN] = nan::fp64 18 | log10(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/log2.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### 
SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | log2(100000::i64) = 16.609640474436812::fp64 6 | log2(1.0::fp32) = 0::fp32 7 | log2(8.0::fp64) = 3.0::fp64 8 | log2(2.015::fp64) = 1.0107798387532427::fp64 9 | 10 | # infinity: Examples with infinity as input 11 | log2(-inf::fp64) [on_domain_error:ERROR] = 12 | log2(-inf::fp64) [on_domain_error:NAN] = nan::fp64 13 | log2(-inf::fp64) [on_domain_error:NONE] = null::fp64 14 | log2(inf::fp64) = inf::fp64 15 | 16 | # log_zero: Examples with log zero 17 | log2(0.0::fp64) [on_log_zero:ERROR] = 18 | log2(0.0::fp64) [on_log_zero:NAN] = null::fp64 19 | log2(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 20 | -------------------------------------------------------------------------------- /tests/cases/logarithmic/logb.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | logb(10::i64, 100000::i64) = 5.0::fp64 6 | logb(7::fp64, 1.0::fp64) = 0::fp64 7 | logb(2::fp64, 7::fp64) = 2.8073549220576041::fp64 8 | 9 | # infinity: Examples with infinity as input 10 | logb(2.34::fp64, inf::fp64) = inf::fp64 11 | logb(10::fp64, -inf::fp64) [on_domain_error:ERROR] = 12 | logb(10::fp64, -inf::fp64) [on_domain_error:NAN] = nan::fp64 13 | logb(10::fp64, -inf::fp64) [on_domain_error:NONE] = null::fp64 14 | 15 | # log_zero: Examples with log zero 16 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:ERROR] = 17 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:NAN] = null::fp64 18 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64 19 | -------------------------------------------------------------------------------- /tests/cases/rounding/ceil.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### 
SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ceil(2.25::fp32) = 3::fp32 6 | ceil(2.0000007152557373046875::fp64) = 3::fp64 7 | ceil(-65.500000001223334444::fp64) = -65::fp64 8 | -------------------------------------------------------------------------------- /tests/cases/rounding/floor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | floor(2.25::fp32) = 2::fp32 6 | floor(2.0000007152557373046875::fp64) = 2::fp64 7 | floor(-65.490000001223334444::fp64) = -66::fp64 8 | -------------------------------------------------------------------------------- /tests/cases/rounding/round.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | round(2::i8, 2::i32) = 2::i8 6 | round(2.75::fp32, 1::i32) = 2.8::fp32 7 | round(2.0000007152457373046875::fp64, 10::i32) = 2.0000007152::fp64 8 | round(2.0000007152457373046875::fp64, 10::i32) = 2.0000007152::fp64 9 | 10 | # negative_rounding: Examples with negative rounding 11 | round(2::i8, -2::i32) = 0::i8 12 | round(123::i8, -2::i32) = 100::i8 13 | round(8793::i16, -2::i32) = 8800::i16 14 | -------------------------------------------------------------------------------- /tests/cases/rounding_decimal/ceil.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ceil(2.25::dec<3,2>) = 3::dec<2,0> 6 | ceil(-65.5::dec<3,1>) = -65::dec<3,0> 7 | ceil(9.9::dec<2,1>) = 10::dec<2,0> 8 | 
-------------------------------------------------------------------------------- /tests/cases/rounding_decimal/floor.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | floor(2.25::dec<3,2>) = 2::dec<2,0> 6 | floor(-65.5::dec<3,1>) = -66::dec<3,0> 7 | -------------------------------------------------------------------------------- /tests/cases/rounding_decimal/round.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | round(2.0::dec<2,1>, 2::i32) = 2::dec<3,1> 6 | round(2.75::dec<3,2>, 1::i32) = 2.8::dec<4,2> 7 | 8 | # negative_rounding: Examples with negative rounding 9 | round(2.0::dec<2,1>, -2::i32) = 0::dec<3,1> 10 | round(123::dec<3,0>, -2::i32) = 100::dec<4,0> 11 | round(8793.5::dec<5,1>, -2::i32) = 8800::dec<6,1> 12 | -------------------------------------------------------------------------------- /tests/cases/string/bit_length.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | bit_length('abc'::str) = 24::i64 6 | bit_length(''::str) = 0::i64 7 | bit_length(' '::str) = 8::i64 8 | bit_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 384::i64 9 | bit_length(' 456'::str) = 48::i64 10 | 11 | # null_input: Examples with null as input 12 | bit_length(null::str) = null::i64 13 | 14 | # unicode: Examples with unicode characters as input 15 | bit_length('à'::str) = 16::i64 16 | bit_length('😄'::str) = 32::i64 17 | 
-------------------------------------------------------------------------------- /tests/cases/string/char_length.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | char_length('abc'::str) = 3::i64 6 | char_length(''::str) = 0::i64 7 | char_length(' '::str) = 1::i64 8 | char_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 48::i64 9 | char_length(' 456'::str) = 6::i64 10 | 11 | # null_input: Examples with null as input 12 | char_length(null::str) = null::i64 13 | 14 | # unicode: Examples with unicode characters as input 15 | char_length('à'::str) = 1::i64 16 | char_length('😄'::str) = 1::i64 17 | -------------------------------------------------------------------------------- /tests/cases/string/concat.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | concat('abcd'::str, 'efg'::str) = 'abcdefg'::str 6 | 7 | # null_input: Examples with null as input 8 | concat('abcd'::str, null::str) [null_handling:ACCEPT_NULLS] = null::str 9 | concat('abcd'::str, null::str) [null_handling:IGNORE_NULLS] = 'abcd'::str 10 | concat(null::str, 'abcd'::str) [null_handling:ACCEPT_NULLS] = null::str 11 | concat(null::str, 'abcd'::str) [null_handling:IGNORE_NULLS] = 'abcd'::str 12 | concat(null::str, null::str) [null_handling:ACCEPT_NULLS] = null::str 13 | concat(null::str, null::str) [null_handling:IGNORE_NULLS] = ''::str 14 | -------------------------------------------------------------------------------- /tests/cases/string/concat_ws.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: 
'/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | concat_ws(','::str, 'Banana'::str, 'Apple'::str, 'Melon'::str) = 'Banana,Apple,Melon'::str 6 | concat_ws(''::str, 'Banana'::str, 'Apple'::str) = 'BananaApple'::str 7 | concat_ws(null::str, 'Banana'::str, 'Apple'::str, 'Melon'::str) = null::str 8 | concat_ws(','::str, null::str, 'Apple'::str, 'Melon'::str) = 'Apple,Melon'::str 9 | concat_ws(','::str, 'Apple'::str, null::str, 'Melon'::str) = 'Apple,Melon'::str 10 | -------------------------------------------------------------------------------- /tests/cases/string/contains.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples contains as prefix 5 | contains('abcdefg'::str, 'abc'::str) = true::bool 6 | contains('abcdefg'::str, 'CdE'::str) = false::bool 7 | contains('abcdefg'::str, 'CdE'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 8 | contains('abcdefg'::str, 'cde'::str) = true::bool 9 | contains('abcdefg'::str, 'fg'::str) = true::bool 10 | contains('abcdefg'::str, 'aef'::str) = false::bool 11 | 12 | # multi_byte_characters: multi byte characters exists in the string 13 | contains('😊a😊b😊😊'::str, 'a😊b'::str) = true::bool 14 | contains('😊a😊b😊😊'::str, 'A😊B'::str) = false::bool 15 | contains('😊a😊b😊😊'::str, 'A😊B'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 16 | contains('😊a😊b😊😊'::str, 'a😊c'::str) = false::bool 17 | -------------------------------------------------------------------------------- /tests/cases/string/ends_with.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ends_with('abcd'::str, 'd'::str) = true::bool 6 | ends_with('abcd'::str, 
'a'::str) = false::bool 7 | ends_with('abcd'::str, 'CD'::str) = false::bool 8 | 9 | # case_insensitivity: single byte character comparison with case insensitivity 10 | ends_with('abcd'::str, 'CD'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 11 | 12 | # multi_byte_characters: multi byte character comparison 13 | ends_with('😊a😊b😊😊'::str, 'b😊😊'::str) = true::bool 14 | 15 | # multi_byte_characters case insensitivity: multi byte character comparison with case insensitivity 16 | ends_with('😊a😊b😊😊'::str, 'B😊😊'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 17 | -------------------------------------------------------------------------------- /tests/cases/string/left.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | left('abcdef'::str, 2::i32) = 'ab'::str 6 | left('abcdef'::str, 6::i32) = 'abcdef'::str 7 | left('abcdef'::str, 10::i32) = 'abcdef'::str 8 | left(' abcdef abcdef'::str, 10::i32) = ' abcdef '::str 9 | left(null::str, 10::i32) = null::str 10 | left('abcdef'::str, null::i32) = null::str 11 | 12 | # unicode: Examples with unicode characters as input 13 | left('ææããa'::str, 2::i32) = 'ææ'::str 14 | left('😔😄😔😄'::str, 2::i32) = '😔😄'::str 15 | -------------------------------------------------------------------------------- /tests/cases/string/like.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | like('abcdefg'::str, 'abcdefg'::str) = true::bool 6 | like('abcdefg'::str, 'abc'::str) = false::bool 7 | 8 | # wildcard: Examples using wildcards 9 | like('abcdefg'::str, 'abc%'::str) = true::bool 10 | like('abcdefg'::str, '%efg'::str) = true::bool 11 | like('abcdefg'::str, 
'_bcdefg'::str) = true::bool 12 | like('abcdefg'::str, 'abc_efg'::str) = true::bool 13 | -------------------------------------------------------------------------------- /tests/cases/string/lower.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lower('ABC'::str) = 'abc'::str 6 | lower('aBc'::str) = 'abc'::str 7 | lower('abc'::str) = 'abc'::str 8 | lower(''::str) = ''::str 9 | 10 | # null_input: Examples with null as input 11 | lower(null::str) = null::str 12 | 13 | # unicode: Examples with unicode characters as input 14 | lower('ÆÆÃÃA'::str) [full_unicode:TRUE] = 'ææããa'::str 15 | lower('😄'::str) = '😄'::str 16 | -------------------------------------------------------------------------------- /tests/cases/string/lpad.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | lpad('abcdef'::str, 10::i32, ' '::str) = ' abcdef'::str 6 | lpad('abcdef '::str, 20::i32, '1'::str) = '1111111111abcdef '::str 7 | lpad(' abcdef'::str, 20::i32, '1'::str) = '1111111111 abcdef'::str 8 | lpad('abcdef'::str, 6::i32, ' '::str) = 'abcdef'::str 9 | lpad('abcdef'::str, 20::i32, 'aabb'::str) = 'aabbaabbaabbaaabcdef'::str 10 | lpad('abcdef'::str, 4::i32, ' '::str) = 'abcd'::str 11 | lpad('abcdef'::str, -1::i32, ' '::str) = ''::str 12 | lpad(null::str, 4::i32, ' '::str) = null::str 13 | lpad('abcdef'::str, 10::i32, null::str) = null::str 14 | -------------------------------------------------------------------------------- /tests/cases/string/ltrim.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: 
'/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | ltrim('abc'::str, ' '::str) = 'abc'::str 6 | ltrim(' abc'::str, ' '::str) = 'abc'::str 7 | ltrim('abc '::str, ' '::str) = 'abc '::str 8 | ltrim(' abc '::str, ' '::str) = 'abc '::str 9 | ltrim(''::str, ' '::str) = ''::str 10 | ltrim(' '::str, ' '::str) = ''::str 11 | ltrim(null::str, ' '::str) = null::str 12 | 13 | # two_inputs: Examples with character input to trim off 14 | ltrim('aaaaabc'::str, 'a'::str) [spaces_only:FALSE] = 'bc'::str 15 | ltrim('abcabcdef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str 16 | ltrim('abccbadef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str 17 | -------------------------------------------------------------------------------- /tests/cases/string/octet_length.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | octet_length('abc'::str) = 3::i64 6 | octet_length(''::str) = 0::i64 7 | octet_length(' '::str) = 1::i64 8 | octet_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 48::i64 9 | octet_length(' 456'::str) = 6::i64 10 | 11 | # null_input: Examples with null as input 12 | octet_length(null::str) = null::i64 13 | 14 | # unicode: Examples with unicode characters as input 15 | octet_length('à'::str) = 2::i64 16 | octet_length('😄'::str) = 4::i64 17 | -------------------------------------------------------------------------------- /tests/cases/string/regexp_count_substring.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | regexp_count_substring('foobarboopzoo'::str, 'o{1,}'::str, 1::i64) = 3::i64 6 | 
regexp_count_substring('foobarboopzoo'::str, 'o{1}'::str, 1::i64) = 6::i64 7 | regexp_count_substring('abcabcacb'::str, '[bc]'::str, 1::i64) = 6::i64 8 | regexp_count_substring('abcdefc'::str, '(.*)c'::str, 1::i64) = 1::i64 9 | regexp_count_substring('abcdefc'::str, '(.*)c?'::str, 1::i64) = 2::i64 10 | regexp_count_substring('foobarboopzoo'::str, 'o{1,}'::str) = 3::i64 11 | regexp_count_substring('foobarboopzoo'::str, 'o{1}'::str) = 6::i64 12 | regexp_count_substring('abcabcacb'::str, '[bc]'::str) = 6::i64 13 | regexp_count_substring('abcdefc'::str, '(.*)c'::str) = 1::i64 14 | regexp_count_substring('abcdefc'::str, '(.*)c?'::str) = 2::i64 15 | 16 | # null_input: Examples with null as input 17 | regexp_count_substring('Hello'::str, null::str, 1::i64) = null::i64 18 | regexp_count_substring(null::str, ' '::str, 1::i64) = null::i64 19 | regexp_count_substring('Hello'::str, null::str) = null::i64 20 | regexp_count_substring(null::str, ' '::str) = null::i64 21 | 22 | # metacharacters: Examples with metacharacters 23 | regexp_count_substring('abc1abc'::str, '\d'::str, 1::i64) = 1::i64 24 | regexp_count_substring('abc1abc'::str, '\D'::str, 1::i64) = 6::i64 25 | regexp_count_substring('abc def ghi'::str, '\s'::str, 1::i64) = 2::i64 26 | regexp_count_substring('abc def ghi'::str, '\S'::str, 1::i64) = 9::i64 27 | regexp_count_substring('abc def ghi'::str, '\w'::str, 1::i64) = 9::i64 28 | regexp_count_substring('abc def ghi,'::str, '\W'::str, 1::i64) = 3::i64 29 | regexp_count_substring('abc1abc'::str, '\d'::str) = 1::i64 30 | regexp_count_substring('abc1abc'::str, '\D'::str) = 6::i64 31 | regexp_count_substring('abc def ghi'::str, '\s'::str) = 2::i64 32 | regexp_count_substring('abc def ghi'::str, '\S'::str) = 9::i64 33 | regexp_count_substring('abc def ghi'::str, '\w'::str) = 9::i64 34 | regexp_count_substring('abc def ghi,'::str, '\W'::str) = 3::i64 35 | 36 | # lookahead: Examples with lookahead 37 | regexp_count_substring('100 dollars 100 dollars'::str, '\d+(?= 
dollars)'::str, 1::i64) [lookaround:TRUE] = 2::i64 38 | regexp_count_substring('100 dollars 100 dollars'::str, '\d+(?= dollars)'::str) [lookaround:TRUE] = 2::i64 39 | 40 | # negative_lookahead: Examples with negative lookahead 41 | regexp_count_substring('100 pesos, 99 pesos, 98 pesos'::str, '\d+(?!\d| dollars)'::str, 1::i64) [lookaround:TRUE] = 3::i64 42 | regexp_count_substring('100 pesos, 99 pesos, 98 pesos'::str, '\d+(?!\d| dollars)'::str) [lookaround:TRUE] = 3::i64 43 | 44 | # lookbehind: Examples with lookbehind 45 | regexp_count_substring('USD100'::str, '(?<=USD)\d{3}'::str, 1::i64) [lookaround:TRUE] = 1::i64 46 | regexp_count_substring('USD100'::str, '(?<=USD)\d{3}'::str) [lookaround:TRUE] = 1::i64 47 | 48 | # negative_lookbehind: Examples with negative lookbehind 49 | regexp_count_substring('JPY100JPY100'::str, '\d{3}(? 6 | regexp_string_split('Hello'::str, 'Hel+'::str) = ['', 'o']::list 7 | 8 | # greedy_matching: Examples with greedy matching 9 | regexp_string_split('HHHelloooo'::str, 'Hel+'::str) = ['HH', 'oooo']::list 10 | 11 | # position_anchors: Examples with position anchors 12 | regexp_string_split('abcdefg'::str, '\Aabc'::str) = ['', 'defg']::list 13 | regexp_string_split('abcdefg'::str, 'efg$'::str) = ['abcd', '']::list 14 | 15 | # metacharacters: Examples with metacharacters 16 | regexp_string_split('abc1abc'::str, '\d'::str) = ['abc', 'abc']::list 17 | regexp_string_split('111a111'::str, '\D'::str) = ['111', '111']::list 18 | regexp_string_split('abc def'::str, '\s'::str) = ['abc', 'def']::list 19 | regexp_string_split('a bcdef'::str, '\S'::str) = ['', ' ', '', '', '', '', '']::list 20 | regexp_string_split(' abcdef'::str, '\w'::str) = [' ', '', '', '', '', '', '']::list 21 | regexp_string_split('a bcdef'::str, '\W'::str) = ['a', 'bcdef']::list 22 | 23 | # occurrence_indicator: Examples with occurrence indicators 24 | regexp_string_split('abc123abc'::str, '[0-9]+'::str) = ['abc', 'abc']::list 25 | regexp_string_split('abc123abc'::str, 
'[bc]'::str) = ['a', '', '123a', '', '']::list 26 | regexp_string_split('abcde'::str, '(.*)c'::str) = ['', 'de']::list 27 | regexp_string_split('abbbbc'::str, '[b]{2,3}'::str) = ['a', 'bc']::list 28 | 29 | # lookahead: Examples with lookahead 30 | regexp_string_split('100 dollars'::str, '\d+(?= dollars)'::str) [lookaround:TRUE] = ['', ' dollars']::list 31 | 32 | # negative_lookahead: Examples with negative lookahead 33 | regexp_string_split('100 pesos'::str, '\d+(?!\d| dollars)'::str) [lookaround:TRUE] = ['', ' pesos']::list 34 | 35 | # lookbehind: Examples with lookbehind 36 | regexp_string_split('USD100'::str, '(?<=USD)\d{3}'::str) [lookaround:TRUE] = ['USD', '']::list 37 | 38 | # negative_lookbehind: Examples with negative lookbehind 39 | regexp_string_split('JPY100'::str, '\d{3}(? 40 | -------------------------------------------------------------------------------- /tests/cases/string/repeat.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | repeat('abc'::str, 2::i64) = 'abcabc'::str 6 | repeat('aBc'::str, 0::i64) = ''::str 7 | repeat(' abd'::str, 3::i64) = ' abd abd abd'::str 8 | repeat(' '::str, 5::i64) = ' '::str 9 | repeat(''::str, 2::i64) = ''::str 10 | 11 | # null_input: Examples with null as input 12 | repeat(null::str, 2::i64) = null::str 13 | -------------------------------------------------------------------------------- /tests/cases/string/replace.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | replace('abcabcabc'::str, 'bc'::str, 'dd'::str) = 'addaddadd'::str 6 | replace('abcabcabc'::str, ' '::str, 'dd'::str) = 'abcabcabc'::str 7 | replace('abc def ghi'::str, 
' '::str, ','::str) = 'abc,def,ghi'::str 8 | 9 | # null_input: Examples with null as input 10 | replace('abcd'::str, null::str, ','::str) = null::str 11 | replace('abcd'::str, ' '::str, null::str) = null::str 12 | replace(null::str, ' '::str, ','::str) = null::str 13 | -------------------------------------------------------------------------------- /tests/cases/string/reverse.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | reverse('abc'::str) = 'cba'::str 6 | reverse('aBc'::str) = 'cBa'::str 7 | reverse(' 123'::str) = '321 '::str 8 | reverse(''::str) = ''::str 9 | 10 | # null_input: Examples with null as input 11 | reverse(null::str) = null::str 12 | 13 | # unicode: Examples with unicode characters as input 14 | reverse('ææããa'::str) = 'aããææ'::str 15 | reverse('😔😄'::str) = '😄😔'::str 16 | -------------------------------------------------------------------------------- /tests/cases/string/right.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | right('abcdef'::str, 2::i32) = 'ef'::str 6 | right('abcdef'::str, 6::i32) = 'abcdef'::str 7 | right('abcdef'::str, 10::i32) = 'abcdef'::str 8 | right(' abcdef abcdef'::str, 10::i32) = 'ef abcdef'::str 9 | right(null::str, 10::i32) = null::str 10 | right('abcdef'::str, null::i32) = null::str 11 | 12 | # unicode: Examples with unicode characters as input 13 | right('ææããa'::str, 2::i32) = 'ãa'::str 14 | right('😔😄😔😄'::str, 2::i32) = '😔😄'::str 15 | -------------------------------------------------------------------------------- /tests/cases/string/rpad.test: -------------------------------------------------------------------------------- 1 
| ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | rpad('abcdef'::str, 10::i32, ' '::str) = 'abcdef '::str 6 | rpad('abcdef '::str, 20::i32, '1'::str) = 'abcdef 1111111111'::str 7 | rpad(' abcdef'::str, 20::i32, '1'::str) = ' abcdef1111111111'::str 8 | rpad('abcdef'::str, 6::i32, ' '::str) = 'abcdef'::str 9 | rpad('abcdef'::str, 20::i32, 'aabb'::str) = 'abcdefaabbaabbaabbaa'::str 10 | rpad('abcdef'::str, 4::i32, ' '::str) = 'abcd'::str 11 | rpad('abcdef'::str, -1::i32, ' '::str) = ''::str 12 | rpad(null::str, 4::i32, ' '::str) = null::str 13 | rpad('abcdef'::str, 10::i32, null::str) = null::str 14 | -------------------------------------------------------------------------------- /tests/cases/string/rtrim.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | rtrim('abc'::str, ' '::str) = 'abc'::str 6 | rtrim(' abc'::str, ' '::str) = ' abc'::str 7 | rtrim('abc '::str, ' '::str) = 'abc'::str 8 | rtrim(' abc '::str, ' '::str) = ' abc'::str 9 | rtrim(''::str, ' '::str) = ''::str 10 | rtrim(' '::str, ' '::str) = ''::str 11 | rtrim(null::str, ' '::str) = null::str 12 | 13 | # two_inputs: Examples with character input to trim off 14 | rtrim('aaaaabccccc'::str, 'c'::str) [spaces_only:FALSE] = 'aaaaab'::str 15 | rtrim('abcabcdef'::str, 'def'::str) [spaces_only:FALSE] = 'abcabc'::str 16 | rtrim('defabccba'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str 17 | -------------------------------------------------------------------------------- /tests/cases/string/starts_with.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic 
examples without any special cases 5 | starts_with('abcd'::str, 'a'::str) = true::bool 6 | starts_with('abcd'::str, 'z'::str) = false::bool 7 | starts_with('abcd'::str, 'AB'::str) = false::bool 8 | 9 | # case_insensitivity: single byte character comparison with case insensitivity 10 | starts_with('abcd'::str, 'AB'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 11 | 12 | # multi_byte_characters: multi byte character comparison 13 | starts_with('😊a😊b😊😊'::str, '😊a'::str) = true::bool 14 | 15 | # multi_byte_characters case insensitivity: multi byte character comparison with case insensitivity 16 | starts_with('😊a😊b😊😊'::str, '😊A'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool 17 | -------------------------------------------------------------------------------- /tests/cases/string/string_split.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | string_split('abc'::str, ' '::str) = ['abc']::list 6 | string_split('abc abc'::str, ' '::str) = ['abc', 'abc']::list 7 | string_split('bacad'::str, 'a'::str) = ['b', 'c', 'd']::list 8 | string_split('a b c d'::str, ' '::str) = ['a', 'b', 'c', 'd']::list 9 | string_split('a b c d'::str, null::str) = ['a b c d']::list 10 | -------------------------------------------------------------------------------- /tests/cases/string/substring.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | substring('abcdefg'::str, 1::i32, 5::i32) = 'abcde'::str 6 | substring('abcdefg'::str, 1::i32, 5::i32) = 'abcde'::str 7 | 8 | # start_greater_than_length: Example where start argument greater than the length of the string 9 | substring('abcdefg'::str, 
10::i32, 2::i32) = ''::str 10 | substring('abcdefg'::str, 10::i32, 2::i32) = ''::str 11 | 12 | # multi_byte_characters: Example where multi byte characters exist in the string 13 | substring('😊a😊b😊😊'::str, 1::i32, 3::i32) = '😊a😊'::str 14 | substring('😊a😊b😊😊'::str, 1::i32, 3::i32) = '😊a😊'::str 15 | 16 | # negative_start: Example where start argument is a negative integer 17 | substring('abcdefg'::str, -1::i32, 2::i32) [negative_start:WRAP_FROM_END] = 'g'::str 18 | substring('abcdefg'::str, -2::i32, 1::i32) [negative_start:WRAP_FROM_END] = 'f'::str 19 | substring('abcdefg'::str, -1::i32, 2::i32) [negative_start:LEFT_OF_BEGINNING] = ''::str 20 | substring('abcdefg'::str, -1::i32, 3::i32) [negative_start:LEFT_OF_BEGINNING] = 'a'::str 21 | -------------------------------------------------------------------------------- /tests/cases/string/trim.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | # basic: Basic examples without any special cases 5 | trim('abc'::str, ' '::str) = 'abc'::str 6 | trim(' abc'::str, ' '::str) = 'abc'::str 7 | trim('abc '::str, ' '::str) = 'abc'::str 8 | trim(' abc '::str, ' '::str) = 'abc'::str 9 | trim(''::str, ' '::str) = ''::str 10 | trim(' '::str, ' '::str) = ''::str 11 | trim(null::str, ' '::str) = null::str 12 | 13 | # two_inputs: Examples with character input to trim off 14 | trim('aaaaabcccccaaa'::str, 'a'::str) [spaces_only:False] = 'bccccc'::str 15 | trim('defabcabcdef'::str, 'def'::str) [spaces_only:False] = 'abcabc'::str 16 | trim('abcdefcbaa'::str, 'abc'::str) [spaces_only:False] = 'def'::str 17 | -------------------------------------------------------------------------------- /tests/cases/string/upper.test: -------------------------------------------------------------------------------- 1 | ### SUBSTRAIT_SCALAR_TEST: v1.0 2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' 3 | 4 | 
# basic: Basic examples without any special cases 5 | upper('abc'::str) = 'ABC'::str 6 | upper('aBc'::str) = 'ABC'::str 7 | upper('ABC'::str) = 'ABC'::str 8 | upper(''::str) = ''::str 9 | 10 | # null_input: Examples with null as input 11 | upper(null::str) = null::str 12 | 13 | # unicode: Examples with unicode characters as input 14 | upper('ææããa'::str) [full_unicode:TRUE] = 'ÆÆÃÃA'::str 15 | upper('😄'::str) = '😄'::str 16 | -------------------------------------------------------------------------------- /tests/coverage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait/d430e521f203aec6a4e06731d4bfd68cdf61f443/tests/coverage/__init__.py -------------------------------------------------------------------------------- /tests/coverage/case_file_parser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import os 3 | 4 | from antlr4 import CommonTokenStream, FileStream 5 | from antlr4.error.ErrorListener import ErrorListener 6 | 7 | from tests.coverage.antlr_parser.FuncTestCaseLexer import FuncTestCaseLexer 8 | from tests.coverage.antlr_parser.FuncTestCaseParser import FuncTestCaseParser 9 | from tests.coverage.visitor import TestCaseVisitor, ParseError 10 | 11 | 12 | class ParseErrorListener(ErrorListener): 13 | def __init__(self): 14 | super(ParseErrorListener, self).__init__() 15 | self.errors = [] 16 | 17 | def syntaxError(self, recognizer, offending_symbol, line, column, msg, e): 18 | error_message = f"Syntax error at line {line}, column {column}: {msg}" 19 | self.errors.append(error_message) 20 | 21 | 22 | def parse_stream(input_stream, file_path): 23 | # Create a lexer and parser 24 | lexer = FuncTestCaseLexer(input_stream) 25 | token_stream = CommonTokenStream(lexer) 26 | parser = FuncTestCaseParser(token_stream) 27 | 28 | # Add custom error listener 29 | error_listener = 
ParseErrorListener() 30 | parser.removeErrorListeners() 31 | parser.addErrorListener(error_listener) 32 | 33 | tree = parser.doc() # This is the entry point of testfile parser 34 | if parser.getNumberOfSyntaxErrors() > 0: 35 | print(tree.toStringTree(recog=parser)) 36 | print(f"{parser.getNumberOfSyntaxErrors()} Syntax errors found, exiting") 37 | raise ParseError(f"Syntax errors: {error_listener.errors}") 38 | 39 | # uncomment below line to see the parse tree for debugging 40 | # print(tree.toStringTree(recog=parser)) 41 | 42 | visitor = TestCaseVisitor(file_path) 43 | test_file = visitor.visit(tree) 44 | return test_file 45 | 46 | 47 | def parse_one_file(file_path): 48 | return parse_stream(FileStream(file_path, "UTF-8"), file_path) 49 | 50 | 51 | def parse_testcase_directory_recursively(dir_path): 52 | # for each file in directory call parse_one_file 53 | test_files = [] 54 | for child in os.listdir(dir_path): 55 | child_path = os.path.join(dir_path, child) 56 | if os.path.isfile(child_path) and child.endswith(".test"): 57 | test_file = parse_one_file(child_path) 58 | test_files.append(test_file) 59 | elif os.path.isdir(child_path): 60 | test_files_in_a_dir = parse_testcase_directory_recursively(child_path) 61 | test_files.extend(test_files_in_a_dir) 62 | return test_files 63 | 64 | 65 | def load_all_testcases(dir_path) -> list: 66 | return parse_testcase_directory_recursively(dir_path) 67 | -------------------------------------------------------------------------------- /tests/coverage/nodes.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from dataclasses import dataclass 3 | from typing import List 4 | 5 | 6 | @dataclass 7 | class CaseGroup: 8 | name: str 9 | description: str 10 | 11 | 12 | @dataclass 13 | class SubstraitError: 14 | error: str 15 | 16 | 17 | @dataclass 18 | class CaseLiteral: 19 | value: str | int | float | list | None 20 | type: str 21 | 22 | def 
get_base_type(self): 23 | type_str = self.type 24 | if "<" in type_str: 25 | type_str = type_str[: type_str.find("<")] 26 | if type_str.endswith("?"): 27 | return type_str[:-1] 28 | return type_str 29 | 30 | 31 | @dataclass 32 | class AggregateArgument: 33 | column_name: str 34 | column_type: str 35 | table_name: str 36 | scalar_value: CaseLiteral | None 37 | 38 | 39 | @dataclass 40 | class TestCase: 41 | func_name: str 42 | base_uri: str 43 | group: CaseGroup | None 44 | options: dict 45 | rows: List[List] | None 46 | args: List[CaseLiteral] | List[AggregateArgument] 47 | result: CaseLiteral | str | SubstraitError 48 | comment: str 49 | 50 | def get_return_type(self): 51 | if isinstance(self.result, CaseLiteral): 52 | return self.result.type 53 | return self.result 54 | 55 | def is_return_type_error(self): 56 | return isinstance(self.result, SubstraitError) 57 | 58 | def get_arg_types(self): 59 | return [arg.get_base_type() for arg in self.args] 60 | 61 | def get_signature(self): 62 | return f"{self.func_name}({', '.join([arg.type for arg in self.args])}) = {self.get_return_type()}" 63 | 64 | 65 | @dataclass 66 | class TestFile: 67 | path: str 68 | version: str 69 | include: str 70 | testcases: List[TestCase] 71 | -------------------------------------------------------------------------------- /tests/test_extensions.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import json 3 | import os 4 | from dataclasses import asdict 5 | 6 | from tests.baseline import read_baseline_file, generate_baseline 7 | from tests.coverage.case_file_parser import load_all_testcases 8 | from tests.coverage.coverage import get_test_coverage 9 | from tests.coverage.extensions import build_type_to_short_type 10 | from tests.coverage.extensions import Extension 11 | 12 | 13 | # NOTE: this test is run as part of pre-commit hook 14 | def test_substrait_extension_coverage(): 15 | script_dir = 
def test_build_type_to_short_type():
    """Check the short name that build_type_to_short_type() maps each long type name to."""
    # Long type name -> expected short name, checked in this order.
    expected_short_names = {
        "i64": "i64",
        "fp64": "fp64",
        "timestamp": "ts",
        "timestamp_tz": "tstz",
        "precision_timestamp": "pts",
        "precision_timestamp_tz": "ptstz",
        "interval_year": "iyear",
        "interval_day": "iday",
        "decimal": "dec",
        "boolean": "bool",
        "string": "str",
        "binary": "vbin",
        "fixedbinary": "fbin",
        "fixedchar": "fchar",
        "varchar": "vchar",
        "list": "list",
        "map": "map",
        "struct": "struct",
    }
    long_to_short = build_type_to_short_type()
    for long_name, short_name in expected_short_names.items():
        assert long_to_short[long_name] == short_name