├── .editorconfig
├── .flake8
├── .gitattributes
├── .github
├── CODEOWNERS
├── dependabot.yml
├── pull_request_template.md
└── workflows
│ ├── licence_check.yml
│ ├── pr.yml
│ ├── pr_breaking.yml
│ ├── pr_title.yml
│ ├── release.yml
│ └── site.yml
├── .gitignore
├── .licenserc.yaml
├── .pre-commit-config.yaml
├── .python-version
├── .releaserc.json
├── .yamllint.yaml
├── CHANGELOG.md
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── buf.gen.yaml
├── buf.work.yaml
├── ci
└── release
│ ├── dry_run.sh
│ ├── prepare.sh
│ ├── publish.sh
│ ├── run.sh
│ └── verify.sh
├── core.go
├── core_test.go
├── extensions
├── extension_types.yaml
├── functions_aggregate_approx.yaml
├── functions_aggregate_decimal_output.yaml
├── functions_aggregate_generic.yaml
├── functions_arithmetic.yaml
├── functions_arithmetic_decimal.yaml
├── functions_boolean.yaml
├── functions_comparison.yaml
├── functions_datetime.yaml
├── functions_geometry.yaml
├── functions_logarithmic.yaml
├── functions_rounding.yaml
├── functions_rounding_decimal.yaml
├── functions_set.yaml
├── functions_string.yaml
├── type_variations.yaml
└── unknown.yaml
├── go.mod
├── go.sum
├── grammar
├── FuncTestCaseLexer.g4
├── FuncTestCaseParser.g4
├── Makefile
├── README.md
├── SubstraitLexer.g4
├── SubstraitType.g4
└── prepend_license.sh
├── proto
├── buf.lock
├── buf.yaml
└── substrait
│ ├── algebra.proto
│ ├── capabilities.proto
│ ├── extended_expression.proto
│ ├── extensions
│ └── extensions.proto
│ ├── function.proto
│ ├── parameterized_types.proto
│ ├── plan.proto
│ ├── type.proto
│ └── type_expressions.proto
├── pyproject.toml
├── requirements.txt
├── site
├── .gitignore
├── README.md
├── data
│ ├── committers.yaml
│ └── smc.yaml
├── docs
│ ├── _config
│ ├── about.md
│ ├── community
│ │ ├── index.md
│ │ └── powered_by.md
│ ├── expressions
│ │ ├── _config
│ │ ├── aggregate_functions.md
│ │ ├── dynamic_parameters.md
│ │ ├── embedded_functions.md
│ │ ├── extended_expression.md
│ │ ├── field_references.md
│ │ ├── scalar_functions.md
│ │ ├── specialized_record_expressions.md
│ │ ├── subqueries.md
│ │ ├── table_functions.md
│ │ ├── user_defined_functions.md
│ │ └── window_functions.md
│ ├── extensions
│ │ ├── .gitignore
│ │ ├── generate_function_docs.py
│ │ └── index.md
│ ├── faq.md
│ ├── governance.md
│ ├── img
│ │ └── logo.svg
│ ├── index.md
│ ├── relations
│ │ ├── _config
│ │ ├── basics.md
│ │ ├── common_fields.md
│ │ ├── embedded_relations.md
│ │ ├── logical_relations.md
│ │ ├── physical_relations.md
│ │ └── user_defined_relations.md
│ ├── serialization
│ │ ├── _config
│ │ ├── basics.md
│ │ ├── binary_serialization.md
│ │ └── text_serialization.md
│ ├── spec
│ │ ├── _config
│ │ ├── extending.md
│ │ ├── specification.md
│ │ ├── technology_principles.md
│ │ └── versioning.md
│ ├── stylesheets
│ │ └── extra.css
│ ├── tools
│ │ ├── _config
│ │ ├── producer_tools.md
│ │ ├── substrait_validator.md
│ │ └── third_party_tools.md
│ ├── tutorial
│ │ ├── examples.md
│ │ ├── expression_trees.svg
│ │ ├── field_indices_layout.svg
│ │ ├── final_plan.json
│ │ ├── plan_tree_versus_expression.svg
│ │ ├── sql_to_substrait.md
│ │ └── substrait_components.svg
│ └── types
│ │ ├── _config
│ │ ├── named_structs.md
│ │ ├── type_classes.md
│ │ ├── type_parsing.md
│ │ ├── type_system.md
│ │ └── type_variations.md
├── mkdocs.yml
├── overrides
│ └── partials
│ │ └── footer.html
└── requirements.txt
├── tests
├── README.md
├── __init__.py
├── baseline.json
├── baseline.py
├── cases
│ ├── aggregate_approx
│ │ └── approx_count_distinct.test
│ ├── aggregate_generic
│ │ └── count.test
│ ├── arithmetic
│ │ ├── abs.test
│ │ ├── acos.test
│ │ ├── acosh.test
│ │ ├── add.test
│ │ ├── asin.test
│ │ ├── asinh.test
│ │ ├── atan.test
│ │ ├── atan2.test
│ │ ├── atanh.test
│ │ ├── bitwise_and.test
│ │ ├── bitwise_not.test
│ │ ├── bitwise_or.test
│ │ ├── bitwise_xor.test
│ │ ├── cos.test
│ │ ├── cosh.test
│ │ ├── divide.test
│ │ ├── exp.test
│ │ ├── factorial.test
│ │ ├── max.test
│ │ ├── min.test
│ │ ├── modulus.test
│ │ ├── multiply.test
│ │ ├── negate.test
│ │ ├── power.test
│ │ ├── sin.test
│ │ ├── sinh.test
│ │ ├── sqrt.test
│ │ ├── subtract.test
│ │ ├── sum.test
│ │ ├── tan.test
│ │ └── tanh.test
│ ├── arithmetic_decimal
│ │ ├── bitwise_and.test
│ │ ├── bitwise_or.test
│ │ ├── bitwise_xor.test
│ │ ├── factorial_decimal.test
│ │ ├── max_decimal.test
│ │ ├── min_decimal.test
│ │ ├── power.test
│ │ ├── power_decimal.test
│ │ ├── sqrt_decimal.test
│ │ └── sum_decimal.test
│ ├── boolean
│ │ ├── and.test
│ │ ├── and_not.test
│ │ ├── bool_and.test
│ │ ├── bool_or.test
│ │ ├── not.test
│ │ ├── or.test
│ │ └── xor.test
│ ├── comparison
│ │ ├── between.test
│ │ ├── coalesce.test
│ │ ├── equal.test
│ │ ├── gt.test
│ │ ├── gte.test
│ │ ├── is_false.test
│ │ ├── is_finite.test
│ │ ├── is_infinite.test
│ │ ├── is_nan.test
│ │ ├── is_not_distinct_from.test
│ │ ├── is_not_false.test
│ │ ├── is_not_null.test
│ │ ├── is_not_true.test
│ │ ├── is_null.test
│ │ ├── is_true.test
│ │ ├── lt.test
│ │ ├── lte.test
│ │ ├── not_equal.test
│ │ └── nullif.test
│ ├── datetime
│ │ ├── add_datetime.test
│ │ ├── add_intervals.test
│ │ ├── extract.test
│ │ ├── gt_datetime.test
│ │ ├── gte_datetime.test
│ │ ├── lt_datetime.test
│ │ ├── lte_datetime.test
│ │ └── subtract_datetime.test
│ ├── logarithmic
│ │ ├── ln.test
│ │ ├── log10.test
│ │ ├── log2.test
│ │ └── logb.test
│ ├── rounding
│ │ ├── ceil.test
│ │ ├── floor.test
│ │ └── round.test
│ ├── rounding_decimal
│ │ ├── ceil.test
│ │ ├── floor.test
│ │ └── round.test
│ └── string
│ │ ├── bit_length.test
│ │ ├── char_length.test
│ │ ├── concat.test
│ │ ├── concat_ws.test
│ │ ├── contains.test
│ │ ├── ends_with.test
│ │ ├── left.test
│ │ ├── like.test
│ │ ├── lower.test
│ │ ├── lpad.test
│ │ ├── ltrim.test
│ │ ├── octet_length.test
│ │ ├── regexp_count_substring.test
│ │ ├── regexp_match_substring.test
│ │ ├── regexp_replace.test
│ │ ├── regexp_string_split.test
│ │ ├── repeat.test
│ │ ├── replace.test
│ │ ├── reverse.test
│ │ ├── right.test
│ │ ├── rpad.test
│ │ ├── rtrim.test
│ │ ├── starts_with.test
│ │ ├── string_split.test
│ │ ├── substring.test
│ │ ├── trim.test
│ │ └── upper.test
├── coverage
│ ├── __init__.py
│ ├── antlr_parser
│ │ ├── FuncTestCaseLexer.py
│ │ ├── FuncTestCaseParser.py
│ │ ├── FuncTestCaseParserListener.py
│ │ └── FuncTestCaseParserVisitor.py
│ ├── case_file_parser.py
│ ├── coverage.py
│ ├── extensions.py
│ ├── nodes.py
│ ├── test_coverage.py
│ └── visitor.py
├── test_extensions.py
└── type
│ └── antlr_parser
│ ├── SubstraitLexer.py
│ ├── SubstraitTypeLexer.py
│ ├── SubstraitTypeListener.py
│ ├── SubstraitTypeParser.py
│ └── SubstraitTypeVisitor.py
├── text
└── simple_extensions_schema.yaml
└── tools
└── proto_prefix.py
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | end_of_line = lf
6 | insert_final_newline = true
7 | indent_style = space
8 | trim_trailing_whitespace = true
9 |
10 | [site/**]
11 | charset = unset
12 | end_of_line = unset
13 | insert_final_newline = unset
14 | indent_style = unset
15 | trim_trailing_whitespace = unset
16 |
17 | [*.{proto,yaml,yml}]
18 | indent_size = 2
19 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E203, E266, E501, W503, F403, F401
3 | max-line-length = 88
4 | select = B,C,E,F,W,T4,B9
5 | exclude =
6 | # exclude generated test parser
7 | tests/coverage/antlr_parser/*.py,
8 | # exclude generated type parser
9 | tests/type/antlr_parser/*.py
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | proto/buf.lock linguist-generated=true
2 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 |
3 | * @jacques-n @cpcloud @westonpace @epsilonprime @vbarua
4 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "daily"
7 |
8 | - package-ecosystem: "pip"
9 | directory: "/site"
10 | schedule:
11 | interval: "daily"
12 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | Thank you for submitting a PR!
2 |
3 | Before you continue, please ensure that your PR title and description (this message!) follow [conventional commit syntax][1]. Substrait uses an automated release process that, among other things, uses PR titles & descriptions to build a changelog, so the syntax and format matter!
4 |
5 | The title of the PR should be a valid commit header.
6 |
7 | Some examples of proper commit message headers and PR titles:
8 |
9 | - `feat: add feature X`
10 | - `fix: X in case of Y`
11 | - `docs: improve documentation for X`
12 |
13 | Note the case and grammar conventions.
14 |
15 | Furthermore, the description of any PR that includes a breaking change should contain a paragraph that starts with `BREAKING CHANGE: ...`, where `...` explains what changed. The automated release process uses this to determine how it should bump the version number. Anything that changes the behavior of a plan that was previously legal is considered a breaking change; note that this includes behavior specifications that only exist in Substrait in the form of behavior descriptions on the website or in comments.
16 |
17 | [1]: https://www.conventionalcommits.org/en/v1.0.0/
18 |
--------------------------------------------------------------------------------
/.github/workflows/licence_check.yml:
--------------------------------------------------------------------------------
1 | name: License check
2 |
3 | on: pull_request
4 |
5 | jobs:
6 | license:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v4
10 |
11 | - name: Check License Header
12 | uses: enarx/spdx@master
13 | with:
14 | licenses: |-
15 | Apache-2.0
16 | MIT
17 |
--------------------------------------------------------------------------------
/.github/workflows/pr.yml:
--------------------------------------------------------------------------------
1 | name: PR Build Check
2 |
3 | on:
4 | pull_request:
5 | jobs:
6 | site:
7 | name: Build Website
8 | runs-on: ubuntu-latest
9 | steps:
10 | - uses: actions/checkout@v4
11 | - uses: actions/setup-python@v5
12 | with:
13 | python-version: '3.13'
14 | cache: 'pip'
15 | cache-dependency-path: ./site/requirements.txt
16 | - run: pip install -r ./site/requirements.txt
17 | - name: Generate Static Site
18 | run: mkdocs build
19 | working-directory: ./site
20 | editorconfig-checker:
21 | name: Check editorconfig
22 | runs-on: ubuntu-latest
23 | steps:
24 | - uses: editorconfig-checker/action-editorconfig-checker@v2  # NOTE(review): no checkout step; confirm
25 | proto-format-check:
26 | name: Check Protobuf Style
27 | runs-on: ubuntu-latest
28 | steps:
29 | - uses: actions/checkout@v4
30 | - uses: bufbuild/buf-setup-action@v1.50.0
31 | with:
32 | github_token: ${{ github.token }}
33 | - run: buf format --diff --exit-code
34 | proto:
35 | name: Check Protobuf
36 | runs-on: ubuntu-latest
37 | steps:
38 | - uses: actions/checkout@v4
39 | - uses: bufbuild/buf-setup-action@v1.50.0
40 | with:
41 | github_token: ${{ github.token }}
42 | - uses: bufbuild/buf-lint-action@v1
43 | - name: Compile protobuf
44 | run: buf generate
45 | yamllint:
46 | name: Lint YAML extensions
47 | runs-on: ubuntu-latest
48 | steps:
49 | - uses: actions/checkout@v4
50 | - name: Run yamllint
51 | run: yamllint .
52 | yamlvalidate:
53 | name: Validate YAML extensions
54 | runs-on: ubuntu-latest
55 | steps:
56 | - uses: actions/checkout@v4
57 | - uses: actions/setup-node@v4
58 | with:
59 | node-version: "20"
60 | - run: npm install -g ajv-cli
61 | - run: |
62 | set -euo pipefail
63 | for i in $(ls);
64 | do
65 | ajv validate -s ../text/simple_extensions_schema.yaml --strict=true --spec=draft2020 -d "$i"
66 | done
67 | working-directory: ./extensions
68 | dry_run_release:
69 | name: Dry-run release
70 | runs-on: ubuntu-latest
71 | steps:
72 | - uses: actions/checkout@v4
73 | with:
74 | fetch-depth: 0
75 | - uses: bufbuild/buf-setup-action@v1.50.0
76 | - uses: actions/setup-node@v4
77 | with:
78 | node-version: "20"
79 | - run: ./ci/release/dry_run.sh
80 | python-style:
81 | name: Style-check and lint Python files, and run tests
82 | runs-on: ubuntu-latest
83 | steps:
84 | - uses: actions/checkout@v4
85 | - name: Install dependencies
86 | run: python3 -m pip install -r requirements.txt
87 | - name: Black
88 | run: python3 -m black --diff --check .
89 | - name: Flake8
90 | run: python3 -m flake8 .
91 | - name: Run tests including test_substrait_extension_coverage
92 | run: |
93 | pytest
94 | check-proto-prefix:
95 | name: Check proto-prefix.py
96 | runs-on: ubuntu-latest
97 | steps:
98 | - uses: actions/checkout@v4
99 | - uses: bufbuild/buf-setup-action@v1.50.0
100 | - name: Run proto-prefix.py
101 | run: tools/proto_prefix.py output test proto go_package=github.com/test/proto
102 | - name: Modify buf config to build rewritten proto files
103 | run: |
104 | echo "version: v1" > buf.work.yaml
105 | echo "directories:" >> buf.work.yaml
106 | echo " - output" >> buf.work.yaml
107 | - name: Compile rewritten proto files
108 | run: buf generate
109 |
--------------------------------------------------------------------------------
/.github/workflows/pr_breaking.yml:
--------------------------------------------------------------------------------
1 | name: Breaking Changes Check
2 |
3 | on:
4 | pull_request:
5 | types: [opened, edited, synchronize, reopened]
6 | jobs:
7 | breaking:
8 | name: Ensure breaking changes are labeled in description
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v4
12 | - uses: bufbuild/buf-setup-action@v1.50.0
13 | with:
14 | github_token: ${{ github.token }}
15 | - name: check for breaking changes
16 | id: check-breaking
17 | run: |
18 |
19 | if ! buf breaking --against 'https://github.com/substrait-io/substrait.git#branch=main'; then
20 | breaking="true"
21 | else
22 | breaking="false"
23 | fi
24 |
25 | echo "breaking=${breaking}" >> $GITHUB_OUTPUT
26 | - name: check whether the PR description includes a breaking change footer
27 | if: ${{ fromJson(steps.check-breaking.outputs.breaking) }}
28 | run: |
29 | # check PR description for a BREAKING CHANGE section if any breaking changes occurred
30 | grep '^BREAKING CHANGE: ' <<< $COMMIT_DESC
31 | env:
32 | COMMIT_DESC: ${{ github.event.pull_request.body }}
33 |
--------------------------------------------------------------------------------
/.github/workflows/pr_title.yml:
--------------------------------------------------------------------------------
1 | name: PR Title Check
2 |
3 | on:
4 | pull_request_target:
5 | types: [opened, edited, synchronize, reopened]
6 | jobs:
7 | commitlint:
8 | name: PR title / description conforms to semantic-release
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/setup-node@v4
12 | with:
13 | node-version: "20"
14 | - run: npm install @commitlint/config-conventional
15 | - run: >
16 | echo 'module.exports = {
17 | // Workaround for https://github.com/dependabot/dependabot-core/issues/5923
18 | "ignores": [(message) => /^Bumps \[.+]\(.+\) from .+ to .+\.$/m.test(message)],
19 | "rules": {
20 | "body-max-line-length": [0, "always", Infinity],
21 | "footer-max-line-length": [0, "always", Infinity],
22 | "body-leading-blank": [0, "always"]
23 | }
24 | }' > .commitlintrc.js
25 | - run: npx commitlint --extends @commitlint/config-conventional --verbose <<< $COMMIT_MSG
26 | env:
27 | COMMIT_MSG: >
28 | ${{ github.event.pull_request.title }}
29 |
30 | ${{ github.event.pull_request.body }}
31 | - if: failure()
32 | uses: actions/github-script@v7
33 | with:
34 | script: |
35 | const message = `**ACTION NEEDED**
36 |
37 | Substrait follows the [Conventional Commits
38 | specification](https://www.conventionalcommits.org/en/v1.0.0/) for
39 | release automation.
40 |
41 | The PR title and description are used as the merge commit message.\
42 | Please update your PR title and description to match the specification.
43 | `
44 | // Get list of current comments
45 | const comments = await github.paginate(github.rest.issues.listComments, {
46 | owner: context.repo.owner,
47 | repo: context.repo.repo,
48 | issue_number: context.issue.number
49 | });
50 | // Check if this job already commented
51 | for (const comment of comments) {
52 | if (comment.body === message) {
53 | return // Already commented
54 | }
55 | }
56 | // Post the comment about Conventional Commits
57 | github.rest.issues.createComment({
58 | owner: context.repo.owner,
59 | repo: context.repo.repo,
60 | issue_number: context.issue.number,
61 | body: message
62 | })
63 | core.setFailed(message)
64 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | schedule:
5 | # 2 AM on Sunday
6 | - cron: "0 2 * * 0"
7 | workflow_dispatch:
8 |
9 | # we do not want more than one release workflow executing at the same time, ever
10 | concurrency:
11 | group: release
12 | # cancelling in the middle of a release would create incomplete releases
13 | # so cancel-in-progress is false
14 | cancel-in-progress: false
15 |
16 | jobs:
17 | release:
18 | runs-on: ubuntu-latest
19 | if: github.repository == 'substrait-io/substrait'
20 | steps:
21 | - uses: tibdex/github-app-token@v2
22 | id: generate-token
23 | with:
24 | app_id: ${{ secrets.APP_ID }}
25 | private_key: ${{ secrets.APP_PRIVATE_KEY }}
26 |
27 | - uses: actions/checkout@v4
28 | with:
29 | fetch-depth: 0
30 | token: ${{ steps.generate-token.outputs.token }}
31 |
32 | - uses: actions/setup-node@v4
33 | with:
34 | node-version: "20"
35 |
36 | - uses: bufbuild/buf-setup-action@v1.50.0
37 | with:
38 | github_token: ${{ github.token }}
39 |
40 | - name: run semantic-release
41 | run: ./ci/release/run.sh
42 | env:
43 | BUF_TOKEN: ${{ secrets.BUF_TOKEN }}
44 | GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }}
45 |
--------------------------------------------------------------------------------
/.github/workflows/site.yml:
--------------------------------------------------------------------------------
1 | name: Site
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | paths:
7 | - "site/**"
8 | - "extensions/**"
9 |
10 | jobs:
11 | site:
12 | name: Build & Deploy Website
13 | runs-on: ubuntu-latest
14 | if: ${{ github.repository == 'substrait-io/substrait' }}
15 | steps:
16 | - uses: actions/checkout@v4
17 | - uses: actions/setup-python@v5
18 | with:
19 | python-version: '3.13'
20 | cache: 'pip'
21 | cache-dependency-path: ./site/requirements.txt
22 | - run: pip install -r ./site/requirements.txt
23 | - name: Generate Static Site
24 | run: mkdocs build
25 | working-directory: ./site
26 | - name: Deploy Static Site to GitHub
27 | uses: peaceiris/actions-gh-pages@v4
28 | with:
29 | external_repository: substrait-io/substrait.io
30 | publish_branch: main
31 | deploy_key: ${{ secrets.SUBSTRAIT_SITE_DEPLOY_KEY }}
32 | publish_dir: ./site/site
33 | cname: substrait.io
34 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/target
2 | **/.gradle
3 | **/.idea
4 | **/build
5 | gen
6 |
--------------------------------------------------------------------------------
/.licenserc.yaml:
--------------------------------------------------------------------------------
1 | header:
2 | license:
3 | spdx-id: Apache-2.0
4 |
5 | paths:
6 | - 'proto/substrait/**'
7 |
8 | comment: never
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/nametake/pre-commit-buf
3 | rev: v2.0.0
4 | hooks:
5 | - id: buf-lint
6 | - repo: https://github.com/adrienverge/yamllint.git
7 | rev: v1.35.1
8 | hooks:
9 | - id: yamllint
10 | args: [-c=.yamllint.yaml]
11 | - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
12 | rev: v9.20.0
13 | hooks:
14 | - id: commitlint
15 | stages: [commit-msg]
16 | - repo: https://github.com/psf/black
17 | rev: 24.8.0
18 | hooks:
19 | - id: black
20 | - repo: https://github.com/pycqa/flake8
21 | rev: 7.0.0
22 | hooks:
23 | - id: flake8
24 | - repo: local
25 | hooks:
26 | - id: check-substrait-extensions_coverage
27 | name: Check Substrait extensions and test coverage
28 | entry: pytest tests/test_extensions.py::test_substrait_extension_coverage
29 | language: python
30 | pass_filenames: false
31 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.13
--------------------------------------------------------------------------------
/.releaserc.json:
--------------------------------------------------------------------------------
1 | {
2 | "branches": ["main"],
3 | "preset": "conventionalcommits",
4 | "plugins": [
5 | [
6 | "@semantic-release/commit-analyzer",
7 | {
8 | "releaseRules": [
9 | {"breaking": true, "release": "minor"}
10 | ]
11 | }
12 | ],
13 | "@semantic-release/release-notes-generator",
14 | [
15 | "@semantic-release/changelog",
16 | {
17 | "changelogTitle": "Release Notes\n---",
18 | "changelogFile": "CHANGELOG.md"
19 | }
20 | ],
21 | [
22 | "@semantic-release/exec",
23 | {
24 | "verifyConditionsCmd": "ci/release/verify.sh",
25 | "prepareCmd": "ci/release/prepare.sh",
26 | "publishCmd": "ci/release/publish.sh ${nextRelease.version}"
27 | }
28 | ],
29 | [
30 | "@semantic-release/github",
31 | {
32 | "successComment": false
33 | }
34 | ],
35 | [
36 | "@semantic-release/git",
37 | {
38 | "assets": ["CHANGELOG.md"],
39 | "message": "chore(release): ${nextRelease.version}"
40 | }
41 | ]
42 | ]
43 | }
44 |
--------------------------------------------------------------------------------
/.yamllint.yaml:
--------------------------------------------------------------------------------
1 | rules:
2 | line-length:
3 | max: 120
4 | brackets:
5 | forbid: false
6 | min-spaces-inside: 0
7 | max-spaces-inside: 1
8 | min-spaces-inside-empty: 0
9 | max-spaces-inside-empty: 0
10 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | title: >-
3 | Substrait: Cross-Language Serialization for Relational
4 | Algebra
5 | message: >-
6 | If you use this software, please cite it using the
7 | metadata from this file.
8 | type: software
9 | authors:
10 | - given-names: substrait-io
11 | identifiers:
12 | - type: url
13 | value: 'https://github.com/substrait-io/substrait'
14 | repository-code: 'https://github.com/substrait-io/substrait'
15 | url: 'https://substrait.io/'
16 | license: Apache-2.0
17 | date-released: '2021-09-01'
18 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Substrait
2 |
3 | Welcome!
4 |
5 | ## Dependencies
6 |
7 | There's no formal set of dependencies for Substrait, but here are some that are useful to have:
8 |
9 | * [`buf`](https://docs.buf.build/installation) for easy generation of proto serialization/deserialization code
10 | * [`protoc`](https://grpc.io/docs/protoc-installation/), used by `buf` and usable independent of `buf`
11 | * A Python environment with [the website's `requirements.txt`](https://github.com/substrait-io/substrait/blob/main/site/requirements.txt) dependencies installed if you want to see changes to the website locally
12 |
13 | ## Commit Conventions
14 |
15 | Substrait follows [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) for commit message structure. You can use [`pre-commit`](https://pre-commit.com/) to check your messages for you, but note that you must install pre-commit using `pre-commit install --hook-type commit-msg` for this to work. CI will also lint your commit messages. Please also ensure that your PR title and initial comment together form a valid commit message; that will save us some work formatting the merge commit message when we merge your PR.
16 |
17 | Examples of commit messages can be seen [here](https://www.conventionalcommits.org/en/v1.0.0/#examples).
18 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Substrait
2 |
3 | Substrait is a new project focused on producing an independent description of data compute operations. It is composed primarily of:
4 |
5 | 1. A formal specification
6 | 2. A human readable text representation
7 | 3. A compact cross-language binary representation
8 |
9 | For more details, please go to [substrait.io](https://substrait.io)
10 |
11 |
--------------------------------------------------------------------------------
/buf.gen.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | plugins:
3 | - plugin: buf.build/protocolbuffers/cpp:v23.0
4 | out: gen/proto/cpp
5 | - plugin: buf.build/protocolbuffers/csharp:v23.0
6 | out: gen/proto/csharp
7 | - plugin: buf.build/protocolbuffers/java:v23.0
8 | out: gen/proto/java
9 | - plugin: buf.build/protocolbuffers/python:v23.0
10 | out: gen/proto/python
11 | - plugin: buf.build/protocolbuffers/go:v1.30.0
12 | out: gen/proto/go
13 | opt:
14 | - paths=source_relative
15 |
--------------------------------------------------------------------------------
/buf.work.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | directories:
3 | - proto
4 |
--------------------------------------------------------------------------------
/ci/release/dry_run.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # shellcheck shell=bash
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | set -euo pipefail
6 |
7 | curdir="$PWD"
8 | worktree="$(mktemp -d)"
9 | branch="$(basename "$worktree")"
10 |
11 | git worktree add "$worktree"
12 | # Remove the temporary worktree and its branch, starting from the original directory.
13 | function cleanup() {
14 | cd "$curdir" || exit 1
15 | git worktree remove "$worktree"
16 | git worktree prune
17 | git branch -D "$branch"
18 | }
19 | # EXIT alone covers every exit path, including `set -e` failures; also trapping ERR ran cleanup twice.
20 | trap cleanup EXIT
21 |
22 | cd "$worktree" || exit 1
23 |
24 | export GITHUB_REF="$branch"
25 |
26 | npx --yes \
27 | -p "semantic-release@24.1.2" \
28 | -p "@semantic-release/commit-analyzer" \
29 | -p "@semantic-release/release-notes-generator" \
30 | -p "@semantic-release/changelog" \
31 | -p "@semantic-release/exec" \
32 | -p "@semantic-release/git" \
33 | -p "conventional-changelog-conventionalcommits@8.0.0" \
34 | semantic-release \
35 | --ci false \
36 | --dry-run \
37 | --preset conventionalcommits \
38 | --plugins \
39 | --analyze-commits "@semantic-release/commit-analyzer" \
40 | --generate-notes "@semantic-release/release-notes-generator" \
41 | --verify-conditions "@semantic-release/changelog,@semantic-release/exec,@semantic-release/git" \
42 | --prepare "@semantic-release/changelog,@semantic-release/exec" \
43 | --branches "$branch" \
44 | --repository-url "file://$PWD"
45 |
--------------------------------------------------------------------------------
/ci/release/prepare.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # shellcheck shell=bash
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | set -euo pipefail
6 |
7 | # build artifacts
8 | buf build
9 | buf generate
10 |
--------------------------------------------------------------------------------
/ci/release/publish.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # shellcheck shell=bash
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | set -euo pipefail
6 | # Usage: publish.sh <version>. Runs `buf push` from proto/ with two tags: v<version> and the HEAD commit hash.
7 | cd "$(git rev-parse --show-toplevel)"/proto || exit 1
8 |
9 | buf push --tag "v${1}" --tag "$(git rev-parse HEAD)"
10 |
--------------------------------------------------------------------------------
/ci/release/run.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # shellcheck shell=bash
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | set -euo pipefail
6 | # Run semantic-release in CI mode; semantic-release and the conventionalcommits preset are version-pinned.
7 | npx --yes \
8 | -p "semantic-release@24.1.2" \
9 | -p "@semantic-release/commit-analyzer" \
10 | -p "@semantic-release/release-notes-generator" \
11 | -p "@semantic-release/changelog" \
12 | -p "@semantic-release/github" \
13 | -p "@semantic-release/exec" \
14 | -p "@semantic-release/git" \
15 | -p "conventional-changelog-conventionalcommits@8.0.0" \
16 | semantic-release --ci
17 |
--------------------------------------------------------------------------------
/ci/release/verify.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # shellcheck shell=bash
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | set -euo pipefail
6 | # Run `buf lint`; .releaserc.json wires this script in as the release verifyConditionsCmd.
7 | buf lint
8 |
--------------------------------------------------------------------------------
/core.go:
--------------------------------------------------------------------------------
1 | // Package substrait provides access to Substrait artifacts via embed.FS.
2 | // Use substrait.GetSubstraitFS() to retrieve the embed.FS object.
3 | package substrait
4 |
5 | import "embed"
6 |
7 | //go:embed extensions/*
8 | var substraitExtensionsFS embed.FS
9 | // GetSubstraitFS returns the embed.FS populated from extensions/* (the same value as GetSubstraitExtensionsFS).
10 | func GetSubstraitFS() embed.FS {
11 | return substraitExtensionsFS
12 | }
13 | // GetSubstraitExtensionsFS returns the embed.FS populated from extensions/*.
14 | func GetSubstraitExtensionsFS() embed.FS {
15 | return substraitExtensionsFS
16 | }
17 |
18 | //go:embed tests/cases/*/*.test
19 | var substraitTestsFS embed.FS
20 | // GetSubstraitTestsFS returns the embed.FS populated from tests/cases/*/*.test.
21 | func GetSubstraitTestsFS() embed.FS {
22 | return substraitTestsFS
23 | }
24 |
--------------------------------------------------------------------------------
/core_test.go:
--------------------------------------------------------------------------------
1 | package substrait
2 |
3 | import (
4 | "embed"
5 | "io/fs"
6 | "testing"
7 |
8 | "github.com/stretchr/testify/assert"
9 | "github.com/stretchr/testify/require"
10 | )
11 | // TestGetSubstraitExtensionsFS checks that both extension accessors expose more than 15 files, including selected function YAMLs.
12 | func TestGetSubstraitExtensionsFS(t *testing.T) {
13 | fsArr := []embed.FS{GetSubstraitExtensionsFS(), GetSubstraitFS()}
14 | for _, got := range fsArr {
15 | filePaths, err := ListFiles(got, ".")
16 | require.NoError(t, err)
17 | assert.Greater(t, len(filePaths), 15)
18 | assert.Contains(t, filePaths, "extensions/functions_arithmetic.yaml")
19 | assert.Contains(t, filePaths, "extensions/functions_arithmetic_decimal.yaml")
20 | assert.Contains(t, filePaths, "extensions/functions_datetime.yaml")
21 | }
22 | }
23 | // TestGetSubstraitTestsFS checks that the tests FS exposes more than 3 files, including selected .test cases.
24 | func TestGetSubstraitTestsFS(t *testing.T) {
25 | got := GetSubstraitTestsFS()
26 | filePaths, err := ListFiles(got, ".")
27 | require.NoError(t, err)
28 | assert.Greater(t, len(filePaths), 3)
29 | assert.Contains(t, filePaths, "tests/cases/arithmetic/add.test")
30 | assert.Contains(t, filePaths, "tests/cases/arithmetic/max.test")
31 | assert.Contains(t, filePaths, "tests/cases/arithmetic_decimal/power.test")
32 | assert.Contains(t, filePaths, "tests/cases/datetime/lt_datetime.test")
33 | }
34 | // ListFiles walks embedFs from root and returns the paths of all non-directory entries, plus any error from the walk.
35 | func ListFiles(embedFs embed.FS, root string) ([]string, error) {
36 | var files []string
37 | err := fs.WalkDir(embedFs, root, func(path string, d fs.DirEntry, err error) error {
38 | if err != nil {
39 | return err
40 | }
41 | if !d.IsDir() {
42 | files = append(files, path)
43 | }
44 | return nil
45 | })
46 | return files, err
47 | }
48 |
--------------------------------------------------------------------------------
/extensions/extension_types.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | types:
3 | - name: point
4 | structure:
5 | latitude: i32
6 | longitude: i32
7 | - name: line
8 | structure:
9 | start: point
10 | end: point
11 |
--------------------------------------------------------------------------------
/extensions/functions_aggregate_approx.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | aggregate_functions:
4 | - name: "approx_count_distinct"
5 | description: >-
6 | Calculates the approximate number of rows that contain distinct values of the expression argument using
7 | HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which
8 | returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT
9 | processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact
10 | result.
11 | impls:
12 | - args:
13 | - name: x
14 | value: any
15 | nullability: DECLARED_OUTPUT
16 | decomposable: MANY
17 | intermediate: binary
18 | return: i64
19 |
--------------------------------------------------------------------------------
/extensions/functions_aggregate_decimal_output.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | aggregate_functions:
4 | - name: "count"
5 | description: Count a set of values. Result is returned as a decimal instead of i64.
6 | impls:
7 | - args:
8 | - name: x
9 | value: any
10 | options:
11 | overflow:
12 | values: [SILENT, SATURATE, ERROR]
13 | nullability: DECLARED_OUTPUT
14 | decomposable: MANY
15 | intermediate: decimal<38,0>
16 | return: decimal<38,0>
17 | - name: "count"
18 | description: "Count a set of records (not field referenced). Result is returned as a decimal instead of i64."
19 | impls:
20 | - options:
21 | overflow:
22 | values: [SILENT, SATURATE, ERROR]
23 | nullability: DECLARED_OUTPUT
24 | decomposable: MANY
25 | intermediate: decimal<38,0>
26 | return: decimal<38,0>
27 | - name: "approx_count_distinct"
28 | description: >-
29 | Calculates the approximate number of rows that contain distinct values of the expression argument using
30 | HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which
31 | returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT
32 | processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact
33 | result. Result is returned as a decimal instead of i64.
34 | impls:
35 | - args:
36 | - name: x
37 | value: any
38 | nullability: DECLARED_OUTPUT
39 | decomposable: MANY
40 | intermediate: binary
41 | return: decimal<38,0>
42 |
--------------------------------------------------------------------------------
/extensions/functions_aggregate_generic.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | aggregate_functions:
4 | - name: "count"
5 | description: Count a set of values
6 | impls:
7 | - args:
8 | - name: x
9 | value: any
10 | options:
11 | overflow:
12 | values: [SILENT, SATURATE, ERROR]
13 | nullability: DECLARED_OUTPUT
14 | decomposable: MANY
15 | intermediate: i64
16 | return: i64
17 | - name: "count"
18 | description: "Count a set of records (not field referenced)"
19 | impls:
20 | - options:
21 | overflow:
22 | values: [SILENT, SATURATE, ERROR]
23 | nullability: DECLARED_OUTPUT
24 | decomposable: MANY
25 | intermediate: i64
26 | return: i64
27 | - name: "any_value"
28 | description: >
29 | Selects an arbitrary value from a group of values.
30 |
31 | If the input is empty, the function returns null.
32 | impls:
33 | - args:
34 | - name: x
35 | value: any1
36 | options:
37 | ignore_nulls:
38 | values: [ "TRUE", "FALSE" ]
39 | nullability: DECLARED_OUTPUT
40 | decomposable: MANY
41 | intermediate: any1?
42 | return: any1?
43 |
--------------------------------------------------------------------------------
/extensions/functions_rounding_decimal.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | scalar_functions:
4 | -
5 | name: "ceil"
6 | description: >
7 | Rounding to the ceiling of the value `x`.
8 | impls:
9 | - args:
10 | - value: decimal<P,S>
11 | name: x
12 | return: |-
13 | integral_least_num_digits = P - S + 1
14 | precision = min(integral_least_num_digits, 38)
15 | decimal?<precision, 0>
16 | -
17 | name: "floor"
18 | description: >
19 | Rounding to the floor of the value `x`.
20 | impls:
21 | - args:
22 | - value: decimal<P,S>
23 | name: x
24 | return: |-
25 | integral_least_num_digits = P - S + 1
26 | precision = min(integral_least_num_digits, 38)
27 | decimal?<precision, 0>
28 | -
29 | name: "round"
30 | description: >
31 | Rounding the value `x` to `s` decimal places.
32 | impls:
33 | - args:
34 | - value: decimal<P,S>
35 | name: x
36 | description: >
37 | Numerical expression to be rounded.
38 | - value: i32
39 | name: s
40 | description: >
41 | Number of decimal places to be rounded to.
42 |
43 | When `s` is a positive number, the rounding
44 | is performed to a `s` number of decimal places.
45 |
46 | When `s` is a negative number, the rounding is
47 | performed to the left side of the decimal point
48 | as specified by `s`.
49 |
50 | The precision of the resultant decimal type is one
51 | more than the precision of the input decimal type to
52 | allow for numbers that round up or down to the next
53 | decimal magnitude.
54 | E.g. `round(9.9, 0)` -> `10.0`.
55 | The scale of the resultant decimal type cannot be
56 | larger than the scale of the input decimal type.
57 | options:
58 | rounding:
59 | description: >
60 | When a boundary is computed to lie somewhere between two values,
61 | and this value cannot be exactly represented, this specifies how
62 | to round it.
63 |
64 | - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
65 | to the even option.
66 | - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
67 | halfway, tie away from zero.
68 | - TRUNCATE: always round toward zero.
69 | - CEILING: always round toward positive infinity.
70 | - FLOOR: always round toward negative infinity.
71 | - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
72 | - TIE_DOWN: round ties with FLOOR rule
73 | - TIE_UP: round ties with CEILING rule
74 | - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
75 | - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
76 | to the odd option.
77 | values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
78 | AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ]
79 | nullability: DECLARED_OUTPUT
80 | return: |-
81 | precision = min(P + 1, 38)
82 | decimal?<precision, S>
83 |
--------------------------------------------------------------------------------
/extensions/functions_set.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | scalar_functions:
4 | -
5 | name: "index_in"
6 | description: >
7 | Checks the membership of a value in a list of values
8 |
9 | Returns the first 0-based index value of some input `needle` if `needle` is equal to
10 | any element in `haystack`. Returns `NULL` if not found.
11 |
12 | If `needle` is `NULL`, returns `NULL`.
13 |
14 | If `needle` is `NaN`:
15 | - Returns 0-based index of `NaN` in `haystack` (default)
16 | - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified)
17 | impls:
18 | - args:
19 | - name: needle
20 | value: any1
21 | - name: haystack
22 | value: list<any1>
23 | options:
24 | nan_equality:
25 | values: [ NAN_IS_NAN, NAN_IS_NOT_NAN ]
26 | nullability: DECLARED_OUTPUT
27 | return: i64?
28 |
--------------------------------------------------------------------------------
/extensions/type_variations.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | type_variations:
4 | - parent: string
5 | name: dict4
6 | description: a four-byte dictionary encoded string
7 | functions: INHERITS
8 | - parent: string
9 | name: bigoffset
10 | description: >-
11 | The arrow large string representation of strings, still restricted to the default string size defined in
12 | Substrait.
13 | functions: SEPARATE
14 | - parent: struct
15 | name: avro
16 | description: an avro encoded struct
17 | functions: SEPARATE
18 | - parent: struct
19 | name: cstruct
20 | description: a cstruct representation of the struct
21 | functions: SEPARATE
22 | - parent: struct
23 | name: dict2
24 | description: a 2-byte dictionary encoded string.
25 | functions: INHERITS
26 |
--------------------------------------------------------------------------------
/extensions/unknown.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | types:
4 | - name: unknown
5 | scalar_functions:
6 | - name: "add"
7 | impls:
8 | - args:
9 | - value: unknown
10 | - value: unknown
11 | return: unknown
12 | - name: "subtract"
13 | impls:
14 | - args:
15 | - value: unknown
16 | - value: unknown
17 | return: unknown
18 | - name: "multiply"
19 | impls:
20 | - args:
21 | - value: unknown
22 | - value: unknown
23 | return: unknown
24 | - name: "divide"
25 | impls:
26 | - args:
27 | - value: unknown
28 | - value: unknown
29 | return: unknown
30 | - name: "modulus"
31 | impls:
32 | - args:
33 | - value: unknown
34 | - value: unknown
35 | return: unknown
36 | aggregate_functions:
37 | - name: "sum"
38 | impls:
39 | - args:
40 | - value: unknown
41 | intermediate: unknown
42 | return: unknown
43 | - name: "avg"
44 | impls:
45 | - args:
46 | - value: unknown
47 | intermediate: unknown
48 | return: unknown
49 | - name: "min"
50 | impls:
51 | - args:
52 | - value: unknown
53 | intermediate: unknown
54 | return: unknown
55 | - name: "max"
56 | impls:
57 | - args:
58 | - value: unknown
59 | intermediate: unknown
60 | return: unknown
61 | - name: "count"
62 | impls:
63 | - args:
64 | - value: unknown
65 | intermediate: unknown
66 | return: unknown
67 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/substrait-io/substrait
2 |
3 | go 1.22.0
4 |
5 | require github.com/stretchr/testify v1.9.0
6 |
7 | require (
8 | github.com/davecgh/go-spew v1.1.1 // indirect
9 | github.com/pmezard/go-difflib v1.0.0 // indirect
10 | gopkg.in/yaml.v3 v3.0.1 // indirect
11 | )
12 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
5 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
6 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
7 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
11 |
--------------------------------------------------------------------------------
/grammar/FuncTestCaseLexer.g4:
--------------------------------------------------------------------------------
1 | lexer grammar FuncTestCaseLexer;
2 |
3 | import SubstraitLexer;
4 |
5 | options {
6 | caseInsensitive = true;
7 | }
8 |
9 | Whitespace : [ \t\n\r]+ -> channel(HIDDEN) ;
10 |
11 | TripleHash: '###';
12 | SubstraitScalarTest: 'SUBSTRAIT_SCALAR_TEST';
13 | SubstraitAggregateTest: 'SUBSTRAIT_AGGREGATE_TEST';
14 | SubstraitInclude: 'SUBSTRAIT_INCLUDE';
15 |
16 | FormatVersion
17 | : 'v' DIGIT+ ('.' DIGIT+)?
18 | ;
19 |
20 | DescriptionLine
21 | : '# ' ~[\r\n]* '\r'? '\n'
22 | ;
23 |
24 | Define: 'DEFINE';
25 | ErrorResult: '<!ERROR>'; // literal marker for an error result (an empty '' literal is not a valid lexer rule)
26 | UndefineResult: '<!UNDEFINED>'; // literal marker for an undefined result
27 | Overflow: 'OVERFLOW';
28 | Rounding: 'ROUNDING';
29 | Error: 'ERROR';
30 | Saturate: 'SATURATE';
31 | Silent: 'SILENT';
32 | TieToEven: 'TIE_TO_EVEN';
33 | NaN: 'NAN';
34 | AcceptNulls: 'ACCEPT_NULLS';
35 | IgnoreNulls: 'IGNORE_NULLS';
36 | NullHandling: 'NULL_HANDLING';
37 | SpacesOnly: 'SPACES_ONLY';
38 | Truncate: 'TRUNCATE';
39 |
40 | IntegerLiteral
41 | : [+-]? Int
42 | ;
43 |
44 | DecimalLiteral
45 | : [+-]? [0-9]+ ('.' [0-9]+)?
46 | ;
47 |
48 | FloatLiteral
49 | : [+-]? [0-9]+ ('.' [0-9]*)? ( 'E' [+-]? [0-9]+ )?
50 | | [+-]? 'inf'
51 | | 'snan'
52 | ;
53 |
54 | BooleanLiteral
55 | : 'true' | 'false'
56 | ;
57 |
58 | fragment FourDigits: [0-9][0-9][0-9][0-9];
59 | fragment TwoDigits: [0-9][0-9];
60 |
61 | TimestampTzLiteral
62 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )?
63 | [+-] TwoDigits ':' TwoDigits '\''
64 | ;
65 |
66 | TimestampLiteral
67 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\''
68 | ;
69 |
70 | TimeLiteral
71 | : '\'' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\''
72 | ;
73 |
74 | DateLiteral
75 | : '\'' FourDigits '-' TwoDigits '-' TwoDigits '\''
76 | ;
77 |
78 | PeriodPrefix: 'P';
79 | TimePrefix: 'T';
80 | YearPrefix: 'Y';
81 | MSuffix: 'M'; // used for both months and minutes
82 | DaySuffix: 'D';
83 | HourSuffix: 'H';
84 | SecondSuffix: 'S';
85 | FractionalSecondSuffix: 'F';
86 | OAngleBracket: Lt;
87 | CAngleBracket: Gt;
88 |
89 | IntervalYearLiteral
90 | : '\'' PeriodPrefix IntegerLiteral YearPrefix (IntegerLiteral MSuffix)? '\''
91 | | '\'' PeriodPrefix IntegerLiteral MSuffix '\''
92 | ;
93 |
94 | IntervalDayLiteral
95 | : '\'' PeriodPrefix IntegerLiteral DaySuffix (TimePrefix TimeInterval)? '\''
96 | | '\'' PeriodPrefix TimePrefix TimeInterval '\''
97 | ;
98 |
99 | fragment TimeInterval
100 | : IntegerLiteral HourSuffix (IntegerLiteral MSuffix)? (DecimalLiteral SecondSuffix)?
101 | | IntegerLiteral MSuffix (DecimalLiteral SecondSuffix)?
102 | | DecimalLiteral SecondSuffix
103 | ;
104 |
105 | NullLiteral: 'null';
106 |
107 | StringLiteral
108 | : '\'' ('\\' . | '\'\'' | ~['\\])* '\''
109 | ;
110 |
111 | ColumnName
112 | : 'COL' Int
113 | ;
114 |
--------------------------------------------------------------------------------
/grammar/Makefile:
--------------------------------------------------------------------------------
1 | TYPE_GRAMMAR=SubstraitLexer.g4 SubstraitType.g4
2 | TYPE_OUTPUT_DIR=../tests/type/antlr_parser
3 | TESTCASE_GRAMMAR=FuncTestCaseLexer.g4 FuncTestCaseParser.g4
4 | TESTCASE_OUTPUT_DIR=../tests/coverage/antlr_parser
5 | .PHONY: all generate_testcase_parser generate_type_parser clean # targets are commands, not files
6 | all: generate_testcase_parser generate_type_parser
7 | # Generate the Python3 test-case parser into TESTCASE_OUTPUT_DIR, drop ANTLR side files, add license headers.
8 | generate_testcase_parser:
9 | @printf '\nGenerating Test Case Parser\n'
10 | antlr -visitor -Dlanguage=Python3 -o $(TESTCASE_OUTPUT_DIR) $(TESTCASE_GRAMMAR)
11 | rm -rf $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp
12 | ./prepend_license.sh $(TESTCASE_OUTPUT_DIR)
13 | # Generate the Python3 type parser into TYPE_OUTPUT_DIR, drop ANTLR side files, add license headers.
14 | generate_type_parser:
15 | @printf '\nGenerating Substrait Type\n'
16 | antlr -visitor -Dlanguage=Python3 -o $(TYPE_OUTPUT_DIR) $(TYPE_GRAMMAR)
17 | rm -rf $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp
18 | ./prepend_license.sh $(TYPE_OUTPUT_DIR)
19 | # Remove generated .py, .tokens and .interp files from both output directories and stray .tokens files here.
20 | clean:
21 | rm -rf $(TYPE_OUTPUT_DIR)/*.py $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp
22 | rm -rf $(TESTCASE_OUTPUT_DIR)/*.py $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp
23 | rm -rf ./*.tokens
24 |
--------------------------------------------------------------------------------
/grammar/README.md:
--------------------------------------------------------------------------------
1 | # Grammar
2 | This directory defines the grammars for:
3 | 1. The Substrait Type language used in the YAML extensions.
4 | 2. The test case language used to unit test functions.
5 |
6 | ## Regenerating
7 | To regenerate all of the parsers use the following command
8 | ```sh
9 | make all
10 | ```
11 |
12 | ### Requirements
13 | You will need [ANTLR](https://www.antlr.org/index.html) available on your machine to regenerate the parser.
14 |
15 | #### MacOS
16 | ```
17 | brew install antlr
18 | ```
19 |
20 | #### Ubuntu
21 | ```
22 | sudo apt-get install antlr4
23 | ```
--------------------------------------------------------------------------------
/grammar/SubstraitLexer.g4:
--------------------------------------------------------------------------------
1 | lexer grammar SubstraitLexer;
2 |
3 | options {
4 | caseInsensitive = true;
5 | }
6 |
7 | // Whitespace and comment handling. A block comment body is any sequence of non-'*' characters or runs of '*' not followed by '/'.
8 | LineComment : '//' ~[\r\n]* -> channel(HIDDEN) ;
9 | BlockComment : ( '/*' ( ~'*' | '*'+ ~[*/] )* '*'* '*/' ) -> channel(HIDDEN) ;
10 | Whitespace : [ \t\r]+ -> channel(HIDDEN) ;
11 |
12 | fragment DIGIT: [0-9];
13 |
14 | // Syntactic keywords.
15 | If : 'IF';
16 | Then : 'THEN';
17 | Else : 'ELSE';
18 |
19 | // TYPES
20 | Boolean : 'BOOLEAN';
21 | I8 : 'I8';
22 | I16 : 'I16';
23 | I32 : 'I32';
24 | I64 : 'I64';
25 | FP32 : 'FP32';
26 | FP64 : 'FP64';
27 | String : 'STRING';
28 | Binary : 'BINARY';
29 | Timestamp: 'TIMESTAMP';
30 | Timestamp_TZ: 'TIMESTAMP_TZ';
31 | Date : 'DATE';
32 | Time : 'TIME';
33 | Interval_Year: 'INTERVAL_YEAR';
34 | Interval_Day: 'INTERVAL_DAY';
35 | UUID : 'UUID';
36 | Decimal : 'DECIMAL';
37 | Precision_Time: 'PRECISION_TIME';
38 | Precision_Timestamp: 'PRECISION_TIMESTAMP';
39 | Precision_Timestamp_TZ: 'PRECISION_TIMESTAMP_TZ';
40 | FixedChar: 'FIXEDCHAR';
41 | VarChar : 'VARCHAR';
42 | FixedBinary: 'FIXEDBINARY';
43 | Struct : 'STRUCT';
44 | NStruct : 'NSTRUCT';
45 | List : 'LIST';
46 | Map : 'MAP';
47 | UserDefined: 'U!';
48 |
49 | // short names for types
50 | Bool: 'BOOL';
51 | Str: 'STR';
52 | VBin: 'VBIN';
53 | Ts: 'TS';
54 | TsTZ: 'TSTZ';
55 | IYear: 'IYEAR';
56 | IDay: 'IDAY';
57 | Dec: 'DEC';
58 | PT: 'PT';
59 | PTs: 'PTS';
60 | PTsTZ: 'PTSTZ';
61 | FChar: 'FCHAR';
62 | VChar: 'VCHAR';
63 | FBin: 'FBIN';
64 |
65 | Any: 'ANY';
66 | AnyVar: Any [0-9];
67 |
68 | DoubleColon: '::';
69 |
70 | // MATH
71 | Plus : '+';
72 | Minus : '-';
73 | Asterisk : '*';
74 | ForwardSlash : '/';
75 | Percent : '%';
76 |
77 | // COMPARE
78 | Eq : '=';
79 | Ne : '!=';
80 | Gte : '>=';
81 | Lte : '<=';
82 | Gt : '>';
83 | Lt : '<';
84 | Bang : '!';
85 |
86 | // ORGANIZE
87 | OAngleBracket: Lt;
88 | CAngleBracket: Gt;
89 | OParen: '(';
90 | CParen: ')';
91 | OBracket: '[';
92 | CBracket: ']';
93 | Comma: ',';
94 | Colon: ':';
95 | QMark: '?';
96 | Hash: '#';
97 | Dot: '.';
98 |
99 |
100 | // OPERATIONS
101 | And : 'AND';
102 | Or : 'OR';
103 | Assign : ':=';
104 |
105 |
106 |
107 | fragment Int
108 | : '1'..'9' Digit*
109 | | '0'
110 | ;
111 |
112 | fragment Digit
113 | : '0'..'9'
114 | ;
115 |
116 | Number
117 | : '-'? Int
118 | ;
119 |
120 | Identifier
121 | : ('A'..'Z' | '_' | '$') ('A'..'Z' | '_' | '$' | Digit)*
122 | ;
123 |
124 | Newline
125 | : ( '\r' '\n'?
126 | | '\n'
127 | )
128 | ;
129 |
--------------------------------------------------------------------------------
/grammar/prepend_license.sh:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Usage: prepend_license.sh DIR -- prepends the SPDX header line to every DIR/*.py file.
3 | for f in "$1"/*.py; do
4 | echo '# SPDX-License-Identifier: Apache-2.0' | cat - "$f" > "$f.tmp" && mv "$f.tmp" "$f"
5 | done
--------------------------------------------------------------------------------
/proto/buf.lock:
--------------------------------------------------------------------------------
1 | # Generated by buf. DO NOT EDIT.
2 | version: v1
3 |
--------------------------------------------------------------------------------
/proto/buf.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | name: buf.build/substrait/substrait
3 | lint:
4 | use:
5 | - DEFAULT
6 | ignore_only:
7 | PACKAGE_VERSION_SUFFIX:
8 | - substrait
9 | breaking:
10 | use:
11 | - FILE
12 |
--------------------------------------------------------------------------------
/proto/substrait/capabilities.proto:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 | syntax = "proto3";
3 |
4 | package substrait;
5 |
6 | option csharp_namespace = "Substrait.Protobuf";
7 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb";
8 | option java_multiple_files = true;
9 | option java_package = "io.substrait.proto";
10 |
11 | // Defines a set of Capabilities that a system (producer or consumer) supports.
12 | message Capabilities {
13 | // List of Substrait versions this system supports
14 | repeated string substrait_versions = 1;
15 |
16 | // list of com.google.Any message types this system supports for advanced
17 | // extensions.
18 | repeated string advanced_extension_type_urls = 2;
19 |
20 | // list of simple extensions this system supports.
21 | repeated SimpleExtension simple_extensions = 3;
22 |
23 | message SimpleExtension {
24 | string uri = 1;
25 | repeated string function_keys = 2;
26 | repeated string type_keys = 3;
27 | repeated string type_variation_keys = 4;
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/proto/substrait/extended_expression.proto:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 | syntax = "proto3";
3 |
4 | package substrait;
5 |
6 | import "substrait/algebra.proto";
7 | import "substrait/extensions/extensions.proto";
8 | import "substrait/plan.proto";
9 | import "substrait/type.proto";
10 |
11 | option csharp_namespace = "Substrait.Protobuf";
12 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb";
13 | option java_multiple_files = true;
14 | option java_package = "io.substrait.proto";
15 |
16 | message ExpressionReference {
17 | oneof expr_type {
18 | Expression expression = 1;
19 | AggregateFunction measure = 2;
20 | }
21 | // Field names in depth-first order
22 | repeated string output_names = 3;
23 | }
24 |
25 | // Describe a set of operations to complete.
26 | // For compactness sake, identifiers are normalized at the plan level.
27 | message ExtendedExpression {
28 | // Substrait version of the expression. Optional up to 0.17.0, required for later
29 | // versions.
30 | Version version = 7;
31 |
32 | // a list of yaml specifications this expression may depend on
33 | repeated substrait.extensions.SimpleExtensionURI extension_uris = 1;
34 |
35 | // a list of extensions this expression may depend on
36 | repeated substrait.extensions.SimpleExtensionDeclaration extensions = 2;
37 |
38 | // one or more expression trees with same order in plan rel
39 | repeated ExpressionReference referred_expr = 3;
40 |
41 | NamedStruct base_schema = 4;
42 | // additional extensions associated with this expression.
43 | substrait.extensions.AdvancedExtension advanced_extensions = 5;
44 |
45 | // A list of com.google.Any entities that this plan may use. Can be used to
46 | // warn if some embedded message types are unknown. Note that this list may
47 | // include message types that are ignorable (optimizations) or that are
48 | // unused. In many cases, a consumer may be able to work with a plan even if
49 | // one or more message types defined here are unknown.
50 | repeated string expected_type_urls = 6;
51 | }
52 |
--------------------------------------------------------------------------------
/proto/substrait/extensions/extensions.proto:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 | syntax = "proto3";
3 |
4 | package substrait.extensions;
5 |
6 | import "google/protobuf/any.proto";
7 |
8 | option csharp_namespace = "Substrait.Protobuf";
9 | option go_package = "github.com/substrait-io/substrait-protobuf/go/substraitpb/extensions";
10 | option java_multiple_files = true;
11 | option java_package = "io.substrait.proto";
12 |
13 | message SimpleExtensionURI {
14 | // A surrogate key used in the context of a single plan used to reference the
15 | // URI associated with an extension.
16 | uint32 extension_uri_anchor = 1;
17 |
18 | // The URI where this extension YAML can be retrieved. This is the "namespace"
19 | // of this extension.
20 | string uri = 2;
21 | }
22 |
23 | // Describes a mapping between a specific extension entity and the uri where
24 | // that extension can be found.
25 | message SimpleExtensionDeclaration {
26 | oneof mapping_type {
27 | ExtensionType extension_type = 1;
28 | ExtensionTypeVariation extension_type_variation = 2;
29 | ExtensionFunction extension_function = 3;
30 | }
31 |
32 | // Describes a Type
33 | message ExtensionType {
34 | // references the extension_uri_anchor defined for a specific extension URI.
35 | uint32 extension_uri_reference = 1;
36 |
37 | // A surrogate key used in the context of a single plan to reference a
38 | // specific extension type
39 | uint32 type_anchor = 2;
40 |
41 | // the name of the type in the defined extension YAML.
42 | string name = 3;
43 | }
44 |
45 | message ExtensionTypeVariation {
46 | // references the extension_uri_anchor defined for a specific extension URI.
47 | uint32 extension_uri_reference = 1;
48 |
49 | // A surrogate key used in the context of a single plan to reference a
50 | // specific type variation
51 | uint32 type_variation_anchor = 2;
52 |
53 | // the name of the type variation in the defined extension YAML.
54 | string name = 3;
55 | }
56 |
57 | message ExtensionFunction {
58 | // references the extension_uri_anchor defined for a specific extension URI.
59 | uint32 extension_uri_reference = 1;
60 |
61 | // A surrogate key used in the context of a single plan to reference a
62 | // specific function
63 | uint32 function_anchor = 2;
64 |
65 | // A function signature compound name
66 | string name = 3;
67 | }
68 | }
69 |
70 | // A generic object that can be used to embed additional extension information
71 | // into the serialized substrait plan.
72 | message AdvancedExtension {
73 | // An optimization is helpful information that doesn't influence semantics.
74 | // May be ignored by a consumer.
75 | repeated google.protobuf.Any optimization = 1;
76 |
77 | // An enhancement alters semantics. Cannot be ignored by a consumer.
78 | google.protobuf.Any enhancement = 2;
79 | }
80 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | # exclude filters out files found by Black itself during discovery
3 | exclude = '''
4 | (
5 | .*/antlr_parser/.*\.py
6 | )
7 | '''
8 | # pre-commit passes files into Black, rather than letting it discover files
9 | # force-exclude can be used to filter out these files from formatting
10 | force-exclude = '''
11 | (
12 | .*/antlr_parser/.*\.py
13 | )
14 | '''
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | antlr4-python3-runtime==4.13.2
2 | black==24.8.0
3 | flake8==7.0.0
4 | pytest==8.3.4
5 | pyyaml==6.0.2
--------------------------------------------------------------------------------
/site/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 | site
3 |
--------------------------------------------------------------------------------
/site/README.md:
--------------------------------------------------------------------------------
1 | ## Substrait Site
2 |
3 | This directory contains the source for the Substrait site.
4 |
5 | * Site structure is maintained in mkdocs.yml
6 | * Pages are maintained in markdown in the `docs/` folder
7 | * Links use bare page names: `[link text](target-page)`
8 |
9 | ### Installation
10 |
11 | The site is built using mkdocs. To install mkdocs and the theme, run:
12 |
13 | ```
14 | # Activate the virtual environment (if installed)
15 | cd site/
16 | . venv/bin/activate
17 | # Install or update the dependencies
18 | pip install -r ./requirements.txt
19 | ```
20 |
21 | It is easier to use `virtualenv` to keep the Python dependencies for `site/`
22 | separate from your other projects and/or distinct from system managed Python
23 | dependencies.
24 |
25 | * To use `virtualenv`, you need Python 3.7/3.8 installed locally.
26 | * For Ubuntu: `apt-get install python3 virtualenv`
27 | * For MacOS/brew: `brew install python pyenv-virtualenv`
28 | * Install the virtual environment:
29 | ```
30 | # cd to the site/ directory
31 | cd site/
32 | # setup the virtual environment (only needed once)
33 | virtualenv -p $(which python3) venv
34 | # activate the virtual environment
35 | . venv/bin/activate
36 | # Install or update the dependencies as usual
37 | pip install -r ./requirements.txt
38 | ```
39 |
40 | ### Local Changes
41 |
42 | To see changes locally before committing, use mkdocs to run a local server from this directory.
43 |
44 | ```
45 | mkdocs serve
46 | ```
47 |
48 | ### Publishing
49 |
50 | TBD
51 |
--------------------------------------------------------------------------------
/site/data/committers.yaml:
--------------------------------------------------------------------------------
1 | - Name: Jeroen van Straten
2 | Association: Qblox
3 | - Name: Carlo Curino
4 | Association: Microsoft
5 | - Name: James Taylor
6 | Association: Sundeck
7 | - Name: Sutou Kouhei
8 | Association: Clearcode
9 | - Name: Micah Kornfeld
10 | Association: Google
11 | - Name: Jinfeng Ni
12 | Association: Sundeck
13 | - Name: Andy Grove
14 | Association: Nvidia
15 | - Name: Jesus Camacho Rodriguez
16 | Association: Microsoft
17 | - Name: Rich Tia
18 | Association: Voltron Data
19 | - Name: Vibhatha Abeykoon
20 | Association: Voltron Data
21 | - Name: Nic Crane
22 | Association: Recast
23 | - Name: Gil Forsyth
24 | Association: Voltron Data
25 | - Name: ChaoJun Zhang
26 | Association: Intel
27 | - Name: Matthijs Brobbel
28 | Association: Voltron Data
29 | - Name: Matt Topol
30 | Association: Voltron Data
31 | - Name: Ingo Müller
32 | Association: Google
33 | - Name: Arttu Voutilainen
34 | Association: Palantir Technologies
35 | - Name: Bruno Volpato
36 | Association: Datadog
37 | - Name: Anshul Data
38 | Association: Sundeck
39 | - Name: Chandra Sanapala
40 | Association: Sundeck
41 |
--------------------------------------------------------------------------------
/site/data/smc.yaml:
--------------------------------------------------------------------------------
1 | - Name: Phillip Cloud
2 | Association: Voltron Data
3 | - Name: Weston Pace
4 | Association: LanceDB
5 | - Name: Jacques Nadeau
6 | Association: Sundeck
7 | - Name: Victor Barua
8 | Association: Datadog
9 | - Name: David Sisson
10 | Association: Voltron Data
11 |
--------------------------------------------------------------------------------
/site/docs/_config:
--------------------------------------------------------------------------------
1 | arrange:
2 | - index.md
3 | - spec
4 | - types
5 | - expressions
6 | - relations
7 | - serialization
8 | - extensions
9 | - community
10 | - governance.md
11 | - about.md
12 | - tools
13 | - tutorial
14 | - faq.md
15 |
--------------------------------------------------------------------------------
/site/docs/community/index.md:
--------------------------------------------------------------------------------
1 | # Community
2 |
3 | Substrait is developed as a consensus-driven open source product under the Apache 2.0
4 | license. Development is done in the open leveraging GitHub issues and PRs.
5 |
6 | ## Get In Touch
7 |
8 | [Mailing List/Google Group](https://groups.google.com/g/substrait)
9 | : We use the mailing list to discuss questions, formulate plans and collaborate asynchronously.
10 |
11 | [Slack Channel]({{versions.slackinvitelink}})
12 | : The developers of Substrait frequent the Slack channel. You can get an
13 | invite to the channel by following [this link]({{versions.slackinvitelink}}).
14 |
15 | [GitHub Issues](https://github.com/substrait-io/substrait/issues)
16 | : Substrait is developed via GitHub issues and pull requests. If you see a problem
17 | or want to enhance the product, we suggest you file a GitHub issue for developers to
18 | review.
19 |
20 | [Twitter](https://twitter.com/substrait_io)
21 | : The [@substrait_io](https://twitter.com/substrait_io) account on Twitter is our official account. Follow us to keep up
22 | to date on what is happening with Substrait!
23 |
24 | [Docs](https://github.com/substrait-io/substrait/tree/main/site/docs)
25 | : Our website is all maintained in our source repository. If there is something you think
26 | can be improved, feel free to fork our repository and post a pull request.
27 |
28 | Meetings
29 | : Our community meets every other week on Wednesday.
30 |
31 |
32 |
33 | ## Talks
34 | Want to learn more about Substrait? Try the following presentations and slide decks.
35 |
36 | * Substrait: A Common Representation for Data Compute Plans (Jacques Nadeau, April 2022) [[slides](https://docs.google.com/presentation/d/1H89iwnnmHZ2oMgGpFon9Rv_fJ60RWE0c9drHCZAL8Bw)]
37 |
38 | ## Citation
39 |
40 | If you use Substrait in your research, please cite it using the following BibTeX entry:
41 |
42 | ```bibtex
43 | @misc{substrait,
44 | author = {substrait-io},
45 | title = {Substrait: Cross-Language Serialization for Relational Algebra},
46 | year = {2021},
47 | month = {8},
48 | day = {31},
49 | publisher = {GitHub},
50 | journal = {GitHub repository},
51 | howpublished = {\url{https://github.com/substrait-io/substrait}}
52 | }
53 | ```
54 |
55 | ## Contribution
56 |
57 | All contributors are welcome to Substrait. If you want to join the project, open a PR or get in touch with us as [above](#get-in-touch).
58 |
59 |
60 | ## Principles
61 |
62 | * Be inclusive and open to all.
63 | * Ensure a diverse set of contributors that come from multiple data backgrounds to maximize general utility.
64 | * Build a specification based on open consensus.
65 | * Avoid over-reliance/coupling to any single technology.
66 | * Make the specification and all tools freely available under a permissive license (Apache 2.0).
67 |
--------------------------------------------------------------------------------
/site/docs/community/powered_by.md:
--------------------------------------------------------------------------------
1 | # Powered by Substrait
2 |
3 | In addition to the work maintained in repositories within the
4 | [substrait-io GitHub organization](https://github.com/substrait-io), a growing
5 | list of other open source projects have adopted Substrait.
6 |
7 |
8 | [//]: # (Maintain this list in alphabetical order)
9 |
10 | [Acero](https://arrow.apache.org/docs/cpp/streaming_execution.html)
11 | : Acero is a query execution engine implemented as a part of the Apache Arrow
12 | C++ library. Acero provides a Substrait consumer interface.
13 |
14 | [ADBC](https://arrow.apache.org/adbc/)
15 | : ADBC (Arrow Database Connectivity) is an API specification for Apache
16 | Arrow-based database access. ADBC allows applications to pass queries either
17 | as SQL strings or Substrait plans.
18 |
19 | [Arrow Flight SQL](https://arrow.apache.org/docs/format/FlightSql.html)
20 | : Arrow Flight SQL is a client-server protocol for interacting with databases
21 | and query engines using the Apache Arrow in-memory columnar format and the
22 | [Arrow Flight RPC](https://arrow.apache.org/docs/format/Flight.html)
23 | framework. Arrow Flight SQL allows clients to send queries as SQL strings or
24 | Substrait plans.
25 |
26 | [DataFusion](https://arrow.apache.org/datafusion/)
27 | : DataFusion is an extensible query planning, optimization, and execution
28 | framework, written in Rust, that uses Apache Arrow as its in-memory format.
29 | DataFusion provides a Substrait producer and consumer that can convert
30 | DataFusion logical plans to and from Substrait plans. It can be used through the
31 | [DataFusion Python bindings](https://github.com/apache/arrow-datafusion-python#substrait-support).
32 |
33 | [DuckDB](https://duckdb.org)
34 | : DuckDB is an in-process SQL OLAP database management system. The [Substrait
35 | Community Extension](https://duckdb.org/community_extensions/extensions/substrait)
36 | allows users to produce and consume Substrait plans through DuckDB's
37 | SQL, Python, and R APIs.
38 |
39 | [Gluten](https://github.com/oap-project/gluten)
40 | : Gluten is a plugin for Apache Spark that allows computation to be offloaded
41 | to engines that have better performance or efficiency than Spark's built-in
42 | JVM-based engine. Gluten converts Spark physical plans to Substrait plans.
43 |
44 | [Ibis](https://ibis-project.org/)
45 | : Ibis is a Python library that provides a lightweight, universal interface
46 | for data wrangling. It includes a dataframe API for Python with support for
47 | more than 10 query execution engines, plus a
48 | [Substrait producer](https://github.com/ibis-project/ibis-substrait)
49 | to enable support for Substrait-consuming execution engines.
50 |
51 | [Substrait R Interface](https://github.com/voltrondata/substrait-r)
52 | : The Substrait R interface package allows users to construct Substrait plans
53 | from R for evaluation by Substrait-consuming execution engines. The package
54 | provides a [dplyr](https://dplyr.tidyverse.org) backend as well as
55 | lower-level interfaces for creating Substrait plans and integrations with
56 | Acero and DuckDB.
57 |
58 | [Velox](https://velox-lib.io)
59 | : Velox is a unified execution engine aimed at accelerating data management
60 | systems and streamlining their development. Velox provides a Substrait
61 | consumer interface.
62 |
63 |
64 | To add your project to this list, please open a
65 | [pull request](https://github.com/substrait-io/substrait/edit/main/site/docs/community/powered_by.md).
66 |
--------------------------------------------------------------------------------
/site/docs/expressions/_config:
--------------------------------------------------------------------------------
1 | arrange:
2 | - field_references.md
3 | - scalar_functions.md
4 | - aggregate_functions.md
5 | - specialized_record_expressions.md
6 | - window_functions.md
7 | - table_functions.md
8 | - user_defined_functions.md
9 | - embedded_functions.md
10 | - dynamic_parameters.md
11 |
--------------------------------------------------------------------------------
/site/docs/expressions/aggregate_functions.md:
--------------------------------------------------------------------------------
1 | # Aggregate Functions
2 |
3 | Aggregate functions are functions that define an operation which consumes values from multiple records to produce a single output. Aggregate functions in SQL are typically used with GROUP BY clauses. Aggregate functions and their signatures are similar to those of scalar functions, with a small set of different properties.
4 |
5 | Aggregate function signatures contain all the properties defined for [scalar functions](scalar_functions.md). Additionally, they contain the properties below:
6 |
7 | | Property | Description | Required |
8 | | ------------------------ | --------------------------------------------------------------- | ------------------------------- |
9 | | Inherits | All properties defined for scalar function. | N/A |
10 | | Ordered | Whether the result of this function is sensitive to sort order. | Optional, defaults to false |
11 | | Maximum set size | Maximum allowed set size as an unsigned integer. | Optional, defaults to unlimited |
12 | | Decomposable | Whether the function can be executed in one or more intermediate steps. Valid options are: `NONE`, `ONE`, `MANY`, describing how intermediate steps can be taken. | Optional, defaults to `NONE` |
13 | | Intermediate Output Type | If the function is decomposable, represents the intermediate output type that is used, if the function is defined as either `ONE` or `MANY` decomposable. Will be a struct in many cases. | Required for `ONE` and `MANY`. |
14 | | Invocation | Whether the function uses all or only distinct values in the aggregation calculation. Valid options are: `ALL`, `DISTINCT`. | Optional, defaults to `ALL` |
15 |
16 |
17 |
18 | ## Aggregate Binding
19 |
20 | When binding an aggregate function, the binding must include the following additional properties beyond the standard scalar binding properties:
21 |
22 | | Property | Description |
23 | | -------- | ------------------------------------------------------------ |
24 | | Phase | Describes the input type of the data: [INITIAL_TO_INTERMEDIATE, INTERMEDIATE_TO_INTERMEDIATE, INITIAL_TO_RESULT, INTERMEDIATE_TO_RESULT] describing what portion of the operation is required. For functions that are NOT decomposable, the only valid option will be INITIAL_TO_RESULT. |
25 | | Ordering | Zero or more ordering keys along with key order (ASC\|DESC\|NULL FIRST, etc.), declared similar to the sort keys in an `ORDER BY` relational operation. If no sorts are specified, the records are not sorted prior to being passed to the aggregate function. |
26 |
27 |
--------------------------------------------------------------------------------
/site/docs/expressions/dynamic_parameters.md:
--------------------------------------------------------------------------------
1 | # Dynamic Parameter Expression
2 |
3 | The dynamic parameter expression represents a placeholder within an expression whose value is determined at runtime.
4 | This is particularly useful for parameterized queries where certain values are not known until execution.
5 | Additionally, using dynamic parameters can enable other use cases, such as sharing execution plans without embedding sensitive information.
6 |
7 | A dynamic parameter expression includes the following properties:
8 |
9 | | Property | Description | Required |
10 | |-----------------------|-------------------------------------------------------------------------------|----------|
11 | | `type` | Specifies the expected data type of the dynamic parameter. | Yes |
12 | | `parameter_reference` | A surrogate key used within a plan to reference a specific parameter binding. | Yes |
13 |
--------------------------------------------------------------------------------
/site/docs/expressions/embedded_functions.md:
--------------------------------------------------------------------------------
1 | # Embedded Functions
2 |
3 | Embedded functions are a special kind of function where the implementation is embedded within the actual plan. They are commonly used in tools where a user intersperses business logic within a data pipeline. This is more common in data science workflows than traditional SQL workflows.
4 |
5 | Embedded functions are not pre-registered. Embedded functions require that data be consumed and produced with a standard API, may require memory allocation and have determinate error reporting behavior. They may also have specific runtime dependencies. For example, a Python pickle function may depend on pyarrow 5.0 and pynessie 1.0.
6 |
7 | Properties for an embedded function include:
8 |
9 | | Property | Description | Required |
10 | | ------------------- | ---------------------------------------------------------- | -------- |
11 | | Function Type | The type of embedded function presented. | Required |
12 | | Function Properties | Function properties, one of those items defined below. | Required |
13 | | Output Type | The fully resolved output type for this embedded function. | Required |
14 |
15 | The binary representation of an embedded function is:
16 |
17 |
18 | === "Binary Representation"
19 | ```proto
20 | %%% proto.message.Expression.EmbeddedFunction %%%
21 | ```
22 |
23 | === "Human Readable Representation"
24 | As the bytes are opaque to Substrait there is no equivalent human readable form.
25 |
26 |
27 | ## Function Details
28 |
29 | There are many types of possible stored functions. For each, Substrait works to expose the function in as descriptive a way as possible to support the largest number of consumers.
30 |
31 |
32 |
33 | ## Python Pickle Function Type
34 |
35 | | Property | Description | Required |
36 | | ----------- | ------------------------------------------------------------ | -------------------------- |
37 | | Pickle Body | binary pickle encoded function using [TBD] API representation to access arguments. | True |
38 | | Prereqs | A list of specific Python conda packages that are prerequisites for access (a structured version of a requirements.txt file). | Optional, defaults to none |
39 |
40 |
41 |
42 | ## WebAssembly Function Type
43 |
44 | | Property | Description | Required |
45 | | -------- | ------------------------------------------------------------ | -------------------------- |
46 | | Script | WebAssembly function | True |
47 | | Prereqs | A list of AssemblyScript prerequisites required to compile the AssemblyScript function using NPM coordinates. | Optional, defaults to none |
48 |
49 |
50 |
51 | ???+ question "Discussion Points"
52 |
53 | * What are the common embedded function formats?
54 | * How do we expose the data for a function?
55 | * How do we express batching capabilities?
56 | * How do we ensure/declare containerization?
57 |
--------------------------------------------------------------------------------
/site/docs/expressions/extended_expression.md:
--------------------------------------------------------------------------------
1 | # Extended Expression
2 |
3 | Extended Expression messages are provided for expression-level protocols as an alternative to using a Plan. They mainly target expression-only evaluations, such as those computed in Filter/Project/Aggregation rels. Unlike the original Expression defined in the Substrait protocol, Extended Expression messages require more information to completely describe the computation context including: input data schema, referred function signatures, and output schema.
4 |
5 | Since Extended Expression will be used separately from the Plan rel representation, it will need to include basic fields like Version.
6 |
7 | === "ExtendedExpression Message"
8 |
9 | ```proto
10 | %%% proto.message.ExtendedExpression %%%
11 | ```
12 |
13 | ## Input and output data schema
14 |
15 | Similar to `base_schema` defined in [ReadRel](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L58), the input data schema describes the name/type/nullability and layout info of input data for the target expression evaluation. It also has a field `name` to define the name of the output data.
16 |
17 | ## Referred expression
18 |
19 | An Extended Expression will have one or more referred expressions, which can be either [Expression](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto) or [AggregateFunction](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L1170). Additional types of expressions may be added in the future.
20 |
21 | For a message with multiple expressions, users may produce each Extended Expression in the same order as they occur in the original Plan rel. But, the consumer does NOT have to handle them in this order. A consumer needs only to ensure that the columns in the final output are organized in the same order as defined in the message.
22 |
23 | ## Function extensions
24 |
25 | Function extensions work the same for both Extended Expression and the original Expression defined in the Substrait protocol.
26 |
--------------------------------------------------------------------------------
/site/docs/expressions/subqueries.md:
--------------------------------------------------------------------------------
1 | # Subqueries
2 |
3 | Subqueries are scalar expressions comprised of another query.
4 |
5 | ## Forms
6 |
7 | ### Scalar
8 |
9 | Scalar subqueries are subqueries that return one row and one column.
10 |
11 | | Property | Description | Required |
12 | | -------- | -------------- | -------- |
13 | | Input | Input relation | Yes |
14 |
15 | ### `IN` predicate
16 |
17 | An `IN` subquery predicate checks that the left expression is contained in the
18 | right subquery.
19 |
20 | #### Examples
21 |
22 | ```sql
23 | SELECT *
24 | FROM t1
25 | WHERE x IN (SELECT * FROM t2)
26 | ```
27 |
28 | ```sql
29 | SELECT *
30 | FROM t1
31 | WHERE (x, y) IN (SELECT a, b FROM t2)
32 | ```
33 |
34 | | Property | Description | Required |
35 | | -------- | ------------------------------------------- | -------- |
36 | | Needles | Expressions whose existence will be checked | Yes |
37 | | Haystack | Subquery to check | Yes |
38 |
39 | ### Set predicates
40 |
41 | A set predicate is a predicate over a set of rows in the form of a subquery.
42 |
43 | `EXISTS` and `UNIQUE` are common SQL spellings of these kinds of predicates.
44 |
45 | | Property | Description | Required |
46 | | --------- | ------------------------------------------ | -------- |
47 | | Operation | The operation to perform over the set | Yes |
48 | | Tuples | Set of tuples to check using the operation | Yes |
49 |
50 | ### Set comparisons
51 |
52 | A set comparison subquery is a subquery comparison using `ANY` or `ALL` operations.
53 |
54 | #### Examples
55 |
56 | ```sql
57 | SELECT *
58 | FROM t1
59 | WHERE x < ANY(SELECT y from t2)
60 | ```
61 |
62 | | Property | Description | Required |
63 | | --------------------- | ---------------------------------------------- | -------- |
64 | | Reduction operation | The kind of reduction to use over the subquery | Yes |
65 | | Comparison operation | The kind of comparison operation to use | Yes |
66 | | Expression | Left-hand side expression to check | Yes |
67 | | Subquery | Subquery to check | Yes |
68 |
69 |
70 |
71 | === "Protobuf Representation"
72 |
73 | ```proto
74 | %%% proto.message.Expression.Subquery %%%
75 | ```
76 |
--------------------------------------------------------------------------------
/site/docs/expressions/table_functions.md:
--------------------------------------------------------------------------------
1 | # Table Functions
2 |
3 | Table functions produce zero or more records for each input record. Table functions use a signature similar to scalar functions. However, they are not allowed in the same contexts.
4 |
5 |
6 |
7 | to be completed...
8 |
9 |
--------------------------------------------------------------------------------
/site/docs/expressions/user_defined_functions.md:
--------------------------------------------------------------------------------
1 | # User-Defined Functions
2 |
3 | Substrait supports the creation of custom functions using [simple extensions](../extensions/index.md#simple-extensions), using the facilities described in [scalar functions](scalar_functions.md). The functions defined by Substrait use the same mechanism. The extension files for standard functions can be found [here](https://github.com/substrait-io/substrait/tree/main/extensions).
4 |
5 | Here's an example function that doubles its input:
6 |
7 | !!! info inline end "Implementation Note"
8 | This implementation is only defined on 32-bit floats and integers but could be defined on all numbers (and even lists and strings). The user of the implementation can specify what happens when the resulting value falls outside of the valid range for a 32-bit float (either return NAN or raise an error).
9 |
10 | ``` yaml
11 | %YAML 1.2
12 | ---
13 | scalar_functions:
14 | -
15 | name: "double"
16 | description: "Double the value"
17 | impls:
18 | - args:
19 | - name: x
20 | value: fp32
21 | options:
22 | on_domain_error:
23 | values: [ NAN, ERROR ]
24 | return: fp32
25 | - args:
26 | - name: x
27 | value: i32
28 | options:
29 | on_domain_error:
30 | values: [ NAN, ERROR ]
31 | return: i32
32 | ```
33 |
--------------------------------------------------------------------------------
/site/docs/expressions/window_functions.md:
--------------------------------------------------------------------------------
1 | # Window Functions
2 |
3 | Window functions are functions which consume values from multiple records to produce a single output. They are similar to aggregate functions, but also have a focused window of analysis to compare to their partition window. Window functions are similar to scalar values to an end user, producing a single value for each input record. However, the consumption visibility for the production of each single record can be many records.
4 |
5 |
6 |
7 | Window function signatures contain all the properties defined for [aggregate functions](aggregate_functions.md). Additionally, they contain the properties below:
8 |
9 | | Property | Description | Required |
10 | | ----------- | ------------------------------------------------------------ | ------------------------------- |
11 | | Inherits | All properties defined for aggregate functions. | N/A |
12 | | Window Type | STREAMING or PARTITION. Describes whether the function needs to see all data for the specific partition operation simultaneously. Operations like SUM can produce values in a streaming manner with no complete visibility of the partition. NTILE requires visibility of the entire partition before it can start producing values. | Optional, defaults to PARTITION |
13 |
14 |
15 |
16 | When binding a window function, the binding must include the following additional properties beyond the standard scalar binding properties:
17 |
18 | | Property | Description | Required |
19 | | ----------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
20 | | Partition | A list of partitioning expressions. | False, defaults to a single partition for the entire dataset |
21 | | Lower Bound | Bound Following(int64), Bound Trailing(int64) or CurrentRow. | False, defaults to start of partition |
22 | | Upper Bound | Bound Following(int64), Bound Trailing(int64) or CurrentRow. | False, defaults to end of partition |
23 |
24 | ## Aggregate Functions as Window Functions
25 |
26 | Aggregate functions can be treated as window functions with Window Type set to STREAMING.
27 |
28 | AVG, COUNT, MAX, MIN and SUM are examples of aggregate functions that are commonly allowed in window contexts.
29 |
--------------------------------------------------------------------------------
/site/docs/extensions/.gitignore:
--------------------------------------------------------------------------------
1 | *.md
2 | !index.md
3 |
--------------------------------------------------------------------------------
/site/docs/faq.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: FAQ
3 | ---
4 |
5 | # Frequently Asked Questions
6 |
7 | ## What is the purpose of the post-join filter field on Join relations?
8 |
9 | The post-join filter on the various Join relations is not always equivalent to an explicit Filter relation AFTER the Join.
10 |
11 | See the example [here](https://facebookincubator.github.io/velox/develop/joins.html#hash-join-implementation) that highlights how the post-join filter behaves differently than a Filter relation in the case of a left join.
12 |
13 | ## Why does the project relation keep existing columns?
14 |
15 | In several relational algebra systems ([DuckDB](https://duckdb.org/), [Velox](https://velox-lib.io/), [Apache Spark](https://spark.apache.org/), [Apache DataFusion](https://datafusion.apache.org/), etc.) the project relation is used both
16 | to add new columns and remove existing columns. It is defined by a list of expressions and there is one output
17 | column for each expression.
18 |
19 | In Substrait, the project relation is only used to add new columns. Any relation can remove columns by using the
20 | `emit` property in `RelCommon`. This is because it is very common for optimized plans to discard columns once they
21 | are no longer needed and this can happen anywhere in a plan. If this discard required a project relation then
22 | optimized plans would be cluttered with project relations that only remove columns.
23 |
24 | As a result, Substrait's project relation is a little different. It is also defined by a list of expressions.
25 | However, the output columns are a combination of the input columns and one column for each of the expressions.
26 |
27 | ## Where are field names represented?
28 |
29 | Some relational algebra systems, such as Spark, give names to the output fields of a relation. For example, in
30 | PySpark I might run `df.withColumn("num_chars", length("text")).filter("num_chars > 10")`. This creates a
31 | project relation, which calculates a new field named `num_chars`. This field is then referenced in the filter
32 | relation. Spark's logical plan maps closely to this and includes both the expression (`length("text")`) and the
33 | name of the output field (`num_chars`) in its project relation.
34 |
35 | Substrait does not name intermediate fields in a plan. This is because these field names have no effect on
36 | the computation that must be performed. In addition, it opens the door to name-based references, which Substrait
37 | also does not support, because these can be a source of errors and confusion. One of the goals of Substrait is
38 | to make it very easy for consumers to understand plans. All references in Substrait are done with ordinals.
39 |
40 | In order to allow plans that do use named fields to round-trip through Substrait there is a hint that can be
41 | used to add field names to a plan. This hint is called `output_names` and is located in `RelCommon`. Consumers
42 | should not rely on this hint being present in a plan but, if present, it can be used to provide field names to
43 | intermediate relations in a plan for round-trip or debugging purposes.
44 |
45 | There are a few places where Substrait DOES define field names:
46 |
47 | - Read relations have field names in the base schema. This is because it is quite common for reads to do a
48 | name-based lookup to determine the columns that need to be read from source files.
49 | - The root relation has field names. This is because the root relation is the final output of the plan and
50 | it is useful to have names for the fields in the final output.
51 |
--------------------------------------------------------------------------------
/site/docs/img/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/site/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Home
3 | ---
4 |
5 |
6 | ## What is Substrait?
7 |
8 | Substrait is a format for describing compute operations on structured data. It is designed for interoperability across different languages and systems.
9 |
10 |
11 |
12 | ## How does it work?
13 |
14 | Substrait provides a well-defined, cross-language [specification](spec/specification.md) for data compute operations. This includes a consistent declaration of common operations, custom operations and one or more serialized representations of this specification. The spec focuses on the semantics of each operation. In addition to the specification, the Substrait ecosystem also includes a number of libraries and [useful tools](tools/producer_tools.md).
15 |
16 | We highly recommend the [tutorial](tutorial/sql_to_substrait.md) to learn how a Substrait plan is constructed.
17 |
18 |
19 |
20 | ## Benefits
21 |
22 | * Avoids every system needing to create a communication method between every other system -- each system merely supports ingesting and producing Substrait and it instantly becomes a part of the greater ecosystem.
23 | * Makes every part of the system upgradable. There's a new query engine that's ten times faster? Just plug it in!
24 | * Enables heterogeneous environments -- run on a cluster of an unknown set of execution engines!
25 | * The text version of the Substrait plan allows you to quickly see how a plan functions without needing a visualizer (although there are Substrait visualizers as well!).
26 |
27 |
28 |
29 | ## Example Use Cases
30 |
31 | * Communicate a compute plan between a SQL parser and an execution engine (e.g. Calcite SQL parsing to Arrow C++ compute kernel)
32 | * Serialize a plan that represents a SQL view for consistent use in multiple systems (e.g. Iceberg views in Spark and Trino)
33 | * Submit a plan to different execution engines (e.g. DataFusion and Postgres) and get a consistent interpretation of the semantics.
34 | * Create an alternative plan generation implementation that can connect an existing end-user compute expression system to an existing end-user processing engine (e.g. Pandas operations executed inside SingleStore)
35 | * Build a pluggable plan visualization tool (e.g. D3 based plan visualizer)
36 |
37 |
--------------------------------------------------------------------------------
/site/docs/relations/_config:
--------------------------------------------------------------------------------
1 | arrange:
2 | - basics.md
3 | - common_fields.md
4 | - logical_relations.md
5 | - physical_relations.md
6 | - user_defined_relations.md
7 | - embedded_relations.md
8 |
--------------------------------------------------------------------------------
/site/docs/relations/common_fields.md:
--------------------------------------------------------------------------------
1 | # Common Fields
2 |
3 | Every relation contains a common section containing optional hints and emit behavior.
4 |
5 |
6 | ## Emit
7 |
8 | A relation which has a direct emit kind outputs the relation's output without reordering or selection. A relation that specifies an emit output mapping can output its output columns in any order and may leave output columns out.
9 |
10 | ???+ info "Relation Output"
11 |
12 | * Many relations (such as Project) by default output the list of all their input columns plus any generated columns. Review each relation to understand its specific output default.
13 |
14 |
15 | ## Hints
16 |
17 | Hints provide information that can improve performance but cannot be used to control the behavior. Table statistics, runtime constraints, name hints, and saved computations all fall into this category.
18 |
19 | ???+ info "Hint Design"
20 |
21 | * If a hint is not present or has incorrect data the consumer should be able to ignore it and still arrive at the correct result.
22 |
23 |
24 | ### Saved Computations
25 |
26 | Computations can be used to save a data structure to use elsewhere. For instance, let's say we have a plan with a HashEquiJoin and an AggregateDistinct operation. The HashEquiJoin could save its hash table as part of saved computation id number 1 and the AggregateDistinct could read in computation id number 1.
27 |
--------------------------------------------------------------------------------
/site/docs/relations/embedded_relations.md:
--------------------------------------------------------------------------------
1 | # Embedded Relations
2 |
3 | Pending.
4 |
5 | Embedded relations allow a Substrait producer to define a set operation that will be embedded in the plan.
6 |
7 | TODO: define lots of details about what interfaces, languages, formats, etc. Should reasonably be an extension of embedded user defined table functions.
8 |
--------------------------------------------------------------------------------
/site/docs/relations/user_defined_relations.md:
--------------------------------------------------------------------------------
1 | # User Defined Relations
2 |
3 | Pending
4 |
5 |
--------------------------------------------------------------------------------
/site/docs/serialization/_config:
--------------------------------------------------------------------------------
1 | arrange:
2 |
3 | - basics.md
4 | - binary_serialization.md
5 | - text_serialization.md
6 |
--------------------------------------------------------------------------------
/site/docs/serialization/basics.md:
--------------------------------------------------------------------------------
1 | # Basics
2 |
3 | Substrait is designed to be serialized into various different formats. Currently we support a binary serialization for
4 | transmission of plans between programs (e.g. IPC or network communication) and a text serialization for debugging and human readability. Other formats may be added in the future.
5 |
6 | These formats serialize a collection of plans. Substrait does not define how a collection of plans is to be interpreted.
7 | For example, the following scenarios are all valid uses of a collection of plans:
8 |
9 | - A query engine receives a plan and executes it. It receives a collection of plans with a single root plan. The
10 | top-level node of the root plan defines the output of the query. Non-root plans may be included as common subplans
11 | which are referenced from the root plan.
12 | - A transpiler may convert plans from one dialect to another. It could take, as input, a single root plan. Then
13 | it could output a serialized binary containing multiple root plans. Each root plan is a representation of the
14 | input plan in a different dialect.
15 | - A distributed scheduler might expect 1+ root plans. Each root plan describes a different stage of computation.
16 |
17 | Libraries should make sure to thoroughly describe the way plan collections will be produced or consumed.
18 |
19 | ## Root plans
20 |
21 | We often refer to query plans as a graph of nodes (typically a DAG unless the query is recursive). However, we
22 | encode this graph as a collection of trees with a single root tree that references other trees (which may also
23 | transitively reference other trees). Plan serializations all have some way to indicate which plan(s) are "root"
24 | plans. Any plan that is not a root plan and is not referenced (directly or transitively) by some root plan
25 | can safely be ignored.
26 |
--------------------------------------------------------------------------------
/site/docs/serialization/text_serialization.md:
--------------------------------------------------------------------------------
1 | # Text Serialization
2 |
3 | To maximize the new user experience, it is important for Substrait to have a text representation of plans. This allows people to experiment with basic tooling. Building simple CLI tools that do things like SQL > Plan and Plan > SQL or REPL plan construction can all be done relatively straightforwardly with a text representation.
4 |
5 | The recommended text serialization format is JSON. Since the text format is not designed for performance, the format can be produced to maximize readability. This also allows nice symmetry between the construction of plans and the configuration of various extensions such as function signatures and user defined types.
6 |
7 | To ensure the JSON is valid, the object will be defined using the [OpenApi 3.1 specification](https://spec.openapis.org/oas/latest.html). This not only allows strong validation, the OpenApi specification enables [code generators](https://github.com/OpenAPITools/openapi-generator) to be easily used to produce plans in many languages.
8 |
9 | While JSON will be used for much of the plan serialization, Substrait uses a custom simplistic grammar for record level expressions. While one can construct an equation such as `(10 + 5)/2` using a tree of function and literal objects, it is much more human-readable to consume a plan when the information is written similarly to the way one typically consumes scalar expressions. This grammar will be maintained in an ANTLR grammar (targetable to multiple programming languages) and is also planned to be supported via JSON schema definition format tag so that the grammar can be validated as part of the schema validation.
10 |
11 |
--------------------------------------------------------------------------------
/site/docs/spec/_config:
--------------------------------------------------------------------------------
1 | arrange:
2 | - versioning.md
3 | - specification.md
4 | - technology_principles.md
5 | - extending.md
6 |
--------------------------------------------------------------------------------
/site/docs/spec/extending.md:
--------------------------------------------------------------------------------
1 | # Extending
2 |
3 | Substrait is a community project and requires consensus about new additions to the specification in order to maintain consistency. The best way to get consensus is to discuss ideas. The main ways to communicate are:
4 |
5 | * Substrait Mailing List
6 | * Substrait Slack
7 | * Community Meeting
8 |
9 | ## Minor changes
10 |
11 | Simple changes like typos and bug fixes do not require as much effort. [File an issue](https://github.com/substrait-io/substrait/issues) or [send a PR](https://github.com/substrait-io/substrait/pulls) and we can discuss it there.
12 |
13 | ## Complex changes
14 |
15 | For complex features it is useful to discuss the change first. It will be useful to gather some background information to help get everyone on the same page.
16 |
17 | ### Outline the issue
18 |
19 | #### Language
20 |
21 | Every engine has its own terminology. Every Spark user probably knows what an "attribute" is. Velox users will know what a "RowVector" means. Etc. However, Substrait is used by people that come from a variety of backgrounds and you should generally assume that its users do not know anything about your own implementation. As a result, all PRs and discussion should endeavor to use Substrait terminology wherever possible.
22 |
23 | #### Motivation
24 |
25 | What problems does this relation solve? If it is a more logical relation then how does it allow users to express new capabilities? If it is more of an internal relation then how does it map to existing logical relations? How is it different than other existing relations? Why do we need this?
26 |
27 | #### Examples
28 |
29 | Provide example input and output for the relation. Show example plans. Try to motivate your examples, as best as possible, with something that looks like a real world problem. These will go a long way toward helping others understand the purpose of a relation.
30 |
31 | #### Alternatives
32 |
33 | Discuss what alternatives are out there. Are there other ways to achieve similar results? Do some systems handle this problem differently?
34 |
35 | ### Survey existing implementation
36 |
37 | It's unlikely that this is the first time that this has been done. Figuring out how existing systems have approached the problem is a good way to inform the design.
38 |
39 | ### Prototype the feature
40 |
41 | Novel approaches should be implemented as an extension first.
42 |
43 | ### Substrait design principles
44 |
45 | Substrait is designed around interoperability so a feature only used by a single system may not be accepted. But don't despair! Substrait has a highly developed extension system for this express purpose.
46 |
47 | ### You don't have to do it alone
48 |
49 | If you are hoping to add a feature and these criteria seem intimidating then feel free to start a mailing list discussion before you have all the information and ask for help. Investigating other implementations, in particular, is something that can be quite difficult to do on your own.
50 |
--------------------------------------------------------------------------------
/site/docs/spec/technology_principles.md:
--------------------------------------------------------------------------------
1 | # Technology Principles
2 |
3 | * Provide a good suite of well-specified common functionality in databases and data science applications.
4 | * Make it easy for users to privately or publicly extend the representation to support specialized/custom operations.
5 | * Produce something that is language agnostic and requires minimal work to start developing against in a new language.
6 | * Drive towards a common format that avoids specialization for a single favorite producer or consumer.
7 | * Establish clear delineation between specifications that MUST be respected and those that can be optionally ignored.
8 | * Establish a forgiving compatibility approach and versioning scheme that supports cross-version compatibility in the maximum number of cases.
9 | * Minimize the need for consumer intelligence by excluding concepts like overloading, type coercion, implicit casting, field name handling, etc. (Note: this is weak and should be better stated.)
10 | * Decomposability/severability: A particular producer or consumer should be able to produce or consume only a subset of the specification and interact well with any other Substrait system as long as the specific operations requested fit within the subset of specification supported by the counter system.
11 |
12 |
13 |
--------------------------------------------------------------------------------
/site/docs/spec/versioning.md:
--------------------------------------------------------------------------------
1 | # Versioning
2 |
3 | As an interface specification, the goal of Substrait is to reach a point where (breaking) changes will never need to happen again, or at least be few and far between.
4 | By analogy, Apache Arrow's in-memory format specification has stayed functionally constant, despite many major library versions being released.
5 | However, we're not there yet.
6 | When we believe that we've reached this point, we will signal this by releasing version 1.0.0.
7 | Until then, we will remain in the 0.x.x version regime.
8 |
9 | Despite this, we strive to maintain backward compatibility for both the binary representation and the text representation by means of deprecation.
10 | When a breaking change cannot be reasonably avoided, we may remove previously deprecated fields.
11 | All deprecated fields will be removed for the 1.0.0 release.
12 |
13 | Substrait uses [semantic versioning](https://semver.org/) for its version numbers, with the addition that, during 0.x.y, we increment the x digit for breaking changes and new features, and the y digit for fixes and other nonfunctional changes.
14 | The release process is currently automated and makes a new release every week, provided something has changed on the main branch since the previous release.
15 | This release cadence will likely be slowed down as stability increases over time.
16 | [Conventional commits](https://www.conventionalcommits.org/en/v1.0.0-beta.2/) are used to distinguish between breaking changes, new features, and fixes,
17 | and GitHub actions are used to verify that there are indeed no breaking protobuf changes in a commit, unless the commit message states this.
18 |
--------------------------------------------------------------------------------
/site/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | img.bordered {
2 | height: auto;
3 | width: auto;
4 | border: 1px solid #9f9f9f;
5 | transition: transform ease-in-out 0.3s;
6 | }
--------------------------------------------------------------------------------
/site/docs/tools/_config:
--------------------------------------------------------------------------------
1 | arrange:
2 | - producer_tools.md
3 | - substrait_validator.md
4 | - third_party_tools.md
5 |
--------------------------------------------------------------------------------
/site/docs/tools/producer_tools.md:
--------------------------------------------------------------------------------
1 | # Producer Tools
2 |
3 | ## Isthmus
4 |
5 | [Isthmus](https://github.com/substrait-io/substrait-java/tree/main/isthmus) is an application
6 | that serializes SQL to [Substrait Protobuf](https://substrait.io/serialization/binary_serialization/)
7 | via the Calcite SQL compiler.
8 |
--------------------------------------------------------------------------------
/site/docs/tools/substrait_validator.md:
--------------------------------------------------------------------------------
1 | # Substrait Validator
2 |
3 | The [Substrait Validator](https://github.com/substrait-io/substrait-validator) is a tool
4 | used to validate Substrait plans as well as print diagnostic information regarding the plan validity.
5 |
--------------------------------------------------------------------------------
/site/docs/tools/third_party_tools.md:
--------------------------------------------------------------------------------
1 | # Third Party Tools
2 |
3 | ## Substrait-tools
4 | The [substrait-tools](https://pypi.org/project/substrait-tools/) python package provides
5 | a command line interface for producing/consuming substrait plans by leveraging the APIs
6 | from different producers and consumers.
7 |
8 | ## Substrait Fiddle
9 | [Substrait Fiddle](https://substrait-fiddle.com) is an online tool to share, debug, and prototype Substrait plans.
10 |
11 | The [Substrait Fiddle Source](https://github.com/voltrondata/substrait-fiddle) is available allowing it to be run in any environment.
12 |
13 |
--------------------------------------------------------------------------------
/site/docs/tutorial/examples.md:
--------------------------------------------------------------------------------
1 | # Code samples and examples
2 |
3 | It's very useful to have examples of how APIs are used; both to get information on the best practices for using APIs and ideas of how they can be used.
4 |
5 | Each language binding is intended to contain examples that are relevant to that language. New contributions are always welcome.
6 |
7 | ## Java
8 |
9 | - [Substrait-Spark](https://github.com/substrait-io/substrait-java/tree/main/examples/substrait-spark): demonstrates how Substrait plans can be created and consumed within Apache Spark. The examples run within a simple Spark cluster composed of a couple of Docker containers.
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/site/docs/tutorial/expression_trees.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/site/docs/types/_config:
--------------------------------------------------------------------------------
1 | arrange:
2 | - type_system.md
3 | - type_classes.md
4 | - type_variations.md
5 | - type_parsing.md
6 | - named_structs.md
7 |
--------------------------------------------------------------------------------
/site/docs/types/named_structs.md:
--------------------------------------------------------------------------------
1 | # Named Structs
2 |
3 | A Named Struct is a special type construct that combines:
4 | * A Struct type
5 | * A list of names for the fields in the Struct, in depth-first search order
6 |
7 | The depth-first search order for names arises from the ability to nest Structs within other types. All Struct fields must be named, even nested fields.
8 |
9 | Named Structs are most commonly used to model the schema of Read relations.
10 |
11 | ## Determining Names
12 | When producing/consuming names for a NamedStruct, some types require special handling:
13 |
14 | ### Struct
15 | A struct has names for each of its inner fields.
16 |
17 | For example, the following Struct
18 | ```
19 | struct<i64, i64>
20 |        ↑    ↑
21 |        a    b
22 | ```
23 | has 2 names, one for each of its inner fields.
24 |
25 | ### Structs within Compound Types
26 | Struct types nested in compound types must also be named.
27 |
28 | #### Structs within Maps
29 | If a Map contains Structs, either as keys or values or both, the Struct fields must be named. Keys are named before values. For example the following Map
30 | ```
31 | map<struct<i64, i64>, struct<i64, i64, i64>>
32 |            ↑    ↑            ↑    ↑    ↑
33 |            a    b            c    d    e
34 |
35 | ```
36 | has 5 named fields
37 | * 2 names [a, b] for the struct fields used as a key
38 | * 3 names [c, d, e] for the struct fields used as a value
39 |
40 | #### Structs within List
41 | If a List contains Structs, the Struct fields must be named. For example the following List
42 | ```
43 | list<struct<i64, i64>>
44 |             ↑    ↑
45 |             a    b
46 | ```
47 | has 2 named fields [a, b] for the struct fields.
48 |
49 | #### Structs within Struct
50 | Structs can also be embedded within Structs.
51 |
52 | A Struct like
53 | ```
54 | struct<struct<i64, i64>, struct<i64, i64, i64>>
55 |        ↑      ↑    ↑     ↑      ↑    ↑    ↑
56 |        a      b    c     d      e    f    g
57 | ```
58 | has 7 names
59 | * 1 name [a] for the 1st nested struct field
60 | * 2 names [b, c] for the fields within the 1st nested struct
61 | * 1 name [d] for the 2nd nested struct field
62 | * 3 names [e, f, g] for the fields within the 2nd nested struct
63 |
64 | ### Putting It All Together
65 |
66 | #### Simple Named Struct
67 | ```
68 | NamedStruct {
69 | names: [a, b, c, d]
70 | struct: struct<i64, list<i64>, map<i64, i64>, i64>
71 |                ↑    ↑          ↑              ↑
72 |                a    b          c              d
73 | }
74 | ```
75 |
76 | #### Structs in Compound Types
77 | ```
78 | NamedStruct {
79 | names: [a, b, c, d, e, f, g, h]
80 | struct: struct<i64, list<struct<i64, i64>>, map<i64, struct<i64, i64>>, i64>
81 |                ↑    ↑           ↑    ↑      ↑               ↑    ↑      ↑
82 |                a    b           c    d      e               f    g      h
83 | }
84 | ```
85 |
86 | #### Structs in Structs
87 | ```
88 | NamedStruct {
89 | names: [a, b, c, d, e, f, g, h, i]
90 | struct: struct, i64, struct>>>
91 | ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑
92 | a b c d e f g h i j
93 | }
94 | ```
95 |
96 |
--------------------------------------------------------------------------------
/site/docs/types/type_system.md:
--------------------------------------------------------------------------------
1 | # Type System
2 |
3 | Substrait tries to cover the most common types used in data manipulation. Types beyond this common core may be represented using [simple extensions](../extensions/index.md#simple-extensions).
4 |
5 | Substrait types fundamentally consist of four components:
6 |
7 | | Component | Condition | Examples | Description
8 | | ------------------------------- | ------------------- | ----------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
9 | | [Class](type_classes.md) | Always | `i8`, `string`, `STRUCT`, extensions | Together with the parameter pack, describes the set of non-null values supported by the type. Subdivided into simple and compound type classes.
10 | | Nullability | Always | Either `NULLABLE` (`?` suffix) or `REQUIRED` (no suffix) | Describes whether values of this type can be null. Note that null is considered to be a special value of a nullable type, rather than the only value of a special null type.
11 | | [Variation](type_variations.md) | Always | No suffix or explicitly `[0]` (system-preferred), or an extension | Allows different variations of the same type class to exist in a system at a time, usually distinguished by in-memory format.
12 | | Parameters | Compound types only | `<10, 2>` (for `DECIMAL`), `<i32, string>` (for `STRUCT`) | Some combination of zero or more data types or integers. The expected set of parameters and the significance of each parameter depends on the type class.
13 |
14 | Refer to [Type Parsing](type_parsing.md) for a description of the syntax used to describe types.
15 |
16 | !!! note "Note"
17 | Substrait employs a strict type system without any coercion rules. All changes in types must be made explicit via [cast expressions](../expressions/specialized_record_expressions.md).
18 |
--------------------------------------------------------------------------------
/site/docs/types/type_variations.md:
--------------------------------------------------------------------------------
1 | # Type Variations
2 |
3 | Type variations may be used to represent differences in representation between different consumers. For example, an engine might support dictionary encoding for a string, or could be using either a row-wise or columnar representation of a struct. All variations of a type are expected to have the same semantics when operated on by functions or other expressions.
4 |
5 | All variations except the "system-preferred" variation (a.k.a. `[0]`, see [Type Parsing](type_parsing.md)) must be defined using [simple extensions](../extensions/index.md#simple-extensions). The key properties of these variations are:
6 |
7 | | Property | Description |
8 | | ----------------- | ------------------------------------------------------------ |
9 | | Base Type Class | The type class that this variation belongs to. |
10 | | Name | The name used to reference this type. Should be unique within type variations for this parent type within a simple extension. |
11 | | Description | A human description of the purpose of this type variation. |
12 | | Function Behavior | **INHERITS** or **SEPARATE**: whether functions that support the system-preferred variation implicitly also support this variation, or whether functions should be resolved independently. For example, if one has the function `add(i8,i8)` defined and then defines an `i8` variation, this determines whether the `i8` variation can be bound to the base `add` operation (inherits) or whether a specialized version of `add` needs to be defined specifically for this variation (separate). Defaults to inherits. |
13 |
--------------------------------------------------------------------------------
/site/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: "Substrait: Cross-Language Serialization for Relational Algebra"
2 | site_description: >-
3 | Substrait is a new specification and set of tools that allow different systems to express clear data manipulation
4 | operations.
5 | site_url: "https://substrait.io"
6 | edit_uri: ""
7 | strict: true
8 | remote_name: origin
9 | remote_branch: gh-pages
10 | use_directory_urls: true
11 | #include_search_page: false
12 | #search_index_only: true
13 | theme:
14 | name: material
15 | custom_dir: overrides
16 | logo: img/logo.svg
17 | palette:
18 | primary: pink
19 | features:
20 | - navigation.tabs
21 | - navigation.tabs.sticky
22 | - navigation.expand
23 | - navigation.instant
24 | - toc.integrate
25 | extra_css:
26 | - stylesheets/extra.css
27 |
28 | extra:
29 | versions:
30 | slackinvitelink: https://join.slack.com/t/substrait/shared_invite/zt-10oeki45w-FARWnh4NMpXnm4x~hWyiGQ
31 | analytics:
32 | provider: google
33 | property: G-57ZX8S93Q5
34 | social:
35 | - icon: fontawesome/brands/twitter
36 | link: https://twitter.com/substrait_io
37 | repo_url: https://github.com/substrait-io/substrait
38 | plugins:
39 | - table-reader
40 | - markdownextradata
41 | - search
42 | - awesome-pages:
43 | filename: _config
44 | - minify:
45 | minify_html: true
46 | - mkdocs_protobuf:
47 | proto_dir: ../proto/substrait
48 | indent_depth: 4 # required to make superfences happy
49 | - redirects:
50 | redirect_maps:
51 | 'types/simple_logical_types.md': 'types/type_classes.md'
52 | 'types/compound_logical_types.md': 'types/type_classes.md'
53 | 'types/user_defined_types.md': 'types/type_classes.md'
54 | - gen-files:
55 | scripts:
56 | - docs/extensions/generate_function_docs.py
57 | watch:
58 | - ../extensions
59 | markdown_extensions:
60 | - smarty
61 | - sane_lists
62 | - extra
63 | - tables
64 | - markdown.extensions.admonition
65 | - markdown.extensions.attr_list
66 | - markdown.extensions.def_list
67 | - markdown.extensions.footnotes
68 | - meta
69 | - markdown.extensions.toc:
70 | permalink: true
71 | - pymdownx.arithmatex:
72 | generic: true
73 | - pymdownx.betterem:
74 | smart_enable: all
75 | - pymdownx.caret
76 | - pymdownx.critic
77 | - pymdownx.details
78 | - pymdownx.emoji:
79 | emoji_index: !!python/name:material.extensions.emoji.twemoji
80 | emoji_generator: !!python/name:material.extensions.emoji.to_svg
81 | - pymdownx.highlight
82 | - pymdownx.inlinehilite
83 | - pymdownx.keys
84 | - pymdownx.magiclink:
85 | repo_url_shorthand: true
86 | user: substrait-io
87 | repo: substrait
88 | - pymdownx.mark
89 | - pymdownx.smartsymbols
90 | - pymdownx.snippets:
91 | check_paths: true
92 | - pymdownx.superfences:
93 | custom_fences:
94 | - name: mermaid
95 | class: mermaid
96 | format: !!python/name:pymdownx.superfences.fence_code_format
97 | - pymdownx.tabbed:
98 | alternate_style: true
99 | - pymdownx.tasklist:
100 | custom_checkbox: true
101 | - pymdownx.tilde
102 |
--------------------------------------------------------------------------------
/site/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs>=1.4.2,<2
2 | mkdocs-material>=9.1.5
3 | mkdocs-minify-plugin>=0.6.1,<1
4 | mkdocs-redirects>=1.2.0,<2
5 | pymdown-extensions>=9.9.1,<11
6 | mkdocs-awesome-pages-plugin>=2.8.0,<3
7 | mkdocs-gen-files>=0.4.0,<1
8 | mkdocs-markdownextradata-plugin>=0.2.5,<1
9 | mkdocs-protobuf>=0.1.0,<1
10 | mkdocs-table-reader-plugin>=2,<4
11 | pygments>=2.14,<3
12 | oyaml>=1.0,<2
13 | mdutils>=1.4.0,<2
14 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/substrait/d430e521f203aec6a4e06731d4bfd68cdf61f443/tests/__init__.py
--------------------------------------------------------------------------------
/tests/baseline.json:
--------------------------------------------------------------------------------
1 | {
2 | "registry": {
3 | "dependency_count": 13,
4 | "extension_count": 13,
5 | "function_count": 165,
6 | "num_aggregate_functions": 29,
7 | "num_scalar_functions": 158,
8 | "num_window_functions": 11,
9 | "num_function_overloads": 517
10 | },
11 | "coverage": {
12 | "total_test_count": 1086,
13 | "num_function_variants": 517,
14 | "num_covered_function_variants": 229
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/tests/cases/aggregate_approx/approx_count_distinct.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_aggregate_approx.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | approx_count_distinct((1, -2, 3, -4, 5, 6)::i8) = 6::i64
6 | approx_count_distinct((-32767, -20000, 30000, 5, 32767)::i16) = 5::i64
7 | approx_count_distinct((-2147483648, -10000000, 30000000, 2147483647)::i32) = 4::i64
8 | approx_count_distinct((-214748364800000, -1000000000, 0, 922337203685477580)::i64) = 4::i64
9 | approx_count_distinct((1)::i8) = 1::i64
10 | approx_count_distinct(()::i8) = 0::i64
11 | approx_count_distinct((Null, Null, Null)::i8) = 0::i64
12 | approx_count_distinct((Null, Null, 4, 3, Null, 922337203685477580, 12833888)::i64) = 4::i64
13 |
--------------------------------------------------------------------------------
/tests/cases/aggregate_generic/count.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_aggregate_generic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | count((100, -200, 300, -400, 5, 6)::i16) = 6::i64
6 | count((1000)::i16) = 1::i64
7 | count(()::i16) = 0::i64
8 | count((Null, Null, Null)::i16) = 0::i64
9 | count((Null, Null, Null, 1000)::i16) = 1::i64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/abs.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | abs(25::i8) = 25::i8
6 | abs(-200::i16) = 200::i16
7 | abs(30000::i32) = 30000::i32
8 | abs(-9223372036854775800::i64) = 9223372036854775800::i64
9 | abs(2.55::fp32) = 2.55::fp32
10 | abs(-2.0000007152557373046875::fp64) = 2.0000007152557373046875::fp64
11 |
12 | # null_input: Examples with null as input
13 | abs(null::i8) = null::i8
14 |
15 | # overflow: Examples demonstrating overflow behavior
16 | abs(-128::i8) [overflow:ERROR] =
17 | abs(-128::i8) [overflow:SATURATE] = 127::i8
18 | abs(-128::i8) [overflow:SILENT] =
19 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/acos.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | acos(0.00::fp32) = 1.5707963267948966::fp32
6 | acos(1.0::fp64) = 0.0::fp64
7 | acos(-0.0000009::fp64) = 1.5707972267948966::fp64
8 | acos(null::fp64) = null::fp64
9 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/acosh.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | acosh(1.0::fp64) = 0.0::fp64
6 | acosh(10.0005::fp64) = 2.9932730967481995::fp64
7 | acosh(null::fp64) = null::fp64
8 |
9 | # On_domain_error: Examples demonstrating On_domain_error behavior
10 | acosh(0.01::fp32) [on_domain_error:ERROR] =
11 | acosh(0.5::fp64) [on_domain_error:NAN] = nan::fp64
12 | acosh(0.5::fp32) [on_domain_error:NONE] = null::fp32
13 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/add.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | add(120::i8, 5::i8) = 125::i8
6 | add(100::i16, 100::i16) = 200::i16
7 | add(30000::i32, 30000::i32) = 60000::i32
8 | add(2000000000::i64, 2000000000::i64) = 4000000000::i64
9 |
10 | # overflow: Examples demonstrating overflow behavior
11 | add(120::i8, 10::i8) [overflow:ERROR] =
12 | add(30000::i16, 30000::i16) [overflow:ERROR] =
13 | add(2000000000::i32, 2000000000::i32) [overflow:ERROR] =
14 | add(9223372036854775807::i64, 1::i64) [overflow:ERROR] =
15 | add(120::i8, 10::i8) [overflow:SATURATE] = 127::i8
16 | add(-120::i8, -10::i8) [overflow:SATURATE] = -128::i8
17 | add(120::i8, 10::i8) [overflow:SILENT] =
18 |
19 | # floating_exception: Examples demonstrating exceptional floating point cases
20 | add(1.5e+308::fp64, 1.5e+308::fp64) = inf::fp64
21 | add(-1.5e+308::fp64, -1.5e+308::fp64) = -inf::fp64
22 |
23 | # rounding: Examples demonstrating floating point rounding behavior
24 | add(4.5::fp32, 2.5000007152557373046875::fp32) [rounding:TIE_TO_EVEN] = 7.00000095367431640625::fp32
25 |
26 | # types: Examples demonstrating behavior of different data types
27 | add(4.5::fp64, 2.5000007152557373046875::fp64) = 7.0000007152557373046875::fp64
28 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/asin.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | asin(0.0::fp32) = 0.0::fp32
6 | asin(1.0::fp64) = 1.5707963267948966::fp64
7 | asin(0.009::fp64) = 0.009000121504428887::fp64
8 | asin(-0.009::fp64) = -0.009000121504428887::fp64
9 | asin(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/asinh.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | asinh(0.01::fp32) = 0.009999833340832886::fp32
6 | asinh(1.0::fp64) = 0.881373587019543::fp64
7 | asinh(0.0009::fp64) = 0.0008999998785000443::fp64
8 | asinh(null::fp64) = null::fp64
9 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/atan.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | atan(0.0::fp32) = 0.0::fp32
6 | atan(1.0::fp64) = 0.7853981633974483::fp64
7 | atan(7.01::fp64) = 1.4290989925795292::fp64
8 | atan(-7.01::fp64) = -1.4290989925795292::fp64
9 | atan(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/atan2.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | atan2(0.0::fp32, 0.0::fp32) = 0.0::fp32
6 | atan2(1.0::fp64, 1.0::fp64) = 0.7853981633974483::fp64
7 | atan2(0.009::fp64, 0.0008::fp64) = 1.482140444927459::fp64
8 | atan2(null::fp64, 0.0008::fp64) = null::fp64
9 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/atanh.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | atanh(0.0::fp32) = 0.0::fp32
6 | atanh(1.0::fp64) = inf::fp64
7 | atanh(0.009::fp64) = 0.009000243011810481::fp64
8 | atanh(-0.009::fp64) = -0.009000243011810481::fp64
9 | atanh(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/bitwise_and.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bitwise_and(0::i8, 1::i8) = 0::i8
6 | bitwise_and(127::i8, 127::i8) = 127::i8
7 | bitwise_and(-127::i8, -10::i8) = -128::i8
8 | bitwise_and(31766::i16, 900::i16) = 4::i16
9 | bitwise_and(-31766::i16, 900::i16) = 896::i16
10 | bitwise_and(2147483647::i32, 1234567::i32) = 1234567::i32
11 | bitwise_and(-2147483647::i32, 1234567::i32) = 1::i32
12 | bitwise_and(9223372036854775807::i64, 127::i64) = 127::i64
13 | bitwise_and(-9223372036854775807::i64, 127::i64) = 1::i64
14 | bitwise_and(null::i64, 127::i64) = null::i64
15 | bitwise_and(127::i64, null::i64) = null::i64
16 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/bitwise_not.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bitwise_not(0::i8) = -1::i8
6 | bitwise_not(1::i8) = -2::i8
7 | bitwise_not(-127::i8) = 126::i8
8 | bitwise_not(31766::i16) = -31767::i16
9 | bitwise_not(-31766::i16) = 31765::i16
10 | bitwise_not(2147483647::i32) = -2147483648::i32
11 | bitwise_not(-2147483647::i32) = 2147483646::i32
12 | bitwise_not(9223372036854775807::i64) = -9223372036854775808::i64
13 | bitwise_not(-9223372036854775807::i64) = 9223372036854775806::i64
14 | bitwise_not(null::i64) = null::i64
15 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/bitwise_or.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bitwise_or(0::i8, 1::i8) = 1::i8
6 | bitwise_or(127::i8, 127::i8) = 127::i8
7 | bitwise_or(-127::i8, -10::i8) = -9::i8
8 | bitwise_or(31766::i16, 900::i16) = 32662::i16
9 | bitwise_or(-31766::i16, 900::i16) = -31762::i16
10 | bitwise_or(2147483647::i32, 123456789::i32) = 2147483647::i32
11 | bitwise_or(9223372036854775807::i64, 127::i64) = 9223372036854775807::i64
12 | bitwise_or(-9223372036854775807::i64, 127::i64) = -9223372036854775681::i64
13 | bitwise_or(null::i64, 127::i64) = null::i64
14 | bitwise_or(127::i64, null::i64) = null::i64
15 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/bitwise_xor.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bitwise_xor(0::i8, 1::i8) = 1::i8
6 | bitwise_xor(127::i8, 127::i8) = 0::i8
7 | bitwise_xor(-127::i8, -10::i8) = 119::i8
8 | bitwise_xor(31766::i16, 900::i16) = 32658::i16
9 | bitwise_xor(-31766::i16, 900::i16) = -32658::i16
10 | bitwise_xor(2147483647::i32, 123456789::i32) = 2024026858::i32
11 | bitwise_xor(-2147483647::i32, 123456789::i32) = -2024026860::i32
12 | bitwise_xor(9223372036854775807::i64, 127::i64) = 9223372036854775680::i64
13 | bitwise_xor(-9223372036854775807::i64, 127::i64) = -9223372036854775682::i64
14 | bitwise_xor(null::i64, 127::i64) = null::i64
15 | bitwise_xor(127::i64, null::i64) = null::i64
16 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/cos.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | cos(0.00::fp32) = 1.0::fp32
6 | cos(1.0::fp64) = 0.5403023058681398::fp64
7 | cos(7.0000009::fp64) = 0.7539016630550606::fp64
8 | cos(-7.00000095::fp64) = 0.7539016302056953::fp64
9 | cos(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/cosh.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | cosh(0.00::fp32) = 1.0::fp32
6 | cosh(1.0::fp64) = 1.5430806348152437::fp64
7 | cosh(7.0000009::fp64) = 548.3175286399451::fp64
8 | cosh(-7.00000095::fp64) = 548.3175560557769::fp64
9 | cosh(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/divide.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | divide(25::i8, 5::i8) = 5::i8
6 | divide(200::i16, -100::i16) = -2::i16
7 | divide(60000::i32, 200::i32) = 300::i32
8 | divide(4000000000::i64, -5000::i64) = -800000::i64
9 |
10 | # division_by_zero: Examples demonstrating division by zero
11 | divide(5::i8, 0::i8) [on_division_by_zero:NULL] = null::i8
12 | divide(5::i8, 0::i8) [on_division_by_zero:ERROR] = <!ERROR>
13 |
14 | # overflow: Examples demonstrating overflow behavior
15 | divide(-9223372036854775808::i64, -1::i64) [overflow:ERROR] = <!ERROR>
16 | divide(-128::i8, -1::i8) [overflow:SATURATE] = 127::i8
17 |
18 | # floating_exception: Examples demonstrating exceptional floating point cases
19 | divide(1.5e+208::fp64, 1.5e-200::fp64) = inf::fp64
20 | divide(1.5e+200::fp64, -1.5e-208::fp64) = -inf::fp64
21 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/exp.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | exp(100::i64) = 2.6881171418161356e+43::fp64
6 | exp(0.25::fp32) = 1.2840254166877414::fp32
7 | exp(0.693::fp64) = 1.9997056605411638::fp64
8 | exp(2.0000007152557373046875::fp64) = 7.3890613839973085::fp64
9 | exp(0.0::fp64) = 1.0::fp64
10 | exp(null::fp64) = null::fp64
11 | exp(1000::i64) = inf::fp64
12 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/factorial.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | factorial(0::i32) = 1::i32
6 | factorial(1::i32) = 1::i32
7 | factorial(20::i64) = 2432902008176640000::i64
8 | factorial(null::i32) = null::i32
9 |
10 | # overflow: Examples demonstrating overflow behavior
11 | factorial(1000000::i32) [overflow:ERROR] = <!ERROR>
12 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/max.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | max((20, -3, 1, -10, 0, 5)::i8) = 20::i8
6 | max((-32768, 32767, 20000, -30000)::i16) = 32767::i16
7 | max((-214748648, 214748647, 21470048, 4000000)::i32) = 214748647::i32
8 | max((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::i64) = 2000000000::i64
9 | max((2.5, 0, 5.0, -2.5, -7.5)::fp32) = 5.0::fp32
10 | max((1.5e+308, 1.5e+10, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = 1.5e+308::fp64
11 |
12 | # null_handling: Examples with null as input or output
13 | max((Null, Null, Null)::i16) = Null::i16
14 | max(()::i16) = Null::i16
15 | max((2000000000, Null, 629000000, -100000000, Null, 987654321)::i64) = 2000000000::i64
16 | max((Null, inf)::fp64) = inf::fp64
17 | max((Null, -inf, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -1.5e+7::fp64
18 | max((1.5e+308, 1.5e+10, Null, -1.5e+7, Null)::fp64) = 1.5e+308::fp64
19 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/min.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | min((20, -3, 1, -10, 0, 5)::i8) = -10::i8
6 | min((-32768, 32767, 20000, -30000)::i16) = -32768::i16
7 | min((-214748648, 214748647, 21470048, 4000000)::i32) = -214748648::i32
8 | min((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::i64) = -3217908979::i64
9 | min((2.5, 0, 5.0, -2.5, -7.5)::fp32) = -7.5::fp32
10 | min((1.5e+308, 1.5e+10, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -1.5e+70::fp64
11 |
12 | # null_handling: Examples with null as input or output
13 | min((Null, inf)::fp64) = inf::fp64
14 | min((Null, Null, Null)::i16) = Null::i16
15 | min(()::i16) = Null::i16
16 | min((2000000000, Null, 629000000, -100000000, Null, 987654321)::i64) = -100000000::i64
17 | min((Null, -inf, -1.5e+8, -1.5e+7, -1.5e+70)::fp64) = -inf::fp64
18 | min((1.5e+308, 1.5e+10, Null, -1.5e+7, Null)::fp64) = -1.5e+7::fp64
19 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/modulus.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | modulus(9::i8, 3::i8) = 0::i8
6 | modulus(10::i8, -3::i8) = 1::i8
7 | modulus(32767::i16, 1000::i16) = 767::i16
8 | modulus(-2147483647::i32, 300000000::i32) = -47483647::i32
9 | modulus(-9223372036854775800::i64, -80000000000000::i64) = -12036854775800::i64
10 | modulus(5::i8, null::i8) = null::i8
11 | modulus(null::i64, 1::i64) = null::i64
12 | modulus(null::i64, null::i64) = null::i64
13 |
14 | # on_domain_error: Examples demonstrating operation when the divisor is 0
15 | modulus(5::i8, 0::i8) [on_domain_error:NULL] = null::i8
16 | modulus(5::i8, 0::i8) [on_domain_error:ERROR] = <!ERROR>
17 |
18 | # division_type: Examples demonstrating truncate and floor division types
19 | modulus(8::i8, -3::i8) [division_type:TRUNCATE] = 2::i8
20 | modulus(8::i8, -3::i8) [division_type:FLOOR] = -1::i8
21 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/multiply.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | multiply(25::i8, 5::i8) = 125::i8
6 | multiply(2::i16, -100::i16) = -200::i16
7 | multiply(300::i32, 200::i32) = 60000::i32
8 | multiply(80000::i64, -5000::i64) = -400000000::i64
9 |
10 | # overflow: Examples demonstrating overflow behavior
11 | multiply(13::i8, 10::i8) [overflow:ERROR] = <!ERROR>
12 | multiply(11::i16, 3000::i16) [overflow:ERROR] = <!ERROR>
13 | multiply(3::i32, 1000000000::i32) [overflow:ERROR] = <!ERROR>
14 | multiply(1000000000000000000::i64, 10::i64) [overflow:ERROR] = <!ERROR>
15 | multiply(13::i8, 10::i8) [overflow:SATURATE] = 127::i8
16 | multiply(-13::i8, 10::i8) [overflow:SATURATE] = -128::i8
17 | multiply(13::i8, 10::i8) [overflow:SILENT] = <!UNDEFINED>
18 |
19 | # floating_exception: Examples demonstrating exceptional floating point cases
20 | multiply(1.5e+100::fp64, 1.5e+208::fp64) = inf::fp64
21 | multiply(1.5e+100::fp64, -1.5e+208::fp64) = -inf::fp64
22 |
23 | # types: Examples demonstrating behavior of different data types
24 | multiply(4.5::fp64, 2.5000007152557373046875::fp64) = 11.250003218650818::fp64
25 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/negate.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | negate(25::i8) = -25::i8
6 | negate(-200::i16) = 200::i16
7 | negate(30000::i32) = -30000::i32
8 | negate(9223372036854775800::i64) = -9223372036854775800::i64
9 | negate(2.50::fp32) = -2.50::fp32
10 | negate(2.000002861022949::fp64) = -2.000002861022949::fp64
11 | negate(inf::fp64) = -inf::fp64
12 |
13 | # null_input: Examples with null as input
14 | negate(null::i8) = null::i8
15 |
16 | # overflow: Examples demonstrating overflow behavior
17 | negate(-128::i8) [overflow:ERROR] = <!ERROR>
18 | negate(-128::i8) [overflow:SATURATE] = 127::i8
19 | negate(-128::i8) [overflow:SILENT] = <!UNDEFINED>
20 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/power.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | power(8::i64, 2::i64) = 64::i64
6 | power(1.0::fp32, -1.0::fp32) = 1.0::fp32
7 | power(2.0::fp64, -2.0::fp64) = 0.25::fp64
8 | power(13::i64, 10::i64) = 137858491849::i64
9 |
10 | # floating_exception: Examples demonstrating exceptional floating point cases
11 | power(1.5e+100::fp64, 1.5e+208::fp64) = inf::fp64
12 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/sin.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | sin(0.0::fp32) = 0.0::fp32
6 | sin(1.0::fp64) = 0.8414709848078965::fp64
7 | sin(7.0000009::fp64) = 0.6569872772305518::fp64
8 | sin(-7.0000009::fp64) = -0.6569872772305518::fp64
9 | sin(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/sinh.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | sinh(0.0::fp32) = 0.0::fp32
6 | sinh(1.0::fp64) = 1.1752011936438014::fp64
7 | sinh(7.0000009::fp64) = 548.3166167588001::fp64
8 | sinh(-7.0000009::fp64) = -548.3166167588001::fp64
9 | sinh(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/sqrt.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | sqrt(25::i64) = 5::fp64
6 | sqrt(0::i64) = 0::fp64
7 | sqrt(-1::i64) [on_domain_error:NAN] = nan::fp64
8 | sqrt(-9223372036854775800::i64) [on_domain_error:NAN] = nan::fp64
9 | sqrt(9223372036854775800::i64) = 3037000499.97605::fp64
10 | sqrt(null::i64) = null::fp64
11 | sqrt(6.25::fp32) = 2.5::fp32
12 | sqrt(2.0000007152557373046875::fp64) = 1.4142138152541635::fp64
13 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/subtract.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | subtract(120::i8, 5::i8) = 115::i8
6 | subtract(-100::i16, 100::i16) = -200::i16
7 | subtract(-30000::i32, 30000::i32) = -60000::i32
8 | subtract(-2000000000::i64, 2000000000::i64) = -4000000000::i64
9 |
10 | # overflow: Examples demonstrating overflow behavior
11 | subtract(-120::i8, 10::i8) [overflow:ERROR] = <!ERROR>
12 | subtract(-30000::i16, 30000::i16) [overflow:ERROR] = <!ERROR>
13 | subtract(-2000000000::i32, 2000000000::i32) [overflow:ERROR] = <!ERROR>
14 | subtract(-9223372036854775808::i64, 1::i64) [overflow:ERROR] = <!ERROR>
15 | subtract(-120::i8, 10::i8) [overflow:SATURATE] = -128::i8
16 | subtract(120::i8, -10::i8) [overflow:SATURATE] = 127::i8
17 | subtract(-120::i8, 10::i8) [overflow:SILENT] = <!UNDEFINED>
18 |
19 | # floating_exception: Examples demonstrating exceptional floating point cases
20 | subtract(-1.5e+308::fp64, 1.5e+308::fp64) = -inf::fp64
21 | subtract(1.5e+308::fp64, -1.5e+308::fp64) = inf::fp64
22 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/sum.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | sum((0, -1, 2, 20)::i8) = 21::i64
6 | sum((2000000, -3217908, 629000, -100000, 0, 987654)::i32) = 298746::i64
7 | sum((2.5, 0, 5.0, -2.5, -7.5)::fp32) = -2.5::fp64
8 | sum((2.5000007152557373046875, 7.0000007152557373046875, 0, 7.0000007152557373046875)::fp64) = 16.500002145767212::fp64
9 |
10 | # overflow: Examples demonstrating overflow behavior
11 | sum((9223372036854775806, 1, 1, 1, 1, 10000000000)::i64) [overflow:ERROR] = <!ERROR>
12 |
13 | # floating_exception: Examples demonstrating exceptional floating point cases
14 | sum((1.5e+308, 1.5e+308, 1.5e+308)::fp64) = inf::fp64
15 | sum((-1.5e+308, -1.5e+308, -1.5e+308)::fp64) = -inf::fp64
16 | sum((2.500000715, inf, 2.500000715)::fp64) = inf::fp64
17 | sum((2.5000007, -inf, 2.5000007, 10.0)::fp64) = -inf::fp64
18 |
19 | # null_handling: Examples with null as input or output
20 | sum((Null, Null, Null)::i16) = Null::i64
21 | sum(()::i16) = Null::i64
22 | sum((200000, Null, 629000, -10000, 0, 987621)::i32) = 1806621::i64
23 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/tan.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | tan(0.0::fp32) = 0.0::fp32
6 | tan(0.5::fp64) = 0.5463024898437905::fp64
7 | tan(7.01::fp64) = 0.8891974677731088::fp64
8 | tan(-7.01::fp64) = -0.8891974677731088::fp64
9 | tan(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic/tanh.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | tanh(0.0::fp32) = 0.0::fp32
6 | tanh(1.0::fp64) = 0.7615941559557649::fp64
7 | tanh(7.0000009::fp64) = 0.9999983369469382::fp64
8 | tanh(-7.0000009::fp64) = -0.9999983369469382::fp64
9 | tanh(null::fp64) = null::fp64
10 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/bitwise_and.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bitwise_and(0::dec<1, 0>, 1::dec<1, 0>) = 0::dec<1, 0>
6 | bitwise_and(127::dec<3, 0>, 127::dec<3, 0>) = 127::dec<3, 0>
7 | bitwise_and(-127::dec<3, 0>, -10::dec<2, 0>) = -128::dec<3, 0>
8 | bitwise_and(31766::dec<5, 0>, 900::dec<3, 0>) = 4::dec<5, 0>
9 | bitwise_and(-31766::dec<5, 0>, 900::dec<3, 0>) = 896::dec<5, 0>
10 | bitwise_and(2147483647::dec<10, 0>, 1234567::dec<7, 0>) = 1234567::dec<10, 0>
11 | bitwise_and(-2147483647::dec<10, 0>, 1234567::dec<7, 0>) = 1::dec<10, 0>
12 | bitwise_and(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 127::dec<19, 0>
13 | bitwise_and(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 1::dec<19, 0>
14 |
15 | # max_values: test with max values
16 | bitwise_and(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0>
17 | bitwise_and(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 0::dec<38, 0>
18 | bitwise_and(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = -99999999999999999999999999999999999999::dec<38, 0>
19 |
20 | # null_values: test with null values
21 | bitwise_and(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0>
22 | bitwise_and(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0>
23 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/bitwise_or.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bitwise_or(0::dec<1, 0>, 1::dec<1, 0>) = 1::dec<1, 0>
6 | bitwise_or(127::dec<3, 0>, 127::dec<3, 0>) = 127::dec<3, 0>
7 | bitwise_or(-127::dec<3, 0>, -10::dec<2, 0>) = -9::dec<3, 0>
8 | bitwise_or(31766::dec<5, 0>, 900::dec<3, 0>) = 32662::dec<5, 0>
9 | bitwise_or(-31766::dec<5, 0>, 900::dec<3, 0>) = -31762::dec<5, 0>
10 | bitwise_or(2147483647::dec<10, 0>, 123456789::dec<9, 0>) = 2147483647::dec<10, 0>
11 | bitwise_or(-2147483647::dec<10, 0>, 123456789::dec<9, 0>) = -2024026859::dec<10, 0>
12 | bitwise_or(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 9223372036854775807::dec<19, 0>
13 | bitwise_or(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = -9223372036854775681::dec<19, 0>
14 |
15 | # max_values: test with max values
16 | bitwise_or(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0>
17 | bitwise_or(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0>
18 | bitwise_or(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = -99999999999999999999999999999999999999::dec<38, 0>
19 |
20 | # null_values: test with null values
21 | bitwise_or(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0>
22 | bitwise_or(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0>
23 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/bitwise_xor.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bitwise_xor(0::dec<1, 0>, 1::dec<1, 0>) = 1::dec<1, 0>
6 | bitwise_xor(127::dec<3, 0>, 127::dec<3, 0>) = 0::dec<3, 0>
7 | bitwise_xor(-127::dec<3, 0>, -10::dec<2, 0>) = 119::dec<3, 0>
8 | bitwise_xor(31766::dec<5, 0>, 900::dec<3, 0>) = 32658::dec<5, 0>
9 | bitwise_xor(-31766::dec<5, 0>, 900::dec<3, 0>) = -32658::dec<5, 0>
10 | bitwise_xor(2147483647::dec<10, 0>, 123456789::dec<9, 0>) = 2024026858::dec<10, 0>
11 | bitwise_xor(-2147483647::dec<10, 0>, 123456789::dec<9, 0>) = -2024026860::dec<10, 0>
12 | bitwise_xor(9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = 9223372036854775680::dec<19, 0>
13 | bitwise_xor(-9223372036854775807::dec<19, 0>, 127::dec<3, 0>) = -9223372036854775682::dec<19, 0>
14 |
15 | # max_values: test with max values
16 | bitwise_xor(99999999999999999999999999999999999999::dec<38, 0>, 99999999999999999999999999999999999999::dec<38, 0>) = 0::dec<38, 0>
17 | bitwise_xor(99999999999999999999999999999999999999::dec<38, 0>, 00000000000000000000000000000000000000::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0>
18 | bitwise_xor(-99999999999999999999999999999999999999::dec<38, 0>, -99999999999999999999999999999999999999::dec<38, 0>) = 0::dec<38, 0>
19 |
20 | # null_values: test with null values
21 | bitwise_xor(null::dec<1, 0>, 127::dec<3, 0>) = null::dec<3, 0>
22 | bitwise_xor(null::dec<1, 0>, null::dec<1, 0>) = null::dec<1, 0>
23 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/factorial_decimal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | factorial(0::dec<1, 0>) = 1::dec<38, 0>
6 | factorial(1::dec<1, 0>) = 1::dec<38, 0>
7 | factorial(20::dec<2, 0>) = 2432902008176640000::dec<38, 0>
8 |
9 | # overflow: Examples demonstrating overflow behavior
10 | factorial(34::dec<2, 0>) = <!ERROR>
11 |
12 | # negative_value: Examples demonstrating behavior on negative value
13 | factorial(-1::dec<1, 0>) = <!ERROR>
14 |
15 | # null_values: test with null values
16 | factorial(null::dec<38, 0>) = null::dec<38, 0>
17 | factorial(null::dec<1, 0>) = null::dec<38, 0>
18 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/max_decimal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | max((20, -3, 1, -10, 0, 5)::dec<2, 0>) = 20::dec<2, 0>
6 | max((-32768, 32767, 20000, -30000)::dec<5, 0>) = 32767::dec<5, 0>
7 | max((-214748648, 214748647, 21470048, 4000000)::dec<9, 0>) = 214748647::dec<9, 0>
8 | max((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::dec<10, 0>) = 2000000000::dec<10, 0>
9 | max((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = 5.0::dec<2, 1>
10 | max((99999999999999999999999999999999999999, 0, -99999999999999999999999999999999999998, 111111111, -76)::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0>
11 |
12 | # null_handling: Examples with null as input or output
13 | max((Null, Null, Null)::dec<1, 0>) = null::dec<1, 0>
14 | max(()::dec<1, 0>) = null::dec<1, 0>
15 | max((2000000000, Null, 629000000, -100000000, Null, 987654321)::dec<10, 0>) = 2000000000::dec<10, 0>
16 | max((Null, Null)::dec<1, 0>) = null::dec<1, 0>
17 | max(()::dec<1, 0>) = null::dec<1, 0>
18 | max((99999999999999999999999999999999999999, -99999999999999999999999999999999999998, Null, 11111111111111111111111111111111111111, Null)::dec<38, 0>) = 99999999999999999999999999999999999999::dec<38, 0>
19 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/min_decimal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | min((20, -3, 1, -10, 0, 5)::dec<2, 0>) = -10::dec<2, 0>
6 | min((-32768, 32767, 20000, -30000)::dec<5, 0>) = -32768::dec<5, 0>
7 | min((-214748648, 214748647, 21470048, 4000000)::dec<9, 0>) = -214748648::dec<9, 0>
8 | min((2000000000, -3217908979, 629000000, -100000000, 0, 987654321)::dec<10, 0>) = -3217908979::dec<10, 0>
9 | min((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = -7.5::dec<2, 1>
10 | min((99999999999999999999999999999999999999, -99999999999999999999999999999999999998, -99999999999999999999999999999999999997, 0, 1111)::dec<38, 0>) = -99999999999999999999999999999999999998::dec<38, 0>
11 |
12 | # null_handling: Examples with null as input or output
13 | min((Null, Null, Null)::dec<1, 0>) = Null::dec<1, 0>
14 | min(()::dec<1, 0>) = Null::dec<1, 0>
15 | min((2000000000, Null, 629000000, -100000000, Null, 987654321)::dec<10, 0>) = -100000000::dec<10, 0>
16 | min((-99999999999999999999999999999999999998, Null, 99999999999999999999999999999999999999, Null)::dec<38, 0>) = -99999999999999999999999999999999999998::dec<38, 0>
17 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/power.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | power(8::dec<38, 0>, 2::dec<38, 0>) = 64::fp64
6 | power(1.0::dec<38, 0>, -1.0::dec<38, 0>) = 1.0::fp64
7 | power(2.0::dec<38, 0>, -2.0::dec<38, 0>) = 0.25::fp64
8 | power(13::dec<38, 0>, 10::dec<38, 0>) = 137858491849::fp64
9 |
10 | # result_more_than_input_precision: Examples demonstrating result with more precision than input
11 | power(16::dec<2, 0>, 4::dec<38, 0>) = 65536::fp64
12 |
13 | # floating_exception: Examples demonstrating exceptional floating point cases
14 | power(1.5e+10::dec<38, 0>, 1.5e+20::dec<38, 0>) = inf::fp64
15 | power(-16::dec<4, 0>, 1001::dec<4, 0>) = -inf::fp64
16 |
17 | # complex_number: Examples demonstrating complex number output
18 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:NAN] = nan::fp64
19 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:ERROR] = <!ERROR>
20 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/power_decimal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | power(8::dec, 2::dec<38, 0>) = 64::fp64
6 | power(1.0::dec, -1.0::dec<38, 0>) = 1.0::fp64
7 | power(2.0::dec<38, 0>, -2.0::dec<38, 0>) = 0.25::fp64
8 | power(13::dec<38, 0>, 10::dec<38, 0>) = 137858491849::fp64
9 |
10 | # result_more_than_input_precision: Examples demonstrating result with more precision than input
11 | power(16::dec<2, 0>, 4::dec<38, 0>) = 65536::fp64
12 |
13 | # floating_exception: Examples demonstrating exceptional floating point cases
14 | power(1.5e+10::dec<38, 0>, 1.5e+20::dec<38, 0>) = inf::fp64
15 | power(-16::dec<4, 0>, 1001::dec<4, 0>) = -inf::fp64
16 |
17 | # complex_number: Examples demonstrating complex number output
18 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:NAN] = nan::fp64
19 | power(-1::dec, 0.5::dec<38, 1>) [complex_number_result:ERROR] = <!ERROR>
20 |
21 | # null_values: test with null values
22 | power(null::dec<38, 0>, 127::dec<38, 0>) = null::fp64
23 | power(null::dec<38, 0>, null::dec<38, 0>) = null::fp64
24 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/sqrt_decimal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | sqrt(25::dec<2, 0>) = 5::fp64
6 | sqrt(0::dec<1, 0>) = 0::fp64
7 |
8 | # max_input: max allowed input returns correct result
9 | sqrt(99999999999999999999999999999999999999::dec<38, 0>) = 1e+19::fp64
10 |
11 | # real_number: real number as input
12 | sqrt(6.25::dec<3, 2>) = 2.5::fp64
13 | sqrt(2.0000007152557373046875::dec<23, 22>) = 1.4142138152541635::fp64
14 |
15 | # verify_real_number: verify that real-number inputs produce distinct results and do not behave like the nearest integer
16 | sqrt(9::dec<1, 0>) = 3::fp64
17 | sqrt(8.3::dec<2, 1>) = 2.8809720581775866::fp64
18 | sqrt(8.5::dec<2, 1>) = 2.9154759474226504::fp64
19 | sqrt(8.7::dec<2, 1>) = 2.949576240750525::fp64
20 | sqrt(9.2::dec<2, 1>) = 3.03315017762062::fp64
21 |
22 | # negative_input: negative input returns error
23 | sqrt(-9223372036854775800::dec<19, 0>) = <!ERROR>
24 | sqrt(-2.5::dec<2, 1>) = <!ERROR>
25 |
26 | # null_values: test with null values
27 | sqrt(null::dec<38, 0>) = null::fp64
28 | sqrt(null::dec<1, 0>) = null::fp64
29 |
--------------------------------------------------------------------------------
/tests/cases/arithmetic_decimal/sum_decimal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | sum((0, -1, 2, 20)::dec<2, 0>) = 21::dec<38, 0>
6 | sum((2000000, -3217908, 629000, -100000, 0, 987654)::dec<7, 0>) = 298746::dec<38, 0>
7 | sum((2.5, 0, 5.0, -2.5, -7.5)::dec<2, 1>) = -2.5::dec<38, 1>
8 | sum((2.5000007152557373046875, 7.0000007152557373046875, 0, 7.0000007152557373046875)::dec<23, 22>) = 16.5000021457672119140625::dec<38, 22>
9 |
10 | # overflow: Examples demonstrating overflow behavior
11 | sum((99999999999999999999999999999999999999, 1, 1, 1, 1, 99999999999999999999999999999999999999)::dec<38, 0>) [overflow:ERROR] = <!ERROR>
12 |
13 | # null_handling: Examples with null as input or output
14 | sum((Null, Null, Null)::dec<1, 0>) = Null::dec<38, 0>
15 | sum(()::dec<1, 0>) = Null::dec<38, 0>
16 | sum((200000, Null, 629000, -10000, 0, 987621)::dec<6, 0>) = 1806621::dec<38, 0>
17 |
--------------------------------------------------------------------------------
/tests/cases/boolean/and.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | and(true::bool, true::bool) = true::bool
6 | and(true::bool, false::bool) = false::bool
7 | and(false::bool, false::bool) = false::bool
8 |
9 | # null_input: Examples with null as input
10 | and(true::bool, null::bool) = null::bool
11 | and(null::bool, true::bool) = null::bool
12 | and(false::bool, null::bool) = false::bool
13 | and(null::bool, false::bool) = false::bool
14 | and(null::bool, null::bool) = null::bool
15 |
--------------------------------------------------------------------------------
/tests/cases/boolean/and_not.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | and_not(true::bool, false::bool) = true::bool
6 | and_not(true::bool, true::bool) = false::bool
7 | and_not(false::bool, true::bool) = false::bool
8 | and_not(false::bool, false::bool) = false::bool
9 |
10 | # null_input: Examples with null as input
11 | and_not(true::bool, null::bool) = null::bool
12 | and_not(null::bool, false::bool) = null::bool
13 | and_not(false::bool, null::bool) = false::bool
14 | and_not(null::bool, true::bool) = false::bool
15 | and_not(null::bool, null::bool) = null::bool
16 |
--------------------------------------------------------------------------------
/tests/cases/boolean/bool_and.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bool_and((true, true)::bool) = true::bool
6 | bool_and((true, false)::bool) = false::bool
7 | bool_and((false, false)::bool) = false::bool
8 | bool_and((false)::bool) = false::bool
9 | bool_and((true)::bool) = true::bool
10 | bool_and((true, null)::bool) = true::bool
11 | bool_and((null, null)::bool) = null::bool
12 | bool_and((false, null)::bool) = false::bool
13 | bool_and(()::bool) = null::bool
14 |
--------------------------------------------------------------------------------
/tests/cases/boolean/bool_or.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_AGGREGATE_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bool_or((true, true)::bool) = true::bool
6 | bool_or((false, false)::bool) = false::bool
7 | bool_or((true, false)::bool) = true::bool
8 | bool_or((false)::bool) = false::bool
9 | bool_or((true)::bool) = true::bool
10 | bool_or((true, null)::bool) = true::bool
11 | bool_or((null, null)::bool) = null::bool
12 | bool_or((false, null)::bool) = false::bool
13 | bool_or(()::bool) = null::bool
14 |
--------------------------------------------------------------------------------
/tests/cases/boolean/not.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | not(true::bool) = false::bool
6 | not(false::bool) = true::bool
7 |
8 | # null_input: Examples with null as input
9 | not(null::bool) = null::bool
10 |
--------------------------------------------------------------------------------
/tests/cases/boolean/or.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | or(true::bool, true::bool) = true::bool
6 | or(true::bool, false::bool) = true::bool
7 | or(false::bool, false::bool) = false::bool
8 |
9 | # null_input: Examples with null as input
10 | or(true::bool, null::bool) = true::bool
11 | or(null::bool, true::bool) = true::bool
12 | or(false::bool, null::bool) = null::bool
13 | or(null::bool, false::bool) = null::bool
14 | or(null::bool, null::bool) = null::bool
15 |
--------------------------------------------------------------------------------
/tests/cases/boolean/xor.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_boolean.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | xor(true::bool, false::bool) = true::bool
6 | xor(true::bool, true::bool) = false::bool
7 | xor(false::bool, false::bool) = false::bool
8 | xor(false::bool, true::bool) = true::bool
9 |
10 | # null_input: Examples with null as input
11 | xor(true::bool, null::bool) = null::bool
12 | xor(null::bool, true::bool) = null::bool
13 | xor(false::bool, null::bool) = null::bool
14 | xor(null::bool, false::bool) = null::bool
15 |
--------------------------------------------------------------------------------
/tests/cases/comparison/between.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | between(5::i8, 0::i8, 127::i8) = true::bool
6 | between(20000::i16, 1::i16, 30000::i16) = true::bool
7 | between(1030000000::i32, 1000000000::i32, 2000000000::i32) = true::bool
8 | between(10300000000900::i64, 1000000000::i64, 9223372036854775807::i64) = true::bool
9 | between(2::i8, 1::i8, -120::i8) = false::bool
10 | between(2::i8, 2::i8, 3::i8) = true::bool
11 | between(2::i8, 1::i8, 2::i8) = true::bool
12 | between(-10000::i16, -20000::i16, -30000::i16) = false::bool
13 | between(-100000000::i32, -1000000000::i32, -2000000000::i32) = false::bool
14 | between(92233720368547758::i64, 1::i64, -9223372036854775807::i64) = false::bool
15 | between(14.01::fp32, 20.90::fp32, 88.00::fp32) = false::bool
16 | between(14.011::fp64, 0.00::fp64, inf::fp64) = true::bool
17 | between(inf::fp64, 0.00::fp64, 100.09::fp64) = false::bool
18 | between(-100.0011::fp64, -inf::fp64, 0.00::fp64) = true::bool
19 |
20 | # null_input: Examples with null as input
21 | between(null::i8, 1::i8, 10::i8) = null::bool
22 | between(1::i64, null::i64, 10::i64) = null::bool
23 | between(1::i64, 1::i64, null::i64) = null::bool
24 |
--------------------------------------------------------------------------------
/tests/cases/comparison/coalesce.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | coalesce(1::i8, 2::i8) = 1::i8
6 | coalesce(null::i8, 2::i8) = 2::i8
7 | coalesce(null::i16, null::i16) = null::i16
8 | coalesce(2000000::i32, null::i32) = 2000000::i32
9 | coalesce(null::i64, 9223372036854775807::i64) = 9223372036854775807::i64
10 | coalesce(null::fp32, -65.500000::fp32) = -65.500000::fp32
11 | coalesce(inf::fp64, -inf::fp64) = inf::fp64
12 | coalesce(7::dec<38, 0>, 4::dec<38, 0>) = 7::dec<38, 0>
13 | coalesce(null::dec<38, 0>, 2::dec<38, 0>) = 2::dec<38, 0>
14 | coalesce(null::dec<38, 0>, null::dec<38, 0>) = null::dec<38, 0>
15 | coalesce(2000000::dec<38, 0>, null::dec<38, 0>) = 2000000::dec<38, 0>
16 | coalesce(null::dec<38, 0>, 2000000::dec<38, 0>) = 2000000::dec<38, 0>
17 |
--------------------------------------------------------------------------------
/tests/cases/comparison/equal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | equal(1::i8, 1::i8) = true::bool
6 | equal(300::i16, 200::i16) = false::bool
7 | equal(-2147483648::i32, -2147483648::i32) = true::bool
8 | equal(9223372036854775807::i64, 9223372036854775804::i64) = false::bool
9 | equal(inf::fp64, inf::fp64) = true::bool
10 | equal(inf::fp64, 1.5e+308::fp64) = false::bool
11 | equal(10::dec<38, 0>, 10::dec<38, 0>) = true::bool
12 | equal(10::dec<38, 0>, 11.25::dec<38, 2>) = false::bool
13 | equal(inf::fp64, -inf::fp64) = false::bool
14 |
15 | # null_input: Examples with null as input
16 | equal(null::i16, 1::i16) = null::bool
17 | equal(null::i16, null::i16) = null::bool
18 | equal(7::dec<38, 0>, null::dec<38, 0>) = null::bool
19 | equal(null::dec<38, 0>, null::dec<38, 0>) = null::bool
20 |
--------------------------------------------------------------------------------
/tests/cases/comparison/gt.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | gt(1::i8, 2::i8) = false::bool
6 | gt(200::i16, 199::i16) = true::bool
7 | gt(200::i16, 200::i16) = false::bool
8 | gt(2000000000::i32, 1000000000::i32) = true::bool
9 | gt(-922337203685775808::i64, -922337203685775807::i64) = false::bool
10 | gt(7.25::fp32, 2.50::fp32) = true::bool
11 | gt(-922337203685775808::dec<38, 0>, -922337203685775807::dec<38, 0>) = false::bool
12 | gt(7.25::dec<38, 2>, 2.50::dec<38, 2>) = true::bool
13 | gt(-1.5e+308::fp64, -inf::fp64) = true::bool
14 | gt(inf::fp64, 1.5e+308::fp64) = true::bool
15 |
16 | # null_input: Examples with null as input
17 | gt(null::i16, 100::i16) = null::bool
18 | gt(2::i16, null::i16) = null::bool
19 | gt(null::i16, null::i16) = null::bool
20 | gt(2::dec<38, 2>, null::dec<38, 2>) = null::bool
21 | gt(null::dec<38, 2>, null::dec<38, 2>) = null::bool
22 |
--------------------------------------------------------------------------------
/tests/cases/comparison/gte.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | gte(1::i8, 2::i8) = false::bool
6 | gte(2::i8, 2::i8) = true::bool
7 | gte(200::i16, 199::i16) = true::bool
8 | gte(2000000000::i32, 1000000000::i32) = true::bool
9 | gte(-922337203685775808::i64, -922337203685775807::i64) = false::bool
10 | gte(7.25::fp32, 2.50::fp32) = true::bool
11 | gte(7.25::fp32, 7.25::fp32) = true::bool
12 | gte(7.25::dec<38, 2>, 7.25::dec<38, 2>) = true::bool
13 | gte(7.25::dec<38, 2>, 7.27::dec<38, 2>) = false::bool
14 | gte(inf::fp64, 1.5e+308::fp64) = true::bool
15 | gte(inf::fp64, inf::fp64) = true::bool
16 | gte(-inf::fp64, -1.5e+308::fp64) = false::bool
17 |
18 | # null_input: Examples with null as input
19 | gte(null::dec<38, 2>, 7.25::dec<38, 2>) = null::bool
20 | gte(null::dec<38, 2>, null::dec<38, 2>) = null::bool
21 | gte(null::i16, 1::i16) = null::bool
22 | gte(2::i16, null::i16) = null::bool
23 | gte(null::i16, null::i16) = null::bool
24 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_false.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_false(true::bool) = false::bool
6 | is_false(false::bool) = true::bool
7 | is_false(null::bool) = false::bool
8 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_finite.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_finite(0.0::fp32) = true::bool
6 | is_finite(0.55::fp32) = true::bool
7 | is_finite(1000.000000000001::fp64) = true::bool
8 | is_finite(-inf::fp64) = false::bool
9 | is_finite(inf::fp64) = false::bool
10 | is_finite(null::fp64) = null::bool
11 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_infinite.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_infinite(0.0::fp32) = false::bool
6 | is_infinite(0.55::fp32) = false::bool
7 | is_infinite(1000.000000000001::fp64) = false::bool
8 | is_infinite(-inf::fp64) = true::bool
9 | is_infinite(inf::fp64) = true::bool
10 | is_infinite(null::fp64) = null::bool
11 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_nan.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_nan(0.0::fp32) = false::bool
6 | is_nan(0.55::fp32) = false::bool
7 | is_nan(1000.000000000001::fp64) = false::bool
8 | is_nan(-inf::fp64) = false::bool
9 | is_nan(inf::fp64) = false::bool
10 | is_nan(null::fp64) = null::bool
11 | is_nan(nan::fp64) = true::bool
12 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_not_distinct_from.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_not_distinct_from(1::i16, 1::i16) = true::bool
6 | is_not_distinct_from(2::i16, 1::i16) = false::bool
7 | is_not_distinct_from(1.75::dec<38, 2>, 1.75::dec<38, 2>) = true::bool
8 | is_not_distinct_from(1.75::dec<38, 2>, 1.1::dec<38, 2>) = false::bool
9 |
10 | # null_input: Examples with null as input
11 | is_not_distinct_from(null::i16, 1::i16) = false::bool
12 | is_not_distinct_from(null::i16, null::i16) = true::bool
13 | is_not_distinct_from(10::dec<38, 0>, null::dec<38, 0>) = false::bool
14 | is_not_distinct_from(null::dec<38, 0>, null::dec<38, 0>) = true::bool
15 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_not_false.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_not_false(true::bool) = true::bool
6 | is_not_false(false::bool) = false::bool
7 | is_not_false(null::bool) = true::bool
8 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_not_null.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_not_null(25::i16) = true::bool
6 | is_not_null(true::bool) = true::bool
7 | is_not_null(7.25::fp32) = true::bool
8 | is_not_null(7.25::dec<38, 3>) = true::bool
9 | is_not_null(null::i8) = false::bool
10 | is_not_null(null::dec<38, 3>) = false::bool
11 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_not_true.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_not_true(true::bool) = false::bool
6 | is_not_true(false::bool) = true::bool
7 | is_not_true(null::bool) = true::bool
8 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_null.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_null(25::i16) = false::bool
6 | is_null(false::bool) = false::bool
7 | is_null(7.823::dec<38, 3>) = false::bool
8 | is_null(null::i16) = true::bool
9 | is_null(null::dec<38, 3>) = true::bool
10 |
--------------------------------------------------------------------------------
/tests/cases/comparison/is_true.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | is_true(true::bool) = true::bool
6 | is_true(false::bool) = false::bool
7 | is_true(null::bool) = false::bool
8 |
--------------------------------------------------------------------------------
/tests/cases/comparison/lt.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | lt(1::i8, 2::i8) = true::bool
6 | lt(200::i16, 100::i16) = false::bool
7 | lt(1000::i16, 1000::i16) = false::bool
8 | lt(2000000000::i32, 1000000000::i32) = false::bool
9 | lt(-922337203685775808::i64, -922337203685775807::i64) = true::bool
10 | lt(7.25::fp32, 2.50::fp32) = false::bool
11 | lt(7.25::dec<38, 2>, 7.25::dec<38, 2>) = false::bool
12 | lt(2.49::dec<38, 2>, 2.50::dec<38, 2>) = true::bool
13 | lt(1.5e+308::fp64, inf::fp64) = true::bool
14 | lt(-1.5e+308::fp64, -inf::fp64) = false::bool
15 |
16 | # null_input: Examples with null as input
17 | lt(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool
18 | lt(null::dec<38, 2>, null::dec<38, 2>) = null::bool
19 | lt(null::i16, 1::i16) = null::bool
20 | lt(2::i16, null::i16) = null::bool
21 | lt(null::i16, null::i16) = null::bool
22 |
--------------------------------------------------------------------------------
/tests/cases/comparison/lte.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | lte(1::i8, 2::i8) = true::bool
6 | lte(2::i8, 2::i8) = true::bool
7 | lte(200::i16, 199::i16) = false::bool
8 | lte(2000000000::i32, 1000000000::i32) = false::bool
9 | lte(-922337203685775808::i64, -922337203685775807::i64) = true::bool
10 | lte(7.00::fp32, 2.50::fp32) = false::bool
11 | lte(7.25::fp32, 7.25::fp32) = true::bool
12 | lte(7.25::dec<38, 2>, 7.25::dec<38, 2>) = true::bool
13 | lte(2.59::dec<38, 2>, 2.50::dec<38, 2>) = false::bool
14 | lte(1.5e+308::fp64, inf::fp64) = true::bool
15 | lte(inf::fp64, inf::fp64) = true::bool
16 | lte(-1.5e+308::fp64, -inf::fp64) = false::bool
17 |
18 | # null_input: Examples with null as input
19 | lte(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool
20 | lte(null::dec<38, 2>, null::dec<38, 2>) = null::bool
21 | lte(null::i16, 1::i16) = null::bool
22 | lte(2::i16, null::i16) = null::bool
23 | lte(null::i16, null::i16) = null::bool
24 |
--------------------------------------------------------------------------------
/tests/cases/comparison/not_equal.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | not_equal(1::i8, 1::i8) = false::bool
6 | not_equal(300::i16, 200::i16) = true::bool
7 | not_equal(-2147483648::i32, -2147483648::i32) = false::bool
8 | not_equal(9223372036854775807::i64, 9223372036854775804::i64) = true::bool
9 | not_equal(9223372036854775807::dec<38, 0>, 9223372036854775804::dec<38, 0>) = true::bool
10 | not_equal(9223372036854775804::dec<38, 0>, 9223372036854775804::dec<38, 0>) = false::bool
11 | not_equal(inf::fp64, inf::fp64) = false::bool
12 | not_equal(inf::fp64, 1.5e+308::fp64) = true::bool
13 | not_equal(inf::fp64, -inf::fp64) = true::bool
14 |
15 | # null_input: Examples with null as input
16 | not_equal(null::dec<38, 2>, 2.50::dec<38, 2>) = null::bool
17 | not_equal(null::dec<38, 2>, null::dec<38, 2>) = null::bool
18 | not_equal(null::i16, 1::i16) = null::bool
19 | not_equal(null::i16, null::i16) = null::bool
20 |
--------------------------------------------------------------------------------
/tests/cases/comparison/nullif.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | nullif(1::i16, 5::i16) = 1::i16
6 | nullif(7.25::fp32, 1.00::fp32) = 7.25::fp32
7 | nullif(1.11::fp32, 1.11::fp32) = null::fp32
8 | nullif(false::bool, true::bool) = false::bool
9 | nullif(true::bool, false::bool) = true::bool
10 | nullif(false::bool, false::bool) = null::bool
11 | nullif(true::bool, true::bool) = null::bool
12 |
13 | # null_input: Examples with null as input
14 | nullif(null::bool, true::bool) = null::bool
15 | nullif(true::bool, null::bool) = true::bool
16 | nullif(null::bool, null::bool) = null::bool
17 | nullif(10::dec<38, 0>, null::dec<38, 0>) = 10::dec<38, 0>
18 | nullif(null::dec<38, 0>, null::dec<38, 0>) = null::dec<38, 0>
19 |
--------------------------------------------------------------------------------
/tests/cases/datetime/add_datetime.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # timestamps: examples using the timestamp types
5 | add('2016-12-31T13:30:15'::ts, 'P5D'::iday) = '2017-01-05T13:30:15'::ts
6 | add('2016-12-01T13:30:15'::ts, 'P5Y'::iyear) = '2021-12-01T13:30:15'::ts
7 | add('2016-12-01T13:30:15'::ts, 'PT5H'::iday) = '2016-12-01T18:30:15'::ts
8 |
9 | # date_to_timestamp: examples using the date types and resulting in a timestamp
10 | add('2020-12-31'::date, 'P5D'::iday) = '2021-01-05T00:00:00'::ts
11 | add('2020-12-31'::date, 'P5Y'::iyear) = '2025-12-31T00:00:00'::ts
12 | add('2020-12-31'::date, 'P5M'::iyear) = '2021-05-31T00:00:00'::ts
13 |
14 | # null_input: examples with null args or return
15 | add(null::date, 'P5D'::iday) = null::ts
16 |
--------------------------------------------------------------------------------
/tests/cases/datetime/add_intervals.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | add_intervals('PT10H'::iday, 'PT5H'::iday) = 'P0DT15H0M0S'::iday
6 | add_intervals('P10D'::iday, 'P5D'::iday) = 'P15D'::iday
7 | add_intervals('P1D'::iday, 'PT10H'::iday) = 'P1DT10H0M0S'::iday
8 |
9 | # null_input: Basic examples where the input args or return is null
10 | add_intervals(null::iyear, 'P1Y'::iyear) = null::iyear
11 | add_intervals(null::iday, 'P1D'::iday) = null::iday
12 |
--------------------------------------------------------------------------------
/tests/cases/datetime/extract.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # timestamps: examples using the timestamp type
5 | extract('YEAR'::str, '2016-12-31T13:30:15'::ts) = 2016::i64
6 | extract('ISOYEAR'::str, '2016-01-01T13:30:15'::ts) = 2015::i64
7 | extract('QUARTER'::str, '2016-12-31T13:30:15'::ts) = 4::i64
8 | extract('MONTH'::str, '2016-12-31T13:30:15'::ts) = 12::i64
9 | extract('WEEK'::str, '2016-12-31T13:30:15'::ts) = 52::i64
10 | extract('DAY'::str, '2016-12-31T13:30:15'::ts) = 31::i64
11 | extract('ISODOW'::str, '2016-12-25T13:30:15'::ts) = 7::i64
12 | extract('DOW'::str, '2016-12-25T13:30:15'::ts) = 0::i64
13 | extract('DOY'::str, '2016-12-25T13:30:15'::ts) = 360::i64
14 | extract('HOUR'::str, '2016-12-31T13:30:15'::ts) = 13::i64
15 | extract('MINUTE'::str, '2016-12-31T13:30:15'::ts) = 30::i64
16 | extract('SECOND'::str, '2016-12-31T13:30:15'::ts) = 15::i64
17 | extract('MILLISECONDS'::str, '2016-12-31T13:30:15'::ts) = 15000::i64
18 | extract('MICROSECONDS'::str, '2016-12-31T13:30:15.220000'::ts) = 15220000::i64
19 | extract('EPOCH'::str, '2016-12-31T13:30:15'::ts) = 1483191015::i64
20 |
21 | # date: examples using the date type
22 | extract('YEAR'::str, '2020-12-31'::date) = 2020::i64
23 | extract('MONTH'::str, '2020-12-31'::date) = 12::i64
24 | extract('DAY'::str, '2020-12-31'::date) = 31::i64
25 |
26 | # time: examples using the time type
27 | extract('HOUR'::str, '01:02:03'::time) = 1::i64
28 | extract('MINUTE'::str, '01:02:03'::time) = 2::i64
29 | extract('SECOND'::str, '01:02:03'::time) = 3::i64
30 | extract('MILLISECOND'::str, '01:02:03.155'::time) = 3155::i64
31 | extract('MICROSECOND'::str, '01:02:03.45'::time) = 3450000::i64
32 |
--------------------------------------------------------------------------------
/tests/cases/datetime/gt_datetime.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # timestamps: examples using the timestamp type
5 | gt('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool
6 | gt('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool
7 |
8 | # timestamp_tz: examples using the timestamp_tz type
9 | gt('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool
10 | gt('1999-01-08T01:05:07-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool
11 |
12 | # date: examples using the date type
13 | gt('2020-12-30'::date, '2020-12-31'::date) = false::bool
14 | gt('2020-12-31'::date, '2020-12-30'::date) = true::bool
15 |
16 | # interval: examples using the interval type
17 | gt('P7D'::iday, 'P6D'::iday) = true::bool
18 | gt('P5D'::iday, 'P6D'::iday) = false::bool
19 | gt('P5Y'::iyear, 'P6Y'::iyear) = false::bool
20 | gt('P7Y'::iyear, 'P6Y'::iyear) = true::bool
21 |
22 | # null_input: examples with null args
23 | gt(null::iday, 'P5D'::iday) = null::bool
24 | gt(null::date, '2020-12-30'::date) = null::bool
25 | gt(null::ts, '2018-12-31T13:30:15'::ts) = null::bool
26 |
--------------------------------------------------------------------------------
/tests/cases/datetime/gte_datetime.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # timestamps: examples using the timestamp type
5 | gte('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool
6 | gte('2017-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool
7 | gte('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool
8 |
9 | # timestamp_tz: examples using the timestamp_tz type
10 | gte('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool
11 | gte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T01:05:06-08:00'::tstz) = true::bool
12 | gte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:05-05:00'::tstz) = true::bool
13 |
14 | # date: examples using the date type
15 | gte('2020-12-30'::date, '2020-12-31'::date) = false::bool
16 | gte('2020-12-31'::date, '2020-12-31'::date) = true::bool
17 | gte('2020-12-31'::date, '2020-12-30'::date) = true::bool
18 |
19 | # interval: examples using the interval type
20 | gte('P7D'::iday, 'P7D'::iday) = true::bool
21 | gte('P7D'::iday, 'P6D'::iday) = true::bool
22 | gte('P5D'::iday, 'P6D'::iday) = false::bool
23 | gte('P5Y'::iyear, 'P6Y'::iyear) = false::bool
24 | gte('P7Y'::iyear, 'P7Y'::iyear) = true::bool
25 | gte('P7Y'::iyear, 'P6Y'::iyear) = true::bool
26 |
27 | # null_input: examples with null args or return
28 | gte(null::iday, 'P5D'::iday) = null::bool
29 | gte(null::date, '2020-12-30'::date) = null::bool
30 | gte(null::ts, '2018-12-31T13:30:15'::ts) = null::bool
31 |
--------------------------------------------------------------------------------
/tests/cases/datetime/lt_datetime.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # timestamps: examples using the timestamp type
5 | lt('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool
6 | lt('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool
7 |
8 | # timestamp_tz: examples using the timestamp_tz type
9 | lt('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool
10 | lt('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool
11 |
12 | # date: examples using the date type
13 | lt('2020-12-30'::date, '2020-12-31'::date) = true::bool
14 | lt('2020-12-31'::date, '2020-12-30'::date) = false::bool
15 |
16 | # interval: examples using the interval type
17 | lt('P7D'::iday, 'P6D'::iday) = false::bool
18 | lt('P5D'::iday, 'P6D'::iday) = true::bool
19 | lt('P5Y'::iyear, 'P6Y'::iyear) = true::bool
20 | lt('P7Y'::iyear, 'P6Y'::iyear) = false::bool
21 |
22 | # null_input: examples with null args or return
23 | lt(null::iday, 'P5D'::iday) = null::bool
24 | lt(null::date, '2020-12-30'::date) = null::bool
25 | lt(null::ts, '2018-12-31T13:30:15'::ts) = null::bool
26 |
--------------------------------------------------------------------------------
/tests/cases/datetime/lte_datetime.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # timestamps: examples using the timestamp type
5 | lte('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool
6 | lte('2017-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool
7 | lte('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool
8 |
9 | # timestamp_tz: examples using the timestamp_tz type
10 | lte('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool
11 | lte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T01:05:06-08:00'::tstz) = true::bool
12 | lte('1999-01-08T01:05:06-08:00'::tstz, '1999-01-08T04:05:05-05:00'::tstz) = false::bool
13 |
14 | # date: examples using the date type
15 | lte('2020-12-30'::date, '2020-12-31'::date) = true::bool
16 | lte('2020-12-31'::date, '2020-12-31'::date) = true::bool
17 | lte('2020-12-31'::date, '2020-12-30'::date) = false::bool
18 |
19 | # interval: examples using the interval type
20 | lte('P7D'::iday, 'P7D'::iday) = true::bool
21 | lte('P7D'::iday, 'P6D'::iday) = false::bool
22 | lte('P5D'::iday, 'P6D'::iday) = true::bool
23 | lte('P5Y'::iyear, 'P6Y'::iyear) = true::bool
24 | lte('P7Y'::iyear, 'P7Y'::iyear) = true::bool
25 | lte('P7Y'::iyear, 'P6Y'::iyear) = false::bool
26 |
27 | # null_input: examples with null args or return
28 | lte(null::iday, 'P5D'::iday) = null::bool
29 | lte(null::date, '2020-12-30'::date) = null::bool
30 | lte(null::ts, '2018-12-31T13:30:15'::ts) = null::bool
31 |
--------------------------------------------------------------------------------
/tests/cases/datetime/subtract_datetime.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'
3 |
4 | # timestamps: examples using the timestamp type
5 | subtract('2016-12-31T13:30:15'::ts, 'P5D'::iday) = '2016-12-26T13:30:15'::ts
6 | subtract('2016-12-01T13:30:15'::ts, 'P5Y'::iyear) = '2011-12-01T13:30:15'::ts
7 | subtract('2016-12-01T13:30:15'::ts, 'PT5H'::iday) = '2016-12-01T08:30:15'::ts
8 |
9 | # date: examples using the date type
10 | subtract('2020-12-31'::date, 'P5D'::iday) = '2020-12-26'::date
11 | subtract('2020-12-31'::date, 'P5Y'::iyear) = '2015-12-31'::date
12 | subtract('2020-12-31'::date, 'P5M'::iyear) = '2020-07-31'::date
13 |
14 | # null_input: examples with null args or return
15 | subtract(null::date, 'P5D'::iday) = null::date
16 |
--------------------------------------------------------------------------------
/tests/cases/logarithmic/ln.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | ln(100000::i64) = 11.512925464970229::fp64
6 | ln(1.0::fp32) = 0::fp32
7 | ln(2.015::fp64) = 0.7006191953986464::fp64
8 |
9 | # infinity: Examples with infinity as input
10 | ln(-inf::fp64) [on_domain_error:ERROR] = <!ERROR>
11 | ln(-inf::fp64) [on_domain_error:NAN] = nan::fp64
12 | ln(-inf::fp64) [on_domain_error:NONE] = null::fp64
13 | ln(inf::fp64) = inf::fp64
14 |
15 | # log_zero: Examples with log zero
16 | ln(0.0::fp64) [on_log_zero:ERROR] = <!ERROR>
17 | ln(0.0::fp64) [on_log_zero:NAN] = null::fp64
18 | ln(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64
19 |
--------------------------------------------------------------------------------
/tests/cases/logarithmic/log10.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | log10(100000::i64) = 5.0::fp64
6 | log10(1.0::fp32) = 0::fp32
7 | log10(2.015::fp64) = 0.3042750504771283::fp64
8 |
9 | # infinity: Examples with infinity as input
10 | log10(-inf::fp64) [on_domain_error:ERROR] = <!ERROR>
11 | log10(-inf::fp64) [on_domain_error:NAN] = nan::fp64
12 | log10(-inf::fp64) [on_domain_error:NONE] = null::fp64
13 | log10(inf::fp64) = inf::fp64
14 |
15 | # log_zero: Examples with log zero
16 | log10(0.0::fp64) [on_log_zero:ERROR] = <!ERROR>
17 | log10(0.0::fp64) [on_log_zero:NAN] = null::fp64
18 | log10(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64
19 |
--------------------------------------------------------------------------------
/tests/cases/logarithmic/log2.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | log2(100000::i64) = 16.609640474436812::fp64
6 | log2(1.0::fp32) = 0::fp32
7 | log2(8.0::fp64) = 3.0::fp64
8 | log2(2.015::fp64) = 1.0107798387532427::fp64
9 |
10 | # infinity: Examples with infinity as input
11 | log2(-inf::fp64) [on_domain_error:ERROR] = <!ERROR>
12 | log2(-inf::fp64) [on_domain_error:NAN] = nan::fp64
13 | log2(-inf::fp64) [on_domain_error:NONE] = null::fp64
14 | log2(inf::fp64) = inf::fp64
15 |
16 | # log_zero: Examples with log zero
17 | log2(0.0::fp64) [on_log_zero:ERROR] = <!ERROR>
18 | log2(0.0::fp64) [on_log_zero:NAN] = null::fp64
19 | log2(0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64
20 |
--------------------------------------------------------------------------------
/tests/cases/logarithmic/logb.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_logarithmic.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | logb(10::i64, 100000::i64) = 5.0::fp64
6 | logb(7::fp64, 1.0::fp64) = 0::fp64
7 | logb(2::fp64, 7::fp64) = 2.8073549220576041::fp64
8 |
9 | # infinity: Examples with infinity as input
10 | logb(2.34::fp64, inf::fp64) = inf::fp64
11 | logb(10::fp64, -inf::fp64) [on_domain_error:ERROR] = <!ERROR>
12 | logb(10::fp64, -inf::fp64) [on_domain_error:NAN] = nan::fp64
13 | logb(10::fp64, -inf::fp64) [on_domain_error:NONE] = null::fp64
14 |
15 | # log_zero: Examples with log zero
16 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:ERROR] = <!ERROR>
17 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:NAN] = null::fp64
18 | logb(2.0::fp64, 0.0::fp64) [on_log_zero:MINUS_INFINITY] = -inf::fp64
19 |
--------------------------------------------------------------------------------
/tests/cases/rounding/ceil.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | ceil(2.25::fp32) = 3::fp32
6 | ceil(2.0000007152557373046875::fp64) = 3::fp64
7 | ceil(-65.500000001223334444::fp64) = -65::fp64
8 |
--------------------------------------------------------------------------------
/tests/cases/rounding/floor.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | floor(2.25::fp32) = 2::fp32
6 | floor(2.0000007152557373046875::fp64) = 2::fp64
7 | floor(-65.490000001223334444::fp64) = -66::fp64
8 |
--------------------------------------------------------------------------------
/tests/cases/rounding/round.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | round(2::i8, 2::i32) = 2::i8
6 | round(2.75::fp32, 1::i32) = 2.8::fp32
7 | round(2.0000007152457373046875::fp64, 10::i32) = 2.0000007152::fp64
8 | round(2.0000007152457373046875::fp64, 10::i32) = 2.0000007152::fp64
9 |
10 | # negative_rounding: Examples with negative rounding
11 | round(2::i8, -2::i32) = 0::i8
12 | round(123::i8, -2::i32) = 100::i8
13 | round(8793::i16, -2::i32) = 8800::i16
14 |
--------------------------------------------------------------------------------
/tests/cases/rounding_decimal/ceil.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | ceil(2.25::dec<3,2>) = 3::dec<2,0>
6 | ceil(-65.5::dec<3,1>) = -65::dec<3,0>
7 | ceil(9.9::dec<2,1>) = 10::dec<2,0>
8 |
--------------------------------------------------------------------------------
/tests/cases/rounding_decimal/floor.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | floor(2.25::dec<3,2>) = 2::dec<2,0>
6 | floor(-65.5::dec<3,1>) = -66::dec<3,0>
7 |
--------------------------------------------------------------------------------
/tests/cases/rounding_decimal/round.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_rounding_decimal.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | round(2.0::dec<2,1>, 2::i32) = 2::dec<3,1>
6 | round(2.75::dec<3,2>, 1::i32) = 2.8::dec<4,2>
7 |
8 | # negative_rounding: Examples with negative rounding
9 | round(2.0::dec<2,1>, -2::i32) = 0::dec<3,1>
10 | round(123::dec<3,0>, -2::i32) = 100::dec<4,0>
11 | round(8793.5::dec<5,1>, -2::i32) = 8800::dec<6,1>
12 |
--------------------------------------------------------------------------------
/tests/cases/string/bit_length.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | bit_length('abc'::str) = 24::i64
6 | bit_length(''::str) = 0::i64
7 | bit_length(' '::str) = 8::i64
8 | bit_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 384::i64
9 | bit_length(' 456'::str) = 48::i64
10 |
11 | # null_input: Examples with null as input
12 | bit_length(null::str) = null::i64
13 |
14 | # unicode: Examples with unicode characters as input
15 | bit_length('à'::str) = 16::i64
16 | bit_length('😄'::str) = 32::i64
17 |
--------------------------------------------------------------------------------
/tests/cases/string/char_length.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | char_length('abc'::str) = 3::i64
6 | char_length(''::str) = 0::i64
7 | char_length(' '::str) = 1::i64
8 | char_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 48::i64
9 | char_length(' 456'::str) = 6::i64
10 |
11 | # null_input: Examples with null as input
12 | char_length(null::str) = null::i64
13 |
14 | # unicode: Examples with unicode characters as input
15 | char_length('à'::str) = 1::i64
16 | char_length('😄'::str) = 1::i64
17 |
--------------------------------------------------------------------------------
/tests/cases/string/concat.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | concat('abcd'::str, 'efg'::str) = 'abcdefg'::str
6 |
7 | # null_input: Examples with null as input
8 | concat('abcd'::str, null::str) [null_handling:ACCEPT_NULLS] = null::str
9 | concat('abcd'::str, null::str) [null_handling:IGNORE_NULLS] = 'abcd'::str
10 | concat(null::str, 'abcd'::str) [null_handling:ACCEPT_NULLS] = null::str
11 | concat(null::str, 'abcd'::str) [null_handling:IGNORE_NULLS] = 'abcd'::str
12 | concat(null::str, null::str) [null_handling:ACCEPT_NULLS] = null::str
13 | concat(null::str, null::str) [null_handling:IGNORE_NULLS] = ''::str
14 |
--------------------------------------------------------------------------------
/tests/cases/string/concat_ws.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | concat_ws(','::str, 'Banana'::str, 'Apple'::str, 'Melon'::str) = 'Banana,Apple,Melon'::str
6 | concat_ws(''::str, 'Banana'::str, 'Apple'::str) = 'BananaApple'::str
7 | concat_ws(null::str, 'Banana'::str, 'Apple'::str, 'Melon'::str) = null::str
8 | concat_ws(','::str, null::str, 'Apple'::str, 'Melon'::str) = 'Apple,Melon'::str
9 | concat_ws(','::str, 'Apple'::str, null::str, 'Melon'::str) = 'Apple,Melon'::str
10 |
--------------------------------------------------------------------------------
/tests/cases/string/contains.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples with the substring as prefix, middle, and suffix
5 | contains('abcdefg'::str, 'abc'::str) = true::bool
6 | contains('abcdefg'::str, 'CdE'::str) = false::bool
7 | contains('abcdefg'::str, 'CdE'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool
8 | contains('abcdefg'::str, 'cde'::str) = true::bool
9 | contains('abcdefg'::str, 'fg'::str) = true::bool
10 | contains('abcdefg'::str, 'aef'::str) = false::bool
11 |
12 | # multi_byte_characters: multi-byte characters exist in the string
13 | contains('😊a😊b😊😊'::str, 'a😊b'::str) = true::bool
14 | contains('😊a😊b😊😊'::str, 'A😊B'::str) = false::bool
15 | contains('😊a😊b😊😊'::str, 'A😊B'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool
16 | contains('😊a😊b😊😊'::str, 'a😊c'::str) = false::bool
17 |
--------------------------------------------------------------------------------
/tests/cases/string/ends_with.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | ends_with('abcd'::str, 'd'::str) = true::bool
6 | ends_with('abcd'::str, 'a'::str) = false::bool
7 | ends_with('abcd'::str, 'CD'::str) = false::bool
8 |
9 | # case_insensitivity: comparison with case insensitivity
10 | ends_with('abcd'::str, 'CD'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool
11 |
12 | # multi_byte_characters: multi byte character comparison
13 | ends_with('😊a😊b😊😊'::str, 'b😊😊'::str) = true::bool
14 |
15 | # multi_byte_characters case insensitivity: multi byte character comparison with case insensitivity
16 | ends_with('😊a😊b😊😊'::str, 'B😊😊'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool
17 |
--------------------------------------------------------------------------------
/tests/cases/string/left.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | left('abcdef'::str, 2::i32) = 'ab'::str
6 | left('abcdef'::str, 6::i32) = 'abcdef'::str
7 | left('abcdef'::str, 10::i32) = 'abcdef'::str
8 | left(' abcdef abcdef'::str, 10::i32) = ' abcdef '::str
9 | left(null::str, 10::i32) = null::str
10 | left('abcdef'::str, null::i32) = null::str
11 |
12 | # unicode: Examples with unicode characters as input
13 | left('ææããa'::str, 2::i32) = 'ææ'::str
14 | left('😔😄😔😄'::str, 2::i32) = '😔😄'::str
15 |
--------------------------------------------------------------------------------
/tests/cases/string/like.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | like('abcdefg'::str, 'abcdefg'::str) = true::bool
6 | like('abcdefg'::str, 'abc'::str) = false::bool
7 |
8 | # wildcard: Examples using wildcards
9 | like('abcdefg'::str, 'abc%'::str) = true::bool
10 | like('abcdefg'::str, '%efg'::str) = true::bool
11 | like('abcdefg'::str, '_bcdefg'::str) = true::bool
12 | like('abcdefg'::str, 'abc_efg'::str) = true::bool
13 |
--------------------------------------------------------------------------------
/tests/cases/string/lower.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | lower('ABC'::str) = 'abc'::str
6 | lower('aBc'::str) = 'abc'::str
7 | lower('abc'::str) = 'abc'::str
8 | lower(''::str) = ''::str
9 |
10 | # null_input: Examples with null as input
11 | lower(null::str) = null::str
12 |
13 | # unicode: Examples with unicode characters as input
14 | lower('ÆÆÃÃA'::str) [full_unicode:TRUE] = 'ææããa'::str
15 | lower('😄'::str) = '😄'::str
16 |
--------------------------------------------------------------------------------
/tests/cases/string/lpad.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | lpad('abcdef'::str, 10::i32, ' '::str) = ' abcdef'::str
6 | lpad('abcdef '::str, 20::i32, '1'::str) = '1111111111abcdef '::str
7 | lpad(' abcdef'::str, 20::i32, '1'::str) = '1111111111 abcdef'::str
8 | lpad('abcdef'::str, 6::i32, ' '::str) = 'abcdef'::str
9 | lpad('abcdef'::str, 20::i32, 'aabb'::str) = 'aabbaabbaabbaaabcdef'::str
10 | lpad('abcdef'::str, 4::i32, ' '::str) = 'abcd'::str
11 | lpad('abcdef'::str, -1::i32, ' '::str) = ''::str
12 | lpad(null::str, 4::i32, ' '::str) = null::str
13 | lpad('abcdef'::str, 10::i32, null::str) = null::str
14 |
--------------------------------------------------------------------------------
/tests/cases/string/ltrim.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | ltrim('abc'::str, ' '::str) = 'abc'::str
6 | ltrim(' abc'::str, ' '::str) = 'abc'::str
7 | ltrim('abc '::str, ' '::str) = 'abc '::str
8 | ltrim(' abc '::str, ' '::str) = 'abc '::str
9 | ltrim(''::str, ' '::str) = ''::str
10 | ltrim(' '::str, ' '::str) = ''::str
11 | ltrim(null::str, ' '::str) = null::str
12 |
13 | # two_inputs: Examples with character input to trim off
14 | ltrim('aaaaabc'::str, 'a'::str) [spaces_only:FALSE] = 'bc'::str
15 | ltrim('abcabcdef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str
16 | ltrim('abccbadef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str
17 |
--------------------------------------------------------------------------------
/tests/cases/string/octet_length.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | octet_length('abc'::str) = 3::i64
6 | octet_length(''::str) = 0::i64
7 | octet_length(' '::str) = 1::i64
8 | octet_length('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'::str) = 48::i64
9 | octet_length(' 456'::str) = 6::i64
10 |
11 | # null_input: Examples with null as input
12 | octet_length(null::str) = null::i64
13 |
14 | # unicode: Examples with unicode characters as input
15 | octet_length('à'::str) = 2::i64
16 | octet_length('😄'::str) = 4::i64
17 |
--------------------------------------------------------------------------------
/tests/cases/string/regexp_count_substring.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | regexp_count_substring('foobarboopzoo'::str, 'o{1,}'::str, 1::i64) = 3::i64
6 | regexp_count_substring('foobarboopzoo'::str, 'o{1}'::str, 1::i64) = 6::i64
7 | regexp_count_substring('abcabcacb'::str, '[bc]'::str, 1::i64) = 6::i64
8 | regexp_count_substring('abcdefc'::str, '(.*)c'::str, 1::i64) = 1::i64
9 | regexp_count_substring('abcdefc'::str, '(.*)c?'::str, 1::i64) = 2::i64
10 | regexp_count_substring('foobarboopzoo'::str, 'o{1,}'::str) = 3::i64
11 | regexp_count_substring('foobarboopzoo'::str, 'o{1}'::str) = 6::i64
12 | regexp_count_substring('abcabcacb'::str, '[bc]'::str) = 6::i64
13 | regexp_count_substring('abcdefc'::str, '(.*)c'::str) = 1::i64
14 | regexp_count_substring('abcdefc'::str, '(.*)c?'::str) = 2::i64
15 |
16 | # null_input: Examples with null as input
17 | regexp_count_substring('Hello'::str, null::str, 1::i64) = null::i64
18 | regexp_count_substring(null::str, ' '::str, 1::i64) = null::i64
19 | regexp_count_substring('Hello'::str, null::str) = null::i64
20 | regexp_count_substring(null::str, ' '::str) = null::i64
21 |
22 | # metacharacters: Examples with metacharacters
23 | regexp_count_substring('abc1abc'::str, '\d'::str, 1::i64) = 1::i64
24 | regexp_count_substring('abc1abc'::str, '\D'::str, 1::i64) = 6::i64
25 | regexp_count_substring('abc def ghi'::str, '\s'::str, 1::i64) = 2::i64
26 | regexp_count_substring('abc def ghi'::str, '\S'::str, 1::i64) = 9::i64
27 | regexp_count_substring('abc def ghi'::str, '\w'::str, 1::i64) = 9::i64
28 | regexp_count_substring('abc def ghi,'::str, '\W'::str, 1::i64) = 3::i64
29 | regexp_count_substring('abc1abc'::str, '\d'::str) = 1::i64
30 | regexp_count_substring('abc1abc'::str, '\D'::str) = 6::i64
31 | regexp_count_substring('abc def ghi'::str, '\s'::str) = 2::i64
32 | regexp_count_substring('abc def ghi'::str, '\S'::str) = 9::i64
33 | regexp_count_substring('abc def ghi'::str, '\w'::str) = 9::i64
34 | regexp_count_substring('abc def ghi,'::str, '\W'::str) = 3::i64
35 |
36 | # lookahead: Examples with lookahead
37 | regexp_count_substring('100 dollars 100 dollars'::str, '\d+(?= dollars)'::str, 1::i64) [lookaround:TRUE] = 2::i64
38 | regexp_count_substring('100 dollars 100 dollars'::str, '\d+(?= dollars)'::str) [lookaround:TRUE] = 2::i64
39 |
40 | # negative_lookahead: Examples with negative lookahead
41 | regexp_count_substring('100 pesos, 99 pesos, 98 pesos'::str, '\d+(?!\d| dollars)'::str, 1::i64) [lookaround:TRUE] = 3::i64
42 | regexp_count_substring('100 pesos, 99 pesos, 98 pesos'::str, '\d+(?!\d| dollars)'::str) [lookaround:TRUE] = 3::i64
43 |
44 | # lookbehind: Examples with lookbehind
45 | regexp_count_substring('USD100'::str, '(?<=USD)\d{3}'::str, 1::i64) [lookaround:TRUE] = 1::i64
46 | regexp_count_substring('USD100'::str, '(?<=USD)\d{3}'::str) [lookaround:TRUE] = 1::i64
47 |
48 | # negative_lookbehind: Examples with negative lookbehind
49 | regexp_count_substring('JPY100JPY100'::str, '\d{3}(?
6 | regexp_string_split('Hello'::str, 'Hel+'::str) = ['', 'o']::list<str>
7 |
8 | # greedy_matching: Examples with greedy matching
9 | regexp_string_split('HHHelloooo'::str, 'Hel+'::str) = ['HH', 'oooo']::list<str>
10 |
11 | # position_anchors: Examples with position anchors
12 | regexp_string_split('abcdefg'::str, '\Aabc'::str) = ['', 'defg']::list<str>
13 | regexp_string_split('abcdefg'::str, 'efg$'::str) = ['abcd', '']::list<str>
14 |
15 | # metacharacters: Examples with metacharacters
16 | regexp_string_split('abc1abc'::str, '\d'::str) = ['abc', 'abc']::list<str>
17 | regexp_string_split('111a111'::str, '\D'::str) = ['111', '111']::list<str>
18 | regexp_string_split('abc def'::str, '\s'::str) = ['abc', 'def']::list<str>
19 | regexp_string_split('a bcdef'::str, '\S'::str) = ['', ' ', '', '', '', '', '']::list<str>
20 | regexp_string_split(' abcdef'::str, '\w'::str) = [' ', '', '', '', '', '', '']::list<str>
21 | regexp_string_split('a bcdef'::str, '\W'::str) = ['a', 'bcdef']::list<str>
22 |
23 | # occurrence_indicator: Examples with occurrence indicators
24 | regexp_string_split('abc123abc'::str, '[0-9]+'::str) = ['abc', 'abc']::list<str>
25 | regexp_string_split('abc123abc'::str, '[bc]'::str) = ['a', '', '123a', '', '']::list<str>
26 | regexp_string_split('abcde'::str, '(.*)c'::str) = ['', 'de']::list<str>
27 | regexp_string_split('abbbbc'::str, '[b]{2,3}'::str) = ['a', 'bc']::list<str>
28 |
29 | # lookahead: Examples with lookahead
30 | regexp_string_split('100 dollars'::str, '\d+(?= dollars)'::str) [lookaround:TRUE] = ['', ' dollars']::list<str>
31 |
32 | # negative_lookahead: Examples with negative lookahead
33 | regexp_string_split('100 pesos'::str, '\d+(?!\d| dollars)'::str) [lookaround:TRUE] = ['', ' pesos']::list<str>
34 |
35 | # lookbehind: Examples with lookbehind
36 | regexp_string_split('USD100'::str, '(?<=USD)\d{3}'::str) [lookaround:TRUE] = ['USD', '']::list<str>
37 |
38 | # negative_lookbehind: Examples with negative lookbehind
39 | regexp_string_split('JPY100'::str, '\d{3}(?<!USD)'::str) [lookaround:TRUE] = ['JPY', '']::list<str>
40 |
--------------------------------------------------------------------------------
/tests/cases/string/repeat.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | repeat('abc'::str, 2::i64) = 'abcabc'::str
6 | repeat('aBc'::str, 0::i64) = ''::str
7 | repeat(' abd'::str, 3::i64) = ' abd abd abd'::str
8 | repeat(' '::str, 5::i64) = ' '::str
9 | repeat(''::str, 2::i64) = ''::str
10 |
11 | # null_input: Examples with null as input
12 | repeat(null::str, 2::i64) = null::str
13 |
--------------------------------------------------------------------------------
/tests/cases/string/replace.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | replace('abcabcabc'::str, 'bc'::str, 'dd'::str) = 'addaddadd'::str
6 | replace('abcabcabc'::str, ' '::str, 'dd'::str) = 'abcabcabc'::str
7 | replace('abc def ghi'::str, ' '::str, ','::str) = 'abc,def,ghi'::str
8 |
9 | # null_input: Examples with null as input
10 | replace('abcd'::str, null::str, ','::str) = null::str
11 | replace('abcd'::str, ' '::str, null::str) = null::str
12 | replace(null::str, ' '::str, ','::str) = null::str
13 |
--------------------------------------------------------------------------------
/tests/cases/string/reverse.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | reverse('abc'::str) = 'cba'::str
6 | reverse('aBc'::str) = 'cBa'::str
7 | reverse(' 123'::str) = '321 '::str
8 | reverse(''::str) = ''::str
9 |
10 | # null_input: Examples with null as input
11 | reverse(null::str) = null::str
12 |
13 | # unicode: Examples with unicode characters as input
14 | reverse('ææããa'::str) = 'aããææ'::str
15 | reverse('😔😄'::str) = '😄😔'::str
16 |
--------------------------------------------------------------------------------
/tests/cases/string/right.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | right('abcdef'::str, 2::i32) = 'ef'::str
6 | right('abcdef'::str, 6::i32) = 'abcdef'::str
7 | right('abcdef'::str, 10::i32) = 'abcdef'::str
8 | right(' abcdef abcdef'::str, 10::i32) = 'ef abcdef'::str
9 | right(null::str, 10::i32) = null::str
10 | right('abcdef'::str, null::i32) = null::str
11 |
12 | # unicode: Examples with unicode characters as input
13 | right('ææããa'::str, 2::i32) = 'ãa'::str
14 | right('😔😄😔😄'::str, 2::i32) = '😔😄'::str
15 |
--------------------------------------------------------------------------------
/tests/cases/string/rpad.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | rpad('abcdef'::str, 10::i32, ' '::str) = 'abcdef '::str
6 | rpad('abcdef '::str, 20::i32, '1'::str) = 'abcdef 1111111111'::str
7 | rpad(' abcdef'::str, 20::i32, '1'::str) = ' abcdef1111111111'::str
8 | rpad('abcdef'::str, 6::i32, ' '::str) = 'abcdef'::str
9 | rpad('abcdef'::str, 20::i32, 'aabb'::str) = 'abcdefaabbaabbaabbaa'::str
10 | rpad('abcdef'::str, 4::i32, ' '::str) = 'abcd'::str
11 | rpad('abcdef'::str, -1::i32, ' '::str) = ''::str
12 | rpad(null::str, 4::i32, ' '::str) = null::str
13 | rpad('abcdef'::str, 10::i32, null::str) = null::str
14 |
--------------------------------------------------------------------------------
/tests/cases/string/rtrim.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | rtrim('abc'::str, ' '::str) = 'abc'::str
6 | rtrim(' abc'::str, ' '::str) = ' abc'::str
7 | rtrim('abc '::str, ' '::str) = 'abc'::str
8 | rtrim(' abc '::str, ' '::str) = ' abc'::str
9 | rtrim(''::str, ' '::str) = ''::str
10 | rtrim(' '::str, ' '::str) = ''::str
11 | rtrim(null::str, ' '::str) = null::str
12 |
13 | # two_inputs: Examples with character input to trim off
14 | rtrim('aaaaabccccc'::str, 'c'::str) [spaces_only:FALSE] = 'aaaaab'::str
15 | rtrim('abcabcdef'::str, 'def'::str) [spaces_only:FALSE] = 'abcabc'::str
16 | rtrim('defabccba'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str
17 |
--------------------------------------------------------------------------------
/tests/cases/string/starts_with.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | starts_with('abcd'::str, 'a'::str) = true::bool
6 | starts_with('abcd'::str, 'z'::str) = false::bool
7 | starts_with('abcd'::str, 'AB'::str) = false::bool
8 |
9 | # case_insensitivity: comparison with case insensitivity
10 | starts_with('abcd'::str, 'AB'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool
11 |
12 | # multi_byte_characters: multi byte character comparison
13 | starts_with('😊a😊b😊😊'::str, '😊a'::str) = true::bool
14 |
15 | # multi_byte_characters case insensitivity: multi byte character comparison with case insensitivity
16 | starts_with('😊a😊b😊😊'::str, '😊A'::str) [case_sensitivity:CASE_INSENSITIVE] = true::bool
17 |
--------------------------------------------------------------------------------
/tests/cases/string/string_split.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | string_split('abc'::str, ' '::str) = ['abc']::list
6 | string_split('abc abc'::str, ' '::str) = ['abc', 'abc']::list
7 | string_split('bacad'::str, 'a'::str) = ['b', 'c', 'd']::list
8 | string_split('a b c d'::str, ' '::str) = ['a', 'b', 'c', 'd']::list
9 | string_split('a b c d'::str, null::str) = ['a b c d']::list
10 |
--------------------------------------------------------------------------------
/tests/cases/string/substring.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | substring('abcdefg'::str, 1::i32, 5::i32) = 'abcde'::str
6 | substring('abcdefg'::str, 1::i32, 5::i32) = 'abcde'::str
7 |
8 | # start_greater_than_length: Example where start argument greater than the length of the string
9 | substring('abcdefg'::str, 10::i32, 2::i32) = ''::str
10 | substring('abcdefg'::str, 10::i32, 2::i32) = ''::str
11 |
12 | # multi_byte_characters: Example where multi byte characters exist in the string
13 | substring('😊a😊b😊😊'::str, 1::i32, 3::i32) = '😊a😊'::str
14 | substring('😊a😊b😊😊'::str, 1::i32, 3::i32) = '😊a😊'::str
15 |
16 | # negative_start: Example where start argument is a negative integer
17 | substring('abcdefg'::str, -1::i32, 2::i32) [negative_start:WRAP_FROM_END] = 'g'::str
18 | substring('abcdefg'::str, -2::i32, 1::i32) [negative_start:WRAP_FROM_END] = 'f'::str
19 | substring('abcdefg'::str, -1::i32, 2::i32) [negative_start:LEFT_OF_BEGINNING] = ''::str
20 | substring('abcdefg'::str, -1::i32, 3::i32) [negative_start:LEFT_OF_BEGINNING] = 'a'::str
21 |
--------------------------------------------------------------------------------
/tests/cases/string/trim.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | trim('abc'::str, ' '::str) = 'abc'::str
6 | trim(' abc'::str, ' '::str) = 'abc'::str
7 | trim('abc '::str, ' '::str) = 'abc'::str
8 | trim(' abc '::str, ' '::str) = 'abc'::str
9 | trim(''::str, ' '::str) = ''::str
10 | trim(' '::str, ' '::str) = ''::str
11 | trim(null::str, ' '::str) = null::str
12 |
13 | # two_inputs: Examples with character input to trim off
14 | trim('aaaaabcccccaaa'::str, 'a'::str) [spaces_only:False] = 'bccccc'::str
15 | trim('defabcabcdef'::str, 'def'::str) [spaces_only:False] = 'abcabc'::str
16 | trim('abcdefcbaa'::str, 'abc'::str) [spaces_only:False] = 'def'::str
17 |
--------------------------------------------------------------------------------
/tests/cases/string/upper.test:
--------------------------------------------------------------------------------
1 | ### SUBSTRAIT_SCALAR_TEST: v1.0
2 | ### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml'
3 |
4 | # basic: Basic examples without any special cases
5 | upper('abc'::str) = 'ABC'::str
6 | upper('aBc'::str) = 'ABC'::str
7 | upper('ABC'::str) = 'ABC'::str
8 | upper(''::str) = ''::str
9 |
10 | # null_input: Examples with null as input
11 | upper(null::str) = null::str
12 |
13 | # unicode: Examples with unicode characters as input
14 | upper('ææããa'::str) [full_unicode:TRUE] = 'ÆÆÃÃA'::str
15 | upper('😄'::str) = '😄'::str
16 |
--------------------------------------------------------------------------------
/tests/coverage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/substrait/d430e521f203aec6a4e06731d4bfd68cdf61f443/tests/coverage/__init__.py
--------------------------------------------------------------------------------
/tests/coverage/case_file_parser.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | import os
3 |
4 | from antlr4 import CommonTokenStream, FileStream
5 | from antlr4.error.ErrorListener import ErrorListener
6 |
7 | from tests.coverage.antlr_parser.FuncTestCaseLexer import FuncTestCaseLexer
8 | from tests.coverage.antlr_parser.FuncTestCaseParser import FuncTestCaseParser
9 | from tests.coverage.visitor import TestCaseVisitor, ParseError
10 |
11 |
class ParseErrorListener(ErrorListener):
    """Error listener that accumulates syntax-error messages in ``self.errors``."""

    def __init__(self):
        super().__init__()
        # One formatted message per reported syntax error, in report order.
        self.errors = []

    def syntaxError(self, recognizer, offending_symbol, line, column, msg, e):
        """Record a formatted message for the syntax error reported at line/column."""
        self.errors.append(f"Syntax error at line {line}, column {column}: {msg}")
20 |
21 |
def parse_stream(input_stream, file_path):
    """Parse a test-case character stream and return the visited result.

    Args:
        input_stream: character stream handed to ``FuncTestCaseLexer``.
        file_path: path passed through to ``TestCaseVisitor``.

    Returns:
        Whatever ``TestCaseVisitor.visit`` produces for the parsed ``doc`` tree.

    Raises:
        ParseError: if the parser reports at least one syntax error; the
            message carries every error collected by the listener.
    """
    # Lexer -> token stream -> parser pipeline.
    parser = FuncTestCaseParser(CommonTokenStream(FuncTestCaseLexer(input_stream)))

    # Swap the parser's existing listeners for one that collects messages.
    # Only the parser's listeners are replaced; the lexer is left untouched.
    collecting_listener = ParseErrorListener()
    parser.removeErrorListeners()
    parser.addErrorListener(collecting_listener)

    parse_tree = parser.doc()  # entry rule of the test-file grammar
    syntax_error_count = parser.getNumberOfSyntaxErrors()
    if syntax_error_count > 0:
        # Dump the (partial) tree to help locate the offending input.
        print(parse_tree.toStringTree(recog=parser))
        print(f"{syntax_error_count} Syntax errors found, exiting")
        raise ParseError(f"Syntax errors: {collecting_listener.errors}")

    # For debugging, print(parse_tree.toStringTree(recog=parser)) shows the tree.

    return TestCaseVisitor(file_path).visit(parse_tree)
45 |
46 |
def parse_one_file(file_path):
    """Read ``file_path`` as UTF-8 and return the result of ``parse_stream``."""
    char_stream = FileStream(file_path, "UTF-8")
    return parse_stream(char_stream, file_path)
49 |
50 |
def parse_testcase_directory_recursively(dir_path):
    """Parse every ``*.test`` file under ``dir_path``, descending into subdirectories.

    Args:
        dir_path: directory to walk.

    Returns:
        A flat list with one ``parse_one_file`` result per ``.test`` file found,
        in the order ``os.listdir`` yields the entries.
    """
    collected = []
    for entry_name in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry_name)
        if os.path.isdir(entry_path):
            # Subdirectory: splice in everything found below it.
            collected += parse_testcase_directory_recursively(entry_path)
            continue
        if os.path.isfile(entry_path) and entry_name.endswith(".test"):
            collected.append(parse_one_file(entry_path))
    return collected
63 |
64 |
def load_all_testcases(dir_path) -> list:
    """Return the parsed test files found anywhere under ``dir_path``."""
    parsed_files = parse_testcase_directory_recursively(dir_path)
    return parsed_files
67 |
--------------------------------------------------------------------------------
/tests/coverage/nodes.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | from dataclasses import dataclass
3 | from typing import List
4 |
5 |
@dataclass
class CaseGroup:
    """Name and description of a group of test cases.

    NOTE(review): presumably taken from ``# name: description`` comment lines
    in the .test files -- confirm against the visitor.
    """

    # Short identifier of the group.
    name: str
    # Free-text description of the group.
    description: str
10 |
11 |
@dataclass
class SubstraitError:
    """Expected-error outcome of a test case, as opposed to a literal result."""

    # Text identifying the error.
    error: str
15 |
16 |
17 | @dataclass
18 | class CaseLiteral:
19 | value: str | int | float | list | None
20 | type: str
21 |
22 | def get_base_type(self):
23 | type_str = self.type
24 | if "<" in type_str:
25 | type_str = type_str[: type_str.find("<")]
26 | if type_str.endswith("?"):
27 | return type_str[:-1]
28 | return type_str
29 |
30 |
@dataclass
class AggregateArgument:
    """One argument of an aggregate-function test case.

    NOTE(review): the fields suggest an argument is either a table column
    reference or a scalar literal -- confirm against the visitor that builds it.
    """

    # Name of the referenced column.
    column_name: str
    # Type string of the referenced column.
    column_type: str
    # Name of the table the column belongs to.
    table_name: str
    # Literal carried by the argument, or None when there is none.
    scalar_value: CaseLiteral | None
37 |
38 |
@dataclass
class TestCase:
    """A single function test case: name, arguments, options and expected result."""

    # Name of the function under test.
    func_name: str
    # NOTE(review): presumably the extension file the function comes from -- confirm.
    base_uri: str
    # Group the case belongs to, if any.
    group: CaseGroup | None
    # Function options attached to the case.
    options: dict
    # Table rows for the case, or None when there are none.
    rows: List[List] | None
    # Call arguments.
    args: List[CaseLiteral] | List[AggregateArgument]
    # Expected outcome: a literal, a plain string, or an expected error.
    result: CaseLiteral | str | SubstraitError
    # Free-text comment attached to the case.
    comment: str

    def get_return_type(self):
        """Return the result's type string for a literal result, else the result itself."""
        outcome = self.result
        return outcome.type if isinstance(outcome, CaseLiteral) else outcome

    def is_return_type_error(self):
        """Return True when the expected result is a ``SubstraitError``."""
        return isinstance(self.result, SubstraitError)

    def get_arg_types(self):
        """Return ``get_base_type()`` of every argument, in order."""
        base_types = []
        for argument in self.args:
            base_types.append(argument.get_base_type())
        return base_types

    def get_signature(self):
        """Return ``name(type, ...) = return_type`` built from the arguments' ``type``."""
        joined_arg_types = ", ".join(argument.type for argument in self.args)
        return f"{self.func_name}({joined_arg_types}) = {self.get_return_type()}"
63 |
64 |
@dataclass
class TestFile:
    """A parsed test file and the test cases it contains.

    NOTE(review): ``version`` and ``include`` presumably mirror the
    ``### SUBSTRAIT_..._TEST`` and ``### SUBSTRAIT_INCLUDE`` header lines of a
    .test file -- confirm against the visitor.
    """

    # Path of the test file.
    path: str
    # Version string of the test file.
    version: str
    # Include declared by the test file.
    include: str
    # Test cases contained in the file.
    testcases: List[TestCase]
71 |
--------------------------------------------------------------------------------
/tests/test_extensions.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | import json
3 | import os
4 | from dataclasses import asdict
5 |
6 | from tests.baseline import read_baseline_file, generate_baseline
7 | from tests.coverage.case_file_parser import load_all_testcases
8 | from tests.coverage.coverage import get_test_coverage
9 | from tests.coverage.extensions import build_type_to_short_type
10 | from tests.coverage.extensions import Extension
11 |
12 |
13 | # NOTE: this test is run as part of pre-commit hook
def test_substrait_extension_coverage():
    """Check the test cases under ``cases`` against the stored coverage baseline.

    Loads ``baseline.json`` from this file's directory, reads the extensions
    from ``../extensions``, parses every test file under ``./cases`` and
    computes coverage. The test fails if any test case has no matching
    function, or if the freshly generated baseline does not validate against
    the stored one. When the two baselines differ but validation passes, the
    new baseline is only printed; this function does not write the file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    baseline = read_baseline_file(os.path.join(script_dir, "baseline.json"))
    extensions_path = os.path.join(script_dir, "../extensions")
    registry = Extension.read_substrait_extensions(extensions_path)

    test_case_dir = os.path.join(script_dir, "./cases")
    all_test_files = load_all_testcases(test_case_dir)
    coverage = get_test_coverage(all_test_files, registry)

    assert (
        coverage.num_tests_with_no_matching_function == 0
    ), f"{coverage.num_tests_with_no_matching_function} tests with no matching function"

    actual_baseline = generate_baseline(registry, coverage)
    errors = actual_baseline.validate_against(baseline)
    # Separate the error list, the explanation and the JSON dump with newlines
    # so the failure message stays readable.
    assert not errors, (
        "\n".join(errors)
        + "\nThe baseline file does not match the current test coverage. "
        "Please update the file at tests/baseline.json to align with the current baseline:\n"
        f"{json.dumps(asdict(actual_baseline), indent=2)}"
    )

    if baseline != actual_baseline:
        print("\nBaseline has changed, updating tests/baseline.json")
        print(json.dumps(asdict(actual_baseline), indent=2))
40 |
41 |
def test_build_type_to_short_type():
    """Check the long-name to short-name mapping for a fixed set of type names."""
    long_to_short = build_type_to_short_type()
    # Expected short name for each long type name, checked in this order.
    expected_short_names = {
        "i64": "i64",
        "fp64": "fp64",
        "timestamp": "ts",
        "timestamp_tz": "tstz",
        "precision_timestamp": "pts",
        "precision_timestamp_tz": "ptstz",
        "interval_year": "iyear",
        "interval_day": "iday",
        "decimal": "dec",
        "boolean": "bool",
        "string": "str",
        "binary": "vbin",
        "fixedbinary": "fbin",
        "fixedchar": "fchar",
        "varchar": "vchar",
        "list": "list",
        "map": "map",
        "struct": "struct",
    }
    for long_name, short_name in expected_short_names.items():
        assert long_to_short[long_name] == short_name
62 |
--------------------------------------------------------------------------------