├── .github
└── workflows
│ ├── deploy.yml
│ └── test.yml
├── .gitignore
├── .gitmodules
├── .markdownlint.json
├── .vscode
├── launch.json
└── settings.json
├── LICENSE
├── NOTICE.txt
├── README.md
├── bft
├── __init__.py
├── cases
│ ├── __init__.py
│ ├── loader.py
│ ├── parser.py
│ ├── runner.py
│ └── types.py
├── core
│ ├── __init__.py
│ ├── function.py
│ ├── index_parser.py
│ ├── yaml_parser.py
│ └── yaml_parser_test.py
├── dialects
│ ├── __init__.py
│ ├── loader.py
│ ├── parser.py
│ └── types.py
├── html
│ ├── __init__.py
│ ├── builder.py
│ └── types.py
├── substrait
│ ├── __init__.py
│ └── extension_file_parser.py
├── supplements
│ ├── __init__.py
│ ├── parser.py
│ └── types.py
├── templates
│ ├── function_desc.j2
│ └── function_index.j2
├── testers
│ ├── __init__.py
│ ├── base_tester.py
│ ├── cudf
│ │ ├── __init__.py
│ │ ├── runner.py
│ │ └── tester.py
│ ├── datafusion
│ │ ├── __init__.py
│ │ ├── runner.py
│ │ └── tester.py
│ ├── duckdb
│ │ ├── __init__.py
│ │ ├── runner.py
│ │ ├── runner_test.py
│ │ └── tester.py
│ ├── postgres
│ │ ├── __init__.py
│ │ ├── runner.py
│ │ └── tester.py
│ ├── snowflake
│ │ ├── __init__.py
│ │ ├── config.yaml
│ │ ├── runner.py
│ │ └── tester.py
│ ├── sqlite
│ │ ├── __init__.py
│ │ ├── runner.py
│ │ └── tester.py
│ └── velox
│ │ ├── runner.py
│ │ └── tester.py
├── tests
│ ├── __init__.py
│ ├── base.py
│ ├── conftest.py
│ ├── test_cudf.py
│ ├── test_datafusion.py
│ ├── test_duckdb.py
│ ├── test_postgres.py
│ ├── test_pyvelox.py
│ ├── test_snowflake.py
│ └── test_sqlite.py
└── utils
│ └── utils.py
├── build_site.py
├── ci
└── docker
│ ├── base-tester.Dockerfile
│ ├── datafusion.Dockerfile
│ ├── duckdb.Dockerfile
│ ├── postgres-compose.yaml
│ ├── postgres-server.Dockerfile
│ ├── sqlite.Dockerfile
│ ├── velox-compose.yaml
│ └── velox.Dockerfile
├── dialects
├── cudf.yaml
├── datafusion.yaml
├── duckdb.yaml
├── postgres.yaml
├── snowflake.yaml
├── sqlite.yaml
└── velox_presto.yaml
├── index.yaml
├── requirements.txt
├── static_site
├── android-chrome-192x192.png
├── android-chrome-512x512.png
├── apple-touch-icon.png
├── assets
│ ├── index
│ │ ├── script.js
│ │ └── style.css
│ └── supplementary
│ │ ├── script.js
│ │ ├── style.css
│ │ └── terminal.css
├── favicon-16x16.png
├── favicon-32x32.png
└── favicon.ico
├── supplemental
└── arithmetic
│ ├── abs.md
│ ├── acos.md
│ ├── acosh.md
│ ├── add.md
│ ├── asin.md
│ ├── asinh.md
│ ├── atan.md
│ ├── atan2.md
│ ├── atanh.md
│ ├── bitwise_and.md
│ ├── bitwise_not.md
│ ├── bitwise_or.md
│ ├── bitwise_xor.md
│ ├── cos.md
│ ├── cosh.md
│ ├── definitions.yaml
│ ├── divide.md
│ ├── exponential.md
│ ├── factorial.md
│ ├── modulus.md
│ ├── multiply.md
│ ├── negate.md
│ ├── power.md
│ ├── sign.md
│ ├── sin.md
│ ├── sinh.md
│ ├── sqrt.md
│ ├── subtract.md
│ ├── sum.md
│ ├── tan.md
│ └── tanh.md
└── tools
├── convert_testcases
├── check_testcase_format_conversion_roundtrip.py
├── convert_testcase_helper.py
├── convert_testcases_to_substrait_test_format.py
├── convert_testcases_to_yaml_format.py
├── test_convert_testcases_to_substrait_test_format.py
└── test_convert_testcases_to_yaml_format.py
├── schema
└── casefile.yaml
└── yaml_to_json.py
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
# Builds the static site and hands the result to the gh-pages deploy action.
name: Deploy to gh-pages
on:
  # Manual trigger.
  workflow_dispatch:
  # Triggered when a run of the workflow listed below completes.
  # NOTE(review): the test workflow in this repository declares `name: Test`;
  # confirm that the lowercase `test` below actually matches it.
  workflow_run:
    workflows:
      - test
    types:
      - completed

jobs:
  deploy:
    # Run only for a manual dispatch, or when the triggering workflow run
    # concluded with `success`.
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          # The repository declares a `substrait` git submodule.
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          # Quoted so the version stays a string.
          python-version: "3.11"
          cache: "pip"
      - run: pip install -r requirements.txt
      - name: Build Site
        run: python build_site.py
      - name: Deploy
        uses: JamesIves/github-pages-deploy-action@v4
        with:
          # Folder passed to the deploy action.
          folder: dist

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
# CI workflow: one job per engine plus a job that checks the site builds.
name: Test
on:
  pull_request:
  push:
    branches:
      - main

jobs:
  # sqlite / duckdb / datafusion: build the engine's Dockerfile from ci/docker
  # and run the resulting image (`docker build -q` prints the image id that
  # `docker run` then receives through the command substitution).
  sqlite:
    name: Run tests with sqlite
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Build & run
        run: docker run --rm $(docker build -q --file ./ci/docker/sqlite.Dockerfile .)
  duckdb:
    name: Run tests with duckdb
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Build & run
        run: docker run --rm $(docker build -q --file ./ci/docker/duckdb.Dockerfile .)
  datafusion:
    name: Run tests with datafusion
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Build & run
        run: docker run --rm $(docker build -q --file ./ci/docker/datafusion.Dockerfile .)
  # postgres / velox: use a compose file; build first, then run the `app`
  # service.
  postgres:
    name: Run tests with postgres
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Build
        run: docker compose -f ./ci/docker/postgres-compose.yaml build

      - name: Run
        run: docker compose -f ./ci/docker/postgres-compose.yaml run app
  velox:
    name: Run tests with velox
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build
        run: docker compose -f ./ci/docker/velox-compose.yaml build

      - name: Run
        run: docker compose -f ./ci/docker/velox-compose.yaml run app
  # Same install and build steps as the deploy workflow, without the deploy
  # step.
  site:
    name: Build site
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: "pip"
      - run: pip install -r requirements.txt
      - name: Build Site
        run: python build_site.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "substrait"]
2 | path = substrait
3 | url = https://github.com/substrait-io/substrait.git
4 |
--------------------------------------------------------------------------------
/.markdownlint.json:
--------------------------------------------------------------------------------
1 | {
2 | "MD013": true
3 | }
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Build Site",
9 | "type": "python",
10 | "request": "launch",
11 | "env": {
12 | "PYTHONPATH": "${workspaceFolder}"
13 | },
14 | "module": "bft.html.builder",
15 | "justMyCode": true
16 | },
17 | {
18 | "name": "Run Tests",
19 | "type": "python",
20 | "request": "launch",
21 | "env": {
22 | "PYTHONPATH": "${workspaceFolder}"
23 | },
24 | "module": "pytest",
25 | "args": [
26 | "bft"
27 | ],
28 | "justMyCode": true
29 | }
30 | ]
31 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.formatting.provider": "black",
3 | "editor.formatOnSave": true,
4 | "editor.codeActionsOnSave": {
5 | "source.organizeImports": "explicit"
6 | },
7 | "isort.args": [
8 | "--profile",
9 | "black"
10 | ],
11 | "yaml.schemas": {
12 | "./tools/schema/casefile.yaml": "cases/**",
13 | "https://json.schemastore.org/github-workflow.json": "file:///home/pace/dev/bft/.github/workflows/deploy.yml"
14 | },
15 | "python.testing.unittestEnabled": false,
16 | "python.testing.pytestEnabled": true,
17 | "python.testing.pytestArgs": [
18 | "bft"
19 | ]
20 | }
21 |
--------------------------------------------------------------------------------
/NOTICE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2023 Voltron Data, Inc.
2 |
3 | This product includes software developed at
4 | Voltron Data, Inc. (http://www.voltrondata.com/).
5 |
--------------------------------------------------------------------------------
/bft/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/__init__.py
--------------------------------------------------------------------------------
/bft/cases/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/cases/__init__.py
--------------------------------------------------------------------------------
/bft/cases/loader.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List
3 |
4 | from .parser import CaseFileParser
5 | from .types import Case
6 |
7 |
def load_cases(cases_dir: str) -> List[Case]:
    """Collect the cases from every ``*.yaml`` file found beneath ``cases_dir``.

    Args:
        cases_dir: Directory that is searched recursively for case files.

    Returns:
        A flat list with the cases of every document of every file, in the
        order the files are yielded by ``Path.rglob``.
    """
    collected = []
    parser = CaseFileParser()
    for yaml_path in Path(cases_dir).rglob("*.yaml"):
        with open(yaml_path, "rb") as stream:
            # One file may hold several YAML documents; each yields a CaseFile.
            for parsed_file in parser.parse(stream):
                collected.extend(parsed_file.cases)
    return collected
17 |
--------------------------------------------------------------------------------
/bft/cases/parser.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import BinaryIO, Iterable, List
3 |
4 | from bft.core.yaml_parser import BaseYamlParser, BaseYamlVisitor
5 |
6 | from .types import Case, CaseFile, CaseGroup, CaseLiteral, ProtoCase
7 |
8 |
class CaseFileVisitor(BaseYamlVisitor[CaseFile]):
    """Visitor that converts one loaded YAML case document into a ``CaseFile``."""

    def __init__(self):
        super().__init__()
        # Groups defined inline by cases visited so far, keyed by group id.
        self.__groups = {}

    def __resolve_proto_case(self, case: ProtoCase, base_uri: str, function: str) -> Case:
        """Replace the group id stored on ``case`` with the registered group.

        Raises:
            Exception: if the id was never defined by a group in this visitor.
        """
        if case.group in self.__groups:
            group = self.__groups[case.group]
            return Case(function, base_uri, group, case.args, case.result, case.options)
        raise Exception(
            "A case referred to group " + case.group +" which was not defined in the file"
        )

    def visit_group(self, group):
        """Register an inline group definition and return its id."""
        group_id = self._get_or_die(group, "id")
        group_description = self._get_or_die(group, "description")
        self.__groups[group_id] = CaseGroup(group_id, group_description)
        return group_id

    def __normalize_yaml_literal(self, value, data_type):
        """Convert string spellings of special floats into real floats.

        Only string values whose type name starts with ``fp`` are touched;
        everything else is returned unchanged.

        Raises:
            ValueError: for an ``fp`` string that is not a recognized spelling.
        """
        # YAML/JSON can't represent infinity or nan, so they arrive as strings.
        if not data_type.startswith("fp"):
            return value
        if not isinstance(value, str):
            return value
        lowered = value.lower()
        if lowered.startswith("inf"):
            return float("inf")
        if lowered.startswith("-inf"):
            return float("-inf")
        if lowered.startswith("1e"):
            return float(lowered)
        if lowered.startswith("nan"):
            return math.nan
        raise ValueError(f"Unrecognized float string literal {value}")

    def visit_literal(self, lit):
        """Build the ``CaseLiteral`` for one argument entry."""
        raw_value = self._get_or_die(lit, "value")
        type_name = self._get_or_die(lit, "type")
        data_only = self._get_or_else(lit, "is_not_a_func_arg", False)
        return CaseLiteral(
            self.__normalize_yaml_literal(raw_value, type_name), type_name, data_only
        )

    def visit_literal_result(self, lit):
        """Build the ``CaseLiteral`` for a literal result entry."""
        raw_value = self._get_or_die(lit, "value")
        type_name = self._get_or_die(lit, "type")
        return CaseLiteral(self.__normalize_yaml_literal(raw_value, type_name), type_name)

    def visit_result(self, res):
        """Return the ``special`` marker if present, otherwise the literal result."""
        special = self._get_or_else(res, "special", None)
        return self.visit_literal_result(res) if special is None else special

    def visit_case(self, case):
        """Build a ``ProtoCase`` whose group is still referenced by id."""
        group_ref = self._get_or_die(case, "group")
        if not isinstance(group_ref, str):
            # An inline group definition: register it and keep only its id.
            group_ref = self.visit_group(group_ref)
        result = self._visit_or_die(self.visit_result, case, "result")
        args = self._visit_list(self.visit_literal, case, "args")
        options = self._get_or_else(case, "options", {})
        # Options are stored as (name, value) pairs sorted by option name.
        option_pairs = [(key, options[key]) for key in sorted(options.keys())]
        return ProtoCase(group_ref, args, result, option_pairs)

    def visit(self, case_file):
        """Convert the whole document into a ``CaseFile``."""
        base_uri = self._get_or_die(case_file, 'base_uri')
        func_name = self._get_or_die(case_file, "function")
        resolved = []
        for proto in self._visit_list(self.visit_case, case_file, "cases"):
            resolved.append(self.__resolve_proto_case(proto, base_uri, func_name))
        return CaseFile(func_name, base_uri, resolved)
82 |
83 |
class CaseFileParser(BaseYamlParser[CaseFile]):
    """YAML parser that produces ``CaseFile`` objects."""

    def get_visitor(self) -> CaseFileVisitor:
        """Return a fresh ``CaseFileVisitor``."""
        visitor = CaseFileVisitor()
        return visitor
87 |
--------------------------------------------------------------------------------
/bft/cases/runner.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Literal, NamedTuple
3 |
4 | from bft.dialects.types import Dialect, SqlMapping
5 |
6 | from .types import Case
7 |
8 |
class CaseResult(NamedTuple):
    """Outcome of running one case, as returned by ``CaseRunner.run_case``."""

    # Whether the case passed.
    passed: bool
    # Whether the case was expected to pass.
    expected_pass: bool
    # Human-readable explanation attached to the outcome.
    reason: str
13 |
14 |
class CaseRunner(ABC):
    """Abstract interface for anything that can execute a ``Case``."""

    @abstractmethod
    def run_case(self, case: Case) -> CaseResult:
        """Run ``case`` and report the outcome as a ``CaseResult``."""
        ...
19 |
20 |
class SqlCaseResult(NamedTuple):
    """Raw outcome of executing one case as SQL.

    Use the static factory methods below; each fills only the fields that
    are meaningful for its outcome type and leaves the others ``None``.
    """

    type: Literal["success", "error", "unsupported", "unexpected_pass", "mismatch"]
    err: str
    actual: str

    @staticmethod
    def success():
        """Outcome with type ``success``; no error text and no actual value."""
        return SqlCaseResult(type="success", err=None, actual=None)

    @staticmethod
    def error(err: str):
        """Outcome with type ``error`` carrying the error text ``err``."""
        return SqlCaseResult(type="error", err=err, actual=None)

    @staticmethod
    def unsupported(err: str):
        """Outcome with type ``unsupported`` carrying the error text ``err``."""
        return SqlCaseResult(type="unsupported", err=err, actual=None)

    @staticmethod
    def unexpected_pass(actual: str):
        """Outcome with type ``unexpected_pass`` carrying the value ``actual``."""
        return SqlCaseResult(type="unexpected_pass", err=None, actual=actual)

    @staticmethod
    def mismatch(actual: str):
        """Outcome with type ``mismatch`` carrying the value ``actual``."""
        return SqlCaseResult(type="mismatch", err=None, actual=actual)
45 |
46 |
class SqlCaseRunner(CaseRunner):
    """Case runner for SQL dialects.

    Subclasses implement :meth:`run_sql_case`. :meth:`run_case` looks up the
    dialect's mapping for the case and translates the raw ``SqlCaseResult``
    into a ``CaseResult`` of (passed, expected_pass, reason).
    """

    def __init__(self, dialect: Dialect):
        self.__dialect = dialect

    def run_case(self, case: Case) -> CaseResult:
        """Run ``case`` against the dialect and classify the outcome.

        Returns:
            A ``CaseResult``. ``passed`` is always a real ``bool``.

        Raises:
            Exception: if ``run_sql_case`` returns an unknown result type.
        """
        mapping = self.__dialect.mapping_for_case(case)
        if mapping is None:
            return CaseResult(
                False,
                False,
                f"The dialect {self.__dialect.name} does not support the function '{case.function}'",
            )
        result = self.run_sql_case(case, mapping)
        if result.type == "success":
            # Store True here, not the SqlCaseResult tuple itself: the tuple
            # is truthy but is not a bool, so comparisons such as
            # ``passed == expected_pass`` would be wrong.
            return CaseResult(True, mapping.should_pass, mapping.reason)
        elif result.type == "unsupported":
            if mapping.should_pass:
                return CaseResult(
                    False,
                    True,
                    f"This case should have been supported. Instead it reported {result.err}",
                )
            else:
                return CaseResult(False, False, mapping.reason)
        elif result.type == "error":
            if case.result == "error":
                # Case expected to error. Dialect may or may not have expected it
                should_pass = mapping.should_pass
                if mapping.unsupported:
                    # Unsupported test case, expected an error and got an error
                    should_pass = True
                return CaseResult(True, should_pass, mapping.reason)
            else:
                if mapping.should_pass:
                    # Case should not have error. Dialect should not have error
                    return CaseResult(False, mapping.should_pass, result.err)
                else:
                    # Case should not have error but it's expected for dialect
                    return CaseResult(False, mapping.should_pass, mapping.reason)
        elif result.type == "unexpected_pass":
            # Case expected error. No error happened.
            if mapping.should_pass:
                # This was not expected given the dialect
                return CaseResult(
                    False,
                    mapping.should_pass,
                    f"This case should have given an error. Instead it returned the value {result.actual}",
                )
            else:
                # In this dialect, this case passes even though it shouldn't
                return CaseResult(False, mapping.should_pass, mapping.reason)
        elif result.type == "mismatch":
            if mapping.should_pass:
                return CaseResult(
                    False,
                    mapping.should_pass,
                    f"This case should have yielded the result {case.result.value} but instead it returned {result.actual}",
                )
            else:
                return CaseResult(False, mapping.should_pass, mapping.reason)
        else:
            raise Exception("Unexpected case result type")

    @abstractmethod
    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Execute ``case`` using ``mapping`` and return the raw SQL outcome."""
        pass
113 |
--------------------------------------------------------------------------------
/bft/cases/types.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Literal, NamedTuple, Tuple
2 |
3 |
class CaseLiteral(NamedTuple):
    """A typed literal, used for case arguments and literal case results."""

    # The literal's value; a list is one of the accepted shapes.
    value: str | int | float | list
    # Name of the value's data type.
    type: str
    is_not_a_func_arg: bool = False  # if true it is used only to populate test data
8 |
9 |
class CaseGroup(NamedTuple):
    """A named group that cases belong to."""

    # Identifier that cases use to refer to the group.
    id: str
    # Free-text description of the group.
    description: str
13 |
14 |
class Case(NamedTuple):
    """One fully resolved test case for a function."""

    # Name of the function under test.
    function: str
    # The ``base_uri`` value of the case file the case came from.
    base_uri: str
    # Group the case belongs to.
    group: CaseGroup
    # Argument literals.
    args: List[CaseLiteral]
    # Expected outcome: a literal value, or one of the special markers.
    result: CaseLiteral | Literal["error", "undefined"]
    # (option name, option value) pairs.
    options: List[Tuple[str, str]]
22 |
23 |
def case_to_kernel_str(
    function: str,
    args: List[CaseLiteral],
    result: CaseLiteral | Literal["error", "undefined"],
):
    """Format a kernel signature such as ``add(i8, i8) -> i8``.

    Args:
        function: Function name.
        args: Argument literals; only their ``type`` is used.
        result: A literal (its ``type`` is printed) or a string marker
            (printed as-is).

    Returns:
        The string ``"<function>(<arg types>) -> <result>"``.
    """
    arg_types = ", ".join(arg.type for arg in args)
    return_part = result if isinstance(result, str) else result.type
    return f"{function}({arg_types}) -> {return_part}"
34 |
35 |
class CaseFile(NamedTuple):
    """The cases parsed from one case document."""

    # Name of the function the cases exercise.
    function: str
    # The document's ``base_uri`` value.
    base_uri: str
    # The cases themselves.
    cases: List[Case]
40 |
41 |
class ProtoCase(NamedTuple):
    """A case whose group is still referenced by id rather than resolved."""

    # Id of the group the case belongs to.
    group: str
    # Argument literals.
    args: List[CaseLiteral]
    # Expected outcome: a literal value, or one of the special markers.
    result: CaseLiteral | Literal["error", "undefined"]
    # (option name, option value) pairs, the same shape as ``Case.options``.
    options: List[Tuple[str, str]]
47 |
--------------------------------------------------------------------------------
/bft/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/core/__init__.py
--------------------------------------------------------------------------------
/bft/core/function.py:
--------------------------------------------------------------------------------
1 | from typing import List, NamedTuple
2 |
3 |
class Option(NamedTuple):
    """A function option together with its known values."""

    # Option name.
    name: str
    # Values recorded for the option.
    values: List[str]
7 |
8 |
class Kernel(NamedTuple):
    """One kernel (argument types and return type) of a function."""

    # Type names of the arguments.
    arg_types: List[str]
    # Type name of the return value.
    return_type: str
    # Names of the options that apply to this kernel.
    available_options: List[str]
    # NOTE(review): FunctionBuilder.note_kernel declares this value as ``int``
    # while it is annotated ``str`` here; confirm which one is intended.
    variadic: str
14 |
15 |
class FunctionDefinition:
    """Finished description of a function: name, uri, text, options, kernels."""

    def __init__(
        self,
        name: str,
        uri: str,
        description: str,
        options: List[Option],
        kernels: List[Kernel],
    ):
        self.name, self.uri = name, uri
        self.description = description
        self.options, self.kernels = options, kernels

    @property
    def details(self):
        """Always an empty list in this implementation."""
        return list()

    @property
    def properties(self):
        """Always ``None`` in this implementation."""
        return None
38 |
39 |
class FunctionBuilder(object):
    """Accumulates what is known about one function, then builds its definition.

    Attributes:
        name: Function name given at construction.
        uri: Set by ``set_uri``; ``None`` until then.
        description: Set by ``set_description`` / ``try_set_description``.
        options: Maps option name to the list of values noted so far.
        kernels: ``Kernel`` tuples in the order they were noted.
    """

    def __init__(self, name: str):
        self.name = name
        self.uri: str = None
        self.description: str = None
        self.options = {}
        self.kernels = []

    def set_description(self, description: str):
        """Set the description, replacing any previous one."""
        self.description = description

    def set_uri(self, uri: str):
        """Set the uri, replacing any previous one."""
        self.uri = uri

    def try_set_description(self, description: str):
        """Set the description only if none has been set yet."""
        if self.description is None:
            self.description = description

    def note_option(self, name: str, values: List[str]):
        """Record ``values`` for option ``name``, merging with earlier values.

        Merged values keep first-seen order and are de-duplicated, so the
        result is deterministic (a plain ``set`` union is not). The caller's
        list is copied rather than stored, so later changes to it do not
        leak into the builder.
        """
        if name in self.options:
            # dict.fromkeys keeps insertion order while dropping duplicates.
            merged = dict.fromkeys(self.options[name])
            merged.update(dict.fromkeys(values))
            self.options[name] = list(merged)
        else:
            self.options[name] = list(values)

    def note_kernel(
        self,
        arg_types: List[str],
        return_type: str,
        available_options: List[str],
        variadic: int,
    ):
        """Append a ``Kernel`` built from the given signature parts."""
        self.kernels.append(Kernel(arg_types, return_type, available_options, variadic))

    def finish(self) -> FunctionDefinition:
        """Build the ``FunctionDefinition``.

        If no description was set, a placeholder text is stored on the
        builder and used.
        """
        if self.description is None:
            self.description = "Description is missing and would go here"
        opts = [Option(key, values) for key, values in self.options.items()]
        return FunctionDefinition(
            self.name, self.uri, self.description, opts, self.kernels
        )
85 |
86 |
class LibraryBuilder(object):
    """Registry of ``FunctionBuilder`` objects keyed by ``<category>_<name>``."""

    def __init__(self):
        self.functions = {}

    def get_function(self, name, category):
        """Return the builder for ``name`` in ``category``, creating it once.

        The lookup uses the same ``<category>_<name>`` key that the builder
        is stored under, so repeated calls return the same builder instead
        of replacing it and discarding what was already recorded.
        """
        full_name = f"{category}_{name}"
        if full_name not in self.functions:
            self.functions[full_name] = FunctionBuilder(full_name)
        return self.functions[full_name]

    def function_names(self) -> List[str]:
        """Return the registered ``<category>_<name>`` keys, sorted."""
        return sorted(self.functions.keys())

    def finish(self) -> List[FunctionDefinition]:
        """Finish every builder and return the definitions sorted by key."""
        return [self.functions[key].finish() for key in sorted(self.functions.keys())]
105 |
--------------------------------------------------------------------------------
/bft/core/index_parser.py:
--------------------------------------------------------------------------------
1 | from typing import List, NamedTuple
2 |
3 | from .yaml_parser import BaseYamlParser, BaseYamlVisitor
4 |
5 |
class IndexFunctionsFile(NamedTuple):
    """One extension entry of the index file."""

    # Value of the entry's ``location`` key.
    location: str
    # Value of the entry's ``canonical`` key.
    canonical_uri: str
9 |
class IndexFile(NamedTuple):
    """Parsed contents of the index file."""

    # Entries of ``substrait.extensions``.
    function_files: List[IndexFunctionsFile]
    # Value of the ``cases`` key (empty list when absent).
    case_directories: List[str]
    # Value of the ``dialects`` key (empty list when absent).
    dialect_directories: List[str]
    # Value of the ``supplements`` key (empty list when absent).
    supplement_directories: List[str]
15 |
class IndexFileVisitor(BaseYamlVisitor[IndexFile]):
    """Visitor that converts the loaded index document into an ``IndexFile``."""

    def __init__(self):
        super().__init__()

    def visit_function_file(self, function_file):
        """Convert one extension entry; ``location`` and ``canonical`` are required."""
        return IndexFunctionsFile(
            self._get_or_die(function_file, "location"),
            self._get_or_die(function_file, "canonical"),
        )

    def visit(self, index_file):
        """Convert the whole index document.

        ``substrait`` is required; ``cases``, ``dialects`` and ``supplements``
        default to empty lists.
        """
        substrait_section = self._get_or_die(index_file, "substrait")
        return IndexFile(
            function_files=self._visit_list(
                self.visit_function_file, substrait_section, "extensions"
            ),
            case_directories=self._get_or_else(index_file, "cases", []),
            dialect_directories=self._get_or_else(index_file, "dialects", []),
            supplement_directories=self._get_or_else(index_file, "supplements", []),
        )
32 |
33 |
class IndexFileParser(BaseYamlParser[IndexFile]):
    """YAML parser that produces ``IndexFile`` objects."""

    def get_visitor(self) -> IndexFileVisitor:
        """Return a fresh ``IndexFileVisitor`` (the visitor, not an ``IndexFile``)."""
        return IndexFileVisitor()
37 |
def load_index(index_path: str) -> IndexFile:
    """Parse the index file at ``index_path`` and return its first document.

    Raises:
        IndexError: if the file contains no YAML document.
    """
    index_parser = IndexFileParser()
    with open(index_path, 'rb') as stream:
        documents = index_parser.parse(stream)
        first_document = documents[0]
    return first_document
--------------------------------------------------------------------------------
/bft/core/yaml_parser.py:
--------------------------------------------------------------------------------
1 | import math
2 | from abc import ABC, abstractmethod
3 | from decimal import Decimal
4 | from typing import BinaryIO, Generic, Iterable, List, TypeVar
5 |
6 | import yaml
7 |
8 | from bft.cases.types import CaseLiteral
9 |
10 | try:
11 | from yaml import CSafeDumper as SafeDumper
12 | from yaml import CSafeLoader as SafeLoader
13 | except ImportError:
14 | from yaml import SafeDumper, SafeLoader
15 |
16 | T = TypeVar("T")
17 |
18 |
class BaseYamlVisitor(ABC, Generic[T]):
    """Base class for visitors that turn a loaded YAML object into ``T``.

    Provides helpers for reading required and optional attributes. A stack
    of attribute names is kept while visiting so that errors raised through
    ``_fail`` report where in the document they happened.
    """

    def __init__(self):
        # Attribute path currently being visited, outermost first.
        self.__location_stack: List[str] = []

    def _fail(self, err):
        """Raise an ``Exception`` that includes the current location and ``err``."""
        loc = "/".join(self.__location_stack)
        raise Exception(f"Error visiting case file. Location={loc} Message={err}")

    def __normalize_list_literal(self, literal):
        """Return a copy of ``literal`` with quoted inf/nan markers as floats."""
        normalized = []
        for item in literal.value:
            lowered = str(item).lower()
            if lowered.startswith("'inf'"):
                normalized.append(float("inf"))
            elif lowered.startswith("'-inf'"):
                normalized.append(float("-inf"))
            elif lowered.startswith("'nan'"):
                normalized.append(math.nan)
            else:
                normalized.append(item)
        return CaseLiteral(normalized, literal.type, literal.is_not_a_func_arg)

    def _visit_list(self, visitor, obj, attr, required=False):
        """Apply ``visitor`` to every item of ``obj[attr]`` and return the results.

        List-valued ``CaseLiteral`` results have their inf/nan markers
        normalized. Results stay in document order: each one is replaced at
        its own index, and the list is never appended to or removed from
        while it is being iterated.

        Returns:
            The visited items, or ``[]`` when ``attr`` is absent and not
            required.

        Raises:
            Exception: if ``attr`` is absent but required, or not iterable.
        """
        if attr in obj:
            val = obj[attr]
            results = []
            if not isinstance(val, Iterable):
                self._fail(f"Expected attribute {attr} to be iterable")
            for idx, item in enumerate(val):
                self.__location_stack.append(f"{attr}[{idx}]")
                results.append(visitor(item))
                self.__location_stack.pop()
            for pos, result in enumerate(results):
                if isinstance(result, CaseLiteral) and isinstance(result.value, list):
                    results[pos] = self.__normalize_list_literal(result)
            return results
        elif required:
            self._fail(f"Expected required attribute {attr}")
        else:
            return []

    def __visit_or_maybe_die(self, visitor, obj, attr, required, default=None):
        """Visit ``obj[attr]`` if present; otherwise fail or return ``default``."""
        if attr in obj:
            val = obj[attr]
            self.__location_stack.append(f"{attr}")
            visited = visitor(val)
            self.__location_stack.pop()
            return visited
        elif required:
            self._fail(f"Expected required attribute {attr}")
        else:
            return default

    def _visit_or_die(self, visitor, obj, attr):
        """Visit ``obj[attr]``; raise through ``_fail`` when it is missing."""
        # required=True: a missing attribute is an error here.
        return self.__visit_or_maybe_die(visitor, obj, attr, True)

    def _visit_or_else(self, visitor, obj, attr, default):
        """Visit ``obj[attr]``; return ``default`` when it is missing."""
        # required=False: a missing attribute yields the default.
        return self.__visit_or_maybe_die(visitor, obj, attr, False, default)

    def _get_or_die(self, obj, attr):
        """Return ``obj[attr]``; raise through ``_fail`` when it is missing."""
        if attr in obj:
            return obj[attr]
        self._fail(f"Expected required attribute {attr}")

    def _get_or_else(self, obj, attr, default):
        """Return ``obj[attr]``, or ``default`` when it is missing."""
        if attr in obj:
            return obj[attr]
        return default

    @abstractmethod
    def visit(self, yamlobj) -> T:
        """Convert one loaded YAML document into ``T``."""
        pass
87 |
88 |
class BaseYamlParser(ABC, Generic[T]):
    """Base class for parsers that load YAML documents and visit each one."""

    @abstractmethod
    def get_visitor(self) -> BaseYamlVisitor[T]:
        """Return the visitor used to convert each loaded document."""
        pass

    def get_loader(self):
        """Return ``yaml.SafeLoader`` after registering the custom decimal tags."""
        loader = yaml.SafeLoader
        # Adds the tags "!decimal" and "!decimallist" to the loader.
        custom_tags = (
            ("!decimal", self.decimal_constructor),
            ("!decimallist", self.list_of_decimal_constructor),
        )
        for tag, constructor in custom_tags:
            loader.add_constructor(tag, constructor)
        return loader

    def decimal_constructor(self, loader: yaml.SafeLoader, node: yaml.nodes.MappingNode):
        """Constructor for ``!decimal``: delegates to ``get_decimal_value``."""
        return self.get_decimal_value(loader, node)

    def get_decimal_value(self, loader: yaml.SafeLoader, node: yaml.ScalarNode):
        """Return the scalar as ``Decimal``, or ``None`` for the string ``null``."""
        scalar = loader.construct_scalar(node)
        is_null = isinstance(scalar, str) and scalar.lower() == 'null'
        if is_null:
            return None
        return Decimal(scalar)

    def list_of_decimal_constructor(self, loader: yaml.SafeLoader, node: yaml.nodes.MappingNode):
        """Constructor for ``!decimallist``: converts each child node of ``node``."""
        decimals = []
        for item in node.value:
            decimals.append(self.get_decimal_value(loader, item))
        return decimals

    def parse(self, f: BinaryIO) -> List[T]:
        """Load every YAML document from ``f`` and visit each with one visitor."""
        loader = self.get_loader()
        documents = yaml.load_all(f, loader)
        visitor = self.get_visitor()
        parsed = []
        for document in documents:
            parsed.append(visitor.visit(document))
        return parsed
118 |
--------------------------------------------------------------------------------
/bft/core/yaml_parser_test.py:
--------------------------------------------------------------------------------
1 | from decimal import Decimal
2 | from typing import NamedTuple
3 |
4 | from bft.core.yaml_parser import BaseYamlParser
5 |
6 |
class TestDecimalResult(NamedTuple):
    """Wrapper around whatever the YAML loader produced for a test document."""

    # The loaded value: a single decimal or a list of decimals.
    cases: Decimal | list[Decimal]
9 |
class TestCaseVisitor:
    """Minimal visitor that wraps the loaded document in ``TestDecimalResult``."""

    def visit(self, testcase):
        wrapped = TestDecimalResult(testcase)
        return wrapped
class DecimalTestCaseParser(BaseYamlParser[TestDecimalResult]):
    """Parser used by the tests to exercise BaseYamlParser's decimal tags."""

    def get_visitor(self) -> TestCaseVisitor:
        # A fresh visitor is created on every call.
        return TestCaseVisitor()
16 |
def test_yaml_parser_decimal_tag():
    """Check that ``!decimal`` and ``!decimallist`` load as Decimal/None values."""
    parser = DecimalTestCaseParser()
    # parser returns list of parsed values
    assert parser.parse(b"!decimal 1") == [TestDecimalResult(Decimal('1'))]
    assert parser.parse(b"!decimal 1.78766") == [TestDecimalResult(Decimal('1.78766'))]
    # The text "null" maps to None rather than a Decimal.
    assert parser.parse(b"!decimal null") == [TestDecimalResult(None)]
    assert parser.parse(b"!decimallist [1.2, null, 7.547]") == [TestDecimalResult([Decimal('1.2'), None, Decimal('7.547')])]
24 |
--------------------------------------------------------------------------------
/bft/dialects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/dialects/__init__.py
--------------------------------------------------------------------------------
/bft/dialects/loader.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List
3 |
4 | from .parser import DialectFileParser
5 | from .types import DialectFile, DialectsLibrary
6 |
7 |
def load_dialects(dialects_dir: str) -> DialectsLibrary:
    """Parse every ``*.yaml`` file under ``dialects_dir`` into a DialectsLibrary.

    The directory is searched recursively and each file may contribute more
    than one dialect document.
    """
    file_parser = DialectFileParser()
    collected: List[DialectFile] = []
    for yaml_path in Path(dialects_dir).rglob("*.yaml"):
        with open(yaml_path, "rb") as handle:
            collected.extend(file_parser.parse(handle))
    return DialectsLibrary(collected)
16 |
--------------------------------------------------------------------------------
/bft/dialects/parser.py:
--------------------------------------------------------------------------------
1 | from bft.core.yaml_parser import BaseYamlParser, BaseYamlVisitor
2 | from bft.dialects.types import DialectFile, DialectFunction, DialectKernel, short_type_to_type
3 |
4 |
class DialectFileVisitor(BaseYamlVisitor[DialectFile]):
    """Visitor that converts a loaded dialect YAML document into a DialectFile."""

    @staticmethod
    def visit_kernel(kernel):
        """Build a DialectKernel from an underscore-separated kernel string.

        An empty string produces a kernel with no argument types.
        """
        if kernel == '':
            arg_types = []
        else:
            arg_types = [
                DialectFileVisitor.get_long_type(piece) for piece in kernel.split("_")
            ]
        # The builtin ``any`` is passed as the second DialectKernel field.
        return DialectKernel(arg_types, any)

    @staticmethod
    def get_long_type(short_type):
        """Return the long type name for ``short_type``, or ``short_type`` itself
        when ``short_type_to_type`` has no (non-None) entry for it."""
        mapped = short_type_to_type.get(short_type, None)
        return short_type if mapped is None else mapped

    @staticmethod
    def _get_unqualified_func_name(name):
        """Return the part of ``name`` after the last ``.``."""
        *_, unqualified = name.split(".")
        return unqualified

    def visit_function(self, func):
        """Convert one function entry of a dialect file into a DialectFunction."""
        name = self._get_or_die(func, "name")
        required_opts = self._get_or_else(func, "required_options", {})
        # The local name defaults to the unqualified function name.
        default_local_name = self._get_unqualified_func_name(name)
        local_name = self._get_or_else(func, "local_name", default_local_name)
        infix = self._get_or_else(func, "infix", False)
        postfix = self._get_or_else(func, "postfix", False)
        between = self._get_or_else(func, "between", False)
        aggregate = self._get_or_else(func, "aggregate", False)
        unsupported = self._get_or_else(func, "unsupported", False)
        # The extract function uses a special grammar in some SQL dialects.
        # i.e. SELECT EXTRACT(YEAR FROM times) FROM my_table
        extract = self._get_or_else(func, "extract", False)
        good_kernels = self._visit_list(self.visit_kernel, func, "supported_kernels")
        # -1 is used when the entry has no "variadic" attribute.
        variadic_min = self._get_or_else(func, "variadic", -1)
        return DialectFunction(
            name,
            local_name,
            infix,
            postfix,
            between,
            aggregate,
            unsupported,
            extract,
            required_opts,
            variadic_min,
            good_kernels,
        )

    def visit(self, dfile):
        """Convert a whole dialect document into a DialectFile."""
        name = self._get_or_die(dfile, "name")
        dtype = self._get_or_die(dfile, "type")
        scalar_functions = self._visit_list(
            self.visit_function, dfile, "scalar_functions"
        )
        aggregate_functions = self._visit_list(
            self.visit_function, dfile, "aggregate_functions"
        )
        # "dependencies" maps prefix -> uri in the file; invert it to uri -> prefix.
        uri_to_func_prefix = {}
        for func_prefix, uri in dfile.get("dependencies", {}).items():
            uri_to_func_prefix[uri] = func_prefix
        supported_types = self._visit_list(self.get_long_type, dfile, "supported_types")
        return DialectFile(
            name,
            dtype,
            scalar_functions,
            aggregate_functions,
            uri_to_func_prefix,
            supported_types,
        )
64 |
65 |
class DialectFileParser(BaseYamlParser[DialectFile]):
    """YAML parser whose documents are converted by a DialectFileVisitor."""

    def get_visitor(self) -> DialectFileVisitor:
        # A fresh visitor is created on every call.
        return DialectFileVisitor()
69 |
--------------------------------------------------------------------------------
/bft/html/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/html/__init__.py
--------------------------------------------------------------------------------
/bft/html/types.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Literal, NamedTuple
2 |
3 | from bft.core.function import Kernel
4 |
5 |
class FunctionOptionValueInfo(NamedTuple):
    """One permitted value of a function option."""

    # The name of the value
    # Sourced from Substrait YAML
    name: str
    # Description of the value
    # Sourced from BFT markdown
    description: str
13 |
14 |
15 | # An option that can control function behavior
class FunctionOptionInfo(NamedTuple):
    """A named option together with its description and permitted values."""

    # The name of the option
    # Sourced from Substrait YAML
    name: str
    # Description of the option
    # Sourced from Substrait YAML
    # Can be overridden by BFT markdown
    description: str
    # Possible values for the option
    # Sourced from Substrait YAML
    values: List[FunctionOptionValueInfo]
27 |
28 |
29 | # Information about how the function behaves in different dialects
class FunctionDialectInfo(NamedTuple):
    """Per-dialect information shown for a function."""

    # Name of the dialect (e.g. sqlite)
    # Sourced from dialect files
    name: str
    # Required options for this function in the given dialect
    # Sourced from Substrait YAML
    options: Dict[str, str]
    # NOTE(review): presumably one entry per example case for this dialect;
    # confirm against the HTML builder.
    case_info: List[str]
    # NOTE(review): presumably one flag per kernel saying whether the dialect
    # supports it; confirm against the HTML builder.
    kernel_info: List[bool]
39 |
40 |
41 | # Additional details or motivation for the function
class FunctionDetailInfo(NamedTuple):
    """A titled free-text detail section for a function."""

    # Title of the detail section
    # Sourced from BFT markdown
    title: str
    # Body of the detail section
    # Sourced from BFT markdown
    description: str
49 |
50 |
51 | # Invariants that the function respects
52 | # Mostly useful for property-based testing
class FunctionPropertyInfo(NamedTuple):
    """A named invariant of a function and its description."""

    # The name of the invariant
    # Sourced from BFT markdown
    id: str
    # A description of the invariant
    # Sourced from BFT markdown
    description: str
60 |
61 |
class FunctionExampleResultInfo(NamedTuple):
    """The concrete result value of an example case."""

    # Value of the result
    # Sourced from case files
    value: str
66 |
67 |
class FunctionExampleCaseInfo(NamedTuple):
    """One example execution: its arguments, option values and result."""

    # Arguments to the function for this test case
    # Sourced from case files
    args: List[str]
    # Options values for this function
    # Sourced from case files
    options: List[str]
    # Result of the function run on the args: either the literal "error" or
    # "undefined", or a FunctionExampleResultInfo carrying the value
    # Sourced from case files
    result: Literal["error"] | Literal["undefined"] | FunctionExampleResultInfo
78 |
79 |
class FunctionExampleGroupInfo(NamedTuple):
    """A group of example cases that share argument, option and result types."""

    # Description of the example group
    # Sourced from case files
    description: str
    # Argument types for the examples in the group
    # Sourced from case files
    arg_types: List[str]
    # Names of options used in the examples in this group
    # Sourced from case files
    option_names: List[str]
    # Result type for the examples in the group
    # Sourced from case files
    result_type: str
    # Example executions
    cases: List[FunctionExampleCaseInfo]
95 |
96 |
97 | # Information describing a function
class FunctionInfo(NamedTuple):
    """Everything the function description template needs for one function."""

    # Name of the function (e.g. add)
    # Sourced from Substrait YAML
    name: str
    # The Substrait URI for the function (e.g. https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml)
    # Sourced from Substrait YAML
    # Can be overridden by BFT markdown
    uri: str
    # The last part of the URI (e.g. functions_arithmetic.yaml)
    # Sourced from Substrait YAML
    uri_short: str
    # A very brief (ideally one sentence) description of the function
    # Sourced from Substrait YAML
    brief: str
    # Available options for the function
    options: List[FunctionOptionInfo]
    # Available kernels for the function
    kernels: List[Kernel]
    # Dialect info for the function
    dialects: List[FunctionDialectInfo]
    # Function details
    details: List[FunctionDetailInfo]
    # Properties that hold true for the function
    properties: List[FunctionPropertyInfo]
    # Example function executions
    example_groups: List[FunctionExampleGroupInfo]
124 |
125 |
class FunctionIndexItem(NamedTuple):
    """One function entry in the function index."""

    # Name of the function
    name: str
    # Summary of the function, sourced from Substrait YAML
    brief: str
    # Function category, i.e. Arithmetic, String, etc.
    category: str
133 |
134 |
class FunctionIndexInfo(NamedTuple):
    """The list of entries that make up the function index."""

    # All functions listed in the index
    functions: List[FunctionIndexItem]
137 |
--------------------------------------------------------------------------------
/bft/substrait/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/substrait/__init__.py
--------------------------------------------------------------------------------
/bft/substrait/extension_file_parser.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | from collections import namedtuple
3 | from collections.abc import Iterable
4 | from typing import Dict, List, NamedTuple
5 |
6 | import yaml
7 |
8 | try:
9 | from yaml import CSafeDumper as SafeDumper
10 | from yaml import CSafeLoader as SafeLoader
11 | except ImportError:
12 | from yaml import SafeLoader, SafeDumper
13 |
14 | from typing import BinaryIO
15 |
16 | from ..core.function import FunctionBuilder, LibraryBuilder
17 |
18 |
class ValueArg(NamedTuple):
    """An implementation argument that carries a value type."""

    # "name" attribute of the argument, or None when absent
    name: str
    # "description" attribute of the argument, or None when absent
    description: str
    # The argument's "value" attribute from the extension YAML
    type: str
23 |
24 |
class EnumArg(NamedTuple):
    """An implementation argument that has "options" instead of a value type."""

    # "name" attribute of the argument, or None when absent
    name: str
    # "description" attribute of the argument, or None when absent
    description: str
    # The argument's "options" attribute from the extension YAML
    options: List[str]
29 |
30 |
class Implementation(NamedTuple):
    """One implementation ("impl") of a function in an extension file."""

    # Arguments in declaration order
    args: List[ValueArg | EnumArg]
    # Option name -> that option's "values" list
    options: Dict[str, List[str]]
    # The impl's "return" attribute
    return_type: str
    # NOTE(review): the visitor in this file fills this field with a string
    # ("0" or str(variadic.min)) even though it is annotated int.
    variadic: int
36 |
37 |
class Function(NamedTuple):
    """A function entry read from an extension file."""

    # Required "name" attribute
    name: str
    # "description" attribute, or None when absent
    description: str
    # Visited entries of the function's "impls" list
    implementations: List[Implementation]
42 |
43 |
class ExtensionsFile(NamedTuple):
    """All functions read from one extension file."""

    # Scalar functions followed by aggregate functions
    functions: List[Function]
46 |
47 |
class ExtensionFileVisitor(object):
    """Walks a parsed extension YAML document and builds the typed records."""

    def __init__(self):
        # Breadcrumbs ("attr[idx]") of the list entries currently being visited;
        # included in error messages.
        self.location_stack = []

    def __fail(self, err):
        """Raise an Exception carrying the current location and ``err``."""
        where = "/".join(self.location_stack)
        raise Exception(f"Error visiting extension file. Location={where} Message={err}")

    def __visit_list(self, visitor, obj, attr, required=False):
        """Apply ``visitor`` to every entry of ``obj[attr]`` and return the results.

        A missing attribute yields ``[]`` unless ``required`` is set, in which
        case it is an error. A non-iterable attribute is always an error.
        """
        if attr not in obj:
            if required:
                self.__fail(f"Expected required attribute {attr}")
            return []
        entries = obj[attr]
        if not isinstance(entries, Iterable):
            self.__fail(f"Expected attribute {attr} to be iterable")
        visited = []
        for position, entry in enumerate(entries):
            self.location_stack.append(f"{attr}[{position}]")
            visited.append(visitor(entry))
            self.location_stack.pop()
        return visited

    def __get_or_die(self, obj, attr):
        """Return ``obj[attr]``; a missing attribute is an error."""
        if attr not in obj:
            self.__fail(f"Expected required attribute {attr}")
        return obj[attr]

    def __get_or_else(self, obj, attr, default):
        """Return ``obj[attr]`` when present, otherwise ``default``."""
        return obj[attr] if attr in obj else default

    def visit_ext_file(self, parsed_file):
        """Collect scalar functions, then aggregate functions, into an ExtensionsFile."""
        functions = []
        for section in ("scalar_functions", "aggregate_functions"):
            functions.extend(
                self.__visit_list(self.visit_function, parsed_file, section)
            )
        return ExtensionsFile(functions)

    def visit_impl_arg(self, arg):
        """Build a ValueArg when the argument has a truthy "value", else an EnumArg."""
        name = self.__get_or_else(arg, "name", None)
        description = self.__get_or_else(arg, "description", None)
        value = self.__get_or_else(arg, "value", None)
        if value:
            return ValueArg(name, description, value)
        options = self.__get_or_else(arg, "options", None)
        if options is None:
            self.__fail(
                "Argument encountered that did not have any value or options"
            )
        return EnumArg(name, description, options)

    def visit_implementation(self, impl):
        """Convert one "impls" entry into an Implementation."""
        args = self.__visit_list(self.visit_impl_arg, impl, "args")
        options = self.__get_or_else(impl, "options", {})
        # Stored as a string: "0" when the impl has no "variadic" attribute.
        variadic = str(impl["variadic"]["min"]) if "variadic" in impl else "0"
        # Each option must provide a "values" list.
        opts = {
            key: self.__get_or_die(options[key], "values") for key in options.keys()
        }
        return_type = self.__get_or_die(impl, "return")
        return Implementation(args, opts, return_type, variadic)

    def visit_function(self, func):
        """Convert one function entry into a Function."""
        name = self.__get_or_die(func, "name")
        description = self.__get_or_else(func, "description", None)
        implementations = self.__visit_list(self.visit_implementation, func, "impls")
        return Function(name, description, implementations)
123 |
124 |
class ExtensionFileParser(object):
    """Loads an extension YAML stream and converts it to an ExtensionsFile."""

    def parse(self, f: BinaryIO) -> ExtensionsFile:
        """Load ``f`` with the safe loader and visit the resulting document."""
        data = yaml.load(f, SafeLoader)
        return ExtensionFileVisitor().visit_ext_file(data)
129 |
130 |
def add_extensions_file_to_library(
    location: str, ext_file: ExtensionsFile, library: LibraryBuilder
):
    """Register every function of ``ext_file`` with ``library``.

    Args:
        location: Path of the extension file, as a ``str`` or a path object.
            The file name (for example ``functions_arithmetic.yaml``) is used
            as the function URI, and its stem without the ``functions_``
            prefix as the function category.
        ext_file: The parsed extension file.
        library: Builder that receives descriptions, options and kernels.
    """
    # Normalise once so that plain strings work too; the previous
    # ``location.name`` access required a path-like object.
    location_path = pathlib.Path(location)
    function_category = location_path.stem.replace("functions_", "")
    for func in ext_file.functions:
        builder: FunctionBuilder = library.get_function(func.name, function_category)
        builder.set_uri(location_path.name)
        if func.description is not None:
            builder.try_set_description(func.description)
        for impl in func.implementations:
            for opt_name, opt_values in impl.options.items():
                builder.note_option(opt_name, opt_values)
            arg_types = []
            for arg in impl.args:
                if isinstance(arg, ValueArg):
                    arg_types.append(arg.type)
                else:
                    # Enum arguments are rendered as their options joined by "|".
                    arg_types.append("|".join(arg.options))
            builder.note_kernel(
                arg_types, impl.return_type, impl.options.keys(), impl.variadic
            )
152 |
--------------------------------------------------------------------------------
/bft/supplements/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/supplements/__init__.py
--------------------------------------------------------------------------------
/bft/supplements/parser.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | from typing import Dict, TextIO
3 |
4 | from mistletoe.ast_renderer import get_ast
5 | from mistletoe.block_token import Document, Heading, Paragraph
6 | from mistletoe.html_renderer import HTMLRenderer
7 | from mistletoe.span_token import RawText
8 |
9 | from .types import BasicSupplement, OptionSupplement, SupplementsFile
10 |
11 |
class SupplementsParser(object):
    """Parses a function's supplemental markdown document.

    The document must start with a level 1 heading naming the function.
    Level 2 headings open one of the sections "Options", "Details" or
    "Properties". Inside "Details"/"Properties" each level 3 heading starts a
    titled sub-section; inside "Options" each level 3 heading names an option
    and each level 4 heading names one of its values. Paragraphs are rendered
    to HTML and attached to whatever heading precedes them.
    """

    def __init__(self):
        self.html_renderer = HTMLRenderer()
        self.__reset()

    def __reset(self):
        """Clear all parsing state so the instance can parse another document."""
        # Callback that stores the supplements of the current L2 section
        self.__finish = None
        # Rendered paragraphs collected since the last heading was closed
        self.__paragraphs = []
        self.__sub_section_title = None
        self.__option_name = None
        self.__option_description = None
        self.__parsing_options = False
        self.__current_option_value = None
        # Supplements collected for the current section or option
        self.__supplements = []
        self.options = {}
        self.details = []
        self.properties = []

    def __get_simple_text(self, heading: Heading) -> str:
        """Return the text of a heading that consists of a single raw text span.

        Raises:
            Exception: if the heading has more than one child or the child is
                not raw text.
        """
        if len(heading.children) != 1:
            raise Exception(
                f"Expected heading to have one line of simple text but there were {len(heading.children)} sub-elements"
            )
        text_child = heading.children[0]
        if not isinstance(text_child, RawText):
            raise Exception(
                f"Expected heading to contain simple raw text but it was {type(text_child)}"
            )
        return text_child.content

    def __add_options(self):
        # Options are written into self.options as each one is finished, so
        # there is nothing left to store when the section ends.
        pass

    def __add_properties(self):
        self.properties = self.__supplements

    def __add_details(self):
        self.details = self.__supplements

    def __finish_last_task(self):
        """Store the finished L2 section (if any) and reset section state."""
        if self.__finish is not None:
            self.__finish()
        self.__finish = None
        self.__parsing_options = False
        self.__supplements = []

    def __finish_option(self):
        """Record the current option with its description and value supplements."""
        if self.__option_name is None:
            return
        self.options[self.__option_name.lower()] = OptionSupplement(
            self.__option_description, self.__supplements
        )
        self.__option_name = None
        self.__supplements = []

    def __finish_section(self):
        """Turn the current sub-section title and paragraphs into a supplement.

        Raises:
            Exception: if paragraphs exist without a title, or a title has no
                paragraphs.
        """
        if self.__sub_section_title is None:
            if len(self.__paragraphs) != 0:
                raise Exception(
                    f"Encountered paragraphs but no L3/L4 section title starting at {self.__paragraphs[0]}"
                )
            return
        if len(self.__paragraphs) == 0:
            raise Exception(f"Sub-section {self.__sub_section_title} had no paragraphs")
        self.__supplements.append(
            BasicSupplement(self.__sub_section_title, "\n".join(self.__paragraphs))
        )
        self.__sub_section_title = None
        self.__paragraphs = []

    def __finish_last_l3(self):
        if self.__parsing_options:
            self.__finish_option()
        else:
            self.__finish_section()

    def __finish_last_l4(self):
        """Attach pending paragraphs to the current option or option value.

        Does nothing outside of an option, where pending paragraphs belong to
        the sub-section instead.
        """
        if self.__option_name is None:
            return
        content = "\n".join(self.__paragraphs)
        if self.__current_option_value is None:
            # Paragraphs before the first L4 heading describe the option itself.
            self.__option_description = content
        else:
            self.__supplements.append(
                BasicSupplement(self.__current_option_value.upper(), content)
            )
            self.__current_option_value = None
        self.__paragraphs = []

    def __parse_heading(self, heading: Heading):
        """Close whatever the heading ends and open what it starts.

        Headings at levels other than 2, 3 and 4 are ignored.
        """
        heading_title = self.__get_simple_text(heading)
        if heading.level == 2:
            self.__finish_last_l4()
            self.__finish_last_l3()
            self.__finish_last_task()
            if heading_title.lower() == "options":
                self.__parsing_options = True
                self.__finish = self.__add_options
            elif heading_title.lower() == "details":
                self.__finish = self.__add_details
            elif heading_title.lower() == "properties":
                self.__finish = self.__add_properties
            else:
                raise Exception(f"Unexpected L2 heading '{heading_title}'")
        elif heading.level == 3:
            if self.__finish is None:
                raise Exception(
                    f"L3 heading {heading_title} with no L2 heading preceding it"
                )
            self.__finish_last_l4()
            self.__finish_last_l3()
            if self.__parsing_options:
                self.__option_name = heading_title
            else:
                self.__sub_section_title = heading_title
        elif heading.level == 4:
            if not self.__parsing_options:
                raise Exception(
                    f"L4 heading {heading_title} encountered but we are not currently parsing options"
                )
            self.__finish_last_l4()
            self.__current_option_value = heading_title

    def __parse_paragraph(self, paragraph: Paragraph):
        self.__paragraphs.append(self.html_renderer.render_paragraph(paragraph))

    def __parse_child(self, child):
        """Dispatch a top-level element; only headings and paragraphs are allowed."""
        if isinstance(child, Heading):
            self.__parse_heading(child)
        elif isinstance(child, Paragraph):
            self.__parse_paragraph(child)
        else:
            raise Exception(
                f"Unrecognized top-level element type in supplements file {type(child)}"
            )

    def parse_supplements_doc(self, f: TextIO, directory_path: str) -> SupplementsFile:
        """Parse the markdown stream ``f`` into a SupplementsFile.

        Args:
            f: Markdown source of the supplements document.
            directory_path: Stored unchanged as the result's ``dir_path``.

        Raises:
            Exception: if the document is empty, does not start with a level 1
                heading, or violates the section structure.
        """
        self.__reset()
        doc = Document(f)

        if len(doc.children) == 0:
            raise Exception(
                "Supplements document appears to be empty. It should at least have a title"
            )

        title_section = doc.children[0]
        if not isinstance(title_section, Heading) or title_section.level != 1:
            raise Exception(
                "First element in a supplements doc should be a level 1 heading with the name of the function"
            )

        function_name = self.__get_simple_text(title_section).lower()
        for child in doc.children[1:]:
            self.__parse_child(child)

        # Flush whatever was still open when the document ended.
        self.__finish_last_l4()
        self.__finish_last_l3()
        self.__finish_last_task()

        return SupplementsFile(
            function_name, directory_path, self.options, self.details, self.properties
        )
174 |
175 |
def load_supplements(supplements_dir: str) -> Dict[str, SupplementsFile]:
    """Parse every ``*.md`` file under ``supplements_dir``.

    The directory is searched recursively. The result maps the lower-cased
    function name of each document to its parsed SupplementsFile; a later file
    with the same function name replaces an earlier one.
    """
    supplements = {}
    parser = SupplementsParser()
    for sup_path in pathlib.Path(supplements_dir).rglob("*.md"):
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default text encoding.
        with open(sup_path, "r", encoding="utf-8") as sup_f:
            sup = parser.parse_supplements_doc(sup_f, str(pathlib.Path(sup_path).parent))
            supplements[sup.function.lower()] = sup
    return supplements
184 |
--------------------------------------------------------------------------------
/bft/supplements/types.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, NamedTuple
2 |
3 |
class BasicSupplement(NamedTuple):
    """A titled block of supplemental text."""

    # Heading the text belongs to
    title: str
    # Body text of the supplement
    description: str
7 |
8 |
class OptionSupplement(NamedTuple):
    """Supplemental text for one option and for each of its values."""

    # Description of the option itself
    description: str
    # One supplement per documented option value
    values: List[BasicSupplement]
12 |
13 |
class SupplementsFile(NamedTuple):
    """Supplemental documentation for one function."""

    # Name of the function the supplements describe
    function: str
    # Directory associated with the supplements ("" for an empty file)
    dir_path: str
    # Option name -> supplemental text for that option
    options: Dict[str, OptionSupplement]
    # Detail sections
    details: List[BasicSupplement]
    # Property sections
    properties: List[BasicSupplement]
20 |
21 |
def empty_supplements_file(function_name: str):
    """Build a SupplementsFile for ``function_name`` with no supplemental content."""
    return SupplementsFile(
        function=function_name,
        dir_path="",
        options={},
        details=[],
        properties=[],
    )
24 |
--------------------------------------------------------------------------------
/bft/templates/function_desc.j2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | {{ name }} Function - BFT
7 |
8 |
9 | {% if 'aggregate' in name %}
10 |
11 | {% else %}
12 |
13 | {% endif %}
14 |
15 |
16 |
17 |
18 |
19 |
33 |
34 |
35 | {{ name|title}}
36 |
37 |
38 | Defined in {{ uri_short }}
39 |
40 |
41 |
42 | {{ brief }}
43 |
44 |
45 |
46 |
47 | Options ¶
48 | {% for option in options %}
49 | {{option.name|title}}
50 | {{option.description}}
51 |
52 | {% for value in option.values %}
53 | {{value.name|upper}}
54 | {{value.description}}
55 |
56 | {% endfor %}
57 |
58 | {% endfor %}
59 |
60 |
61 |
62 | Kernels ¶
63 |
64 | {% for kernel in kernels %}
65 | {{ name }}({{ kernel.arg_types|join(', ') }}) -> {{ kernel.return_type }} : [{{ kernel.available_options|join(', ') }}] (not supported by dialect)
66 | {% endfor %}
67 |
68 |
69 |
70 |
71 | Dialects ¶
72 |
73 | {% for dialect in dialects %}
74 | {{ dialect.name }}
75 | {% endfor %}
76 |
77 | {% for dialect in dialects %}
78 |
79 | {% if dialect.options is none %}
80 | Dialect isn't yet supported
81 | {% else %}
82 | {% for opt, val in dialect.options.items() %}
83 | {{ opt }}
84 | {{ val }}
85 | {% endfor %}
86 | {% endif %}
87 |
88 | {% endfor %}
89 |
90 |
91 |
92 | Details ¶
93 | {% if details %}
94 | {% for detail in details %}
95 | {{ detail.title }}
96 | {{ detail.description }}
97 | {% endfor %}
98 | {% else %}
99 | No supplemental information about the details available
100 | {% endif %}
101 |
102 |
103 |
104 | Properties ¶
105 | {% if properties %}
106 |
107 | {% for property in properties | sort(attribute='id') %}
108 | {{ property.id }}
109 | {{ property.description }}
110 | {% endfor %}
111 |
112 | {% else %}
113 | No supplemental information about the properties available
114 | {% endif %}
115 |
116 |
117 |
118 | Examples ¶
119 | {% for example_group in example_groups %}
120 |
121 | {{ example_group.description }}
122 |
123 |
124 | {% for opt in example_group.option_names %}
125 | {{ opt }}
126 | {% endfor %}
127 | {% for arg_type in example_group.arg_types %}
128 | arg{{ loop.index }} ({{ arg_type }})
129 | {% endfor %}
130 | result ({{ example_group.result_type }})
131 |
132 |
133 |
134 | {% for case in example_group.cases %}
135 |
136 | {% for opt in case.options %}
137 | {{ opt }}
138 | {% endfor %}
139 | {% for arg in case.args %}
140 | {{ arg }}
141 | {% endfor %}
142 | {{ case.result.value | default(case.result | upper) }}
143 |
144 |
145 |
146 |
147 | {% endfor -%}
148 |
149 |
150 | {% endfor %}
151 |
152 |
153 |
154 |
179 |
180 |
181 |
--------------------------------------------------------------------------------
/bft/templates/function_index.j2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | BFT - Home
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | Big Function Taxonomy
25 |
26 |
27 |
28 |
29 |
33 |
34 |
35 |
36 |
37 |
38 |
47 |
48 |
49 |
50 |
51 |
The B(ig) F(unction) T(axonomy)
52 |
53 |
54 |
55 | The BFT aims to be a comprehensive catalogue of functions. Functions are the backbone of
56 | any compute system, but they are chronically under-documented and often full of corner
57 | cases whose behavior differs in various systems. By exhaustively documenting
58 | these corner cases we hope to make it possible for systems to fully describe their behaviors.
59 | This will make it easier to know what problems will be encountered switching between systems and,
60 | in some cases, make it possible to obtain the correct behavior through expression transformation
61 | or a precise application of function options.
62 |
63 |
64 |
65 | {% for category, category_functions in functions_by_category %}
66 |
67 |
68 |
69 |
{{ category|title }} Functions
70 |
71 |
72 |
73 | {% endfor %}
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 | {% for category, category_functions in functions_by_category %}
84 |
85 |
86 |
{{ category|title }} Functions
87 |
88 |
89 |
90 |
91 | {% for function in category_functions %}
92 |
93 | {% if 'aggregate' in function.name %}
94 | {{ "_".join(function.name.split('_')[2:])|title }}
95 | {{ function.brief }}
96 | {% else %}
97 | {{ "_".join(function.name.split('_')[1:])|title }}
98 | {{ function.brief }}
99 | {% endif %}
100 |
101 | {% endfor %}
102 |
103 |
104 |
105 |
106 | {% endfor %}
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
130 |
131 |
132 |
133 |
--------------------------------------------------------------------------------
/bft/testers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/__init__.py
--------------------------------------------------------------------------------
/bft/testers/base_tester.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from pathlib import Path
3 | from typing import List, NamedTuple
4 |
5 | from bft.cases.runner import CaseRunner
6 | from bft.cases.types import Case
7 | from bft.dialects.types import Dialect, DialectsLibrary
8 |
9 |
class TestResult(NamedTuple):
    """Outcome of running one case, as reported by BaseTester.run_test."""

    # Function of the case that was run
    function: str
    # Id of the case's group
    group: str
    # Zero-based position of the case among the cases run so far for its group
    index: int
    # Whether the runner reported the case as passed
    passed: bool
    # The runner's expected_pass value for the case
    should_have_passed: bool
    # The runner's reason for the outcome
    reason: str
17 |
18 |
class BaseTester(ABC):
    """Common driver for running cases against one dialect.

    Subclasses choose the dialect and the runner; ``prepare`` must be called
    before ``run_test``.
    """

    @abstractmethod
    def get_runner(self, dialect: Dialect) -> CaseRunner:
        """Return the runner that executes cases for ``dialect``."""
        pass

    @abstractmethod
    def get_dialect(self, library: DialectsLibrary) -> Dialect:
        """Pick this tester's dialect out of ``library``."""
        pass

    def prepare(self, dialects: DialectsLibrary):
        """Resolve the dialect and runner and reset the per-group counters."""
        chosen_dialect = self.get_dialect(dialects)
        self.dialect = chosen_dialect
        self.runner = self.get_runner(chosen_dialect)
        # group id -> number of cases already run for that group
        self.group_indices = {}

    def run_test(self, case: Case) -> TestResult:
        """Run ``case`` and report the outcome with its index within the group."""
        outcome = self.runner.run_case(case)
        group_id = case.group.id
        position = self.group_indices.get(group_id, 0)
        self.group_indices[group_id] = position + 1
        return TestResult(
            function=case.function,
            group=group_id,
            index=position,
            passed=outcome.passed,
            should_have_passed=outcome.expected_pass,
            reason=outcome.reason,
        )
45 |
--------------------------------------------------------------------------------
/bft/testers/cudf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/cudf/__init__.py
--------------------------------------------------------------------------------
/bft/testers/cudf/runner.py:
--------------------------------------------------------------------------------
1 | import math
2 | import operator
3 |
4 | import cudf
5 | import numpy
6 |
7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
8 | from bft.cases.types import Case
9 | from bft.dialects.types import SqlMapping
10 | from bft.utils.utils import type_to_dialect_type
11 |
# Maps short type names used in cases to the cudf dtype used to build Series.
# Note that "timestamp" and "date" both map to second-resolution datetimes.
type_map = {
    "i8": cudf.dtype("int8"),
    "i16": cudf.dtype("int16"),
    "i32": cudf.dtype("int32"),
    "i64": cudf.dtype("int64"),
    "fp32": cudf.dtype("float32"),
    "fp64": cudf.dtype("float64"),
    "boolean": cudf.dtype("bool"),
    "string": cudf.dtype("string"),
    "timestamp": cudf.dtype("datetime64[s]"),
    "date": cudf.dtype("datetime64[s]"),
}
24 |
25 |
def type_to_cudf_dtype(type: str):
    """Look up the cudf dtype for ``type`` via ``type_to_dialect_type`` and ``type_map``.

    The runner in this file treats a ``None`` result as an unsupported type.
    """
    return type_to_dialect_type(type, type_map)
28 |
29 |
def is_string_function(data_types):
    """Return True when any of ``data_types`` is the cudf string dtype."""
    return cudf.dtype("string") in data_types
32 |
33 |
def is_datetime_function(data_types):
    """Return True when any of ``data_types`` is the ``datetime64[s]`` dtype."""
    return cudf.dtype("datetime64[s]") in data_types
36 |
37 |
def is_numpy_type(data_type):
    """Return True when the class of ``data_type`` is defined in the ``numpy`` module."""
    owning_module = type(data_type).__module__
    return owning_module == numpy.__name__
40 |
41 |
def get_str_fn_result(
    fn_name: str, arg_vectors: list[cudf.Series], arg_values: list[str], is_regexp: bool
):
    """Call the ``.str`` accessor method ``fn_name`` on the first argument vector.

    The remaining arguments are passed as plain values taken from
    ``arg_values``. With three or more vectors, the third value is only passed
    when it is not None. ``regex=True`` is added when ``is_regexp`` is set and
    at least one extra value is passed with two vectors, or both extra values
    with three or more.
    """
    arity = len(arg_vectors)
    fn = getattr(arg_vectors[0].str, fn_name)
    if arity == 1:
        return fn()
    if arity == 2:
        return fn(arg_values[1], regex=True) if is_regexp else fn(arg_values[1])
    # Three or more vectors: the third value is optional.
    if arg_values[2] is None:
        return fn(arg_values[1])
    if is_regexp:
        return fn(arg_values[1], arg_values[2], regex=True)
    return fn(arg_values[1], arg_values[2])
64 |
def get_dt_fn_result(
    mapping: SqlMapping, dtype, arg_vectors: list[cudf.Series], arg_values: list[str]
):
    """Evaluate a two-argument datetime function with cudf.

    Infix functions are evaluated with ``DataFrame.eval`` on columns built
    from the raw argument values. Extract functions read the ``.dt`` property
    named by the first argument (lower-cased) from the second argument vector.

    Raises:
        ValueError: for any other combination. Previously this surfaced as an
            opaque unbound-variable error on ``result``.
    """
    fn_name = mapping.local_name
    if len(arg_vectors) == 2:
        if mapping.infix:
            gdf = cudf.DataFrame(
                {"a": arg_values[0], "b": arg_values[1]},
                dtype=dtype,
            )
            return gdf.eval(f"(a){fn_name}(b)")
        if mapping.extract:
            extract_property = arg_values[0].lower()
            return getattr(arg_vectors[1].dt, extract_property)
    raise ValueError(
        f"Unsupported datetime function call: {fn_name} with "
        f"{len(arg_vectors)} argument(s) is neither infix nor extract"
    )
81 |
class CudfRunner(SqlCaseRunner):
    """Runs BFT cases against cuDF Series/DataFrame operations."""

    def __init__(self, dialect):
        super().__init__(dialect)

    @staticmethod
    def _is_nan(value) -> bool:
        """Return True when ``value`` is a numeric NaN; non-numeric values are not NaN."""
        try:
            return math.isnan(value)
        except TypeError:
            return False

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True when ``value`` is None, renders as an empty string, or is NaN."""
        if value is None or str(value) == "":
            return True
        return CudfRunner._is_nan(value)

    @staticmethod
    def _equals_as_float32(expected, actual) -> bool:
        """Compare ``expected`` narrowed to float32 against ``actual``.

        Expectations that cannot be converted to float32 simply do not match
        instead of raising.
        """
        try:
            return bool(numpy.float32(expected) == actual)
        except (TypeError, ValueError):
            return False

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Execute ``case`` with cuDF and compare the outcome to ``case.result``.

        Returns ``unsupported`` when an argument type has no cuDF dtype,
        ``error`` when cuDF raises ``RuntimeError``, and otherwise
        ``success`` / ``mismatch`` / ``unexpected_pass`` from the comparison.

        Raises:
            Exception: If a non-infix scalar function yields a result whose
                length is not 1.
        """
        arg_vectors = []
        arg_values = []
        data_types = []
        fn_name = mapping.local_name
        is_regexp = "regexp" in case.function
        # Stays None when the case has no arguments, so later uses of dtype
        # never hit an unbound local.
        dtype = None
        for arg in case.args:
            dtype = type_to_cudf_dtype(arg.type)
            if dtype is None:
                return SqlCaseResult.unsupported(
                    f"The type {arg.type} is not supported"
                )
            arg_vectors.append(cudf.Series(arg.value, dtype=dtype))
            arg_values.append(arg.value)
            data_types.append(dtype)

        try:
            if is_datetime_function(data_types):
                result = get_dt_fn_result(mapping, dtype, arg_vectors, arg_values)
            elif is_string_function(data_types):
                result = get_str_fn_result(fn_name, arg_vectors, arg_values, is_regexp)
            elif len(arg_vectors) == 1:
                # Some functions that only take a single arg are able to be executed against
                # both a Series and a Dataframe whereas others are only able to be executed
                # against a Dataframe.
                if mapping.aggregate:
                    arg_values = arg_values[0]
                try:
                    gdf = cudf.DataFrame({"a": arg_values}, dtype=dtype)
                    result = gdf.eval(f"{fn_name}(a)")
                except ValueError:
                    fn = getattr(arg_vectors[0], fn_name)
                    result = fn()
            elif len(arg_vectors) == 2:
                if mapping.infix:
                    # If there are only Null/Nan/None values in the column, they are set to
                    # False instead of being kept. We add extra data to ensure the value
                    # exists in the dataframe.
                    gdf = cudf.DataFrame(
                        {"a": [arg_values[0], True], "b": [arg_values[1], True]},
                        dtype=dtype,
                    )
                    result = gdf.eval(f"(a){fn_name}(b)")
                else:
                    try:
                        fn = getattr(arg_vectors[0], fn_name)
                        result = fn(arg_vectors[1])
                    except AttributeError:
                        # Not a Series method; fall back to the operator module.
                        fn = getattr(operator, fn_name)
                        result = fn(arg_vectors[0], arg_vectors[1])
                    except ValueError:  # Case for round function
                        fn = getattr(arg_vectors[0], fn_name)
                        result = fn(arg_values[1])
            else:
                fn = getattr(arg_vectors[0], fn_name)
                try:
                    result = fn(arg_vectors[1:])
                except TypeError:
                    result = fn(arg_values[1], arg_values[2])
        except RuntimeError as err:
            return SqlCaseResult.error(str(err))

        if mapping.aggregate:
            if is_numpy_type(result):
                result = result.item()
        else:
            if result.empty and (
                case.result.value is None or case.result.value is False
            ):
                return SqlCaseResult.success()
            elif len(result) != 1 and not mapping.infix:
                raise Exception("Scalar function with one row output more than one row")
            else:
                result = result[0]

        if case.result == "undefined":
            return SqlCaseResult.success()
        if case.result == "error":
            return SqlCaseResult.unexpected_pass(str(result))
        if case.result == "nan":
            if self._is_nan(result):
                return SqlCaseResult.success()
            # Previously this path fell off the end and returned None.
            return SqlCaseResult.mismatch(str(result))

        expected = case.result.value
        if expected is None:
            if self._is_missing(result):
                return SqlCaseResult.success()
            return SqlCaseResult.mismatch(str(result))
        if expected == result:
            return SqlCaseResult.success()
        if expected == str(result):
            return SqlCaseResult.success()
        if self._equals_as_float32(expected, result):
            return SqlCaseResult.success()
        return SqlCaseResult.mismatch(str(result))
180 |
--------------------------------------------------------------------------------
/bft/testers/cudf/tester.py:
--------------------------------------------------------------------------------
1 | from bft.dialects.types import Dialect, DialectsLibrary
2 | from bft.testers.base_tester import BaseTester
3 |
4 | from .runner import CudfRunner
5 |
6 |
class CudfTester(BaseTester):
    """Tester that pairs the cuDF case runner with the ``cudf`` dialect."""

    def get_runner(self, dialect: Dialect):
        """Build a ``CudfRunner`` for the given dialect."""
        runner = CudfRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Fetch the dialect registered under the name ``cudf``."""
        dialect_name = "cudf"
        return library.get_dialect_by_name(dialect_name)
13 |
--------------------------------------------------------------------------------
/bft/testers/datafusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/datafusion/__init__.py
--------------------------------------------------------------------------------
/bft/testers/datafusion/runner.py:
--------------------------------------------------------------------------------
1 | import math
2 | from datetime import datetime
3 |
4 | import datafusion
5 | import numpy
6 | import pyarrow as pa
7 |
8 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
9 | from bft.cases.types import Case, CaseLiteral
10 | from bft.dialects.types import SqlMapping
11 | from bft.utils.utils import type_to_dialect_type
12 |
# Substrait type name -> pyarrow type used to build the input record batch.
type_map = {
    # Integers
    "i8": pa.int8(),
    "i16": pa.int16(),
    "i32": pa.int32(),
    "i64": pa.int64(),
    # Floating point
    "fp32": pa.float32(),
    "fp64": pa.float64(),
    # Boolean / text
    "boolean": pa.bool_(),
    "string": pa.string(),
    # Every temporal type is carried as a second-resolution timestamp.
    "date": pa.timestamp("s"),
    "time": pa.timestamp("s"),
    "timestamp": pa.timestamp("s"),
    "timestamp_tz": pa.timestamp("s"),
}


def type_to_datafusion_type(type: str):
    """Resolve a Substrait type name through ``type_map`` via ``type_to_dialect_type``."""
    arrow_type = type_to_dialect_type(type, type_map)
    return arrow_type
31 |
32 |
def handle_special_cases(lit: CaseLiteral):
    """Translate the textual float markers into real floats.

    ``"nan"``, ``"inf"`` and ``"-inf"`` become the matching float values;
    any other literal is handed back untouched.
    """
    replacements = (
        ("nan", math.nan),
        ("inf", float("inf")),
        ("-inf", float("-inf")),
    )
    for marker, float_value in replacements:
        if lit == marker:
            return float_value
    return lit
41 |
42 |
def is_string_type(arg):
    """Tell whether ``arg`` should be passed along as a string.

    True when the value is not None and either the type is a text/temporal
    type or the value is the literal ``"Null"``.
    """
    type_name, value = arg.type, arg.value
    if value is None:
        return False
    stringly_types = ["string", "timestamp", "timestamp_tz", "date", "time"]
    if type_name in stringly_types:
        return True
    return value in ["Null"]
48 |
49 |
def arg_with_type(arg):
    """Coerce ``arg.value`` to the Python type matching ``arg.type``.

    String-like arguments become ``str``; lists and None become None;
    ``i*`` types become ``int``; ``fp*`` types become ``float``; everything
    else is returned unchanged.
    """
    if is_string_type(arg):
        return str(arg.value)
    raw = arg.value
    if isinstance(raw, list) or raw is None:
        return None
    if arg.type.startswith("i"):
        return int(raw)
    if arg.type.startswith("fp"):
        return float(raw)
    return raw
62 |
63 |
def str_to_datetime(str_val, type):
    """Parse a textual temporal literal into a ``datetime``.

    Args:
        str_val: The literal text, or None for a null argument.
        type: The Substrait type name; ``"time"`` selects time-only parsing.

    Returns:
        A naive ``datetime`` (time-only values land on 1900-01-01, the
        ``strptime`` default date), or None when ``str_val`` is None.

    Raises:
        ValueError: If the text matches none of the supported layouts.
    """
    # Null arguments previously crashed in strptime()/len() with TypeError.
    if str_val is None:
        return None
    if type == "time":
        try:
            return datetime.strptime(str_val, "%H:%M:%S.%f")
        except ValueError:
            # Accept times written without a fractional-seconds part.
            return datetime.strptime(str_val, "%H:%M:%S")
    # The layout is chosen by length: more than 19 characters means a trailing
    # zone name, fewer than 16 means a bare date.
    if len(str_val) > 19:
        return datetime.strptime(str_val, "%Y-%m-%d %H:%M:%S %Z")
    elif len(str_val) < 16:
        return datetime.strptime(str_val, "%Y-%m-%d")
    else:
        return datetime.strptime(str_val, "%Y-%m-%d %H:%M:%S")
73 |
74 |
class DatafusionRunner(SqlCaseRunner):
    """Runs BFT cases as SQL against an in-process DataFusion session."""

    def __init__(self, dialect):
        super().__init__(dialect)
        self.ctx = datafusion.SessionContext()

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Register the case arguments as ``my_table``, run the mapped SQL, compare.

        Returns ``unsupported`` for argument types without a pyarrow mapping,
        ``error`` for any exception raised while building or running the
        query, and ``success`` / ``mismatch`` / ``unexpected_pass`` otherwise.
        """
        # Tracks whether my_table exists so cleanup only touches a table we created.
        table_registered = False
        try:
            arg_vectors = []
            arg_names = []
            arg_vals_list = []
            orig_types = []
            arg_types_list = []

            if mapping.aggregate:
                # Aggregate arguments are whole columns: one array per argument.
                for arg_idx, arg in enumerate(case.args):
                    arg_type = type_to_datafusion_type(arg.type)
                    if arg_type is None:
                        return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                    arg_vals = [handle_special_cases(val) for val in arg.value]
                    arg_names.append(f"arg{arg_idx}")
                    arg_vectors.append(pa.array(arg_vals, arg_type))
            else:
                for arg_idx, arg in enumerate(case.args):
                    arg_val = arg_with_type(arg)
                    arg_type = type_to_datafusion_type(arg.type)
                    if arg_type is None:
                        return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                    orig_types.append(arg.type)
                    arg_vals_list.append(arg_val)
                    arg_types_list.append(arg_type)
                    arg_names.append(f"arg{arg_idx}")

                for val, arg_type, orig_type in zip(
                    arg_vals_list, arg_types_list, orig_types
                ):
                    # Null temporal values stay None instead of being parsed.
                    if isinstance(arg_type, pa.lib.TimestampType) and val is not None:
                        val = str_to_datetime(val, orig_type)
                    arg_vectors.append(pa.array([val], arg_type))

            joined_arg_names = ",".join(arg_names)
            batch = pa.RecordBatch.from_arrays(
                arg_vectors,
                names=arg_names,
            )
            self.ctx.register_record_batches("my_table", [[batch]])
            table_registered = True

            if mapping.infix:
                if len(case.args) != 2:
                    raise Exception(f"Infix function with {len(case.args)} args")
                expr_str = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr_str = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.extract:
                if len(arg_names) != 2:
                    raise Exception(f"Extract function with {len(arg_names)} args")
                expr_str = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
            elif mapping.local_name == 'count(*)':
                expr_str = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr_str = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr_str = (
                    f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
                )

            result = self.ctx.sql(expr_str).collect()[0].columns[0].to_pylist()

            if len(result) != 1:
                raise Exception("Scalar function with one row output more than one row")
            result = result[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            elif case.result == "nan":
                if isinstance(result, float) and math.isnan(result):
                    return SqlCaseResult.success()
                # Previously a non-NaN result fell through and returned None.
                return SqlCaseResult.mismatch(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            # Datafusion bug with float when converting from a dataframe to a pylist:
            # https://github.com/apache/arrow-datafusion/issues/9950
            elif case.result.type.startswith('fp') and case.result.value:
                if result is not None and math.isclose(
                    result, case.result.value, rel_tol=1e-6
                ):
                    return SqlCaseResult.success()
                # Previously a float mismatch fell through and returned None.
                return SqlCaseResult.mismatch(str(result))
            else:
                if result == case.result.value:
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
        except Exception as err:
            return SqlCaseResult.error(str(err))
        finally:
            if table_registered:
                self.ctx.deregister_table("my_table")
177 |
--------------------------------------------------------------------------------
/bft/testers/datafusion/tester.py:
--------------------------------------------------------------------------------
1 | from bft.dialects.types import Dialect, DialectsLibrary
2 | from bft.testers.base_tester import BaseTester
3 |
4 | from .runner import DatafusionRunner
5 |
6 |
# NOTE(review): the class name is spelled "Datafustion"; it is left as-is
# because importers outside this file may reference it.
class DatafustionTester(BaseTester):
    """Tester that pairs the DataFusion case runner with the ``datafusion`` dialect."""

    def get_runner(self, dialect: Dialect):
        """Build a ``DatafusionRunner`` for the given dialect."""
        runner = DatafusionRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Fetch the dialect registered under the name ``datafusion``."""
        dialect_name = "datafusion"
        return library.get_dialect_by_name(dialect_name)
13 |
--------------------------------------------------------------------------------
/bft/testers/duckdb/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/duckdb/__init__.py
--------------------------------------------------------------------------------
/bft/testers/duckdb/runner.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import math
3 | from typing import Dict, NamedTuple
4 |
5 | import duckdb
6 |
7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
8 | from bft.cases.types import Case
9 | from bft.dialects.types import SqlMapping
10 | from bft.utils.utils import type_to_dialect_type, datetype_value_equal
11 |
# Substrait type name -> DuckDB column type used in CREATE TABLE.
type_map = {
    # Integers
    "i8": "TINYINT",
    "i16": "SMALLINT",
    "i32": "INTEGER",
    "i64": "BIGINT",
    # Floating point
    "fp32": "REAL",
    "fp64": "DOUBLE",
    # Boolean / text
    "boolean": "BOOLEAN",
    "string": "VARCHAR",
    # Temporal
    "date": "DATE",
    "time": "TIME",
    "timestamp": "TIMESTAMP",
    "timestamp_tz": "TIMESTAMPTZ",
    "interval": "INTERVAL",
    # Fixed point
    "decimal": "DECIMAL",
}


def type_to_duckdb_type(type: str):
    """Resolve a Substrait type name through ``type_map`` via ``type_to_dialect_type``."""
    duckdb_type = type_to_dialect_type(type, type_map)
    return duckdb_type
32 |
33 |
34 | def literal_to_str(lit: str | int | float):
35 | if lit is None:
36 | return "null"
37 | elif lit in [math.nan, "nan"]:
38 | return "'NaN'"
39 | elif lit in [float("inf"), "inf"]:
40 | return "'Infinity'"
41 | elif lit in [float("-inf"), "-inf"]:
42 | return "'-Infinity'"
43 | return str(lit)
44 |
45 |
def is_string_type(arg):
    """Tell whether ``arg`` must be single-quoted in SQL.

    True for non-None values whose type is a text or temporal type.
    """
    quoted_types = ["string", "timestamp", "timestamp_tz", "date", "time"]
    if arg.type not in quoted_types:
        return False
    return arg.value is not None
51 |
52 |
def is_datetype(arg):
    """Tell whether ``arg`` is exactly a ``datetime``, ``date`` or ``timedelta`` (subclasses excluded)."""
    exact_type = type(arg)
    temporal_types = (datetime.datetime, datetime.date, datetime.timedelta)
    return exact_type in temporal_types
55 |
class DuckDBRunner(SqlCaseRunner):
    """Runs BFT cases as SQL against an in-memory DuckDB connection."""

    def __init__(self, dialect):
        super().__init__(dialect)
        self.conn = duckdb.connect()

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Load the case arguments into ``my_table``, run the mapped SQL, compare.

        Returns ``unsupported`` for argument types without a DuckDB mapping,
        ``error`` when DuckDB raises, and ``success`` / ``mismatch`` /
        ``unexpected_pass`` from the comparison.

        Raises:
            Exception: If the mapping kind and the argument count disagree.
        """
        try:
            arg_defs = []
            for idx, arg in enumerate(case.args):
                arg_type = type_to_duckdb_type(arg.type)
                # Report unmapped types explicitly instead of emitting a
                # "argN None" column definition.
                if arg_type is None:
                    return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                arg_defs.append(f"arg{idx} {arg_type}")
            schema = ",".join(arg_defs)
            self.conn.execute(f"CREATE TABLE my_table({schema});")
            self.conn.execute("SET TimeZone='UTC';")

            arg_names = [f"arg{idx}" for idx in range(len(case.args))]
            joined_arg_names = ",".join(arg_names)
            arg_vals_list = list()
            for arg in case.args:
                if is_string_type(arg):
                    arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
                else:
                    arg_vals_list.append(literal_to_str(arg.value))
            arg_vals = ", ".join(arg_vals_list)
            if mapping.aggregate:
                # Aggregate arguments are columns: each value becomes its own row.
                arg_vals_list = list()
                for arg in case.args:
                    arg_vals = ""
                    for value in arg.value:
                        # Quote every non-null string value; an empty string
                        # used to be rendered as "()", which is invalid SQL.
                        if is_string_type(arg) and value is not None:
                            arg_vals += f"('{literal_to_str(value)}'),"
                        else:
                            arg_vals += f"({literal_to_str(value)}),"
                    # Drop the trailing comma.
                    arg_vals_list.append([arg_vals[:-1]])
                for arg_name, arg_vals in zip(arg_names, arg_vals_list):
                    if len(arg_vals[0]):
                        self.conn.execute(
                            f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
                        )
            else:
                self.conn.execute(
                    f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
                )

            if mapping.infix:
                if len(arg_names) != 2:
                    raise Exception(f"Infix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.extract:
                if len(arg_names) != 2:
                    raise Exception(f"Extract function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
            elif mapping.local_name == "count(*)":
                expr = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
            result = self.conn.execute(expr).fetchone()[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            elif str(result) == "nan":
                if case.result == "nan":
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            elif case.result.type.startswith("fp") and case.result.value and result:
                if math.isclose(result, case.result.value, rel_tol=1e-7):
                    return SqlCaseResult.success()
                # Previously a float mismatch fell through and returned None.
                return SqlCaseResult.mismatch(str(result))
            else:
                if result == case.result.value:
                    return SqlCaseResult.success()
                elif is_datetype(result) and datetype_value_equal(
                    result, case.result.value
                ):
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
        except duckdb.Error as err:
            return SqlCaseResult.error(str(err))
        finally:
            # IF EXISTS keeps cleanup from raising (and masking the real
            # outcome) when the table was never created.
            self.conn.execute("DROP TABLE IF EXISTS my_table")
154 |
--------------------------------------------------------------------------------
/bft/testers/duckdb/runner_test.py:
--------------------------------------------------------------------------------
1 | from bft.testers.duckdb.runner import type_to_duckdb_type
2 |
3 |
def test_type_to_duckdb_type():
    """Check plain, parameterized and unknown type names."""
    expected_by_type = [
        ("interval", "INTERVAL"),
        ("decimal<37, 3>", "DECIMAL(37, 3)"),
    ]
    for substrait_type, duckdb_type in expected_by_type:
        assert type_to_duckdb_type(substrait_type) == duckdb_type
    # Unknown names resolve to None rather than raising.
    assert type_to_duckdb_type("non_existent") is None
8 |
--------------------------------------------------------------------------------
/bft/testers/duckdb/tester.py:
--------------------------------------------------------------------------------
1 | from bft.dialects.types import Dialect, DialectsLibrary
2 | from bft.testers.base_tester import BaseTester
3 |
4 | from .runner import DuckDBRunner
5 |
6 |
class DuckDBTester(BaseTester):
    """Tester that pairs the DuckDB case runner with the ``duckdb`` dialect."""

    def get_runner(self, dialect: Dialect):
        """Build a ``DuckDBRunner`` for the given dialect."""
        runner = DuckDBRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Fetch the dialect registered under the name ``duckdb``."""
        dialect_name = "duckdb"
        return library.get_dialect_by_name(dialect_name)
13 |
--------------------------------------------------------------------------------
/bft/testers/postgres/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/postgres/__init__.py
--------------------------------------------------------------------------------
/bft/testers/postgres/runner.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import math
3 | import os
4 |
5 | import psycopg
6 |
7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
8 | from bft.cases.types import Case
9 | from bft.dialects.types import SqlMapping
10 | from bft.utils.utils import datetype_value_equal
11 |
# Substrait type name -> PostgreSQL column type used in CREATE TABLE.
type_map = {
    # Integers
    "i16": "smallint",
    "i32": "integer",
    "i64": "bigint",
    # Floating point
    "fp32": "float4",
    "fp64": "float8",
    # Boolean / text
    "boolean": "boolean",
    "string": "text",
    # Temporal
    "date": "date",
    "time": "time",
    "timestamp": "timestamp",
    "timestamp_tz": "timestamptz",
    "interval": "interval",
}


def type_to_postgres_type(type: str):
    """Look up the PostgreSQL type for a Substrait type name; None when unmapped."""
    return type_map.get(type)
32 |
33 |
34 | def literal_to_str(lit: str | int | float):
35 | if lit is None:
36 | return "null"
37 | elif lit in [float("inf"), "inf"]:
38 | return "'Infinity'"
39 | elif lit in [float("-inf"), "-inf"]:
40 | return "'-Infinity'"
41 | return str(lit)
42 |
43 |
def is_string_type(arg):
    """Tell whether ``arg`` must be single-quoted in SQL.

    Only non-None values of a text or temporal type qualify.
    """
    has_value = arg.value is not None
    is_quoted_kind = arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
    return is_quoted_kind and has_value
49 |
50 |
def is_datetype(arg):
    """Tell whether ``arg`` is exactly a ``datetime``, ``date`` or ``timedelta``.

    The check is on the exact type, so subclasses do not count. The debug
    ``print`` that used to run on every call has been removed.
    """
    return type(arg) in [datetime.datetime, datetime.date, datetime.timedelta]
54 |
55 |
def get_connection_str():
    """Build the libpq connection string from ``POSTGRES_*`` environment variables.

    Reads ``POSTGRES_HOST``, ``POSTGRES_DB``, ``POSTGRES_USER`` and
    ``POSTGRES_PASSWORD``, defaulting to ``localhost``, ``bft``, ``postgres``
    and ``postgres``.

    Returns:
        A ``key='value'`` string. Values are single-quoted with backslashes
        and single quotes backslash-escaped; the previous ``{name=}``
        formatting relied on ``repr()``, which switches to double quotes when
        a value contains a single quote.
    """

    def quote(value):
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    settings = (
        ("host", os.environ.get("POSTGRES_HOST", "localhost")),
        ("dbname", os.environ.get("POSTGRES_DB", "bft")),
        ("user", os.environ.get("POSTGRES_USER", "postgres")),
        ("password", os.environ.get("POSTGRES_PASSWORD", "postgres")),
    )
    return " ".join(f"{key}={quote(value)}" for key, value in settings)
62 |
63 |
class PostgresRunner(SqlCaseRunner):
    """Runs BFT cases as SQL against a PostgreSQL server."""

    def __init__(self, dialect):
        super().__init__(dialect)
        self.conn = psycopg.connect(get_connection_str())

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Load the case arguments into ``my_table``, run the mapped SQL, compare.

        Everything runs between an explicit ``BEGIN`` and a rollback in
        ``finally``, so the table does not outlive the call.

        Returns ``unsupported`` for argument types without a PostgreSQL
        mapping, ``error`` when psycopg raises, and ``success`` /
        ``mismatch`` / ``unexpected_pass`` from the comparison.

        Raises:
            Exception: If the mapping kind and the argument count disagree.
        """
        self.conn.execute("BEGIN;")

        try:
            arg_defs = []
            for idx, arg in enumerate(case.args):
                arg_type = type_to_postgres_type(arg.type)
                if arg_type is None:
                    return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                arg_defs.append(f"arg{idx} {arg_type}")
            schema = ",".join(arg_defs)
            self.conn.execute(f"CREATE TABLE my_table({schema});")

            arg_names = [f"arg{idx}" for idx in range(len(case.args))]
            joined_arg_names = ",".join(arg_names)
            arg_vals_list = list()
            for arg in case.args:
                if is_string_type(arg):
                    arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
                else:
                    arg_vals_list.append(literal_to_str(arg.value))
            arg_vals = ", ".join(arg_vals_list)
            if mapping.aggregate:
                # Aggregate arguments are columns: each value becomes its own row.
                arg_vals_list = list()
                for arg in case.args:
                    arg_vals = ""
                    for value in arg.value:
                        # Quote every non-null string value; an empty string
                        # used to be rendered as "()", which is invalid SQL.
                        if is_string_type(arg) and value is not None:
                            arg_vals += f"('{literal_to_str(value)}'),"
                        else:
                            arg_vals += f"({literal_to_str(value)}),"
                    # Drop the trailing comma.
                    arg_vals_list.append([arg_vals[:-1]])
                for arg_name, arg_vals in zip(arg_names, arg_vals_list):
                    if len(arg_vals[0]):
                        self.conn.execute(
                            f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
                        )
            else:
                self.conn.execute(
                    f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
                )

            if mapping.infix:
                if len(arg_names) != 2:
                    raise Exception(f"Infix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.extract:
                if len(arg_names) != 2:
                    raise Exception(f"Extract function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
            elif mapping.between:
                if len(arg_names) != 3:
                    raise Exception(f"Between function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} BETWEEN {arg_names[1]} AND {arg_names[2]} FROM my_table;"
            elif mapping.local_name == 'count(*)':
                expr = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
            result = self.conn.execute(expr).fetchone()[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            elif case.result == "nan":
                # An actual NaN result satisfies a NaN expectation; this used
                # to be reported as an error unconditionally.
                if isinstance(result, float) and math.isnan(result):
                    return SqlCaseResult.success()
                print(f"Expected NAN but received {result}")
                return SqlCaseResult.error(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            elif case.result.type.startswith("fp") and case.result.value:
                # A NULL result cannot be passed to isclose(); treat it as a mismatch.
                if result is not None and math.isclose(
                    result, case.result.value, rel_tol=1e-7
                ):
                    return SqlCaseResult.success()
                # Previously a float mismatch fell through and returned None.
                return SqlCaseResult.mismatch(str(result))
            else:
                if result == case.result.value:
                    return SqlCaseResult.success()
                elif is_datetype(result) and datetype_value_equal(
                    result, case.result.value
                ):
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
        except psycopg.Error as err:
            return SqlCaseResult.error(str(err))
        finally:
            self.conn.rollback()
166 |
--------------------------------------------------------------------------------
/bft/testers/postgres/tester.py:
--------------------------------------------------------------------------------
1 | from bft.dialects.types import Dialect, DialectsLibrary
2 | from bft.testers.base_tester import BaseTester
3 |
4 | from .runner import PostgresRunner
5 |
6 |
class PostgresTester(BaseTester):
    """Tester that pairs the PostgreSQL case runner with the ``postgres`` dialect."""

    def get_runner(self, dialect: Dialect):
        """Build a ``PostgresRunner`` for the given dialect."""
        runner = PostgresRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Fetch the dialect registered under the name ``postgres``."""
        dialect_name = "postgres"
        return library.get_dialect_by_name(dialect_name)
13 |
--------------------------------------------------------------------------------
/bft/testers/snowflake/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/snowflake/__init__.py
--------------------------------------------------------------------------------
/bft/testers/snowflake/config.yaml:
--------------------------------------------------------------------------------
# Connection settings loaded by SnowflakeRunner and passed to the Snowflake
# connector. Replace each null with a real value before running the tests.
# The private key is not configured here: its path is read from the
# SNOWSQL_PRIVATE_KEY_PATH environment variable.
snowflake:
  username: null
  account: null
  warehouse: null
  database: null
  schema: null
7 |
--------------------------------------------------------------------------------
/bft/testers/snowflake/runner.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import math
3 | import os
4 | import yaml
5 | from typing import Dict, NamedTuple
6 | from cryptography.hazmat.primitives.serialization import load_der_private_key
7 | from cryptography.hazmat.backends import default_backend
8 |
9 | from snowflake.connector import connect
10 | from snowflake.connector.errors import Error
11 |
12 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
13 | from bft.cases.types import Case
14 | from bft.dialects.types import SqlMapping
15 | from bft.utils.utils import type_to_dialect_type
16 |
# Substrait type name -> Snowflake column type used in CREATE TABLE.
type_map = {
    # Floating point
    "fp64": "FLOAT",
    # Boolean / text
    "boolean": "BOOLEAN",
    "string": "VARCHAR",
    # Temporal
    "date": "DATE",
    "time": "TIME",
    "timestamp": "TIMESTAMP",
    "timestamp_tz": "TIMESTAMPTZ",
    "interval": "INTERVAL",
    # Fixed point
    "decimal": "DECIMAL",
}


def type_to_snowflake_type(type: str):
    """Resolve a Substrait type name through ``type_map`` via ``type_to_dialect_type``."""
    snowflake_type = type_to_dialect_type(type, type_map)
    return snowflake_type
32 |
33 |
34 | def literal_to_str(lit: str | int | float):
35 | if lit is None:
36 | return "null"
37 | elif lit in [math.nan, "nan"]:
38 | return "'NaN'"
39 | elif lit in [float("inf"), "inf"]:
40 | return "'inf'"
41 | elif lit in [float("-inf"), "-inf"]:
42 | return "'-inf'"
43 | return str(lit)
44 |
45 |
46 | def literal_to_float(lit: str | int | float):
47 | if lit in [float("inf"), "inf"]:
48 | return "TO_DOUBLE('inf'::float)"
49 | elif lit in [float("-inf"), "-inf"]:
50 | return "TO_DOUBLE('-inf'::float)"
51 | return lit
52 |
53 |
def is_float_type(arg):
    """Tell whether ``arg.type`` is one of the floating-point types ``fp32`` / ``fp64``."""
    type_name = arg.type
    return type_name == "fp32" or type_name == "fp64"
56 |
57 |
def is_string_type(arg):
    """Tell whether ``arg`` must be single-quoted in SQL.

    Requires a text or temporal type together with a non-None value.
    """
    if arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]:
        return arg.value is not None
    return False
63 |
64 |
def is_datetype(arg):
    """Tell whether ``arg`` is exactly a ``datetime``, ``date`` or ``timedelta`` (subclasses excluded)."""
    candidates = (datetime.datetime, datetime.date, datetime.timedelta)
    return any(type(arg) is candidate for candidate in candidates)
67 |
68 |
class SnowflakeRunner(SqlCaseRunner):
    """Runs BFT cases as SQL against a Snowflake account."""

    def __init__(self, dialect):
        """Open the Snowflake connection.

        Settings come from ``testers/snowflake/config.yaml`` (resolved
        against the current working directory) and the private key is read
        from the file named by ``SNOWSQL_PRIVATE_KEY_PATH``.
        """
        super().__init__(dialect)
        with open("testers/snowflake/config.yaml", "r") as file:
            config = yaml.safe_load(file)
        sf_config = config["snowflake"]
        print(f"Connecting to {sf_config['account']} as {sf_config['username']}")
        private_key_path = os.environ["SNOWSQL_PRIVATE_KEY_PATH"]
        with open(private_key_path, "rb") as f:
            private_key = f.read()

        self.conn = connect(
            user=sf_config["username"],
            private_key=private_key,
            account=sf_config["account"],
            database=sf_config["database"],
            schema=sf_config["schema"],
            warehouse=sf_config["warehouse"],
        )

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Load the case arguments into ``my_table``, run the mapped SQL, compare.

        Returns ``unsupported`` for argument types without a Snowflake
        mapping, ``error`` when the connector raises, and ``success`` /
        ``mismatch`` / ``unexpected_pass`` from the comparison.

        Raises:
            Exception: If the mapping kind and the argument count disagree.
        """
        # Bound before the try block so the cleanup in ``finally`` never sees
        # an undefined name if creating the cursor fails.
        cursor = None
        try:
            print(f"Running testcase {case} {mapping}")
            cursor = self.conn.cursor()
            arg_defs = []
            for idx, arg in enumerate(case.args):
                arg_type = type_to_snowflake_type(arg.type)
                if arg_type is None:
                    return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                arg_defs.append(f"arg{idx} {arg_type}")
            schema = ",".join(arg_defs)
            cursor.execute(f"CREATE TABLE my_table({schema});")
            # NOTE(review): confirm this statement actually changes the
            # session time zone on Snowflake.
            cursor.execute("SET TimeZone='UTC';")
            print(f"Running case: {case} create table my_table({schema});")

            arg_names = [f"arg{idx}" for idx in range(len(case.args))]
            joined_arg_names = ",".join(arg_names)
            arg_vals_list = list()
            for arg in case.args:
                if is_string_type(arg):
                    arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
                else:
                    arg_vals_list.append(literal_to_str(arg.value))
            arg_vals = ", ".join(arg_vals_list)
            if mapping.aggregate:
                # Aggregate arguments are columns: each value becomes its own row.
                arg_vals_list = list()
                for arg in case.args:
                    arg_vals = ""
                    for value in arg.value:
                        if is_string_type(arg):
                            # Quote every non-null string value; an empty string
                            # used to be rendered as "()", which is invalid SQL.
                            if value is not None:
                                arg_vals += f"('{literal_to_str(value)}'),"
                            else:
                                arg_vals += f"({literal_to_str(value)}),"
                        elif is_float_type(arg):
                            if value:
                                arg_vals += f"({literal_to_float(value)}),"
                            else:
                                arg_vals += f"({literal_to_str(value)}),"
                        else:
                            arg_vals += f"({literal_to_str(value)}),"
                    # Drop the trailing comma.
                    arg_vals_list.append([arg_vals[:-1]])
                for arg_name, arg_vals in zip(arg_names, arg_vals_list):
                    if len(arg_vals[0]):
                        cursor.execute(
                            f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
                        )
            else:
                cursor.execute(
                    f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
                )

            if mapping.infix:
                if len(arg_names) != 2:
                    raise Exception(f"Infix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.extract:
                if len(arg_names) != 2:
                    raise Exception(f"Extract function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
            elif mapping.local_name == "count(*)":
                expr = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
            result = cursor.execute(expr).fetchone()[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            elif case.result.type.startswith("fp") and case.result.value and result:
                if math.isclose(result, case.result.value, rel_tol=1e-7):
                    return SqlCaseResult.success()
                # Previously a float mismatch fell through and returned None.
                return SqlCaseResult.mismatch(str(result))
            else:
                if result == case.result.value:
                    return SqlCaseResult.success()
                elif is_datetype(result) and str(result) == case.result.value:
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
        except Error as err:
            return SqlCaseResult.error(str(err))
        finally:
            if cursor is not None:
                cursor.execute("DROP TABLE IF EXISTS my_table")
                cursor.close()
186 |
--------------------------------------------------------------------------------
/bft/testers/snowflake/tester.py:
--------------------------------------------------------------------------------
1 | from bft.dialects.types import Dialect, DialectsLibrary
2 | from bft.testers.base_tester import BaseTester
3 |
4 | from .runner import SnowflakeRunner
5 |
6 |
class SnowflakeTester(BaseTester):
    """Tester that pairs the Snowflake case runner with the ``snowflake`` dialect."""

    def get_runner(self, dialect: Dialect):
        """Build a ``SnowflakeRunner`` for the given dialect."""
        runner = SnowflakeRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Fetch the dialect registered under the name ``snowflake``."""
        dialect_name = "snowflake"
        return library.get_dialect_by_name(dialect_name)
13 |
--------------------------------------------------------------------------------
/bft/testers/sqlite/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/sqlite/__init__.py
--------------------------------------------------------------------------------
/bft/testers/sqlite/runner.py:
--------------------------------------------------------------------------------
1 | import math
2 | import sqlite3
3 | from typing import Dict, NamedTuple
4 |
5 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
6 | from bft.cases.types import Case, CaseLiteral
7 | from bft.dialects.types import SqlMapping
8 | from bft.utils.utils import type_to_dialect_type
9 |
# Substrait base type name -> type name used for the matching column when
# run_sql_case declares my_table. A type that is absent from this map makes
# type_to_sqlite_type return None, which run_sql_case reports as unsupported.
type_map = {
    "i8": "TINYINT",
    "i16": "SMALLINT",
    "i32": "INT",
    "i64": "HUGEINT",
    "fp32": "REAL",
    "fp64": "REAL",
    "boolean": "BOOLEAN",
    "string": "TEXT",
}
20 |
21 |
def type_to_sqlite_type(type: str):
    """Translate a substrait type name through the module-level ``type_map``.

    :param type: substrait type name
    :return: whatever ``type_to_dialect_type`` returns for ``type`` and ``type_map``
    """
    sqlite_type = type_to_dialect_type(type, type_map)
    return sqlite_type
24 |
25 |
26 | def literal_to_str(lit: str | int | float):
27 | if lit is None:
28 | return "null"
29 | elif lit in [float("inf"), "inf"]:
30 | return "9e999"
31 | elif lit in [float("-inf"), "-inf"]:
32 | return "-9e999"
33 | return str(lit)
34 |
35 |
def flatten(l: list):
    """Concatenate the items of every sub-iterable in ``l`` into one new list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat
38 |
39 |
def extract_argument_values(case: Case, mapping: SqlMapping):
    """Render every argument of ``case`` as SQL literal text.

    :param case: test case whose ``args`` expose ``type`` and ``value``
    :param mapping: SQL mapping; when ``mapping.aggregate`` is truthy each
        ``arg.value`` is iterated and one literal is produced per element
    :return: one list of literal strings per argument, in argument order

    String values are wrapped in single quotes with embedded single quotes
    doubled. ``None`` values are always rendered unquoted (``null``).
    """

    def render(value, arg_type):
        text = literal_to_str(value)
        if arg_type == "string" and value is not None:
            # Double embedded quotes so the literal cannot terminate early.
            return "'" + text.replace("'", "''") + "'"
        return text

    arg_vals_list = []
    for arg in case.args:
        if mapping.aggregate:
            # Aggregate arguments carry a sequence of row values; the aggregate
            # check must come first so string columns are quoted per element
            # instead of quoting the repr of the whole list.
            arg_vals = [render(value, arg.type) for value in arg.value]
        else:
            arg_vals = [render(arg.value, arg.type)]
        arg_vals_list.append(arg_vals)
    return arg_vals_list
53 |
54 |
class SqliteRunner(SqlCaseRunner):
    """Case runner that evaluates cases against an in-memory SQLite connection."""

    def __init__(self, dialect):
        super().__init__(dialect)
        self.conn = sqlite3.connect(":memory:")

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Execute one case through ``my_table`` and classify the outcome.

        The arguments are stored in a scratch table ``my_table`` (columns
        ``arg0``, ``arg1``, ...), the mapped function is applied in a SELECT and
        the first column of the first row is compared with ``case.result``.
        All statements run after an explicit ``BEGIN`` and the connection is
        rolled back in ``finally``.

        :param case: case providing ``args`` and the expected ``result``
        :param mapping: how the function is spelled in SQL (infix, postfix,
            between, aggregate or a plain call of ``local_name``)
        :return: a ``SqlCaseResult``; ``sqlite3.Error`` is reported as an error
            result rather than raised
        :raises Exception: when the argument count does not fit the mapping kind
        """
        self.conn.execute("BEGIN;")

        try:
            arg_defs = []
            for idx, arg in enumerate(case.args):
                arg_type = type_to_sqlite_type(arg.type)
                if arg_type is None:
                    return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                arg_defs.append(f"arg{idx} {arg_type}")
            schema = ",".join(arg_defs)
            self.conn.execute(f"CREATE TABLE my_table({schema});")

            arg_names = [f"arg{idx}" for idx in range(len(case.args))]
            joined_arg_names = ",".join(arg_names)
            arg_vals_list = extract_argument_values(case, mapping)

            if mapping.aggregate:
                # Each argument's values are inserted into its own column,
                # one row per value; arguments without values insert nothing.
                for arg_name, column_vals in zip(arg_names, arg_vals_list):
                    if column_vals:
                        str_arg_vals = ",".join(f"({val})" for val in column_vals)
                        self.conn.execute(
                            f"INSERT INTO my_table ({arg_name}) VALUES {str_arg_vals};"
                        )
            else:
                row_vals = ", ".join(flatten(arg_vals_list))
                self.conn.execute(
                    f"INSERT INTO my_table ({joined_arg_names}) VALUES ({row_vals});"
                )

            if mapping.infix:
                if len(arg_names) != 2:
                    raise Exception(f"Infix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.between:
                if len(arg_names) != 3:
                    raise Exception(f"Between function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} BETWEEN {arg_names[1]} AND {arg_names[2]} FROM my_table;"
            elif mapping.local_name == 'count(*)':
                expr = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
            result = self.conn.execute(expr).fetchone()[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            elif case.result == "nan":
                return SqlCaseResult.error(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            elif case.result.type.startswith("fp") and case.result.value and result:
                if math.isclose(result, case.result.value, rel_tol=1e-7):
                    return SqlCaseResult.success()
                # A float outside the tolerance must be reported explicitly;
                # otherwise the method would fall off the end and return None.
                return SqlCaseResult.mismatch(str(result))
            elif result == case.result.value:
                return SqlCaseResult.success()
            else:
                return SqlCaseResult.mismatch(str(result))
        except sqlite3.Error as err:
            return SqlCaseResult.error(str(err))
        finally:
            self.conn.rollback()
134 |
--------------------------------------------------------------------------------
/bft/testers/sqlite/tester.py:
--------------------------------------------------------------------------------
1 | from bft.dialects.types import Dialect, DialectsLibrary
2 | from bft.testers.base_tester import BaseTester
3 |
4 | from .runner import SqliteRunner
5 |
6 |
class SqliteTester(BaseTester):
    """Tester that pairs the SQLite runner with the ``sqlite`` dialect."""

    def get_runner(self, dialect: Dialect):
        """Return a ``SqliteRunner`` constructed for ``dialect``."""
        runner = SqliteRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Return the dialect registered in ``library`` under the name ``sqlite``."""
        dialect_name = "sqlite"
        return library.get_dialect_by_name(dialect_name)
13 |
--------------------------------------------------------------------------------
/bft/testers/velox/runner.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import pyvelox.pyvelox as pv
4 |
5 | from bft.cases.runner import Case, SqlCaseResult, SqlCaseRunner, SqlMapping
6 | from bft.dialects.types import Dialect
7 |
8 |
def is_type_supported(type):
    """Return True when ``type`` is one of i64, fp64, boolean or string."""
    supported_types = {"i64", "fp64", "boolean", "string"}
    return type in supported_types
11 |
12 |
class VeloxRunner(SqlCaseRunner):
    """Case runner that evaluates single-row expressions through pyvelox."""

    def __init__(self, dialect: Dialect):
        super().__init__(dialect)

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Evaluate one case as a pyvelox expression and classify the outcome.

        Each argument becomes a one-element vector named ``arg<N>``; the
        expression text is built from ``mapping`` and evaluated over them.

        :param case: case providing ``args`` and the expected ``result``
        :param mapping: how the function is spelled (infix, postfix, between
            or a plain call of ``local_name``)
        :return: a ``SqlCaseResult``; a ``RuntimeError`` raised while parsing
            or evaluating the expression is reported as an error result
        :raises Exception: when the argument count does not fit the mapping
            kind, or when the evaluation does not yield exactly one value
        """
        arg_vectors = []
        arg_names = []
        for arg_idx, arg in enumerate(case.args):
            if not is_type_supported(arg.type):
                return SqlCaseResult.unsupported(
                    f"The type {arg.type} is not supported"
                )
            arg_vectors.append(pv.from_list([arg.value]))
            arg_names.append(f"arg{arg_idx}")
        if mapping.infix:
            if len(case.args) != 2:
                raise Exception(f"Infix function with {len(case.args)} args")
            expr_str = f"arg0 {mapping.local_name} arg1"
        elif mapping.postfix:
            if len(arg_names) != 1:
                raise Exception(f"Postfix function with {len(arg_names)} args")
            expr_str = f"arg0 {mapping.local_name}"
        elif mapping.between:
            if len(arg_names) != 3:
                raise Exception(f"between function with {len(arg_names)} args")
            expr_str = f"arg0 {mapping.local_name} arg1 and arg2"
        else:
            joined_args = ", ".join(arg_names)
            expr_str = f"{mapping.local_name}({joined_args})"

        try:
            expr = pv.Expression.from_string(expr_str)
            answer = expr.evaluate(arg_names, arg_vectors)
            result = [v for v in answer]
        except RuntimeError as err:
            return SqlCaseResult.error(str(err))

        if len(result) != 1:
            raise Exception("Scalar function with one row output more than one row")
        result = result[0]

        if case.result == "undefined":
            return SqlCaseResult.success()
        elif case.result == "error":
            return SqlCaseResult.unexpected_pass(str(result))
        elif case.result == "nan":
            # Only a float can be NaN; the isinstance guard keeps math.isnan
            # from raising TypeError on non-numeric results.
            if isinstance(result, float) and math.isnan(result):
                return SqlCaseResult.success()
            # Anything else is a mismatch; without this return the method
            # would fall off the end and hand None to the caller.
            return SqlCaseResult.mismatch(str(result))
        elif result == case.result.value:
            return SqlCaseResult.success()
        else:
            return SqlCaseResult.mismatch(str(result))
66 |
--------------------------------------------------------------------------------
/bft/testers/velox/tester.py:
--------------------------------------------------------------------------------
1 | from bft.cases.runner import CaseRunner
2 | from bft.dialects.types import Dialect, DialectsLibrary
3 | from bft.testers.base_tester import BaseTester
4 | from bft.testers.velox.runner import VeloxRunner
5 |
6 |
class VeloxTester(BaseTester):
    """Tester that pairs the Velox runner with the ``velox_presto`` dialect."""

    def get_runner(self, dialect: Dialect) -> CaseRunner:
        """Return a ``VeloxRunner`` constructed for ``dialect``."""
        runner = VeloxRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary) -> Dialect:
        """Return the dialect registered in ``library`` as ``velox_presto``."""
        dialect_name = "velox_presto"
        return library.get_dialect_by_name(dialect_name)
13 |
--------------------------------------------------------------------------------
/bft/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/tests/__init__.py
--------------------------------------------------------------------------------
/bft/tests/base.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List
3 |
4 | import pytest
5 |
6 | from bft.cases.parser import CaseFileParser
7 | from bft.cases.types import Case
8 | from bft.testers.base_tester import BaseTester
9 | from tools.convert_testcases.convert_testcases_to_yaml_format import (
10 | convert_directory as convert_directory_from_substrait,
11 | )
12 |
13 |
14 | # Would be nice to have this as a session-scoped fixture but it doesn't seem that
15 | # parameter values can be a fixture
def cases() -> List[Case]:
    """Collect every case found in the ``*.yaml`` files under ``<repo>/cases``.

    ``convert_directory_from_substrait`` is called first with
    ``<repo>/substrait/tests/cases`` and ``<repo>/cases`` (presumably writing
    converted case files into the latter -- confirm in the converter). Each
    parsed case is then passed through ``transform_case``.
    """
    repo_root = Path(__file__).parent.parent.parent
    case_parser = CaseFileParser()
    cases_dir = repo_root / "cases"
    substrait_cases_dir = repo_root / "substrait" / "tests" / "cases"
    convert_directory_from_substrait(substrait_cases_dir, cases_dir)

    collected = []
    for case_path in cases_dir.resolve().rglob("*.yaml"):
        with open(case_path, "rb") as case_f:
            for case_file in case_parser.parse(case_f):
                collected.extend(transform_case(case) for case in case_file.cases)
    return collected
30 |
31 |
def transform_case(case):
    """Build a new ``Case`` carrying the same six fields as ``case``.

    ``args`` is copied as-is; this is the hook where argument rewriting would go.
    """
    fields = {
        "function": case.function,
        "base_uri": case.base_uri,
        "group": case.group,
        "args": case.args,  # Update args here
        "result": case.result,
        "options": case.options,
    }
    return Case(**fields)
42 |
43 |
def case_id_fn(case: Case):
    """Return ``<function>_<group id>_<group index>`` for ``case``."""
    group = case.group
    return "{}_{}_{}".format(case.function, group.id, group.index)
46 |
47 |
def run_test(case: Case, tester: BaseTester):
    """Run ``case`` through ``tester`` and translate the result for pytest.

    Skips are decided by the runner's class name: Velox cases with a ``None``
    argument value, and Postgres cases whose non-string result has ``inf`` in
    its first element. Otherwise an unexpected pass or an unexpected failure
    fails the test, and an expected failure is reported through ``pytest.xfail``.
    """
    runner_name = tester.runner.__class__.__name__

    if runner_name == "VeloxRunner":
        if any(case_literal.value is None for case_literal in case.args):
            pytest.skip("Skipping. Pyvelox does not support null input")

    if runner_name == "PostgresRunner":
        result_is_literal = type(case.result) != str
        if result_is_literal and "inf" in str(case.result[0]):
            pytest.skip(
                "Skipping. Postgres errors out when dealing with infinite addition"
            )

    result = tester.run_test(case)

    if not result.passed:
        if result.should_have_passed:
            pytest.fail(f"Unexpected fail: {result.reason}")
        else:
            pytest.xfail(result.reason)
    elif not result.should_have_passed:
        pytest.fail(f"Unexpected pass: {result.reason}")
    else:
        assert result.passed
69 |
--------------------------------------------------------------------------------
/bft/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List
3 |
4 | import pytest
5 |
6 | from bft.dialects.loader import load_dialects
7 | from bft.dialects.types import DialectsLibrary
8 |
9 |
@pytest.fixture(scope="session")
def dialects() -> DialectsLibrary:
    """Session-scoped fixture: load the dialects from ``<repo>/dialects``."""
    dialects_dir = Path(__file__).joinpath("..", "..", "..", "dialects").resolve()
    return load_dialects(str(dialects_dir))
14 |
--------------------------------------------------------------------------------
/bft/tests/test_cudf.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bft.testers.cudf.tester import CudfTester
4 |
5 | from .base import cases, run_test
6 |
7 |
@pytest.fixture(scope="module")
def tester(dialects):
    """Module-scoped ``CudfTester`` that has been prepared with ``dialects``."""
    cudf_tester = CudfTester()
    cudf_tester.prepare(dialects)
    return cudf_tester
13 |
14 |
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one parametrized case through the cudf tester."""
    run_test(case=case, tester=tester)
18 |
--------------------------------------------------------------------------------
/bft/tests/test_datafusion.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bft.testers.datafusion.tester import DatafustionTester
4 |
5 | from .base import cases, run_test
6 |
7 |
@pytest.fixture(scope="module")
def tester(dialects):
    """Module-scoped ``DatafustionTester`` that has been prepared with ``dialects``."""
    datafusion_tester = DatafustionTester()
    datafusion_tester.prepare(dialects)
    return datafusion_tester
13 |
14 |
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one parametrized case through the datafusion tester."""
    run_test(case=case, tester=tester)
18 |
--------------------------------------------------------------------------------
/bft/tests/test_duckdb.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bft.testers.duckdb.tester import DuckDBTester
4 |
5 | from .base import cases, run_test
6 |
7 |
@pytest.fixture(scope="module")
def tester(dialects):
    """Module-scoped ``DuckDBTester`` that has been prepared with ``dialects``."""
    duckdb_tester = DuckDBTester()
    duckdb_tester.prepare(dialects)
    return duckdb_tester
13 |
14 |
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one parametrized case through the duckdb tester."""
    run_test(case=case, tester=tester)
18 |
--------------------------------------------------------------------------------
/bft/tests/test_postgres.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bft.testers.postgres.tester import PostgresTester
4 |
5 | from .base import cases, run_test
6 |
7 |
@pytest.fixture(scope="module")
def tester(dialects):
    """Module-scoped ``PostgresTester`` that has been prepared with ``dialects``."""
    postgres_tester = PostgresTester()
    postgres_tester.prepare(dialects)
    return postgres_tester
13 |
14 |
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one parametrized case through the postgres tester."""
    run_test(case=case, tester=tester)
18 |
--------------------------------------------------------------------------------
/bft/tests/test_pyvelox.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bft.testers.velox.tester import VeloxTester
4 |
5 | from .base import cases, run_test
6 |
7 |
@pytest.fixture(scope="module")
def tester(dialects):
    """Module-scoped ``VeloxTester`` that has been prepared with ``dialects``."""
    velox_tester = VeloxTester()
    velox_tester.prepare(dialects)
    return velox_tester
13 |
14 |
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one parametrized case through the velox tester."""
    run_test(case=case, tester=tester)
18 |
--------------------------------------------------------------------------------
/bft/tests/test_snowflake.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bft.testers.snowflake.tester import SnowflakeTester
4 |
5 | from .base import cases, run_test
6 |
7 |
@pytest.fixture(scope="module")
def tester(dialects):
    """Module-scoped ``SnowflakeTester`` that has been prepared with ``dialects``."""
    snowflake_tester = SnowflakeTester()
    snowflake_tester.prepare(dialects)
    return snowflake_tester
13 |
14 |
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one parametrized case through the snowflake tester."""
    run_test(case=case, tester=tester)
18 |
--------------------------------------------------------------------------------
/bft/tests/test_sqlite.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bft.testers.sqlite.tester import SqliteTester
4 |
5 | from .base import cases, run_test
6 |
7 |
@pytest.fixture(scope="module")
def tester(dialects):
    """Module-scoped ``SqliteTester`` that has been prepared with ``dialects``."""
    sqlite_tester = SqliteTester()
    sqlite_tester.prepare(dialects)
    return sqlite_tester
13 |
14 |
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one parametrized case through the sqlite tester."""
    run_test(case=case, tester=tester)
18 |
--------------------------------------------------------------------------------
/bft/utils/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | import datetime
3 |
4 |
def type_to_dialect_type(type: str, type_map: Dict[str, str])->str:
    """
    Map a substrait type name onto the corresponding dialect type name.

    :param type: substrait type, either a bare base type ("interval") or a
        parameterized type ("decimal<37, 3>")
    :param type_map: substrait base type -> dialect base type
    :return: the dialect type, or None when the base type is not in type_map

    The base type is the text before the first "<". For a parameterized type
    the base name is substituted and the angle brackets become parentheses:

        type_map = {"interval": "INTERVAL", "decimal": "NUMERIC"}
        "decimal<37, 3>" -> "NUMERIC(37, 3)"
        "interval"       -> "INTERVAL"
    """
    head, bracket, _ = type.partition("<")
    # Only parameterized names are stripped; a bare name is looked up verbatim.
    base_type = head.strip() if bracket else type
    if base_type not in type_map:
        return None

    dialect_base = type_map[base_type]
    if not bracket:
        return dialect_base

    # transform parameterized type name to have dialect type
    renamed = type.replace(base_type, dialect_base)
    return renamed.translate(str.maketrans("<>", "()"))
28 |
def has_only_date(value: datetime.datetime):
    """Return True when hour, minute, second and microsecond of ``value`` are all 0."""
    time_parts = (value.hour, value.minute, value.second, value.microsecond)
    return all(part == 0 for part in time_parts)
38 |
def datetype_value_equal(result, case_result):
    """Compare ``result`` with the expected string ``case_result``.

    True when ``str(result)`` equals ``case_result``, or when ``result`` is a
    ``datetime.datetime`` for which ``has_only_date`` holds and the string form
    of its date part equals ``case_result``. False otherwise.
    """
    if str(result) == case_result:
        return True

    is_date_only_datetime = isinstance(result, datetime.datetime) and has_only_date(
        result
    )
    return bool(is_date_only_datetime and str(result.date()) == case_result)
49 |
--------------------------------------------------------------------------------
/build_site.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 |
4 | from bft.html.builder import build_site
5 |
6 |
def copy_with_progress(src, dst, copy_function=shutil.copy2):
    """Copy every file below ``src`` to the same relative path below ``dst``.

    :param src: root directory that is walked recursively
    :param dst: destination root; missing parent directories are created
    :param copy_function: callable invoked as ``copy_function(source, destination)``

    One "Copying: ..." line is printed after each file has been copied.
    """
    source_files = (path for path in Path(src).rglob('*') if path.is_file())
    for source_path in source_files:
        destination_path = dst / source_path.relative_to(src)
        destination_path.parent.mkdir(parents=True, exist_ok=True)
        copy_function(source_path, destination_path)
        print(f"Copying: {source_path} -> {destination_path}")
16 |
# All paths are resolved relative to the directory that contains this script.
root = Path(__file__).parent.resolve()
index = root / "index.yaml"
dest = root / "dist"

# Remove the destination directory if it exists
if dest.exists():
    shutil.rmtree(dest)

# Create the destination directory
dest.mkdir()

# Hand the index file and the freshly created destination to the site builder.
build_site(index, dest)

static_content_dir = root / "static_site"

# Use the custom copy_with_progress function
# (copies every file under static_site into dist, printing one line per file)
copy_with_progress(static_content_dir, dest)

print("Copying static files completed.")
36 |
--------------------------------------------------------------------------------
/ci/docker/base-tester.Dockerfile:
--------------------------------------------------------------------------------
# Generic tester image: Alpine + Python 3 + the common test dependencies.
# Extra pip packages can be supplied with --build-arg PIP_PACKAGES="...".
FROM alpine:3.18
ARG PIP_PACKAGES

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Alpine ships no `python` executable; add one that points at python3.
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
RUN echo "PIP_PACKAGES is $PIP_PACKAGES"
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe $PIP_PACKAGES ruamel.yaml antlr4-python3-runtime pytz

WORKDIR /bft
COPY . .

# Exec form: pytest runs as PID 1 and receives stop signals directly instead
# of being wrapped in `/bin/sh -c`.
CMD ["/usr/bin/python", "-mpytest", "bft/tests/test_sqlite.py"]
15 |
--------------------------------------------------------------------------------
/ci/docker/datafusion.Dockerfile:
--------------------------------------------------------------------------------
# Tester image for the datafusion dialect (Ubuntu + Python 3.10 + datafusion).
FROM ubuntu:22.04

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Refresh the package index and install in the same layer so a cached
# `apt-get update` layer can never be paired with a later install.
RUN apt-get update \
    && apt-get install -y python3.10 python3-pip \
    && ln -sf python3 /usr/bin/python
RUN pip install --upgrade pip setuptools pytest pyyaml mistletoe datafusion ruamel.yaml antlr4-python3-runtime pytz numpy

WORKDIR /bft
COPY . .

# Exec form: pytest runs as PID 1 and receives stop signals directly.
CMD ["/usr/bin/python", "-mpytest", "bft/tests/test_datafusion.py"]
13 |
--------------------------------------------------------------------------------
/ci/docker/duckdb.Dockerfile:
--------------------------------------------------------------------------------
# Tester image for the duckdb dialect (Alpine + Python 3 + duckdb).
FROM alpine:3.18

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Alpine ships no `python` executable; add one that points at python3.
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe duckdb ruamel.yaml antlr4-python3-runtime pytz

WORKDIR /bft
COPY . .

# Exec form: pytest runs as PID 1 and receives stop signals directly.
CMD ["/usr/bin/python", "-mpytest", "bft/tests/test_duckdb.py"]
13 |
--------------------------------------------------------------------------------
/ci/docker/postgres-compose.yaml:
--------------------------------------------------------------------------------
---
# Two services: `app` runs the Postgres test module from the base tester
# image, `postgres` provides the database it connects to.
services:
  app:
    image: 'bft/tester'
    build:
      context: '../..'
      dockerfile: './ci/docker/base-tester.Dockerfile'
      args:
        PIP_PACKAGES: 'psycopg[binary]'
    command: '/usr/bin/python -mpytest bft/tests/test_postgres.py'
    # Start the tests only after the database healthcheck has passed.
    depends_on:
      postgres:
        condition: 'service_healthy'
    environment:
      POSTGRES_HOST: 'postgres'
      POSTGRES_USER: 'postgres'
      POSTGRES_PASSWORD: 'postgres'
      POSTGRES_DB: 'bft'

  postgres:
    image: 'postgres:15-alpine'
    environment:
      POSTGRES_DB: 'bft'
      POSTGRES_PASSWORD: 'postgres'
    healthcheck:
      test:
        - 'CMD-SHELL'
        - 'pg_isready -U postgres'
      interval: '5s'
      timeout: '5s'
      retries: 5
29 |
--------------------------------------------------------------------------------
/ci/docker/postgres-server.Dockerfile:
--------------------------------------------------------------------------------
# Image based on the Postgres 15 Alpine image with Python and the test
# dependencies layered on top.
FROM postgres:15-alpine
# Database name and password, set as environment variables of the image.
ENV POSTGRES_DB=bft
ENV POSTGRES_PASSWORD=postgres

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Install python3 and expose it as /usr/bin/python as well.
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe psycopg[binary] ruamel.yaml antlr4-python3-runtime pytz

WORKDIR /bft
COPY . .

# NOTE(review): this CMD replaces the base image's default command; confirm
# how the database server is expected to be started when this image is used.
CMD /usr/bin/python -mpytest bft/tests/test_postgres.py
15 |
--------------------------------------------------------------------------------
/ci/docker/sqlite.Dockerfile:
--------------------------------------------------------------------------------
# Tester image for the sqlite dialect (Alpine + Python 3; sqlite3 comes from
# the Python standard library, so no extra database package is installed).
FROM alpine:3.18

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Alpine ships no `python` executable; add one that points at python3.
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe ruamel.yaml antlr4-python3-runtime pytz

WORKDIR /bft
COPY . .

# Run the sqlite test module and display the results.
# Exec form: pytest runs as PID 1 and receives stop signals directly.
CMD ["/usr/bin/python", "-mpytest", "bft/tests/test_sqlite.py"]
14 |
--------------------------------------------------------------------------------
/ci/docker/velox-compose.yaml:
--------------------------------------------------------------------------------
---
# Single service that builds the velox tester image and runs the pyvelox
# test module.
services:
  app:
    image: 'bft/tester'
    build:
      context: '../..'
      dockerfile: './ci/docker/velox.Dockerfile'
      args:
        PIP_PACKAGES: 'pyvelox'
    command: '/usr/bin/python3 -mpytest bft/tests/test_pyvelox.py'
10 |
--------------------------------------------------------------------------------
/ci/docker/velox.Dockerfile:
--------------------------------------------------------------------------------
# Tester image for the velox_presto dialect (Ubuntu + Python 3 + pyvelox).
FROM ubuntu:22.04

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe pyvelox ruamel.yaml antlr4-python3-runtime pytz

WORKDIR /bft
COPY . .

# Only python3 is installed and no `python` symlink is created in this image,
# so the interpreter must be addressed as /usr/bin/python3.
# Exec form: pytest runs as PID 1 and receives stop signals directly.
CMD ["/usr/bin/python3", "-mpytest", "bft/tests/test_pyvelox.py"]
14 |
--------------------------------------------------------------------------------
/index.yaml:
--------------------------------------------------------------------------------
1 | substrait:
2 | extensions:
3 | - location: ./substrait/extensions/functions_aggregate_approx.yaml
4 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_approx.yaml
5 | - location: ./substrait/extensions/functions_aggregate_generic.yaml
6 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_generic.yaml
7 | - location: ./substrait/extensions/functions_arithmetic.yaml
8 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml
9 | - location: ./substrait/extensions/functions_boolean.yaml
10 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml
11 | - location: ./substrait/extensions/functions_comparison.yaml
12 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml
13 | - location: ./substrait/extensions/functions_datetime.yaml
14 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_datetime.yaml
15 | - location: ./substrait/extensions/functions_logarithmic.yaml
16 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_logarithmic.yaml
17 | - location: ./substrait/extensions/functions_rounding.yaml
18 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_rounding.yaml
19 | - location: ./substrait/extensions/functions_string.yaml
20 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_string.yaml
21 | cases:
22 | - ./cases
23 | dialects:
24 | - ./dialects
25 | supplements:
26 | - ./supplemental
27 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | datafusion
2 | duckdb
3 | jinja2
4 | mistletoe
5 | pytest
6 | pyvelox
7 | pyyaml
8 | snowflake
9 | ruamel.yaml
10 | deepdiff
11 | pytz
12 |
--------------------------------------------------------------------------------
/static_site/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/android-chrome-192x192.png
--------------------------------------------------------------------------------
/static_site/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/android-chrome-512x512.png
--------------------------------------------------------------------------------
/static_site/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/apple-touch-icon.png
--------------------------------------------------------------------------------
/static_site/assets/index/script.js:
--------------------------------------------------------------------------------
/**
 * Show the article whose id is `sectionId` and hide every other article
 * inside `.content-container`, as well as the search results section.
 */
function showContent(sectionId) {
    // Hide all sections
    const articles = document.querySelectorAll('.content-container article');
    for (const article of articles) {
        article.style.display = 'none';
    }

    // Show the selected section
    const target = document.getElementById(sectionId);
    if (target) {
        target.style.display = 'block';
    }

    // Hide the search results section
    const searchResults = document.getElementById('searchResultsSection');
    if (searchResults) {
        searchResults.style.display = 'none';
    }
}
20 |
21 |
/**
 * Rank every entry of `functionsData` against the text in `#searchInput`
 * and pass the matches, best rank first, to `displaySearchResults`.
 *
 * Ranks:
 *   3 - exact name match
 *   2 - partial name match
 *   1 - whole-word match in the brief
 *   0 - partial match in the brief
 *
 * The name is compared after dropping everything up to its first underscore.
 * Any error is logged to the console and no results are displayed.
 */
function searchFunctions() {
    try {
        const query = document.getElementById("searchInput").value.toLowerCase();
        const matches = [];

        for (const entry of functionsData) {
            const shortName = entry.name.toLowerCase().replace(/^.*?_/, '');
            // The brief is stored JSON-escaped; decode it before searching.
            const briefText = JSON.parse('"' + entry.brief + '"').toLowerCase();

            let rank = -1;
            if (shortName === query) {
                rank = 3;
            } else if (shortName.includes(query)) {
                rank = 2;
            } else if (` ${briefText} `.includes(` ${query} `)) {
                rank = 1;
            } else if (briefText.includes(query)) {
                rank = 0;
            }

            if (rank < 0) {
                continue;
            }

            matches.push({
                category: entry.category,
                name: entry.name,
                brief: entry.brief,
                rank: rank
            });
        }

        // Sort search results by rank in descending order
        matches.sort((first, second) => second.rank - first.rank);

        displaySearchResults(matches);
    } catch (error) {
        console.error("Error while searching functions:", error);
    }
}
67 |
68 |
69 |
/**
 * Replace the contents of `#searchResultsSection` with one card per result
 * and hide the `#home` section.
 *
 * @param {Array<{category: string, name: string, brief: string}>} results
 *        Entries to render, already in display order.
 *
 * Any error (for example a missing section element) is logged to the console.
 */
function displaySearchResults(results) {
    try {
        const homeSection = document.getElementById("home");
        const searchResultsSection = document.getElementById("searchResultsSection");

        // Hide home section and show search results section
        homeSection.style.display = "none";
        searchResultsSection.style.display = "block";

        const categoryTitleElement = document.createElement("h2");
        categoryTitleElement.className = "category-title";
        categoryTitleElement.innerHTML = "Search results";

        searchResultsSection.innerHTML = "";
        searchResultsSection.appendChild(categoryTitleElement);

        // Display search results in the section
        for (const result of results) {
            // Block-scoped binding: each click handler below closes over its
            // own card. With a function-scoped `var`, every handler would
            // see the last card created by the loop.
            const cardLink = document.createElement("a");
            cardLink.className = "card mb-3 search-result-card";
            cardLink.href = "./" + result.name.toLowerCase() + ".html";
            cardLink.onclick = function () {
                window.location.href = cardLink.href;
            };

            const cardBody = document.createElement("div");
            cardBody.className = "card-body";

            const cardTitle = document.createElement("h5");
            cardTitle.className = "card-title search-result-title";
            cardTitle.innerHTML = result.category + " Functions";

            const cardText = document.createElement("p");
            cardText.className = "card-text search-result-text";
            // The displayed name drops everything up to the first underscore.
            cardText.innerHTML = "" + result.name.replace(/^.*?_/, '') + " : " + result.brief;

            cardBody.appendChild(cardTitle);
            cardBody.appendChild(cardText);
            cardLink.appendChild(cardBody);

            searchResultsSection.appendChild(cardLink);
        }
    } catch (error) {
        console.error("Error in displaying search results:", error);
    }
}
116 |
--------------------------------------------------------------------------------
/static_site/assets/index/style.css:
--------------------------------------------------------------------------------
1 | body { /* Page defaults: monospace font, dark teal text on a light grey background. */
2 | font-family: 'Courier New', Courier, monospace;
3 | color: #005050;
4 | background-color: #f8f9fa;
5 | }
6 |
7 | .menu-container { /* Fixed panel on the left edge, full viewport height, scrolls vertically on overflow. */
8 | position: fixed;
9 | top: 3%;
10 | left: 0%;
11 | height: 100vh;
12 | padding-left: 1%;
13 | padding-top: 2%;
14 | background-color: #005050;
15 | color: #ffffff;
16 | border-right: 1px solid #dee2e6;
17 | overflow-y: auto;
18 | z-index: 1000;
19 | width: max-content;
20 | }
21 |
22 | .menu-item { /* Clickable menu entry; the transparent border matches the 1px border shown on hover. */
23 | display: block;
24 | padding: 8px 16px;
25 | margin-bottom: 10px;
26 | cursor: pointer;
27 | transition: background-color 0.3s;
28 | border: 1px solid transparent;
29 | color: #ffffff;
30 | }
31 |
32 | .menu-item:hover { /* Hover state: lighter teal background and a visible border. */
33 | background-color: #007e7e;
34 | border: 1px solid #dee2e6;
35 | color: #ffffff;
36 | }
37 |
38 | .content-container { /* Content area offset from the left and top; presumably to clear the fixed menu and navbar - confirm. */
39 | margin-left: 18%;
40 | margin-top: 11%;
41 | background-color: #f8f9fa;
42 | }
43 |
44 | .fixed-container { /* Fixed-position strip, 72% wide, pulled upwards with a negative top margin. */
45 | position: fixed;
46 | background-color: #f8f9fa;
47 | z-index: 1000;
48 | width: 72%;
49 | margin-top: -7%;
50 | padding-top: 2%;
51 | margin-bottom: 0%;
52 | }
53 |
54 | .hidden { /* Utility class: removes the element from layout. */
55 | display: none;
56 | }
57 |
58 | .category-title { /* Section heading underlined by a thin rule. */
59 | border-bottom: 1px solid #dee2e6;
60 | padding-bottom: 10px;
61 | margin-bottom: 20px;
62 | }
63 |
64 | .nav-link { /* Navigation links: dark grey by default. */
65 | color: #2a2a2a;
66 | }
67 |
68 | .nav-link:hover { /* Navigation links turn blue on hover. */
69 | color: #007bff;
70 | }
71 |
72 | .navbar { /* Fixed full-width bar; z-index 2000 stacks it above .menu-container and .fixed-container (1000). */
73 | position: fixed;
74 | z-index: 2000;
75 | width: 100%;
76 | }
77 |
78 | .card { /* Animate transform changes over 0.2s (used by the hover scale). */
79 | transition: transform 0.2s;
80 | }
81 |
82 | .card:hover { /* Enlarge the card by 5% on hover. */
83 | transform: scale(1.05);
84 | }
85 |
86 |
87 | .functions-table { /* Full-width table with collapsed borders. */
88 | border-collapse: collapse;
89 | width: 100%;
90 | }
91 |
92 | .functions-table td { /* Cell padding. */
93 | padding: 0.5rem;
94 | }
95 |
96 | .functions-table .title-column { /* Title column: 30% wide, bold, dark text. */
97 | width: 30%;
98 | color: #201D38;
99 | font-weight: bold;
100 | }
101 |
102 | .functions-table tr:hover { /* Highlight the hovered row: white text on dark teal. */
103 | background-color: #005050;
104 | color: #ffffff;
105 | }
106 |
107 | .functions-table tr:hover .title-column { /* Override the dark title colour so it stays readable on the hovered row. */
108 | color: #ffffff;
109 | }
110 |
111 | .search-result-title { /* Search card title: dark teal, no underline. */
112 | color: #005050;
113 | text-decoration: none;
114 | }
115 |
116 | .search-result-text { /* Search card body text. */
117 | color: black;
118 | }
119 |
--------------------------------------------------------------------------------
/static_site/assets/supplementary/script.js:
--------------------------------------------------------------------------------
1 | (function () {
2 |
3 | var dialectInfo = {};
4 |
5 | function updateDialect(newValue) {
6 | const desiredDialect = `dialect-${newValue}`;
7 | const caseInfo = dialectInfo[newValue].examples;
8 | const dialectDefinitions = document.querySelectorAll('.dialect-definition');
9 | for (var dialectDefinition of dialectDefinitions) {
10 | if (dialectDefinition.id == desiredDialect) {
11 | dialectDefinition.removeAttribute('hidden');
12 | } else {
13 | dialectDefinition.setAttribute('hidden', '');
14 | }
15 | }
16 |
17 | const cases = document.querySelectorAll('.bft-case');
18 | const errMessages = document.querySelectorAll('.bft-case-err-message');
19 |
20 | for (let i = 0; i < caseInfo.length; i++) {
21 | const case_msg = caseInfo[i];
22 | if (case_msg == null) {
23 | cases[i].classList.remove("bft-error-case");
24 | errMessages[i].setAttribute("hidden", "");
25 | } else {
26 | cases[i].classList.add("bft-error-case");
27 | errMessages[i].removeAttribute("hidden");
28 | errMessages[i].querySelector("td").innerText = case_msg;
29 | }
30 | }
31 |
32 | const kernelInfo = dialectInfo[newValue].kernels;
33 | const kernelItems = document.querySelectorAll('.bft-kernel');
34 | for (let i = 0; i < kernelInfo.length; i++) {
35 | const kernelSpans = kernelItems[i].querySelectorAll('span');
36 | if (kernelInfo[i]) {
37 | kernelSpans[0].classList.remove('bft-unsupported-kernel');
38 | kernelSpans[1].setAttribute('hidden', '');
39 | } else {
40 | kernelSpans[0].classList.add('bft-unsupported-kernel');
41 | kernelSpans[1].removeAttribute('hidden');
42 | }
43 | }
44 | }
45 |
46 | window.bftInitialize = function (functionDialectInfo) {
47 | dialectInfo = functionDialectInfo;
48 | const dialectSelect = document.getElementById('dialect');
49 | updateDialect(dialectSelect.value);
50 | dialectSelect.addEventListener('change', (e) => {
51 | updateDialect(e.target.value);
52 | });
53 | }
54 |
55 | })();
56 |
--------------------------------------------------------------------------------
/static_site/assets/supplementary/style.css:
--------------------------------------------------------------------------------
1 | .tooltip { /* Absolutely positioned dark box with white text and rounded corners. */
2 | position: absolute;
3 | z-index: 99;
4 | padding: 5px;
5 | background: #222;
6 | color: #fff;
7 | border-radius: 5px;
8 | }
9 |
10 | tbody { /* Positioned container; presumably the anchor for absolutely positioned tooltips - confirm. */
11 | position: relative;
12 | }
13 |
14 | .bft-error-case td { /* Light red cell background for case rows that carry the bft-error-case class. */
15 | background-color: #FFCDD2;
16 | background-clip: padding-box;
17 | }
18 |
19 | .bft-case-err-message { /* Error message rows: light, italic text. */
20 | font-weight: lighter !important;
21 | font-style: italic;
22 | }
23 |
24 | /* We don't use row headers */
25 | table tbody td:first-child {
26 | font-weight: initial;
27 | }
28 |
29 | .bft-unsupported-kernel { /* Strike through kernels flagged as unsupported. */
30 | text-decoration: line-through;
31 | }
32 |
33 | a.disabled { /* Disabled links: default cursor, secondary colour, no underline. */
34 | cursor: initial;
35 | color: var(--secondary-color);
36 | text-decoration: none;
37 | }
38 |
39 | a.disabled:hover { /* Disabled links get no hover background. */
40 | background: initial;
41 | }
42 |
--------------------------------------------------------------------------------
/static_site/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon-16x16.png
--------------------------------------------------------------------------------
/static_site/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon-32x32.png
--------------------------------------------------------------------------------
/static_site/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon.ico
--------------------------------------------------------------------------------
/supplemental/arithmetic/abs.md:
--------------------------------------------------------------------------------
1 | # Abs
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | Computing the absolute value of an integer can overflow because the two's complement range is
8 | asymmetric (for example, abs(-128) is not representable as an int8). This option controls the
9 | behavior of the function when the result falls outside the permissible range of the type class.
10 |
11 | #### SILENT
12 |
13 | /[%Overflow$SILENT%]
14 |
15 | #### SATURATE
16 |
17 | /[%Overflow$SATURATE%]
18 |
19 | #### ERROR
20 |
21 | /[%Overflow$ERROR%]
22 |
23 | ## Details
24 |
25 | ### Non multiplicative
26 |
27 | Although the mathematical absolute value operation is multiplicative, the abs function is not,
28 | because of overflow. For example, for int8, abs(-1 * -128) will not be the same as
29 | abs(-1) * abs(-128), since the former will cause an overflow.
30 |
31 | ### Triangular Inequality
32 |
33 | Mathematically, the absolute operation has the triangular inequality, i.e. for two real numbers,
34 | x & y, abs(x+y) <= abs(x) + abs(y). This might not hold true for the abs function due to overflow.
35 | For example, for int8, abs(-127 + 1) will not be the same as abs(-127) + abs(1), since the
36 | latter will overflow.
37 |
38 | ## Properties
39 |
40 | ### Null propagating
41 |
42 | /[%Properties$Null_propagating%]
43 |
44 | ### NaN propagating
45 |
46 | /[%Properties$NaN_propagating%]
47 |
48 | ### Stateless
49 |
50 | /[%Properties$Stateless%]
51 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/acos.md:
--------------------------------------------------------------------------------
1 | # Acos
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Arccosine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ### On_domain_error
32 |
33 | Arccosine function has a domain of [-1,1], i.e. values of only this range are allowed. This option controls the behavior when the function is called with values outside of this range.
34 |
35 | #### NAN
36 |
37 | /[%On_domain_error$NAN%]
38 |
39 | #### ERROR
40 |
41 | /[%On_domain_error$ERROR%]
42 |
43 | ## Details
44 |
45 | ### Other floating point exceptions
46 |
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 |
53 | ### Numerical Precision
54 |
55 | The precision of the acos function depends on the architecture in various dialects.
56 |
57 | ### Output Range
58 |
59 | The arccosine function has an output range of [0, pi], and it evaluates to 0
60 | at 1.
61 |
62 | ## Properties
63 |
64 | ### Null propagating
65 |
66 | /[%Properties$Null_propagating%]
67 |
68 | ### NaN propagating
69 |
70 | /[%Properties$NaN_propagating%]
71 |
72 | ### Stateless
73 |
74 | /[%Properties$Stateless%]
75 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/acosh.md:
--------------------------------------------------------------------------------
1 | # Acosh
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Hyperbolic arccosine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ### On_domain_error
32 |
33 | Hyperbolic arccosine function has a domain of [1, Infinity], i.e. input should be greater than or equal to one. This option controls the behavior when the function is called with values outside of this range.
34 |
35 | #### NAN
36 |
37 | /[%On_domain_error$NAN%]
38 |
39 | #### ERROR
40 |
41 | /[%On_domain_error$ERROR%]
42 |
43 | ## Details
44 |
45 | ### Other floating point exceptions
46 |
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 |
53 | ### Numerical Precision
54 |
55 | The precision of the acosh function depends on the architecture in various dialects.
56 |
57 | ### Output Range
58 |
59 | The acosh function has an output range of [0, Infinity], and it evaluates to 0
60 | at 1.
61 |
62 | ## Properties
63 |
64 | ### Null propagating
65 |
66 | /[%Properties$Null_propagating%]
67 |
68 | ### NaN propagating
69 |
70 | /[%Properties$NaN_propagating%]
71 |
72 | ### Stateless
73 |
74 | /[%Properties$Stateless%]
75 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/add.md:
--------------------------------------------------------------------------------
1 | # Add
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | Adding two integers can trigger an overflow when the result is outside the
8 | representable range of the type class. This option controls what happens when
9 | this overflow occurs.
10 |
11 | #### SILENT
12 |
13 | /[%Overflow$SILENT%] For example, adding two int16 values cannot
14 | yield an int32 on overflow.
15 |
16 | #### SATURATE
17 |
18 | /[%Overflow$SATURATE%]
19 |
20 | #### ERROR
21 |
22 | /[%Overflow$ERROR%]
23 |
24 | ### Rounding
25 |
26 | Adding two floating point numbers can yield a result that is not exactly
27 | representable in the given type class. In this case the value will be rounded.
28 | Rounding behaviors are defined as part of the IEEE 754 standard.
29 |
30 | #### TIE_TO_EVEN
31 |
32 | /[%Rounding$TIE_TO_EVEN%]
33 |
34 | #### TIE_AWAY_FROM_ZERO
35 |
36 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
37 |
38 | #### TRUNCATE
39 |
40 | /[%Rounding$TRUNCATE%]
41 |
42 | #### CEILING
43 |
44 | /[%Rounding$CEILING%]
45 |
46 | #### FLOOR
47 |
48 | /[%Rounding$FLOOR%]
49 |
50 | ## Details
51 |
52 | ### Other floating point exceptions
53 |
54 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
55 | example, division by zero, overflow, and underflow. However, these exceptions
56 | have default behaviors defined by IEEE 754 and, since no known engine deviates
57 | from these default values, these exceptions are not exposed as options. For more
58 | information on what happens in these cases refer to the IEEE 754 standard.
59 |
60 | ### Not associative
61 |
62 | Addition, the algebraic operation, is associative and commutative, so it may be tempting to
63 | believe that a chain of add calls can be freely reordered. However, this is not true because
64 | of overflow. For example, when working with int8 the result of
65 | add(add(120, 10), -5) will yield a different result than add(add(120, -5), 10)
66 | because the first will overflow and the second will not.
67 |
68 | ## Properties
69 |
70 | ### Null propagating
71 |
72 | /[%Properties$Null_propagating%]
73 |
74 | ### NaN propagating
75 |
76 | /[%Properties$NaN_propagating%]
77 |
78 | ### Stateless
79 |
80 | /[%Properties$Stateless%] This is not guaranteed to be true for integer addition when overflow is SILENT.
81 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/asin.md:
--------------------------------------------------------------------------------
1 | # Asin
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Arcsine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ### On_domain_error
32 |
33 | Arcsine function has a domain of [-1,1], i.e. values of only this range are allowed. This option controls the behavior when the function is called with values outside of this range.
34 |
35 | #### NAN
36 |
37 | /[%On_domain_error$NAN%]
38 |
39 | #### ERROR
40 |
41 | /[%On_domain_error$ERROR%]
42 |
43 | ## Details
44 |
45 | ### Other floating point exceptions
46 |
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 |
53 | ### Numerical Precision
54 |
55 | The precision of the asin function depends on the architecture in various dialects.
56 |
57 | ### Output Range
58 |
59 | The arcsine function has an output range of [-pi/2, pi/2], and it evaluates to 0
60 | at 0.
61 |
62 | ## Properties
63 |
64 | ### Null propagating
65 |
66 | /[%Properties$Null_propagating%]
67 |
68 | ### NaN propagating
69 |
70 | /[%Properties$NaN_propagating%]
71 |
72 | ### Stateless
73 |
74 | /[%Properties$Stateless%]
75 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/asinh.md:
--------------------------------------------------------------------------------
1 | # Asinh
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Hyperbolic arcsine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the asinh function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | The asinh function has an output range of all real numbers, and it evaluates to 0
48 | at 0.
49 |
50 | ## Properties
51 |
52 | ### Null propagating
53 |
54 | /[%Properties$Null_propagating%]
55 |
56 | ### NaN propagating
57 |
58 | /[%Properties$NaN_propagating%]
59 |
60 | ### Stateless
61 |
62 | /[%Properties$Stateless%]
63 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/atan.md:
--------------------------------------------------------------------------------
1 | # Atan
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Arctangent of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the atan function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | The arctangent function has an output range of [-pi/2, pi/2], and it evaluates to 0
48 | at 0.
49 |
50 | ## Properties
51 |
52 | ### Null propagating
53 |
54 | /[%Properties$Null_propagating%]
55 |
56 | ### NaN propagating
57 |
58 | /[%Properties$NaN_propagating%]
59 |
60 | ### Stateless
61 |
62 | /[%Properties$Stateless%]
63 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/atan2.md:
--------------------------------------------------------------------------------
1 | # Atan2
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Arctangent of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ### On_domain_error
32 |
33 | Mathematically, atan2 function has a domain of [-Infinity, Infinity], i.e. values of only this range are allowed. This option controls the behavior when the function is called with values outside of this range.
34 |
35 | #### NAN
36 |
37 | /[%On_domain_error$NAN%]
38 |
39 | #### ERROR
40 |
41 | /[%On_domain_error$ERROR%]
42 |
43 | ## Details
44 |
45 | ### Other floating point exceptions
46 |
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 |
53 | ### Numerical Precision
54 |
55 | The precision of the atan2 function depends on the architecture in various dialects.
56 |
57 | ### Output Range
58 |
59 | The atan2 function has an output range of [-pi, pi].
60 |
61 | ## Properties
62 |
63 | ### Null propagating
64 |
65 | /[%Properties$Null_propagating%]
66 |
67 | ### NaN propagating
68 |
69 | /[%Properties$NaN_propagating%]
70 |
71 | ### Stateless
72 |
73 | /[%Properties$Stateless%]
74 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/atanh.md:
--------------------------------------------------------------------------------
1 | # Atanh
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Hyperbolic arctangent of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ### On_domain_error
32 |
33 | Hyperbolic arctangent function has a domain of [-1, 1]. This option controls the behavior when the function is called with values outside of this range.
34 |
35 | #### NAN
36 |
37 | /[%On_domain_error$NAN%]
38 |
39 | #### ERROR
40 |
41 | /[%On_domain_error$ERROR%]
42 |
43 | ## Details
44 |
45 | ### Other floating point exceptions
46 |
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 |
53 | ### Numerical Precision
54 |
55 | The precision of the atanh function depends on the architecture in various dialects.
56 |
57 | ### Output Range
58 |
59 | The atanh function has an output range of all real numbers, and it evaluates to 0
60 | at 0.
61 |
62 | ## Properties
63 |
64 | ### Null propagating
65 |
66 | /[%Properties$Null_propagating%]
67 |
68 | ### NaN propagating
69 |
70 | /[%Properties$NaN_propagating%]
71 |
72 | ### Stateless
73 |
74 | /[%Properties$Stateless%]
75 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_and.md:
--------------------------------------------------------------------------------
1 | # Bitwise_and
2 |
3 | ## Details
4 |
5 | ### Associative
6 |
7 | The bitwise_and function is associative, i.e.
8 | the grouping of operands does not affect the result. For example,
9 | bitwise_and(bitwise_and(a,b), c) will be same as bitwise_and(a, bitwise_and(b,c)).
10 |
11 | ### Commutative
12 |
13 | The order of operands does not affect the result in Bitwise_and. For example,
14 | bitwise_and(a,b) will be the same as bitwise_and(b,a).
15 |
16 | ### Identity
17 |
18 | For any valid integer, the bitwise_and with the bit pattern of all ones results in the
19 | integer itself. For example, bitwise_and(123, -1) = 123, since -1 is all ones in two's complement.
20 |
21 | ### Bitwise Not Relationship
22 |
23 | The result of performing a bitwise_and operation between a value
24 | x and its bitwise_not is always 0.
25 |
26 | ## Properties
27 |
28 | ### Null propagating
29 |
30 | /[%Properties$Null_propagating%]
31 |
32 | ### NaN propagating
33 |
34 | /[%Properties$NaN_propagating%]
35 |
36 | ### Stateless
37 |
38 | /[%Properties$Stateless%]
39 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_not.md:
--------------------------------------------------------------------------------
1 | # Bitwise_not
2 |
3 | ## Details
4 |
5 | ### Complementary
6 |
7 | The bitwise_not function is complementary with itself (it is its own inverse), i.e.
8 | bitwise_not(bitwise_not(x)) will be equal to x, for any integer.
9 |
10 | ### XOR Relationship
11 |
12 | Bitwise_not has a relationship with the XOR function, where the XORing of
13 | a valid integer with the bit pattern of all 1s results in the bitwise_not of
14 | that integer.
15 |
16 | ### Two's complement
17 |
18 | The bitwise_not of a valid integer is equivalent to negating the number and subtracting 1.
19 |
20 | ## Properties
21 |
22 | ### Null propagating
23 |
24 | /[%Properties$Null_propagating%]
25 |
26 | ### NaN propagating
27 |
28 | /[%Properties$NaN_propagating%]
29 |
30 | ### Stateless
31 |
32 | /[%Properties$Stateless%]
33 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_or.md:
--------------------------------------------------------------------------------
1 | # Bitwise_or
2 |
3 | ## Details
4 |
5 | ### Associative
6 |
7 | The bitwise_or function is associative, i.e.
8 | the grouping of operands does not affect the result. For example,
9 | bitwise_or(bitwise_or(a,b), c) will be same as bitwise_or(a, bitwise_or(b,c)).
10 |
11 | ### Commutative
12 |
13 | The order of operands does not affect the result in Bitwise_or. For example,
14 | bitwise_or(a,b) will be the same as bitwise_or(b,a).
15 |
16 | ### Identity
17 |
18 | For any valid integer, the bitwise_or with zero results in the
19 | integer itself. For example, bitwise_or(123, 0) = 123
20 |
21 | ## Properties
22 |
23 | ### Null propagating
24 |
25 | /[%Properties$Null_propagating%]
26 |
27 | ### NaN propagating
28 |
29 | /[%Properties$NaN_propagating%]
30 |
31 | ### Stateless
32 |
33 | /[%Properties$Stateless%]
34 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_xor.md:
--------------------------------------------------------------------------------
1 | # Bitwise_xor
2 |
3 | ## Details
4 |
5 | ### Associative
6 |
7 | The bitwise_xor function is associative, i.e.
8 | the grouping of operands does not affect the result. For example,
9 | bitwise_xor(bitwise_xor(a,b), c) will be same as bitwise_xor(a, bitwise_xor(b,c)).
10 |
11 | ### Commutative
12 |
13 | The order of operands does not affect the result in Bitwise_xor. For example,
14 | bitwise_xor(a,b) will be the same as bitwise_xor(b,a).
15 |
16 | ## Properties
17 |
18 | ### Null propagating
19 |
20 | /[%Properties$Null_propagating%]
21 |
22 | ### NaN propagating
23 |
24 | /[%Properties$NaN_propagating%]
25 |
26 | ### Stateless
27 |
28 | /[%Properties$Stateless%]
29 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/cos.md:
--------------------------------------------------------------------------------
1 | # Cos
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Cosine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the cosine function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | Being a sinusoidal trigonometric function, the output of the cos function is restricted to [-1,1].
48 |
49 | ## Properties
50 |
51 | ### Null propagating
52 |
53 | /[%Properties$Null_propagating%]
54 |
55 | ### NaN propagating
56 |
57 | /[%Properties$NaN_propagating%]
58 |
59 | ### Stateless
60 |
61 | /[%Properties$Stateless%]
62 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/cosh.md:
--------------------------------------------------------------------------------
1 | # Cosh
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Hyperbolic cosine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the cosh function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | The hyperbolic cosine function has an output range of [1, Infinity], and it evaluates to 1
48 | at an input of 0.
49 |
50 | ## Properties
51 |
52 | ### Null propagating
53 |
54 | /[%Properties$Null_propagating%]
55 |
56 | ### NaN propagating
57 |
58 | /[%Properties$NaN_propagating%]
59 |
60 | ### Stateless
61 |
62 | /[%Properties$Stateless%]
63 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/definitions.yaml:
--------------------------------------------------------------------------------
1 | Overflow:
2 | SILENT: >
3 | If an overflow occurs then an integer value will be returned. The value is
4 | undefined. It may be any integer and can change from engine to engine or
5 | even from row to row within the same query. The only constraint is that it
6 | must be a valid value for the result type class.
7 |
8 | SATURATE: >
9 | If an overflow occurs then the largest (for positive overflow) or smallest
10 | (for negative overflow) possible value for the type class will be returned.
11 |
12 | ERROR: >
13 | If an overflow occurs then an error should be raised.
14 |
15 | Rounding:
16 | TIE_TO_EVEN: >
17 | Round to the nearest value. If the number is exactly halfway between two
18 | values then round to the number whose least significant digit is even. Or,
19 | because we are working with binary digits, round to the number whose last digit
20 | is 0. This is the default behavior in many systems because it helps to avoid
21 | bias in rounding.
22 |
23 | TIE_AWAY_FROM_ZERO: >
24 | Round to the nearest value. If the number is exactly halfway between two values
25 | then round to the number furthest from zero.
26 |
27 | TRUNCATE: >
28 | Round toward zero. The result is the representable value closest to the exact
29 | value whose magnitude is not greater than that of the exact value.
30 |
31 | CEILING: >
32 | Round to the value closest to positive infinity.
33 |
34 | FLOOR: >
35 | Round to the value closest to negative infinity.
36 |
37 | Properties:
38 | Null_propagating: >
39 | If any of the inputs is null then the output will be null
40 |
41 | NaN_propagating: >
42 | If any of the inputs is NaN (and the other input is not null) then the output
43 | will be NaN
44 |
45 | Stateless: >
46 | The output will be the same regardless of the order of input rows.
47 |
48 | On_domain_error:
49 | NAN: >
50 | Return a Not a Number value if any or all of the input values are either 0 or ±infinity.
51 | ERROR: >
52 | If any or all of the input values are either 0 or ±infinity an error should be raised.
53 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/divide.md:
--------------------------------------------------------------------------------
1 | # Divide
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | Dividing two integers can trigger an overflow when the result is outside the
8 | representable range of the type class. This option controls what happens when
9 | this overflow occurs.
10 |
11 | #### SILENT
12 |
13 | If an overflow occurs then an integer value will be returned. The value is
14 | undefined. It may be any integer and can change from engine to engine or
15 | even from row to row within the same query. The only constraint is that it
16 | must be a valid value for the result type class (e.g. dividing two int16
17 | cannot yield an int32 on overflow)
18 |
19 | #### SATURATE
20 |
21 | If an overflow occurs then the largest (for positive overflow) or smallest
22 | (for negative overflow) possible value for the type class will be returned.
23 |
24 | #### ERROR
25 |
26 | If an overflow occurs then an error should be raised.
27 |
28 | ### Rounding
29 |
30 | Dividing two floating point numbers can yield a result that is not exactly
31 | representable in the given type class. In this case the value will be rounded.
32 | Rounding behaviors are defined as part of the IEEE 754 standard.
33 |
34 | #### TIE_TO_EVEN
35 |
36 | Round to the nearest value. If the number is exactly halfway between two
37 | values then round to the number whose least significant digit is even. Or,
38 | because we are working with binary digits, round to the number whose last digit
39 | is 0. This is the default behavior in many systems because it helps to avoid
40 | bias in rounding.
41 |
42 | #### TIE_AWAY_FROM_ZERO
43 |
44 | Round to the nearest value. If the number is exactly halfway between two values
45 | then round to the number furthest from zero.
46 |
47 | #### TRUNCATE
48 |
49 | Round toward zero. The result is the representable value closest to the exact
50 | value whose magnitude is not greater than that of the exact value.
51 |
52 | #### CEILING
53 |
54 | Round to the value closest to positive infinity.
55 |
56 | #### FLOOR
57 |
58 | Round to the value closest to negative infinity.
59 |
60 | ### On_domain_error
61 |
62 | Option controls what happens when the dividend and divisor in a divide function
63 | are either both 0 or both ±infinity.
64 |
65 | #### NAN
66 |
67 | /[%On_domain_error$NAN%]
68 |
69 | #### ERROR
70 |
71 | /[%On_domain_error$ERROR%]
72 |
73 | ### On_division_by_zero
74 |
75 | Option controls function behavior in cases when the divisor is 0 but the dividend is not zero.
76 |
77 | #### LIMIT
78 |
79 | Return +infinity or -infinity depending on the signs of the dividend and the divisor involved.
80 |
81 | ## Details
82 |
83 | ### Other floating point exceptions
84 |
85 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
86 | example, overflow and underflow. However, these exceptions
87 | have default behaviors defined by IEEE 754 and, since no known engine deviates
88 | from these default values, these exceptions are not exposed as options. For more
89 | information on what happens in these cases refer to the IEEE 754 standard.
90 |
91 | ### Not commutative
92 |
93 | Division, the algebraic operation, is not commutative. In addition, expressions
94 | that are mathematically equal may not be equal when evaluated with the divide
95 | function because of overflow. For example, when working with int8,
96 | divide(divide(-128, -1), -1) may yield a different result than
97 | divide(-128, divide(-1, -1)) because the first will overflow and the second
98 | will not.
99 |
100 | ## Properties
101 |
102 | ### Null propagating
103 |
104 | If any of the inputs is null then the output will be null
105 |
106 | ### NaN propagating
107 |
108 | If any of the inputs is NaN (and the other input is not null) then the output
109 | will be NaN
110 |
111 | ### Stateless
112 |
113 | The output will be the same regardless of the order of input rows. This is not
114 | guaranteed to be true for integer division when overflow is SILENT.
115 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/exponential.md:
--------------------------------------------------------------------------------
1 | # Exp
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Exponential of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the exponential function depends on the precision of the input types
44 | and the way the operation is carried out in various dialects.
45 |
46 | ## Properties
47 |
48 | ### Null propagating
49 |
50 | /[%Properties$Null_propagating%]
51 |
52 | ### NaN propagating
53 |
54 | /[%Properties$NaN_propagating%]
55 |
56 | ### Stateless
57 |
58 | /[%Properties$Stateless%]
59 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/factorial.md:
--------------------------------------------------------------------------------
1 | # Factorial
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | The factorial function may return a value larger than the permissible limit
8 | of the type class, which causes an overflow. This option
9 | controls the behavior upon overflow in the factorial function.
10 |
11 | #### SILENT
12 |
13 | /[%Overflow$SILENT%]
14 |
15 | #### SATURATE
16 |
17 | /[%Overflow$SATURATE%]
18 |
19 | #### ERROR
20 |
21 | /[%Overflow$ERROR%]
22 |
23 | ## Details
24 |
25 | ### Input restrictions
26 |
27 | Mathematically, factorial is not defined for negative integers or non-integer values, since it is
28 | the product of all positive integers less than or equal to the given non-negative integer.
29 |
30 | ## Properties
31 |
32 | ### Null propagating
33 |
34 | /[%Properties$Null_propagating%]
35 |
36 | ### NaN propagating
37 |
38 | /[%Properties$NaN_propagating%]
39 |
40 | ### Stateless
41 |
42 | /[%Properties$Stateless%]
43 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/modulus.md:
--------------------------------------------------------------------------------
1 | # Modulus
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | The modulus operation typically occurs after finding the quotient,
8 | i.e., mod(x, y) = x - y * round_func(x/y), where round_func can be
9 | truncate, floor, or any such operation. Thus, the entire operation
10 | may trigger an overflow when the result is outside the representable
11 | range of the type class. This option controls what happens when this overflow occurs.
12 |
13 | #### SILENT
14 |
15 | If an overflow occurs then an integer value will be returned. The value is
16 | undefined. It may be any integer and can change from engine to engine or
17 | even from row to row within the same query. The only constraint is that it
18 | must be a valid value for the result type class (e.g. modulus of int16
19 | cannot yield an int32 on overflow)
20 |
21 | #### SATURATE
22 |
23 | If an overflow occurs then the largest (for positive overflow) or smallest
24 | (for negative overflow) possible value for the type class will be returned.
25 |
26 | #### ERROR
27 |
28 | If an overflow occurs then an error should be raised.
29 |
30 | ### Division_type
31 |
32 | Determines the rounding function that is applied to the quotient
33 | when evaluating the remainder. The remainder will be
34 | determined by r = x - y * round_func(x/y)
35 |
36 | #### TRUNCATE
37 |
38 | The quotient is evaluated i.e. the round_func(x/y) is truncated,
39 | thus the fractional result is rounded towards zero.
40 |
41 | #### FLOOR
42 |
43 | The quotient is evaluated i.e. the round_func(x/y) is floored,
44 | thus the fractional result is rounded to the largest integer
45 | value less than or equal to it.
46 |
47 | ### On_domain_error
48 |
49 | Option controls what happens when the dividend is ±infinity or
50 | the divisor is 0 or ±infinity in a modulus function.
51 |
52 | #### NULL
53 |
54 | Return a NULL if the dividend is ±infinity or the divisor is 0
55 | or ±infinity.
56 |
57 | #### ERROR
58 |
59 | If the dividend is ±infinity or the divisor is 0 or ±infinity,
60 | an error should be raised.
61 |
62 | ## Details
63 |
64 | ### Overflow
65 |
66 | The Modulus function requires the Overflow option in situations
67 | where any or all of the involved operations result in overflow
68 | from the specified range. For example, in mod(-128, -1) within
69 | the int8 range, an overflow will occur as the operation will
70 | lead to (-128) - (-1) * round_func(-128/-1). Since the division operation
71 | (-128/-1) results in an overflow (given that the range of int8
72 | is -128 to 127), the Overflow option becomes essential.
73 |
74 | ### Not commutative
75 |
76 | Modulus as an arithmetic operation is not commutative by nature.
77 |
78 | ## Properties
79 |
80 | ### Null propagating
81 |
82 | If any of the inputs is null then the output will be null
83 |
84 | ### NaN propagating
85 |
86 | If any of the inputs is NaN (and the other input is not null) then the output
87 | will be NaN
88 |
89 | ### Stateless
90 |
91 | The output will be the same regardless of the order of input rows. This is not
92 | guaranteed to be true for integer modulus when overflow is SILENT.
93 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/multiply.md:
--------------------------------------------------------------------------------
1 | # Multiply
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | Multiplying two integers can trigger an overflow when the result is outside the
8 | representable range of the type class. This option controls what happens when
9 | this overflow occurs.
10 |
11 | #### SILENT
12 |
13 | If an overflow occurs then an integer value will be returned. The value is
14 | undefined. It may be any integer and can change from engine to engine or
15 | even from row to row within the same query. The only constraint is that it
16 | must be a valid value for the result type class (e.g. multiplying two int16
17 | cannot yield an int32 on overflow)
18 |
19 | #### SATURATE
20 |
21 | If an overflow occurs then the largest (for positive overflow) or smallest
22 | (for negative overflow) possible value for the type class will be returned.
23 |
24 | #### ERROR
25 |
26 | If an overflow occurs then an error should be raised.
27 |
28 | ### Rounding
29 |
30 | Multiplying two floating point numbers can yield a result that is not exactly
31 | representable in the given type class. In this case the value will be rounded.
32 | Rounding behaviors are defined as part of the IEEE 754 standard.
33 |
34 | #### TIE_TO_EVEN
35 |
36 | Round to the nearest value. If the number is exactly halfway between two
37 | values then round to the number whose least significant digit is even. Or,
38 | because we are working with binary digits, round to the number whose last digit
39 | is 0. This is the default behavior in many systems because it helps to avoid
40 | bias in rounding.
41 |
42 | #### TIE_AWAY_FROM_ZERO
43 |
44 | Round to the nearest value. If the number is exactly halfway between two values
45 | then round to the number furthest from zero.
46 |
47 | #### TRUNCATE
48 |
49 | Round toward zero. The result is the representable value closest to the exact
50 | value whose magnitude is not greater than that of the exact value.
51 |
52 | #### CEILING
53 |
54 | Round to the value closest to positive infinity.
55 |
56 | #### FLOOR
57 |
58 | Round to the value closest to negative infinity.
59 |
60 | ## Details
61 |
62 | ### Other floating point exceptions
63 |
64 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
65 | example, division by zero, overflow, and underflow. However, these exceptions
66 | have default behaviors defined by IEEE 754 and, since no known engine deviates
67 | from these default values, these exceptions are not exposed as options. For more
68 | information on what happens in these cases refer to the IEEE 754 standard.
69 |
70 | ### Not commutative
71 |
72 | Multiplication, the algebraic operation, is commutative. So it may be tempting to
73 | believe the multiply function is commutative as well. However, this is not true
74 | because of overflow. For example, when working with int8 the result of
75 | multiply(multiply(-1, -128), -1) may yield a different result than
76 | multiply(multiply(-1, -1), -128) because the first will overflow and the second
77 | will not.
78 |
79 | ## Properties
80 |
81 | ### Null propagating
82 |
83 | If any of the inputs is null then the output will be null
84 |
85 | ### NaN propagating
86 |
87 | If any of the inputs is NaN (and the other input is not null) then the output
88 | will be NaN
89 |
90 | ### Stateless
91 |
92 | The output will be the same regardless of the order of input rows. This is not
93 | guaranteed to be true for integer multiplication when overflow is SILENT.
94 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/negate.md:
--------------------------------------------------------------------------------
1 | # Negate
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | Negating a number on the limit of the allowed range of the type class may lead to
8 | an overflow. For example, for int8, negate(-128) will overflow
9 | since the range of the int8 type class is [-128, 127]. This option
10 | controls the behavior upon overflow in the negate function.
11 |
12 | #### SILENT
13 |
14 | /[%Overflow$SILENT%]
15 |
16 | #### SATURATE
17 |
18 | /[%Overflow$SATURATE%]
19 |
20 | #### ERROR
21 |
22 | /[%Overflow$ERROR%]
23 |
24 | ## Details
25 |
26 | ### Not Idempotent
27 |
28 | Algebraically, applying negation twice returns the original value, i.e. negate(negate(x)) = x. The function does not guarantee this because of overflow.
29 | For example, with int8, the result of negate(negate(-128)) may not be -128 because the inner negate(-128) will overflow.
30 |
31 | ### Not commutative
32 |
33 | Negation, the algebraic operation, commutes with addition, i.e. -(a + b) = (-a) + (-b).
34 | So it may be tempting to believe the negate function commutes with addition as well. However, this is not true because
35 | of overflow. For example, when working with int8 the result of
36 | negate(124 + 4) will yield a different result than negate(124) + negate(4)
37 | because the first will overflow and the second will not.
38 |
39 | ## Properties
40 |
41 | ### Null propagating
42 |
43 | /[%Properties$Null_propagating%]
44 |
45 | ### NaN propagating
46 |
47 | /[%Properties$NaN_propagating%]
48 |
49 | ### Stateless
50 |
51 | /[%Properties$Stateless%]
52 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/power.md:
--------------------------------------------------------------------------------
1 | # Power
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | The power operation may lead to overflowing when the result is
8 | outside the representable range of the type class.
9 | This option controls what happens when this overflow occurs.
10 |
11 | #### SILENT
12 |
13 | /[%Overflow$SILENT%]
14 |
15 | #### SATURATE
16 |
17 | /[%Overflow$SATURATE%]
18 |
19 | #### ERROR
20 |
21 | /[%Overflow$ERROR%]
22 |
23 | ## Details
24 |
25 | ### Overflow
26 |
27 | The power function requires the Overflow control for situations where
28 | the resulting value exceeds the type class limit. For example, in
29 | pow(2, 65), the input values are in the allowed int64 range,
30 | but the result goes out of range.
31 |
32 | ### Numerical Precision
33 |
34 | The precision of the power function depends on the precision of the input types
35 | and the way the operation is carried out in various dialects.
36 |
37 | ## Properties
38 |
39 | ### Null propagating
40 |
41 | /[%Properties$Null_propagating%]
42 |
43 | ### NaN propagating
44 |
45 | /[%Properties$NaN_propagating%]
46 |
47 | ### Stateless
48 |
49 | /[%Properties$Stateless%]
50 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/sign.md:
--------------------------------------------------------------------------------
1 | # Sign
2 |
3 | ## Details
4 |
5 | ### Multiplicative
6 |
7 | The Sign function is multiplicative, i.e. sign(x * y) = sign(x) * sign(y). For example, in int8,
8 | sign(-2 * 3) will be the same as sign(-2) * sign(3).
9 |
10 | ## Properties
11 |
12 | ### Null propagating
13 |
14 | /[%Properties$Null_propagating%]
15 |
16 | ### NaN propagating
17 |
18 | /[%Properties$NaN_propagating%]
19 |
20 | ### Stateless
21 |
22 | /[%Properties$Stateless%]
23 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/sin.md:
--------------------------------------------------------------------------------
1 | # Sin
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Sine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the sin function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | Being a sinusoidal trigonometric function, the output of the sin function is restricted to [-1,1].
48 |
49 | ## Properties
50 |
51 | ### Null propagating
52 |
53 | /[%Properties$Null_propagating%]
54 |
55 | ### NaN propagating
56 |
57 | /[%Properties$NaN_propagating%]
58 |
59 | ### Stateless
60 |
61 | /[%Properties$Stateless%]
62 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/sinh.md:
--------------------------------------------------------------------------------
1 | # Sinh
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Hyperbolic sine of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the sinh function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | The hyperbolic sine function has an output range of [-Infinity, Infinity], and it evaluates to 0
48 | at an input of 0.
49 |
50 | ## Properties
51 |
52 | ### Null propagating
53 |
54 | /[%Properties$Null_propagating%]
55 |
56 | ### NaN propagating
57 |
58 | /[%Properties$NaN_propagating%]
59 |
60 | ### Stateless
61 |
62 | /[%Properties$Stateless%]
63 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/sqrt.md:
--------------------------------------------------------------------------------
1 | # Sqrt
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Taking the square root of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ### On_domain_error
32 |
33 | Square root operation is typically allowed only for non-negative real numbers. This option controls the behavior when the function is called with values not adhering to this rule.
34 |
35 | #### NAN
36 |
37 | /[%On_domain_error$NAN%]
38 |
39 | #### ERROR
40 |
41 | /[%On_domain_error$ERROR%]
42 |
43 | ## Details
44 |
45 | ### Other floating point exceptions
46 |
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 |
53 | ### Domain restrictions
54 |
55 | Mathematically, the square root of a negative real number is a complex number, and thus, in function usage, typically only non-negative real numbers are allowed. Applying the function on a negative real number may raise an Error or result in a NaN value.
56 |
57 |
58 | ## Properties
59 |
60 | ### Null propagating
61 |
62 | /[%Properties$Null_propagating%]
63 |
64 | ### NaN propagating
65 |
66 | /[%Properties$NaN_propagating%]
67 |
68 | ### Stateless
69 |
70 | /[%Properties$Stateless%]
71 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/subtract.md:
--------------------------------------------------------------------------------
1 | # Subtract
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | Subtracting two integers can trigger an overflow when the result is outside the
8 | representable range of the type class. This option controls what happens when
9 | this overflow occurs.
10 |
11 | #### SILENT
12 |
13 | /[%Overflow$SILENT%] For example, subtracting two int16 cannot
14 | yield an int32 on overflow.
15 |
16 | #### SATURATE
17 |
18 | /[%Overflow$SATURATE%]
19 |
20 | #### ERROR
21 |
22 | /[%Overflow$ERROR%]
23 |
24 | ### Rounding
25 |
26 | Subtracting two floating point numbers can yield a result that is not exactly
27 | representable in the given type class. In this case the value will be rounded.
28 | Rounding behaviors are defined as part of the IEEE 754 standard.
29 |
30 | #### TIE_TO_EVEN
31 |
32 | /[%Rounding$TIE_TO_EVEN%]
33 |
34 | #### TIE_AWAY_FROM_ZERO
35 |
36 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
37 |
38 | #### TRUNCATE
39 |
40 | /[%Rounding$TRUNCATE%]
41 |
42 | #### CEILING
43 |
44 | /[%Rounding$CEILING%]
45 |
46 | #### FLOOR
47 |
48 | /[%Rounding$FLOOR%]
49 |
50 | ## Details
51 |
52 | ### Other floating point exceptions
53 |
54 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
55 | example, division by zero, overflow, and underflow. However, these exceptions
56 | have default behaviors defined by IEEE 754 and, since no known engine deviates
57 | from these default values, these exceptions are not exposed as options. For more
58 | information on what happens in these cases refer to the IEEE 754 standard.
59 |
60 | ### Not commutative
61 |
62 | Subtraction, the algebraic operation, allows successive subtractions to be
63 | reordered, i.e. (a - b) - c = (a - c) - b. So it may be tempting to believe the
64 | subtract function allows this as well. However, this is not true because of overflow. For example, when working with int8 the result of
65 | subtract(subtract(-120, 10), -5) will yield a different result than
66 | subtract(subtract(-120, -5), 10) because the first will overflow and the second
67 | will not.
68 |
69 | ## Properties
70 |
71 | ### Null propagating
72 |
73 | /[%Properties$Null_propagating%]
74 |
75 | ### NaN propagating
76 |
77 | /[%Properties$NaN_propagating%]
78 |
79 | ### Stateless
80 |
81 | /[%Properties$Stateless%] This is not
82 | guaranteed to be true for integer subtraction when overflow is SILENT.
83 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/sum.md:
--------------------------------------------------------------------------------
1 | # Sum
2 |
3 | ## Options
4 |
5 | ### Overflow
6 |
7 | Sum of a set of values can trigger an overflow when the result is outside the
8 | representable range of the type class. This option controls what happens when
9 | this overflow occurs.
10 |
11 | #### SILENT
12 |
13 | If an overflow occurs then an integer value will be returned. The value is
14 | undefined. It may be any integer and can change from engine to engine or
15 | even from row to row within the same query. The only constraint is that it
16 | must be a valid value for the result type class (e.g. adding two int16 cannot
17 | yield an int32 on overflow)
18 |
19 | #### SATURATE
20 |
21 | If an overflow occurs then the largest (for positive overflow) or smallest
22 | (for negative overflow) possible value for the type class will be returned.
23 |
24 | #### ERROR
25 |
26 | If an overflow occurs then an error should be raised.
27 |
28 | ## Details
29 |
30 | ### Other floating point exceptions
31 |
32 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
33 | example, division by zero, overflow, and underflow. However, these exceptions
34 | have default behaviors defined by IEEE 754 and, since no known engine deviates
35 | from these default values, these exceptions are not exposed as options. For more
36 | information on what happens in these cases refer to the IEEE 754 standard.
37 |
38 | ### Not commutative
39 |
40 | Addition, the algebraic operation, is commutative. So it may be tempting to
41 | believe the add function is commutative as well. However, this is not true because
42 | of overflow. For example, when working with int8 the result of
43 | add(add(120, 10), -5) will yield a different result than add(add(120, -5), 10)
44 | because the first will overflow and the second will not.
45 |
46 | ## Properties
47 |
48 | ### Nullability
49 |
50 | Specifies how the nullability of output arguments is mapped to
51 | input arguments. The Sum aggregate function follows a
52 | DECLARED_OUTPUT nullability.
53 |
54 | ### Decomposable
55 |
56 | The Sum aggregate function can be decomposed into more than
57 | one intermediate step.
58 |
59 | ### Intermediate
60 |
61 | The intermediate output type of the Sum function is the
62 | type class of the input arguments.
63 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/tan.md:
--------------------------------------------------------------------------------
1 | # Tan
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Tangent of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the tan function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | Mathematically, the tangent function has a range of (-Inf, Inf): it is undefined at, and
48 | approaches infinity near, input values of (pi/2) + k*pi, where k is any integer. Computationally,
49 | inputs at which the tangent function is not defined result in approximately 1255.76 or -1255.76.
50 | Thus, the output range becomes [-1255.76, 1255.76].
51 |
52 | ## Properties
53 |
54 | ### Null propagating
55 |
56 | /[%Properties$Null_propagating%]
57 |
58 | ### NaN propagating
59 |
60 | /[%Properties$NaN_propagating%]
61 |
62 | ### Stateless
63 |
64 | /[%Properties$Stateless%]
65 |
--------------------------------------------------------------------------------
/supplemental/arithmetic/tanh.md:
--------------------------------------------------------------------------------
1 | # Tanh
2 |
3 | ## Options
4 |
5 | ### Rounding
6 |
7 | Hyperbolic tangent of an input can yield a result that is not exactly
8 | representable in the given type class. In this case the value will be rounded.
9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 |
11 | #### TIE_TO_EVEN
12 |
13 | /[%Rounding$TIE_TO_EVEN%]
14 |
15 | #### TIE_AWAY_FROM_ZERO
16 |
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 |
19 | #### TRUNCATE
20 |
21 | /[%Rounding$TRUNCATE%]
22 |
23 | #### CEILING
24 |
25 | /[%Rounding$CEILING%]
26 |
27 | #### FLOOR
28 |
29 | /[%Rounding$FLOOR%]
30 |
31 | ## Details
32 |
33 | ### Other floating point exceptions
34 |
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 |
41 | ### Numerical Precision
42 |
43 | The precision of the tanh function depends on the architecture in various dialects.
44 |
45 | ### Output Range
46 |
47 | The hyperbolic tangent function has an output range of [-1, 1], and it evaluates to 0
48 | at an input of 0.
49 |
50 | ## Properties
51 |
52 | ### Null propagating
53 |
54 | /[%Properties$Null_propagating%]
55 |
56 | ### NaN propagating
57 |
58 | /[%Properties$NaN_propagating%]
59 |
60 | ### Stateless
61 |
62 | /[%Properties$Stateless%]
63 |
--------------------------------------------------------------------------------
/tools/convert_testcases/check_testcase_format_conversion_roundtrip.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import shutil
4 |
5 | from ruamel.yaml import YAML
6 | from deepdiff import DeepDiff
7 |
8 | from convert_testcases_to_substrait_test_format import (
9 | convert_directory as convert_directory_to_substrait,
10 | load_test_file,
11 | )
12 | from convert_testcases_to_yaml_format import (
13 | convert_directory as convert_directory_to_yaml,
14 | )
15 |
16 |
def compare_test_files(original_file, roundtrip_file):
    """Report whether two test files load to the same data.

    Both files are parsed with ``load_test_file`` and the parsed results are
    compared for equality.

    Args:
        original_file: Path of the original test file.
        roundtrip_file: Path of the file produced by the roundtrip conversion.

    Returns:
        bool: ``True`` when both files load to equal data, ``False`` otherwise.
    """
    o_file = load_test_file(original_file)
    r_file = load_test_file(roundtrip_file)
    # Return the verdict instead of asserting: the caller counts differences
    # from this return value, and a bare ``assert`` returned None (falsy) on a
    # match while aborting the whole run on the first mismatch.
    return o_file == r_file
21 |
22 |
def compare_directories(original_dir, roundtrip_dir):
    """Count the ``.yaml`` files under ``original_dir`` that lack a matching roundtrip file.

    For every ``.yaml`` file found while walking ``original_dir``, the file at
    the same relative path under ``roundtrip_dir`` is looked up (with any
    ".test" in that path replaced by ".yaml"). A file counts as a difference
    when the roundtrip file does not exist or when ``compare_test_files``
    returns a falsy value for the pair.

    Args:
        original_dir: Root directory of the original files.
        roundtrip_dir: Root directory of the roundtrip-converted files.

    Returns:
        int: Number of files that are missing or differ.
    """
    mismatches = 0
    for current_dir, _subdirs, names in os.walk(original_dir):
        for name in names:
            if not name.endswith(".yaml"):
                continue

            source_path = os.path.join(current_dir, name)
            relative = os.path.relpath(source_path, original_dir)
            candidate = os.path.join(roundtrip_dir, relative)
            candidate = candidate.replace(".test", ".yaml")

            if not os.path.exists(candidate):
                print(f"File missing in roundtrip directory: {candidate}")
                mismatches += 1
            elif compare_test_files(source_path, candidate):
                print(f"YAML content matches: {source_path} and {candidate}")
            else:
                mismatches += 1
    return mismatches
45 |
46 |
def main():
    """Convert the substrait test cases to YAML and back, then compare.

    The paths are relative to the current working directory. Intermediate
    output goes under ``./temp``, which is removed when the function exits,
    whether the conversion and comparison succeed or raise.
    """
    # Directories
    initial_cases_dir = "../../substrait/tests/cases"
    temp_dir = "./temp"
    intermediate_dir = f"{temp_dir}/bft_cases"
    roundtrip_dir = f"{temp_dir}/roundtrip_substrait_cases"
    uri_prefix = (
        "https://github.com/substrait-io/substrait/blob/main/extensions/substrait"
    )

    try:
        # Step 1: Convert from initial_cases_dir to intermediate_dir
        convert_directory_to_yaml(initial_cases_dir, intermediate_dir)

        # Step 2: Convert from intermediate_dir to roundtrip_dir
        convert_directory_to_substrait(intermediate_dir, roundtrip_dir, uri_prefix)

        # Step 3: Compare tests in initial_cases_dir and roundtrip_dir
        count = compare_directories(initial_cases_dir, roundtrip_dir)
        if count == 0:
            print(
                "All substrait test files match between original and roundtrip directories."
            )
        else:
            print(
                f"Differences found in {count} test files between original and roundtrip directories."
            )
    finally:
        # Always clean up the scratch directory. ignore_errors keeps a failure
        # that happened before the directory was created from being masked by
        # a FileNotFoundError raised here.
        shutil.rmtree(temp_dir, ignore_errors=True)


if __name__ == "__main__":
    main()
79 |
--------------------------------------------------------------------------------
/tools/convert_testcases/convert_testcases_to_substrait_test_format.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | import os
3 | from collections import defaultdict
4 | from itertools import count
5 | from tools.convert_testcases.convert_testcase_helper import (
6 | convert_to_substrait_test_value,
7 | )
8 |
9 |
def string_loader(loader, node):
    """YAML constructor that returns the scalar content of ``node`` as a ``str``.

    Args:
        loader: Object providing ``construct_scalar(node)``.
        node: The YAML node to convert.

    Returns:
        str: ``str()`` of whatever ``loader.construct_scalar(node)`` returns.
    """
    scalar = loader.construct_scalar(node)
    return str(scalar)
13 |
14 |
def list_of_decimal_constructor(loader: yaml.SafeLoader, node: yaml.nodes.MappingNode):
    """YAML constructor that converts every child of ``node`` to a string.

    Each element of ``node.value`` is passed through ``string_loader``.

    NOTE(review): the annotation says ``MappingNode`` but the body iterates
    ``node.value`` as a flat list of scalar nodes -- confirm the intended
    node type.

    Returns:
        list: One string per element of ``node.value``, in order.
    """
    items = []
    for item in node.value:
        items.append(string_loader(loader, item))
    return items
17 |
18 |
def load_test_file(file_path):
    """Load a YAML test file with every scalar value kept as a string.

    String-producing constructors are registered for the core scalar tags and
    for the custom ``!decimal``, ``!isostring`` and ``!decimallist`` tags
    before the file is parsed with ``yaml.FullLoader``.

    NOTE(review): ``yaml.add_constructor`` is called without a ``Loader``
    argument on every invocation, so the registration is not scoped to this
    call -- confirm that no other YAML loading in the same process relies on
    the default typing.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The parsed document as returned by ``yaml.load``.
    """
    core_tags = ("str", "int", "float", "bool", "null", "decimal")
    constructors = {f"tag:yaml.org,2002:{tag}": string_loader for tag in core_tags}
    constructors["!decimal"] = string_loader
    constructors["!isostring"] = string_loader
    constructors["!decimallist"] = list_of_decimal_constructor

    for tag, constructor in constructors.items():
        yaml.add_constructor(tag, constructor)

    with open(file_path, "r") as stream:
        return yaml.load(stream, Loader=yaml.FullLoader)
31 |
32 |
def format_return_value(case):
    """Render the expected result of a test case in substrait test format.

    Args:
        case: Test-case mapping; its optional ``result`` mapping may hold
            either ``special`` or ``value``/``type``.

    Returns:
        str: ``<!ERROR>`` or ``<!UNDEFINED>`` for those special results,
        ``nan::fp64`` for a special ``nan``, otherwise the value rendered by
        ``convert_to_substrait_test_value``.
    """
    result = case.get("result", {})
    special = result.get("special")

    if special:
        special = special.lower()

        # ERROR and UNDEFINED are written as explicit markers; an empty string
        # here would leave the generated line as "func(...) = " with no result.
        if special in {"error", "undefined"}:
            return f"<!{special.upper()}>"

        if special == "nan":
            return "nan::fp64"

    # Ordinary result: delegate the value/type formatting.
    return convert_to_substrait_test_value(result.get("value"), result.get("type"))
49 |
50 |
def format_test_case_group(case, description_map):
    """Build the ``"<group>: <description>"`` header for a test case.

    ``case["group"]`` may be a plain group name or a mapping with ``id`` and
    ``description``; it defaults to ``"basic"``. The first description seen
    for a group name is stored in ``description_map`` and reused for later
    cases of that group.

    Args:
        case: Test-case mapping.
        description_map: Mutable mapping of group name to description,
            updated in place.

    Returns:
        str: The group name and its recorded description joined by ``": "``.
    """
    group = case.get("group", "basic")

    if isinstance(group, str):
        name = group
        text = ""
    else:
        name = group.get("id", "basic")
        text = group.get("description", "") if isinstance(group, dict) else ""

    # Only the first description recorded for a group is kept.
    recorded = description_map.setdefault(name, text)
    return f"{name}: {recorded}"
61 |
62 |
def generate_define_table(case, table_id):
    """Build the ``DEFINE t<id>(...) = (...)`` line for a test case.

    Every entry of ``case["args"]`` is treated as one column: its ``type``
    becomes the column type and its ``value`` list supplies that column's
    cells. Rows are formed by zipping the columns together.

    Args:
        case: Test-case mapping.
        table_id: Number used in the table name.

    Returns:
        str: The table definition (ending in a space and a newline), or an
        empty string when the case has no args.
    """
    args = case.get("args", [])
    if not args:
        # Nothing to define.
        return ""

    column_types = ", ".join([str(arg["type"]) for arg in args])

    # One list of rendered cells per column.
    columns = []
    for arg in args:
        arg_type = arg["type"]
        columns.append(
            [
                convert_to_substrait_test_value(cell, arg_type, 1)
                for cell in arg.get("value", [])
            ]
        )

    # Transpose columns into rows and render each row as "(a, b, ...)".
    row_texts = []
    for row in zip(*columns):
        row_texts.append("(" + ", ".join(str(cell) for cell in row) + ")")
    formatted_rows = ", ".join(row_texts)

    return f"DEFINE t{table_id}({column_types}) = ({formatted_rows}) \n"
93 |
94 |
def format_test_case(case, function, description_map, table_id_counter, is_aggregate):
    """Render one test case as a line (or table + line) of substrait test format.

    Args:
        case: Test-case mapping.
        function: Function name to emit.
        description_map: Group-name to description mapping, passed on to
            ``format_test_case_group``.
        table_id_counter: Iterator yielding table ids; advanced only when a
            table definition is emitted.
        is_aggregate: Whether the case belongs to an aggregate function.

    Returns:
        tuple: ``(group header, formatted test text)``.
    """
    description = format_test_case_group(case, description_map)

    raw_options = case.get("options")
    if raw_options:
        rendered = []
        for key, value in raw_options.items():
            rendered.append(f"{key}:{convert_to_substrait_test_value(value, None)}")
        options = f" [{', '.join(rendered)}]"
    else:
        options = ""

    results = format_return_value(case)

    # Args flagged is_not_a_func_arg only feed the table definition.
    function_args = [
        arg for arg in case.get("args", []) if not arg.get("is_not_a_func_arg")
    ]
    if is_aggregate and len(function_args) != 1:
        table_id = next(table_id_counter)
        column_refs = ", ".join(
            [f"t{table_id}.col{position}" for position, _ in enumerate(function_args)]
        )
        table_definition = generate_define_table(case, table_id)
        return (
            description,
            f"{table_definition}{function}({column_refs}){options} = {results}",
        )

    literals = [
        convert_to_substrait_test_value(arg.get("value"), str(arg["type"]))
        for arg in case.get("args", [])
    ]
    return description, f"{function}({', '.join(literals)}){options} = {results}"
118 |
119 |
def convert_test_file_to_new_format(input_data, prefix, is_aggregate):
    """Turn parsed YAML test data into the lines of a substrait test file.

    Args:
        input_data: Mapping with ``function``, ``base_uri`` and ``cases``.
        prefix: Leading part of ``base_uri``; that many characters are cut
            from the front of ``base_uri`` to form the include path.
        is_aggregate: Selects the aggregate or the scalar file header.

    Returns:
        list: Output lines, each ending in a newline: the two header lines,
        then for every group (in first-seen order) a comment line followed by
        that group's tests.
    """
    function = input_data["function"]
    base_uri = input_data["base_uri"][len(prefix) :]
    description_map = {}
    table_id_counter = count(0)

    # Group header -> formatted tests, in order of first appearance.
    tests_by_group = {}
    for case in input_data["cases"]:
        description, formatted_test = format_test_case(
            case, function, description_map, table_id_counter, is_aggregate
        )
        tests_by_group.setdefault(description, []).append(formatted_test)

    if is_aggregate:
        header = "### SUBSTRAIT_AGGREGATE_TEST: v1.0"
    else:
        header = "### SUBSTRAIT_SCALAR_TEST: v1.0"

    output_lines = [f"{header}\n", f"### SUBSTRAIT_INCLUDE: '{base_uri}'\n"]
    for description, tests in tests_by_group.items():
        output_lines.append(f"\n# {description}\n")
        for test in tests:
            output_lines.append(f"{test}\n")

    return output_lines
144 |
145 |
def output_test_data(output_file, lines):
    """Write formatted lines to a file, creating its parent directory if needed.

    Args:
        output_file: Destination path.
        lines: Iterable of strings written as-is (no newlines are added).
    """
    parent_dir = os.path.dirname(output_file)
    # A bare filename has an empty dirname, and os.makedirs("") raises.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_file, "w") as file:
        file.writelines(lines)

    print(f"Converted '{output_file}' successfully.")
153 |
154 |
def convert_directory(input_dir, output_dir, prefix):
    """Convert every ``.yaml`` file under ``input_dir`` to a ``.test`` file.

    The directory layout below ``input_dir`` is mirrored under ``output_dir``.
    A file is treated as an aggregate test when the text "aggregate" appears
    anywhere in its input path.

    Args:
        input_dir: Root directory to walk for ``.yaml`` files.
        output_dir: Root directory for the generated ``.test`` files.
        prefix: URI prefix handed to ``convert_test_file_to_new_format``.
    """
    for root, _, files in os.walk(input_dir):
        for filename in files:
            if not filename.endswith(".yaml"):
                continue

            input_file = os.path.join(root, filename)
            # Swap only the file extension. A plain str.replace on the joined
            # path would also rewrite ".yaml" inside directory names,
            # including output_dir itself.
            relative_stem, _ext = os.path.splitext(
                os.path.relpath(input_file, input_dir)
            )
            output_file = os.path.join(output_dir, relative_stem + ".test")
            is_aggregate = "aggregate" in input_file

            yaml_data = load_test_file(input_file)
            output_lines = convert_test_file_to_new_format(
                yaml_data, prefix, is_aggregate
            )
            output_test_data(output_file, output_lines)


if __name__ == "__main__":
    input_directory = "../../cases"
    output_directory = "../../substrait/tests/cases"
    uri_prefix = (
        "https://github.com/substrait-io/substrait/blob/main/extensions/substrait"
    )
    convert_directory(input_directory, output_directory, uri_prefix)
179 |
--------------------------------------------------------------------------------
/tools/convert_testcases/convert_testcases_to_yaml_format.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from ruamel.yaml import YAML
4 | from tests.coverage.nodes import (
5 | TestFile,
6 | AggregateArgument,
7 | )
8 | from tests.coverage.case_file_parser import load_all_testcases
9 | from tools.convert_testcases.convert_testcase_helper import (
10 | convert_to_yaml_value,
11 | convert_to_long_type,
12 | SQUOTE_PLACEHOLDER,
13 | DQUOTE_PLACEHOLDER,
14 | iso_duration_to_timedelta,
15 | )
16 |
# Module-level ruamel.yaml instance; output_test_data uses it to dump the
# converted test data.
yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)  # Indentation used for the dumped mappings and sequences
yaml.width = 4096  # Extend line width to prevent line breaks
20 |
21 |
def convert_result(test_case):
    """Build the ``result`` section of a YAML test case.

    Returns:
        dict: ``{"special": ...}`` for error results and for a ``nan`` value;
        otherwise ``{"value": ..., "type": ...}``. ``add_intervals`` results
        of type ``iday`` are emitted as a string built from the parsed
        duration.
    """
    if test_case.is_return_type_error():
        return {"special": str(test_case.result.error)}

    result = test_case.result
    if str(result.value) == "nan":
        return {"special": "nan"}

    if test_case.func_name == "add_intervals" and result.type == "iday":
        # Interval-day results of add_intervals are written as strings.
        duration = None
        if result.value is not None:
            duration = iso_duration_to_timedelta(result.value)
        return {"value": convert_to_yaml_value(duration, "str"), "type": "string"}

    return {
        "value": convert_to_yaml_value(result.value, result.type),
        "type": convert_to_long_type(result.type),
    }
47 |
48 |
def convert_table_definition(test_case):
    """Convert the table rows of a test case into column-wise YAML args.

    Column types come from ``column_type`` when every argument is an
    ``AggregateArgument``; otherwise each argument's scalar type is converted
    with ``convert_to_long_type``. ``test_case.rows`` is transposed so that
    each emitted entry holds one column.

    Returns:
        list: One mapping per column with ``value``, ``type`` and
        ``is_not_a_func_arg`` set to ``"true"``. When there are no rows, one
        entry with an empty ``value`` list is returned per column type.
    """
    args = test_case.args
    column_types = None

    if all(isinstance(arg, AggregateArgument) for arg in args):
        # Take the declared column type of each aggregate argument.
        column_types = [arg.column_type for arg in args]
    elif args is not None:
        column_types = []
        for arg in args:
            if isinstance(arg, AggregateArgument):
                short_type = arg.scalar_value.type
            else:
                short_type = arg.type
            column_types.append(convert_to_long_type(short_type))

    # Transpose rows into columns.
    columns = [list(column) for column in zip(*test_case.rows)]

    if columns:
        return [
            {
                "value": convert_to_yaml_value(column, col_type),
                "type": col_type,
                "is_not_a_func_arg": "true",
            }
            for column, col_type in zip(columns, column_types)
        ]

    # No rows: still emit one (empty) column per type.
    return [
        {"value": [], "type": col_type, "is_not_a_func_arg": "true"}
        for col_type in column_types
    ]
82 |
83 |
def convert_group(test_case, groups):
    """Build the ``group`` entry for a test case.

    ``test_case.group.name`` has the form ``"<id>: <description>"``. The first
    case of a group gets the full ``{"id", "description"}`` mapping; later
    cases of the same group get only the id string.

    Args:
        test_case: Test case whose ``group.name`` is read.
        groups: Mutable mapping of group id to description, updated in place.

    Returns:
        str or dict: The group id if it is already in ``groups``, otherwise a
        mapping with ``id`` and ``description``.
    """
    # Split on the first ": " only, so a description that itself contains
    # ": " is kept whole instead of being cut at its second occurrence.
    group_id, _separator, description = test_case.group.name.partition(": ")
    group_id = str(group_id)
    group = (
        group_id
        if group_id in groups
        else {"id": group_id, "description": description}
    )
    groups[group_id] = description
    return group
90 |
91 |
def convert_test_case_to_old_format(test_case, groups):
    """Convert one parsed test case into the YAML case mapping.

    Args:
        test_case: Parsed test case (reads ``group``, ``rows``, ``args``,
            ``options`` and the result).
        groups: Mapping of group ids seen so far, updated by ``convert_group``.

    Returns:
        dict: Mapping with ``group``, ``args``, optional ``options`` and
        ``result``.
    """
    print(f"converting test '{test_case}'")
    case = {}
    # Match group headers with descriptions
    case["group"] = convert_group(test_case, groups)

    if test_case.rows is not None:
        case["args"] = convert_table_definition(test_case)
    else:
        # A case without arguments yields an empty list instead of failing on
        # args[0].
        args = test_case.args or []
        # As before, the first argument decides whether the values are wrapped
        # in AggregateArgument.
        unwrap = bool(args) and isinstance(args[0], AggregateArgument)
        scalars = [arg.scalar_value if unwrap else arg for arg in args]
        case["args"] = [
            {
                "value": convert_to_yaml_value(scalar.value, scalar.type),
                "type": convert_to_long_type(scalar.type),
            }
            for scalar in scalars
        ]

    if len(test_case.options) > 0:
        case["options"] = {
            key: convert_to_yaml_value(value, None)
            for key, value in test_case.options.items()
        }

    case["result"] = convert_result(test_case)
    return case
128 |
129 |
def convert_test_file_to_yaml(testFile: TestFile):
    """Build the complete YAML document structure for one parsed test file.

    Returns:
        dict: ``base_uri`` (the fixed extensions URL followed by
        ``testFile.include``), ``function`` and ``cases``. ``function`` is the
        ``func_name`` of the last test case, or ``None`` when the file has no
        test cases.
    """
    seen_groups = {}
    converted_cases = []
    function_name = None

    for test_case in testFile.testcases:
        # Each case overwrites the name, so the last case's name is kept.
        function_name = test_case.func_name
        converted_cases.append(convert_test_case_to_old_format(test_case, seen_groups))

    base_uri = f"https://github.com/substrait-io/substrait/blob/main/extensions/substrait{testFile.include}"
    return {
        "base_uri": base_uri,
        "function": function_name,
        "cases": converted_cases,
    }
146 |
147 |
def output_test_data(output_file, input_path, yaml_data):
    """Dump ``yaml_data`` to ``output_file`` and post-process its quotes.

    Args:
        output_file: Path of the YAML file to write.
        input_path: Source path, used only in the progress message.
        yaml_data: Data passed to the module-level ``yaml`` dumper.
    """
    with open(output_file, "w") as stream:
        yaml.dump(yaml_data, stream)

    # Rewrite the dumped file in place to normalise quoting.
    fix_quotes(output_file)

    message = f"Converted '{input_path}' to '{output_file}'."
    print(message)
155 |
156 |
def fix_quotes(file_path):
    """Rewrite a file in place, stripping quotes and restoring placeholders.

    All single and double quote characters are removed first; afterwards
    every ``SQUOTE_PLACEHOLDER`` becomes ``'`` and every
    ``DQUOTE_PLACEHOLDER`` becomes ``"``.

    Args:
        file_path: Path of the file to rewrite.
    """
    with open(file_path, "r") as source:
        text = source.read()

    # Order matters: the real quotes must be removed before the placeholders
    # are turned back into quotes.
    substitutions = (
        ("'", ""),
        ('"', ""),
        (SQUOTE_PLACEHOLDER, "'"),
        (DQUOTE_PLACEHOLDER, '"'),
    )
    for old, new in substitutions:
        text = text.replace(old, new)

    with open(file_path, "w") as target:
        target.write(text)
171 |
172 |
def convert_directory(input_dir, output_dir):
    """Convert every test file loaded from ``input_dir`` to a YAML file.

    Each output file is written under ``output_dir`` at the input file's path
    relative to ``input_dir``, with a ``.test`` extension swapped for
    ``.yaml``. Missing output directories are created.

    Args:
        input_dir: Root directory passed to ``load_all_testcases``.
        output_dir: Root directory for the generated YAML files.
    """
    for input_test_file in load_all_testcases(input_dir):
        input_file = input_test_file.path
        relative_path = os.path.relpath(input_file, input_dir)
        # Swap only the file extension. A plain str.replace on the joined path
        # would also rewrite ".test" inside directory names, including
        # output_dir itself.
        stem, extension = os.path.splitext(relative_path)
        if extension == ".test":
            relative_path = stem + ".yaml"
        output_file = os.path.join(output_dir, relative_path)
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        yaml_data = convert_test_file_to_yaml(input_test_file)
        output_test_data(output_file, input_test_file.path, yaml_data)
182 |
183 |
def main():
    """Convert the substrait test cases into YAML files under ``../../cases``.

    Both paths are relative to the current working directory.
    """
    convert_directory("../../substrait/tests/cases", "../../cases")


if __name__ == "__main__":
    main()
192 |
--------------------------------------------------------------------------------
/tools/schema/casefile.yaml:
--------------------------------------------------------------------------------
# JSON Schema (draft 2020-12) for a case file: a function name plus a list of
# test cases.
$id: https://thebft.info/schemas/casefile.json
$schema: https://json-schema.org/draft/2020-12/schema
type: object
properties:
  function:
    type: string
  cases:
    type: array
    items:
      type: object
      properties:
        # A group is either the full {id, description} mapping or just a
        # string.
        group:
          oneOf:
            - type: object
              properties:
                id:
                  type: string
                description:
                  type: string
              required:
                - id
                - description
              additionalProperties: false
            - type: string
        # NOTE(review): arg items allow only `value` and `type`, while
        # tools/convert_testcases/convert_testcases_to_yaml_format.py also
        # emits `is_not_a_func_arg` -- confirm whether that output is meant to
        # validate against this schema.
        args:
          type: array
          items:
            type: object
            properties:
              value:
                oneOf:
                  - type: string
                  - type: number
                  - type: boolean
                  # Quoted so the type name is the string "null", not a YAML
                  # null.
                  - type: "null"
                  - type: array
              type:
                type: string
            required:
              - value
              - type
            additionalProperties: false
        # Option values must be strings.
        options:
          type: object
          additionalProperties:
            type: string
        # A result is either a typed value or a `special` outcome.
        result:
          oneOf:
            - type: object
              properties:
                value:
                  oneOf:
                    - type: string
                    - type: number
                    - type: boolean
                    - type: "null"
                type:
                  type: string
              required:
                - value
                - type
              additionalProperties: false
            # NOTE(review): only `error` and `undefined` are listed, while
            # convert_result in convert_testcases_to_yaml_format.py also emits
            # `special: nan` -- confirm.
            - type: object
              properties:
                special:
                  enum:
                    - error
                    - undefined
              required:
                - special
              additionalProperties: false
          # NOTE(review): additionalProperties only takes `properties`
          # declared in the same schema object into account, and none are
          # declared at this level -- verify this keyword does not reject
          # every result object.
          additionalProperties: false
      required:
        - group
        - result
      additionalProperties: false
required:
  - function
  - cases
80 |
--------------------------------------------------------------------------------
/tools/yaml_to_json.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 |
4 | import yaml
5 |
# Prefer the C-accelerated safe loader when PyYAML was built with libyaml.
try:
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader

BASE_DIR = Path(__file__).parent.parent
JSON_DIR = BASE_DIR / "function_json"
CASES_DIR = BASE_DIR / "cases"
FUNCTION_FOLDERS = Path(CASES_DIR).glob("*")


# Convert every YAML case file under cases/<folder>/ into
# function_json/<folder>/<stem>.json.
for function_folder in FUNCTION_FOLDERS:
    # Plain files directly under cases/ are not function folders; skipping
    # them avoids creating an empty output directory named after a file.
    if not function_folder.is_dir():
        continue
    folder_path = CASES_DIR / function_folder.name
    json_path = JSON_DIR / function_folder.name
    Path(json_path).mkdir(parents=True, exist_ok=True)
    function_yamls = Path(folder_path).rglob("*.yaml")
    for function_yaml in function_yamls:
        # rglob already yields the full path, including any sub-directory.
        # Rebuilding it from folder_path and the bare file name pointed at a
        # non-existent file for YAML files in nested folders.
        yaml_file = function_yaml
        # Output stays flat per function folder, so nested files that share a
        # stem overwrite each other.
        json_file = json_path / function_yaml.stem
        with open(yaml_file) as f:
            dataMap = yaml.load(f, SafeLoader)
        with open(f"{json_file}.json", "w") as outfile:
            outfile.write('{}\n'.format(json.dumps(dataMap, indent=4)))
29 |
--------------------------------------------------------------------------------