├── .github └── workflows │ ├── deploy.yml │ └── test.yml ├── .gitignore ├── .gitmodules ├── .markdownlint.json ├── .vscode ├── launch.json └── settings.json ├── LICENSE ├── NOTICE.txt ├── README.md ├── bft ├── __init__.py ├── cases │ ├── __init__.py │ ├── loader.py │ ├── parser.py │ ├── runner.py │ └── types.py ├── core │ ├── __init__.py │ ├── function.py │ ├── index_parser.py │ ├── yaml_parser.py │ └── yaml_parser_test.py ├── dialects │ ├── __init__.py │ ├── loader.py │ ├── parser.py │ └── types.py ├── html │ ├── __init__.py │ ├── builder.py │ └── types.py ├── substrait │ ├── __init__.py │ └── extension_file_parser.py ├── supplements │ ├── __init__.py │ ├── parser.py │ └── types.py ├── templates │ ├── function_desc.j2 │ └── function_index.j2 ├── testers │ ├── __init__.py │ ├── base_tester.py │ ├── cudf │ │ ├── __init__.py │ │ ├── runner.py │ │ └── tester.py │ ├── datafusion │ │ ├── __init__.py │ │ ├── runner.py │ │ └── tester.py │ ├── duckdb │ │ ├── __init__.py │ │ ├── runner.py │ │ ├── runner_test.py │ │ └── tester.py │ ├── postgres │ │ ├── __init__.py │ │ ├── runner.py │ │ └── tester.py │ ├── snowflake │ │ ├── __init__.py │ │ ├── config.yaml │ │ ├── runner.py │ │ └── tester.py │ ├── sqlite │ │ ├── __init__.py │ │ ├── runner.py │ │ └── tester.py │ └── velox │ │ ├── runner.py │ │ └── tester.py ├── tests │ ├── __init__.py │ ├── base.py │ ├── conftest.py │ ├── test_cudf.py │ ├── test_datafusion.py │ ├── test_duckdb.py │ ├── test_postgres.py │ ├── test_pyvelox.py │ ├── test_snowflake.py │ └── test_sqlite.py └── utils │ └── utils.py ├── build_site.py ├── ci └── docker │ ├── base-tester.Dockerfile │ ├── datafusion.Dockerfile │ ├── duckdb.Dockerfile │ ├── postgres-compose.yaml │ ├── postgres-server.Dockerfile │ ├── sqlite.Dockerfile │ ├── velox-compose.yaml │ └── velox.Dockerfile ├── dialects ├── cudf.yaml ├── datafusion.yaml ├── duckdb.yaml ├── postgres.yaml ├── snowflake.yaml ├── sqlite.yaml └── velox_presto.yaml ├── index.yaml ├── requirements.txt ├── static_site 
├── android-chrome-192x192.png ├── android-chrome-512x512.png ├── apple-touch-icon.png ├── assets │ ├── index │ │ ├── script.js │ │ └── style.css │ └── supplementary │ │ ├── script.js │ │ ├── style.css │ │ └── terminal.css ├── favicon-16x16.png ├── favicon-32x32.png └── favicon.ico ├── supplemental └── arithmetic │ ├── abs.md │ ├── acos.md │ ├── acosh.md │ ├── add.md │ ├── asin.md │ ├── asinh.md │ ├── atan.md │ ├── atan2.md │ ├── atanh.md │ ├── bitwise_and.md │ ├── bitwise_not.md │ ├── bitwise_or.md │ ├── bitwise_xor.md │ ├── cos.md │ ├── cosh.md │ ├── definitions.yaml │ ├── divide.md │ ├── exponential.md │ ├── factorial.md │ ├── modulus.md │ ├── multiply.md │ ├── negate.md │ ├── power.md │ ├── sign.md │ ├── sin.md │ ├── sinh.md │ ├── sqrt.md │ ├── subtract.md │ ├── sum.md │ ├── tan.md │ └── tanh.md └── tools ├── convert_testcases ├── check_testcase_format_conversion_roundtrip.py ├── convert_testcase_helper.py ├── convert_testcases_to_substrait_test_format.py ├── convert_testcases_to_yaml_format.py ├── test_convert_testcases_to_substrait_test_format.py └── test_convert_testcases_to_yaml_format.py ├── schema └── casefile.yaml └── yaml_to_json.py /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to gh-pages 2 | on: 3 | workflow_dispatch: 4 | workflow_run: 5 | workflows: 6 | - test 7 | types: 8 | - completed 9 | 10 | jobs: 11 | deploy: 12 | if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | with: 18 | submodules: recursive 19 | - uses: actions/setup-python@v4 20 | with: 21 | python-version: "3.11" 22 | cache: "pip" 23 | - run: pip install -r requirements.txt 24 | - name: Build Site 25 | run: python build_site.py 26 | - name: Deploy 27 | uses: JamesIves/github-pages-deploy-action@v4 28 | with: 29 | folder: dist 30 | 
-------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | sqlite: 10 | name: Run tests with sqlite 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v3 15 | with: 16 | submodules: recursive 17 | 18 | - name: Build & run 19 | run: docker run --rm $(docker build -q --file ./ci/docker/sqlite.Dockerfile .) 20 | duckdb: 21 | name: Run tests with duckdb 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v3 26 | with: 27 | submodules: recursive 28 | 29 | - name: Build & run 30 | run: docker run --rm $(docker build -q --file ./ci/docker/duckdb.Dockerfile .) 31 | datafusion: 32 | name: Run tests with datafusion 33 | runs-on: ubuntu-latest 34 | steps: 35 | - name: Checkout 36 | uses: actions/checkout@v3 37 | with: 38 | submodules: recursive 39 | 40 | - name: Build & run 41 | run: docker run --rm $(docker build -q --file ./ci/docker/datafusion.Dockerfile .) 
42 | postgres: 43 | name: Run tests with postgres 44 | runs-on: ubuntu-latest 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v3 48 | with: 49 | submodules: recursive 50 | 51 | - name: Build 52 | run: docker compose -f ./ci/docker/postgres-compose.yaml build 53 | 54 | - name: Run 55 | run: docker compose -f ./ci/docker/postgres-compose.yaml run app 56 | velox: 57 | name: Run tests with velox 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout 61 | uses: actions/checkout@v3 62 | with: 63 | submodules: recursive 64 | - name: Build 65 | run: docker compose -f ./ci/docker/velox-compose.yaml build 66 | 67 | - name: Run 68 | run: docker compose -f ./ci/docker/velox-compose.yaml run app 69 | site: 70 | name: Build site 71 | runs-on: ubuntu-latest 72 | steps: 73 | - name: Checkout 74 | uses: actions/checkout@v3 75 | with: 76 | submodules: recursive 77 | - uses: actions/setup-python@v4 78 | with: 79 | python-version: "3.11" 80 | cache: "pip" 81 | - run: pip install -r requirements.txt 82 | - name: Build Site 83 | run: python build_site.py 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "substrait"] 2 | path = substrait 3 | url = https://github.com/substrait-io/substrait.git 4 | -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "MD013": true 3 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Build Site", 9 | "type": "python", 10 | "request": "launch", 11 | "env": { 12 | "PYTHONPATH": "${workspaceFolder}" 13 | }, 14 | "module": "bft.html.builder", 15 | "justMyCode": true 16 | }, 17 | { 18 | "name": "Run Tests", 19 | "type": "python", 20 | "request": "launch", 21 | "env": { 22 | "PYTHONPATH": "${workspaceFolder}" 23 | }, 24 | "module": "pytest", 25 | "args": [ 26 | "bft" 27 | ], 28 | "justMyCode": true 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black", 3 | "editor.formatOnSave": true, 4 | "editor.codeActionsOnSave": { 5 | "source.organizeImports": "explicit" 6 | }, 7 | "isort.args": [ 8 | "--profile", 9 | "black" 10 | ], 11 | "yaml.schemas": { 12 | "./tools/schema/casefile.yaml": "cases/**", 13 | "https://json.schemastore.org/github-workflow.json": 
"file:///home/pace/dev/bft/.github/workflows/deploy.yml" 14 | }, 15 | "python.testing.unittestEnabled": false, 16 | "python.testing.pytestEnabled": true, 17 | "python.testing.pytestArgs": [ 18 | "bft" 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023 Voltron Data, Inc. 2 | 3 | This product includes software developed at 4 | Voltron Data, Inc. (http://www.voltrondata.com/). 5 | -------------------------------------------------------------------------------- /bft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/__init__.py -------------------------------------------------------------------------------- /bft/cases/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/cases/__init__.py -------------------------------------------------------------------------------- /bft/cases/loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | 4 | from .parser import CaseFileParser 5 | from .types import Case 6 | 7 | 8 | def load_cases(cases_dir: str) -> List[Case]: 9 | cases = [] 10 | parser = CaseFileParser() 11 | for case_path in Path(cases_dir).rglob("*.yaml"): 12 | with open(case_path, "rb") as case_f: 13 | for case_file in parser.parse(case_f): 14 | for case in case_file.cases: 15 | cases.append(case) 16 | return cases 17 | -------------------------------------------------------------------------------- /bft/cases/parser.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import 
BinaryIO, Iterable, List 3 | 4 | from bft.core.yaml_parser import BaseYamlParser, BaseYamlVisitor 5 | 6 | from .types import Case, CaseFile, CaseGroup, CaseLiteral, ProtoCase 7 | 8 | 9 | class CaseFileVisitor(BaseYamlVisitor[CaseFile]): 10 | def __init__(self): 11 | super().__init__() 12 | self.__groups = {} 13 | 14 | def __resolve_proto_case(self, case: ProtoCase, base_uri: str, function: str) -> Case: 15 | if case.group not in self.__groups: 16 | raise Exception( 17 | "A case referred to group " + case.group +" which was not defined in the file" 18 | ) 19 | grp = self.__groups[case.group] 20 | return Case(function, base_uri, grp, case.args, case.result, case.options) 21 | 22 | def visit_group(self, group): 23 | id = self._get_or_die(group, "id") 24 | description = self._get_or_die(group, "description") 25 | self.__groups[id] = CaseGroup(id, description) 26 | return id 27 | 28 | def __normalize_yaml_literal(self, value, data_type): 29 | # YAML/JSON can't represent infinity or nan 30 | # so its a special case 31 | if data_type.startswith("fp"): 32 | if isinstance(value, str): 33 | if value.lower().startswith("inf"): 34 | return float("inf") 35 | elif value.lower().startswith("-inf"): 36 | return float("-inf") 37 | elif value.lower().startswith("1e"): 38 | return float(value.lower()) 39 | elif value.lower().startswith("nan"): 40 | return math.nan 41 | else: 42 | raise ValueError(f"Unrecognized float string literal {value}") 43 | return value 44 | 45 | def visit_literal(self, lit): 46 | value = self._get_or_die(lit, "value") 47 | data_type = self._get_or_die(lit, "type") 48 | is_not_a_func_arg = self._get_or_else(lit, "is_not_a_func_arg", False) 49 | value = self.__normalize_yaml_literal(value, data_type) 50 | return CaseLiteral(value, data_type, is_not_a_func_arg) 51 | 52 | def visit_literal_result(self, lit): 53 | value = self._get_or_die(lit, "value") 54 | data_type = self._get_or_die(lit, "type") 55 | value = self.__normalize_yaml_literal(value, data_type) 56 | 
return CaseLiteral(value, data_type) 57 | 58 | def visit_result(self, res): 59 | special = self._get_or_else(res, "special", None) 60 | if special is None: 61 | return self.visit_literal_result(res) 62 | return special 63 | 64 | def visit_case(self, case): 65 | grp = self._get_or_die(case, "group") 66 | if not isinstance(grp, str): 67 | grp = self.visit_group(grp) 68 | result = self._visit_or_die(self.visit_result, case, "result") 69 | args = self._visit_list(self.visit_literal, case, "args") 70 | opts = self._get_or_else(case, "options", {}) 71 | opt_tuples = [] 72 | for opt_key in sorted(opts.keys()): 73 | opt_tuples.append((opt_key, opts[opt_key])) 74 | return ProtoCase(grp, args, result, opt_tuples) 75 | 76 | def visit(self, case_file): 77 | base_uri = self._get_or_die(case_file, 'base_uri') 78 | func_name = self._get_or_die(case_file, "function") 79 | proto_cases = self._visit_list(self.visit_case, case_file, "cases") 80 | cases = [self.__resolve_proto_case(c, base_uri, func_name) for c in proto_cases] 81 | return CaseFile(func_name, base_uri, cases) 82 | 83 | 84 | class CaseFileParser(BaseYamlParser[CaseFile]): 85 | def get_visitor(self) -> CaseFileVisitor: 86 | return CaseFileVisitor() 87 | -------------------------------------------------------------------------------- /bft/cases/runner.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Literal, NamedTuple 3 | 4 | from bft.dialects.types import Dialect, SqlMapping 5 | 6 | from .types import Case 7 | 8 | 9 | class CaseResult(NamedTuple): 10 | passed: bool 11 | expected_pass: bool 12 | reason: str 13 | 14 | 15 | class CaseRunner(ABC): 16 | @abstractmethod 17 | def run_case(self, case: Case) -> CaseResult: 18 | pass 19 | 20 | 21 | class SqlCaseResult(NamedTuple): 22 | type: Literal["success", "error", "unsupported", "unexpected_pass", "mismatch"] 23 | err: str 24 | actual: str 25 | 26 | @staticmethod 27 | def 
success(): 28 | return SqlCaseResult("success", None, None) 29 | 30 | @staticmethod 31 | def error(err: str): 32 | return SqlCaseResult("error", err, None) 33 | 34 | @staticmethod 35 | def unsupported(err: str): 36 | return SqlCaseResult("unsupported", err, None) 37 | 38 | @staticmethod 39 | def unexpected_pass(actual: str): 40 | return SqlCaseResult("unexpected_pass", None, actual) 41 | 42 | @staticmethod 43 | def mismatch(actual: str): 44 | return SqlCaseResult("mismatch", None, actual) 45 | 46 | 47 | class SqlCaseRunner(CaseRunner): 48 | def __init__(self, dialect: Dialect): 49 | self.__dialect = dialect 50 | 51 | def run_case(self, case: Case) -> CaseResult: 52 | mapping = self.__dialect.mapping_for_case(case) 53 | if mapping is None: 54 | return CaseResult( 55 | False, 56 | False, 57 | f"The dialect {self.__dialect.name} does not support the function '{case.function}'", 58 | ) 59 | result = self.run_sql_case(case, mapping) 60 | if result.type == "success": 61 | return CaseResult(result, mapping.should_pass, mapping.reason) 62 | elif result.type == "unsupported": 63 | if mapping.should_pass: 64 | return CaseResult( 65 | False, 66 | True, 67 | f"This case should have been supported. Instead it reported {result.err}", 68 | ) 69 | else: 70 | return CaseResult(False, False, mapping.reason) 71 | elif result.type == "error": 72 | if case.result == "error": 73 | # Case expected to error. Dialect may or may not have expected it 74 | should_pass = mapping.should_pass 75 | if mapping.unsupported: 76 | # Unsupported test case, expected an error and got an error 77 | should_pass = True 78 | return CaseResult(True, should_pass, mapping.reason) 79 | else: 80 | if mapping.should_pass: 81 | # Case should not have error. 
Dialect should not have error 82 | return CaseResult(False, mapping.should_pass, result.err) 83 | else: 84 | # Case should not have error but it's expected for dialect 85 | return CaseResult(False, mapping.should_pass, mapping.reason) 86 | elif result.type == "unexpected_pass": 87 | # Case expected error. No error happened. 88 | if mapping.should_pass: 89 | # This was not expected given the dialect 90 | return CaseResult( 91 | False, 92 | mapping.should_pass, 93 | f"This case should have given an error. Instead it returned the value {result.actual}", 94 | ) 95 | else: 96 | # In this dialect, this case passes even though it shouldn't 97 | return CaseResult(False, mapping.should_pass, mapping.reason) 98 | elif result.type == "mismatch": 99 | if mapping.should_pass: 100 | return CaseResult( 101 | False, 102 | mapping.should_pass, 103 | f"This case should have yielded the result {case.result.value} but instead it returned {result.actual}", 104 | ) 105 | else: 106 | return CaseResult(False, mapping.should_pass, mapping.reason) 107 | else: 108 | raise Exception("Unexpected case result type") 109 | 110 | @abstractmethod 111 | def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult: 112 | pass 113 | -------------------------------------------------------------------------------- /bft/cases/types.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Literal, NamedTuple, Tuple 2 | 3 | 4 | class CaseLiteral(NamedTuple): 5 | value: str | int | float | list 6 | type: str 7 | is_not_a_func_arg: bool = False # if true it is used only to populate test data 8 | 9 | 10 | class CaseGroup(NamedTuple): 11 | id: str 12 | description: str 13 | 14 | 15 | class Case(NamedTuple): 16 | function: str 17 | base_uri: str 18 | group: CaseGroup 19 | args: List[CaseLiteral] 20 | result: CaseLiteral | Literal["error", "undefined"] 21 | options: List[Tuple[str, str]] 22 | 23 | 24 | def case_to_kernel_str( 25 | 
function: str, 26 | args: List[CaseLiteral], 27 | result: CaseLiteral | Literal["error", "undefined"], 28 | ): 29 | joined_args = ", ".join([arg.type for arg in args]) 30 | result_str = result 31 | if not isinstance(result_str, str): 32 | result_str = result.type 33 | return f"{function}({joined_args}) -> {result_str}" 34 | 35 | 36 | class CaseFile(NamedTuple): 37 | function: str 38 | base_uri: str 39 | cases: List[Case] 40 | 41 | 42 | class ProtoCase(NamedTuple): 43 | group: str 44 | args: List[CaseLiteral] 45 | result: CaseLiteral | Literal["error", "undefined"] 46 | options: Dict[str, str] 47 | -------------------------------------------------------------------------------- /bft/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/core/__init__.py -------------------------------------------------------------------------------- /bft/core/function.py: -------------------------------------------------------------------------------- 1 | from typing import List, NamedTuple 2 | 3 | 4 | class Option(NamedTuple): 5 | name: str 6 | values: List[str] 7 | 8 | 9 | class Kernel(NamedTuple): 10 | arg_types: List[str] 11 | return_type: str 12 | available_options: List[str] 13 | variadic: str 14 | 15 | 16 | class FunctionDefinition(object): 17 | def __init__( 18 | self, 19 | name: str, 20 | uri: str, 21 | description: str, 22 | options: List[Option], 23 | kernels: List[Kernel], 24 | ): 25 | self.name = name 26 | self.uri = uri 27 | self.description = description 28 | self.options = options 29 | self.kernels = kernels 30 | 31 | @property 32 | def details(self): 33 | return [] 34 | 35 | @property 36 | def properties(self): 37 | return 38 | 39 | 40 | class FunctionBuilder(object): 41 | def __init__(self, name: str): 42 | self.name = name 43 | self.uri: str = None 44 | self.description: str = None 45 | self.options = {} 46 | self.kernels = [] 
47 | 48 | def set_description(self, description: str): 49 | self.description = description 50 | 51 | def set_uri(self, uri: str): 52 | self.uri = uri 53 | 54 | def try_set_description(self, description: str): 55 | if self.description is None: 56 | self.description = description 57 | 58 | def note_option(self, name: str, values: List[str]): 59 | if name in self.options: 60 | existing_values = self.options[name] 61 | # Merge existing values and new values using set union 62 | self.options[name] = list(set(existing_values).union(values)) 63 | else: 64 | # Add the new values directly if the option does not exist 65 | self.options[name] = values 66 | 67 | def note_kernel( 68 | self, 69 | arg_types: List[str], 70 | return_type: str, 71 | available_options: List[str], 72 | variadic: int, 73 | ): 74 | self.kernels.append(Kernel(arg_types, return_type, available_options, variadic)) 75 | 76 | def finish(self) -> FunctionDefinition: 77 | if self.description is None: 78 | self.description = "Description is missing and would go here" 79 | opts = [] 80 | for key, values in self.options.items(): 81 | opts.append(Option(key, values)) 82 | return FunctionDefinition( 83 | self.name, self.uri, self.description, opts, self.kernels 84 | ) 85 | 86 | 87 | class LibraryBuilder(object): 88 | def __init__(self): 89 | self.functions = {} 90 | 91 | def get_function(self, name, category): 92 | full_name = f"{category}_{name}" 93 | if name not in self.functions: 94 | self.functions[full_name] = FunctionBuilder(full_name) 95 | return self.functions[full_name] 96 | 97 | def function_names(self) -> List[str]: 98 | return sorted(self.functions.keys()) 99 | 100 | def finish(self) -> List[FunctionDefinition]: 101 | built_functions = [] 102 | for func_name in sorted(self.functions.keys()): 103 | built_functions.append(self.functions[func_name].finish()) 104 | return built_functions 105 | -------------------------------------------------------------------------------- /bft/core/index_parser.py: 
-------------------------------------------------------------------------------- 1 | from typing import List, NamedTuple 2 | 3 | from .yaml_parser import BaseYamlParser, BaseYamlVisitor 4 | 5 | 6 | class IndexFunctionsFile(NamedTuple): 7 | location: str 8 | canonical_uri: str 9 | 10 | class IndexFile(NamedTuple): 11 | function_files: List[IndexFunctionsFile] 12 | case_directories: List[str] 13 | dialect_directories: List[str] 14 | supplement_directories: List[str] 15 | 16 | class IndexFileVisitor(BaseYamlVisitor[IndexFile]): 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def visit_function_file(self, function_file): 21 | location = self._get_or_die(function_file, "location") 22 | canonical_uri = self._get_or_die(function_file, "canonical") 23 | return IndexFunctionsFile(location, canonical_uri) 24 | 25 | def visit(self, index_file): 26 | substrait = self._get_or_die(index_file, "substrait") 27 | function_files = self._visit_list(self.visit_function_file, substrait, "extensions") 28 | case_files = self._get_or_else(index_file, "cases", []) 29 | dialect_files = self._get_or_else(index_file, "dialects", []) 30 | supplement_files = self._get_or_else(index_file, "supplements", []) 31 | return IndexFile(function_files, case_files, dialect_files, supplement_files) 32 | 33 | 34 | class IndexFileParser(BaseYamlParser[IndexFile]): 35 | def get_visitor(self) -> IndexFile: 36 | return IndexFileVisitor() 37 | 38 | def load_index(index_path: str) -> IndexFile: 39 | parser = IndexFileParser() 40 | with open(index_path, 'rb') as f: 41 | return parser.parse(f)[0] -------------------------------------------------------------------------------- /bft/core/yaml_parser.py: -------------------------------------------------------------------------------- 1 | import math 2 | from abc import ABC, abstractmethod 3 | from decimal import Decimal 4 | from typing import BinaryIO, Generic, Iterable, List, TypeVar 5 | 6 | import yaml 7 | 8 | from bft.cases.types import CaseLiteral 9 | 
10 | try: 11 | from yaml import CSafeDumper as SafeDumper 12 | from yaml import CSafeLoader as SafeLoader 13 | except ImportError: 14 | from yaml import SafeDumper, SafeLoader 15 | 16 | T = TypeVar("T") 17 | 18 | 19 | class BaseYamlVisitor(ABC, Generic[T]): 20 | def __init__(self): 21 | self.__location_stack: List[str] = [] 22 | 23 | def _fail(self, err): 24 | loc = "/".join(self.__location_stack) 25 | raise Exception(f"Error visiting case file. Location={loc} Message={err}") 26 | 27 | def _visit_list(self, visitor, obj, attr, required=False): 28 | if attr in obj: 29 | val = obj[attr] 30 | results = [] 31 | if not isinstance(val, Iterable): 32 | self._fail(f"Expected attribute {attr} to be iterable") 33 | for idx, item in enumerate(val): 34 | self.__location_stack.append(f"{attr}[{idx}]") 35 | results.append(visitor(item)) 36 | self.__location_stack.pop() 37 | for result in results: 38 | if isinstance(result, CaseLiteral) and isinstance(result.value, list): 39 | if len(result.value) > 0: 40 | for i, s in enumerate(result.value): 41 | lower_s = str(s).lower() 42 | if lower_s.startswith("'inf'"): 43 | result.value[i] = float("inf") 44 | elif lower_s.startswith("'-inf'"): 45 | result.value[i] = float("-inf") 46 | elif lower_s.startswith("'nan'"): 47 | result.value[i] = math.nan 48 | results.append(CaseLiteral(result.value, result.type, result.is_not_a_func_arg)) 49 | results.remove(result) 50 | return results 51 | elif required: 52 | self._fail(f"Expected required attribute {attr}") 53 | else: 54 | return [] 55 | 56 | def __visit_or_maybe_die(self, visitor, obj, attr, required, default=None): 57 | if attr in obj: 58 | val = obj[attr] 59 | self.__location_stack.append(f"{attr}") 60 | visited = visitor(val) 61 | self.__location_stack.pop() 62 | return visited 63 | elif required: 64 | self._fail(f"Expected required attribte {attr}") 65 | else: 66 | return default 67 | 68 | def _visit_or_die(self, visitor, obj, attr): 69 | return self.__visit_or_maybe_die(visitor, obj, 
attr, False) 70 | 71 | def _visit_or_else(self, visitor, obj, attr, default): 72 | return self.__visit_or_maybe_die(visitor, obj, attr, True, default) 73 | 74 | def _get_or_die(self, obj, attr): 75 | if attr in obj: 76 | return obj[attr] 77 | self._fail(f"Expected required attribute {attr}") 78 | 79 | def _get_or_else(self, obj, attr, default): 80 | if attr in obj: 81 | return obj[attr] 82 | return default 83 | 84 | @abstractmethod 85 | def visit(yamlobj) -> T: 86 | pass 87 | 88 | 89 | class BaseYamlParser(ABC, Generic[T]): 90 | @abstractmethod 91 | def get_visitor(self) -> BaseYamlVisitor[T]: 92 | pass 93 | 94 | def get_loader(self): 95 | loader = yaml.SafeLoader 96 | """Add tag "!decimal" to the loader """ 97 | loader.add_constructor("!decimal", self.decimal_constructor) 98 | loader.add_constructor("!decimallist", self.list_of_decimal_constructor) 99 | return loader 100 | 101 | def decimal_constructor(self, loader: yaml.SafeLoader, node: yaml.nodes.MappingNode): 102 | return self.get_decimal_value(loader, node) 103 | 104 | def get_decimal_value(self, loader: yaml.SafeLoader, node: yaml.ScalarNode): 105 | value = loader.construct_scalar(node) 106 | if isinstance(value, str) and value.lower() == 'null': 107 | return None 108 | return Decimal(value) 109 | 110 | def list_of_decimal_constructor(self, loader: yaml.SafeLoader, node: yaml.nodes.MappingNode): 111 | return [self.get_decimal_value(loader, item) for item in node.value] 112 | 113 | def parse(self, f: BinaryIO) -> List[T]: 114 | loader = self.get_loader() 115 | objs = yaml.load_all(f, loader) 116 | visitor = self.get_visitor() 117 | return [visitor.visit(obj) for obj in objs] 118 | -------------------------------------------------------------------------------- /bft/core/yaml_parser_test.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | from typing import NamedTuple 3 | 4 | from bft.core.yaml_parser import BaseYamlParser 5 | 6 | 7 | class 
TestDecimalResult(NamedTuple): 8 | cases: Decimal | list[Decimal] 9 | 10 | class TestCaseVisitor(): 11 | def visit(self, testcase): 12 | return TestDecimalResult(testcase) 13 | class DecimalTestCaseParser(BaseYamlParser[TestDecimalResult]): 14 | def get_visitor(self) -> TestCaseVisitor: 15 | return TestCaseVisitor() 16 | 17 | def test_yaml_parser_decimal_tag(): 18 | parser = DecimalTestCaseParser() 19 | # parser returns list of parsed values 20 | assert parser.parse(b"!decimal 1") == [TestDecimalResult(Decimal('1'))] 21 | assert parser.parse(b"!decimal 1.78766") == [TestDecimalResult(Decimal('1.78766'))] 22 | assert parser.parse(b"!decimal null") == [TestDecimalResult(None)] 23 | assert parser.parse(b"!decimallist [1.2, null, 7.547]") == [TestDecimalResult([Decimal('1.2'), None, Decimal('7.547')])] 24 | -------------------------------------------------------------------------------- /bft/dialects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/dialects/__init__.py -------------------------------------------------------------------------------- /bft/dialects/loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | 4 | from .parser import DialectFileParser 5 | from .types import DialectFile, DialectsLibrary 6 | 7 | 8 | def load_dialects(dialects_dir: str) -> DialectsLibrary: 9 | parser = DialectFileParser() 10 | dialect_files: List[DialectFile] = [] 11 | for dialect_path in Path(dialects_dir).rglob("*.yaml"): 12 | with open(dialect_path, "rb") as dialect_f: 13 | for dialect_file in parser.parse(dialect_f): 14 | dialect_files.append(dialect_file) 15 | return DialectsLibrary(dialect_files) 16 | -------------------------------------------------------------------------------- /bft/dialects/parser.py: 
# --------------------------------------------------------------------------------
from bft.core.yaml_parser import BaseYamlParser, BaseYamlVisitor
from bft.dialects.types import DialectFile, DialectFunction, DialectKernel, short_type_to_type


class DialectFileVisitor(BaseYamlVisitor[DialectFile]):
    """Visitor that turns one parsed dialect YAML document into a DialectFile."""

    @staticmethod
    def visit_kernel(kernel):
        """Convert an underscore-separated kernel string into a DialectKernel.

        Each piece is expanded with :meth:`get_long_type`; an empty string
        yields an empty argument list.
        """
        arg_types = []
        if kernel != '':
            arg_types = [DialectFileVisitor.get_long_type(arg_type) for arg_type in kernel.split("_")]
        # NOTE(review): the second argument is the builtin ``any`` function,
        # presumably used as a wildcard result type -- confirm against DialectKernel.
        return DialectKernel(arg_types, any)

    @staticmethod
    def get_long_type(short_type):
        """Return the long name mapped to ``short_type``, or ``short_type`` itself."""
        long_type = short_type_to_type.get(short_type, None)
        if long_type is None:
            return short_type
        return long_type

    @staticmethod
    def _get_unqualified_func_name(name):
        """Return the part of ``name`` after the last ``.``."""
        return name.split(".")[-1]

    def visit_function(self, func):
        """Build a DialectFunction from one function entry of a dialect file.

        Only ``name`` is required; ``local_name`` defaults to the unqualified
        function name, the boolean flags default to ``False`` and ``variadic``
        defaults to ``-1``.
        """
        name = self._get_or_die(func, "name")
        required_opts = self._get_or_else(func, "required_options", {})
        local_name = self._get_or_else(func, "local_name", self._get_unqualified_func_name(name))
        infix = self._get_or_else(func, "infix", False)
        postfix = self._get_or_else(func, "postfix", False)
        between = self._get_or_else(func, "between", False)
        aggregate = self._get_or_else(func, "aggregate", False)
        unsupported = self._get_or_else(func, "unsupported", False)
        # The extract function uses a special grammar in some SQL dialects.
        # i.e. SELECT EXTRACT(YEAR FROM times) FROM my_table
        extract = self._get_or_else(func, "extract", False)
        good_kernels = self._visit_list(self.visit_kernel, func, "supported_kernels")
        variadic_min = self._get_or_else(func, "variadic", -1)
        return DialectFunction(
            name,
            local_name,
            infix,
            postfix,
            between,
            aggregate,
            unsupported,
            extract,
            required_opts,
            variadic_min,
            good_kernels,
        )

    def visit(self, dfile):
        """Build a DialectFile from a parsed dialect document.

        ``name`` and ``type`` are required. The ``dependencies`` mapping
        (prefix -> uri) is inverted into uri -> prefix.
        """
        name = self._get_or_die(dfile, "name")
        dtype = self._get_or_die(dfile, "type")
        scalar_functions = self._visit_list(
            self.visit_function, dfile, "scalar_functions"
        )
        aggregate_functions = self._visit_list(
            self.visit_function, dfile, "aggregate_functions"
        )
        uri_to_func_prefix = {uri: func_prefix for func_prefix, uri in dfile.get("dependencies", {}).items()}
        supported_types = self._visit_list(self.get_long_type, dfile, "supported_types")
        return DialectFile(name, dtype, scalar_functions, aggregate_functions, uri_to_func_prefix, supported_types)


class DialectFileParser(BaseYamlParser[DialectFile]):
    """YAML parser whose documents are visited by DialectFileVisitor."""

    def get_visitor(self) -> DialectFileVisitor:
        return DialectFileVisitor()


# --------------------------------------------------------------------------------
# /bft/html/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/html/__init__.py
# --------------------------------------------------------------------------------
# /bft/html/types.py:
# --------------------------------------------------------------------------------
from typing import Dict, List, Literal, NamedTuple

from bft.core.function import Kernel


class FunctionOptionValueInfo(NamedTuple):
    # The name of the value
    # Sourced from Substrait YAML
    name: str
    # Description of the option value
    # Sourced from BFT markdown
    description: str


# An option that can control function behavior
class FunctionOptionInfo(NamedTuple):
    # The name of the option
    # Sourced from Substrait YAML
    name: str
    # Description of the option
    # Sourced from Substrait YAML
    # Can be overridden by BFT markdown
    description: str
    # Possible values for the option
    # Sourced from Substrait YAML
    values: List[FunctionOptionValueInfo]


# Information about how the function behaves in different dialects
class FunctionDialectInfo(NamedTuple):
    # Name of the dialect (e.g. sqlite)
    # Sourced from dialect files
    name: str
    # Required options for this function in the given dialect
    # Sourced from Substrait YAML
    options: Dict[str, str]
    case_info: List[str]
    kernel_info: List[bool]


# Additional details or motivation for the function
class FunctionDetailInfo(NamedTuple):
    # Title of the detail section
    # Sourced from BFT markdown
    title: str
    # Body of the detail section
    # Sourced from BFT markdown
    description: str


# Invariants that the function respects
# Mostly useful for property-based testing
class FunctionPropertyInfo(NamedTuple):
    # The name of the invariant
    # Sourced from BFT markdown
    id: str
    # A description of the invariant
    # Sourced from BFT markdown
    description: str


class FunctionExampleResultInfo(NamedTuple):
    # Value of the result
    # Sourced from case files
    value: str


class FunctionExampleCaseInfo(NamedTuple):
    # Arguments to the function for this test case
    # Sourced from case files
    args: List[str]
    # Options values for this function
    # Sourced from case files
    options: List[str]
    # Result of the function run on the args
    # Sourced from case files
    result: Literal["error"] | Literal["undefined"] | FunctionExampleResultInfo


class FunctionExampleGroupInfo(NamedTuple):
    # Description of the example group
    # Sourced from case files
    description: str
    # Argument types for the examples in the group
    # Sourced from case files
    arg_types: List[str]
    # Names of options used in the examples in this group
    # Sourced from case files
    option_names: List[str]
    # Result type for the examples in the group
    # Sourced from case files
    result_type: str
    # Example executions
    cases: List[FunctionExampleCaseInfo]


# Information describing a function
class FunctionInfo(NamedTuple):
    # Name of the function (e.g. add)
    # Sourced from Substrait YAML
    name: str
    # The Substrait URI for the function (e.g. https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml)
    # Sourced from Substrait YAML
    # Can be overridden by BFT markdown
    uri: str
    # The last part of the URI (e.g. functions_arithmetic.yaml)
    # Sourced from Substrait YAML
    uri_short: str
    # A very brief (ideally one sentence) description of the function
    # Sourced from Substrait YAML
    brief: str
    # Available options for the function
    options: List[FunctionOptionInfo]
    # Available kernels for the function
    kernels: List[Kernel]
    # Dialect info for the function
    dialects: List[FunctionDialectInfo]
    # Function details
    details: List[FunctionDetailInfo]
    # Properties that hold true for the function
    properties: List[FunctionPropertyInfo]
    # Example function executions
    example_groups: List[FunctionExampleGroupInfo]


class FunctionIndexItem(NamedTuple):
    # Name of the function
    name: str
    # Summary of the function, sourced from Substrait YAML
    brief: str
    # Function category, i.e. Arithmetic, String, etc.
    category: str


class FunctionIndexInfo(NamedTuple):
    functions: List[FunctionIndexItem]


# --------------------------------------------------------------------------------
# /bft/substrait/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/substrait/__init__.py
# --------------------------------------------------------------------------------
# /bft/substrait/extension_file_parser.py:
# --------------------------------------------------------------------------------
import pathlib
from collections import namedtuple
from collections.abc import Iterable
from typing import Dict, List, NamedTuple

import yaml

try:
    from yaml import CSafeDumper as SafeDumper
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader, SafeDumper

from typing import BinaryIO

from ..core.function import FunctionBuilder, LibraryBuilder


class ValueArg(NamedTuple):
    name: str
    description: str
    type: str


class EnumArg(NamedTuple):
    name: str
    description: str
    options: List[str]


class Implementation(NamedTuple):
    args: List[ValueArg | EnumArg]
    options: Dict[str, List[str]]
    return_type: str
    # NOTE(review): visit_implementation stores this as a string ("0" or
    # str(min)) although it is annotated int -- confirm what consumers expect.
    variadic: int


class Function(NamedTuple):
    name: str
    description: str
    implementations: List[Implementation]


class ExtensionsFile(NamedTuple):
    functions: List[Function]


class ExtensionFileVisitor(object):
    """Walk a parsed Substrait extension YAML document and build an ExtensionsFile.

    ``location_stack`` records the list items currently being visited so that
    failures can report where in the document they happened.
    """

    def __init__(self):
        self.location_stack = []

    def __fail(self, err):
        """Raise an Exception carrying the current location and ``err``."""
        loc = "/".join(self.location_stack)
        raise Exception(f"Error visiting extension file. Location={loc} Message={err}")

    def __visit_list(self, visitor, obj, attr, required=False):
        """Apply ``visitor`` to every item of ``obj[attr]`` and return the results.

        A missing attribute yields ``[]`` unless ``required`` is true, in
        which case it is a failure; a non-iterable value is always a failure.
        """
        if attr in obj:
            val = obj[attr]
            results = []
            if not isinstance(val, Iterable):
                self.__fail(f"Expected attribute {attr} to be iterable")
            for idx, item in enumerate(val):
                self.location_stack.append(f"{attr}[{idx}]")
                results.append(visitor(item))
                self.location_stack.pop()
            return results
        elif required:
            self.__fail(f"Expected required attribute {attr}")
        else:
            return []

    def __get_or_die(self, obj, attr):
        """Return ``obj[attr]`` or fail when the attribute is missing."""
        if attr in obj:
            return obj[attr]
        self.__fail(f"Expected required attribute {attr}")

    def __get_or_else(self, obj, attr, default):
        """Return ``obj[attr]``, or ``default`` when the attribute is missing."""
        if attr in obj:
            return obj[attr]
        return default

    def visit_ext_file(self, parsed_file):
        """Collect scalar functions followed by aggregate functions."""
        scalar_functions = self.__visit_list(
            self.visit_function, parsed_file, "scalar_functions"
        )
        aggregate_functions = self.__visit_list(
            self.visit_function, parsed_file, "aggregate_functions"
        )
        return ExtensionsFile(scalar_functions + aggregate_functions)

    def visit_impl_arg(self, arg):
        """Return a ValueArg when ``value`` is set, otherwise an EnumArg.

        An argument with neither a truthy ``value`` nor ``options`` is a failure.
        """
        name = self.__get_or_else(arg, "name", None)
        description = self.__get_or_else(arg, "description", None)
        value = self.__get_or_else(arg, "value", None)
        if value:
            return ValueArg(name, description, value)
        else:
            options = self.__get_or_else(arg, "options", None)
            if options is None:
                self.__fail(
                    "Argument encountered that did not have any value or options"
                )
            return EnumArg(name, description, options)

    def visit_implementation(self, impl):
        """Build an Implementation; ``return`` is required, the rest optional."""
        args = self.__visit_list(self.visit_impl_arg, impl, "args")
        options = self.__get_or_else(impl, "options", {})
        opts = {}
        variadic = "0"
        if "variadic" in impl:
            variadic = str(impl["variadic"]["min"])
        for key, option in options.items():
            opts[key] = self.__get_or_die(option, "values")
        return_type = self.__get_or_die(impl, "return")
        return Implementation(args, opts, return_type, variadic)

    def visit_function(self, func):
        """Build a Function; ``name`` is required, ``description`` may be None."""
        name = self.__get_or_die(func, "name")
        description = self.__get_or_else(func, "description", None)
        implementations = self.__visit_list(self.visit_implementation, func, "impls")
        return Function(name, description, implementations)


class ExtensionFileParser(object):
    """Load one extension YAML document and convert it to an ExtensionsFile."""

    def parse(self, f: BinaryIO) -> ExtensionsFile:
        data = yaml.load(f, SafeLoader)
        return ExtensionFileVisitor().visit_ext_file(data)


def add_extensions_file_to_library(
    location: str | pathlib.Path, ext_file: ExtensionsFile, library: LibraryBuilder
):
    """Register every function of ``ext_file`` with ``library``.

    The function category is the stem of ``location``'s file name with
    ``functions_`` removed; the file name itself is recorded as the URI.
    ``location`` may be a string or a path object: it is always wrapped in
    ``pathlib.Path`` rather than read through ``.name``, which plain strings
    do not have.
    """
    location_path = pathlib.Path(location)
    function_category = location_path.stem.replace("functions_", "")
    for func in ext_file.functions:
        builder: FunctionBuilder = library.get_function(func.name, function_category)
        builder.set_uri(location_path.name)
        if func.description is not None:
            builder.try_set_description(func.description)
        for impl in func.implementations:
            for opt_name, opt_values in impl.options.items():
                builder.note_option(opt_name, opt_values)
            arg_types = []
            for arg in impl.args:
                if isinstance(arg, ValueArg):
                    arg_types.append(arg.type)
                else:
                    # Enum arguments are represented by their joined option names.
                    arg_types.append("|".join(arg.options))
            builder.note_kernel(
                arg_types, impl.return_type, impl.options.keys(), impl.variadic
            )


# --------------------------------------------------------------------------------
# /bft/supplements/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/supplements/__init__.py
# --------------------------------------------------------------------------------
# /bft/supplements/parser.py:
# --------------------------------------------------------------------------------
import pathlib
from typing import Dict, TextIO

from mistletoe.ast_renderer import get_ast
from mistletoe.block_token import Document, Heading, Paragraph
from mistletoe.html_renderer import HTMLRenderer
from mistletoe.span_token import RawText

from .types import BasicSupplement, OptionSupplement, SupplementsFile


class SupplementsParser(object):
    """Parse a supplemental markdown document describing one function.

    The document must start with a level 1 heading (the function name).
    Level 2 headings must be ``Options``, ``Details`` or ``Properties``
    (case-insensitive). Level 3 headings name an option or a sub-section,
    and level 4 headings (options only) name an option value. Paragraphs are
    rendered to HTML and attached to the heading they follow.
    """

    def __init__(self):
        self.html_renderer = HTMLRenderer()
        self.__reset()

    def __reset(self):
        """Clear all state so the parser can be reused for another document."""
        self.__finish = None
        self.__paragraphs = []
        self.__sub_section_title = None
        self.__option_name = None
        self.__option_description = None
        self.__parsing_options = False
        self.__current_option_value = None
        self.__supplements = []
        self.options = {}
        self.details = []
        self.properties = []

    def __get_simple_text(self, heading: Heading) -> str:
        """Return the text of a heading that consists of a single RawText child."""
        if len(heading.children) != 1:
            raise Exception(
                f"Expected heading to have one line of simple text but there were {len(heading.children)} sub-elements"
            )
        text_child = heading.children[0]
        if not isinstance(text_child, RawText):
            raise Exception(
                f"Expected heading to contain simple raw text but it was {type(text_child)}"
            )
        return text_child.content

    def __add_options(self):
        # Options are stored one at a time by __finish_option, so there is
        # nothing left to do when the Options section ends.
        pass

    def __add_properties(self):
        self.properties = self.__supplements

    def __add_details(self):
        self.details = self.__supplements

    def __finish_last_task(self):
        """Close the current level 2 section and reset per-section state."""
        if self.__finish is not None:
            self.__finish()
        self.__finish = None
        self.__parsing_options = False
        self.__supplements = []

    def __finish_option(self):
        """Store the option being parsed (keyed by lower-cased name), if any."""
        if self.__option_name is None:
            return
        self.options[self.__option_name.lower()] = OptionSupplement(
            self.__option_description, self.__supplements
        )
        self.__option_name = None
        self.__supplements = []

    def __finish_section(self):
        """Turn the current sub-section title and paragraphs into a supplement.

        Raises when paragraphs appear without a title or a title has no
        paragraphs.
        """
        if self.__sub_section_title is None:
            if len(self.__paragraphs) != 0:
                raise Exception(
                    f"Encountered paragraphs but no L3/L4 section title starting at {self.__paragraphs[0]}"
                )
            return
        if len(self.__paragraphs) == 0:
            raise Exception(f"Sub-section {self.__sub_section_title} had no paragraphs")
        self.__supplements.append(
            BasicSupplement(self.__sub_section_title, "\n".join(self.__paragraphs))
        )
        self.__sub_section_title = None
        self.__paragraphs = []

    def __finish_last_l3(self):
        if self.__parsing_options:
            self.__finish_option()
        else:
            self.__finish_section()

    def __finish_last_l4(self):
        """Attach pending paragraphs to the current option or option value.

        Paragraphs seen before any level 4 heading become the option's
        description; later ones describe the (upper-cased) option value.
        """
        if self.__option_name is None:
            return
        content = "\n".join(self.__paragraphs)
        if self.__current_option_value is None:
            self.__option_description = content
        else:
            self.__supplements.append(
                BasicSupplement(self.__current_option_value.upper(), content)
            )
        self.__current_option_value = None
        self.__paragraphs = []

    def __parse_heading(self, heading: Heading):
        """Advance the parser state for a level 2, 3 or 4 heading."""
        heading_title = self.__get_simple_text(heading)
        if heading.level == 2:
            self.__finish_last_l4()
            self.__finish_last_l3()
            self.__finish_last_task()
            if heading_title.lower() == "options":
                self.__parsing_options = True
                self.__finish = self.__add_options
            elif heading_title.lower() == "details":
                self.__finish = self.__add_details
            elif heading_title.lower() == "properties":
                self.__finish = self.__add_properties
            else:
                raise Exception(f"Unexpected L2 heading '{heading_title}'")
        elif heading.level == 3:
            if self.__finish is None:
                raise Exception(
                    f"L3 heading {heading_title} with no L2 heading preceding it"
                )
            self.__finish_last_l4()
            self.__finish_last_l3()
            if self.__parsing_options:
                self.__option_name = heading_title
            else:
                self.__sub_section_title = heading_title
        elif heading.level == 4:
            if not self.__parsing_options:
                raise Exception(
                    f"L4 heading {heading_title} encountered but we are not currently parsing options"
                )
            self.__finish_last_l4()
            self.__current_option_value = heading_title

    def __parse_paragraph(self, paragraph: Paragraph):
        self.__paragraphs.append(self.html_renderer.render_paragraph(paragraph))

    def __parse_child(self, child):
        """Dispatch a top-level element; only headings and paragraphs are allowed."""
        if isinstance(child, Heading):
            self.__parse_heading(child)
        elif isinstance(child, Paragraph):
            self.__parse_paragraph(child)
        else:
            raise Exception(
                f"Unrecognized top-level element type in supplements file {type(child)}"
            )

    def parse_supplements_doc(self, f: TextIO, directory_path: str) -> SupplementsFile:
        """Parse the markdown in ``f`` and return its SupplementsFile.

        The function name is the lower-cased text of the leading level 1
        heading. Raises when the document is empty or does not start with a
        level 1 heading.
        """
        self.__reset()
        doc = Document(f)

        if len(doc.children) == 0:
            raise Exception(
                "Supplements document appears to be empty. It should at least have a title"
            )

        title_section = doc.children[0]
        if not isinstance(title_section, Heading) or title_section.level != 1:
            raise Exception(
                "First element in a supplements doc should be a level 1 heading with the name of the function"
            )

        function_name = self.__get_simple_text(title_section).lower()
        for child in doc.children[1:]:
            self.__parse_child(child)

        # Flush whatever section was still open at the end of the document.
        self.__finish_last_l4()
        self.__finish_last_l3()
        self.__finish_last_task()

        return SupplementsFile(
            function_name, directory_path, self.options, self.details, self.properties
        )


def load_supplements(supplements_dir: str) -> Dict[str, SupplementsFile]:
    """Parse every ``*.md`` file under ``supplements_dir`` (recursively).

    Returns a mapping from lower-cased function name to its SupplementsFile.
    Files are read as UTF-8 so the result does not depend on the platform's
    default encoding.
    """
    supplements = {}
    parser = SupplementsParser()
    for sup_path in pathlib.Path(supplements_dir).rglob("*.md"):
        with open(sup_path, "r", encoding="utf-8") as sup_f:
            sup = parser.parse_supplements_doc(sup_f, str(pathlib.Path(sup_path).parent))
            supplements[sup.function.lower()] = sup
    return supplements


# --------------------------------------------------------------------------------
# /bft/supplements/types.py:
# --------------------------------------------------------------------------------
from typing import Dict, List, NamedTuple


class BasicSupplement(NamedTuple):
    title: str
    description: str


class OptionSupplement(NamedTuple):
    description: str
    values: List[BasicSupplement]


class SupplementsFile(NamedTuple):
    function: str
    dir_path: str
    options: Dict[str, OptionSupplement]
    details: List[BasicSupplement]
    properties: List[BasicSupplement]


def empty_supplements_file(function_name: str):
    """Return a SupplementsFile for ``function_name`` with no content."""
    return SupplementsFile(function_name, "", {}, [], [])


# --------------------------------------------------------------------------------
# /bft/templates/function_desc.j2:
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {{ name }} Function - BFT 7 | 8 | 9 | {% if 'aggregate' in name %} 10 | 11 | {% else %} 12 | 13 | {% endif %} 14 | 15 | 16 | 17 | 18 | 19 |
20 | 25 | 32 |
33 | 34 |
35 |

{{ name|title}}

36 |
37 |

38 | Defined in {{ uri_short }} 39 |

40 |
41 |

42 | {{ brief }} 43 |

44 |
45 |
46 |
47 |

Options 

48 | {% for option in options %} 49 |

{{option.name|title}}

50 | {{option.description}} 51 |
52 | {% for value in option.values %} 53 |
{{value.name|upper}}
54 |
{{value.description}} 55 |
56 | {% endfor %} 57 |
58 | {% endfor %} 59 |
60 |
61 |
62 |

Kernels 

63 | 68 |
69 |
70 |
71 |

Dialects 

72 | 77 | {% for dialect in dialects %} 78 |
79 | {% if dialect.options is none %} 80 |
Dialect isn't yet supported
81 | {% else %} 82 | {% for opt, val in dialect.options.items() %} 83 |
{{ opt }}
84 |
{{ val }}
85 | {% endfor %} 86 | {% endif %} 87 |
88 | {% endfor %} 89 |
90 |
91 |
92 |

Details 

93 | {% if details %} 94 | {% for detail in details %} 95 |

{{ detail.title }}

96 | {{ detail.description }} 97 | {% endfor %} 98 | {% else %} 99 |

No supplemental information about the details available

100 | {% endif %} 101 |
102 |
103 |
104 |

Properties 

105 | {% if properties %} 106 |
107 | {% for property in properties | sort(attribute='id') %} 108 |
{{ property.id }}
109 |
{{ property.description }}
110 | {% endfor %} 111 |
112 | {% else %} 113 |

No supplemental information about the properties available

114 | {% endif %} 115 |
116 |
117 |
118 |

Examples 

119 | {% for example_group in example_groups %} 120 | 121 | 122 | 123 | 124 | {% for opt in example_group.option_names %} 125 | 126 | {% endfor %} 127 | {% for arg_type in example_group.arg_types %} 128 | 129 | {% endfor %} 130 | 131 | 132 | 133 | 134 | {% for case in example_group.cases %} 135 | 136 | {% for opt in case.options %} 137 | 138 | {% endfor %} 139 | {% for arg in case.args %} 140 | 141 | {% endfor %} 142 | 143 | 144 | 145 | 147 | {% endfor -%} 148 | 149 |
{{ example_group.description }}
{{ opt }}arg{{ loop.index }} ({{ arg_type }})result ({{ example_group.result_type }})
{{ opt }}{{ arg }}{{ case.result.value | default(case.result | upper) }}
150 | {% endfor %} 151 |
152 |
153 | 154 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /bft/templates/function_index.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | BFT - Home 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 35 | 36 |
37 |
38 | 47 |
48 | 49 |
50 |
51 |

The B(ig) F(unction) T(axonomy)

52 |
53 |
54 |

 55 | The BFT aims to be a comprehensive catalogue of functions. Functions are the backbone of 56 | any compute system, but they are chronically under-documented and often full of corner 57 | cases whose behavior differs in various systems. By exhaustively documenting 58 | these corner cases we hope to make it possible for systems to fully describe their behaviors. 59 | This will make it easier to know what problems will be encountered when switching between systems and, 60 | in some cases, make it possible to obtain the correct behavior through expression transformation 61 | or a precise application of function options. 62 |

63 |
64 |
65 | {% for category, category_functions in functions_by_category %} 66 |
67 |
68 |
69 |
{{ category|title }} Functions
70 |
71 |
72 |
73 | {% endfor %} 74 |
75 |
76 |
77 |
78 | 79 | 82 | 83 | {% for category, category_functions in functions_by_category %} 84 | 106 | {% endfor %} 107 |
108 |
109 |
# 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 130 | 131 | 132 | 133 |
# --------------------------------------------------------------------------------
# /bft/testers/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/__init__.py
# --------------------------------------------------------------------------------
# /bft/testers/base_tester.py:
# --------------------------------------------------------------------------------
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, NamedTuple

from bft.cases.runner import CaseRunner
from bft.cases.types import Case
from bft.dialects.types import Dialect, DialectsLibrary


class TestResult(NamedTuple):
    # Function the case belongs to
    function: str
    # Id of the case's group
    group: str
    # Zero-based position of the case among those run so far for that group
    index: int
    # Outcome reported by the runner
    passed: bool
    # Whether the runner expected the case to pass
    should_have_passed: bool
    # Explanation reported by the runner
    reason: str


class BaseTester(ABC):
    """Common driver for running cases against one dialect.

    Subclasses choose the dialect and the runner; ``prepare`` must be called
    before ``run_test`` because it creates the runner and the per-group
    counters.
    """

    @abstractmethod
    def get_runner(self, dialect: Dialect) -> CaseRunner:
        """Return the runner that executes cases for ``dialect``."""

    @abstractmethod
    def get_dialect(self, library: DialectsLibrary) -> Dialect:
        """Pick this tester's dialect out of ``library``."""

    def prepare(self, dialects: DialectsLibrary):
        """Resolve the dialect, build its runner and reset the group counters."""
        self.dialect = self.get_dialect(dialects)
        self.runner = self.get_runner(self.dialect)
        self.group_indices = {}

    def run_test(self, case: Case) -> TestResult:
        """Run ``case`` and report it with its running index within its group."""
        outcome = self.runner.run_case(case)
        group_id = case.group.id
        position = self.group_indices.get(group_id, 0)
        self.group_indices[group_id] = position + 1
        return TestResult(
            function=case.function,
            group=group_id,
            index=position,
            passed=outcome.passed,
            should_have_passed=outcome.expected_pass,
            reason=outcome.reason,
        )


# --------------------------------------------------------------------------------
# /bft/testers/cudf/__init__.py:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/cudf/__init__.py -------------------------------------------------------------------------------- /bft/testers/cudf/runner.py: -------------------------------------------------------------------------------- 1 | import math 2 | import operator 3 | 4 | import cudf 5 | import numpy 6 | 7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner 8 | from bft.cases.types import Case 9 | from bft.dialects.types import SqlMapping 10 | from bft.utils.utils import type_to_dialect_type 11 | 12 | type_map = { 13 | "i8": cudf.dtype("int8"), 14 | "i16": cudf.dtype("int16"), 15 | "i32": cudf.dtype("int32"), 16 | "i64": cudf.dtype("int64"), 17 | "fp32": cudf.dtype("float32"), 18 | "fp64": cudf.dtype("float64"), 19 | "boolean": cudf.dtype("bool"), 20 | "string": cudf.dtype("string"), 21 | "timestamp": cudf.dtype("datetime64[s]"), 22 | "date": cudf.dtype("datetime64[s]"), 23 | } 24 | 25 | 26 | def type_to_cudf_dtype(type: str): 27 | return type_to_dialect_type(type, type_map) 28 | 29 | 30 | def is_string_function(data_types): 31 | return cudf.dtype("string") in data_types 32 | 33 | 34 | def is_datetime_function(data_types): 35 | return cudf.dtype("datetime64[s]") in data_types 36 | 37 | 38 | def is_numpy_type(data_type): 39 | return type(data_type).__module__ == numpy.__name__ 40 | 41 | 42 | def get_str_fn_result( 43 | fn_name: str, arg_vectors: list[cudf.Series], arg_values: list[str], is_regexp: bool 44 | ): 45 | if len(arg_vectors) == 1: 46 | fn = getattr(arg_vectors[0].str, fn_name) 47 | return fn() 48 | elif len(arg_vectors) == 2: 49 | fn = getattr(arg_vectors[0].str, fn_name) 50 | if is_regexp: 51 | return fn(arg_values[1], regex=True) 52 | else: 53 | return fn(arg_values[1]) 54 | else: 55 | fn = getattr(arg_vectors[0].str, fn_name) 56 | opt_arg = True if arg_values[2] is not None else False 57 | if opt_arg and is_regexp: 58 | return fn(arg_values[1], arg_values[2], 
regex=True) 59 | elif opt_arg: 60 | return fn(arg_values[1], arg_values[2]) 61 | else: 62 | return fn(arg_values[1]) 63 | 64 | 65 | def get_dt_fn_result( 66 | mapping: str, dtype, arg_vectors: list[cudf.Series], arg_values: list[str] 67 | ): 68 | fn_name = mapping.local_name 69 | if len(arg_vectors) == 2: 70 | if mapping.infix: 71 | gdf = cudf.DataFrame( 72 | {"a": arg_values[0], "b": arg_values[1]}, 73 | dtype=dtype, 74 | ) 75 | result = gdf.eval(f"(a){fn_name}(b)") 76 | elif mapping.extract: 77 | extract_property = arg_values[0].lower() 78 | result = getattr(arg_vectors[1].dt, extract_property) 79 | return result 80 | 81 | 82 | class CudfRunner(SqlCaseRunner): 83 | def __init__(self, dialect): 84 | super().__init__(dialect) 85 | 86 | def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult: 87 | arg_vectors = [] 88 | arg_values = [] 89 | data_types = [] 90 | fn_name = mapping.local_name 91 | is_regexp = True if "regexp" in case.function else False 92 | for arg in case.args: 93 | dtype = type_to_cudf_dtype(arg.type) 94 | if dtype is None: 95 | return SqlCaseResult.unsupported( 96 | f"The type {arg.type} is not supported" 97 | ) 98 | arg_vectors.append(cudf.Series(arg.value, dtype=dtype)) 99 | arg_values.append(arg.value) 100 | data_types.append(dtype) 101 | 102 | try: 103 | if is_datetime_function(data_types): 104 | result = get_dt_fn_result(mapping, dtype, arg_vectors, arg_values) 105 | elif is_string_function(data_types): 106 | result = get_str_fn_result(fn_name, arg_vectors, arg_values, is_regexp) 107 | elif len(arg_vectors) == 1: 108 | # Some functions that only take a single arg are able to be executed against 109 | # both a Series and a Dataframe whereas others are only able to be executed against a Dataframe. 
110 | if mapping.aggregate: 111 | arg_values = arg_values[0] 112 | try: 113 | gdf = cudf.DataFrame({"a": arg_values}, dtype=dtype) 114 | result = gdf.eval(f"{fn_name}(a)") 115 | except ValueError: 116 | fn = getattr(arg_vectors[0], fn_name) 117 | result = fn() 118 | elif len(arg_vectors) == 2: 119 | if mapping.infix: 120 | # If there are only Null/Nan/None values in the column, they are set to False instead of . 121 | # We add extra data to ensure the value exists in the dataframe. 122 | gdf = cudf.DataFrame( 123 | {"a": [arg_values[0], True], "b": [arg_values[1], True]}, 124 | dtype=dtype, 125 | ) 126 | result = gdf.eval(f"(a){fn_name}(b)") 127 | else: 128 | try: 129 | fn = getattr(arg_vectors[0], fn_name) 130 | result = fn(arg_vectors[1]) 131 | except AttributeError: 132 | fn = getattr(operator, fn_name) 133 | result = fn(arg_vectors[0], arg_vectors[1]) 134 | except ValueError: # Case for round function 135 | fn = getattr(arg_vectors[0], fn_name) 136 | result = fn(arg_values[1]) 137 | else: 138 | fn = getattr(arg_vectors[0], fn_name) 139 | try: 140 | result = fn(arg_vectors[1:]) 141 | except TypeError: 142 | result = fn(arg_values[1], arg_values[2]) 143 | except RuntimeError as err: 144 | return SqlCaseResult.error(str(err)) 145 | 146 | if mapping.aggregate: 147 | if is_numpy_type(result): 148 | result = result.item() 149 | else: 150 | if result.empty and ( 151 | case.result.value is None or case.result.value is False 152 | ): 153 | return SqlCaseResult.success() 154 | elif len(result) != 1 and not mapping.infix: 155 | raise Exception("Scalar function with one row output more than one row") 156 | else: 157 | result = result[0] 158 | 159 | if case.result == "undefined": 160 | return SqlCaseResult.success() 161 | elif case.result == "error": 162 | return SqlCaseResult.unexpected_pass(str(result)) 163 | elif case.result == "nan": 164 | if math.isnan(result): 165 | return SqlCaseResult.success() 166 | else: 167 | if case.result.value is None: 168 | if str(result) == 
"" or math.isnan(result) or result is None: 169 | return SqlCaseResult.success() 170 | else: 171 | return SqlCaseResult.mismatch(str(result)) 172 | elif case.result.value == result: 173 | return SqlCaseResult.success() 174 | elif case.result.value == str(result): 175 | return SqlCaseResult.success() 176 | elif numpy.float32(case.result.value) == result: 177 | return SqlCaseResult.success() 178 | else: 179 | return SqlCaseResult.mismatch(str(result)) 180 | -------------------------------------------------------------------------------- /bft/testers/cudf/tester.py: -------------------------------------------------------------------------------- 1 | from bft.dialects.types import Dialect, DialectsLibrary 2 | from bft.testers.base_tester import BaseTester 3 | 4 | from .runner import CudfRunner 5 | 6 | 7 | class CudfTester(BaseTester): 8 | def get_runner(self, dialect: Dialect): 9 | return CudfRunner(dialect) 10 | 11 | def get_dialect(self, library: DialectsLibrary): 12 | return library.get_dialect_by_name("cudf") 13 | -------------------------------------------------------------------------------- /bft/testers/datafusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/datafusion/__init__.py -------------------------------------------------------------------------------- /bft/testers/datafusion/runner.py: -------------------------------------------------------------------------------- 1 | import math 2 | from datetime import datetime 3 | 4 | import datafusion 5 | import numpy 6 | import pyarrow as pa 7 | 8 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner 9 | from bft.cases.types import Case, CaseLiteral 10 | from bft.dialects.types import SqlMapping 11 | from bft.utils.utils import type_to_dialect_type 12 | 13 | type_map = { 14 | "i8": pa.int8(), 15 | "i16": pa.int16(), 16 | "i32": pa.int32(), 17 | "i64": 
def type_to_datafusion_type(type: str):
    """Map a case type name to its pyarrow type using ``type_map``.

    Delegates to ``type_to_dialect_type``; whatever that helper returns for an
    unknown name is passed straight back to the caller.
    """
    return type_to_dialect_type(type, type_map)


def handle_special_cases(lit: "CaseLiteral"):
    """Turn the textual float markers into real Python floats.

    ``"nan"``, ``"inf"`` and ``"-inf"`` become the matching float values; any
    other literal is returned unchanged.
    """
    if lit == "nan":
        return math.nan
    elif lit == "inf":
        return float("inf")
    elif lit == "-inf":
        return float("-inf")
    return lit


def is_string_type(arg):
    """Return True when ``arg`` carries a non-null value that is kept as text.

    That is the case for string/date/time style types and for the literal
    value ``"Null"``; an argument whose value is ``None`` never qualifies.
    """
    return (
        arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
        or arg.value in ["Null"]
    ) and arg.value is not None


def arg_with_type(arg):
    """Coerce ``arg.value`` to the Python type implied by ``arg.type``.

    Returns:
        ``str`` for text-like arguments, ``None`` for null or list values,
        ``int`` for ``i<digits>`` types, ``float`` for ``fp*`` types, and the
        value unchanged for everything else.
    """
    if is_string_type(arg):
        return str(arg.value)
    if isinstance(arg.value, list) or arg.value is None:
        return None
    # Only i8/i16/i32/i64-style names are integers.  A bare startswith("i")
    # also matched names such as "interval" and sent their values to int().
    if arg.type.startswith("i") and arg.type[1:2].isdigit():
        return int(arg.value)
    if arg.type.startswith("fp"):
        return float(arg.value)
    return arg.value


def str_to_datetime(str_val, type):
    """Parse a textual date/time value into a ``datetime``.

    Args:
        str_val: The text to parse, or ``None`` for a null value.
        type: The case type name; ``"time"`` selects the time-only format.

    Returns:
        The parsed ``datetime``, or ``None`` when ``str_val`` is ``None``.

    Raises:
        ValueError: If the text does not match the format chosen for it.
    """
    if str_val is None:
        # Null values stay null instead of failing in strptime()/len().
        return None
    if type == "time":
        return datetime.strptime(str_val, "%H:%M:%S.%f")
    # The format is chosen from the text length: longer than 19 characters
    # carries a zone name, shorter than 16 is a bare date.
    if len(str_val) > 19:
        return datetime.strptime(str_val, "%Y-%m-%d %H:%M:%S %Z")
    elif len(str_val) < 16:
        return datetime.strptime(str_val, "%Y-%m-%d")
    else:
        return datetime.strptime(str_val, "%Y-%m-%d %H:%M:%S")
| arg_vals = [] 93 | arg_type = type_to_datafusion_type(arg.type) 94 | if arg_type is None: 95 | return SqlCaseResult.unsupported(f"Unsupported type {arg.type}") 96 | for val in arg.value: 97 | arg_vals.append(handle_special_cases(val)) 98 | arg_names.append(f"arg{arg_idx}") 99 | arg_vectors.append(pa.array(arg_vals, arg_type)) 100 | else: 101 | for arg_idx, arg in enumerate(case.args): 102 | arg_val = arg_with_type(arg) 103 | arg_type = type_to_datafusion_type(arg.type) 104 | if arg_type is None: 105 | return SqlCaseResult.unsupported(f"Unsupported type {arg.type}") 106 | orig_types.append(arg.type) 107 | arg_vals_list.append(arg_val) 108 | arg_types_list.append(arg_type) 109 | arg_names.append(f"arg{arg_idx}") 110 | 111 | for val, arg_type, orig_type in zip( 112 | arg_vals_list, arg_types_list, orig_types 113 | ): 114 | if isinstance(arg_type, pa.lib.TimestampType): 115 | val = str_to_datetime(val, orig_type) 116 | arg_vectors.append(pa.array([val], arg_type)) 117 | 118 | joined_arg_names = ",".join(arg_names) 119 | batch = pa.RecordBatch.from_arrays( 120 | arg_vectors, 121 | names=arg_names, 122 | ) 123 | self.ctx.register_record_batches("my_table", [[batch]]) 124 | if mapping.infix: 125 | if len(case.args) != 2: 126 | raise Exception(f"Infix function with {len(case.args)} args") 127 | expr_str = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;" 128 | elif mapping.postfix: 129 | if len(arg_names) != 1: 130 | raise Exception(f"Postfix function with {len(arg_names)} args") 131 | expr_str = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;" 132 | elif mapping.extract: 133 | if len(arg_names) != 2: 134 | raise Exception(f"Extract function with {len(arg_names)} args") 135 | expr_str = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;" 136 | elif mapping.local_name == 'count(*)': 137 | expr_str = f"SELECT {mapping.local_name} FROM my_table;" 138 | elif mapping.aggregate: 139 | if len(arg_names) < 
1: 140 | raise Exception(f"Aggregate function with {len(arg_names)} args") 141 | expr_str = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;" 142 | else: 143 | expr_str = ( 144 | f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;" 145 | ) 146 | 147 | result = self.ctx.sql(expr_str).collect()[0].columns[0].to_pylist() 148 | 149 | if len(result) != 1: 150 | raise Exception("Scalar function with one row output more than one row") 151 | result = result[0] 152 | 153 | if case.result == "undefined": 154 | return SqlCaseResult.success() 155 | elif case.result == "error": 156 | return SqlCaseResult.unexpected_pass(str(result)) 157 | elif case.result == "nan": 158 | if math.isnan(result): 159 | return SqlCaseResult.success() 160 | # Issues with python float comparison: 161 | # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison 162 | # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python 163 | # Datafusion bug with float when converting from a dataframe to a pylist: 164 | # https://github.com/apache/arrow-datafusion/issues/9950 165 | elif case.result.type.startswith('fp') and case.result.value: 166 | if math.isclose(result, case.result.value, rel_tol=1e-6): 167 | return SqlCaseResult.success() 168 | else: 169 | if result == case.result.value: 170 | return SqlCaseResult.success() 171 | else: 172 | return SqlCaseResult.mismatch(str(result)) 173 | except Exception as err: 174 | return SqlCaseResult.error(str(err)) 175 | finally: 176 | self.ctx.deregister_table("my_table") 177 | -------------------------------------------------------------------------------- /bft/testers/datafusion/tester.py: -------------------------------------------------------------------------------- 1 | from bft.dialects.types import Dialect, DialectsLibrary 2 | from bft.testers.base_tester import BaseTester 3 | 4 | from .runner import DatafusionRunner 5 | 6 | 7 | class 
DatafustionTester(BaseTester): 8 | def get_runner(self, dialect: Dialect): 9 | return DatafusionRunner(dialect) 10 | 11 | def get_dialect(self, library: DialectsLibrary): 12 | return library.get_dialect_by_name("datafusion") 13 | -------------------------------------------------------------------------------- /bft/testers/duckdb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/duckdb/__init__.py -------------------------------------------------------------------------------- /bft/testers/duckdb/runner.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import math 3 | from typing import Dict, NamedTuple 4 | 5 | import duckdb 6 | 7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner 8 | from bft.cases.types import Case 9 | from bft.dialects.types import SqlMapping 10 | from bft.utils.utils import type_to_dialect_type, datetype_value_equal 11 | 12 | type_map = { 13 | "i8": "TINYINT", 14 | "i16": "SMALLINT", 15 | "i32": "INTEGER", 16 | "i64": "BIGINT", 17 | "fp32": "REAL", 18 | "fp64": "DOUBLE", 19 | "boolean": "BOOLEAN", 20 | "string": "VARCHAR", 21 | "date": "DATE", 22 | "time": "TIME", 23 | "timestamp": "TIMESTAMP", 24 | "timestamp_tz": "TIMESTAMPTZ", 25 | "interval": "INTERVAL", 26 | "decimal": "DECIMAL", 27 | } 28 | 29 | 30 | def type_to_duckdb_type(type: str): 31 | return type_to_dialect_type(type, type_map) 32 | 33 | 34 | def literal_to_str(lit: str | int | float): 35 | if lit is None: 36 | return "null" 37 | elif lit in [math.nan, "nan"]: 38 | return "'NaN'" 39 | elif lit in [float("inf"), "inf"]: 40 | return "'Infinity'" 41 | elif lit in [float("-inf"), "-inf"]: 42 | return "'-Infinity'" 43 | return str(lit) 44 | 45 | 46 | def is_string_type(arg): 47 | return ( 48 | arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"] 49 | and 
def is_datetype(arg):
    """Return True when ``arg`` is a ``datetime``, ``date`` or ``timedelta``."""
    return isinstance(arg, (datetime.datetime, datetime.date, datetime.timedelta))


class DuckDBRunner(SqlCaseRunner):
    """SQL case runner that executes cases on a ``duckdb.connect()`` connection."""

    def __init__(self, dialect):
        super().__init__(dialect)
        self.conn = duckdb.connect()

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Run ``case`` through a scratch table ``my_table`` and grade the result.

        The arguments are inserted into ``my_table``, the mapped function is
        selected from it, and the first value of the first row is compared
        with ``case.result``.

        Returns:
            ``unsupported`` when an argument type has no DuckDB mapping,
            ``error`` when DuckDB raises, otherwise ``success``,
            ``mismatch`` or ``unexpected_pass``.

        Raises:
            Exception: If the argument count does not fit an infix, postfix,
                extract or aggregate mapping.
        """
        try:
            arg_defs = []
            for idx, arg in enumerate(case.args):
                arg_type = type_to_duckdb_type(arg.type)
                if arg_type is None:
                    # Same handling as the other SQL runners; otherwise the
                    # schema would contain the literal text "None".
                    return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                arg_defs.append(f"arg{idx} {arg_type}")
            schema = ",".join(arg_defs)
            self.conn.execute(f"CREATE TABLE my_table({schema});")
            self.conn.execute("SET TimeZone='UTC';")

            arg_names = [f"arg{idx}" for idx in range(len(case.args))]
            joined_arg_names = ",".join(arg_names)
            arg_vals_list = list()
            for arg in case.args:
                if is_string_type(arg):
                    arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
                else:
                    arg_vals_list.append(literal_to_str(arg.value))
            arg_vals = ", ".join(arg_vals_list)
            if mapping.aggregate:
                # Aggregates carry a list of values per argument; each value
                # becomes one row of that argument's column.
                arg_vals_list = list()
                for arg in case.args:
                    quoted = is_string_type(arg)
                    rows = []
                    for value in arg.value:
                        # Quote every non-null text value, including "" which
                        # a truthiness test would have rendered as "()".
                        if quoted and value is not None:
                            rows.append(f"('{literal_to_str(value)}')")
                        else:
                            rows.append(f"({literal_to_str(value)})")
                    arg_vals_list.append([",".join(rows)])
                for arg_name, arg_vals in zip(arg_names, arg_vals_list):
                    if len(arg_vals[0]):
                        self.conn.execute(
                            f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
                        )
            else:
                self.conn.execute(
                    f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
                )

            if mapping.infix:
                if len(arg_names) != 2:
                    raise Exception(f"Infix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.extract:
                if len(arg_names) != 2:
                    raise Exception(f"Extract function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
            elif mapping.local_name == "count(*)":
                expr = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
            result = self.conn.execute(expr).fetchone()[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            elif str(result) == "nan":
                if case.result == "nan":
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            elif case.result.type.startswith("fp") and case.result.value and result:
                if math.isclose(result, case.result.value, rel_tol=1e-7):
                    return SqlCaseResult.success()
                # Previously fell off the end and returned None.
                return SqlCaseResult.mismatch(str(result))
            else:
                if result == case.result.value:
                    return SqlCaseResult.success()
                elif is_datetype(result) and datetype_value_equal(
                    result, case.result.value
                ):
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
        except duckdb.Error as err:
            return SqlCaseResult.error(str(err))
        finally:
            # IF EXISTS: the table is absent when we returned or failed before
            # CREATE TABLE, and a failing DROP here would mask that outcome.
            self.conn.execute("DROP TABLE IF EXISTS my_table")
class DuckDBTester(BaseTester):
    """Tester binding for the dialect registered under the name ``duckdb``."""

    def get_runner(self, dialect: Dialect):
        """Return a ``DuckDBRunner`` constructed from ``dialect``."""
        runner = DuckDBRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Look up the ``duckdb`` dialect in ``library`` by name."""
        dialect_name = "duckdb"
        return library.get_dialect_by_name(dialect_name)
def type_to_postgres_type(type: str):
    """Return the Postgres column type for ``type``, or ``None`` if unmapped."""
    return type_map.get(type)


def literal_to_str(lit: str | int | float | None):
    """Render a case literal as the text spliced into a Postgres statement.

    ``None`` becomes ``null``, the infinities (floats or the markers
    ``"inf"``/``"-inf"``) become ``'Infinity'``/``'-Infinity'``, and anything
    else is ``str(lit)``.
    """
    if lit is None:
        return "null"
    elif lit in [float("inf"), "inf"]:
        return "'Infinity'"
    elif lit in [float("-inf"), "-inf"]:
        return "'-Infinity'"
    return str(lit)


def is_string_type(arg):
    """Return True when ``arg`` has a non-null value of a quoted (text-like) type."""
    return (
        arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
        and arg.value is not None
    )


def is_datetype(arg):
    """Return True when ``arg`` is a ``datetime``, ``date`` or ``timedelta``."""
    # The debug print that used to run on every comparison has been removed.
    return isinstance(arg, (datetime.datetime, datetime.date, datetime.timedelta))


def get_connection_str():
    """Build the connection string from the ``POSTGRES_*`` environment variables.

    Falls back to host ``localhost``, database ``bft`` and user/password
    ``postgres`` when a variable is not set.
    """
    host = os.environ.get("POSTGRES_HOST", "localhost")
    dbname = os.environ.get("POSTGRES_DB", "bft")
    user = os.environ.get("POSTGRES_USER", "postgres")
    password = os.environ.get("POSTGRES_PASSWORD", "postgres")
    # "{name=}" renders as name='value', i.e. key='quoted value' pairs.
    return f"{host=} {dbname=} {user=} {password=}"


class PostgresRunner(SqlCaseRunner):
    """SQL case runner that executes cases on a psycopg connection."""

    def __init__(self, dialect):
        super().__init__(dialect)
        self.conn = psycopg.connect(get_connection_str())

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Run ``case`` through a scratch table ``my_table`` and grade the result.

        All statements are rolled back in ``finally``, so the table does not
        outlive the call.

        Returns:
            ``unsupported`` when an argument type has no Postgres mapping,
            ``error`` when psycopg raises or a NaN result is expected,
            otherwise ``success``, ``mismatch`` or ``unexpected_pass``.

        Raises:
            Exception: If the argument count does not fit an infix, postfix,
                extract, between or aggregate mapping.
        """
        self.conn.execute("BEGIN;")

        try:
            arg_defs = []
            for idx, arg in enumerate(case.args):
                arg_type = type_to_postgres_type(arg.type)
                if arg_type is None:
                    return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                arg_defs.append(f"arg{idx} {arg_type}")
            schema = ",".join(arg_defs)
            self.conn.execute(f"CREATE TABLE my_table({schema});")

            arg_names = [f"arg{idx}" for idx in range(len(case.args))]
            joined_arg_names = ",".join(arg_names)
            arg_vals_list = list()
            for arg in case.args:
                if is_string_type(arg):
                    arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
                else:
                    arg_vals_list.append(literal_to_str(arg.value))
            arg_vals = ", ".join(arg_vals_list)
            if mapping.aggregate:
                # Aggregates carry a list of values per argument; each value
                # becomes one row of that argument's column.
                arg_vals_list = list()
                for arg in case.args:
                    quoted = is_string_type(arg)
                    rows = []
                    for value in arg.value:
                        # Quote every non-null text value, including "" which
                        # a truthiness test would have rendered as "()".
                        if quoted and value is not None:
                            rows.append(f"('{literal_to_str(value)}')")
                        else:
                            rows.append(f"({literal_to_str(value)})")
                    arg_vals_list.append([",".join(rows)])
                for arg_name, arg_vals in zip(arg_names, arg_vals_list):
                    if len(arg_vals[0]):
                        self.conn.execute(
                            f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
                        )
            else:
                self.conn.execute(
                    f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
                )

            if mapping.infix:
                if len(arg_names) != 2:
                    raise Exception(f"Infix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.extract:
                if len(arg_names) != 2:
                    raise Exception(f"Extract function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
            elif mapping.between:
                if len(arg_names) != 3:
                    raise Exception(f"Between function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} BETWEEN {arg_names[1]} AND {arg_names[2]} FROM my_table;"
            elif mapping.local_name == 'count(*)':
                expr = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
            result = self.conn.execute(expr).fetchone()[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            elif case.result == "nan":
                print(f"Expected NAN but received {result}")
                return SqlCaseResult.error(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            elif (
                case.result.type.startswith("fp")
                and case.result.value
                and result is not None
            ):
                # A NULL result is graded by the equality branch below instead
                # of raising TypeError inside math.isclose().
                if math.isclose(result, case.result.value, rel_tol=1e-7):
                    return SqlCaseResult.success()
                # Previously fell off the end and returned None.
                return SqlCaseResult.mismatch(str(result))
            else:
                if result == case.result.value:
                    return SqlCaseResult.success()
                elif is_datetype(result) and datetype_value_equal(
                    result, case.result.value
                ):
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
        except psycopg.Error as err:
            return SqlCaseResult.error(str(err))
        finally:
            self.conn.rollback()
def type_to_snowflake_type(type: str):
    """Map a case type name to a Snowflake column type using ``type_map``."""
    return type_to_dialect_type(type, type_map)


def literal_to_str(lit: str | int | float | None):
    """Render a case literal as the text spliced into a Snowflake statement.

    ``None`` becomes ``null``; NaN and the infinities (floats or the markers
    ``"nan"``, ``"inf"``, ``"-inf"``) become ``'NaN'``, ``'inf'`` and
    ``'-inf'``; anything else is ``str(lit)``.
    """
    if lit is None:
        return "null"
    # NaN never compares equal to itself, so a list-membership test only
    # recognises the math.nan object itself; check with isnan() instead.
    if lit == "nan" or (isinstance(lit, float) and math.isnan(lit)):
        return "'NaN'"
    if lit in (float("inf"), "inf"):
        return "'inf'"
    if lit in (float("-inf"), "-inf"):
        return "'-inf'"
    return str(lit)


def literal_to_float(lit: str | int | float):
    """Render a float literal for an aggregate ``VALUES`` row.

    The special values are wrapped in an explicit ``TO_DOUBLE(...::float)``
    expression; any other literal is returned unchanged.
    """
    if lit == "nan" or (isinstance(lit, float) and math.isnan(lit)):
        # Without this a NaN would be spliced in as the bare token "nan".
        return "TO_DOUBLE('NaN'::float)"
    if lit in (float("inf"), "inf"):
        return "TO_DOUBLE('inf'::float)"
    if lit in (float("-inf"), "-inf"):
        return "TO_DOUBLE('-inf'::float)"
    return lit


def is_float_type(arg):
    """Return True when ``arg`` is declared as ``fp32`` or ``fp64``."""
    return arg.type in ["fp32", "fp64"]


def is_string_type(arg):
    """Return True when ``arg`` has a non-null value of a quoted (text-like) type."""
    return (
        arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
        and arg.value is not None
    )


def is_datetype(arg):
    """Return True when ``arg`` is a ``datetime``, ``date`` or ``timedelta``."""
    return isinstance(arg, (datetime.datetime, datetime.date, datetime.timedelta))


class SnowflakeRunner(SqlCaseRunner):
    """SQL case runner that executes cases on a Snowflake connection."""

    def __init__(self, dialect):
        """Connect using ``config.yaml`` and the key file named in the environment.

        The config path is relative, so it is resolved against the current
        working directory.  The private key bytes are read from the file named
        by ``SNOWSQL_PRIVATE_KEY_PATH`` and passed to ``connect`` unchanged.

        Raises:
            KeyError: If ``SNOWSQL_PRIVATE_KEY_PATH`` is not set.
        """
        super().__init__(dialect)
        with open("testers/snowflake/config.yaml", "r") as file:
            config = yaml.safe_load(file)
        sf_config = config["snowflake"]
        print(f"Connecting to {sf_config['account']} as {sf_config['username']}")
        private_key_path = os.environ["SNOWSQL_PRIVATE_KEY_PATH"]
        with open(private_key_path, "rb") as f:
            private_key = f.read()

        self.conn = connect(
            user=sf_config["username"],
            private_key=private_key,
            account=sf_config["account"],
            database=sf_config["database"],
            schema=sf_config["schema"],
            warehouse=sf_config["warehouse"],
        )

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Run ``case`` through a scratch table ``my_table`` and grade the result.

        Returns:
            ``unsupported`` when an argument type has no Snowflake mapping,
            ``error`` when the connector raises, otherwise ``success``,
            ``mismatch`` or ``unexpected_pass``.

        Raises:
            Exception: If the argument count does not fit an infix, postfix,
                extract or aggregate mapping.
        """
        # Bound before the try so the finally block can tell whether a cursor
        # was ever opened.
        cursor = None
        try:
            print(f"Running testcase {case} {mapping}")
            cursor = self.conn.cursor()
            arg_defs = []
            for idx, arg in enumerate(case.args):
                arg_type = type_to_snowflake_type(arg.type)
                if arg_type is None:
                    return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
                arg_defs.append(f"arg{idx} {arg_type}")
            schema = ",".join(arg_defs)
            cursor.execute(f"CREATE TABLE my_table({schema});")
            # In Snowflake "SET TimeZone='UTC'" only defines a session variable;
            # the session time zone is the TIMEZONE parameter.
            cursor.execute("ALTER SESSION SET TIMEZONE = 'UTC';")
            print(f"Running case: {case} create table my_table({schema});")

            arg_names = [f"arg{idx}" for idx in range(len(case.args))]
            joined_arg_names = ",".join(arg_names)
            arg_vals_list = list()
            for arg in case.args:
                if is_string_type(arg):
                    arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
                else:
                    arg_vals_list.append(literal_to_str(arg.value))
            arg_vals = ", ".join(arg_vals_list)
            if mapping.aggregate:
                # Aggregates carry a list of values per argument; each value
                # becomes one row of that argument's column.
                arg_vals_list = list()
                for arg in case.args:
                    quoted = is_string_type(arg)
                    as_float = is_float_type(arg)
                    rows = []
                    for value in arg.value:
                        if value is None:
                            rows.append(f"({literal_to_str(value)})")
                        elif quoted:
                            # Covers "" too, which a truthiness test would
                            # have rendered as "()".
                            rows.append(f"('{literal_to_str(value)}')")
                        elif as_float:
                            rows.append(f"({literal_to_float(value)})")
                        else:
                            rows.append(f"({literal_to_str(value)})")
                    arg_vals_list.append([",".join(rows)])
                for arg_name, arg_vals in zip(arg_names, arg_vals_list):
                    if len(arg_vals[0]):
                        cursor.execute(
                            f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
                        )
            else:
                cursor.execute(
                    f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
                )

            if mapping.infix:
                if len(arg_names) != 2:
                    raise Exception(f"Infix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
            elif mapping.postfix:
                if len(arg_names) != 1:
                    raise Exception(f"Postfix function with {len(arg_names)} args")
                expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
            elif mapping.extract:
                if len(arg_names) != 2:
                    raise Exception(f"Extract function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
            elif mapping.local_name == "count(*)":
                expr = f"SELECT {mapping.local_name} FROM my_table;"
            elif mapping.aggregate:
                if len(arg_names) < 1:
                    raise Exception(f"Aggregate function with {len(arg_names)} args")
                expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
            else:
                expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
            result = cursor.execute(expr).fetchone()[0]

            if case.result == "undefined":
                return SqlCaseResult.success()
            elif case.result == "error":
                return SqlCaseResult.unexpected_pass(str(result))
            # Issues with python float comparison:
            # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
            # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
            elif case.result.type.startswith("fp") and case.result.value and result:
                if math.isclose(result, case.result.value, rel_tol=1e-7):
                    return SqlCaseResult.success()
                # Previously fell off the end and returned None.
                return SqlCaseResult.mismatch(str(result))
            else:
                if result == case.result.value:
                    return SqlCaseResult.success()
                elif is_datetype(result) and str(result) == case.result.value:
                    return SqlCaseResult.success()
                else:
                    return SqlCaseResult.mismatch(str(result))
        except Error as err:
            return SqlCaseResult.error(str(err))
        finally:
            if cursor is not None:
                try:
                    cursor.execute("DROP TABLE IF EXISTS my_table")
                finally:
                    cursor.close()
-------------------------------------------------------------------------------- /bft/testers/sqlite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/sqlite/__init__.py -------------------------------------------------------------------------------- /bft/testers/sqlite/runner.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sqlite3 3 | from typing import Dict, NamedTuple 4 | 5 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner 6 | from bft.cases.types import Case, CaseLiteral 7 | from bft.dialects.types import SqlMapping 8 | from bft.utils.utils import type_to_dialect_type 9 | 10 | type_map = { 11 | "i8": "TINYINT", 12 | "i16": "SMALLINT", 13 | "i32": "INT", 14 | "i64": "HUGEINT", 15 | "fp32": "REAL", 16 | "fp64": "REAL", 17 | "boolean": "BOOLEAN", 18 | "string": "TEXT", 19 | } 20 | 21 | 22 | def type_to_sqlite_type(type: str): 23 | return type_to_dialect_type(type, type_map) 24 | 25 | 26 | def literal_to_str(lit: str | int | float): 27 | if lit is None: 28 | return "null" 29 | elif lit in [float("inf"), "inf"]: 30 | return "9e999" 31 | elif lit in [float("-inf"), "-inf"]: 32 | return "-9e999" 33 | return str(lit) 34 | 35 | 36 | def flatten(l: list): 37 | return [item for sublist in l for item in sublist] 38 | 39 | 40 | def extract_argument_values(case: Case, mapping: SqlMapping): 41 | arg_vals_list = [] 42 | for arg in case.args: 43 | arg_vals = [] 44 | if arg.type == "string" and arg.value is not None: 45 | arg_vals.append("'" + literal_to_str(arg.value) + "'") 46 | elif mapping.aggregate: 47 | for value in arg.value: 48 | arg_vals.append(literal_to_str(value)) 49 | else: 50 | arg_vals.append(literal_to_str(arg.value)) 51 | arg_vals_list.append(arg_vals) 52 | return arg_vals_list 53 | 54 | 55 | class SqliteRunner(SqlCaseRunner): 56 | def 
__init__(self, dialect): 57 | super().__init__(dialect) 58 | self.conn = sqlite3.connect(":memory:") 59 | 60 | def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult: 61 | self.conn.execute("BEGIN;") 62 | 63 | try: 64 | arg_defs = [] 65 | for idx, arg in enumerate(case.args): 66 | arg_type = type_to_sqlite_type(arg.type) 67 | if arg_type is None: 68 | return SqlCaseResult.unsupported(f"Unsupported type {arg.type}") 69 | arg_defs.append(f"arg{idx} {arg_type}") 70 | schema = ",".join(arg_defs) 71 | self.conn.execute(f"CREATE TABLE my_table({schema});") 72 | 73 | arg_names = [f"arg{idx}" for idx in range(len(case.args))] 74 | 75 | joined_arg_names = ",".join(arg_names) 76 | arg_vals_list = extract_argument_values(case, mapping) 77 | arg_vals = ', '.join(flatten(arg_vals_list)) 78 | 79 | if mapping.aggregate: 80 | for arg_name, arg_vals in zip(arg_names, arg_vals_list): 81 | str_arg_vals = ",".join(f"({val})" for val in arg_vals) 82 | if arg_vals: 83 | self.conn.execute( 84 | f"INSERT INTO my_table ({arg_name}) VALUES {str_arg_vals};" 85 | ) 86 | else: 87 | self.conn.execute( 88 | f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});" 89 | ) 90 | 91 | if mapping.infix: 92 | if len(arg_names) != 2: 93 | raise Exception(f"Infix function with {len(arg_names)} args") 94 | expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;" 95 | elif mapping.postfix: 96 | if len(arg_names) != 1: 97 | raise Exception(f"Postfix function with {len(arg_names)} args") 98 | expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;" 99 | elif mapping.between: 100 | if len(arg_names) != 3: 101 | raise Exception(f"Between function with {len(arg_names)} args") 102 | expr = f"SELECT {arg_names[0]} BETWEEN {arg_names[1]} AND {arg_names[2]} FROM my_table;" 103 | elif mapping.local_name == 'count(*)': 104 | expr = f"SELECT {mapping.local_name} FROM my_table;" 105 | elif mapping.aggregate: 106 | if len(arg_names) < 1: 107 | raise 
class SqliteTester(BaseTester):
    """Tester binding for the dialect registered under the name ``sqlite``."""

    def get_runner(self, dialect: Dialect):
        """Return a ``SqliteRunner`` constructed from ``dialect``."""
        runner = SqliteRunner(dialect)
        return runner

    def get_dialect(self, library: DialectsLibrary):
        """Look up the ``sqlite`` dialect in ``library`` by name."""
        dialect_name = "sqlite"
        return library.get_dialect_by_name(dialect_name)
class VeloxRunner(SqlCaseRunner):
    """Runs scalar test cases through pyvelox expressions."""

    def __init__(self, dialect: Dialect):
        super().__init__(dialect)

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Evaluate one case with pyvelox and classify the outcome.

        Every argument becomes a single-element vector named ``arg<N>``; the
        expression text is built from ``mapping`` and evaluated over those
        vectors.

        :param case: the test case (arguments, expected result).
        :param mapping: how the function is spelled in this dialect.
        :return: ``unsupported`` for argument types ``is_type_supported``
            rejects, ``error`` when pyvelox raises ``RuntimeError``,
            ``unexpected_pass`` when an error was expected but a value came
            back, otherwise ``success`` or ``mismatch``.
        :raises Exception: when the argument count does not fit the mapping
            kind, or when the evaluation does not yield exactly one row.
        """
        arg_vectors = []
        arg_names = []
        for arg_idx, arg in enumerate(case.args):
            if not is_type_supported(arg.type):
                return SqlCaseResult.unsupported(
                    f"The type {arg.type} is not supported"
                )
            arg_vectors.append(pv.from_list([arg.value]))
            arg_names.append(f"arg{arg_idx}")

        if mapping.infix:
            if len(case.args) != 2:
                raise Exception(f"Infix function with {len(case.args)} args")
            expr_str = f"arg0 {mapping.local_name} arg1"
        elif mapping.postfix:
            if len(arg_names) != 1:
                raise Exception(f"Postfix function with {len(arg_names)} args")
            expr_str = f"arg0 {mapping.local_name}"
        elif mapping.between:
            if len(arg_names) != 3:
                raise Exception(f"between function with {len(arg_names)} args")
            expr_str = f"arg0 {mapping.local_name} arg1 and arg2"
        else:
            joined_args = ", ".join(arg_names)
            expr_str = f"{mapping.local_name}({joined_args})"

        try:
            expr = pv.Expression.from_string(expr_str)
            answer = expr.evaluate(arg_names, arg_vectors)
            result = list(answer)
        except RuntimeError as err:
            return SqlCaseResult.error(str(err))

        if len(result) != 1:
            raise Exception("Scalar function with one row output more than one row")
        result = result[0]

        if case.result == "undefined":
            return SqlCaseResult.success()
        if case.result == "error":
            return SqlCaseResult.unexpected_pass(str(result))
        if case.result == "nan":
            # Only a float can be NaN; the isinstance guard keeps
            # math.isnan from raising TypeError on None or strings.
            if isinstance(result, float) and math.isnan(result):
                return SqlCaseResult.success()
            # Previously this path fell off the end and returned None.
            return SqlCaseResult.mismatch(str(result))
        if result == case.result.value:
            return SqlCaseResult.success()
        return SqlCaseResult.mismatch(str(result))
def run_test(case: Case, tester: BaseTester):
    """Run ``case`` through ``tester`` and report the outcome to pytest.

    Known-problematic combinations are skipped up front: null inputs on the
    Velox runner and "inf" results on the Postgres runner.  Afterwards the
    tester's verdict is compared with whether the case was expected to pass;
    unexpected passes and unexpected failures fail the test, while expected
    failures are reported as xfail.
    """
    runner_name = tester.runner.__class__.__name__

    if runner_name == "VeloxRunner":
        if any(literal.value is None for literal in case.args):
            pytest.skip("Skipping. Pyvelox does not support null input")

    if runner_name == "PostgresRunner":
        expected = case.result
        # Plain-string results ("error", "undefined", ...) carry no value.
        if type(expected) is not str and "inf" in str(expected[0]):
            pytest.skip(
                "Skipping. Postgres errors out when dealing with infinite addition"
            )

    outcome = tester.run_test(case)

    if not outcome.passed:
        if outcome.should_have_passed:
            pytest.fail(f"Unexpected fail: {outcome.reason}")
        pytest.xfail(outcome.reason)

    if not outcome.should_have_passed:
        pytest.fail(f"Unexpected pass: {outcome.reason}")
    assert outcome.passed
| instance.prepare(dialects) 12 | return instance 13 | 14 | 15 | @pytest.mark.parametrize("case", cases()) 16 | def test_functions(case, tester): 17 | run_test(case, tester) 18 | -------------------------------------------------------------------------------- /bft/tests/test_duckdb.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bft.testers.duckdb.tester import DuckDBTester 4 | 5 | from .base import cases, run_test 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def tester(dialects): 10 | instance = DuckDBTester() 11 | instance.prepare(dialects) 12 | return instance 13 | 14 | 15 | @pytest.mark.parametrize("case", cases()) 16 | def test_functions(case, tester): 17 | run_test(case, tester) 18 | -------------------------------------------------------------------------------- /bft/tests/test_postgres.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bft.testers.postgres.tester import PostgresTester 4 | 5 | from .base import cases, run_test 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def tester(dialects): 10 | instance = PostgresTester() 11 | instance.prepare(dialects) 12 | return instance 13 | 14 | 15 | @pytest.mark.parametrize("case", cases()) 16 | def test_functions(case, tester): 17 | run_test(case, tester) 18 | -------------------------------------------------------------------------------- /bft/tests/test_pyvelox.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bft.testers.velox.tester import VeloxTester 4 | 5 | from .base import cases, run_test 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def tester(dialects): 10 | instance = VeloxTester() 11 | instance.prepare(dialects) 12 | return instance 13 | 14 | 15 | @pytest.mark.parametrize("case", cases()) 16 | def test_functions(case, tester): 17 | run_test(case, tester) 18 | 
def type_to_dialect_type(type: str, type_map: Dict[str, str])->str:
    """
    Translate a substrait type name into the dialect's spelling.

    :param type: substrait type, optionally parameterized (e.g. "decimal<37, 3>")
    :param type_map: map from substrait base type (no parameters) to the
        dialect base type (no parameters)
    :return: the dialect type, or None when the base type is not in type_map

    The base type is the part before "<".  For a parameterized type the base
    name is substituted and the angle brackets become parentheses.

    e.g. with type_map {"interval": "INTERVAL", "decimal": "NUMERIC"}:
        "decimal<37, 3>" -> "NUMERIC(37, 3)"
        "interval"       -> "INTERVAL"
    """
    head, bracket, _ = type.partition("<")
    parameterized = bool(bracket)
    # Only a parameterized name is stripped; a plain name is looked up as-is.
    base_type = head.strip() if parameterized else type

    if base_type not in type_map:
        return None

    dialect_base = type_map[base_type]
    if not parameterized:
        return dialect_base

    # Swap in the dialect base name, then turn <...> into (...).
    substituted = type.replace(base_type, dialect_base)
    return substituted.replace("<", "(").replace(">", ")")
| dest.mkdir() 27 | 28 | build_site(index, dest) 29 | 30 | static_content_dir = root / "static_site" 31 | 32 | # Use the custom copy_with_progress function 33 | copy_with_progress(static_content_dir, dest) 34 | 35 | print("Copying static files completed.") 36 | -------------------------------------------------------------------------------- /ci/docker/base-tester.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.18 2 | ARG PIP_PACKAGES 3 | 4 | ENV PYTHONUNBUFFERED=1 5 | ENV PYTHONPATH=/bft/substrait 6 | RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python 7 | RUN python3 -m ensurepip 8 | RUN echo "PIP_PACKAGES is $PIP_PACKAGES" 9 | RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe $PIP_PACKAGES ruamel.yaml antlr4-python3-runtime pytz 10 | 11 | WORKDIR /bft 12 | COPY . . 13 | 14 | CMD /usr/bin/python -mpytest bft/tests/test_sqlite.py 15 | -------------------------------------------------------------------------------- /ci/docker/datafusion.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | ENV PYTHONPATH=/bft/substrait 5 | RUN apt-get update && apt-get install -y python3.10 && ln -sf python3 /usr/bin/python 6 | RUN apt install -y pip 7 | RUN pip install --upgrade pip setuptools pytest pyyaml mistletoe datafusion ruamel.yaml antlr4-python3-runtime pytz numpy 8 | 9 | WORKDIR /bft 10 | COPY . . 
11 | 12 | CMD /usr/bin/python -mpytest bft/tests/test_datafusion.py 13 | -------------------------------------------------------------------------------- /ci/docker/duckdb.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.18 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | ENV PYTHONPATH=/bft/substrait 5 | RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python 6 | RUN python3 -m ensurepip 7 | RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe duckdb ruamel.yaml antlr4-python3-runtime pytz 8 | 9 | WORKDIR /bft 10 | COPY . . 11 | 12 | CMD /usr/bin/python -mpytest bft/tests/test_duckdb.py 13 | -------------------------------------------------------------------------------- /ci/docker/postgres-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | app: 3 | image: bft/tester 4 | build: 5 | context: ../.. 6 | dockerfile: ./ci/docker/base-tester.Dockerfile 7 | args: 8 | PIP_PACKAGES: psycopg[binary] 9 | command: /usr/bin/python -mpytest bft/tests/test_postgres.py 10 | depends_on: 11 | postgres: 12 | condition: service_healthy 13 | environment: 14 | POSTGRES_HOST: postgres 15 | POSTGRES_USER: postgres 16 | POSTGRES_PASSWORD: postgres 17 | POSTGRES_DB: bft 18 | 19 | postgres: 20 | image: postgres:15-alpine 21 | environment: 22 | POSTGRES_DB: bft 23 | POSTGRES_PASSWORD: postgres 24 | healthcheck: 25 | test: ["CMD-SHELL", "pg_isready -U postgres"] 26 | interval: 5s 27 | timeout: 5s 28 | retries: 5 29 | -------------------------------------------------------------------------------- /ci/docker/postgres-server.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:15-alpine 2 | ENV POSTGRES_DB=bft 3 | ENV POSTGRES_PASSWORD=postgres 4 | 5 | ENV PYTHONUNBUFFERED=1 6 | ENV PYTHONPATH=/bft/substrait 7 | RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python 8 | 
RUN python3 -m ensurepip 9 | RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe psycopg[binary] ruamel.yaml antlr4-python3-runtime pytz 10 | 11 | WORKDIR /bft 12 | COPY . . 13 | 14 | CMD /usr/bin/python -mpytest bft/tests/test_postgres.py 15 | -------------------------------------------------------------------------------- /ci/docker/sqlite.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.18 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | ENV PYTHONPATH=/bft/substrait 5 | RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python 6 | RUN python3 -m ensurepip 7 | RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe ruamel.yaml antlr4-python3-runtime pytz 8 | 9 | WORKDIR /bft 10 | COPY . . 11 | 12 | # CMD to run all commands and display the results 13 | CMD /usr/bin/python -mpytest bft/tests/test_sqlite.py 14 | -------------------------------------------------------------------------------- /ci/docker/velox-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | app: 3 | image: bft/tester 4 | build: 5 | context: ../.. 6 | dockerfile: ./ci/docker/velox.Dockerfile 7 | args: 8 | PIP_PACKAGES: pyvelox 9 | command: /usr/bin/python3 -mpytest bft/tests/test_pyvelox.py 10 | -------------------------------------------------------------------------------- /ci/docker/velox.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | ENV PYTHONPATH=/bft/substrait 5 | RUN apt-get update && apt-get install -y \ 6 | python3 \ 7 | python3-pip 8 | RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe pyvelox ruamel.yaml antlr4-python3-runtime pytz 9 | 10 | WORKDIR /bft 11 | COPY . . 
12 | 13 | CMD /usr/bin/python -mpytest bft/tests/test_pyvelox.py 14 | -------------------------------------------------------------------------------- /index.yaml: -------------------------------------------------------------------------------- 1 | substrait: 2 | extensions: 3 | - location: ./substrait/extensions/functions_aggregate_approx.yaml 4 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_approx.yaml 5 | - location: ./substrait/extensions/functions_aggregate_generic.yaml 6 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_generic.yaml 7 | - location: ./substrait/extensions/functions_arithmetic.yaml 8 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml 9 | - location: ./substrait/extensions/functions_boolean.yaml 10 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml 11 | - location: ./substrait/extensions/functions_comparison.yaml 12 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml 13 | - location: ./substrait/extensions/functions_datetime.yaml 14 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_datetime.yaml 15 | - location: ./substrait/extensions/functions_logarithmic.yaml 16 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_logarithmic.yaml 17 | - location: ./substrait/extensions/functions_rounding.yaml 18 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_rounding.yaml 19 | - location: ./substrait/extensions/functions_string.yaml 20 | canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_string.yaml 21 | cases: 22 | - ./cases 23 | dialects: 24 | - ./dialects 25 | supplements: 26 | - ./supplemental 27 | -------------------------------------------------------------------------------- 
/requirements.txt: -------------------------------------------------------------------------------- 1 | datafusion 2 | duckdb 3 | jinja2 4 | mistletoe 5 | pytest 6 | pyvelox 7 | pyyaml 8 | snowflake 9 | ruamel.yaml 10 | deepdiff 11 | pytz 12 | -------------------------------------------------------------------------------- /static_site/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/android-chrome-192x192.png -------------------------------------------------------------------------------- /static_site/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/android-chrome-512x512.png -------------------------------------------------------------------------------- /static_site/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/apple-touch-icon.png -------------------------------------------------------------------------------- /static_site/assets/index/script.js: -------------------------------------------------------------------------------- 1 | function showContent(sectionId) { 2 | // Hide all sections 3 | var sections = document.querySelectorAll('.content-container article'); 4 | sections.forEach(function (section) { 5 | section.style.display = 'none'; 6 | }); 7 | 8 | // Show the selected section 9 | var selectedSection = document.getElementById(sectionId); 10 | if (selectedSection) { 11 | selectedSection.style.display = 'block'; 12 | } 13 | 14 | // Hide the search results section 15 | var searchResultsSection = document.getElementById('searchResultsSection'); 16 | if (searchResultsSection) { 17 
/**
 * Render the ranked search results as a list of clickable cards.
 *
 * Hides the "home" section, clears the "searchResultsSection" element and
 * fills it with a heading followed by one card per result.  Any failure is
 * logged to the console instead of being thrown.
 *
 * @param {Array<{category: string, name: string, brief: string}>} results
 *     Results to show, in display order.
 */
function displaySearchResults(results) {
    try {
        const homeSection = document.getElementById("home");
        const searchResultsSection = document.getElementById("searchResultsSection");

        // Hide home section and show search results section
        homeSection.style.display = "none";
        searchResultsSection.style.display = "block";

        const categoryTitleElement = document.createElement("h2");
        categoryTitleElement.className = "category-title";
        categoryTitleElement.innerHTML = "Search results";

        searchResultsSection.innerHTML = "";
        searchResultsSection.appendChild(categoryTitleElement);

        // Display search results in the section
        for (const result of results) {
            // Block-scoped on purpose: with `var` every click handler shared
            // one binding and navigated to the last card's URL.
            const cardLink = document.createElement("a");
            cardLink.className = "card mb-3 search-result-card";
            cardLink.href = "./" + result.name.toLowerCase() + ".html";
            cardLink.onclick = function () {
                window.location.href = cardLink.href;
            };

            const cardBody = document.createElement("div");
            cardBody.className = "card-body";

            const cardTitle = document.createElement("h5");
            cardTitle.className = "card-title search-result-title";
            cardTitle.innerHTML = result.category + " Functions";

            const cardText = document.createElement("p");
            cardText.className = "card-text search-result-text";
            // Strip the leading "<prefix>_" from the name for display.
            cardText.innerHTML = "" + result.name.replace(/^.*?_/, '') + ": " + result.brief;

            cardBody.appendChild(cardTitle);
            cardBody.appendChild(cardText);
            cardLink.appendChild(cardBody);

            searchResultsSection.appendChild(cardLink);
        }
    } catch (error) {
        console.error("Error in displaying search results:", error);
    }
}
border-right: 1px solid #dee2e6; 17 | overflow-y: auto; 18 | z-index: 1000; 19 | width: max-content; 20 | } 21 | 22 | .menu-item { 23 | display: block; 24 | padding: 8px 16px; 25 | margin-bottom: 10px; 26 | cursor: pointer; 27 | transition: background-color 0.3s; 28 | border: 1px solid transparent; 29 | color: #ffffff; 30 | } 31 | 32 | .menu-item:hover { 33 | background-color: #007e7e; 34 | border: 1px solid #dee2e6; 35 | color: #ffffff; 36 | } 37 | 38 | .content-container { 39 | margin-left: 18%; 40 | margin-top: 11%; 41 | background-color: #f8f9fa; 42 | } 43 | 44 | .fixed-container { 45 | position: fixed; 46 | background-color: #f8f9fa; 47 | z-index: 1000; 48 | width: 72%; 49 | margin-top: -7%; 50 | padding-top: 2%; 51 | margin-bottom: 0%; 52 | } 53 | 54 | .hidden { 55 | display: none; 56 | } 57 | 58 | .category-title { 59 | border-bottom: 1px solid #dee2e6; 60 | padding-bottom: 10px; 61 | margin-bottom: 20px; 62 | } 63 | 64 | .nav-link { 65 | color: #2a2a2a; 66 | } 67 | 68 | .nav-link:hover { 69 | color: #007bff; 70 | } 71 | 72 | .navbar { 73 | position: fixed; 74 | z-index: 2000; 75 | width: 100%; 76 | } 77 | 78 | .card { 79 | transition: transform 0.2s; 80 | } 81 | 82 | .card:hover { 83 | transform: scale(1.05); 84 | } 85 | 86 | 87 | .functions-table { 88 | border-collapse: collapse; 89 | width: 100%; 90 | } 91 | 92 | .functions-table td { 93 | padding: 0.5rem; 94 | } 95 | 96 | .functions-table .title-column { 97 | width: 30%; 98 | color: #201D38; 99 | font-weight: bold; 100 | } 101 | 102 | .functions-table tr:hover { 103 | background-color: #005050; 104 | color: #ffffff; 105 | } 106 | 107 | .functions-table tr:hover .title-column { 108 | color: #ffffff; 109 | } 110 | 111 | .search-result-title { 112 | color: #005050; 113 | text-decoration: none; 114 | } 115 | 116 | .search-result-text { 117 | color: black; 118 | } 119 | -------------------------------------------------------------------------------- /static_site/assets/supplementary/script.js: 
(function () {

    // Per-dialect data handed to window.bftInitialize, keyed by the value
    // of the #dialect <select>.
    var dialectInfo = {};

    // Show or hide an element through its `hidden` attribute.
    function setHidden(element, hidden) {
        if (hidden) {
            element.setAttribute('hidden', '');
        } else {
            element.removeAttribute('hidden');
        }
    }

    // Refresh the page for the chosen dialect: show its definition, mark
    // the failing example rows and strike through unsupported kernels.
    function updateDialect(newValue) {
        const wantedId = `dialect-${newValue}`;
        const exampleMessages = dialectInfo[newValue].examples;

        document.querySelectorAll('.dialect-definition').forEach(function (definition) {
            setHidden(definition, !(definition.id == wantedId));
        });

        const caseRows = document.querySelectorAll('.bft-case');
        const messageRows = document.querySelectorAll('.bft-case-err-message');

        for (let idx = 0; idx < exampleMessages.length; idx++) {
            const message = exampleMessages[idx];
            // A null/undefined entry means the example has no error message.
            const failed = !(message == null);
            caseRows[idx].classList.toggle("bft-error-case", failed);
            setHidden(messageRows[idx], !failed);
            if (failed) {
                messageRows[idx].querySelector("td").innerText = message;
            }
        }

        const kernelSupport = dialectInfo[newValue].kernels;
        const kernelRows = document.querySelectorAll('.bft-kernel');
        for (let idx = 0; idx < kernelSupport.length; idx++) {
            const spans = kernelRows[idx].querySelectorAll('span');
            const supported = Boolean(kernelSupport[idx]);
            spans[0].classList.toggle('bft-unsupported-kernel', !supported);
            setHidden(spans[1], supported);
        }
    }

    // Entry point called by the page with this function's dialect data.
    window.bftInitialize = function (functionDialectInfo) {
        dialectInfo = functionDialectInfo;
        const selector = document.getElementById('dialect');
        updateDialect(selector.value);
        selector.addEventListener('change', (event) => {
            updateDialect(event.target.value);
        });
    }

})();
-------------------------------------------------------------------------------- /static_site/assets/supplementary/style.css: -------------------------------------------------------------------------------- 1 | .tooltip { 2 | position: absolute; 3 | z-index: 99; 4 | padding: 5px; 5 | background: #222; 6 | color: #fff; 7 | border-radius: 5px; 8 | } 9 | 10 | tbody { 11 | position: relative; 12 | } 13 | 14 | .bft-error-case td { 15 | background-color: #FFCDD2; 16 | background-clip: padding-box; 17 | } 18 | 19 | .bft-case-err-message { 20 | font-weight: lighter !important; 21 | font-style: italic; 22 | } 23 | 24 | /* We don't use row headers */ 25 | table tbody td:first-child { 26 | font-weight: initial; 27 | } 28 | 29 | .bft-unsupported-kernel { 30 | text-decoration: line-through; 31 | } 32 | 33 | a.disabled { 34 | cursor: initial; 35 | color: var(--secondary-color); 36 | text-decoration: none; 37 | } 38 | 39 | a.disabled:hover { 40 | background: initial; 41 | } 42 | -------------------------------------------------------------------------------- /static_site/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon-16x16.png -------------------------------------------------------------------------------- /static_site/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon-32x32.png -------------------------------------------------------------------------------- /static_site/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon.ico -------------------------------------------------------------------------------- 
/supplemental/arithmetic/abs.md: -------------------------------------------------------------------------------- 1 | # Abs 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Computing the absolute value of integer values may result in overflow due to the asymmetry of two's complement. 8 | This option helps control the behavior of the function when the input goes out of permissible range 9 | of the type class. 10 | 11 | #### SILENT 12 | 13 | /[%Overflow$SILENT%] 14 | 15 | #### SATURATE 16 | 17 | /[%Overflow$SATURATE%] 18 | 19 | #### ERROR 20 | 21 | /[%Overflow$ERROR%] 22 | 23 | ## Details 24 | 25 | ### Non multiplicative 26 | 27 | Although the mathematical operation for absolute value is multiplicative, the function is not 28 | due to overflow. For example, for int8, abs(-1 * -128) will not be the same as 29 | abs(-1) * abs(-128), since the former will cause an overflow. 30 | 31 | ### Triangular Inequality 32 | 33 | Mathematically, the absolute value operation satisfies the triangle inequality, i.e. for two real numbers, 34 | x & y, abs(x+y) <= abs(x) + abs(y). This might not hold true for the abs function due to overflow. 35 | For example, for int8, abs(-127 + 1) will not be the same as abs(-127) + abs(1), since the 36 | latter will overflow. 37 | 38 | ## Properties 39 | 40 | ### Null propagating 41 | 42 | /[%Properties$Null_propagating%] 43 | 44 | ### NaN propagating 45 | 46 | /[%Properties$NaN_propagating%] 47 | 48 | ### Stateless 49 | 50 | /[%Properties$Stateless%] 51 | -------------------------------------------------------------------------------- /supplemental/arithmetic/acos.md: -------------------------------------------------------------------------------- 1 | # Acos 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Arccosine of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 
10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ### On_domain_error 32 | 33 | Arccosine function has a domain of [-1,1], i.e. values of only this range are allowed. This option controls the behavior when the function is called with values outside of this range. 34 | 35 | #### NAN 36 | 37 | /[%On_domain_error$NAN%] 38 | 39 | #### ERROR 40 | 41 | /[%On_domain_error$ERROR%] 42 | 43 | ## Details 44 | 45 | ### Other floating point exceptions 46 | 47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 48 | example, division by zero, overflow, and underflow. However, these exceptions 49 | have default behaviors defined by IEEE 754 and, since no known engine deviates 50 | from these default values, these exceptions are not exposed as options. For more 51 | information on what happens in these cases refer to the IEEE 754 standard. 52 | 53 | ### Numerical Precision 54 | 55 | The precision of the acos function depends on the architecture in various dialects. 56 | 57 | ### Output Range 58 | 59 | The arccosine function has an output range of [0, pi], and it results to 0 60 | at 1. 61 | 62 | ## Properties 63 | 64 | ### Null propagating 65 | 66 | /[%Properties$Null_propagating%] 67 | 68 | ### NaN propagating 69 | 70 | /[%Properties$NaN_propagating%] 71 | 72 | ### Stateless 73 | 74 | /[%Properties$Stateless%] 75 | -------------------------------------------------------------------------------- /supplemental/arithmetic/acosh.md: -------------------------------------------------------------------------------- 1 | # Acosh 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Hyperbolic arccosine of an input can yield a result that is not exactly 8 | representable in the given type class. 
In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ### On_domain_error 32 | 33 | The hyperbolic arccosine function has a domain of [1, Infinity], i.e. the input should be greater than or equal to one. This option controls the behavior when the function is called with values outside of this range. 34 | 35 | #### NAN 36 | 37 | /[%On_domain_error$NAN%] 38 | 39 | #### ERROR 40 | 41 | /[%On_domain_error$ERROR%] 42 | 43 | ## Details 44 | 45 | ### Other floating point exceptions 46 | 47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 48 | example, division by zero, overflow, and underflow. However, these exceptions 49 | have default behaviors defined by IEEE 754 and, since no known engine deviates 50 | from these default values, these exceptions are not exposed as options. For more 51 | information on what happens in these cases refer to the IEEE 754 standard. 52 | 53 | ### Numerical Precision 54 | 55 | The precision of the acosh function depends on the architecture in various dialects. 56 | 57 | ### Output Range 58 | 59 | The acosh function has an output range of [0, Infinity], and it results to 0 60 | at 1. 
61 | 62 | ## Properties 63 | 64 | ### Null propagating 65 | 66 | /[%Properties$Null_propagating%] 67 | 68 | ### NaN propagating 69 | 70 | /[%Properties$NaN_propagating%] 71 | 72 | ### Stateless 73 | 74 | /[%Properties$Stateless%] 75 | -------------------------------------------------------------------------------- /supplemental/arithmetic/add.md: -------------------------------------------------------------------------------- 1 | # Add 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Adding two integers can trigger an overflow when the result is outside the 8 | representable range of the type class. This option controls what happens when 9 | this overflow occurs. 10 | 11 | #### SILENT 12 | 13 | /[%Overflow$SILENT%] For e.g. adding two int16 cannot 14 | yield an int32 on overflow. 15 | 16 | #### SATURATE 17 | 18 | /[%Overflow$SATURATE%] 19 | 20 | #### ERROR 21 | 22 | /[%Overflow$ERROR%] 23 | 24 | ### Rounding 25 | 26 | Adding two floating point numbers can yield a result that is not exactly 27 | representable in the given type class. In this case the value will be rounded. 28 | Rounding behaviors are defined as part of the IEEE 754 standard. 29 | 30 | #### TIE_TO_EVEN 31 | 32 | /[%Rounding$TIE_TO_EVEN%] 33 | 34 | #### TIE_AWAY_FROM_ZERO 35 | 36 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 37 | 38 | #### TRUNCATE 39 | 40 | /[%Rounding$TRUNCATE%] 41 | 42 | #### CEILING 43 | 44 | /[%Rounding$CEILING%] 45 | 46 | #### FLOOR 47 | 48 | /[%Rounding$FLOOR%] 49 | 50 | ## Details 51 | 52 | ### Other floating point exceptions 53 | 54 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 55 | example, division by zero, overflow, and underflow. However, these exceptions 56 | have default behaviors defined by IEEE 754 and, since no known engine deviates 57 | from these default values, these exceptions are not exposed as options. For more 58 | information on what happens in these cases refer to the IEEE 754 standard. 
59 | 60 | ### Not commutative 61 | 62 | Addition, the algebraic operation, is commutative. So it may be tempting to 63 | believe the add function is commutative as well. However, this is not true because 64 | of overflow. For example, when working with int8 the result of 65 | add(add(120, 10), -5) will yield a different result than add(add(120, -5), 10) 66 | because the first will overflow and the second will not. 67 | 68 | ## Properties 69 | 70 | ### Null propagating 71 | 72 | /[%Properties$Null_propagating%] 73 | 74 | ### NaN propagating 75 | 76 | /[%Properties$NaN_propagating%] 77 | 78 | ### Stateless 79 | 80 | /[%Properties$Stateless%] This is not guaranteed to be true for integer addition when overflow is SILENT. 81 | -------------------------------------------------------------------------------- /supplemental/arithmetic/asin.md: -------------------------------------------------------------------------------- 1 | # Asin 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Arcsine of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ### On_domain_error 32 | 33 | Arcsine function has a domain of [-1,1], i.e. values of only this range are allowed. This option controls the behavior when the function is called with values outside of this range. 34 | 35 | #### NAN 36 | 37 | /[%On_domain_error$NAN%] 38 | 39 | #### ERROR 40 | 41 | /[%On_domain_error$ERROR%] 42 | 43 | ## Details 44 | 45 | ### Other floating point exceptions 46 | 47 | The IEEE 754 standard defines a number of exceptions beyond rounding. 
For 48 | example, division by zero, overflow, and underflow. However, these exceptions 49 | have default behaviors defined by IEEE 754 and, since no known engine deviates 50 | from these default values, these exceptions are not exposed as options. For more 51 | information on what happens in these cases refer to the IEEE 754 standard. 52 | 53 | ### Numerical Precision 54 | 55 | The precision of the asin function depends on the architecture in various dialects. 56 | 57 | ### Output Range 58 | 59 | The arcsine function has an output range of [-pi/2, pi/2], where it results to 0 60 | at 0. 61 | 62 | ## Properties 63 | 64 | ### Null propagating 65 | 66 | /[%Properties$Null_propagating%] 67 | 68 | ### NaN propagating 69 | 70 | /[%Properties$NaN_propagating%] 71 | 72 | ### Stateless 73 | 74 | /[%Properties$Stateless%] 75 | -------------------------------------------------------------------------------- /supplemental/arithmetic/asinh.md: -------------------------------------------------------------------------------- 1 | # Asinh 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Hyperbolic arcsine of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. 
However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the asinh function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | The asinh function has an output range of all Real numbers, and it results to 0 48 | at 0. 49 | 50 | ## Properties 51 | 52 | ### Null propagating 53 | 54 | /[%Properties$Null_propagating%] 55 | 56 | ### NaN propagating 57 | 58 | /[%Properties$NaN_propagating%] 59 | 60 | ### Stateless 61 | 62 | /[%Properties$Stateless%] 63 | -------------------------------------------------------------------------------- /supplemental/arithmetic/atan.md: -------------------------------------------------------------------------------- 1 | # Atan 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Arctangent of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. 
For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the atan function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | The arctangent function has an output range of [-pi/2, pi/2], and it results to 0 48 | at 0. 49 | 50 | ## Properties 51 | 52 | ### Null propagating 53 | 54 | /[%Properties$Null_propagating%] 55 | 56 | ### NaN propagating 57 | 58 | /[%Properties$NaN_propagating%] 59 | 60 | ### Stateless 61 | 62 | /[%Properties$Stateless%] 63 | -------------------------------------------------------------------------------- /supplemental/arithmetic/atan2.md: -------------------------------------------------------------------------------- 1 | # Atan2 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Arctangent of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ### On_domain_error 32 | 33 | Mathematically, atan2 function has a domain of [-Infinity, Infinity], i.e. values of only this range are allowed. This option controls the behavior when the function is called with values outside of this range. 34 | 35 | #### NAN 36 | 37 | /[%On_domain_error$NAN%] 38 | 39 | #### ERROR 40 | 41 | /[%On_domain_error$ERROR%] 42 | 43 | ## Details 44 | 45 | ### Other floating point exceptions 46 | 47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 48 | example, division by zero, overflow, and underflow. 
However, these exceptions 49 | have default behaviors defined by IEEE 754 and, since no known engine deviates 50 | from these default values, these exceptions are not exposed as options. For more 51 | information on what happens in these cases refer to the IEEE 754 standard. 52 | 53 | ### Numerical Precision 54 | 55 | The precision of the atan2 function depends on the architecture in various dialects. 56 | 57 | ### Output Range 58 | 59 | The atan2 function has an output range of [-pi, pi]. 60 | 61 | ## Properties 62 | 63 | ### Null propagating 64 | 65 | /[%Properties$Null_propagating%] 66 | 67 | ### NaN propagating 68 | 69 | /[%Properties$NaN_propagating%] 70 | 71 | ### Stateless 72 | 73 | /[%Properties$Stateless%] 74 | -------------------------------------------------------------------------------- /supplemental/arithmetic/atanh.md: -------------------------------------------------------------------------------- 1 | # Atanh 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Hyperbolic arctangent of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ### On_domain_error 32 | 33 | Hyperbolic arctangent function has a domain of [-1, 1]. This option controls the behavior when the function is called with values outside of this range. 34 | 35 | #### NAN 36 | 37 | /[%On_domain_error$NAN%] 38 | 39 | #### ERROR 40 | 41 | /[%On_domain_error$ERROR%] 42 | 43 | ## Details 44 | 45 | ### Other floating point exceptions 46 | 47 | The IEEE 754 standard defines a number of exceptions beyond rounding. 
For 48 | example, division by zero, overflow, and underflow. However, these exceptions 49 | have default behaviors defined by IEEE 754 and, since no known engine deviates 50 | from these default values, these exceptions are not exposed as options. For more 51 | information on what happens in these cases refer to the IEEE 754 standard. 52 | 53 | ### Numerical Precision 54 | 55 | The precision of the atanh function depends on the architecture in various dialects. 56 | 57 | ### Output Range 58 | 59 | The atanh function has an output range of all real numbers, and it results to 0 60 | at 0. 61 | 62 | ## Properties 63 | 64 | ### Null propagating 65 | 66 | /[%Properties$Null_propagating%] 67 | 68 | ### NaN propagating 69 | 70 | /[%Properties$NaN_propagating%] 71 | 72 | ### Stateless 73 | 74 | /[%Properties$Stateless%] 75 | -------------------------------------------------------------------------------- /supplemental/arithmetic/bitwise_and.md: -------------------------------------------------------------------------------- 1 | # Bitwise_and 2 | 3 | ## Details 4 | 5 | ### Associative 6 | 7 | The bitwise_and function is associative, i.e. 8 | the grouping of operands does not affect the result. For example, 9 | bitwise_and(bitwise_and(a,b), c) will be the same as bitwise_and(a, bitwise_and(b,c)). 10 | 11 | ### Commutative 12 | 13 | The order of operands does not affect the result in Bitwise_and. For example, 14 | bitwise_and(a,b) will be the same as bitwise_and(b,a). 15 | 16 | ### Identity 17 | 18 | For any valid integer, the bitwise_and with the bit pattern of all ones will result 19 | in the integer itself. For example, bitwise_and(123, -1) = 123, since -1 is the bit pattern of all ones in two's complement. 20 | 21 | ### Bitwise Not Relationship 22 | 23 | The result of performing a bitwise_and operation between a value 24 | x and its bitwise_not is always 0. 
25 | 26 | ## Properties 27 | 28 | ### Null propagating 29 | 30 | /[%Properties$Null_propagating%] 31 | 32 | ### NaN propagating 33 | 34 | /[%Properties$NaN_propagating%] 35 | 36 | ### Stateless 37 | 38 | /[%Properties$Stateless%] 39 | -------------------------------------------------------------------------------- /supplemental/arithmetic/bitwise_not.md: -------------------------------------------------------------------------------- 1 | # Bitwise_not 2 | 3 | ## Details 4 | 5 | ### Complementary 6 | 7 | The bitwise_not function is complementary with itself (it is its own inverse), i.e. 8 | bitwise_not(bitwise_not(x)) will be equal to x, for any integer. 9 | 10 | ### XOR Relationship 11 | 12 | Bitwise_not has a relationship with the XOR function, where the XORing of 13 | a valid integer with the bit pattern of all 1s results in the bitwise_not of 14 | that integer. 15 | 16 | ### Two's complement 17 | 18 | The bitwise_not of a valid integer is equivalent to negating the number and subtracting 1. 19 | 20 | ## Properties 21 | 22 | ### Null propagating 23 | 24 | /[%Properties$Null_propagating%] 25 | 26 | ### NaN propagating 27 | 28 | /[%Properties$NaN_propagating%] 29 | 30 | ### Stateless 31 | 32 | /[%Properties$Stateless%] 33 | -------------------------------------------------------------------------------- /supplemental/arithmetic/bitwise_or.md: -------------------------------------------------------------------------------- 1 | # Bitwise_or 2 | 3 | ## Details 4 | 5 | ### Associative 6 | 7 | The bitwise_or function is associative, i.e. 8 | the grouping of operands does not affect the result. For example, 9 | bitwise_or(bitwise_or(a,b), c) will be the same as bitwise_or(a, bitwise_or(b,c)). 10 | 11 | ### Commutative 12 | 13 | The order of operands does not affect the result in Bitwise_or. For example, 14 | bitwise_or(a,b) will be the same as bitwise_or(b,a). 15 | 16 | ### Identity 17 | 18 | For any valid integer, the bitwise_or with zero will result 19 | in the integer itself. 
For example, bitwise_or(123, 0) = 123 20 | 21 | ## Properties 22 | 23 | ### Null propagating 24 | 25 | /[%Properties$Null_propagating%] 26 | 27 | ### NaN propagating 28 | 29 | /[%Properties$NaN_propagating%] 30 | 31 | ### Stateless 32 | 33 | /[%Properties$Stateless%] 34 | -------------------------------------------------------------------------------- /supplemental/arithmetic/bitwise_xor.md: -------------------------------------------------------------------------------- 1 | # Bitwise_xor 2 | 3 | ## Details 4 | 5 | ### Associative 6 | 7 | The bitwise_xor function is associative, i.e. 8 | the grouping of operands does not affect the result. For example, 9 | bitwise_xor(bitwise_xor(a,b), c) will be the same as bitwise_xor(a, bitwise_xor(b,c)). 10 | 11 | ### Commutative 12 | 13 | The order of operands does not affect the result in Bitwise_xor. For example, 14 | bitwise_xor(a,b) will be the same as bitwise_xor(b,a). 15 | 16 | ## Properties 17 | 18 | ### Null propagating 19 | 20 | /[%Properties$Null_propagating%] 21 | 22 | ### NaN propagating 23 | 24 | /[%Properties$NaN_propagating%] 25 | 26 | ### Stateless 27 | 28 | /[%Properties$Stateless%] 29 | -------------------------------------------------------------------------------- /supplemental/arithmetic/cos.md: -------------------------------------------------------------------------------- 1 | # Cos 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Cosine of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 
10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the cosine function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | Being a sinusoidal trigonometric function, the output of the cos function is restricted to [-1,1]. 48 | 49 | ## Properties 50 | 51 | ### Null propagating 52 | 53 | /[%Properties$Null_propagating%] 54 | 55 | ### NaN propagating 56 | 57 | /[%Properties$NaN_propagating%] 58 | 59 | ### Stateless 60 | 61 | /[%Properties$Stateless%] 62 | -------------------------------------------------------------------------------- /supplemental/arithmetic/cosh.md: -------------------------------------------------------------------------------- 1 | # Cosh 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Hyperbolic cosine of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 
10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the cosh function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | The Hyperbolic cosine function has an output range of [1, Infinity], and it results to 1 48 | at 0 radians. 49 | 50 | ## Properties 51 | 52 | ### Null propagating 53 | 54 | /[%Properties$Null_propagating%] 55 | 56 | ### NaN propagating 57 | 58 | /[%Properties$NaN_propagating%] 59 | 60 | ### Stateless 61 | 62 | /[%Properties$Stateless%] 63 | -------------------------------------------------------------------------------- /supplemental/arithmetic/definitions.yaml: -------------------------------------------------------------------------------- 1 | Overflow: 2 | SILENT: > 3 | If an overflow occurs then an integer value will be returned. The value is 4 | undefined. It may be any integer and can change from engine to engine or 5 | even from row to row within the same query. The only constraint is that it 6 | must be a valid value for the result type class. 7 | 8 | SATURATE: > 9 | If an overflow occurs then the largest (for positive overflow) or smallest 10 | (for negative overflow) possible value for the type class will be returned. 
11 | 12 | ERROR: > 13 | If an overflow occurs then an error should be raised. 14 | 15 | Rounding: 16 | TIE_TO_EVEN: > 17 | Round to the nearest value. If the number is exactly halfway between two 18 | values then round to the number whose least significant digit is even. Or, 19 | because we are working with binary digits, round to the number whose last digit 20 | is 0. This is the default behavior in many systems because it helps to avoid 21 | bias in rounding. 22 | 23 | TIE_AWAY_FROM_ZERO: > 24 | Round to the nearest value. If the number is exactly halfway between two values 25 | then round to the number furthest from zero. 26 | 27 | TRUNCATE: > 28 | Round to the nearest value. If the number is exactly halfway between two values 29 | then round to the value closest to zero. 30 | 31 | CEILING: > 32 | Round to the value closest to positive infinity. 33 | 34 | FLOOR: > 35 | Round to the value closest to negative infinity. 36 | 37 | Properties: 38 | Null_propagating: > 39 | If any of the inputs is null then the output will be null 40 | 41 | NaN_propagating: > 42 | If any of the inputs is NaN (and the other input is not null) then the output 43 | will be NaN 44 | 45 | Stateless: > 46 | The output will be the same regardless of the order of input rows. 47 | 48 | On_domain_error: 49 | NAN: > 50 | Return a Not a Number value if any or all of the input values are either 0 or ±infinity. 51 | ERROR: > 52 | If any or all of the input values are either 0 or ±infinity an error should be raised. 53 | -------------------------------------------------------------------------------- /supplemental/arithmetic/divide.md: -------------------------------------------------------------------------------- 1 | # Divide 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Dividing two integers can trigger an overflow when the result is outside the 8 | representable range of the type class. This option controls what happens when 9 | this overflow occurs. 
10 | 11 | #### SILENT 12 | 13 | If an overflow occurs then an integer value will be returned. The value is 14 | undefined. It may be any integer and can change from engine to engine or 15 | even from row to row within the same query. The only constraint is that it 16 | must be a valid value for the result type class (e.g. dividing two int16 17 | cannot yield an int32 on overflow) 18 | 19 | #### SATURATE 20 | 21 | If an overflow occurs then the largest (for positive overflow) or smallest 22 | (for negative overflow) possible value for the type class will be returned. 23 | 24 | #### ERROR 25 | 26 | If an overflow occurs then an error should be raised. 27 | 28 | ### Rounding 29 | 30 | Dividing two floating point numbers can yield a result that is not exactly 31 | representable in the given type class. In this case the value will be rounded. 32 | Rounding behaviors are defined as part of the IEEE 754 standard. 33 | 34 | #### TIE_TO_EVEN 35 | 36 | Round to the nearest value. If the number is exactly halfway between two 37 | values then round to the number whose least significant digit is even. Or, 38 | because we are working with binary digits, round to the number whose last digit 39 | is 0. This is the default behavior in many systems because it helps to avoid 40 | bias in rounding. 41 | 42 | #### TIE_AWAY_FROM_ZERO 43 | 44 | Round to the nearest value. If the number is exactly halfway between two values 45 | then round to the number furthest from zero. 46 | 47 | #### TRUNCATE 48 | 49 | Round to the nearest value. If the number is exactly halfway between two values 50 | then round to the value closest to zero. 51 | 52 | #### CEILING 53 | 54 | Round to the value closest to positive infinity. 55 | 56 | #### FLOOR 57 | 58 | Round to the value closest to negative infinity. 59 | 60 | ### On_domain_error 61 | 62 | Option controls what happens when the dividend and divisor in a divide function 63 | are either both 0 or both ±infinity. 
64 | 65 | #### NAN 66 | 67 | /[%On_domain_error$NAN%] 68 | 69 | #### ERROR 70 | 71 | /[%On_domain_error$ERROR%] 72 | 73 | ### On_division_by_zero 74 | 75 | Option controls function behavior in cases when the divisor is 0 but the dividend is not zero. 76 | 77 | #### LIMIT 78 | 79 | Return +infinity or -infinity depending on the signs of the dividend and the divisor involved. 80 | 81 | ## Details 82 | 83 | ### Other floating point exceptions 84 | 85 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 86 | example, overflow and underflow. However, these exceptions 87 | have default behaviors defined by IEEE 754 and, since no known engine deviates 88 | from these default values, these exceptions are not exposed as options. For more 89 | information on what happens in these cases refer to the IEEE 754 standard. 90 | 91 | ### Not commutative 92 | 93 | Division, the algebraic operation, is not commutative. It may still be tempting to 94 | believe that expressions which are mathematically equal yield equal results. However, this is not true 95 | because of overflow. For example, when working with int8 the result of 96 | divide(divide(-128, -1), -1) will yield a different result than 97 | divide(-128, divide(-1, -1)) because the first will overflow and the second 98 | will not. 99 | 100 | ## Properties 101 | 102 | ### Null propagating 103 | 104 | If any of the inputs is null then the output will be null 105 | 106 | ### NaN propagating 107 | 108 | If any of the inputs is NaN (and the other input is not null) then the output 109 | will be NaN 110 | 111 | ### Stateless 112 | 113 | The output will be the same regardless of the order of input rows. This is not 114 | guaranteed to be true for integer division when overflow is SILENT. 
115 | -------------------------------------------------------------------------------- /supplemental/arithmetic/exponential.md: -------------------------------------------------------------------------------- 1 | # Exp 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Exponential of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the exponential function depends on the precision of the input types 44 | and the way the operation is carried out in various dialects. 
45 | 46 | ## Properties 47 | 48 | ### Null propagating 49 | 50 | /[%Properties$Null_propagating%] 51 | 52 | ### NaN propagating 53 | 54 | /[%Properties$NaN_propagating%] 55 | 56 | ### Stateless 57 | 58 | /[%Properties$Stateless%] 59 | -------------------------------------------------------------------------------- /supplemental/arithmetic/factorial.md: -------------------------------------------------------------------------------- 1 | # Factorial 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Factorial being a function that may return a large value out of the permissible limit 8 | of the type class can cause an overflow. This option helps 9 | control the behavior upon overflow in the Factorial function. 10 | 11 | #### SILENT 12 | 13 | /[%Overflow$SILENT%] 14 | 15 | #### SATURATE 16 | 17 | /[%Overflow$SATURATE%] 18 | 19 | #### ERROR 20 | 21 | /[%Overflow$ERROR%] 22 | 23 | ## Details 24 | 25 | ### Input restrictions 26 | 27 | Mathematically, factorial is not defined for negative integers or non-integer values, since it is essentially 28 | the reducing product of a given positive integer. 29 | 30 | ## Properties 31 | 32 | ### Null propagating 33 | 34 | /[%Properties$Null_propagating%] 35 | 36 | ### NaN propagating 37 | 38 | /[%Properties$NaN_propagating%] 39 | 40 | ### Stateless 41 | 42 | /[%Properties$Stateless%] 43 | -------------------------------------------------------------------------------- /supplemental/arithmetic/modulus.md: -------------------------------------------------------------------------------- 1 | # Modulus 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | The modulus is typically computed after finding the quotient, 8 | i.e., mod(x, y) = x - y * round_func(x/y), where round_func can be 9 | truncate, floor, or a similar rounding operation. Thus, the entire operation 10 | may trigger an overflow when an intermediate or final result is outside the representable 11 | range of the type class. This option controls what happens when this overflow occurs.
12 | 13 | #### SILENT 14 | 15 | If an overflow occurs then an integer value will be returned. The value is 16 | undefined. It may be any integer and can change from engine to engine or 17 | even from row to row within the same query. The only constraint is that it 18 | must be a valid value for the result type class (e.g. modulus of int16 19 | cannot yield an int32 on overflow) 20 | 21 | #### SATURATE 22 | 23 | If an overflow occurs then the largest (for positive overflow) or smallest 24 | (for negative overflow) possible value for the type class will be returned. 25 | 26 | #### ERROR 27 | 28 | If an overflow occurs then an error should be raised. 29 | 30 | ### Division_type 31 | 32 | Determines the rounding function used when evaluating the quotient 33 | that leads to the remainder. The remainder is 34 | determined by r = x - y * round_func(x/y) 35 | 36 | #### TRUNCATE 37 | 38 | The quotient is evaluated i.e. the round_func(x/y) is truncated, 39 | thus the fractional result is rounded towards zero. 40 | 41 | #### FLOOR 42 | 43 | The quotient is evaluated i.e. the round_func(x/y) is floored, 44 | thus the fractional result is rounded to the largest integer 45 | value less than or equal to it. 46 | 47 | ### On_domain_error 48 | 49 | Option controls what happens when the dividend is ±infinity or 50 | the divisor is 0 or ±infinity in a modulus function. 51 | 52 | #### NULL 53 | 54 | Return a NULL if the dividend is ±infinity or the divisor is 0 55 | or ±infinity. 56 | 57 | #### ERROR 58 | 59 | If the dividend is ±infinity or the divisor is 0 or ±infinity, 60 | an error should be raised. 61 | 62 | ## Details 63 | 64 | ### Overflow 65 | 66 | The Modulus function requires the Overflow option in situations 67 | where any or all of the involved operations result in overflow 68 | from the specified range. For example, in mod(-128, -1) within 69 | the int8 range, an overflow will occur as the operation will 70 | lead to (-128) - (-1) * round_func(-128/-1).
Since the division operation 71 | (-128/-1) results in an overflow (given that the range of int8 72 | is -128 to 127), the Overflow option becomes essential. 73 | 74 | ### Not commutative 75 | 76 | Modulus as an arithmetic operation is not commutative by nature. 77 | 78 | ## Properties 79 | 80 | ### Null propagating 81 | 82 | If any of the inputs is null then the output will be null 83 | 84 | ### NaN propagating 85 | 86 | If any of the inputs is NaN (and the other input is not null) then the output 87 | will be NaN 88 | 89 | ### Stateless 90 | 91 | The output will be the same regardless of the order of input rows. This is not 92 | guaranteed to be true for integer modulus when overflow is SILENT. 93 | -------------------------------------------------------------------------------- /supplemental/arithmetic/multiply.md: -------------------------------------------------------------------------------- 1 | # Multiply 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Multiplying two integers can trigger an overflow when the result is outside the 8 | representable range of the type class. This option controls what happens when 9 | this overflow occurs. 10 | 11 | #### SILENT 12 | 13 | If an overflow occurs then an integer value will be returned. The value is 14 | undefined. It may be any integer and can change from engine to engine or 15 | even from row to row within the same query. The only constraint is that it 16 | must be a valid value for the result type class (e.g. multiplying two int16 17 | cannot yield an int32 on overflow) 18 | 19 | #### SATURATE 20 | 21 | If an overflow occurs then the largest (for positive overflow) or smallest 22 | (for negative overflow) possible value for the type class will be returned. 23 | 24 | #### ERROR 25 | 26 | If an overflow occurs then an error should be raised. 27 | 28 | ### Rounding 29 | 30 | Multiplying two floating point numbers can yield a result that is not exactly 31 | representable in the given type class.
In this case the value will be rounded. 32 | Rounding behaviors are defined as part of the IEEE 754 standard. 33 | 34 | #### TIE_TO_EVEN 35 | 36 | Round to the nearest value. If the number is exactly halfway between two 37 | values then round to the number whose least significant digit is even. Or, 38 | because we are working with binary digits, round to the number whose last digit 39 | is 0. This is the default behavior in many systems because it helps to avoid 40 | bias in rounding. 41 | 42 | #### TIE_AWAY_FROM_ZERO 43 | 44 | Round to the nearest value. If the number is exactly halfway between two values 45 | then round to the number furthest from zero. 46 | 47 | #### TRUNCATE 48 | 49 | Round to the nearest value. If the number is exactly halfway between two values 50 | then round to the value closest to zero. 51 | 52 | #### CEILING 53 | 54 | Round to the value closest to positive infinity. 55 | 56 | #### FLOOR 57 | 58 | Round to the value closest to negative infinity. 59 | 60 | ## Details 61 | 62 | ### Other floating point exceptions 63 | 64 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 65 | example, division by zero, overflow, and underflow. However, these exceptions 66 | have default behaviors defined by IEEE 754 and, since no known engine deviates 67 | from these default values, these exceptions are not exposed as options. For more 68 | information on what happens in these cases refer to the IEEE 754 standard. 69 | 70 | ### Not commutative 71 | 72 | Multiplication, the algebraic operation, is commutative. So it may be tempting to 73 | believe the multiply function is commutative as well. However, this is not true 74 | because of overflow. For example, when working with int8 the result of 75 | multiply(multiply(-1, -128), -1) may yield a different result than 76 | multiply(multiply(-1, -1), -128) because the first will overflow and the second 77 | will not. 
78 | 79 | ## Properties 80 | 81 | ### Null propagating 82 | 83 | If any of the inputs is null then the output will be null 84 | 85 | ### NaN propagating 86 | 87 | If any of the inputs is NaN (and the other input is not null) then the output 88 | will be NaN 89 | 90 | ### Stateless 91 | 92 | The output will be the same regardless of the order of input rows. This is not 93 | guaranteed to be true for integer multiplication when overflow is SILENT. 94 | -------------------------------------------------------------------------------- /supplemental/arithmetic/negate.md: -------------------------------------------------------------------------------- 1 | # Negate 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Negating a number on the limit of the allowed range of the type class may lead to 8 | overflowing. For example, negate(-128) for int8 9 | will overflow since the range for the int8 type class is [-128,127]. This option helps 10 | control the behavior upon overflow in the negate function. 11 | 12 | #### SILENT 13 | 14 | /[%Overflow$SILENT%] 15 | 16 | #### SATURATE 17 | 18 | /[%Overflow$SATURATE%] 19 | 20 | #### ERROR 21 | 22 | /[%Overflow$ERROR%] 23 | 24 | ## Details 25 | 26 | ### Not Idempotent 27 | 28 | Algebraically, applying negation twice returns the original value, but the function does not guarantee this because of overflow. 29 | For example, with int8, the result of negate(negate(-128)) may not be -128 because the inner negate overflows. 30 | 31 | ### Not commutative 32 | 33 | Negation, the algebraic operation, commutes with addition, i.e. -(a + b) = (-a) + (-b). So it may be tempting to 34 | believe the negate function commutes with addition as well. However, this is not true because 35 | of overflow. For example, when working with int8 the result of 36 | negate(124 + 4) will yield a different result than negate(124) + negate(4) 37 | because the first will overflow and the second will not.
38 | 39 | ## Properties 40 | 41 | ### Null propagating 42 | 43 | /[%Properties$Null_propagating%] 44 | 45 | ### NaN propagating 46 | 47 | /[%Properties$NaN_propagating%] 48 | 49 | ### Stateless 50 | 51 | /[%Properties$Stateless%] 52 | -------------------------------------------------------------------------------- /supplemental/arithmetic/power.md: -------------------------------------------------------------------------------- 1 | # Power 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | The power operation may lead to overflowing when the result is 8 | outside the representable range of the type class. 9 | This option controls what happens when this overflow occurs. 10 | 11 | #### SILENT 12 | 13 | /[%Overflow$SILENT%] 14 | 15 | #### SATURATE 16 | 17 | /[%Overflow$SATURATE%] 18 | 19 | #### ERROR 20 | 21 | /[%Overflow$ERROR%] 22 | 23 | ## Details 24 | 25 | ### Overflow 26 | 27 | The power function requires the Overflow control for situations where 28 | the resulting value exceeds the type class limit. For example, in 29 | pow(2, 65), although the input values are in the allowed int64 range, 30 | but the result goes out of range. 31 | 32 | ### Numerical Precision 33 | 34 | The precision of the power function depends on the precision of the input types 35 | and the way the operation is carried out in various dialects. 36 | 37 | ## Properties 38 | 39 | ### Null propagating 40 | 41 | /[%Properties$Null_propagating%] 42 | 43 | ### NaN propagating 44 | 45 | /[%Properties$NaN_propagating%] 46 | 47 | ### Stateless 48 | 49 | /[%Properties$Stateless%] 50 | -------------------------------------------------------------------------------- /supplemental/arithmetic/sign.md: -------------------------------------------------------------------------------- 1 | # Sign 2 | 3 | ## Details 4 | 5 | ### Multiplicative 6 | 7 | The Sign function is multiplicative, i.e. sign(x * y) = sign(x) * sign(y). Say for example, in int8, 8 | sign(-2 * 3) will be the same as sign(-2) * sign(3). 
9 | 10 | ## Properties 11 | 12 | ### Null propagating 13 | 14 | /[%Properties$Null_propagating%] 15 | 16 | ### NaN propagating 17 | 18 | /[%Properties$NaN_propagating%] 19 | 20 | ### Stateless 21 | 22 | /[%Properties$Stateless%] 23 | -------------------------------------------------------------------------------- /supplemental/arithmetic/sin.md: -------------------------------------------------------------------------------- 1 | # Sin 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Sine of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the sin function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | Being a sinusoidal trigonometric function, the output of the sin function is restricted to [-1,1]. 
48 | 49 | ## Properties 50 | 51 | ### Null propagating 52 | 53 | /[%Properties$Null_propagating%] 54 | 55 | ### NaN propagating 56 | 57 | /[%Properties$NaN_propagating%] 58 | 59 | ### Stateless 60 | 61 | /[%Properties$Stateless%] 62 | -------------------------------------------------------------------------------- /supplemental/arithmetic/sinh.md: -------------------------------------------------------------------------------- 1 | # Sinh 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Hyperbolic sine of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the sinh function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | The Hyperbolic sine function has an output range of [-Infinity, Infinity], and it results to 0 48 | at 0 radians. 
49 | 50 | ## Properties 51 | 52 | ### Null propagating 53 | 54 | /[%Properties$Null_propagating%] 55 | 56 | ### NaN propagating 57 | 58 | /[%Properties$NaN_propagating%] 59 | 60 | ### Stateless 61 | 62 | /[%Properties$Stateless%] 63 | -------------------------------------------------------------------------------- /supplemental/arithmetic/sqrt.md: -------------------------------------------------------------------------------- 1 | # Sqrt 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Taking the square root of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ### On_domain_error 32 | 33 | Square root operation is typically allowed only for non-negative real numbers. This option controls the behavior when the function is called with values not adhering to this rule. 34 | 35 | #### NAN 36 | 37 | /[%On_domain_error$NAN%] 38 | 39 | #### ERROR 40 | 41 | /[%On_domain_error$ERROR%] 42 | 43 | ## Details 44 | 45 | ### Other floating point exceptions 46 | 47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 48 | example, division by zero, overflow, and underflow. However, these exceptions 49 | have default behaviors defined by IEEE 754 and, since no known engine deviates 50 | from these default values, these exceptions are not exposed as options. For more 51 | information on what happens in these cases refer to the IEEE 754 standard. 
52 | 53 | ### Domain restrictions 54 | 55 | Mathematically, square root function for negative real numbers results to complex numbers, and thus in function usage, typically only positive real numbers are allowed. Applying the function on a negative real number may raise an Error or result in a NaN value. 56 | 57 | 58 | ## Properties 59 | 60 | ### Null propagating 61 | 62 | /[%Properties$Null_propagating%] 63 | 64 | ### NaN propagating 65 | 66 | /[%Properties$NaN_propagating%] 67 | 68 | ### Stateless 69 | 70 | /[%Properties$Stateless%] 71 | -------------------------------------------------------------------------------- /supplemental/arithmetic/subtract.md: -------------------------------------------------------------------------------- 1 | # Subtract 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Subtracting two integers can trigger an overflow when the result is outside the 8 | representable range of the type class. This option controls what happens when 9 | this overflow occurs. 10 | 11 | #### SILENT 12 | 13 | /[%Overflow$SILENT%] For e.g. subtracting two int16 cannot 14 | yield an int32 on overflow. 15 | 16 | #### SATURATE 17 | 18 | /[%Overflow$SATURATE%] 19 | 20 | #### ERROR 21 | 22 | /[%Overflow$ERROR%] 23 | 24 | ### Rounding 25 | 26 | Subtracting two floating point numbers can yield a result that is not exactly 27 | representable in the given type class. In this case the value will be rounded. 28 | Rounding behaviors are defined as part of the IEEE 754 standard. 29 | 30 | #### TIE_TO_EVEN 31 | 32 | /[%Rounding$TIE_TO_EVEN%] 33 | 34 | #### TIE_AWAY_FROM_ZERO 35 | 36 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 37 | 38 | #### TRUNCATE 39 | 40 | /[%Rounding$TRUNCATE%] 41 | 42 | #### CEILING 43 | 44 | /[%Rounding$CEILING%] 45 | 46 | #### FLOOR 47 | 48 | /[%Rounding$FLOOR%] 49 | 50 | ## Details 51 | 52 | ### Other floating point exceptions 53 | 54 | The IEEE 754 standard defines a number of exceptions beyond rounding. 
For 55 | example, division by zero, overflow, and underflow. However, these exceptions 56 | have default behaviors defined by IEEE 754 and, since no known engine deviates 57 | from these default values, these exceptions are not exposed as options. For more 58 | information on what happens in these cases refer to the IEEE 754 standard. 59 | 60 | ### Not commutative 61 | 62 | Subtraction, the algebraic operation, is commutative. So it may be tempting to 63 | believe the subtract function is commutative as well. However, this is not true 64 | because of overflow. For example, when working with int8 the result of 65 | subtract(subtract(-120, 10), -5) will yield a different result than 66 | subtract(subtract(-120, -5), 10) because the first will overflow and the second 67 | will not. 68 | 69 | ## Properties 70 | 71 | ### Null propagating 72 | 73 | /[%Properties$Null_propagating%] 74 | 75 | ### NaN propagating 76 | 77 | /[%Properties$NaN_propagating%] 78 | 79 | ### Stateless 80 | 81 | /[%Properties$Stateless%] This is not 82 | guaranteed to be true for integer subtraction when overflow is SILENT. 83 | -------------------------------------------------------------------------------- /supplemental/arithmetic/sum.md: -------------------------------------------------------------------------------- 1 | # Sum 2 | 3 | ## Options 4 | 5 | ### Overflow 6 | 7 | Sum of a set of values can trigger an overflow when the result is outside the 8 | representable range of the type class. This option controls what happens when 9 | this overflow occurs. 10 | 11 | #### SILENT 12 | 13 | If an overflow occurs then an integer value will be returned. The value is 14 | undefined. It may be any integer and can change from engine to engine or 15 | even from row to row within the same query. The only constraint is that it 16 | must be a valid value for the result type class (e.g. 
adding two int16 cannot 17 | yield an int32 on overflow) 18 | 19 | #### SATURATE 20 | 21 | If an overflow occurs then the largest (for positive overflow) or smallest 22 | (for negative overflow) possible value for the type class will be returned. 23 | 24 | #### ERROR 25 | 26 | If an overflow occurs then an error should be raised. 27 | 28 | ## Details 29 | 30 | ### Other floating point exceptions 31 | 32 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 33 | example, division by zero, overflow, and underflow. However, these exceptions 34 | have default behaviors defined by IEEE 754 and, since no known engine deviates 35 | from these default values, these exceptions are not exposed as options. For more 36 | information on what happens in these cases refer to the IEEE 754 standard. 37 | 38 | ### Not commutative 39 | 40 | Addition, the algebraic operation, is commutative. So it may be tempting to 41 | believe the add function is commutative as well. However, this is not true because 42 | of overflow. For example, when working with int8 the result of 43 | add(add(120, 10), -5) will yield a different result than add(add(120, -5), 10) 44 | because the first will overflow and the second will not. 45 | 46 | ## Properties 47 | 48 | ### Nullability 49 | 50 | Specifies how the nullability of output arguments are mapped to 51 | input arguments. The Sum aggregate function follows a 52 | DECLARED_OUTPUT nullability. 53 | 54 | ### Decomposable 55 | 56 | The Sum aggregate function can be decomposed in more than 57 | one intermediate steps. 58 | 59 | ### Intermediate 60 | 61 | The intermediate output type of the Sum function is the 62 | type class of the input arguments. 
63 | -------------------------------------------------------------------------------- /supplemental/arithmetic/tan.md: -------------------------------------------------------------------------------- 1 | # Tan 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Tangent of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the tan function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | Mathematically, the tangent function has a range [-Inf, Inf], since it is undefined and approaches 48 | infinity in input values of (pi/2) + k*pi, where k is any integer. Computationally, the inputs 49 | where the tangent function is not defined results in approximately 1255.76 or -1255.76. Thus, 50 | the output range becomes [-1255.76, 1255.76]. 
51 | 52 | ## Properties 53 | 54 | ### Null propagating 55 | 56 | /[%Properties$Null_propagating%] 57 | 58 | ### NaN propagating 59 | 60 | /[%Properties$NaN_propagating%] 61 | 62 | ### Stateless 63 | 64 | /[%Properties$Stateless%] 65 | -------------------------------------------------------------------------------- /supplemental/arithmetic/tanh.md: -------------------------------------------------------------------------------- 1 | # Tanh 2 | 3 | ## Options 4 | 5 | ### Rounding 6 | 7 | Hyperbolic tangent of an input can yield a result that is not exactly 8 | representable in the given type class. In this case the value will be rounded. 9 | Rounding behaviors are defined as part of the IEEE 754 standard. 10 | 11 | #### TIE_TO_EVEN 12 | 13 | /[%Rounding$TIE_TO_EVEN%] 14 | 15 | #### TIE_AWAY_FROM_ZERO 16 | 17 | /[%Rounding$TIE_AWAY_FROM_ZERO%] 18 | 19 | #### TRUNCATE 20 | 21 | /[%Rounding$TRUNCATE%] 22 | 23 | #### CEILING 24 | 25 | /[%Rounding$CEILING%] 26 | 27 | #### FLOOR 28 | 29 | /[%Rounding$FLOOR%] 30 | 31 | ## Details 32 | 33 | ### Other floating point exceptions 34 | 35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For 36 | example, division by zero, overflow, and underflow. However, these exceptions 37 | have default behaviors defined by IEEE 754 and, since no known engine deviates 38 | from these default values, these exceptions are not exposed as options. For more 39 | information on what happens in these cases refer to the IEEE 754 standard. 40 | 41 | ### Numerical Precision 42 | 43 | The precision of the tanh function depends on the architecture in various dialects. 44 | 45 | ### Output Range 46 | 47 | The hyperbolic tangent function has an output range of [-1, 1], and it evaluates to 0 48 | for an input of 0.
def compare_test_files(original_file, roundtrip_file):
    """Report whether two test files load to identical content.

    Both paths are parsed with ``load_test_file`` and the resulting objects
    are compared for equality.

    Args:
        original_file: Path to the reference test file.
        roundtrip_file: Path to the file produced by the round-trip conversion.

    Returns:
        ``True`` when the loaded contents are equal, ``False`` otherwise.
    """
    # Return the comparison instead of asserting it: the caller branches on
    # the result to count mismatches, and a bare ``assert`` both returned
    # ``None`` (counted as a mismatch even on success) and aborted the whole
    # run on the first real difference.
    return load_test_file(original_file) == load_test_file(roundtrip_file)
def string_loader(loader, node):
    """Construct the scalar for *node* through *loader* and coerce it to ``str``.

    Used as a YAML constructor callback so that every scalar is surfaced as
    text regardless of how it would otherwise be interpreted.
    """
    scalar = loader.construct_scalar(node)
    return str(scalar)
def format_return_value(case):
    """Render the expected result of a test case in substrait test syntax.

    Args:
        case: Test case mapping. Its optional ``result`` entry may carry the
            keys ``special``, ``value`` and ``type``.

    Returns:
        ``<!ERROR>`` or ``<!UNDEFINED>`` for those special results,
        ``nan::fp64`` for a special ``nan`` (matched case-insensitively),
        otherwise the output of ``convert_to_substrait_test_value`` for the
        result's value and type.
    """
    result = case.get("result", {})
    special = result.get("special")

    if special:
        special = special.lower()

    # ERROR and UNDEFINED carry no value. Emit the marker token instead of an
    # empty string, which would leave a dangling "func(...) = " in the output.
    if special in {"error", "undefined"}:
        return f"<!{special.upper()}>"

    if special == "nan":
        return "nan::fp64"

    # Ordinary results are delegated to the shared value formatter.
    return convert_to_substrait_test_value(result.get("value"), result.get("type"))
arg.get("value", []) 79 | ] 80 | for arg in args 81 | ] 82 | rows = zip(*values) # zip will combine each nth element of each argument 83 | 84 | # Format rows as strings for the table definition 85 | formatted_rows = ", ".join(f"({', '.join(map(str, row))})" for row in rows) 86 | 87 | # Define table format with column types 88 | table_definition = ( 89 | f"DEFINE t{table_id}({formatted_columns}) = ({formatted_rows}) \n" 90 | ) 91 | 92 | return table_definition 93 | 94 | 95 | def format_test_case(case, function, description_map, table_id_counter, is_aggregate): 96 | """Format a single test case.""" 97 | description = format_test_case_group(case, description_map) 98 | options = case.get("options") 99 | options = ( 100 | f" [{', '.join(f'{k}:{convert_to_substrait_test_value(v, None)}' for k, v in options.items())}]" 101 | if options 102 | else "" 103 | ) 104 | results = format_return_value(case) 105 | 106 | args = [arg for arg in case.get("args", []) if not arg.get("is_not_a_func_arg")] 107 | if is_aggregate and len(args) != 1: 108 | table_id = next(table_id_counter) 109 | args = ", ".join(f"t{table_id}.col{idx}" for idx in range(len(args))) 110 | table_definition = generate_define_table(case, table_id) 111 | return description, f"{table_definition}{function}({args}){options} = {results}" 112 | 113 | args = ", ".join( 114 | convert_to_substrait_test_value(arg.get("value"), str(arg["type"])) 115 | for arg in case.get("args", []) 116 | ) 117 | return description, f"{function}({args}){options} = {results}" 118 | 119 | 120 | def convert_test_file_to_new_format(input_data, prefix, is_aggregate): 121 | """Parse YAML test data to formatted cases.""" 122 | function = input_data["function"] 123 | base_uri = input_data["base_uri"][len(prefix) :] 124 | description_map = {} 125 | table_id_counter = count(0) 126 | groups = defaultdict(lambda: {"tests": []}) 127 | 128 | for case in input_data["cases"]: 129 | description, formatted_test = format_test_case( 130 | case, function, 
description_map, table_id_counter, is_aggregate 131 | ) 132 | groups[description]["tests"].append(formatted_test) 133 | 134 | output_lines = [ 135 | f"{'### SUBSTRAIT_AGGREGATE_TEST: v1.0' if is_aggregate else '### SUBSTRAIT_SCALAR_TEST: v1.0'}\n", 136 | f"### SUBSTRAIT_INCLUDE: '{base_uri}'\n", 137 | ] 138 | 139 | for description, details in groups.items(): 140 | output_lines.append(f"\n# {description}\n") 141 | output_lines.extend(f"{test}\n" for test in details["tests"]) 142 | 143 | return output_lines 144 | 145 | 146 | def output_test_data(output_file, lines): 147 | """Write formatted lines to a file.""" 148 | os.makedirs(os.path.dirname(output_file), exist_ok=True) 149 | with open(output_file, "w") as file: 150 | file.writelines(lines) 151 | 152 | print(f"Converted '{output_file}' successfully.") 153 | 154 | 155 | def convert_directory(input_dir, output_dir, prefix): 156 | """Process all YAML files in a directory, convert and save them to output directory.""" 157 | for root, _, files in os.walk(input_dir): 158 | for filename in filter(lambda f: f.endswith(".yaml"), files): 159 | input_file = os.path.join(root, filename) 160 | output_file = os.path.join( 161 | output_dir, os.path.relpath(input_file, input_dir) 162 | ).replace(".yaml", ".test") 163 | is_aggregate = "aggregate" in input_file 164 | 165 | yaml_data = load_test_file(input_file) 166 | output_lines = convert_test_file_to_new_format( 167 | yaml_data, prefix, is_aggregate 168 | ) 169 | output_test_data(output_file, output_lines) 170 | 171 | 172 | if __name__ == "__main__": 173 | input_directory = "../../cases" 174 | output_directory = "../../substrait/tests/cases" 175 | uri_prefix = ( 176 | "https://github.com/substrait-io/substrait/blob/main/extensions/substrait" 177 | ) 178 | convert_directory(input_directory, output_directory, uri_prefix) 179 | -------------------------------------------------------------------------------- /tools/convert_testcases/convert_testcases_to_yaml_format.py: 
def convert_result(test_case):
    """Convert the result section based on specific conditions.

    Returns:
        ``{"special": ...}`` for error results and for a ``nan`` value,
        otherwise a ``{"value": ..., "type": ...}`` mapping. ``iday`` results
        of ``add_intervals`` are written as a string built from the converted
        ISO duration.
    """
    if test_case.is_return_type_error():
        return {"special": str(test_case.result.error)}

    if str(test_case.result.value) == "nan":
        return {"special": "nan"}

    if test_case.func_name == "add_intervals" and test_case.result.type == "iday":
        duration = (
            iso_duration_to_timedelta(test_case.result.value)
            if test_case.result.value is not None
            else None
        )
        return {
            "value": convert_to_yaml_value(duration, "str"),
            "type": "string",
        }

    return {
        "value": convert_to_yaml_value(test_case.result.value, test_case.result.type),
        "type": convert_to_long_type(test_case.result.type),
    }


def convert_table_definition(test_case):
    """Convert the table rows of a test case into per-column argument entries.

    ``test_case.rows`` is transposed into columns and every column becomes one
    argument mapping flagged with ``"is_not_a_func_arg": "true"``.

    Returns:
        A list with one mapping per column; when there are no rows, one
        mapping with an empty ``"value"`` list per column type.
    """
    column_types = None

    if all(isinstance(arg, AggregateArgument) for arg in test_case.args):
        # Extract the column_type from each AggregateArgument
        column_types = [arg.column_type for arg in test_case.args]
    elif test_case.args is not None:
        column_types = [
            convert_to_long_type(
                arg.scalar_value.type
                if isinstance(arg, AggregateArgument)
                else arg.type
            )
            for arg in test_case.args
        ]

    # Transpose rows into columns.
    columns = list(map(list, zip(*test_case.rows)))
    if not columns:
        # Handle the case where columns is empty, but column_types is not
        return [
            {"value": [], "type": col_type, "is_not_a_func_arg": "true"}
            for col_type in column_types
        ]

    # Handle the case where columns is not empty
    return [
        {
            "value": convert_to_yaml_value(column, col_type),
            "type": col_type,
            "is_not_a_func_arg": "true",
        }
        for column, col_type in zip(columns, column_types)
    ]


def convert_group(test_case, groups):
    """Build the ``group`` entry for a test case.

    The group name has the form ``"<id>: <description>"``. Only the first
    ``": "`` separates the two parts, so a description may itself contain
    ``": "``.

    Args:
        test_case: Object whose ``group.name`` holds the group header text.
        groups: Mutable mapping of group id to description for the ids already
            emitted; it is updated in place.

    Returns:
        ``{"id": ..., "description": ...}`` the first time an id is seen, and
        just the id string for later cases of the same group.
    """
    group_id, _, desc = test_case.group.name.partition(": ")
    group_id = str(group_id)
    group = group_id if group_id in groups else {"id": group_id, "description": desc}
    groups[group_id] = desc
    return group


def convert_test_case_to_old_format(test_case, groups):
    """Convert one parsed test case into the YAML case mapping.

    Args:
        test_case: Parsed test case (group, args or rows, options, result).
        groups: Group ids already emitted, see :func:`convert_group`.

    Returns:
        A mapping with ``"group"``, ``"args"``, ``"result"`` and, when the
        test case has options, ``"options"``.
    """
    # Match group headers with descriptions
    print(f"converting test '{test_case}'")
    case = {}
    case["group"] = convert_group(test_case, groups)

    if test_case.rows is not None:
        case["args"] = convert_table_definition(test_case)
    else:
        # A test without arguments has nothing to inspect; indexing args[0]
        # unconditionally would raise IndexError.
        args = test_case.args or []
        if args and isinstance(args[0], AggregateArgument):
            case["args"] = [
                {
                    "value": convert_to_yaml_value(
                        arg.scalar_value.value, arg.scalar_value.type
                    ),
                    "type": convert_to_long_type(arg.scalar_value.type),
                }
                for arg in args
            ]
        else:
            case["args"] = [
                {
                    "value": convert_to_yaml_value(arg.value, arg.type),
                    "type": convert_to_long_type(arg.type),
                }
                for arg in args
            ]

    if len(test_case.options) > 0:
        case["options"] = {
            key: convert_to_yaml_value(value, None)
            for key, value in test_case.options.items()
        }

    case["result"] = convert_result(test_case)
    return case


def convert_test_file_to_yaml(testFile: "TestFile"):
    """Convert a parsed test file into the YAML document structure.

    Returns:
        A mapping with ``"base_uri"``, ``"function"`` and ``"cases"``.
    """
    # The function name is taken from the test cases; when they differ, the
    # last test case wins. It stays None for a file without test cases.
    function = None
    cases = []
    groups = {}

    for test_case in testFile.testcases:
        function = test_case.func_name
        cases.append(convert_test_case_to_old_format(test_case, groups))

    # Construct the full YAML structure
    return {
        "base_uri": f"https://github.com/substrait-io/substrait/blob/main/extensions/substrait{testFile.include}",
        "function": function,
        "cases": cases,
    }


def output_test_data(output_file, input_path, yaml_data):
    """Dump ``yaml_data`` to ``output_file`` and post-process its quotes.

    ``input_path`` is only used in the progress message.
    """
    with open(output_file, "w") as f:
        yaml.dump(yaml_data, f)

    fix_quotes(output_file)

    print(f"Converted '{input_path}' to '{output_file}'.")


def fix_quotes(file_path):
    """Rewrite ``file_path`` in place with its quote characters normalised.

    Every single and double quote is removed first; afterwards the quote
    placeholders are replaced by real quote characters.
    """
    with open(file_path, "r") as file:
        content = file.read()

    # Strip every quote character, then restore the intended ones from their
    # placeholders. The order matters: restoring first would strip them again.
    content = (
        content.replace("'", "")
        .replace('"', "")
        .replace(SQUOTE_PLACEHOLDER, "'")
        .replace(DQUOTE_PLACEHOLDER, '"')
    )

    with open(file_path, "w") as file:
        file.write(content)


def convert_directory(input_dir, output_dir):
    """Convert every test file found under ``input_dir`` to YAML.

    The directory layout below ``input_dir`` is mirrored below ``output_dir``
    and the ``.test`` suffix of each file is swapped for ``.yaml``.
    """
    for input_test_file in load_all_testcases(input_dir):
        input_file = input_test_file.path
        relative_path = os.path.relpath(input_file, input_dir)
        # Swap only the file suffix; a ".test" elsewhere in the path (for
        # example in a directory name) must stay untouched.
        stem, extension = os.path.splitext(relative_path)
        if extension == ".test":
            relative_path = f"{stem}.yaml"
        output_file = os.path.join(output_dir, relative_path)

        parent_dir = os.path.dirname(output_file)
        # os.makedirs("") raises, so skip it for a bare file name.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        yaml_data = convert_test_file_to_yaml(input_test_file)
        output_test_data(output_file, input_test_file.path, yaml_data)


def main():
    """Convert the Substrait test cases into the ``cases`` directory."""
    input_dir = "../../substrait/tests/cases"
    output_dir = "../../cases"  # Specify the output directory
    convert_directory(input_dir, output_dir)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- 1 | $id: https://thebft.info/schemas/casefile.json 2 | $schema: https://json-schema.org/draft/2020-12/schema 3 | type: object 4 | properties: 5 | function: 6 | type: string 7 | cases: 8 | type: array 9 | items: 10 | type: object 11 | properties: 12 | group: 13 | oneOf: 14 | - type: object 15 | properties: 16 | id: 17 | type: string 18 | description: 19 | type: string 20 | required: 21 | - id 22 | - description 23 | additionalProperties: false 24 | - type: string 25 | args: 26 | type: array 27 | items: 28 | type: object 29 | properties: 30 | value: 31 | oneOf: 32 | - type: string 33 | - type: number 34 | - type: boolean 35 | - type: "null" 36 | - type: array 37 | type: 38 | type: string 39 | required: 40 | - value 41 | - type 42 | additionalProperties: false 43 | options: 44 | type: object 45 | additionalProperties: 46 | type: string 47 | result: 48 | oneOf: 49 | - type: object 50 | properties: 51 | value: 52 | oneOf: 53 | - type: string 54 | - type: number 55 | - type: boolean 56 | - type: "null" 57 | type: 58 | type: string 59 | required: 60 | - value 61 | - type 62 | additionalProperties: false 63 | - type: object 64 | properties: 65 | special: 66 | enum: 67 | - error 68 | - undefined 69 | required: 70 | - special 71 | additionalProperties: false 72 | additionalProperties: false 73 | required: 74 | - group 75 | - result 76 | additionalProperties: false 77 | required: 78 | - function 79 | - cases 80 | -------------------------------------------------------------------------------- /tools/yaml_to_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import yaml 5 | 6 | try: 7 | from yaml import CSafeLoader as SafeLoader 8 | except ImportError: 9 | from yaml import SafeLoader 10 | 11 | BASE_DIR = Path(__file__).parent.parent 12 | JSON_DIR = BASE_DIR / "function_json" 13 | CASES_DIR = BASE_DIR / 
"cases" 14 | FUNCTION_FOLDERS = Path(CASES_DIR).glob("*") 15 | 16 | 17 | for function_folder in FUNCTION_FOLDERS: 18 | folder_path = CASES_DIR / function_folder.name 19 | json_path = JSON_DIR / function_folder.name 20 | Path(json_path).mkdir(parents=True, exist_ok=True) 21 | function_yamls = Path(folder_path).rglob("*.yaml") 22 | for function_yaml in function_yamls: 23 | yaml_file = folder_path / function_yaml.name 24 | json_file = json_path / function_yaml.stem 25 | with open(yaml_file) as f: 26 | dataMap = yaml.load(f, SafeLoader) 27 | with open(f"{json_file}.json", "w") as outfile: 28 | outfile.write('{}\n'.format(json.dumps(dataMap, indent=4))) 29 | --------------------------------------------------------------------------------