├── .github
    └── workflows
    │   ├── deploy.yml
    │   └── test.yml
├── .gitignore
├── .gitmodules
├── .markdownlint.json
├── .vscode
    ├── launch.json
    └── settings.json
├── LICENSE
├── NOTICE.txt
├── README.md
├── bft
    ├── __init__.py
    ├── cases
    │   ├── __init__.py
    │   ├── loader.py
    │   ├── parser.py
    │   ├── runner.py
    │   └── types.py
    ├── core
    │   ├── __init__.py
    │   ├── function.py
    │   ├── index_parser.py
    │   ├── yaml_parser.py
    │   └── yaml_parser_test.py
    ├── dialects
    │   ├── __init__.py
    │   ├── loader.py
    │   ├── parser.py
    │   └── types.py
    ├── html
    │   ├── __init__.py
    │   ├── builder.py
    │   └── types.py
    ├── substrait
    │   ├── __init__.py
    │   └── extension_file_parser.py
    ├── supplements
    │   ├── __init__.py
    │   ├── parser.py
    │   └── types.py
    ├── templates
    │   ├── function_desc.j2
    │   └── function_index.j2
    ├── testers
    │   ├── __init__.py
    │   ├── base_tester.py
    │   ├── cudf
    │   │   ├── __init__.py
    │   │   ├── runner.py
    │   │   └── tester.py
    │   ├── datafusion
    │   │   ├── __init__.py
    │   │   ├── runner.py
    │   │   └── tester.py
    │   ├── duckdb
    │   │   ├── __init__.py
    │   │   ├── runner.py
    │   │   ├── runner_test.py
    │   │   └── tester.py
    │   ├── postgres
    │   │   ├── __init__.py
    │   │   ├── runner.py
    │   │   └── tester.py
    │   ├── snowflake
    │   │   ├── __init__.py
    │   │   ├── config.yaml
    │   │   ├── runner.py
    │   │   └── tester.py
    │   ├── sqlite
    │   │   ├── __init__.py
    │   │   ├── runner.py
    │   │   └── tester.py
    │   └── velox
    │   │   ├── runner.py
    │   │   └── tester.py
    ├── tests
    │   ├── __init__.py
    │   ├── base.py
    │   ├── conftest.py
    │   ├── test_cudf.py
    │   ├── test_datafusion.py
    │   ├── test_duckdb.py
    │   ├── test_postgres.py
    │   ├── test_pyvelox.py
    │   ├── test_snowflake.py
    │   └── test_sqlite.py
    └── utils
    │   └── utils.py
├── build_site.py
├── ci
    └── docker
    │   ├── base-tester.Dockerfile
    │   ├── datafusion.Dockerfile
    │   ├── duckdb.Dockerfile
    │   ├── postgres-compose.yaml
    │   ├── postgres-server.Dockerfile
    │   ├── sqlite.Dockerfile
    │   ├── velox-compose.yaml
    │   └── velox.Dockerfile
├── dialects
    ├── cudf.yaml
    ├── datafusion.yaml
    ├── duckdb.yaml
    ├── postgres.yaml
    ├── snowflake.yaml
    ├── sqlite.yaml
    └── velox_presto.yaml
├── index.yaml
├── requirements.txt
├── static_site
    ├── android-chrome-192x192.png
    ├── android-chrome-512x512.png
    ├── apple-touch-icon.png
    ├── assets
    │   ├── index
    │   │   ├── script.js
    │   │   └── style.css
    │   └── supplementary
    │   │   ├── script.js
    │   │   ├── style.css
    │   │   └── terminal.css
    ├── favicon-16x16.png
    ├── favicon-32x32.png
    └── favicon.ico
├── supplemental
    └── arithmetic
    │   ├── abs.md
    │   ├── acos.md
    │   ├── acosh.md
    │   ├── add.md
    │   ├── asin.md
    │   ├── asinh.md
    │   ├── atan.md
    │   ├── atan2.md
    │   ├── atanh.md
    │   ├── bitwise_and.md
    │   ├── bitwise_not.md
    │   ├── bitwise_or.md
    │   ├── bitwise_xor.md
    │   ├── cos.md
    │   ├── cosh.md
    │   ├── definitions.yaml
    │   ├── divide.md
    │   ├── exponential.md
    │   ├── factorial.md
    │   ├── modulus.md
    │   ├── multiply.md
    │   ├── negate.md
    │   ├── power.md
    │   ├── sign.md
    │   ├── sin.md
    │   ├── sinh.md
    │   ├── sqrt.md
    │   ├── subtract.md
    │   ├── sum.md
    │   ├── tan.md
    │   └── tanh.md
└── tools
    ├── convert_testcases
        ├── check_testcase_format_conversion_roundtrip.py
        ├── convert_testcase_helper.py
        ├── convert_testcases_to_substrait_test_format.py
        ├── convert_testcases_to_yaml_format.py
        ├── test_convert_testcases_to_substrait_test_format.py
        └── test_convert_testcases_to_yaml_format.py
    ├── schema
        └── casefile.yaml
    └── yaml_to_json.py


/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy to gh-pages
 2 | on:
 3 |   workflow_dispatch:  # allows the workflow to be started manually
 4 |   workflow_run:  # also fires when one of the listed workflows reports the event types below
 5 |     workflows:
 6 |       - test  # NOTE(review): test.yml declares `name: Test`; confirm this entry matches that workflow
 7 |     types:
 8 |       - completed  # fires on any completion; the job-level `if` filters on the conclusion
 9 | 
10 | jobs:
11 |   deploy:
12 |     if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}  # manual runs always deploy; triggered runs only after a successful upstream run
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - name: Checkout
16 |         uses: actions/checkout@v3
17 |         with:
18 |           submodules: recursive  # .gitmodules declares the substrait submodule
19 |       - uses: actions/setup-python@v4
20 |         with:
21 |           python-version: "3.11"  # quoted so the version stays a string
22 |           cache: "pip"
23 |       - run: pip install -r requirements.txt
24 |       - name: Build Site
25 |         run: python build_site.py
26 |       - name: Deploy
27 |         uses: JamesIves/github-pages-deploy-action@v4
28 |         with:
29 |           folder: dist  # presumably the output directory written by build_site.py; verify
30 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | on:
 3 |   pull_request:  # no filters: runs for every pull request
 4 |   push:
 5 |     branches:
 6 |       - main  # pushes are only tested on main
 7 | 
 8 | jobs:
 9 |   sqlite:
10 |     name: Run tests with sqlite
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Checkout
14 |         uses: actions/checkout@v3
15 |         with:
16 |           submodules: recursive  # .gitmodules declares the substrait submodule
17 | 
18 |       - name: Build & run
19 |         run: docker run --rm $(docker build -q --file ./ci/docker/sqlite.Dockerfile .)  # -q makes build print only the image id, which is then run once and removed
20 |   duckdb:
21 |     name: Run tests with duckdb
22 |     runs-on: ubuntu-latest
23 |     steps:
24 |       - name: Checkout
25 |         uses: actions/checkout@v3
26 |         with:
27 |           submodules: recursive
28 | 
29 |       - name: Build & run
30 |         run: docker run --rm $(docker build -q --file ./ci/docker/duckdb.Dockerfile .)  # same build-then-run pattern as the sqlite job
31 |   datafusion:
32 |     name: Run tests with datafusion
33 |     runs-on: ubuntu-latest
34 |     steps:
35 |       - name: Checkout
36 |         uses: actions/checkout@v3
37 |         with:
38 |           submodules: recursive
39 | 
40 |       - name: Build & run
41 |         run: docker run --rm $(docker build -q --file ./ci/docker/datafusion.Dockerfile .)  # same build-then-run pattern as the sqlite job
42 |   postgres:
43 |     name: Run tests with postgres
44 |     runs-on: ubuntu-latest
45 |     steps:
46 |       - name: Checkout
47 |         uses: actions/checkout@v3
48 |         with:
49 |           submodules: recursive
50 | 
51 |       - name: Build
52 |         run: docker compose -f ./ci/docker/postgres-compose.yaml build  # uses a compose file instead of a single Dockerfile
53 | 
54 |       - name: Run
55 |         run: docker compose -f ./ci/docker/postgres-compose.yaml run app  # runs the compose service named `app`
56 |   velox:
57 |     name: Run tests with velox
58 |     runs-on: ubuntu-latest
59 |     steps:
60 |       - name: Checkout
61 |         uses: actions/checkout@v3
62 |         with:
63 |           submodules: recursive
64 |       - name: Build
65 |         run: docker compose -f ./ci/docker/velox-compose.yaml build
66 | 
67 |       - name: Run
68 |         run: docker compose -f ./ci/docker/velox-compose.yaml run app  # runs the compose service named `app`
69 |   site:
70 |     name: Build site
71 |     runs-on: ubuntu-latest
72 |     steps:
73 |       - name: Checkout
74 |         uses: actions/checkout@v3
75 |         with:
76 |           submodules: recursive
77 |       - uses: actions/setup-python@v4
78 |         with:
79 |           python-version: "3.11"  # quoted so the version stays a string
80 |           cache: "pip"
81 |       - run: pip install -r requirements.txt
82 |       - name: Build Site
83 |         run: python build_site.py  # same build command deploy.yml runs, here without publishing
84 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "substrait"]
2 | 	path = substrait
3 | 	url = https://github.com/substrait-io/substrait.git
4 | 


--------------------------------------------------------------------------------
/.markdownlint.json:
--------------------------------------------------------------------------------
1 | {
2 |     "MD013": true
3 | }


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // Use IntelliSense to learn about possible attributes.
 3 |     // Hover to view descriptions of existing attributes.
 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |     "version": "0.2.0",
 6 |     "configurations": [
 7 |         {
 8 |             "name": "Build Site",
 9 |             "type": "python",
10 |             "request": "launch",
11 |             "env": {
12 |                 "PYTHONPATH": "${workspaceFolder}"
13 |             },
14 |             "module": "bft.html.builder",
15 |             "justMyCode": true
16 |         },
17 |         {
18 |             "name": "Run Tests",
19 |             "type": "python",
20 |             "request": "launch",
21 |             "env": {
22 |                 "PYTHONPATH": "${workspaceFolder}"
23 |             },
24 |             "module": "pytest",
25 |             "args": [
26 |                 "bft"
27 |             ],
28 |             "justMyCode": true
29 |         }
30 |     ]
31 | }


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "python.formatting.provider": "black",
 3 |     "editor.formatOnSave": true,
 4 |     "editor.codeActionsOnSave": {
 5 |         "source.organizeImports": "explicit"
 6 |     },
 7 |     "isort.args": [
 8 |         "--profile",
 9 |         "black"
10 |     ],
11 |     "yaml.schemas": {
12 |         "./tools/schema/casefile.yaml": "cases/**",
13 |         "https://json.schemastore.org/github-workflow.json": "file:///home/pace/dev/bft/.github/workflows/deploy.yml"
14 |     },
15 |     "python.testing.unittestEnabled": false,
16 |     "python.testing.pytestEnabled": true,
17 |     "python.testing.pytestArgs": [
18 |         "bft"
19 |     ]
20 | }
21 | 


--------------------------------------------------------------------------------
/NOTICE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2023 Voltron Data, Inc.
2 | 
3 | This product includes software developed at
4 | Voltron Data, Inc. (http://www.voltrondata.com/).
5 | 


--------------------------------------------------------------------------------
/bft/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/__init__.py


--------------------------------------------------------------------------------
/bft/cases/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/cases/__init__.py


--------------------------------------------------------------------------------
/bft/cases/loader.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List
 3 | 
 4 | from .parser import CaseFileParser
 5 | from .types import Case
 6 | 
 7 | 
 8 | def load_cases(cases_dir: str) -> List[Case]:
 9 |     cases = []
10 |     parser = CaseFileParser()
11 |     for case_path in Path(cases_dir).rglob("*.yaml"):
12 |         with open(case_path, "rb") as case_f:
13 |             for case_file in parser.parse(case_f):
14 |                 for case in case_file.cases:
15 |                     cases.append(case)
16 |     return cases
17 | 


--------------------------------------------------------------------------------
/bft/cases/parser.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | from typing import BinaryIO, Iterable, List
 3 | 
 4 | from bft.core.yaml_parser import BaseYamlParser, BaseYamlVisitor
 5 | 
 6 | from .types import Case, CaseFile, CaseGroup, CaseLiteral, ProtoCase
 7 | 
 8 | 
 9 | class CaseFileVisitor(BaseYamlVisitor[CaseFile]):
10 |     def __init__(self):
11 |         super().__init__()
12 |         self.__groups = {}
13 | 
14 |     def __resolve_proto_case(self, case: ProtoCase, base_uri: str, function: str) -> Case:
15 |         if case.group not in self.__groups:
16 |             raise Exception(
17 |                 "A case referred to group " + case.group +" which was not defined in the file"
18 |             )
19 |         grp = self.__groups[case.group]
20 |         return Case(function, base_uri, grp, case.args, case.result, case.options)
21 | 
22 |     def visit_group(self, group):
23 |         id = self._get_or_die(group, "id")
24 |         description = self._get_or_die(group, "description")
25 |         self.__groups[id] = CaseGroup(id, description)
26 |         return id
27 | 
28 |     def __normalize_yaml_literal(self, value, data_type):
29 |         # YAML/JSON can't represent infinity or nan
30 |         # so its a special case
31 |         if data_type.startswith("fp"):
32 |             if isinstance(value, str):
33 |                 if value.lower().startswith("inf"):
34 |                     return float("inf")
35 |                 elif value.lower().startswith("-inf"):
36 |                     return float("-inf")
37 |                 elif value.lower().startswith("1e"):
38 |                     return float(value.lower())
39 |                 elif value.lower().startswith("nan"):
40 |                     return math.nan
41 |                 else:
42 |                     raise ValueError(f"Unrecognized float string literal {value}")
43 |         return value
44 | 
45 |     def visit_literal(self, lit):
46 |         value = self._get_or_die(lit, "value")
47 |         data_type = self._get_or_die(lit, "type")
48 |         is_not_a_func_arg = self._get_or_else(lit, "is_not_a_func_arg", False)
49 |         value = self.__normalize_yaml_literal(value, data_type)
50 |         return CaseLiteral(value, data_type, is_not_a_func_arg)
51 | 
52 |     def visit_literal_result(self, lit):
53 |         value = self._get_or_die(lit, "value")
54 |         data_type = self._get_or_die(lit, "type")
55 |         value = self.__normalize_yaml_literal(value, data_type)
56 |         return CaseLiteral(value, data_type)
57 | 
58 |     def visit_result(self, res):
59 |         special = self._get_or_else(res, "special", None)
60 |         if special is None:
61 |             return self.visit_literal_result(res)
62 |         return special
63 | 
64 |     def visit_case(self, case):
65 |         grp = self._get_or_die(case, "group")
66 |         if not isinstance(grp, str):
67 |             grp = self.visit_group(grp)
68 |         result = self._visit_or_die(self.visit_result, case, "result")
69 |         args = self._visit_list(self.visit_literal, case, "args")
70 |         opts = self._get_or_else(case, "options", {})
71 |         opt_tuples = []
72 |         for opt_key in sorted(opts.keys()):
73 |             opt_tuples.append((opt_key, opts[opt_key]))
74 |         return ProtoCase(grp, args, result, opt_tuples)
75 | 
76 |     def visit(self, case_file):
77 |         base_uri = self._get_or_die(case_file, 'base_uri')
78 |         func_name = self._get_or_die(case_file, "function")
79 |         proto_cases = self._visit_list(self.visit_case, case_file, "cases")
80 |         cases = [self.__resolve_proto_case(c, base_uri, func_name) for c in proto_cases]
81 |         return CaseFile(func_name, base_uri, cases)
82 | 
83 | 
class CaseFileParser(BaseYamlParser[CaseFile]):
    """YAML parser specialised for case files."""

    def get_visitor(self) -> CaseFileVisitor:
        """Return a newly constructed ``CaseFileVisitor``."""
        visitor = CaseFileVisitor()
        return visitor
87 | 


--------------------------------------------------------------------------------
/bft/cases/runner.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import Literal, NamedTuple
  3 | 
  4 | from bft.dialects.types import Dialect, SqlMapping
  5 | 
  6 | from .types import Case
  7 | 
  8 | 
  9 | class CaseResult(NamedTuple):
 10 |     passed: bool
 11 |     expected_pass: bool
 12 |     reason: str
 13 | 
 14 | 
 15 | class CaseRunner(ABC):
 16 |     @abstractmethod
 17 |     def run_case(self, case: Case) -> CaseResult:
 18 |         pass
 19 | 
 20 | 
 21 | class SqlCaseResult(NamedTuple):
 22 |     type: Literal["success", "error", "unsupported", "unexpected_pass", "mismatch"]
 23 |     err: str
 24 |     actual: str
 25 | 
 26 |     @staticmethod
 27 |     def success():
 28 |         return SqlCaseResult("success", None, None)
 29 | 
 30 |     @staticmethod
 31 |     def error(err: str):
 32 |         return SqlCaseResult("error", err, None)
 33 | 
 34 |     @staticmethod
 35 |     def unsupported(err: str):
 36 |         return SqlCaseResult("unsupported", err, None)
 37 | 
 38 |     @staticmethod
 39 |     def unexpected_pass(actual: str):
 40 |         return SqlCaseResult("unexpected_pass", None, actual)
 41 | 
 42 |     @staticmethod
 43 |     def mismatch(actual: str):
 44 |         return SqlCaseResult("mismatch", None, actual)
 45 | 
 46 | 
 47 | class SqlCaseRunner(CaseRunner):
 48 |     def __init__(self, dialect: Dialect):
 49 |         self.__dialect = dialect
 50 | 
 51 |     def run_case(self, case: Case) -> CaseResult:
 52 |         mapping = self.__dialect.mapping_for_case(case)
 53 |         if mapping is None:
 54 |             return CaseResult(
 55 |                 False,
 56 |                 False,
 57 |                 f"The dialect {self.__dialect.name} does not support the function '{case.function}'",
 58 |             )
 59 |         result = self.run_sql_case(case, mapping)
 60 |         if result.type == "success":
 61 |             return CaseResult(result, mapping.should_pass, mapping.reason)
 62 |         elif result.type == "unsupported":
 63 |             if mapping.should_pass:
 64 |                 return CaseResult(
 65 |                     False,
 66 |                     True,
 67 |                     f"This case should have been supported.  Instead it reported {result.err}",
 68 |                 )
 69 |             else:
 70 |                 return CaseResult(False, False, mapping.reason)
 71 |         elif result.type == "error":
 72 |             if case.result == "error":
 73 |                 # Case expected to error.  Dialect may or may not have expected it
 74 |                 should_pass = mapping.should_pass
 75 |                 if mapping.unsupported:
 76 |                     # Unsupported test case, expected an error and got an error
 77 |                     should_pass = True
 78 |                 return CaseResult(True, should_pass, mapping.reason)
 79 |             else:
 80 |                 if mapping.should_pass:
 81 |                     # Case should not have error.  Dialect should not have error
 82 |                     return CaseResult(False, mapping.should_pass, result.err)
 83 |                 else:
 84 |                     # Case should not have error but it's expected for dialect
 85 |                     return CaseResult(False, mapping.should_pass, mapping.reason)
 86 |         elif result.type == "unexpected_pass":
 87 |             # Case expected error.  No error happened.
 88 |             if mapping.should_pass:
 89 |                 # This was not expected given the dialect
 90 |                 return CaseResult(
 91 |                     False,
 92 |                     mapping.should_pass,
 93 |                     f"This case should have given an error.  Instead it returned the value {result.actual}",
 94 |                 )
 95 |             else:
 96 |                 # In this dialect, this case passes even though it shouldn't
 97 |                 return CaseResult(False, mapping.should_pass, mapping.reason)
 98 |         elif result.type == "mismatch":
 99 |             if mapping.should_pass:
100 |                 return CaseResult(
101 |                     False,
102 |                     mapping.should_pass,
103 |                     f"This case should have yielded the result {case.result.value} but instead it returned {result.actual}",
104 |                 )
105 |             else:
106 |                 return CaseResult(False, mapping.should_pass, mapping.reason)
107 |         else:
108 |             raise Exception("Unexpected case result type")
109 | 
110 |     @abstractmethod
111 |     def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
112 |         pass
113 | 


--------------------------------------------------------------------------------
/bft/cases/types.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, List, Literal, NamedTuple, Tuple
 2 | 
 3 | 
 4 | class CaseLiteral(NamedTuple):
 5 |     value: str | int | float | list
 6 |     type: str
 7 |     is_not_a_func_arg: bool = False  # if true it is used only to populate test data
 8 | 
 9 | 
class CaseGroup(NamedTuple):
    """A named group that cases belong to."""

    # Identifier cases use to refer to the group
    id: str
    # Human-readable description of the group
    description: str
13 | 
14 | 
class Case(NamedTuple):
    """One fully resolved case for a function."""

    # Name of the function the case exercises
    function: str
    # Base URI taken from the case file
    base_uri: str
    # Group the case belongs to
    group: CaseGroup
    # Literal arguments of the case
    args: List[CaseLiteral]
    # Expected literal result, or one of the special markers
    result: CaseLiteral | Literal["error", "undefined"]
    # (name, value) option pairs
    options: List[Tuple[str, str]]
22 | 
23 | 
def case_to_kernel_str(
    function: str,
    args: List[CaseLiteral],
    result: CaseLiteral | Literal["error", "undefined"],
):
    """Render a signature string of the form ``function(type, ...) -> result``.

    Argument types are joined with ``", "``.  A string ``result`` is used
    verbatim; otherwise the result's ``type`` is shown.
    """
    arg_types = ", ".join(literal.type for literal in args)
    shown_result = result if isinstance(result, str) else result.type
    return f"{function}({arg_types}) -> {shown_result}"
34 | 
35 | 
class CaseFile(NamedTuple):
    """All cases read from a single case document."""

    # Name of the function the document covers
    function: str
    # Base URI declared by the document
    base_uri: str
    # The cases contained in the document
    cases: List[Case]
40 | 
41 | 
class ProtoCase(NamedTuple):
    """A case whose group is still an unresolved group id.

    Field layout mirrors ``Case`` minus ``function`` and ``base_uri``.
    """

    # Id of the group the case refers to
    group: str
    # Literal arguments of the case
    args: List[CaseLiteral]
    # Expected literal result, or one of the special markers
    result: CaseLiteral | Literal["error", "undefined"]
    # (name, value) option pairs, the same shape as ``Case.options``
    options: List[Tuple[str, str]]
47 | 


--------------------------------------------------------------------------------
/bft/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/core/__init__.py


--------------------------------------------------------------------------------
/bft/core/function.py:
--------------------------------------------------------------------------------
  1 | from typing import List, NamedTuple
  2 | 
  3 | 
  4 | class Option(NamedTuple):
  5 |     name: str
  6 |     values: List[str]
  7 | 
  8 | 
  9 | class Kernel(NamedTuple):
 10 |     arg_types: List[str]
 11 |     return_type: str
 12 |     available_options: List[str]
 13 |     variadic: str
 14 | 
 15 | 
 16 | class FunctionDefinition(object):
 17 |     def __init__(
 18 |         self,
 19 |         name: str,
 20 |         uri: str,
 21 |         description: str,
 22 |         options: List[Option],
 23 |         kernels: List[Kernel],
 24 |     ):
 25 |         self.name = name
 26 |         self.uri = uri
 27 |         self.description = description
 28 |         self.options = options
 29 |         self.kernels = kernels
 30 | 
 31 |     @property
 32 |     def details(self):
 33 |         return []
 34 | 
 35 |     @property
 36 |     def properties(self):
 37 |         return
 38 | 
 39 | 
 40 | class FunctionBuilder(object):
 41 |     def __init__(self, name: str):
 42 |         self.name = name
 43 |         self.uri: str = None
 44 |         self.description: str = None
 45 |         self.options = {}
 46 |         self.kernels = []
 47 | 
 48 |     def set_description(self, description: str):
 49 |         self.description = description
 50 | 
 51 |     def set_uri(self, uri: str):
 52 |         self.uri = uri
 53 | 
 54 |     def try_set_description(self, description: str):
 55 |         if self.description is None:
 56 |             self.description = description
 57 | 
 58 |     def note_option(self, name: str, values: List[str]):
 59 |         if name in self.options:
 60 |             existing_values = self.options[name]
 61 |             # Merge existing values and new values using set union
 62 |             self.options[name] = list(set(existing_values).union(values))
 63 |         else:
 64 |             # Add the new values directly if the option does not exist
 65 |             self.options[name] = values
 66 | 
 67 |     def note_kernel(
 68 |         self,
 69 |         arg_types: List[str],
 70 |         return_type: str,
 71 |         available_options: List[str],
 72 |         variadic: int,
 73 |     ):
 74 |         self.kernels.append(Kernel(arg_types, return_type, available_options, variadic))
 75 | 
 76 |     def finish(self) -> FunctionDefinition:
 77 |         if self.description is None:
 78 |             self.description = "Description is missing and would go here"
 79 |         opts = []
 80 |         for key, values in self.options.items():
 81 |             opts.append(Option(key, values))
 82 |         return FunctionDefinition(
 83 |             self.name, self.uri, self.description, opts, self.kernels
 84 |         )
 85 | 
 86 | 
 87 | class LibraryBuilder(object):
 88 |     def __init__(self):
 89 |         self.functions = {}
 90 | 
 91 |     def get_function(self, name, category):
 92 |         full_name = f"{category}_{name}"
 93 |         if name not in self.functions:
 94 |             self.functions[full_name] = FunctionBuilder(full_name)
 95 |         return self.functions[full_name]
 96 | 
 97 |     def function_names(self) -> List[str]:
 98 |         return sorted(self.functions.keys())
 99 | 
100 |     def finish(self) -> List[FunctionDefinition]:
101 |         built_functions = []
102 |         for func_name in sorted(self.functions.keys()):
103 |             built_functions.append(self.functions[func_name].finish())
104 |         return built_functions
105 | 


--------------------------------------------------------------------------------
/bft/core/index_parser.py:
--------------------------------------------------------------------------------
 1 | from typing import List, NamedTuple
 2 | 
 3 | from .yaml_parser import BaseYamlParser, BaseYamlVisitor
 4 | 
 5 | 
 6 | class IndexFunctionsFile(NamedTuple):
 7 |     location: str
 8 |     canonical_uri: str
 9 | 
class IndexFile(NamedTuple):
    """Parsed contents of an index file."""

    # Entries read from substrait/extensions
    function_files: List[IndexFunctionsFile]
    # Value of the "cases" attribute (empty list when absent)
    case_directories: List[str]
    # Value of the "dialects" attribute (empty list when absent)
    dialect_directories: List[str]
    # Value of the "supplements" attribute (empty list when absent)
    supplement_directories: List[str]
15 | 
class IndexFileVisitor(BaseYamlVisitor[IndexFile]):
    """Visitor that converts a parsed index document into an ``IndexFile``."""

    def visit_function_file(self, function_file):
        """Build an ``IndexFunctionsFile`` from one ``extensions`` entry."""
        return IndexFunctionsFile(
            location=self._get_or_die(function_file, "location"),
            canonical_uri=self._get_or_die(function_file, "canonical"),
        )

    def visit(self, index_file):
        """Visit the whole index document.

        ``substrait`` is required; ``cases``, ``dialects`` and ``supplements``
        default to empty lists.
        """
        substrait_section = self._get_or_die(index_file, "substrait")
        extensions = self._visit_list(
            self.visit_function_file, substrait_section, "extensions"
        )
        return IndexFile(
            function_files=extensions,
            case_directories=self._get_or_else(index_file, "cases", []),
            dialect_directories=self._get_or_else(index_file, "dialects", []),
            supplement_directories=self._get_or_else(index_file, "supplements", []),
        )
32 | 
33 | 
class IndexFileParser(BaseYamlParser[IndexFile]):
    """YAML parser whose documents are visited by ``IndexFileVisitor``."""

    def get_visitor(self) -> IndexFileVisitor:
        """Return a fresh visitor (the visitor, not an ``IndexFile``, is returned)."""
        return IndexFileVisitor()
37 | 
def load_index(index_path: str) -> IndexFile:
    """Parse the file at ``index_path`` and return the first parsed document."""
    index_parser = IndexFileParser()
    with open(index_path, 'rb') as index_stream:
        documents = index_parser.parse(index_stream)
        return documents[0]


--------------------------------------------------------------------------------
/bft/core/yaml_parser.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from abc import ABC, abstractmethod
  3 | from decimal import Decimal
  4 | from typing import BinaryIO, Generic, Iterable, List, TypeVar
  5 | 
  6 | import yaml
  7 | 
  8 | from bft.cases.types import CaseLiteral
  9 | 
 10 | try:
 11 |     from yaml import CSafeDumper as SafeDumper
 12 |     from yaml import CSafeLoader as SafeLoader
 13 | except ImportError:
 14 |     from yaml import SafeDumper, SafeLoader
 15 | 
 16 | T = TypeVar("T")
 17 | 
 18 | 
 19 | class BaseYamlVisitor(ABC, Generic[T]):
 20 |     def __init__(self):
 21 |         self.__location_stack: List[str] = []
 22 | 
 23 |     def _fail(self, err):
 24 |         loc = "/".join(self.__location_stack)
 25 |         raise Exception(f"Error visiting case file.  Location={loc} Message={err}")
 26 | 
 27 |     def _visit_list(self, visitor, obj, attr, required=False):
 28 |         if attr in obj:
 29 |             val = obj[attr]
 30 |             results = []
 31 |             if not isinstance(val, Iterable):
 32 |                 self._fail(f"Expected attribute {attr} to be iterable")
 33 |             for idx, item in enumerate(val):
 34 |                 self.__location_stack.append(f"{attr}[{idx}]")
 35 |                 results.append(visitor(item))
 36 |                 self.__location_stack.pop()
 37 |             for result in results:
 38 |                 if isinstance(result, CaseLiteral) and isinstance(result.value, list):
 39 |                     if len(result.value) > 0:
 40 |                         for i, s in enumerate(result.value):
 41 |                             lower_s = str(s).lower()
 42 |                             if lower_s.startswith("'inf'"):
 43 |                                 result.value[i] = float("inf")
 44 |                             elif lower_s.startswith("'-inf'"):
 45 |                                 result.value[i] = float("-inf")
 46 |                             elif lower_s.startswith("'nan'"):
 47 |                                 result.value[i] = math.nan
 48 |                         results.append(CaseLiteral(result.value, result.type, result.is_not_a_func_arg))
 49 |                         results.remove(result)
 50 |             return results
 51 |         elif required:
 52 |             self._fail(f"Expected required attribute {attr}")
 53 |         else:
 54 |             return []
 55 | 
 56 |     def __visit_or_maybe_die(self, visitor, obj, attr, required, default=None):
 57 |         if attr in obj:
 58 |             val = obj[attr]
 59 |             self.__location_stack.append(f"{attr}")
 60 |             visited = visitor(val)
 61 |             self.__location_stack.pop()
 62 |             return visited
 63 |         elif required:
 64 |             self._fail(f"Expected required attribte {attr}")
 65 |         else:
 66 |             return default
 67 | 
 68 |     def _visit_or_die(self, visitor, obj, attr):
 69 |         return self.__visit_or_maybe_die(visitor, obj, attr, False)
 70 | 
 71 |     def _visit_or_else(self, visitor, obj, attr, default):
 72 |         return self.__visit_or_maybe_die(visitor, obj, attr, True, default)
 73 | 
 74 |     def _get_or_die(self, obj, attr):
 75 |         if attr in obj:
 76 |             return obj[attr]
 77 |         self._fail(f"Expected required attribute {attr}")
 78 | 
 79 |     def _get_or_else(self, obj, attr, default):
 80 |         if attr in obj:
 81 |             return obj[attr]
 82 |         return default
 83 | 
 84 |     @abstractmethod
 85 |     def visit(yamlobj) -> T:
 86 |         pass
 87 | 
 88 | 
 89 | class BaseYamlParser(ABC, Generic[T]):
 90 |     @abstractmethod
 91 |     def get_visitor(self) -> BaseYamlVisitor[T]:
 92 |         pass
 93 | 
 94 |     def get_loader(self):
 95 |         loader = yaml.SafeLoader
 96 |         """Add tag "!decimal" to the loader """
 97 |         loader.add_constructor("!decimal", self.decimal_constructor)
 98 |         loader.add_constructor("!decimallist", self.list_of_decimal_constructor)
 99 |         return loader
100 | 
101 |     def decimal_constructor(self, loader: yaml.SafeLoader, node: yaml.nodes.MappingNode):
102 |         return self.get_decimal_value(loader, node)
103 | 
104 |     def get_decimal_value(self, loader: yaml.SafeLoader, node: yaml.ScalarNode):
105 |         value = loader.construct_scalar(node)
106 |         if isinstance(value, str) and value.lower() == 'null':
107 |             return None
108 |         return Decimal(value)
109 | 
110 |     def list_of_decimal_constructor(self, loader: yaml.SafeLoader, node: yaml.nodes.MappingNode):
111 |         return [self.get_decimal_value(loader, item) for item in node.value]
112 | 
113 |     def parse(self, f: BinaryIO) -> List[T]:
114 |         loader = self.get_loader()
115 |         objs = yaml.load_all(f, loader)
116 |         visitor = self.get_visitor()
117 |         return [visitor.visit(obj) for obj in objs]
118 | 


--------------------------------------------------------------------------------
/bft/core/yaml_parser_test.py:
--------------------------------------------------------------------------------
 1 | from decimal import Decimal
 2 | from typing import NamedTuple
 3 | 
 4 | from bft.core.yaml_parser import BaseYamlParser
 5 | 
 6 | 
class TestDecimalResult(NamedTuple):
    """Wrapper for the value loaded from one YAML document in these tests."""

    # The loaded value: a single decimal or a list of decimals
    cases: Decimal | list[Decimal]
 9 | 
class TestCaseVisitor():
    """Minimal visitor that wraps the loaded YAML value in a ``TestDecimalResult``."""

    def visit(self, testcase):
        return TestDecimalResult(cases=testcase)
class DecimalTestCaseParser(BaseYamlParser[TestDecimalResult]):
    """Parser used by the tests; visits documents with ``TestCaseVisitor``."""

    def get_visitor(self) -> TestCaseVisitor:
        visitor = TestCaseVisitor()
        return visitor
16 | 
def test_yaml_parser_decimal_tag():
    """Check that ``!decimal`` and ``!decimallist`` tags load as ``Decimal`` values."""
    parser = DecimalTestCaseParser()
    # parse() returns a list with one entry per YAML document.
    expectations = [
        (b"!decimal 1", Decimal('1')),
        (b"!decimal 1.78766", Decimal('1.78766')),
        (b"!decimal null", None),
        (b"!decimallist [1.2, null, 7.547]", [Decimal('1.2'), None, Decimal('7.547')]),
    ]
    for document, expected in expectations:
        assert parser.parse(document) == [TestDecimalResult(expected)]
24 | 


--------------------------------------------------------------------------------
/bft/dialects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/dialects/__init__.py


--------------------------------------------------------------------------------
/bft/dialects/loader.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List
 3 | 
 4 | from .parser import DialectFileParser
 5 | from .types import DialectFile, DialectsLibrary
 6 | 
 7 | 
 8 | def load_dialects(dialects_dir: str) -> DialectsLibrary:
 9 |     parser = DialectFileParser()
10 |     dialect_files: List[DialectFile] = []
11 |     for dialect_path in Path(dialects_dir).rglob("*.yaml"):
12 |         with open(dialect_path, "rb") as dialect_f:
13 |             for dialect_file in parser.parse(dialect_f):
14 |                 dialect_files.append(dialect_file)
15 |     return DialectsLibrary(dialect_files)
16 | 


--------------------------------------------------------------------------------
/bft/dialects/parser.py:
--------------------------------------------------------------------------------
 1 | from bft.core.yaml_parser import BaseYamlParser, BaseYamlVisitor
 2 | from bft.dialects.types import DialectFile, DialectFunction, DialectKernel, short_type_to_type
 3 | 
 4 | 
 5 | class DialectFileVisitor(BaseYamlVisitor[DialectFile]):
 6 |     @staticmethod
 7 |     def visit_kernel(kernel):
 8 |         arg_types = []
 9 |         if kernel != '':
10 |             arg_types = [DialectFileVisitor.get_long_type(arg_type) for arg_type in kernel.split("_")]
11 |         return DialectKernel(arg_types, any)
12 | 
13 |     @staticmethod
14 |     def get_long_type(short_type):
15 |         long_type = short_type_to_type.get(short_type, None)
16 |         if long_type is None:
17 |             return short_type
18 |         return long_type
19 | 
20 |     @staticmethod
21 |     def _get_unqualified_func_name(name):
22 |         return name.split(".")[-1]
23 | 
24 |     def visit_function(self, func):
25 |         name = self._get_or_die(func, "name")
26 |         required_opts = self._get_or_else(func, "required_options", {})
27 |         local_name = self._get_or_else(func, "local_name", self._get_unqualified_func_name(name))
28 |         infix = self._get_or_else(func, "infix", False)
29 |         postfix = self._get_or_else(func, "postfix", False)
30 |         between = self._get_or_else(func, "between", False)
31 |         aggregate = self._get_or_else(func, "aggregate", False)
32 |         unsupported = self._get_or_else(func, "unsupported", False)
33 |         # The extract function uses a special grammar in some SQL dialects.
34 |         # i.e. SELECT EXTRACT(YEAR FROM times) FROM my_table
35 |         extract = self._get_or_else(func, "extract", False)
36 |         good_kernels = self._visit_list(self.visit_kernel, func, "supported_kernels")
37 |         variadic_min = self._get_or_else(func, "variadic", -1)
38 |         return DialectFunction(
39 |             name,
40 |             local_name,
41 |             infix,
42 |             postfix,
43 |             between,
44 |             aggregate,
45 |             unsupported,
46 |             extract,
47 |             required_opts,
48 |             variadic_min,
49 |             good_kernels,
50 |         )
51 | 
52 |     def visit(self, dfile):
53 |         name = self._get_or_die(dfile, "name")
54 |         dtype = self._get_or_die(dfile, "type")
55 |         scalar_functions = self._visit_list(
56 |             self.visit_function, dfile, "scalar_functions"
57 |         )
58 |         aggregate_functions = self._visit_list(
59 |             self.visit_function, dfile, "aggregate_functions"
60 |         )
61 |         uri_to_func_prefix = {uri: func_prefix for func_prefix, uri in dfile.get("dependencies", {}).items()}
62 |         supported_types = self._visit_list(self.get_long_type, dfile, "supported_types")
63 |         return DialectFile(name, dtype, scalar_functions, aggregate_functions, uri_to_func_prefix, supported_types)
64 | 
65 | 
class DialectFileParser(BaseYamlParser[DialectFile]):
    """YAML parser whose documents are visited by ``DialectFileVisitor``."""

    def get_visitor(self) -> DialectFileVisitor:
        visitor = DialectFileVisitor()
        return visitor
69 | 


--------------------------------------------------------------------------------
/bft/html/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/html/__init__.py


--------------------------------------------------------------------------------
/bft/html/types.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, List, Literal, NamedTuple
  2 | 
  3 | from bft.core.function import Kernel
  4 | 
  5 | 
class FunctionOptionValueInfo(NamedTuple):
    """One possible value of a function option."""

    # The name of the value
    # Sourced from Substrait YAML
    name: str
    # Description of the value
    # Sourced from BFT markdown
    description: str
 13 | 
 14 | 
 15 | # An option that can control function behavior
class FunctionOptionInfo(NamedTuple):
    """An option that can control function behavior, with its possible values."""

    # The name of the option
    # Sourced from Substrait YAML
    name: str
    # Description of the option
    # Sourced from Substrait YAML
    # Can be overridden by BFT markdown
    description: str
    # Possible values for the option
    # Sourced from Substrait YAML
    values: List[FunctionOptionValueInfo]
 27 | 
 28 | 
 29 | # Information about how the function behaves in different dialects
class FunctionDialectInfo(NamedTuple):
    """How a function behaves in one dialect."""

    # Name of the dialect (e.g. sqlite)
    # Sourced from dialect files
    name: str
    # Required options for this function in the given dialect
    # Sourced from Substrait YAML
    options: Dict[str, str]
    # NOTE(review): presumably one entry per example case -- confirm against
    # the code that builds this tuple
    case_info: List[str]
    # NOTE(review): presumably one flag per kernel of the function -- confirm
    # against the code that builds this tuple
    kernel_info: List[bool]
 39 | 
 40 | 
 41 | # Additional details or motivation for the function
class FunctionDetailInfo(NamedTuple):
    """One titled section of additional details or motivation for a function."""

    # Title of the detail section
    # Sourced from BFT markdown
    title: str
    # Body of the detail section
    # Sourced from BFT markdown
    description: str
 49 | 
 50 | 
 51 | # Invariants that the function respects
 52 | # Mostly useful for property-based testing
class FunctionPropertyInfo(NamedTuple):
    """One invariant that a function respects."""

    # The name of the invariant
    # Sourced from BFT markdown
    id: str
    # A description of the invariant
    # Sourced from BFT markdown
    description: str
 60 | 
 61 | 
class FunctionExampleResultInfo(NamedTuple):
    """The value produced by one example case."""

    # Value of the result
    # Sourced from case files
    value: str
 66 | 
 67 | 
class FunctionExampleCaseInfo(NamedTuple):
    """One example execution: its arguments, option values and result."""

    # Arguments to the function for this test case
    # Sourced from case files
    args: List[str]
    # Options values for this function
    # Sourced from case files
    options: List[str]
    # Result of the function run on the args: either the literal string
    # "error" or "undefined", or a concrete result value
    # Sourced from case files
    result: Literal["error"] | Literal["undefined"] | FunctionExampleResultInfo
 78 | 
 79 | 
class FunctionExampleGroupInfo(NamedTuple):
    """A group of example cases that share argument types, options and result type."""

    # Description of the example group
    # Sourced from case files
    description: str
    # Argument types for the examples in the group
    # Sourced from case files
    arg_types: List[str]
    # Names of options used in the examples in this group
    # Sourced from case files
    option_names: List[str]
    # Result type for the examples in the group
    # Sourced from case files
    result_type: str
    # Example executions
    cases: List[FunctionExampleCaseInfo]
 95 | 
 96 | 
 97 | # Information describing a function
class FunctionInfo(NamedTuple):
    """Everything known about one function, gathered from the sources named per field."""

    # Name of the function (e.g. add)
    # Sourced from Substrait YAML
    name: str
    # The Substrait URI for the function (e.g. https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml)
    # Sourced from Substrait YAML
    # Can be overridden by BFT markdown
    uri: str
    # The last part of the URI (e.g. functions_arithmetic.yaml)
    # Sourced from Substrait YAML
    uri_short: str
    # A very brief (ideally one sentence) description of the function
    # Sourced from Substrait YAML
    brief: str
    # Available options for the function
    options: List[FunctionOptionInfo]
    # Available kernels for the function
    kernels: List[Kernel]
    # Dialect info for the function
    dialects: List[FunctionDialectInfo]
    # Function details
    details: List[FunctionDetailInfo]
    # Properties that hold true for the function
    properties: List[FunctionPropertyInfo]
    # Example function executions
    example_groups: List[FunctionExampleGroupInfo]
124 | 
125 | 
class FunctionIndexItem(NamedTuple):
    """One function entry of the function index."""

    # Name of the function
    name: str
    # Summary of the function, sourced from Substrait YAML
    brief: str
    # Function category, i.e. Arithmetic, String, etc.
    category: str
133 | 
134 | 
class FunctionIndexInfo(NamedTuple):
    """The full function index."""

    # Entries of the index, one per function
    functions: List[FunctionIndexItem]
137 | 


--------------------------------------------------------------------------------
/bft/substrait/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/substrait/__init__.py


--------------------------------------------------------------------------------
/bft/substrait/extension_file_parser.py:
--------------------------------------------------------------------------------
  1 | import pathlib
  2 | from collections import namedtuple
  3 | from collections.abc import Iterable
  4 | from typing import Dict, List, NamedTuple
  5 | 
  6 | import yaml
  7 | 
  8 | try:
  9 |     from yaml import CSafeDumper as SafeDumper
 10 |     from yaml import CSafeLoader as SafeLoader
 11 | except ImportError:
 12 |     from yaml import SafeLoader, SafeDumper
 13 | 
 14 | from typing import BinaryIO
 15 | 
 16 | from ..core.function import FunctionBuilder, LibraryBuilder
 17 | 
 18 | 
class ValueArg(NamedTuple):
    """An implementation argument declared with a ``value`` key."""

    # Value of the argument's "name" key (None when absent)
    name: str
    # Value of the argument's "description" key (None when absent)
    description: str
    # Value of the argument's "value" key
    type: str
 23 | 
 24 | 
class EnumArg(NamedTuple):
    """An implementation argument declared with an ``options`` key instead of a value."""

    # Value of the argument's "name" key (None when absent)
    name: str
    # Value of the argument's "description" key (None when absent)
    description: str
    # Value of the argument's "options" key
    options: List[str]
 29 | 
 30 | 
class Implementation(NamedTuple):
    """One entry of a function's ``impls`` list."""

    # Arguments of the implementation, in declaration order
    args: List[ValueArg | EnumArg]
    # Maps each option name to the "values" listed for it
    options: Dict[str, List[str]]
    # Value of the implementation's "return" key
    return_type: str
    # The variadic "min" value as a string; "0" when the implementation
    # has no "variadic" key
    variadic: str
 36 | 
 37 | 
class Function(NamedTuple):
    """One function entry of an extension file."""

    # Value of the function's "name" key
    name: str
    # Value of the function's "description" key (None when absent)
    description: str
    # Entries of the function's "impls" list
    implementations: List[Implementation]
 42 | 
 43 | 
class ExtensionsFile(NamedTuple):
    """The functions read from one extension file."""

    # Scalar functions followed by aggregate functions
    functions: List[Function]
 46 | 
 47 | 
 48 | class ExtensionFileVisitor(object):
 49 |     def __init__(self):
 50 |         self.location_stack = []
 51 | 
 52 |     def __fail(self, err):
 53 |         loc = "/".join(self.location_stack)
 54 |         raise Exception(f"Error visiting extension file.  Location={loc} Message={err}")
 55 | 
 56 |     def __visit_list(self, visitor, obj, attr, required=False):
 57 |         if attr in obj:
 58 |             val = obj[attr]
 59 |             results = []
 60 |             if not isinstance(val, Iterable):
 61 |                 self.__fail(f"Expected attribute {attr} to be iterable")
 62 |             for idx, item in enumerate(val):
 63 |                 self.location_stack.append(f"{attr}[{idx}]")
 64 |                 results.append(visitor(item))
 65 |                 self.location_stack.pop()
 66 |             return results
 67 |         elif required:
 68 |             self.__fail(f"Expected required attribute {attr}")
 69 |         else:
 70 |             return []
 71 | 
 72 |     def __get_or_die(self, obj, attr):
 73 |         if attr in obj:
 74 |             return obj[attr]
 75 |         self.__fail(f"Expected required attribute {attr}")
 76 | 
 77 |     def __get_or_else(self, obj, attr, default):
 78 |         if attr in obj:
 79 |             return obj[attr]
 80 |         return default
 81 | 
 82 |     def visit_ext_file(self, parsed_file):
 83 |         scalar_functions = self.__visit_list(
 84 |             self.visit_function, parsed_file, "scalar_functions"
 85 |         )
 86 |         aggregate_functions = self.__visit_list(
 87 |             self.visit_function, parsed_file, "aggregate_functions"
 88 |         )
 89 |         return ExtensionsFile(scalar_functions + aggregate_functions)
 90 | 
 91 |     def visit_impl_arg(self, arg):
 92 |         name = self.__get_or_else(arg, "name", None)
 93 |         description = self.__get_or_else(arg, "description", None)
 94 |         value = self.__get_or_else(arg, "value", None)
 95 |         if value:
 96 |             return ValueArg(name, description, value)
 97 |         else:
 98 |             options = self.__get_or_else(arg, "options", None)
 99 |             if options is None:
100 |                 self.__fail(
101 |                     "Argument encountered that did not have any value or options"
102 |                 )
103 |             return EnumArg(name, description, options)
104 | 
105 |     def visit_implementation(self, impl):
106 |         args = self.__visit_list(self.visit_impl_arg, impl, "args")
107 |         options = self.__get_or_else(impl, "options", {})
108 |         opts = {}
109 |         variadic = "0"
110 |         if "variadic" in impl:
111 |             variadic = str(impl["variadic"]["min"])
112 |         for key in options.keys():
113 |             values = self.__get_or_die(options[key], "values")
114 |             opts[key] = values
115 |         return_type = self.__get_or_die(impl, "return")
116 |         return Implementation(args, opts, return_type, variadic)
117 | 
118 |     def visit_function(self, func):
119 |         name = self.__get_or_die(func, "name")
120 |         description = self.__get_or_else(func, "description", None)
121 |         implementations = self.__visit_list(self.visit_implementation, func, "impls")
122 |         return Function(name, description, implementations)
123 | 
124 | 
class ExtensionFileParser(object):
    """Parses an extension YAML stream into an ``ExtensionsFile``."""

    def parse(self, f: BinaryIO) -> ExtensionsFile:
        """Load ``f`` with the safe YAML loader and visit the loaded object."""
        data = yaml.load(f, SafeLoader)
        return ExtensionFileVisitor().visit_ext_file(data)
129 | 
130 | 
def add_extensions_file_to_library(
    location: str, ext_file: ExtensionsFile, library: LibraryBuilder
):
    """Register every function of ``ext_file`` with ``library``.

    The function category is the file name of ``location`` without its
    extension and with ``functions_`` removed. Each function's URI is set to
    the file name of ``location``. Every implementation contributes its
    options and one kernel.

    ``location`` may be a plain string or a path object.
    """
    # Normalise once. Reading ``location.name`` directly fails for the plain
    # string the signature advertises; for a path object the stem and name
    # obtained here are the same values as before.
    location_path = pathlib.Path(location)
    function_category = location_path.stem.replace("functions_", "")
    for func in ext_file.functions:
        builder: FunctionBuilder = library.get_function(func.name, function_category)
        builder.set_uri(location_path.name)
        if func.description is not None:
            builder.try_set_description(func.description)
        for impl in func.implementations:
            for opt_name, opt_values in impl.options.items():
                builder.note_option(opt_name, opt_values)
            # Value arguments contribute their type; enum arguments contribute
            # their options joined with "|".
            arg_types = [
                arg.type if isinstance(arg, ValueArg) else "|".join(arg.options)
                for arg in impl.args
            ]
            builder.note_kernel(
                arg_types, impl.return_type, impl.options.keys(), impl.variadic
            )
152 | 


--------------------------------------------------------------------------------
/bft/supplements/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/supplements/__init__.py


--------------------------------------------------------------------------------
/bft/supplements/parser.py:
--------------------------------------------------------------------------------
  1 | import pathlib
  2 | from typing import Dict, TextIO
  3 | 
  4 | from mistletoe.ast_renderer import get_ast
  5 | from mistletoe.block_token import Document, Heading, Paragraph
  6 | from mistletoe.html_renderer import HTMLRenderer
  7 | from mistletoe.span_token import RawText
  8 | 
  9 | from .types import BasicSupplement, OptionSupplement, SupplementsFile
 10 | 
 11 | 
 12 | class SupplementsParser(object):
 13 |     def __init__(self):
 14 |         self.html_renderer = HTMLRenderer()
 15 |         self.__reset()
 16 | 
 17 |     def __reset(self):
 18 |         self.__finish = None
 19 |         self.__paragraphs = []
 20 |         self.__sub_section_title = None
 21 |         self.__option_name = None
 22 |         self.__option_description = None
 23 |         self.__parsing_options = False
 24 |         self.__current_option_value = None
 25 |         self.__supplements = []
 26 |         self.options = {}
 27 |         self.details = []
 28 |         self.properties = []
 29 | 
 30 |     def __get_simple_text(self, heading: Heading) -> str:
 31 |         if len(heading.children) != 1:
 32 |             raise Exception(
 33 |                 f"Expected heading to have one line of simple text but there were {len(heading.children)} sub-elements"
 34 |             )
 35 |         text_child = heading.children[0]
 36 |         if not isinstance(text_child, RawText):
 37 |             raise Exception(
 38 |                 f"Expected heading to contain simple raw text butit was {type(text_child)}"
 39 |             )
 40 |         return text_child.content
 41 | 
 42 |     def __add_options(self):
 43 |         pass
 44 | 
 45 |     def __add_properties(self):
 46 |         self.properties = self.__supplements
 47 | 
 48 |     def __add_details(self):
 49 |         self.details = self.__supplements
 50 | 
 51 |     def __finish_last_task(self):
 52 |         if self.__finish is not None:
 53 |             self.__finish()
 54 |         self.__finish = None
 55 |         self.__parsing_options = False
 56 |         self.__supplements = []
 57 | 
 58 |     def __finish_option(self):
 59 |         if self.__option_name is None:
 60 |             return
 61 |         self.options[self.__option_name.lower()] = OptionSupplement(
 62 |             self.__option_description, self.__supplements
 63 |         )
 64 |         self.__option_name = None
 65 |         self.__supplements = []
 66 | 
 67 |     def __finish_section(self):
 68 |         if self.__sub_section_title is None:
 69 |             if len(self.__paragraphs) != 0:
 70 |                 raise Exception(
 71 |                     f"Encountered paragraphs but no L3/L4 section title starting at {self.__paragraphs[0]}"
 72 |                 )
 73 |             return
 74 |         if len(self.__paragraphs) == 0:
 75 |             raise Exception(f"Sub-section {self.__sub_section_title} had no paragraphs")
 76 |         self.__supplements.append(
 77 |             BasicSupplement(self.__sub_section_title, "\n".join(self.__paragraphs))
 78 |         )
 79 |         self.__sub_section_title = None
 80 |         self.__paragraphs = []
 81 | 
 82 |     def __finish_last_l3(self):
 83 |         if self.__parsing_options:
 84 |             self.__finish_option()
 85 |         else:
 86 |             self.__finish_section()
 87 | 
 88 |     def __finish_last_l4(self):
 89 |         if self.__option_name is None:
 90 |             return
 91 |         content = "\n".join(self.__paragraphs)
 92 |         if self.__current_option_value is None:
 93 |             self.__option_description = content
 94 |         else:
 95 |             self.__supplements.append(
 96 |                 BasicSupplement(self.__current_option_value.upper(), content)
 97 |             )
 98 |             self.__current_option_value = None
 99 |         self.__paragraphs = []
100 | 
101 |     def __parse_heading(self, heading: Heading):
102 |         heading_title = self.__get_simple_text(heading)
103 |         if heading.level == 2:
104 |             self.__finish_last_l4()
105 |             self.__finish_last_l3()
106 |             self.__finish_last_task()
107 |             if heading_title.lower() == "options":
108 |                 self.__parsing_options = True
109 |                 self.__finish = self.__add_options
110 |             elif heading_title.lower() == "details":
111 |                 self.__finish = self.__add_details
112 |             elif heading_title.lower() == "properties":
113 |                 self.__finish = self.__add_properties
114 |             else:
115 |                 raise Exception(f"Unexpected L2 heading '{heading_title}'")
116 |         elif heading.level == 3:
117 |             if self.__finish is None:
118 |                 raise Exception(
119 |                     f"L3 heading {heading_title} with no L2 heading preceding it"
120 |                 )
121 |             self.__finish_last_l4()
122 |             self.__finish_last_l3()
123 |             if self.__parsing_options:
124 |                 self.__option_name = heading_title
125 |             else:
126 |                 self.__sub_section_title = heading_title
127 |         elif heading.level == 4:
128 |             if not self.__parsing_options:
129 |                 raise Exception(
130 |                     f"L4 heading {heading_title} encountered but we are not currently parsing options"
131 |                 )
132 |             self.__finish_last_l4()
133 |             self.__current_option_value = heading_title
134 | 
135 |     def __parse_paragraph(self, paragraph: Paragraph):
136 |         self.__paragraphs.append(self.html_renderer.render_paragraph(paragraph))
137 | 
138 |     def __parse_child(self, child):
139 |         if isinstance(child, Heading):
140 |             self.__parse_heading(child)
141 |         elif isinstance(child, Paragraph):
142 |             self.__parse_paragraph(child)
143 |         else:
144 |             raise Exception(
145 |                 f"Unrecognized top-level element type in supplements file {type(child)}"
146 |             )
147 | 
148 |     def parse_supplements_doc(self, f: TextIO, directory_path: str) -> SupplementsFile:
149 |         self.__reset()
150 |         doc = Document(f)
151 | 
152 |         if len(doc.children) == 0:
153 |             raise Exception(
154 |                 "Supplements document appears to be empty.  It should at least have a title"
155 |             )
156 | 
157 |         title_section = doc.children[0]
158 |         if not isinstance(title_section, Heading) or title_section.level != 1:
159 |             raise Exception(
160 |                 "First element in a supplements doc should be a level 1 heading with the name of the function"
161 |             )
162 | 
163 |         function_name = self.__get_simple_text(title_section).lower()
164 |         for child in doc.children[1:]:
165 |             self.__parse_child(child)
166 | 
167 |         self.__finish_last_l4()
168 |         self.__finish_last_l3()
169 |         self.__finish_last_task()
170 | 
171 |         return SupplementsFile(
172 |             function_name, directory_path, self.options, self.details, self.properties
173 |         )
174 | 
175 | 
def load_supplements(supplements_dir: str) -> Dict[str, SupplementsFile]:
    """Parse every ``*.md`` file found recursively under ``supplements_dir``.

    Returns a dict keyed by the lower-cased function name of each document.
    When two documents name the same function, the one visited last wins.
    """
    supplements = {}
    parser = SupplementsParser()
    for sup_path in pathlib.Path(supplements_dir).rglob("*.md"):
        # Read with an explicit encoding so the result does not depend on the
        # platform's default locale encoding.
        # NOTE(review): assumes the markdown files are UTF-8 -- confirm.
        with open(sup_path, "r", encoding="utf-8") as sup_f:
            sup = parser.parse_supplements_doc(sup_f, str(sup_path.parent))
            supplements[sup.function.lower()] = sup
    return supplements
184 | 


--------------------------------------------------------------------------------
/bft/supplements/types.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, List, NamedTuple
 2 | 
 3 | 
class BasicSupplement(NamedTuple):
    """A titled piece of supplementary text."""

    # Title of the supplement
    title: str
    # Body text of the supplement
    description: str
 7 | 
 8 | 
class OptionSupplement(NamedTuple):
    """Supplementary text for one option and its values."""

    # Description of the option itself
    description: str
    # One supplement per documented value of the option
    values: List[BasicSupplement]
12 | 
13 | 
class SupplementsFile(NamedTuple):
    """All supplementary information for one function."""

    # Name of the function the supplements belong to
    function: str
    # Directory path associated with the supplements ("" for an empty file)
    dir_path: str
    # Option supplements keyed by option name
    options: Dict[str, OptionSupplement]
    # Detail sections
    details: List[BasicSupplement]
    # Property sections
    properties: List[BasicSupplement]
20 | 
21 | 
def empty_supplements_file(function_name: str):
    """Return a ``SupplementsFile`` for ``function_name`` with every other field empty."""
    return SupplementsFile(
        function=function_name,
        dir_path="",
        options={},
        details=[],
        properties=[],
    )
24 | 


--------------------------------------------------------------------------------
/bft/templates/function_desc.j2:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>{{ name }} Function - BFT</title>
  7 | 
  8 |     <meta name="viewport" content="width=device-width, initial-scale=1">
  9 |     {% if 'aggregate' in name %}
 10 |         <meta name="description" content="Reference for the {{ "_".join(name.split('_')[2:]) }} function">
 11 |     {% else %}
 12 |         <meta name="description" content="Reference for the {{ "_".join(name.split('_')[1:]) }} function">
 13 |     {% endif %}
 14 |     <link rel="stylesheet" href="./assets/supplementary/terminal.css">
 15 |     <link rel="stylesheet" href="./assets/supplementary/style.css">
 16 | </head>
 17 | 
 18 | <body class="terminal">
 19 |     <header class="container terminal-nav">
 20 |         <div class="terminal-logo">
 21 |             <div class="logo terminal-prompt">
 22 |                 <a href="#">The BFT</a>
 23 |             </div>
 24 |         </div>
 25 |         <nav class="terminal-menu">
 26 |             <ul>
 27 |                 <li>
 28 |                     <a href="./index.html" class="menu-item">Function Index</a>
 29 |                 </li>
 30 |             </ul>
 31 |         </nav>
 32 |     </header>
 33 | 
 34 |     <article class="container">
 35 |         <h2>{{ name|title}}</h2>
 36 |         <section>
 37 |             <p>
 38 |                 Defined in <a href="{{ uri }}">{{ uri_short }}</a>
 39 |             </p>
 40 |             <hr>
 41 |             <p>
 42 |                 {{ brief }}
 43 |             </p>
 44 |         </section>
 45 |         <hr>
 46 |         <section id="options">
 47 |             <h3>Options&nbsp;<a href="#options">&para;</a></h3>
 48 |             {% for option in options %}
 49 |                 <h4>{{option.name|title}}</h4>
 50 |                 {{option.description}}
 51 |                 <dl>
 52 |                     {% for value in option.values %}
 53 |                     <dt>{{value.name|upper}}</dt>
 54 |                     <dd>{{value.description}}
 55 |                     </dd>
 56 |                     {% endfor %}
 57 |                 </dl>
 58 |             {% endfor %}
 59 |         </section>
 60 |         <hr>
 61 |         <section id="kernels">
 62 |             <h3>Kernels&nbsp;<a href="#kernels">&para;</a></h3>
 63 |             <ul>
 64 |                 {% for kernel in kernels %}
 65 |                 <li class="bft-kernel"><span>{{ name }}({{ kernel.arg_types|join(', ') }}) -> {{ kernel.return_type }} : [{{ kernel.available_options|join(', ') }}]</span><span hidden>&nbsp;(not supported by dialect)</span></li>
 66 |                 {% endfor %}
 67 |             </ul>
 68 |         </section>
 69 |         <hr>
 70 |         <section id="dialects">
 71 |             <h3>Dialects&nbsp;<a href="#dialects">&para;</a></h3>
 72 |             <select id="dialect">
 73 |                 {% for dialect in dialects %}
 74 |                 <option>{{ dialect.name }}</option>
 75 |                 {% endfor %}
 76 |             </select>
 77 |             {% for dialect in dialects %}
 78 |             <dl class="dialect-definition" id="dialect-{{ dialect.name }}">
 79 |                 {% if dialect.options is none %}
 80 |                     <dt>Dialect isn't yet supported</dt>
 81 |                 {% else %}
 82 |                     {% for opt, val in dialect.options.items() %}
 83 |                         <dt>{{ opt }}</dt>
 84 |                         <dd>{{ val }}</dd>
 85 |                     {% endfor %}
 86 |                 {% endif %}
 87 |             </dl>
 88 |             {% endfor %}
 89 |         </section>
 90 |         <hr>
 91 |         <section id="details">
 92 |             <h3>Details&nbsp;<a href="#details">&para;</a></h3>
 93 |             {% if details %}
 94 |                 {% for detail in details %}
 95 |                     <h4>{{ detail.title }}</h4>
 96 |                     {{ detail.description }}
 97 |                 {% endfor %}
 98 |             {% else %}
 99 |                 <p>No supplemental information about the details available</p>
100 |             {% endif %}
101 |         </section>
102 |         <hr>
103 |         <section id="properties">
104 |             <h3>Properties&nbsp;<a href="#properties">&para;</a></h3>
105 |             {% if properties %}
106 |                 <dl>
107 |                     {% for property in properties | sort(attribute='id') %}
108 |                         <dt>{{ property.id }}</dt>
109 |                         <dd>{{ property.description }}</dd>
110 |                     {% endfor %}
111 |                 </dl>
112 |             {% else %}
113 |                 <p>No supplemental information about the properties available</p>
114 |             {% endif %}
115 |         </section>
116 |         <hr>
117 |         <section id="examples">
118 |             <h3>Examples&nbsp;<a href="#examples">&para;</a></h3>
119 |             {% for example_group in example_groups %}
120 |             <table>
121 |                 <caption>{{ example_group.description }}</caption>
122 |                 <thead>
123 |                     <tr>
124 |                         {% for opt in example_group.option_names %}
125 |                         <th>{{ opt }}</th>
126 |                         {% endfor %}
127 |                         {% for arg_type in example_group.arg_types %}
128 |                         <th>arg{{ loop.index }} ({{ arg_type }})</th>
129 |                         {% endfor %}
130 |                         <th>result ({{ example_group.result_type }})</th>
131 |                     </tr>
132 |                 </thead>
133 |                 <tbody>
134 |                     {% for case in example_group.cases %}
135 |                     <tr class="bft-case">
136 |                         {% for opt in case.options %}
137 |                         <td>{{ opt }}</td>
138 |                         {% endfor %}
139 |                         {% for arg in case.args %}
140 |                         <td>{{ arg }}</td>
141 |                         {% endfor %}
142 |                         <td>{{ case.result.value | default(case.result | upper) }}</td>
143 |                     </tr>
144 |                     <tr class="bft-error-case bft-case-err-message" hidden>
145 |                         <td colspan="4"/>
146 |                     </tr>
147 |                     {% endfor -%}
148 |                 </tbody>
149 |             </table>
150 |             {% endfor %}
151 |         </section>
152 |     </article>
153 |     <script src="./assets/supplementary/script.js"></script>
154 |     <script>
155 |       var bftDialects = {
156 |         {% for dialect in dialects %}
157 |         '{{dialect.name}}': {
158 |             examples: [
159 |                 {% for case in dialect.case_info %}
160 |                 {% if case %}
161 |                   "{{ case }}",
162 |                 {% else %}
163 |                   null,
164 |                 {% endif %}
165 |                 {% endfor %}
166 |             ],
167 |             kernels: [
168 |                 {% for kernel in dialect.kernel_info %}
169 |                 {{ kernel|lower }},
170 |                 {% endfor %}
171 |             ]
172 |         },
173 |         {% endfor %}
174 |       };
175 |       window.addEventListener('load', function() {
176 |         window.bftInitialize(bftDialects);
177 |       });
178 |     </script>
179 | </body>
180 | </html>
181 | 


--------------------------------------------------------------------------------
/bft/templates/function_index.j2:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>BFT - Home</title>
  7 | 
  8 |     <meta name="viewport" content="width=device-width, initial-scale=1">
  9 |     <meta name="description" content="Big Function Taxonomy">
 10 | 
 11 |     <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
 12 |     <link rel="stylesheet" href="./assets/index/style.css">
 13 |     <link rel="preconnect" href="https://fonts.googleapis.com">
 14 |     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
 15 |     <link href="https://fonts.googleapis.com/css2?family=Work+Sans:wght@300;400;500;700&display=swap" rel="stylesheet">
 16 | 
 17 | </head>
 18 | 
 19 | <body>
 20 | 
 21 |     <nav class="navbar" style="background-color: #201D38;">
 22 |         <div class="navbar-brand logo">
 23 |             <span class="terminal-text" style="font-size: 200%; font-family: 'Work Sans', sans-serif; color: white;">
 24 |                 Big Function Taxonomy
 25 |             </span>
 26 |         </div>
 27 | 
 28 |         <div class="ml-auto">
 29 |             <form class="form-inline" onsubmit="searchFunctions(); return false;">
 30 |                 <input class="form-control mr-2" type="text" id="searchInput" placeholder="Search functions...">
 31 |                 <button class="btn btn-outline-light" type="submit">Search</button>
 32 |             </form>
 33 |         </div>
 34 |     </nav>
 35 | 
 36 |     <div class="container-fluid">
 37 |         <div class="row">
 38 |             <div class="col-md-3 menu-container mt-5">
 39 |                 <ul class="list-unstyled">
 40 |                     <li class="menu-item" onclick="showContent('home')">Home</li>
 41 |                     {% set functions_by_category = functions|groupby('category') %}
 42 |                     {% for category, category_functions in functions_by_category %}
 43 |                         <li class="menu-item" onclick="showContent('{{ category|lower }}')">{{ category|title }} Functions</li>
 44 |                     {% endfor %}
 45 |                 </ul>
 46 |             </div>
 47 |             <div class="col-md-9 content-container">
 48 |                 
 49 |                 <article class="container mt-4" id="home">
 50 |                     <div class="fixed-container">
 51 |                         <h2 class="category-title">The B(ig) F(unction) T(axonomy)</h2>
 52 |                     </div>
 53 |                     <section>
 54 |                         <p style="color: black;">
 55 |                             The BFT aims to be a comprehensive catalogue of functions. Functions are the backbone of 
 56 |                             any compute system, but they are chronically under-documented and often full of corner
 57 |                             cases whose behavior differs in various systems. By exhaustively documenting
 58 |                             these corner cases, we hope to make it possible for systems to fully describe their behaviors.
 59 |                             This will make it easier to know what problems will be encountered switching between systems and, 
 60 |                             in some cases, make it possible to obtain the correct behavior through expression transformation 
 61 |                             or a precise application of function options.
 62 |                         </p>
 63 |                     <section>
 64 |                         <div class="row">
 65 |                             {% for category, category_functions in functions_by_category %}
 66 |                                 <div class="col-md-6 mb-3" onclick="showContent('{{ category|lower }}')" style="cursor: pointer;">
 67 |                                     <div class="card">
 68 |                                         <div class="card-body">
 69 |                                             <h5 class="card-title">{{ category|title }} Functions</h5>
 70 |                                         </div>
 71 |                                     </div>
 72 |                                 </div>
 73 |                             {% endfor %}
 74 |                         </div>
 75 |                     </section>
 76 |                     </section>
 77 |                 </article>
 78 | 
 79 |                 <article class="container hidden mt-4" id="searchResultsSection">
 80 |                     <!-- Search results will be shown here -->
 81 |                 </article>
 82 | 
 83 |                 {% for category, category_functions in functions_by_category %}
 84 |                     <article class="container hidden mt-4" id="{{ category|lower }}">
 85 |                         <div class="fixed-container">
 86 |                             <h2 class="category-title">{{ category|title }} Functions</h2>
 87 |                         </div>
 88 |                         <section>
 89 |                             <table class="functions-table">
 90 |                                 <tbody>
 91 |                                     {% for function in category_functions %}
 92 |                                         <tr onclick="window.location='./{{ function.name|lower }}.html';" style="cursor: pointer;">
 93 |                                             {% if 'aggregate' in function.name %}
 94 |                                                 <td class="title-column">{{ "_".join(function.name.split('_')[2:])|title }}</td>
 95 |                                                 <td>{{ function.brief }}</td>
 96 |                                             {% else %}
 97 |                                                 <td class="title-column">{{ "_".join(function.name.split('_')[1:])|title }}</td>
 98 |                                                 <td>{{ function.brief }}</td>
 99 |                                             {% endif %}
100 |                                         </tr>
101 |                                     {% endfor %}
102 |                                 </tbody>
103 |                             </table>
104 |                         </section>
105 |                     </article>
106 |                 {% endfor %}
107 |             </div>
108 |         </div>
109 |     </div>
110 | 
111 | 
112 |     <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js"></script>
113 |     <script src="https://cdn.jsdelivr.net/npm/@popperjs/core@2.9.1/dist/umd/popper.min.js"></script>
114 |     <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"></script>
115 |     <script src="./assets/index/script.js"></script>
116 | 
117 |     <script>
118 |         var functionsData = [
119 |             {% for category, category_functions in functions_by_category %}
120 |                 {% for function in category_functions %}
121 |                     {
122 |                         category: "{{ category|title }}",
123 |                         name: "{{ function.name|title }}",
124 |                         brief: {{ function.brief|replace("\n", "\\n")|replace('"', '\\"')|tojson|safe }}
125 |                     },
126 |                 {% endfor %}
127 |             {% endfor %}
128 |         ];
129 |     </script>
130 | 
131 | </body>
132 | </html>
133 | 


--------------------------------------------------------------------------------
/bft/testers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/__init__.py


--------------------------------------------------------------------------------
/bft/testers/base_tester.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from pathlib import Path
 3 | from typing import List, NamedTuple
 4 | 
 5 | from bft.cases.runner import CaseRunner
 6 | from bft.cases.types import Case
 7 | from bft.dialects.types import Dialect, DialectsLibrary
 8 | 
 9 | 
class TestResult(NamedTuple):
    """Outcome of running one case, as assembled by ``BaseTester.run_test``."""

    # Function the case exercises (taken from case.function).
    function: str
    # Id of the group the case belongs to (case.group.id).
    group: str
    # Zero-based count of cases already run for the same group.
    index: int
    # Whether the runner reported the case as passed.
    passed: bool
    # Whether the runner expected the case to pass (result.expected_pass).
    should_have_passed: bool
    # Explanation carried by the runner's result.
    reason: str
17 | 
18 | 
class BaseTester(ABC):
    """Common driver for running cases against one dialect.

    Subclasses supply the dialect and the runner; this class wires them
    together in ``prepare`` and numbers the results per group in ``run_test``.
    """

    @abstractmethod
    def get_runner(self, dialect: Dialect) -> CaseRunner:
        """Return the runner that executes cases for ``dialect``."""

    @abstractmethod
    def get_dialect(self, library: DialectsLibrary) -> Dialect:
        """Return this tester's dialect, looked up in ``library``."""

    def prepare(self, dialects: DialectsLibrary):
        """Resolve the dialect, create its runner and reset the group counters."""
        self.dialect = self.get_dialect(dialects)
        self.runner = self.get_runner(self.dialect)
        self.group_indices = {}

    def run_test(self, case: Case) -> TestResult:
        """Run ``case`` and wrap the outcome in a ``TestResult``.

        The result's index is the number of cases previously run for the same
        group id; the counter is advanced only after the runner has returned.
        """
        outcome = self.runner.run_case(case)
        group_id = case.group.id
        position = self.group_indices.get(group_id, 0)
        self.group_indices[group_id] = position + 1
        return TestResult(
            function=case.function,
            group=group_id,
            index=position,
            passed=outcome.passed,
            should_have_passed=outcome.expected_pass,
            reason=outcome.reason,
        )
45 | 


--------------------------------------------------------------------------------
/bft/testers/cudf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/cudf/__init__.py


--------------------------------------------------------------------------------
/bft/testers/cudf/runner.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import operator
  3 | 
  4 | import cudf
  5 | import numpy
  6 | 
  7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
  8 | from bft.cases.types import Case
  9 | from bft.dialects.types import SqlMapping
 10 | from bft.utils.utils import type_to_dialect_type
 11 | 
# Maps the type names used by test cases to cudf dtypes.  "timestamp" and
# "date" both map to second-resolution datetime64.
type_map = {
    "i8": cudf.dtype("int8"),
    "i16": cudf.dtype("int16"),
    "i32": cudf.dtype("int32"),
    "i64": cudf.dtype("int64"),
    "fp32": cudf.dtype("float32"),
    "fp64": cudf.dtype("float64"),
    "boolean": cudf.dtype("bool"),
    "string": cudf.dtype("string"),
    "timestamp": cudf.dtype("datetime64[s]"),
    "date": cudf.dtype("datetime64[s]"),
}
 24 | 
 25 | 
 26 | def type_to_cudf_dtype(type: str):
 27 |     return type_to_dialect_type(type, type_map)
 28 | 
 29 | 
 30 | def is_string_function(data_types):
 31 |     return cudf.dtype("string") in data_types
 32 | 
 33 | 
 34 | def is_datetime_function(data_types):
 35 |     return cudf.dtype("datetime64[s]") in data_types
 36 | 
 37 | 
 38 | def is_numpy_type(data_type):
 39 |     return type(data_type).__module__ == numpy.__name__
 40 | 
 41 | 
 42 | def get_str_fn_result(
 43 |     fn_name: str, arg_vectors: list[cudf.Series], arg_values: list[str], is_regexp: bool
 44 | ):
 45 |     if len(arg_vectors) == 1:
 46 |         fn = getattr(arg_vectors[0].str, fn_name)
 47 |         return fn()
 48 |     elif len(arg_vectors) == 2:
 49 |         fn = getattr(arg_vectors[0].str, fn_name)
 50 |         if is_regexp:
 51 |             return fn(arg_values[1], regex=True)
 52 |         else:
 53 |             return fn(arg_values[1])
 54 |     else:
 55 |         fn = getattr(arg_vectors[0].str, fn_name)
 56 |         opt_arg = True if arg_values[2] is not None else False
 57 |         if opt_arg and is_regexp:
 58 |             return fn(arg_values[1], arg_values[2], regex=True)
 59 |         elif opt_arg:
 60 |             return fn(arg_values[1], arg_values[2])
 61 |         else:
 62 |             return fn(arg_values[1])
 63 | 
 64 | 
 65 | def get_dt_fn_result(
 66 |     mapping: str, dtype, arg_vectors: list[cudf.Series], arg_values: list[str]
 67 | ):
 68 |     fn_name = mapping.local_name
 69 |     if len(arg_vectors) == 2:
 70 |         if mapping.infix:
 71 |             gdf = cudf.DataFrame(
 72 |                 {"a": arg_values[0], "b": arg_values[1]},
 73 |                 dtype=dtype,
 74 |             )
 75 |             result = gdf.eval(f"(a){fn_name}(b)")
 76 |         elif mapping.extract:
 77 |             extract_property = arg_values[0].lower()
 78 |             result = getattr(arg_vectors[1].dt, extract_property)
 79 |     return result
 80 | 
 81 | 
 82 | class CudfRunner(SqlCaseRunner):
 83 |     def __init__(self, dialect):
 84 |         super().__init__(dialect)
 85 | 
 86 |     def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
 87 |         arg_vectors = []
 88 |         arg_values = []
 89 |         data_types = []
 90 |         fn_name = mapping.local_name
 91 |         is_regexp = True if "regexp" in case.function else False
 92 |         for arg in case.args:
 93 |             dtype = type_to_cudf_dtype(arg.type)
 94 |             if dtype is None:
 95 |                 return SqlCaseResult.unsupported(
 96 |                     f"The type {arg.type} is not supported"
 97 |                 )
 98 |             arg_vectors.append(cudf.Series(arg.value, dtype=dtype))
 99 |             arg_values.append(arg.value)
100 |             data_types.append(dtype)
101 | 
102 |         try:
103 |             if is_datetime_function(data_types):
104 |                 result = get_dt_fn_result(mapping, dtype, arg_vectors, arg_values)
105 |             elif is_string_function(data_types):
106 |                 result = get_str_fn_result(fn_name, arg_vectors, arg_values, is_regexp)
107 |             elif len(arg_vectors) == 1:
108 |                 # Some functions that only take a single arg are able to be executed against
109 |                 # both a Series and a Dataframe whereas others are only able to be executed against a Dataframe.
110 |                 if mapping.aggregate:
111 |                     arg_values = arg_values[0]
112 |                 try:
113 |                     gdf = cudf.DataFrame({"a": arg_values}, dtype=dtype)
114 |                     result = gdf.eval(f"{fn_name}(a)")
115 |                 except ValueError:
116 |                     fn = getattr(arg_vectors[0], fn_name)
117 |                     result = fn()
118 |             elif len(arg_vectors) == 2:
119 |                 if mapping.infix:
120 |                     # If there are only Null/Nan/None values in the column, they are set to False instead of <NA>.
121 |                     # We add extra data to ensure the <NA> value exists in the dataframe.
122 |                     gdf = cudf.DataFrame(
123 |                         {"a": [arg_values[0], True], "b": [arg_values[1], True]},
124 |                         dtype=dtype,
125 |                     )
126 |                     result = gdf.eval(f"(a){fn_name}(b)")
127 |                 else:
128 |                     try:
129 |                         fn = getattr(arg_vectors[0], fn_name)
130 |                         result = fn(arg_vectors[1])
131 |                     except AttributeError:
132 |                         fn = getattr(operator, fn_name)
133 |                         result = fn(arg_vectors[0], arg_vectors[1])
134 |                     except ValueError:  # Case for round function
135 |                         fn = getattr(arg_vectors[0], fn_name)
136 |                         result = fn(arg_values[1])
137 |             else:
138 |                 fn = getattr(arg_vectors[0], fn_name)
139 |                 try:
140 |                     result = fn(arg_vectors[1:])
141 |                 except TypeError:
142 |                     result = fn(arg_values[1], arg_values[2])
143 |         except RuntimeError as err:
144 |             return SqlCaseResult.error(str(err))
145 | 
146 |         if mapping.aggregate:
147 |             if is_numpy_type(result):
148 |                 result = result.item()
149 |         else:
150 |             if result.empty and (
151 |                 case.result.value is None or case.result.value is False
152 |             ):
153 |                 return SqlCaseResult.success()
154 |             elif len(result) != 1 and not mapping.infix:
155 |                 raise Exception("Scalar function with one row output more than one row")
156 |             else:
157 |                 result = result[0]
158 | 
159 |         if case.result == "undefined":
160 |             return SqlCaseResult.success()
161 |         elif case.result == "error":
162 |             return SqlCaseResult.unexpected_pass(str(result))
163 |         elif case.result == "nan":
164 |             if math.isnan(result):
165 |                 return SqlCaseResult.success()
166 |         else:
167 |             if case.result.value is None:
168 |                 if str(result) == "<NA>" or math.isnan(result) or result is None:
169 |                     return SqlCaseResult.success()
170 |                 else:
171 |                     return SqlCaseResult.mismatch(str(result))
172 |             elif case.result.value == result:
173 |                 return SqlCaseResult.success()
174 |             elif case.result.value == str(result):
175 |                 return SqlCaseResult.success()
176 |             elif numpy.float32(case.result.value) == result:
177 |                 return SqlCaseResult.success()
178 |             else:
179 |                 return SqlCaseResult.mismatch(str(result))
180 | 


--------------------------------------------------------------------------------
/bft/testers/cudf/tester.py:
--------------------------------------------------------------------------------
 1 | from bft.dialects.types import Dialect, DialectsLibrary
 2 | from bft.testers.base_tester import BaseTester
 3 | 
 4 | from .runner import CudfRunner
 5 | 
 6 | 
 7 | class CudfTester(BaseTester):
 8 |     def get_runner(self, dialect: Dialect):
 9 |         return CudfRunner(dialect)
10 | 
11 |     def get_dialect(self, library: DialectsLibrary):
12 |         return library.get_dialect_by_name("cudf")
13 | 


--------------------------------------------------------------------------------
/bft/testers/datafusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/datafusion/__init__.py


--------------------------------------------------------------------------------
/bft/testers/datafusion/runner.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from datetime import datetime
  3 | 
  4 | import datafusion
  5 | import numpy
  6 | import pyarrow as pa
  7 | 
  8 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
  9 | from bft.cases.types import Case, CaseLiteral
 10 | from bft.dialects.types import SqlMapping
 11 | from bft.utils.utils import type_to_dialect_type
 12 | 
# Maps the type names used by test cases to pyarrow types.  All of the
# date/time-like names ("date", "time", "timestamp", "timestamp_tz") map to a
# second-resolution timestamp.
type_map = {
    "i8": pa.int8(),
    "i16": pa.int16(),
    "i32": pa.int32(),
    "i64": pa.int64(),
    "fp32": pa.float32(),
    "fp64": pa.float64(),
    "boolean": pa.bool_(),
    "string": pa.string(),
    "date": pa.timestamp("s"),
    "time": pa.timestamp("s"),
    "timestamp": pa.timestamp("s"),
    "timestamp_tz": pa.timestamp("s"),
}
 27 | 
 28 | 
 29 | def type_to_datafusion_type(type: str):
 30 |     return type_to_dialect_type(type, type_map)
 31 | 
 32 | 
 33 | def handle_special_cases(lit: CaseLiteral):
 34 |     if lit == "nan":
 35 |         return math.nan
 36 |     elif lit == "inf":
 37 |         return float("inf")
 38 |     elif lit == "-inf":
 39 |         return float("-inf")
 40 |     return lit
 41 | 
 42 | 
 43 | def is_string_type(arg):
 44 |     return (
 45 |         arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
 46 |         or arg.value in ["Null"]
 47 |     ) and arg.value is not None
 48 | 
 49 | 
 50 | def arg_with_type(arg):
 51 |     if is_string_type(arg):
 52 |         arg_val = str(arg.value)
 53 |     elif isinstance(arg.value, list) or arg.value is None:
 54 |         arg_val = None
 55 |     elif arg.type.startswith("i"):
 56 |         arg_val = int(arg.value)
 57 |     elif arg.type.startswith("fp"):
 58 |         arg_val = float(arg.value)
 59 |     else:
 60 |         arg_val = arg.value
 61 |     return arg_val
 62 | 
 63 | 
 64 | def str_to_datetime(str_val, type):
 65 |     if type == "time":
 66 |         return datetime.strptime(str_val, "%H:%M:%S.%f")
 67 |     if len(str_val) > 19:
 68 |         return datetime.strptime(str_val, "%Y-%m-%d %H:%M:%S %Z")
 69 |     elif len(str_val) < 16:
 70 |         return datetime.strptime(str_val, "%Y-%m-%d")
 71 |     else:
 72 |         return datetime.strptime(str_val, "%Y-%m-%d %H:%M:%S")
 73 | 
 74 | 
 75 | class DatafusionRunner(SqlCaseRunner):
 76 |     def __init__(self, dialect):
 77 |         super().__init__(dialect)
 78 |         self.ctx = datafusion.SessionContext()
 79 | 
 80 |     def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
 81 | 
 82 |         try:
 83 |             arg_vectors = []
 84 |             arg_names = []
 85 |             arg_vals_list = []
 86 |             orig_types = []
 87 |             arg_types_list = []
 88 | 
 89 |             if mapping.aggregate:
 90 |                 arg_vectors = []
 91 |                 for arg_idx, arg in enumerate(case.args):
 92 |                     arg_vals = []
 93 |                     arg_type = type_to_datafusion_type(arg.type)
 94 |                     if arg_type is None:
 95 |                         return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
 96 |                     for val in arg.value:
 97 |                         arg_vals.append(handle_special_cases(val))
 98 |                     arg_names.append(f"arg{arg_idx}")
 99 |                     arg_vectors.append(pa.array(arg_vals, arg_type))
100 |             else:
101 |                 for arg_idx, arg in enumerate(case.args):
102 |                     arg_val = arg_with_type(arg)
103 |                     arg_type = type_to_datafusion_type(arg.type)
104 |                     if arg_type is None:
105 |                         return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
106 |                     orig_types.append(arg.type)
107 |                     arg_vals_list.append(arg_val)
108 |                     arg_types_list.append(arg_type)
109 |                     arg_names.append(f"arg{arg_idx}")
110 | 
111 |                 for val, arg_type, orig_type in zip(
112 |                     arg_vals_list, arg_types_list, orig_types
113 |                 ):
114 |                     if isinstance(arg_type, pa.lib.TimestampType):
115 |                         val = str_to_datetime(val, orig_type)
116 |                     arg_vectors.append(pa.array([val], arg_type))
117 | 
118 |             joined_arg_names = ",".join(arg_names)
119 |             batch = pa.RecordBatch.from_arrays(
120 |                 arg_vectors,
121 |                 names=arg_names,
122 |             )
123 |             self.ctx.register_record_batches("my_table", [[batch]])
124 |             if mapping.infix:
125 |                 if len(case.args) != 2:
126 |                     raise Exception(f"Infix function with {len(case.args)} args")
127 |                 expr_str = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
128 |             elif mapping.postfix:
129 |                 if len(arg_names) != 1:
130 |                     raise Exception(f"Postfix function with {len(arg_names)} args")
131 |                 expr_str = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
132 |             elif mapping.extract:
133 |                 if len(arg_names) != 2:
134 |                     raise Exception(f"Extract function with {len(arg_names)} args")
135 |                 expr_str = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
136 |             elif mapping.local_name == 'count(*)':
137 |                 expr_str = f"SELECT {mapping.local_name} FROM my_table;"
138 |             elif mapping.aggregate:
139 |                 if len(arg_names) < 1:
140 |                     raise Exception(f"Aggregate function with {len(arg_names)} args")
141 |                 expr_str = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
142 |             else:
143 |                 expr_str = (
144 |                     f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
145 |                 )
146 | 
147 |             result = self.ctx.sql(expr_str).collect()[0].columns[0].to_pylist()
148 | 
149 |             if len(result) != 1:
150 |                 raise Exception("Scalar function with one row output more than one row")
151 |             result = result[0]
152 | 
153 |             if case.result == "undefined":
154 |                 return SqlCaseResult.success()
155 |             elif case.result == "error":
156 |                 return SqlCaseResult.unexpected_pass(str(result))
157 |             elif case.result == "nan":
158 |                 if math.isnan(result):
159 |                     return SqlCaseResult.success()
160 |             # Issues with python float comparison:
161 |             # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
162 |             # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
163 |             # Datafusion bug with float when converting from a dataframe to a pylist:
164 |             # https://github.com/apache/arrow-datafusion/issues/9950
165 |             elif case.result.type.startswith('fp') and case.result.value:
166 |                 if math.isclose(result, case.result.value, rel_tol=1e-6):
167 |                     return SqlCaseResult.success()
168 |             else:
169 |                 if result == case.result.value:
170 |                     return SqlCaseResult.success()
171 |                 else:
172 |                     return SqlCaseResult.mismatch(str(result))
173 |         except Exception as err:
174 |             return SqlCaseResult.error(str(err))
175 |         finally:
176 |             self.ctx.deregister_table("my_table")
177 | 


--------------------------------------------------------------------------------
/bft/testers/datafusion/tester.py:
--------------------------------------------------------------------------------
 1 | from bft.dialects.types import Dialect, DialectsLibrary
 2 | from bft.testers.base_tester import BaseTester
 3 | 
 4 | from .runner import DatafusionRunner
 5 | 
 6 | 
 7 | class DatafustionTester(BaseTester):
 8 |     def get_runner(self, dialect: Dialect):
 9 |         return DatafusionRunner(dialect)
10 | 
11 |     def get_dialect(self, library: DialectsLibrary):
12 |         return library.get_dialect_by_name("datafusion")
13 | 


--------------------------------------------------------------------------------
/bft/testers/duckdb/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/duckdb/__init__.py


--------------------------------------------------------------------------------
/bft/testers/duckdb/runner.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import math
  3 | from typing import Dict, NamedTuple
  4 | 
  5 | import duckdb
  6 | 
  7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
  8 | from bft.cases.types import Case
  9 | from bft.dialects.types import SqlMapping
 10 | from bft.utils.utils import type_to_dialect_type, datetype_value_equal
 11 | 
# Lookup table from the type names used by test-case arguments to the DuckDB
# column types used when declaring the scratch table's columns. It is handed
# to type_to_dialect_type by type_to_duckdb_type.
type_map = {
    "i8": "TINYINT",
    "i16": "SMALLINT",
    "i32": "INTEGER",
    "i64": "BIGINT",
    "fp32": "REAL",
    "fp64": "DOUBLE",
    "boolean": "BOOLEAN",
    "string": "VARCHAR",
    "date": "DATE",
    "time": "TIME",
    "timestamp": "TIMESTAMP",
    "timestamp_tz": "TIMESTAMPTZ",
    "interval": "INTERVAL",
    "decimal": "DECIMAL",
}
 28 | 
 29 | 
 30 | def type_to_duckdb_type(type: str):
 31 |     return type_to_dialect_type(type, type_map)
 32 | 
 33 | 
 34 | def literal_to_str(lit: str | int | float):
 35 |     if lit is None:
 36 |         return "null"
 37 |     elif lit in [math.nan, "nan"]:
 38 |         return "'NaN'"
 39 |     elif lit in [float("inf"), "inf"]:
 40 |         return "'Infinity'"
 41 |     elif lit in [float("-inf"), "-inf"]:
 42 |         return "'-Infinity'"
 43 |     return str(lit)
 44 | 
 45 | 
 46 | def is_string_type(arg):
 47 |     return (
 48 |         arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
 49 |         and arg.value is not None
 50 |     )
 51 | 
 52 | 
 53 | def is_datetype(arg):
 54 |     return type(arg) in [datetime.datetime, datetime.date, datetime.timedelta]
 55 | 
 56 | class DuckDBRunner(SqlCaseRunner):
 57 |     def __init__(self, dialect):
 58 |         super().__init__(dialect)
 59 |         self.conn = duckdb.connect()
 60 | 
 61 |     def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
 62 | 
 63 |         try:
 64 |             arg_defs = [
 65 |                 f"arg{idx} {type_to_duckdb_type(arg.type)}"
 66 |                 for idx, arg in enumerate(case.args)
 67 |             ]
 68 |             schema = ",".join(arg_defs)
 69 |             self.conn.execute(f"CREATE TABLE my_table({schema});")
 70 |             self.conn.execute(f"SET TimeZone='UTC';")
 71 | 
 72 |             arg_names = [f"arg{idx}" for idx in range(len(case.args))]
 73 |             joined_arg_names = ",".join(arg_names)
 74 |             arg_vals_list = list()
 75 |             for arg in case.args:
 76 |                 if is_string_type(arg):
 77 |                     arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
 78 |                 else:
 79 |                     arg_vals_list.append(literal_to_str(arg.value))
 80 |             arg_vals = ", ".join(arg_vals_list)
 81 |             if mapping.aggregate:
 82 |                 arg_vals_list = list()
 83 |                 for arg in case.args:
 84 |                     arg_vals = ""
 85 |                     for value in arg.value:
 86 |                         if is_string_type(arg):
 87 |                             if value:
 88 |                                 arg_vals += f"('{literal_to_str(value)}'),"
 89 |                             else:
 90 |                                 arg_vals += f"({literal_to_str(value)}),"
 91 |                         else:
 92 |                             arg_vals += f"({literal_to_str(value)}),"
 93 |                     arg_vals_list.append([arg_vals[:-1]])
 94 |                 for arg_name, arg_vals in zip(arg_names, arg_vals_list):
 95 |                     if len(arg_vals[0]):
 96 |                         self.conn.execute(
 97 |                             f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
 98 |                         )
 99 |             else:
100 |                 self.conn.execute(
101 |                     f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
102 |                 )
103 | 
104 |             if mapping.infix:
105 |                 if len(arg_names) != 2:
106 |                     raise Exception(f"Infix function with {len(arg_names)} args")
107 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
108 |             elif mapping.postfix:
109 |                 if len(arg_names) != 1:
110 |                     raise Exception(f"Postfix function with {len(arg_names)} args")
111 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
112 |             elif mapping.extract:
113 |                 if len(arg_names) != 2:
114 |                     raise Exception(f"Extract function with {len(arg_names)} args")
115 |                 expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
116 |             elif mapping.local_name == "count(*)":
117 |                 expr = f"SELECT {mapping.local_name} FROM my_table;"
118 |             elif mapping.aggregate:
119 |                 if len(arg_names) < 1:
120 |                     raise Exception(f"Aggregate function with {len(arg_names)} args")
121 |                 expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
122 |             else:
123 |                 expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
124 |             result = self.conn.execute(expr).fetchone()[0]
125 | 
126 |             if case.result == "undefined":
127 |                 return SqlCaseResult.success()
128 |             elif case.result == "error":
129 |                 return SqlCaseResult.unexpected_pass(str(result))
130 |             elif str(result) == "nan":
131 |                 if case.result == "nan":
132 |                     return SqlCaseResult.success()
133 |                 else:
134 |                     return SqlCaseResult.mismatch(str(result))
135 |             # Issues with python float comparison:
136 |             # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
137 |             # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
138 |             elif case.result.type.startswith("fp") and case.result.value and result:
139 |                 if math.isclose(result, case.result.value, rel_tol=1e-7):
140 |                     return SqlCaseResult.success()
141 |             else:
142 |                 if result == case.result.value:
143 |                     return SqlCaseResult.success()
144 |                 elif is_datetype(result) and datetype_value_equal(
145 |                     result, case.result.value
146 |                 ):
147 |                     return SqlCaseResult.success()
148 |                 else:
149 |                     return SqlCaseResult.mismatch(str(result))
150 |         except duckdb.Error as err:
151 |             return SqlCaseResult.error(str(err))
152 |         finally:
153 |             self.conn.execute("DROP TABLE my_table")
154 | 


--------------------------------------------------------------------------------
/bft/testers/duckdb/runner_test.py:
--------------------------------------------------------------------------------
1 | from bft.testers.duckdb.runner import type_to_duckdb_type
2 | 
3 | 
def test_type_to_duckdb_type():
    """Check a plain type, a parameterized decimal and an unknown type name."""
    expected_by_name = [
        ("interval", "INTERVAL"),
        ("decimal<37, 3>", "DECIMAL(37, 3)"),
    ]
    for type_name, duckdb_type in expected_by_name:
        assert type_to_duckdb_type(type_name) == duckdb_type
    # Names without a mapping must come back as None.
    assert type_to_duckdb_type("non_existent") is None
8 | 


--------------------------------------------------------------------------------
/bft/testers/duckdb/tester.py:
--------------------------------------------------------------------------------
 1 | from bft.dialects.types import Dialect, DialectsLibrary
 2 | from bft.testers.base_tester import BaseTester
 3 | 
 4 | from .runner import DuckDBRunner
 5 | 
 6 | 
 7 | class DuckDBTester(BaseTester):
 8 |     def get_runner(self, dialect: Dialect):
 9 |         return DuckDBRunner(dialect)
10 | 
11 |     def get_dialect(self, library: DialectsLibrary):
12 |         return library.get_dialect_by_name("duckdb")
13 | 


--------------------------------------------------------------------------------
/bft/testers/postgres/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/postgres/__init__.py


--------------------------------------------------------------------------------
/bft/testers/postgres/runner.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import math
  3 | import os
  4 | 
  5 | import psycopg
  6 | 
  7 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
  8 | from bft.cases.types import Case
  9 | from bft.dialects.types import SqlMapping
 10 | from bft.utils.utils import datetype_value_equal
 11 | 
# Lookup table from the type names used by test-case arguments to the
# PostgreSQL column types used when declaring the scratch table's columns.
# Names missing here are reported as unsupported by the runner.
type_map = {
    "i16": "smallint",
    "i32": "integer",
    "i64": "bigint",
    "fp32": "float4",
    "fp64": "float8",
    "boolean": "boolean",
    "string": "text",
    "date": "date",
    "time": "time",
    "timestamp": "timestamp",
    "timestamp_tz": "timestamptz",
    "interval": "interval",
}
 26 | 
 27 | 
 28 | def type_to_postgres_type(type: str):
 29 |     if type not in type_map:
 30 |         return None
 31 |     return type_map[type]
 32 | 
 33 | 
 34 | def literal_to_str(lit: str | int | float):
 35 |     if lit is None:
 36 |         return "null"
 37 |     elif lit in [float("inf"), "inf"]:
 38 |         return "'Infinity'"
 39 |     elif lit in [float("-inf"), "-inf"]:
 40 |         return "'-Infinity'"
 41 |     return str(lit)
 42 | 
 43 | 
 44 | def is_string_type(arg):
 45 |     return (
 46 |         arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
 47 |         and arg.value is not None
 48 |     )
 49 | 
 50 | 
 51 | def is_datetype(arg):
 52 |     print(f"postgres type is: {type(arg)}")
 53 |     return type(arg) in [datetime.datetime, datetime.date, datetime.timedelta]
 54 | 
 55 | 
 56 | def get_connection_str():
 57 |     host = os.environ.get("POSTGRES_HOST", "localhost")
 58 |     dbname = os.environ.get("POSTGRES_DB", "bft")
 59 |     user = os.environ.get("POSTGRES_USER", "postgres")
 60 |     password = os.environ.get("POSTGRES_PASSWORD", "postgres")
 61 |     return f"{host=} {dbname=} {user=} {password=}"
 62 | 
 63 | 
 64 | class PostgresRunner(SqlCaseRunner):
 65 |     def __init__(self, dialect):
 66 |         super().__init__(dialect)
 67 |         self.conn = psycopg.connect(get_connection_str())
 68 | 
 69 |     def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
 70 |         self.conn.execute("BEGIN;")
 71 | 
 72 |         try:
 73 |             arg_defs = []
 74 |             for idx, arg in enumerate(case.args):
 75 |                 arg_type = type_to_postgres_type(arg.type)
 76 |                 if arg_type is None:
 77 |                     return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
 78 |                 arg_defs.append(f"arg{idx} {arg_type}")
 79 |             schema = ",".join(arg_defs)
 80 |             self.conn.execute(f"CREATE TABLE my_table({schema});")
 81 | 
 82 |             arg_names = [f"arg{idx}" for idx in range(len(case.args))]
 83 |             joined_arg_names = ",".join(arg_names)
 84 |             arg_vals_list = list()
 85 |             for arg in case.args:
 86 |                 if is_string_type(arg):
 87 |                     arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
 88 |                 else:
 89 |                     arg_vals_list.append(literal_to_str(arg.value))
 90 |             arg_vals = ", ".join(arg_vals_list)
 91 |             if mapping.aggregate:
 92 |                 arg_vals_list = list()
 93 |                 for arg in case.args:
 94 |                     arg_vals = ""
 95 |                     for value in arg.value:
 96 |                         if is_string_type(arg):
 97 |                             if value:
 98 |                                 arg_vals += f"('{literal_to_str(value)}'),"
 99 |                             else:
100 |                                 arg_vals += f"({literal_to_str(value)}),"
101 |                         else:
102 |                             arg_vals += f"({literal_to_str(value)}),"
103 |                     arg_vals_list.append([arg_vals[:-1]])
104 |                 for arg_name, arg_vals in zip(arg_names, arg_vals_list):
105 |                     if len(arg_vals[0]):
106 |                         self.conn.execute(
107 |                             f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
108 |                         )
109 |             else:
110 |                 self.conn.execute(
111 |                     f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
112 |                 )
113 | 
114 |             if mapping.infix:
115 |                 if len(arg_names) != 2:
116 |                     raise Exception(f"Infix function with {len(arg_names)} args")
117 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
118 |             elif mapping.postfix:
119 |                 if len(arg_names) != 1:
120 |                     raise Exception(f"Postfix function with {len(arg_names)} args")
121 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
122 |             elif mapping.extract:
123 |                 if len(arg_names) != 2:
124 |                     raise Exception(f"Extract function with {len(arg_names)} args")
125 |                 expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
126 |             elif mapping.between:
127 |                 if len(arg_names) != 3:
128 |                     raise Exception(f"Between function with {len(arg_names)} args")
129 |                 expr = f"SELECT {arg_names[0]} BETWEEN {arg_names[1]} AND {arg_names[2]} FROM my_table;"
130 |             elif mapping.local_name == 'count(*)':
131 |                 expr = f"SELECT {mapping.local_name} FROM my_table;"
132 |             elif mapping.aggregate:
133 |                 if len(arg_names) < 1:
134 |                     raise Exception(f"Aggregate function with {len(arg_names)} args")
135 |                 expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
136 |             else:
137 |                 expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
138 |             result = self.conn.execute(expr).fetchone()[0]
139 | 
140 |             if case.result == "undefined":
141 |                 return SqlCaseResult.success()
142 |             elif case.result == "error":
143 |                 return SqlCaseResult.unexpected_pass(str(result))
144 |             elif case.result == "nan":
145 |                 print(f"Expected NAN but received {result}")
146 |                 return SqlCaseResult.error(str(result))
147 |             # Issues with python float comparison:
148 |             # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
149 |             # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
150 |             elif case.result.type.startswith("fp") and case.result.value:
151 |                 if math.isclose(result, case.result.value, rel_tol=1e-7):
152 |                     return SqlCaseResult.success()
153 |             else:
154 |                 if result == case.result.value:
155 |                     return SqlCaseResult.success()
156 |                 elif is_datetype(result) and datetype_value_equal(
157 |                     result, case.result.value
158 |                 ):
159 |                     return SqlCaseResult.success()
160 |                 else:
161 |                     return SqlCaseResult.mismatch(str(result))
162 |         except psycopg.Error as err:
163 |             return SqlCaseResult.error(str(err))
164 |         finally:
165 |             self.conn.rollback()
166 | 


--------------------------------------------------------------------------------
/bft/testers/postgres/tester.py:
--------------------------------------------------------------------------------
 1 | from bft.dialects.types import Dialect, DialectsLibrary
 2 | from bft.testers.base_tester import BaseTester
 3 | 
 4 | from .runner import PostgresRunner
 5 | 
 6 | 
 7 | class PostgresTester(BaseTester):
 8 |     def get_runner(self, dialect: Dialect):
 9 |         return PostgresRunner(dialect)
10 | 
11 |     def get_dialect(self, library: DialectsLibrary):
12 |         return library.get_dialect_by_name("postgres")
13 | 


--------------------------------------------------------------------------------
/bft/testers/snowflake/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/snowflake/__init__.py


--------------------------------------------------------------------------------
/bft/testers/snowflake/config.yaml:
--------------------------------------------------------------------------------
1 | snowflake:
2 |   username: <username>
3 |   account: <account name>
4 |   warehouse: <warehouse name>
5 |   database: <database name>
6 |   schema: <schema name>
7 | 


--------------------------------------------------------------------------------
/bft/testers/snowflake/runner.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import math
  3 | import os
  4 | import yaml
  5 | from typing import Dict, NamedTuple
  6 | from cryptography.hazmat.primitives.serialization import load_der_private_key
  7 | from cryptography.hazmat.backends import default_backend
  8 | 
  9 | from snowflake.connector import connect
 10 | from snowflake.connector.errors import Error
 11 | 
 12 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
 13 | from bft.cases.types import Case
 14 | from bft.dialects.types import SqlMapping
 15 | from bft.utils.utils import type_to_dialect_type
 16 | 
# Lookup table from the type names used by test-case arguments to the
# Snowflake column types used when declaring the scratch table's columns.
# It is handed to type_to_dialect_type by type_to_snowflake_type.
type_map = {
    "fp64": "FLOAT",
    "boolean": "BOOLEAN",
    "string": "VARCHAR",
    "date": "DATE",
    "time": "TIME",
    "timestamp": "TIMESTAMP",
    "timestamp_tz": "TIMESTAMPTZ",
    "interval": "INTERVAL",
    "decimal": "DECIMAL",
}
 28 | 
 29 | 
 30 | def type_to_snowflake_type(type: str):
 31 |     return type_to_dialect_type(type, type_map)
 32 | 
 33 | 
 34 | def literal_to_str(lit: str | int | float):
 35 |     if lit is None:
 36 |         return "null"
 37 |     elif lit in [math.nan, "nan"]:
 38 |         return "'NaN'"
 39 |     elif lit in [float("inf"), "inf"]:
 40 |         return "'inf'"
 41 |     elif lit in [float("-inf"), "-inf"]:
 42 |         return "'-inf'"
 43 |     return str(lit)
 44 | 
 45 | 
 46 | def literal_to_float(lit: str | int | float):
 47 |     if lit in [float("inf"), "inf"]:
 48 |         return "TO_DOUBLE('inf'::float)"
 49 |     elif lit in [float("-inf"), "-inf"]:
 50 |         return "TO_DOUBLE('-inf'::float)"
 51 |     return lit
 52 | 
 53 | 
 54 | def is_float_type(arg):
 55 |     return arg.type in ["fp32", "fp64"]
 56 | 
 57 | 
 58 | def is_string_type(arg):
 59 |     return (
 60 |         arg.type in ["string", "timestamp", "timestamp_tz", "date", "time"]
 61 |         and arg.value is not None
 62 |     )
 63 | 
 64 | 
 65 | def is_datetype(arg):
 66 |     return type(arg) in [datetime.datetime, datetime.date, datetime.timedelta]
 67 | 
 68 | 
 69 | class SnowflakeRunner(SqlCaseRunner):
 70 |     def __init__(self, dialect):
 71 |         super().__init__(dialect)
 72 |         with open("testers/snowflake/config.yaml", "r") as file:
 73 |             config = yaml.safe_load(file)
 74 |             sf_config = config["snowflake"]
 75 |         print(f"Connecting to {sf_config['account']} as {sf_config['username']}")
 76 |         private_key_path = os.environ["SNOWSQL_PRIVATE_KEY_PATH"]
 77 |         with open(private_key_path, "rb") as f:
 78 |             private_key = f.read()
 79 | 
 80 |         self.conn = connect(
 81 |             user=sf_config["username"],
 82 |             private_key=private_key,
 83 |             account=sf_config["account"],
 84 |             database=sf_config["database"],
 85 |             schema=sf_config["schema"],
 86 |             warehouse=sf_config["warehouse"],
 87 |         )
 88 | 
 89 |     def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
 90 | 
 91 |         try:
 92 |             print(f"Running testcase {case} {mapping}")
 93 |             cursor = self.conn.cursor()
 94 |             arg_defs = []
 95 |             for idx, arg in enumerate(case.args):
 96 |                 arg_type = type_to_snowflake_type(arg.type)
 97 |                 if arg_type is None:
 98 |                     return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
 99 |                 arg_defs.append(f"arg{idx} {arg_type}")
100 |             schema = ",".join(arg_defs)
101 |             cursor.execute(f"CREATE TABLE my_table({schema});")
102 |             cursor.execute(f"SET TimeZone='UTC';")
103 |             print(f"Running case: {case} create table my_table({schema});")
104 | 
105 |             arg_names = [f"arg{idx}" for idx in range(len(case.args))]
106 |             joined_arg_names = ",".join(arg_names)
107 |             arg_vals_list = list()
108 |             for arg in case.args:
109 |                 if is_string_type(arg):
110 |                     arg_vals_list.append("'" + literal_to_str(arg.value) + "'")
111 |                 else:
112 |                     arg_vals_list.append(literal_to_str(arg.value))
113 |             arg_vals = ", ".join(arg_vals_list)
114 |             if mapping.aggregate:
115 |                 arg_vals_list = list()
116 |                 for arg in case.args:
117 |                     arg_vals = ""
118 |                     for value in arg.value:
119 |                         if is_string_type(arg):
120 |                             if value:
121 |                                 arg_vals += f"('{literal_to_str(value)}'),"
122 |                             else:
123 |                                 arg_vals += f"({literal_to_str(value)}),"
124 |                         elif is_float_type(arg):
125 |                             if value:
126 |                                 arg_vals += f"({literal_to_float(value)}),"
127 |                             else:
128 |                                 arg_vals += f"({literal_to_str(value)}),"
129 |                         else:
130 |                             arg_vals += f"({literal_to_str(value)}),"
131 |                     arg_vals_list.append([arg_vals[:-1]])
132 |                 for arg_name, arg_vals in zip(arg_names, arg_vals_list):
133 |                     if len(arg_vals[0]):
134 |                         cursor.execute(
135 |                             f"INSERT INTO my_table ({arg_name}) VALUES {arg_vals[0]};"
136 |                         )
137 |             else:
138 |                 cursor.execute(
139 |                     f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
140 |                 )
141 | 
142 |             if mapping.infix:
143 |                 if len(arg_names) != 2:
144 |                     raise Exception(f"Infix function with {len(arg_names)} args")
145 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
146 |             elif mapping.postfix:
147 |                 if len(arg_names) != 1:
148 |                     raise Exception(f"Postfix function with {len(arg_names)} args")
149 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
150 |             elif mapping.extract:
151 |                 if len(arg_names) != 2:
152 |                     raise Exception(f"Extract function with {len(arg_names)} args")
153 |                 expr = f"SELECT {mapping.local_name}({arg_vals_list[0]} FROM {arg_names[1]}) FROM my_table;"
154 |             elif mapping.local_name == "count(*)":
155 |                 expr = f"SELECT {mapping.local_name} FROM my_table;"
156 |             elif mapping.aggregate:
157 |                 if len(arg_names) < 1:
158 |                     raise Exception(f"Aggregate function with {len(arg_names)} args")
159 |                 expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
160 |             else:
161 |                 expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
162 |             result = cursor.execute(expr).fetchone()[0]
163 | 
164 |             if case.result == "undefined":
165 |                 return SqlCaseResult.success()
166 |             elif case.result == "error":
167 |                 return SqlCaseResult.unexpected_pass(str(result))
168 |             # Issues with python float comparison:
169 |             # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
170 |             # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
171 |             elif case.result.type.startswith("fp") and case.result.value and result:
172 |                 if math.isclose(result, case.result.value, rel_tol=1e-7):
173 |                     return SqlCaseResult.success()
174 |             else:
175 |                 if result == case.result.value:
176 |                     return SqlCaseResult.success()
177 |                 elif is_datetype(result) and str(result) == case.result.value:
178 |                     return SqlCaseResult.success()
179 |                 else:
180 |                     return SqlCaseResult.mismatch(str(result))
181 |         except Error as err:
182 |             return SqlCaseResult.error(str(err))
183 |         finally:
184 |             cursor.execute("DROP TABLE IF EXISTS my_table")
185 |             cursor.close()
186 | 


--------------------------------------------------------------------------------
/bft/testers/snowflake/tester.py:
--------------------------------------------------------------------------------
 1 | from bft.dialects.types import Dialect, DialectsLibrary
 2 | from bft.testers.base_tester import BaseTester
 3 | 
 4 | from .runner import SnowflakeRunner
 5 | 
 6 | 
 7 | class SnowflakeTester(BaseTester):
 8 |     def get_runner(self, dialect: Dialect):
 9 |         return SnowflakeRunner(dialect)
10 | 
11 |     def get_dialect(self, library: DialectsLibrary):
12 |         return library.get_dialect_by_name("snowflake")
13 | 


--------------------------------------------------------------------------------
/bft/testers/sqlite/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/testers/sqlite/__init__.py


--------------------------------------------------------------------------------
/bft/testers/sqlite/runner.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import sqlite3
  3 | from typing import Dict, NamedTuple
  4 | 
  5 | from bft.cases.runner import SqlCaseResult, SqlCaseRunner
  6 | from bft.cases.types import Case, CaseLiteral
  7 | from bft.dialects.types import SqlMapping
  8 | from bft.utils.utils import type_to_dialect_type
  9 | 
# Lookup table from the type names used by test-case arguments to the column
# type names used when declaring the SQLite scratch table's columns. It is
# handed to type_to_dialect_type by type_to_sqlite_type.
type_map = {
    "i8": "TINYINT",
    "i16": "SMALLINT",
    "i32": "INT",
    "i64": "HUGEINT",
    "fp32": "REAL",
    "fp64": "REAL",
    "boolean": "BOOLEAN",
    "string": "TEXT",
}
 20 | 
 21 | 
 22 | def type_to_sqlite_type(type: str):
 23 |     return type_to_dialect_type(type, type_map)
 24 | 
 25 | 
 26 | def literal_to_str(lit: str | int | float):
 27 |     if lit is None:
 28 |         return "null"
 29 |     elif lit in [float("inf"), "inf"]:
 30 |         return "9e999"
 31 |     elif lit in [float("-inf"), "-inf"]:
 32 |         return "-9e999"
 33 |     return str(lit)
 34 | 
 35 | 
 36 | def flatten(l: list):
 37 |     return [item for sublist in l for item in sublist]
 38 | 
 39 | 
 40 | def extract_argument_values(case: Case, mapping: SqlMapping):
 41 |     arg_vals_list = []
 42 |     for arg in case.args:
 43 |         arg_vals = []
 44 |         if arg.type == "string" and arg.value is not None:
 45 |             arg_vals.append("'" + literal_to_str(arg.value) + "'")
 46 |         elif mapping.aggregate:
 47 |             for value in arg.value:
 48 |                 arg_vals.append(literal_to_str(value))
 49 |         else:
 50 |             arg_vals.append(literal_to_str(arg.value))
 51 |         arg_vals_list.append(arg_vals)
 52 |     return arg_vals_list
 53 | 
 54 | 
 55 | class SqliteRunner(SqlCaseRunner):
 56 |     def __init__(self, dialect):
 57 |         super().__init__(dialect)
 58 |         self.conn = sqlite3.connect(":memory:")
 59 | 
 60 |     def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
 61 |         self.conn.execute("BEGIN;")
 62 | 
 63 |         try:
 64 |             arg_defs = []
 65 |             for idx, arg in enumerate(case.args):
 66 |                 arg_type = type_to_sqlite_type(arg.type)
 67 |                 if arg_type is None:
 68 |                     return SqlCaseResult.unsupported(f"Unsupported type {arg.type}")
 69 |                 arg_defs.append(f"arg{idx} {arg_type}")
 70 |             schema = ",".join(arg_defs)
 71 |             self.conn.execute(f"CREATE TABLE my_table({schema});")
 72 | 
 73 |             arg_names = [f"arg{idx}" for idx in range(len(case.args))]
 74 | 
 75 |             joined_arg_names = ",".join(arg_names)
 76 |             arg_vals_list = extract_argument_values(case, mapping)
 77 |             arg_vals = ', '.join(flatten(arg_vals_list))
 78 | 
 79 |             if mapping.aggregate:
 80 |                 for arg_name, arg_vals in zip(arg_names, arg_vals_list):
 81 |                     str_arg_vals = ",".join(f"({val})" for val in arg_vals)
 82 |                     if arg_vals:
 83 |                         self.conn.execute(
 84 |                             f"INSERT INTO my_table ({arg_name}) VALUES {str_arg_vals};"
 85 |                         )
 86 |             else:
 87 |                 self.conn.execute(
 88 |                     f"INSERT INTO my_table ({joined_arg_names}) VALUES ({arg_vals});"
 89 |                 )
 90 | 
 91 |             if mapping.infix:
 92 |                 if len(arg_names) != 2:
 93 |                     raise Exception(f"Infix function with {len(arg_names)} args")
 94 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} {arg_names[1]} FROM my_table;"
 95 |             elif mapping.postfix:
 96 |                 if len(arg_names) != 1:
 97 |                     raise Exception(f"Postfix function with {len(arg_names)} args")
 98 |                 expr = f"SELECT {arg_names[0]} {mapping.local_name} FROM my_table;"
 99 |             elif mapping.between:
100 |                 if len(arg_names) != 3:
101 |                     raise Exception(f"Between function with {len(arg_names)} args")
102 |                 expr = f"SELECT {arg_names[0]} BETWEEN {arg_names[1]} AND {arg_names[2]} FROM my_table;"
103 |             elif mapping.local_name == 'count(*)':
104 |                 expr = f"SELECT {mapping.local_name} FROM my_table;"
105 |             elif mapping.aggregate:
106 |                 if len(arg_names) < 1:
107 |                     raise Exception(f"Aggregate function with {len(arg_names)} args")
108 |                 expr = f"SELECT {mapping.local_name}({arg_names[0]}) FROM my_table;"
109 |             else:
110 |                 expr = f"SELECT {mapping.local_name}({joined_arg_names}) FROM my_table;"
111 |             result = self.conn.execute(expr).fetchone()[0]
112 | 
113 |             if case.result == "undefined":
114 |                 return SqlCaseResult.success()
115 |             elif case.result == "error":
116 |                 return SqlCaseResult.unexpected_pass(str(result))
117 |             elif case.result == "nan":
118 |                 return SqlCaseResult.error(str(result))
119 |             # Issues with python float comparison:
120 |             # https://tutorpython.com/python-mathisclose/#The_problem_with_using_for_float_comparison
121 |             # https://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
122 |             elif case.result.type.startswith("fp") and case.result.value and result:
123 |                 if math.isclose(result, case.result.value, rel_tol=1e-7):
124 |                     return SqlCaseResult.success()
125 |             else:
126 |                 if result == case.result.value:
127 |                     return SqlCaseResult.success()
128 |                 else:
129 |                     return SqlCaseResult.mismatch(str(result))
130 |         except sqlite3.Error as err:
131 |             return SqlCaseResult.error(str(err))
132 |         finally:
133 |             self.conn.rollback()
134 | 


--------------------------------------------------------------------------------
/bft/testers/sqlite/tester.py:
--------------------------------------------------------------------------------
 1 | from bft.dialects.types import Dialect, DialectsLibrary
 2 | from bft.testers.base_tester import BaseTester
 3 | 
 4 | from .runner import SqliteRunner
 5 | 
 6 | 
 7 | class SqliteTester(BaseTester):
 8 |     def get_runner(self, dialect: Dialect):
 9 |         return SqliteRunner(dialect)
10 | 
11 |     def get_dialect(self, library: DialectsLibrary):
12 |         return library.get_dialect_by_name("sqlite")
13 | 


--------------------------------------------------------------------------------
/bft/testers/velox/runner.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import pyvelox.pyvelox as pv
 4 | 
 5 | from bft.cases.runner import Case, SqlCaseResult, SqlCaseRunner, SqlMapping
 6 | from bft.dialects.types import Dialect
 7 | 
 8 | 
 9 | def is_type_supported(type):
10 |     return type in set({"i64", "fp64", "boolean", "string"})
11 | 
12 | 
class VeloxRunner(SqlCaseRunner):
    """Runs scalar cases by evaluating pyvelox expressions."""

    def __init__(self, dialect: Dialect):
        super().__init__(dialect)

    def run_sql_case(self, case: Case, mapping: SqlMapping) -> SqlCaseResult:
        """Evaluate one case with pyvelox and compare against the expectation.

        Each argument becomes a one-element vector named ``arg<N>``; the
        expression text is built from the mapping kind (infix, postfix,
        between, or a plain function call).

        :param case: the case to run (arguments and expected result)
        :param mapping: how the function is spelled/invoked in this dialect
        :return: the outcome of the case; a RuntimeError raised during
            evaluation is reported as an error result
        :raises Exception: if the mapping kind does not match the number of
            arguments, or if evaluation yields other than exactly one row
        """
        arg_vectors = []
        arg_names = []
        for arg_idx, arg in enumerate(case.args):
            if not is_type_supported(arg.type):
                return SqlCaseResult.unsupported(
                    f"The type {arg.type} is not supported"
                )
            arg_vectors.append(pv.from_list([arg.value]))
            arg_names.append(f"arg{arg_idx}")
        if mapping.infix:
            if len(case.args) != 2:
                raise Exception(f"Infix function with {len(case.args)} args")
            expr_str = f"arg0 {mapping.local_name} arg1"
        elif mapping.postfix:
            if len(arg_names) != 1:
                raise Exception(f"Postfix function with {len(arg_names)} args")
            expr_str = f"arg0 {mapping.local_name}"
        elif mapping.between:
            if len(arg_names) != 3:
                raise Exception(f"between function with {len(arg_names)} args")
            expr_str = f"arg0 {mapping.local_name} arg1 and arg2"
        else:
            joined_args = ", ".join(arg_names)
            expr_str = f"{mapping.local_name}({joined_args})"

        try:
            expr = pv.Expression.from_string(expr_str)
            answer = expr.evaluate(arg_names, arg_vectors)
            result = [v for v in answer]
        except RuntimeError as err:
            return SqlCaseResult.error(str(err))

        if len(result) != 1:
            raise Exception("Scalar function with one row output more than one row")
        result = result[0]

        if case.result == "undefined":
            return SqlCaseResult.success()
        elif case.result == "error":
            return SqlCaseResult.unexpected_pass(str(result))
        elif case.result == "nan":
            # math.isnan rejects non-numeric values (e.g. None or str); treat
            # those as "not NaN" instead of letting a TypeError escape.
            try:
                is_nan = math.isnan(result)
            except TypeError:
                is_nan = False
            if is_nan:
                return SqlCaseResult.success()
            # Previously a non-NaN result fell through and returned None.
            return SqlCaseResult.mismatch(str(result))
        else:
            if result == case.result.value:
                return SqlCaseResult.success()
            else:
                return SqlCaseResult.mismatch(str(result))
66 | 


--------------------------------------------------------------------------------
/bft/testers/velox/tester.py:
--------------------------------------------------------------------------------
 1 | from bft.cases.runner import CaseRunner
 2 | from bft.dialects.types import Dialect, DialectsLibrary
 3 | from bft.testers.base_tester import BaseTester
 4 | from bft.testers.velox.runner import VeloxRunner
 5 | 
 6 | 
 7 | class VeloxTester(BaseTester):
 8 |     def get_runner(self, dialect: Dialect) -> CaseRunner:
 9 |         return VeloxRunner(dialect)
10 | 
11 |     def get_dialect(self, library: DialectsLibrary) -> Dialect:
12 |         return library.get_dialect_by_name("velox_presto")
13 | 


--------------------------------------------------------------------------------
/bft/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/bft/tests/__init__.py


--------------------------------------------------------------------------------
/bft/tests/base.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List
 3 | 
 4 | import pytest
 5 | 
 6 | from bft.cases.parser import CaseFileParser
 7 | from bft.cases.types import Case
 8 | from bft.testers.base_tester import BaseTester
 9 | from tools.convert_testcases.convert_testcases_to_yaml_format import (
10 |     convert_directory as convert_directory_from_substrait,
11 | )
12 | 
13 | 
# Ideally this would be a session-scoped fixture, but parametrize values do not
# appear to accept fixtures, so it stays a plain function.
def cases() -> List[Case]:
    """Collect every case found in the repository's ``cases`` directory.

    The substrait test cases are first converted into the ``cases`` directory,
    then every ``*.yaml`` file below it is parsed and each case is passed
    through transform_case.
    """
    collected = []
    bft_dir = Path(__file__).parent.parent.parent
    parser = CaseFileParser()
    cases_dir = bft_dir / "cases"
    substrait_cases_dir = bft_dir / "substrait" / "tests" / "cases"
    convert_directory_from_substrait(substrait_cases_dir, cases_dir)
    yaml_paths = cases_dir.resolve().rglob("*.yaml")
    for case_path in yaml_paths:
        with open(case_path, "rb") as case_f:
            for case_file in parser.parse(case_f):
                for parsed_case in case_file.cases:
                    collected.append(transform_case(parsed_case))
    return collected
30 | 
31 | 
def transform_case(case):
    """Build a new Case carrying the same field values as ``case``.

    All six fields (including ``args``) are currently copied unchanged; this
    is the place to adjust ``args`` if a transformation is ever needed.
    """
    copied_fields = {
        "function": case.function,
        "base_uri": case.base_uri,
        "group": case.group,
        "args": case.args,
        "result": case.result,
        "options": case.options,
    }
    return Case(**copied_fields)
42 | 
43 | 
def case_id_fn(case: Case):
    """Build an id string from the case's function name and its group id/index."""
    group = case.group
    return "_".join(f"{part}" for part in (case.function, group.id, group.index))
46 | 
47 | 
def run_test(case: Case, tester: BaseTester):
    """Run ``case`` through ``tester`` and translate the outcome for pytest.

    Cases are skipped up front for two runner-specific situations (null
    inputs on the Velox runner, "inf" results on the Postgres runner).
    Otherwise an unexpected pass or unexpected failure fails the test, an
    expected pass succeeds, and an expected failure is reported via xfail.
    """
    runner_name = tester.runner.__class__.__name__
    if runner_name == "VeloxRunner":
        if any(case_literal.value is None for case_literal in case.args):
            pytest.skip("Skipping. Pyvelox does not support null input")
    if runner_name == "PostgresRunner":
        if type(case.result) is not str and "inf" in str(case.result[0]):
            pytest.skip(
                "Skipping. Postgres errors out when dealing with infinite addition"
            )

    result = tester.run_test(case)
    if result.passed:
        if result.should_have_passed:
            assert result.passed
            return
        pytest.fail(f"Unexpected pass: {result.reason}")
    elif result.should_have_passed:
        pytest.fail(f"Unexpected fail: {result.reason}")
    else:
        pytest.xfail(result.reason)
69 | 


--------------------------------------------------------------------------------
/bft/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List
 3 | 
 4 | import pytest
 5 | 
 6 | from bft.dialects.loader import load_dialects
 7 | from bft.dialects.types import DialectsLibrary
 8 | 
 9 | 
@pytest.fixture(scope="session")
def dialects() -> DialectsLibrary:
    """Session-scoped library loaded from the ``dialects`` directory three levels above this file."""
    dialects_dir = Path(__file__).joinpath("..", "..", "..", "dialects")
    resolved_dir = dialects_dir.resolve()
    return load_dialects(str(resolved_dir))
14 | 


--------------------------------------------------------------------------------
/bft/tests/test_cudf.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bft.testers.cudf.tester import CudfTester
 4 | 
 5 | from .base import cases, run_test
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def tester(dialects):
10 |     instance = CudfTester()
11 |     instance.prepare(dialects)
12 |     return instance
13 | 
14 | 
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one collected case through the shared run_test helper."""
    run_test(case=case, tester=tester)
18 | 


--------------------------------------------------------------------------------
/bft/tests/test_datafusion.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bft.testers.datafusion.tester import DatafustionTester
 4 | 
 5 | from .base import cases, run_test
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def tester(dialects):
10 |     instance = DatafustionTester()
11 |     instance.prepare(dialects)
12 |     return instance
13 | 
14 | 
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one collected case through the shared run_test helper."""
    run_test(case=case, tester=tester)
18 | 


--------------------------------------------------------------------------------
/bft/tests/test_duckdb.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bft.testers.duckdb.tester import DuckDBTester
 4 | 
 5 | from .base import cases, run_test
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def tester(dialects):
10 |     instance = DuckDBTester()
11 |     instance.prepare(dialects)
12 |     return instance
13 | 
14 | 
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one collected case through the shared run_test helper."""
    run_test(case=case, tester=tester)
18 | 


--------------------------------------------------------------------------------
/bft/tests/test_postgres.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bft.testers.postgres.tester import PostgresTester
 4 | 
 5 | from .base import cases, run_test
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def tester(dialects):
10 |     instance = PostgresTester()
11 |     instance.prepare(dialects)
12 |     return instance
13 | 
14 | 
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one collected case through the shared run_test helper."""
    run_test(case=case, tester=tester)
18 | 


--------------------------------------------------------------------------------
/bft/tests/test_pyvelox.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bft.testers.velox.tester import VeloxTester
 4 | 
 5 | from .base import cases, run_test
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def tester(dialects):
10 |     instance = VeloxTester()
11 |     instance.prepare(dialects)
12 |     return instance
13 | 
14 | 
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one collected case through the shared run_test helper."""
    run_test(case=case, tester=tester)
18 | 


--------------------------------------------------------------------------------
/bft/tests/test_snowflake.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bft.testers.snowflake.tester import SnowflakeTester
 4 | 
 5 | from .base import cases, run_test
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def tester(dialects):
10 |     instance = SnowflakeTester()
11 |     instance.prepare(dialects)
12 |     return instance
13 | 
14 | 
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one collected case through the shared run_test helper."""
    run_test(case=case, tester=tester)
18 | 


--------------------------------------------------------------------------------
/bft/tests/test_sqlite.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bft.testers.sqlite.tester import SqliteTester
 4 | 
 5 | from .base import cases, run_test
 6 | 
 7 | 
 8 | @pytest.fixture(scope="module")
 9 | def tester(dialects):
10 |     instance = SqliteTester()
11 |     instance.prepare(dialects)
12 |     return instance
13 | 
14 | 
@pytest.mark.parametrize("case", cases())
def test_functions(case, tester):
    """Run one collected case through the shared run_test helper."""
    run_test(case=case, tester=tester)
18 | 


--------------------------------------------------------------------------------
/bft/utils/utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict
 2 | import datetime
 3 | 
 4 | 
 5 | def type_to_dialect_type(type: str, type_map: Dict[str, str])->str:
 6 |     """
 7 |     Convert a substrait type to a dialect type
 8 | 
 9 |     :param type: substrait name of base type (i.e. without parameters)
10 |     :param type_map:map of substrait type to dialect base type (i.e. without parameters)
11 |     :return:dialect type
12 | 
13 |     e.g. type_map: {"interval": "INTERVAL", "decimal": "NUMERIC"}
14 |         input type: "decimal<37, 3>",  -> output: "NUMERIC(37, 3)"
15 |     e.g. input type: "interval", output: "INTERVAL"
16 | 
17 |     in above example "decimal" or "interval" are referred as base type whereas decimal<37, 3> is parameterized type
18 | 
19 |     """
20 |     type_to_check = type.split("<")[0].strip() if "<" in type else type
21 |     if type_to_check not in type_map:
22 |         return None
23 |     type_val = type_map[type_to_check]
24 |     if not "<" in type:
25 |         return type_val
26 |     # transform parameterized type name to have dialect type
27 |     return type.replace(type_to_check, type_val).replace("<", "(").replace(">", ")")
28 | 
def has_only_date(value: datetime.datetime):
    """Return True when the hour, minute, second and microsecond of ``value`` are all zero."""
    time_parts = (value.hour, value.minute, value.second, value.microsecond)
    return time_parts == (0, 0, 0, 0)
38 | 
def datetype_value_equal(result, case_result):
    """Compare a result value with the expected string ``case_result``.

    Matches when ``str(result)`` equals ``case_result``, or when ``result`` is
    a datetime with a zero time part whose date alone prints as
    ``case_result``.
    """
    if str(result) == case_result:
        return True
    if not isinstance(result, datetime.datetime):
        return False
    # Midnight timestamps may be compared on their date part only
    return has_only_date(result) and str(result.date()) == case_result
49 | 


--------------------------------------------------------------------------------
/build_site.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | from pathlib import Path
 3 | 
 4 | from bft.html.builder import build_site
 5 | 
 6 | 
 7 | def copy_with_progress(src, dst, copy_function=shutil.copy2):
 8 |     for source_path in Path(src).rglob('*'):
 9 |         relative_path = source_path.relative_to(src)
10 |         destination_path = dst / relative_path
11 | 
12 |         if source_path.is_file():
13 |             destination_path.parent.mkdir(parents=True, exist_ok=True)
14 |             copy_function(source_path, destination_path)
15 |             print(f"Copying: {source_path} -> {destination_path}")
16 | 
# Locations used by the build, all relative to this script's directory
root = Path(__file__).parent.resolve()
index = root / "index.yaml"
dest = root / "dist"
static_content_dir = root / "static_site"

# Start from an empty output directory: drop any previous build, then recreate it
if dest.exists():
    shutil.rmtree(dest)
dest.mkdir()

# Generate the site from the index, then overlay the static assets on top
build_site(index, dest)
copy_with_progress(static_content_dir, dest)

print("Copying static files completed.")
36 | 


--------------------------------------------------------------------------------
/ci/docker/base-tester.Dockerfile:
--------------------------------------------------------------------------------
# Generic tester image: Alpine, Python 3 and the shared test dependencies.
# Extra pip packages can be supplied through the PIP_PACKAGES build argument.
FROM alpine:3.18
ARG PIP_PACKAGES

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Install Python 3 and make it reachable as /usr/bin/python
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
# Echo the build argument so it shows up in the build log
RUN echo "PIP_PACKAGES is $PIP_PACKAGES"
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe $PIP_PACKAGES ruamel.yaml antlr4-python3-runtime pytz

# Copy the build context into /bft and run from there
WORKDIR /bft
COPY . .

# Default command: run the SQLite test module
CMD /usr/bin/python -mpytest bft/tests/test_sqlite.py
15 | 


--------------------------------------------------------------------------------
/ci/docker/datafusion.Dockerfile:
--------------------------------------------------------------------------------
# DataFusion tester image: Ubuntu, Python 3.10, datafusion and the shared test dependencies.
FROM ubuntu:22.04

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Install Python and make it reachable as /usr/bin/python
RUN apt-get update && apt-get install -y python3.10 && ln -sf python3 /usr/bin/python
RUN apt install -y pip
RUN pip install --upgrade pip setuptools pytest pyyaml mistletoe datafusion ruamel.yaml antlr4-python3-runtime pytz numpy

# Copy the build context into /bft and run from there
WORKDIR /bft
COPY . .

# Default command: run the DataFusion test module
CMD /usr/bin/python -mpytest bft/tests/test_datafusion.py
13 | 


--------------------------------------------------------------------------------
/ci/docker/duckdb.Dockerfile:
--------------------------------------------------------------------------------
# DuckDB tester image: Alpine, Python 3, duckdb and the shared test dependencies.
FROM alpine:3.18

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Install Python 3 and make it reachable as /usr/bin/python
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe duckdb ruamel.yaml antlr4-python3-runtime pytz

# Copy the build context into /bft and run from there
WORKDIR /bft
COPY . .

# Default command: run the DuckDB test module
CMD /usr/bin/python -mpytest bft/tests/test_duckdb.py
13 | 


--------------------------------------------------------------------------------
/ci/docker/postgres-compose.yaml:
--------------------------------------------------------------------------------
# Two services: "app" runs the Postgres test module, "postgres" is the
# database it depends on.
services:
  app:
    image: bft/tester
    build:
      context: ../..
      dockerfile: ./ci/docker/base-tester.Dockerfile
      args:
        # Extra pip package installed through the Dockerfile's PIP_PACKAGES argument
        PIP_PACKAGES: psycopg[binary]
    command: /usr/bin/python -mpytest bft/tests/test_postgres.py
    # Wait until the postgres healthcheck below reports healthy
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      POSTGRES_HOST: postgres
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: bft

  postgres:
    image: postgres:15-alpine
    environment:
      POSTGRES_DB: bft
      POSTGRES_PASSWORD: postgres
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 5s
      timeout: 5s
      retries: 5
29 | 


--------------------------------------------------------------------------------
/ci/docker/postgres-server.Dockerfile:
--------------------------------------------------------------------------------
# Image based on postgres:15-alpine with Python 3, psycopg and the shared test dependencies.
FROM postgres:15-alpine
ENV POSTGRES_DB=bft
ENV POSTGRES_PASSWORD=postgres

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Install Python 3 and make it reachable as /usr/bin/python
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe psycopg[binary] ruamel.yaml antlr4-python3-runtime pytz

# Copy the build context into /bft and run from there
WORKDIR /bft
COPY . .

# NOTE(review): this CMD presumably replaces the base image's default command,
# so the container would run pytest rather than a database server - confirm
# how this image is meant to be used.
CMD /usr/bin/python -mpytest bft/tests/test_postgres.py
15 | 


--------------------------------------------------------------------------------
/ci/docker/sqlite.Dockerfile:
--------------------------------------------------------------------------------
# SQLite tester image: Alpine, Python 3 and the shared test dependencies.
# No extra database package is installed here; the runner uses Python's sqlite3 module.
FROM alpine:3.18

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/bft/substrait
# Install Python 3 and make it reachable as /usr/bin/python
RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python
RUN python3 -m ensurepip
RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe ruamel.yaml antlr4-python3-runtime pytz

# Copy the build context into /bft and run from there
WORKDIR /bft
COPY . .

# Default command: run the SQLite test module and display the results
CMD /usr/bin/python -mpytest bft/tests/test_sqlite.py
14 | 


--------------------------------------------------------------------------------
/ci/docker/velox-compose.yaml:
--------------------------------------------------------------------------------
# Single service that builds the Velox tester image and runs the pyvelox test module.
services:
  app:
    image: bft/tester
    build:
      context: ../..
      dockerfile: ./ci/docker/velox.Dockerfile
      args:
        # NOTE(review): velox.Dockerfile does not appear to declare a
        # PIP_PACKAGES ARG - confirm this build argument is still needed.
        PIP_PACKAGES: pyvelox
    # Overrides the image's default CMD and calls python3 explicitly
    command: /usr/bin/python3 -mpytest bft/tests/test_pyvelox.py
10 | 


--------------------------------------------------------------------------------
/ci/docker/velox.Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | 
 3 | ENV PYTHONUNBUFFERED=1
 4 | ENV PYTHONPATH=/bft/substrait
 5 | RUN apt-get update && apt-get install -y \
 6 |     python3 \
 7 |     python3-pip
 8 | RUN pip3 install --no-cache --upgrade pip setuptools pytest pyyaml mistletoe pyvelox ruamel.yaml antlr4-python3-runtime pytz
 9 | 
10 | WORKDIR /bft
11 | COPY . .
12 | 
13 | CMD /usr/bin/python -mpytest bft/tests/test_pyvelox.py
14 | 


--------------------------------------------------------------------------------
/index.yaml:
--------------------------------------------------------------------------------
 1 | substrait:
 2 |   extensions:
 3 |     - location: ./substrait/extensions/functions_aggregate_approx.yaml
 4 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_approx.yaml
 5 |     - location: ./substrait/extensions/functions_aggregate_generic.yaml
 6 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_generic.yaml
 7 |     - location: ./substrait/extensions/functions_arithmetic.yaml
 8 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml
 9 |     - location: ./substrait/extensions/functions_boolean.yaml
10 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml
11 |     - location: ./substrait/extensions/functions_comparison.yaml
12 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml
13 |     - location: ./substrait/extensions/functions_datetime.yaml
14 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_datetime.yaml
15 |     - location: ./substrait/extensions/functions_logarithmic.yaml
16 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_logarithmic.yaml
17 |     - location: ./substrait/extensions/functions_rounding.yaml
18 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_rounding.yaml
19 |     - location: ./substrait/extensions/functions_string.yaml
20 |       canonical: https://github.com/substrait-io/substrait/blob/main/extensions/functions_string.yaml
21 | cases:
22 |   - ./cases
23 | dialects:
24 |   - ./dialects
25 | supplements:
26 |   - ./supplemental
27 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | datafusion
 2 | duckdb
 3 | jinja2
 4 | mistletoe
 5 | pytest
 6 | pyvelox
 7 | pyyaml
 8 | snowflake
 9 | ruamel.yaml
10 | deepdiff
11 | pytz
12 | 


--------------------------------------------------------------------------------
/static_site/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/android-chrome-192x192.png


--------------------------------------------------------------------------------
/static_site/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/android-chrome-512x512.png


--------------------------------------------------------------------------------
/static_site/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/apple-touch-icon.png


--------------------------------------------------------------------------------
/static_site/assets/index/script.js:
--------------------------------------------------------------------------------
  1 | function showContent(sectionId) {
  2 |     // Hide all sections
  3 |     var sections = document.querySelectorAll('.content-container article');
  4 |     sections.forEach(function (section) {
  5 |         section.style.display = 'none';
  6 |     });
  7 | 
  8 |     // Show the selected section
  9 |     var selectedSection = document.getElementById(sectionId);
 10 |     if (selectedSection) {
 11 |         selectedSection.style.display = 'block';
 12 |     }
 13 | 
 14 |     // Hide the search results section
 15 |     var searchResultsSection = document.getElementById('searchResultsSection');
 16 |     if (searchResultsSection) {
 17 |         searchResultsSection.style.display = 'none';
 18 |     }
 19 | }
 20 | 
 21 | 
 22 | function searchFunctions() {
 23 |     try {
 24 |         const searchTerm = document.getElementById("searchInput").value.toLowerCase();
 25 |         let searchResults = [];
 26 | 
 27 |         for (let i = 0; i < functionsData.length; i++) {
 28 |             const functionName = functionsData[i].name.toLowerCase().replace(/^.*?_/, '');
 29 |             const functionBrief = JSON.parse('"' + functionsData[i].brief + '"').toLowerCase();
 30 | 
 31 |             /*  Perform search based on priority
 32 |                 Highest priority (Rank 3) - Exact name match
 33 |                 Rank 2 - Partial name match
 34 |                 Rank 1 - Exact match in brief contents
 35 |                 Rank 0 - Partial match in brief contents
 36 | 
 37 |             */
 38 |             const exactNameMatch = functionName === searchTerm;
 39 |             const partialNameMatch = !exactNameMatch && functionName.includes(searchTerm);
 40 |             const exactBriefMatch = ` ${functionBrief} `.includes(` ${searchTerm} `);
 41 |             const partialBriefMatch = !exactBriefMatch && functionBrief.includes(searchTerm);
 42 | 
 43 |             if (exactNameMatch || partialNameMatch || exactBriefMatch || partialBriefMatch) {
 44 | 
 45 |                 let rank = 0;
 46 |                 if (exactNameMatch) rank = 3;
 47 |                 else if (partialNameMatch) rank = 2;
 48 |                 else if (exactBriefMatch) rank = 1;
 49 | 
 50 |                 searchResults.push({
 51 |                     category: functionsData[i].category,
 52 |                     name: functionsData[i].name,
 53 |                     brief: functionsData[i].brief,
 54 |                     rank: rank
 55 |                 });
 56 |             }
 57 |         }
 58 | 
 59 |         // Sort search results by rank in descending order
 60 |         searchResults.sort((a, b) => b.rank - a.rank);
 61 | 
 62 |         displaySearchResults(searchResults);
 63 |     } catch (error) {
 64 |         console.error("Error while searching functions:", error);
 65 |     }
 66 | }
 67 | 
 68 | 
 69 | 
 70 | function displaySearchResults(results) {
 71 |     try {
 72 |         let homeSection = document.getElementById("home");
 73 |         let searchResultsSection = document.getElementById("searchResultsSection");
 74 | 
 75 |         // Hide home section and show search results section
 76 |         homeSection.style.display = "none";
 77 |         searchResultsSection.style.display = "block";
 78 | 
 79 |         let categoryTitleElement = document.createElement("h2");
 80 |         categoryTitleElement.className = "category-title";
 81 |         categoryTitleElement.innerHTML = "Search results";
 82 | 
 83 |         searchResultsSection.innerHTML = "";
 84 |         searchResultsSection.appendChild(categoryTitleElement);
 85 | 
 86 |         // Display search results in the section
 87 |         for (var i = 0; i < results.length; i++) {
 88 |             var cardLink = document.createElement("a");
 89 |             cardLink.className = "card mb-3 search-result-card";
 90 |             cardLink.href = "./" + results[i].name.toLowerCase() + ".html";
 91 |             cardLink.onclick = function () {
 92 |                 window.location.href = cardLink.href;
 93 |             };
 94 | 
 95 |             var cardBody = document.createElement("div");
 96 |             cardBody.className = "card-body";
 97 | 
 98 |             var cardTitle = document.createElement("h5");
 99 |             cardTitle.className = "card-title search-result-title";
100 |             cardTitle.innerHTML = results[i].category + " Functions";
101 | 
102 |             var cardText = document.createElement("p");
103 |             cardText.className = "card-text search-result-text";
104 |             cardText.innerHTML = "<span style='font-weight: bold;'>" + results[i].name.replace(/^.*?_/, '') + "</span>: " + results[i].brief;
105 | 
106 |             cardBody.appendChild(cardTitle);
107 |             cardBody.appendChild(cardText);
108 |             cardLink.appendChild(cardBody);
109 | 
110 |             searchResultsSection.appendChild(cardLink);
111 |         }
112 |     } catch (error) {
113 |         console.error("Error in displaying search results:", error);
114 |     }
115 | }
116 | 


--------------------------------------------------------------------------------
/static_site/assets/index/style.css:
--------------------------------------------------------------------------------
  1 | body {
  2 |     background-color: #f8f9fa;
  3 |     color: #005050;
  4 |     font-family: 'Courier New', Courier, monospace;
  5 | }
  6 | /* Sidebar menu: fixed to the left edge, viewport-high, scrolls when it overflows */
  7 | .menu-container {
  8 |     position: fixed;
  9 |     top: 3%;
 10 |     left: 0;
 11 |     z-index: 1000;
 12 |     width: max-content;
 13 |     height: 100vh;
 14 |     padding-top: 2%;
 15 |     padding-left: 1%;
 16 |     overflow-y: auto;
 17 |     border-right: 1px solid #dee2e6;
 18 |     background-color: #005050;
 19 |     color: #fff;
 20 | }
 21 | 
 22 | .menu-item {
 23 |     display: block;
 24 |     margin-bottom: 10px;
 25 |     padding: 8px 16px;
 26 |     border: 1px solid transparent;
 27 |     color: #fff;
 28 |     cursor: pointer;
 29 |     transition: background-color 0.3s;
 30 | }
 31 | 
 32 | .menu-item:hover {
 33 |     border: 1px solid #dee2e6;
 34 |     background-color: #007e7e;
 35 |     color: #fff;
 36 | }
 37 | /* Main content area (the margins presumably clear the fixed sidebar and header) */
 38 | .content-container {
 39 |     margin-top: 11%;
 40 |     margin-left: 18%;
 41 |     background-color: #f8f9fa;
 42 | }
 43 | 
 44 | .fixed-container {
 45 |     position: fixed;
 46 |     z-index: 1000;
 47 |     width: 72%;
 48 |     margin-top: -7%;
 49 |     margin-bottom: 0;
 50 |     padding-top: 2%;
 51 |     background-color: #f8f9fa;
 52 | }
 53 | 
 54 | .hidden {
 55 |     display: none;
 56 | }
 57 | 
 58 | .category-title {
 59 |     margin-bottom: 20px;
 60 |     padding-bottom: 10px;
 61 |     border-bottom: 1px solid #dee2e6;
 62 | }
 63 | 
 64 | .nav-link {
 65 |     color: #2a2a2a;
 66 | }
 67 | 
 68 | .nav-link:hover {
 69 |     color: #007bff;
 70 | }
 71 | 
 72 | .navbar {
 73 |     position: fixed;
 74 |     z-index: 2000;
 75 |     width: 100%;
 76 | }
 77 | /* Cards scale up slightly while hovered */
 78 | .card {
 79 |     transition: transform 0.2s;
 80 | }
 81 | 
 82 | .card:hover {
 83 |     transform: scale(1.05);
 84 | }
 85 | 
 86 | /* Functions table: the hovered row is highlighted */
 87 | .functions-table {
 88 |     width: 100%;
 89 |     border-collapse: collapse;
 90 | }
 91 | 
 92 | .functions-table td {
 93 |     padding: 0.5rem;
 94 | }
 95 | 
 96 | .functions-table .title-column {
 97 |     width: 30%;
 98 |     color: #201d38;
 99 |     font-weight: bold;
100 | }
101 | 
102 | .functions-table tr:hover {
103 |     background-color: #005050;
104 |     color: #fff;
105 | }
106 | 
107 | .functions-table tr:hover .title-column {
108 |     color: #fff;
109 | }
110 | /* Search result cards */
111 | .search-result-title {
112 |     color: #005050;
113 |     text-decoration: none;
114 | }
115 | 
116 | .search-result-text {
117 |     color: #000;
118 | }
119 | 


--------------------------------------------------------------------------------
/static_site/assets/supplementary/script.js:
--------------------------------------------------------------------------------
 1 | (function () {
 2 | 
 3 |     var dialectInfo = {};
 4 | 
 5 |     function updateDialect(newValue) {
 6 |         const desiredDialect = `dialect-${newValue}`;
 7 |         const caseInfo = dialectInfo[newValue].examples;
 8 |         const dialectDefinitions = document.querySelectorAll('.dialect-definition');
 9 |         for (var dialectDefinition of dialectDefinitions) {
10 |             if (dialectDefinition.id == desiredDialect) {
11 |                 dialectDefinition.removeAttribute('hidden');
12 |             } else {
13 |                 dialectDefinition.setAttribute('hidden', '');
14 |             }
15 |         }
16 | 
17 |         const cases = document.querySelectorAll('.bft-case');
18 |         const errMessages = document.querySelectorAll('.bft-case-err-message');
19 | 
20 |         for (let i = 0; i < caseInfo.length; i++) {
21 |             const case_msg = caseInfo[i];
22 |             if (case_msg == null) {
23 |                 cases[i].classList.remove("bft-error-case");
24 |                 errMessages[i].setAttribute("hidden", "");
25 |             } else {
26 |                 cases[i].classList.add("bft-error-case");
27 |                 errMessages[i].removeAttribute("hidden");
28 |                 errMessages[i].querySelector("td").innerText = case_msg;
29 |             }
30 |         }
31 | 
32 |         const kernelInfo = dialectInfo[newValue].kernels;
33 |         const kernelItems = document.querySelectorAll('.bft-kernel');
34 |         for (let i = 0; i < kernelInfo.length; i++) {
35 |             const kernelSpans = kernelItems[i].querySelectorAll('span');
36 |             if (kernelInfo[i]) {
37 |                 kernelSpans[0].classList.remove('bft-unsupported-kernel');
38 |                 kernelSpans[1].setAttribute('hidden', '');
39 |             } else {
40 |                 kernelSpans[0].classList.add('bft-unsupported-kernel');
41 |                 kernelSpans[1].removeAttribute('hidden');
42 |             }
43 |         }
44 |     }
45 | 
46 |     window.bftInitialize = function (functionDialectInfo) {
47 |         dialectInfo = functionDialectInfo;
48 |         const dialectSelect = document.getElementById('dialect');
49 |         updateDialect(dialectSelect.value);
50 |         dialectSelect.addEventListener('change', (e) => {
51 |             updateDialect(e.target.value);
52 |         });
53 |     }
54 | 
55 | })();
56 | 


--------------------------------------------------------------------------------
/static_site/assets/supplementary/style.css:
--------------------------------------------------------------------------------
 1 | .tooltip {
 2 |     position: absolute;
 3 |     z-index: 99;
 4 |     padding: 5px;
 5 |     border-radius: 5px;
 6 |     background: #222;
 7 |     color: #fff;
 8 | }
 9 | 
10 | tbody {
11 |     position: relative;
12 | }
13 | /* Cells of a case row flagged as an error get a light red background */
14 | .bft-error-case td {
15 |     background-clip: padding-box;
16 |     background-color: #ffcdd2;
17 | }
18 | 
19 | .bft-case-err-message {
20 |     font-style: italic;
21 |     font-weight: lighter !important;
22 | }
23 | 
24 | /* Row headers are not used, so the first cell keeps the initial weight */
25 | table tbody td:first-child {
26 |     font-weight: initial;
27 | }
28 | /* Kernel names flagged as unsupported are struck through */
29 | .bft-unsupported-kernel {
30 |     text-decoration: line-through;
31 | }
32 | /* Links marked .disabled lose their link styling */
33 | a.disabled {
34 |     color: var(--secondary-color);
35 |     cursor: initial;
36 |     text-decoration: none;
37 | }
38 | 
39 | a.disabled:hover {
40 |     background: initial;
41 | }
42 | 


--------------------------------------------------------------------------------
/static_site/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon-16x16.png


--------------------------------------------------------------------------------
/static_site/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon-32x32.png


--------------------------------------------------------------------------------
/static_site/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/substrait-io/bft/34b7a6e19d536e42082169eae0d049ce94f86b19/static_site/favicon.ico


--------------------------------------------------------------------------------
/supplemental/arithmetic/abs.md:
--------------------------------------------------------------------------------
 1 | # Abs
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | Computing the absolute value of an integer may overflow because the two's complement range is
 8 | asymmetric (for int8, abs(-128) is not representable). This option controls the behavior of the
 9 | function when the result falls outside the representable range of the type class.
10 | 
11 | #### SILENT
12 | 
13 | /[%Overflow$SILENT%]
14 | 
15 | #### SATURATE
16 | 
17 | /[%Overflow$SATURATE%]
18 | 
19 | #### ERROR
20 | 
21 | /[%Overflow$ERROR%]
22 | 
23 | ## Details
24 | 
25 | ### Non multiplicative
26 | 
27 | Although the mathematical absolute value is multiplicative, i.e. abs(x * y) = abs(x) * abs(y), the
28 | abs function is not, because of overflow. For example, for int8, abs(-1 * -128) will not be the same as
29 | abs(-1) * abs(-128), since the former will cause an overflow.
30 | 
31 | ### Triangular Inequality
32 | 
33 | Mathematically, the absolute value satisfies the triangle inequality, i.e. for two real numbers
34 | x and y, abs(x+y) <= abs(x) + abs(y). This might not hold true for the abs function due to overflow.
35 | For example, for int8, abs(-127 + 1) is 126, but abs(-127) + abs(1) overflows, so the right-hand
36 | side is no longer guaranteed to be the larger value.
37 | 
38 | ## Properties
39 | 
40 | ### Null propagating
41 | 
42 | /[%Properties$Null_propagating%]
43 | 
44 | ### NaN propagating
45 | 
46 | /[%Properties$NaN_propagating%]
47 | 
48 | ### Stateless
49 | 
50 | /[%Properties$Stateless%]
51 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/acos.md:
--------------------------------------------------------------------------------
 1 | # Acos
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Arccosine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ### On_domain_error
32 | 
33 | The arccosine function has a domain of [-1, 1], i.e. only values in this range are allowed. This option controls the behavior when the function is called with values outside of this range.
34 | 
35 | #### NAN
36 | 
37 | /[%On_domain_error$NAN%]
38 | 
39 | #### ERROR
40 | 
41 | /[%On_domain_error$ERROR%]
42 | 
43 | ## Details
44 | 
45 | ### Other floating point exceptions
46 | 
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 | 
53 | ### Numerical Precision
54 | 
55 | The precision of the acos function depends on the architecture in various dialects.
56 | 
57 | ### Output Range
58 | 
59 | The arccosine function has an output range of [0, pi], and it evaluates to 0
60 | at 1.
61 | 
62 | ## Properties
63 | 
64 | ### Null propagating
65 | 
66 | /[%Properties$Null_propagating%]
67 | 
68 | ### NaN propagating
69 | 
70 | /[%Properties$NaN_propagating%]
71 | 
72 | ### Stateless
73 | 
74 | /[%Properties$Stateless%]
75 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/acosh.md:
--------------------------------------------------------------------------------
 1 | # Acosh
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Hyperbolic arccosine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ### On_domain_error
32 | 
33 | The hyperbolic arccosine function has a domain of [1, Infinity], i.e. the input must be greater than or equal to one. This option controls the behavior when the function is called with values outside of this range.
34 | 
35 | #### NAN
36 | 
37 | /[%On_domain_error$NAN%]
38 | 
39 | #### ERROR
40 | 
41 | /[%On_domain_error$ERROR%]
42 | 
43 | ## Details
44 | 
45 | ### Other floating point exceptions
46 | 
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 | 
53 | ### Numerical Precision
54 | 
55 | The precision of the acosh function depends on the architecture in various dialects.
56 | 
57 | ### Output Range
58 | 
59 | The acosh function has an output range of [0, Infinity], and it evaluates to 0
60 | at 1.
61 | 
62 | ## Properties
63 | 
64 | ### Null propagating
65 | 
66 | /[%Properties$Null_propagating%]
67 | 
68 | ### NaN propagating
69 | 
70 | /[%Properties$NaN_propagating%]
71 | 
72 | ### Stateless
73 | 
74 | /[%Properties$Stateless%]
75 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/add.md:
--------------------------------------------------------------------------------
 1 | # Add
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | Adding two integers can trigger an overflow when the result is outside the
 8 | representable range of the type class. This option controls what happens when
 9 | this overflow occurs.
10 | 
11 | #### SILENT
12 | 
13 | /[%Overflow$SILENT%] For example, adding two int16 values cannot
14 | yield an int32 on overflow.
15 | 
16 | #### SATURATE
17 | 
18 | /[%Overflow$SATURATE%]
19 | 
20 | #### ERROR
21 | 
22 | /[%Overflow$ERROR%]
23 | 
24 | ### Rounding
25 | 
26 | Adding two floating point numbers can yield a result that is not exactly
27 | representable in the given type class. In this case the value will be rounded.
28 | Rounding behaviors are defined as part of the IEEE 754 standard.
29 | 
30 | #### TIE_TO_EVEN
31 | 
32 | /[%Rounding$TIE_TO_EVEN%]
33 | 
34 | #### TIE_AWAY_FROM_ZERO
35 | 
36 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
37 | 
38 | #### TRUNCATE
39 | 
40 | /[%Rounding$TRUNCATE%]
41 | 
42 | #### CEILING
43 | 
44 | /[%Rounding$CEILING%]
45 | 
46 | #### FLOOR
47 | 
48 | /[%Rounding$FLOOR%]
49 | 
50 | ## Details
51 | 
52 | ### Other floating point exceptions
53 | 
54 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
55 | example, division by zero, overflow, and underflow. However, these exceptions
56 | have default behaviors defined by IEEE 754 and, since no known engine deviates
57 | from these default values, these exceptions are not exposed as options. For more
58 | information on what happens in these cases refer to the IEEE 754 standard.
59 | 
60 | ### Not commutative
61 | 
62 | Addition, the algebraic operation, is commutative and associative, so its operands may be
63 | reordered freely. It may be tempting to believe the same holds for the add function. However,
64 | this is not true because of overflow. For example, when working with int8,
65 | add(add(120, 10), -5) will yield a different result than add(add(120, -5), 10)
66 | because the first will overflow and the second will not.
67 | 
68 | ## Properties
69 | 
70 | ### Null propagating
71 | 
72 | /[%Properties$Null_propagating%]
73 | 
74 | ### NaN propagating
75 | 
76 | /[%Properties$NaN_propagating%]
77 | 
78 | ### Stateless
79 | 
80 | /[%Properties$Stateless%] This is not guaranteed to be true for integer addition when overflow is SILENT.
81 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/asin.md:
--------------------------------------------------------------------------------
 1 | # Asin
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Arcsine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ### On_domain_error
32 | 
33 | The arcsine function has a domain of [-1, 1], i.e. only values in this range are allowed. This option controls the behavior when the function is called with values outside of this range.
34 | 
35 | #### NAN
36 | 
37 | /[%On_domain_error$NAN%]
38 | 
39 | #### ERROR
40 | 
41 | /[%On_domain_error$ERROR%]
42 | 
43 | ## Details
44 | 
45 | ### Other floating point exceptions
46 | 
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 | 
53 | ### Numerical Precision
54 | 
55 | The precision of the asin function depends on the architecture in various dialects.
56 | 
57 | ### Output Range
58 | 
59 | The arcsine function has an output range of [-pi/2, pi/2], and it evaluates to 0
60 | at 0.
61 | 
62 | ## Properties
63 | 
64 | ### Null propagating
65 | 
66 | /[%Properties$Null_propagating%]
67 | 
68 | ### NaN propagating
69 | 
70 | /[%Properties$NaN_propagating%]
71 | 
72 | ### Stateless
73 | 
74 | /[%Properties$Stateless%]
75 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/asinh.md:
--------------------------------------------------------------------------------
 1 | # Asinh
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Hyperbolic arcsine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the asinh function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | The asinh function has an output range of all real numbers, and it evaluates to 0
48 | at 0.
49 | 
50 | ## Properties
51 | 
52 | ### Null propagating
53 | 
54 | /[%Properties$Null_propagating%]
55 | 
56 | ### NaN propagating
57 | 
58 | /[%Properties$NaN_propagating%]
59 | 
60 | ### Stateless
61 | 
62 | /[%Properties$Stateless%]
63 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/atan.md:
--------------------------------------------------------------------------------
 1 | # Atan
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Arctangent of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the atan function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | The arctangent function has an output range of [-pi/2, pi/2], and it evaluates to 0
48 | at 0.
49 | 
50 | ## Properties
51 | 
52 | ### Null propagating
53 | 
54 | /[%Properties$Null_propagating%]
55 | 
56 | ### NaN propagating
57 | 
58 | /[%Properties$NaN_propagating%]
59 | 
60 | ### Stateless
61 | 
62 | /[%Properties$Stateless%]
63 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/atan2.md:
--------------------------------------------------------------------------------
 1 | # Atan2
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Arctangent of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ### On_domain_error
32 | 
33 | Mathematically, the atan2 function has a domain of [-Infinity, Infinity] for each of its arguments, i.e. only values in this range are allowed. This option controls the behavior when the function is called with values outside of this range.
34 | 
35 | #### NAN
36 | 
37 | /[%On_domain_error$NAN%]
38 | 
39 | #### ERROR
40 | 
41 | /[%On_domain_error$ERROR%]
42 | 
43 | ## Details
44 | 
45 | ### Other floating point exceptions
46 | 
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 | 
53 | ### Numerical Precision
54 | 
55 | The precision of the atan2 function depends on the architecture in various dialects.
56 | 
57 | ### Output Range
58 | 
59 | The atan2 function has an output range of [-pi, pi].
60 | 
61 | ## Properties
62 | 
63 | ### Null propagating
64 | 
65 | /[%Properties$Null_propagating%]
66 | 
67 | ### NaN propagating
68 | 
69 | /[%Properties$NaN_propagating%]
70 | 
71 | ### Stateless
72 | 
73 | /[%Properties$Stateless%]
74 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/atanh.md:
--------------------------------------------------------------------------------
 1 | # Atanh
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Hyperbolic arctangent of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ### On_domain_error
32 | 
33 | The hyperbolic arctangent function has a domain of [-1, 1]. This option controls the behavior when the function is called with values outside of this range.
34 | 
35 | #### NAN
36 | 
37 | /[%On_domain_error$NAN%]
38 | 
39 | #### ERROR
40 | 
41 | /[%On_domain_error$ERROR%]
42 | 
43 | ## Details
44 | 
45 | ### Other floating point exceptions
46 | 
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 | 
53 | ### Numerical Precision
54 | 
55 | The precision of the atanh function depends on the architecture in various dialects.
56 | 
57 | ### Output Range
58 | 
59 | The atanh function has an output range of all real numbers, and it evaluates to 0
60 | at 0.
61 | 
62 | ## Properties
63 | 
64 | ### Null propagating
65 | 
66 | /[%Properties$Null_propagating%]
67 | 
68 | ### NaN propagating
69 | 
70 | /[%Properties$NaN_propagating%]
71 | 
72 | ### Stateless
73 | 
74 | /[%Properties$Stateless%]
75 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_and.md:
--------------------------------------------------------------------------------
 1 | # Bitwise_and
 2 | 
 3 | ## Details
 4 | 
 5 | ### Associative
 6 | 
 7 | The bitwise_and function is associative, i.e.
 8 | the grouping of operands does not affect the result. For example, 
 9 | bitwise_and(bitwise_and(a,b), c) will be same as bitwise_and(a, bitwise_and(b,c)).
10 | 
11 | ### Commutative
12 | 
13 | The order of operands does not affect the result in Bitwise_and. For example, 
14 | bitwise_and(a,b) will be the same as bitwise_and(b,a).
15 | 
16 | ### Identity
17 | 
18 | For any valid integer, the bitwise_and with the bit pattern of all ones (-1 in two's complement)
19 | results in the integer itself. For example, bitwise_and(123, -1) = 123.
20 | 
21 | ### Bitwise Not Relationship
22 | 
23 | The result of performing a bitwise_and operation between a value 
24 | x and its bitwise_not is always 0.
25 | 
26 | ## Properties
27 | 
28 | ### Null propagating
29 | 
30 | /[%Properties$Null_propagating%]
31 | 
32 | ### NaN propagating
33 | 
34 | /[%Properties$NaN_propagating%]
35 | 
36 | ### Stateless
37 | 
38 | /[%Properties$Stateless%]
39 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_not.md:
--------------------------------------------------------------------------------
 1 | # Bitwise_not
 2 | 
 3 | ## Details
 4 | 
 5 | ### Complementary
 6 | 
 7 | The bitwise_not function is its own inverse, i.e.
 8 | bitwise_not(bitwise_not(x)) will be equal to x, for any integer x.
 9 | 
10 | ### XOR Relationship
11 | 
12 | Bitwise_not has a relationship with the XOR function, where the XORing of
13 | a valid integer with the bit pattern of all 1s results in the bitwise_not of 
14 | that integer.
15 | 
16 | ### Two's complement
17 | 
18 | The bitwise_not of a valid integer is equivalent to negating the number and subtracting 1.
19 | 
20 | ## Properties
21 | 
22 | ### Null propagating
23 | 
24 | /[%Properties$Null_propagating%]
25 | 
26 | ### NaN propagating
27 | 
28 | /[%Properties$NaN_propagating%]
29 | 
30 | ### Stateless
31 | 
32 | /[%Properties$Stateless%]
33 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_or.md:
--------------------------------------------------------------------------------
 1 | # Bitwise_or
 2 | 
 3 | ## Details
 4 | 
 5 | ### Associative
 6 | 
 7 | The bitwise_or function is associative, i.e.
 8 | the grouping of operands does not affect the result. For example, 
 9 | bitwise_or(bitwise_or(a,b), c) will be same as bitwise_or(a, bitwise_or(b,c)).
10 | 
11 | ### Commutative
12 | 
13 | The order of operands does not affect the result in Bitwise_or. For example, 
14 | bitwise_or(a,b) will be the same as bitwise_or(b,a).
15 | 
16 | ### Identity
17 | 
18 | For any valid integer, the bitwise_or with zero results in
19 | the integer itself. For example, bitwise_or(123, 0) = 123.
20 | 
21 | ## Properties
22 | 
23 | ### Null propagating
24 | 
25 | /[%Properties$Null_propagating%]
26 | 
27 | ### NaN propagating
28 | 
29 | /[%Properties$NaN_propagating%]
30 | 
31 | ### Stateless
32 | 
33 | /[%Properties$Stateless%]
34 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/bitwise_xor.md:
--------------------------------------------------------------------------------
 1 | # Bitwise_xor
 2 | 
 3 | ## Details
 4 | 
 5 | ### Associative
 6 | 
 7 | The bitwise_xor function is associative, i.e.
 8 | the grouping of operands does not affect the result. For example, 
 9 | bitwise_xor(bitwise_xor(a,b), c) will be the same as bitwise_xor(a, bitwise_xor(b,c)).
10 | 
11 | ### Commutative
12 | 
13 | The order of operands does not affect the result in Bitwise_xor. For example, 
14 | bitwise_xor(a,b) will be the same as bitwise_xor(b,a).
15 | 
16 | ## Properties
17 | 
18 | ### Null propagating
19 | 
20 | /[%Properties$Null_propagating%]
21 | 
22 | ### NaN propagating
23 | 
24 | /[%Properties$NaN_propagating%]
25 | 
26 | ### Stateless
27 | 
28 | /[%Properties$Stateless%]
29 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/cos.md:
--------------------------------------------------------------------------------
 1 | # Cos
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Cosine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the cosine function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | Being a sinusoidal trigonometric function, the output of the cos function is restricted to [-1,1].
48 | 
49 | ## Properties
50 | 
51 | ### Null propagating
52 | 
53 | /[%Properties$Null_propagating%]
54 | 
55 | ### NaN propagating
56 | 
57 | /[%Properties$NaN_propagating%]
58 | 
59 | ### Stateless
60 | 
61 | /[%Properties$Stateless%]
62 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/cosh.md:
--------------------------------------------------------------------------------
 1 | # Cosh
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Hyperbolic cosine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the cosh function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | The hyperbolic cosine function has an output range of [1, Infinity], and it evaluates to 1
48 | for an input of 0.
49 | 
50 | ## Properties
51 | 
52 | ### Null propagating
53 | 
54 | /[%Properties$Null_propagating%]
55 | 
56 | ### NaN propagating
57 | 
58 | /[%Properties$NaN_propagating%]
59 | 
60 | ### Stateless
61 | 
62 | /[%Properties$Stateless%]
63 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/definitions.yaml:
--------------------------------------------------------------------------------
 1 | Overflow:
 2 |   SILENT: >
 3 |     If an overflow occurs then an integer value will be returned. The value is
 4 |     undefined. It may be any integer and can change from engine to engine or
 5 |     even from row to row within the same query.  The only constraint is that it
 6 |     must be a valid value for the result type class.
 7 | 
 8 |   SATURATE: >
 9 |     If an overflow occurs then the largest (for positive overflow) or smallest
10 |     (for negative overflow) possible value for the type class will be returned.
11 | 
12 |   ERROR: >
13 |     If an overflow occurs then an error should be raised.
14 | 
15 | Rounding:
16 |   TIE_TO_EVEN: >
17 |     Round to the nearest value. If the number is exactly halfway between two
18 |     values then round to the number whose least significant digit is even. Or,
19 |     because we are working with binary digits, round to the number whose last digit
20 |     is 0. This is the default behavior in many systems because it helps to avoid
21 |     bias in rounding.
22 | 
23 |   TIE_AWAY_FROM_ZERO: >
24 |     Round to the nearest value. If the number is exactly halfway between two values
25 |     then round to the number furthest from zero.
26 | 
27 |   TRUNCATE: >
28 |     Round to the nearest value. If the number is exactly halfway between two values
29 |     then round to the value closest to zero.
30 | 
31 |   CEILING: >
32 |     Round to the value closest to positive infinity.
33 | 
34 |   FLOOR: >
35 |     Round to the value closest to negative infinity.
36 | 
37 | Properties:
38 |   Null_propagating: >
39 |     If any of the inputs is null then the output will be null
40 | 
41 |   NaN_propagating: >
42 |     If any of the inputs is NaN (and the other input is not null) then the output
43 |     will be NaN
44 | 
45 |   Stateless: >
46 |     The output will be the same regardless of the order of input rows.
47 | 
48 | On_domain_error:
49 |   NAN: >
50 |     Return a Not a Number value if any or all of the input values are either 0 or ±infinity.
51 |   ERROR: >
52 |     If any or all of the input values are either 0 or ±infinity an error should be raised.
53 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/divide.md:
--------------------------------------------------------------------------------
  1 | # Divide
  2 | 
  3 | ## Options
  4 | 
  5 | ### Overflow
  6 | 
  7 | Dividing two integers can trigger an overflow when the result is outside the
  8 | representable range of the type class. This option controls what happens when
  9 | this overflow occurs.
 10 | 
 11 | #### SILENT
 12 | 
 13 | If an overflow occurs then an integer value will be returned. The value is
 14 | undefined. It may be any integer and can change from engine to engine or
 15 | even from row to row within the same query.  The only constraint is that it
 16 | must be a valid value for the result type class (e.g. dividing two int16
 17 | cannot yield an int32 on overflow)
 18 | 
 19 | #### SATURATE
 20 | 
 21 | If an overflow occurs then the largest (for positive overflow) or smallest
 22 | (for negative overflow) possible value for the type class will be returned.
 23 | 
 24 | #### ERROR
 25 | 
 26 | If an overflow occurs then an error should be raised.
 27 | 
 28 | ### Rounding
 29 | 
 30 | Dividing two floating point numbers can yield a result that is not exactly
 31 | representable in the given type class. In this case the value will be rounded.
 32 | Rounding behaviors are defined as part of the IEEE 754 standard.
 33 | 
 34 | #### TIE_TO_EVEN
 35 | 
 36 | Round to the nearest value. If the number is exactly halfway between two
 37 | values then round to the number whose least significant digit is even. Or,
 38 | because we are working with binary digits, round to the number whose last digit
 39 | is 0. This is the default behavior in many systems because it helps to avoid
 40 | bias in rounding.
 41 | 
 42 | #### TIE_AWAY_FROM_ZERO
 43 | 
 44 | Round to the nearest value. If the number is exactly halfway between two values
 45 | then round to the number furthest from zero.
 46 | 
 47 | #### TRUNCATE
 48 | 
 49 | Round to the nearest value. If the number is exactly halfway between two values
 50 | then round to the value closest to zero.
 51 | 
 52 | #### CEILING
 53 | 
 54 | Round to the value closest to positive infinity.
 55 | 
 56 | #### FLOOR
 57 | 
 58 | Round to the value closest to negative infinity.
 59 | 
 60 | ### On_domain_error
 61 | 
 62 | Option controls what happens when the dividend and divisor in a divide function
 63 | are either both 0 or both ±infinity.
 64 | 
 65 | #### NAN
 66 | 
 67 | /[%On_domain_error$NAN%]
 68 | 
 69 | #### ERROR
 70 | 
 71 | /[%On_domain_error$ERROR%]
 72 | 
 73 | ### On_division_by_zero
 74 | 
 75 | Option controls function behavior in cases when the divisor is 0 but the dividend is not zero.
 76 | 
 77 | #### LIMIT
 78 | 
 79 | Return +infinity or -infinity depending on the signs of the dividend and the divisor involved.
 80 | 
 81 | ## Details
 82 | 
 83 | ### Other floating point exceptions
 84 | 
 85 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
 86 | example, overflow, and underflow. However, these exceptions
 87 | have default behaviors defined by IEEE 754 and, since no known engine deviates
 88 | from these default values, these exceptions are not exposed as options. For more
 89 | information on what happens in these cases refer to the IEEE 754 standard.
 90 | 
 91 | ### Not commutative
 92 | 
 93 | Division, the algebraic operation, is neither commutative nor associative, and
 94 | neither is the divide function.  In addition, overflow can make expressions that
 95 | are algebraically equal differ.  For example, when working with int8,
 96 | divide(divide(-128, -1), -1) and divide(-128, divide(-1, -1)) are both
 97 | algebraically -128, but the function results differ because the first will
 98 | overflow and the second will not.
 99 | 
100 | ## Properties
101 | 
102 | ### Null propagating
103 | 
104 | If any of the inputs is null then the output will be null
105 | 
106 | ### NaN propagating
107 | 
108 | If any of the inputs is NaN (and the other input is not null) then the output
109 | will be NaN
110 | 
111 | ### Stateless
112 | 
113 | The output will be the same regardless of the order of input rows. This is not
114 | guaranteed to be true for integer division when overflow is SILENT.
115 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/exponential.md:
--------------------------------------------------------------------------------
 1 | # Exp
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Exponential of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the exponential function depends on the precision of the input types
44 | and the way the operation is carried out in various dialects.
45 | 
46 | ## Properties
47 | 
48 | ### Null propagating
49 | 
50 | /[%Properties$Null_propagating%]
51 | 
52 | ### NaN propagating
53 | 
54 | /[%Properties$NaN_propagating%]
55 | 
56 | ### Stateless
57 | 
58 | /[%Properties$Stateless%]
59 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/factorial.md:
--------------------------------------------------------------------------------
 1 | # Factorial
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | The factorial function grows very quickly, so its result can exceed the representable
 8 | range of the type class and cause an overflow. This option
 9 | controls the behavior upon overflow in the Factorial function.
10 | 
11 | #### SILENT
12 | 
13 | /[%Overflow$SILENT%]
14 | 
15 | #### SATURATE
16 | 
17 | /[%Overflow$SATURATE%]
18 | 
19 | #### ERROR
20 | 
21 | /[%Overflow$ERROR%]
22 | 
23 | ## Details
24 | 
25 | ### Input restrictions
26 | 
27 | Mathematically, factorial is not defined for negative integers or non-integer values, since it is
28 | the product of all positive integers up to and including the given non-negative integer.
29 | 
30 | ## Properties
31 | 
32 | ### Null propagating
33 | 
34 | /[%Properties$Null_propagating%]
35 | 
36 | ### NaN propagating
37 | 
38 | /[%Properties$NaN_propagating%]
39 | 
40 | ### Stateless
41 | 
42 | /[%Properties$Stateless%]
43 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/modulus.md:
--------------------------------------------------------------------------------
 1 | # Modulus
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | The modulus operation typically occurs after finding the quotient,
 8 | i.e., mod(x, y) = x - y * round_func(x/y), where the round_func can be
 9 | to truncate, floor, or any such operation. Thus, the entire operation
10 | may trigger an overflow when the result is outside the representable
11 | range of the type class. This option controls what happens when this overflow occurs.
12 | 
13 | #### SILENT
14 | 
15 | If an overflow occurs then an integer value will be returned. The value is
16 | undefined. It may be any integer and can change from engine to engine or
17 | even from row to row within the same query.  The only constraint is that it
18 | must be a valid value for the result type class (e.g. modulus of int16
19 | cannot yield an int32 on overflow)
20 | 
21 | #### SATURATE
22 | 
23 | If an overflow occurs then the largest (for positive overflow) or smallest
24 | (for negative overflow) possible value for the type class will be returned.
25 | 
26 | #### ERROR
27 | 
28 | If an overflow occurs then an error should be raised.
29 | 
30 | ### Division_type
31 | 
32 | Determines the rounding function that is applied to the quotient
33 | when evaluating the remainder. The remainder will be
34 | determined by  r = x - y * round_func(x/y)
35 | 
36 | #### TRUNCATE
37 | 
38 | The quotient is evaluated i.e. the round_func(x/y) is truncated,
39 | thus the fractional result is rounded towards zero.
40 | 
41 | #### FLOOR
42 | 
43 | The quotient is evaluated i.e. the round_func(x/y) is floored,
44 | thus the fractional result is rounded to the largest integer
45 | value less than or equal to it.
46 | 
47 | ### On_domain_error
48 | 
49 | Option controls what happens when the dividend is ±infinity or
50 | the divisor is 0 or ±infinity in a modulus function.
51 | 
52 | #### NULL
53 | 
54 | Return a NULL if the dividend is ±infinity or the divisor is 0
55 | or ±infinity.
56 | 
57 | #### ERROR
58 | 
59 | If the dividend is ±infinity or the divisor is 0 or ±infinity,
60 | an error should be raised.
61 | 
62 | ## Details
63 |  
64 | ### Overflow
65 | 
66 | The Modulus function requires the Overflow option in situations
67 | where any or all of the involved operations result in overflow
68 | from the specified range. For example, in mod(-128, -1) within
69 | the int8 range, an overflow will occur as the operation will
70 | lead to (-128) - (-1) * round_func(-128/-1). Since the division operation
71 | (-128/-1) results in an overflow (given that the range of int8
72 | is -128 to 127), the Overflow option becomes essential.
73 | 
74 | ### Not commutative
75 | 
76 | Modulus as an arithmetic operation is not commutative by nature.
77 | 
78 | ## Properties
79 | 
80 | ### Null propagating
81 | 
82 | If any of the inputs is null then the output will be null
83 | 
84 | ### NaN propagating
85 | 
86 | If any of the inputs is NaN (and the other input is not null) then the output
87 | will be NaN
88 | 
89 | ### Stateless
90 | 
91 | The output will be the same regardless of the order of input rows. This is not
92 | guaranteed to be true for integer modulus when overflow is SILENT.
93 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/multiply.md:
--------------------------------------------------------------------------------
 1 | # Multiply
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | Multiplying two integers can trigger an overflow when the result is outside the
 8 | representable range of the type class. This option controls what happens when
 9 | this overflow occurs.
10 | 
11 | #### SILENT
12 | 
13 | If an overflow occurs then an integer value will be returned. The value is
14 | undefined. It may be any integer and can change from engine to engine or
15 | even from row to row within the same query.  The only constraint is that it
16 | must be a valid value for the result type class (e.g. multiplying two int16
17 | cannot yield an int32 on overflow)
18 | 
19 | #### SATURATE
20 | 
21 | If an overflow occurs then the largest (for positive overflow) or smallest
22 | (for negative overflow) possible value for the type class will be returned.
23 | 
24 | #### ERROR
25 | 
26 | If an overflow occurs then an error should be raised.
27 | 
28 | ### Rounding
29 | 
30 | Multiplying two floating point numbers can yield a result that is not exactly
31 | representable in the given type class. In this case the value will be rounded.
32 | Rounding behaviors are defined as part of the IEEE 754 standard.
33 | 
34 | #### TIE_TO_EVEN
35 | 
36 | Round to the nearest value. If the number is exactly halfway between two
37 | values then round to the number whose least significant digit is even. Or,
38 | because we are working with binary digits, round to the number whose last digit
39 | is 0. This is the default behavior in many systems because it helps to avoid
40 | bias in rounding.
41 | 
42 | #### TIE_AWAY_FROM_ZERO
43 | 
44 | Round to the nearest value. If the number is exactly halfway between two values
45 | then round to the number furthest from zero.
46 | 
47 | #### TRUNCATE
48 | 
49 | Round to the nearest value. If the number is exactly halfway between two values
50 | then round to the value closest to zero.
51 | 
52 | #### CEILING
53 | 
54 | Round to the value closest to positive infinity.
55 | 
56 | #### FLOOR
57 | 
58 | Round to the value closest to negative infinity.
59 | 
60 | ## Details
61 | 
62 | ### Other floating point exceptions
63 | 
64 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
65 | example, division by zero, overflow, and underflow. However, these exceptions
66 | have default behaviors defined by IEEE 754 and, since no known engine deviates
67 | from these default values, these exceptions are not exposed as options. For more
68 | information on what happens in these cases refer to the IEEE 754 standard.
69 | 
70 | ### Not commutative
71 | 
72 | Multiplication, the algebraic operation, is commutative.  So it may be tempting to
73 | believe the multiply function is commutative as well.  However, this is not true
74 | because of overflow.  For example, when working with int8 the result of
75 | multiply(multiply(-1, -128), -1) may yield a different result than
76 | multiply(multiply(-1, -1), -128) because the first will overflow and the second
77 | will not.
78 | 
79 | ## Properties
80 | 
81 | ### Null propagating
82 | 
83 | If any of the inputs is null then the output will be null
84 | 
85 | ### NaN propagating
86 | 
87 | If any of the inputs is NaN (and the other input is not null) then the output
88 | will be NaN
89 | 
90 | ### Stateless
91 | 
92 | The output will be the same regardless of the order of input rows. This is not
93 | guaranteed to be true for integer multiplication when overflow is SILENT.
94 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/negate.md:
--------------------------------------------------------------------------------
 1 | # Negate
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | Negating a number on the limit of the allowed range of the type class may lead to
 8 | an overflow. For example, for int8, the result of negate(-128)
 9 | will overflow since the range for the int8 type class is [-128,127]. This option
10 | controls the behavior upon overflow in the negate function.
11 | 
12 | #### SILENT
13 | 
14 | /[%Overflow$SILENT%]
15 | 
16 | #### SATURATE
17 | 
18 | /[%Overflow$SATURATE%]
19 | 
20 | #### ERROR
21 | 
22 | /[%Overflow$ERROR%]
23 | 
24 | ## Details
25 | 
26 | ### Not Idempotent
27 | 
28 | Algebraically, negating a value twice returns the original value, but this does not hold for the function because of overflow.
29 | For example, with int8, the result of negate(negate(-128)) is not guaranteed to be -128 as the inner negation will overflow.
30 | 
31 | ### Not commutative
32 | 
33 | Negation, the algebraic operation, commutes with addition, i.e. -(a + b) = (-a) + (-b).
34 | So it may be tempting to believe the negate function does so as well.  However, this
35 | is not true because of overflow.  For example, when working with int8 the result of
36 | negate(124 + 4) will yield a different result than negate(124) + negate(4)
37 | because the first will overflow and the second will not.
38 | 
39 | ## Properties
40 | 
41 | ### Null propagating
42 | 
43 | /[%Properties$Null_propagating%]
44 | 
45 | ### NaN propagating
46 | 
47 | /[%Properties$NaN_propagating%]
48 | 
49 | ### Stateless
50 | 
51 | /[%Properties$Stateless%]
52 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/power.md:
--------------------------------------------------------------------------------
 1 | # Power
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | The power operation may lead to overflowing when the result is 
 8 | outside the representable range of the type class. 
 9 | This option controls what happens when this overflow occurs.
10 | 
11 | #### SILENT
12 | 
13 | /[%Overflow$SILENT%]
14 | 
15 | #### SATURATE
16 | 
17 | /[%Overflow$SATURATE%]
18 | 
19 | #### ERROR
20 | 
21 | /[%Overflow$ERROR%]
22 | 
23 | ## Details
24 |  
25 | ### Overflow
26 | 
27 | The power function requires the Overflow control for situations where
28 | the resulting value exceeds the type class limit. For example, in
29 | pow(2, 65), the input values are in the allowed int64 range,
30 | but the result goes out of range.
31 | 
32 | ### Numerical Precision
33 | 
34 | The precision of the power function depends on the precision of the input types
35 | and the way the operation is carried out in various dialects.
36 | 
37 | ## Properties
38 | 
39 | ### Null propagating
40 | 
41 | /[%Properties$Null_propagating%]
42 | 
43 | ### NaN propagating
44 | 
45 | /[%Properties$NaN_propagating%]
46 | 
47 | ### Stateless
48 | 
49 | /[%Properties$Stateless%]
50 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/sign.md:
--------------------------------------------------------------------------------
 1 | # Sign
 2 | 
 3 | ## Details
 4 | 
 5 | ### Multiplicative
 6 | 
 7 | The Sign function is multiplicative, i.e. sign(x * y) = sign(x) * sign(y). Say for example, in int8, 
 8 | sign(-2 * 3) will be the same as sign(-2) * sign(3).
 9 | 
10 | ## Properties
11 | 
12 | ### Null propagating
13 | 
14 | /[%Properties$Null_propagating%]
15 | 
16 | ### NaN propagating
17 | 
18 | /[%Properties$NaN_propagating%]
19 | 
20 | ### Stateless
21 | 
22 | /[%Properties$Stateless%]
23 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/sin.md:
--------------------------------------------------------------------------------
 1 | # Sin
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Sine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the sin function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | Being a sinusoidal trigonometric function, the output of the sin function is restricted to [-1,1].
48 | 
49 | ## Properties
50 | 
51 | ### Null propagating
52 | 
53 | /[%Properties$Null_propagating%]
54 | 
55 | ### NaN propagating
56 | 
57 | /[%Properties$NaN_propagating%]
58 | 
59 | ### Stateless
60 | 
61 | /[%Properties$Stateless%]
62 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/sinh.md:
--------------------------------------------------------------------------------
 1 | # Sinh
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Hyperbolic sine of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the sinh function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | The hyperbolic sine function has an output range of [-Infinity, Infinity], and it evaluates to 0
48 | for an input of 0.
49 | 
50 | ## Properties
51 | 
52 | ### Null propagating
53 | 
54 | /[%Properties$Null_propagating%]
55 | 
56 | ### NaN propagating
57 | 
58 | /[%Properties$NaN_propagating%]
59 | 
60 | ### Stateless
61 | 
62 | /[%Properties$Stateless%]
63 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/sqrt.md:
--------------------------------------------------------------------------------
 1 | # Sqrt
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Taking the square root of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ### On_domain_error
32 | 
33 | Square root operation is typically allowed only for non-negative real numbers. This option controls the behavior when the function is called with values not adhering to this rule.
34 | 
35 | #### NAN
36 | 
37 | /[%On_domain_error$NAN%]
38 | 
39 | #### ERROR
40 | 
41 | /[%On_domain_error$ERROR%]
42 | 
43 | ## Details
44 | 
45 | ### Other floating point exceptions
46 | 
47 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
48 | example, division by zero, overflow, and underflow. However, these exceptions
49 | have default behaviors defined by IEEE 754 and, since no known engine deviates
50 | from these default values, these exceptions are not exposed as options. For more
51 | information on what happens in these cases refer to the IEEE 754 standard.
52 | 
53 | ### Domain restrictions
54 | 
55 | Mathematically, the square root of a negative real number is a complex number, and thus in function usage, typically only non-negative real numbers are allowed. Applying the function to a negative real number may raise an error or result in a NaN value.
56 | 
57 | 
58 | ## Properties
59 | 
60 | ### Null propagating
61 | 
62 | /[%Properties$Null_propagating%]
63 | 
64 | ### NaN propagating
65 | 
66 | /[%Properties$NaN_propagating%]
67 | 
68 | ### Stateless
69 | 
70 | /[%Properties$Stateless%]
71 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/subtract.md:
--------------------------------------------------------------------------------
 1 | # Subtract
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | Subtracting two integers can trigger an overflow when the result is outside the
 8 | representable range of the type class. This option controls what happens when
 9 | this overflow occurs.
10 | 
11 | #### SILENT
12 | 
13 | /[%Overflow$SILENT%] For example, subtracting two int16 cannot
14 | yield an int32 on overflow.
15 | 
16 | #### SATURATE
17 | 
18 | /[%Overflow$SATURATE%]
19 | 
20 | #### ERROR
21 | 
22 | /[%Overflow$ERROR%]
23 | 
24 | ### Rounding
25 | 
26 | Subtracting two floating point numbers can yield a result that is not exactly
27 | representable in the given type class. In this case the value will be rounded.
28 | Rounding behaviors are defined as part of the IEEE 754 standard.
29 | 
30 | #### TIE_TO_EVEN
31 | 
32 | /[%Rounding$TIE_TO_EVEN%]
33 | 
34 | #### TIE_AWAY_FROM_ZERO
35 | 
36 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
37 | 
38 | #### TRUNCATE
39 | 
40 | /[%Rounding$TRUNCATE%]
41 | 
42 | #### CEILING
43 | 
44 | /[%Rounding$CEILING%]
45 | 
46 | #### FLOOR
47 | 
48 | /[%Rounding$FLOOR%]
49 | 
50 | ## Details
51 | 
52 | ### Other floating point exceptions
53 | 
54 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
55 | example, division by zero, overflow, and underflow. However, these exceptions
56 | have default behaviors defined by IEEE 754 and, since no known engine deviates
57 | from these default values, these exceptions are not exposed as options. For more
58 | information on what happens in these cases refer to the IEEE 754 standard.
59 | 
60 | ### Not commutative
61 | 
62 | In exact arithmetic, (a - b) - c equals (a - c) - b, so it may be tempting to
63 | believe subtract calls can be reordered the same way.  However, this is not true
64 | because of overflow.  For example, when working with int8 the result of
65 | subtract(subtract(-120, 10), -5) will yield a different result than
66 | subtract(subtract(-120, -5), 10) because the first will overflow and the second
67 | will not.
68 | 
69 | ## Properties
70 | 
71 | ### Null propagating
72 | 
73 | /[%Properties$Null_propagating%]
74 | 
75 | ### NaN propagating
76 | 
77 | /[%Properties$NaN_propagating%]
78 | 
79 | ### Stateless
80 | 
81 | /[%Properties$Stateless%] This is not
82 | guaranteed to be true for integer subtraction when overflow is SILENT.
83 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/sum.md:
--------------------------------------------------------------------------------
 1 | # Sum
 2 | 
 3 | ## Options
 4 | 
 5 | ### Overflow
 6 | 
 7 | Sum of a set of values can trigger an overflow when the result is outside the
 8 | representable range of the type class. This option controls what happens when
 9 | this overflow occurs.
10 | 
11 | #### SILENT
12 | 
13 | If an overflow occurs then an integer value will be returned. The value is
14 | undefined. It may be any integer and can change from engine to engine or
15 | even from row to row within the same query.  The only constraint is that it
16 | must be a valid value for the result type class (e.g. adding two int16 cannot
17 | yield an int32 on overflow)
18 | 
19 | #### SATURATE
20 | 
21 | If an overflow occurs then the largest (for positive overflow) or smallest
22 | (for negative overflow) possible value for the type class will be returned.
23 | 
24 | #### ERROR
25 | 
26 | If an overflow occurs then an error should be raised.
27 | 
28 | ## Details
29 | 
30 | ### Other floating point exceptions
31 | 
32 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
33 | example, division by zero, overflow, and underflow. However, these exceptions
34 | have default behaviors defined by IEEE 754 and, since no known engine deviates
35 | from these default values, these exceptions are not exposed as options. For more
36 | information on what happens in these cases refer to the IEEE 754 standard.
37 | 
38 | ### Not commutative
39 | 
40 | Addition, the algebraic operation, is commutative and associative.  So it may be
41 | tempting to believe the order in which sum adds its inputs does not matter.
42 | However, this is not true because of overflow.  For example, when working with
43 | int8 the result of add(add(120, 10), -5) will yield a different result than
44 | add(add(120, -5), 10) because the first will overflow and the second will not.
45 | 
46 | ## Properties
47 | 
48 | ### Nullability
49 | 
50 | Specifies how the nullability of the output is derived from the nullability of
51 | the input arguments. The Sum aggregate function follows a
52 | DECLARED_OUTPUT nullability.
53 | 
54 | ### Decomposable
55 | 
56 | The Sum aggregate function can be decomposed into more than
57 | one intermediate step.
58 | 
59 | ### Intermediate
60 | 
61 | The intermediate output type of the Sum function is the
62 | type class of the input arguments.
63 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/tan.md:
--------------------------------------------------------------------------------
 1 | # Tan
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Tangent of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the tan function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | Mathematically, the tangent function has a range of (-Inf, Inf): it is undefined at input
48 | values of (pi/2) + k*pi, where k is any integer, and grows without bound near them. Computationally,
49 | the inputs where the tangent function is not defined result in approximately 1255.76 or -1255.76.
50 | Thus, the output range becomes [-1255.76, 1255.76].
51 | 
52 | ## Properties
53 | 
54 | ### Null propagating
55 | 
56 | /[%Properties$Null_propagating%]
57 | 
58 | ### NaN propagating
59 | 
60 | /[%Properties$NaN_propagating%]
61 | 
62 | ### Stateless
63 | 
64 | /[%Properties$Stateless%]
65 | 


--------------------------------------------------------------------------------
/supplemental/arithmetic/tanh.md:
--------------------------------------------------------------------------------
 1 | # Tanh
 2 | 
 3 | ## Options
 4 | 
 5 | ### Rounding
 6 | 
 7 | Hyperbolic tangent of an input can yield a result that is not exactly
 8 | representable in the given type class. In this case the value will be rounded.
 9 | Rounding behaviors are defined as part of the IEEE 754 standard.
10 | 
11 | #### TIE_TO_EVEN
12 | 
13 | /[%Rounding$TIE_TO_EVEN%]
14 | 
15 | #### TIE_AWAY_FROM_ZERO
16 | 
17 | /[%Rounding$TIE_AWAY_FROM_ZERO%]
18 | 
19 | #### TRUNCATE
20 | 
21 | /[%Rounding$TRUNCATE%]
22 | 
23 | #### CEILING
24 | 
25 | /[%Rounding$CEILING%]
26 | 
27 | #### FLOOR
28 | 
29 | /[%Rounding$FLOOR%]
30 | 
31 | ## Details
32 | 
33 | ### Other floating point exceptions
34 | 
35 | The IEEE 754 standard defines a number of exceptions beyond rounding. For
36 | example, division by zero, overflow, and underflow. However, these exceptions
37 | have default behaviors defined by IEEE 754 and, since no known engine deviates
38 | from these default values, these exceptions are not exposed as options. For more
39 | information on what happens in these cases refer to the IEEE 754 standard.
40 | 
41 | ### Numerical Precision
42 | 
43 | The precision of the tanh function depends on the architecture in various dialects.
44 | 
45 | ### Output Range
46 | 
47 | The hyperbolic tangent function has an output range of [-1, 1], and it evaluates to 0
48 | at an input of 0.
49 | 
50 | ## Properties
51 | 
52 | ### Null propagating
53 | 
54 | /[%Properties$Null_propagating%]
55 | 
56 | ### NaN propagating
57 | 
58 | /[%Properties$NaN_propagating%]
59 | 
60 | ### Stateless
61 | 
62 | /[%Properties$Stateless%]
63 | 


--------------------------------------------------------------------------------
/tools/convert_testcases/check_testcase_format_conversion_roundtrip.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import shutil
 4 | 
 5 | from ruamel.yaml import YAML
 6 | from deepdiff import DeepDiff
 7 | 
 8 | from convert_testcases_to_substrait_test_format import (
 9 |     convert_directory as convert_directory_to_substrait,
10 |     load_test_file,
11 | )
12 | from convert_testcases_to_yaml_format import (
13 |     convert_directory as convert_directory_to_yaml,
14 | )
15 | 
16 | 
def compare_test_files(original_file, roundtrip_file):
    """Report whether two test files load to equal data.

    Both paths are read with ``load_test_file`` and the loaded objects are
    compared with ``==``.

    Args:
        original_file: Path of the reference test file.
        roundtrip_file: Path of the file produced by the round trip.

    Returns:
        True when the loaded contents are equal, False otherwise.
    """
    o_file = load_test_file(original_file)
    r_file = load_test_file(roundtrip_file)
    # Return the verdict instead of asserting: the caller branches on the
    # result, and a bare ``assert`` both returned None (always falsy) on
    # success and aborted the whole run on the first mismatch.
    return o_file == r_file
21 | 
22 | 
def compare_directories(original_dir, roundtrip_dir):
    """Compare every ``.yaml`` file under ``original_dir`` with its roundtrip twin.

    The twin's path is the file's path relative to ``original_dir`` joined onto
    ``roundtrip_dir``, with every ``.test`` substring replaced by ``.yaml``.

    Args:
        original_dir: Root directory that is walked for ``.yaml`` files.
        roundtrip_dir: Root directory expected to hold the converted files.

    Returns:
        The number of files that are missing from ``roundtrip_dir`` or for
        which ``compare_test_files`` returned a falsy value.
    """
    mismatches = 0
    for current_dir, _, names in os.walk(original_dir):
        for name in names:
            if not name.endswith(".yaml"):
                continue

            original_file = os.path.join(current_dir, name)
            relative = os.path.relpath(original_file, original_dir)
            roundtrip_file = os.path.join(roundtrip_dir, relative).replace(
                ".test", ".yaml"
            )

            if not os.path.exists(roundtrip_file):
                print(f"File missing in roundtrip directory: {roundtrip_file}")
                mismatches += 1
            elif compare_test_files(original_file, roundtrip_file):
                print(f"YAML content matches: {original_file} and {roundtrip_file}")
            else:
                mismatches += 1
    return mismatches
45 | 
46 | 
def main():
    """Round-trip the substrait test cases through YAML and report differences.

    The cases under ``../../substrait/tests/cases`` are converted to YAML, the
    YAML is converted back to the substrait test format, and the result is
    compared with the originals. All intermediate output lives under
    ``./temp``, which is removed before returning even when a step fails.
    """
    # Directories
    initial_cases_dir = "../../substrait/tests/cases"
    temp_dir = "./temp"
    intermediate_dir = f"{temp_dir}/bft_cases"
    roundtrip_dir = f"{temp_dir}/roundtrip_substrait_cases"
    uri_prefix = (
        "https://github.com/substrait-io/substrait/blob/main/extensions/substrait"
    )

    try:
        # Step 1: Convert from initial_cases_dir to intermediate_dir
        convert_directory_to_yaml(initial_cases_dir, intermediate_dir)

        # Step 2: Convert from intermediate_dir to roundtrip_dir
        convert_directory_to_substrait(intermediate_dir, roundtrip_dir, uri_prefix)

        # Step 3: Compare tests in initial_cases_dir and roundtrip_dir
        count = compare_directories(initial_cases_dir, roundtrip_dir)
        if count == 0:
            print(
                "All substrait test files match between original and roundtrip directories."
            )
        else:
            print(
                f"Differences found in {count} test files between original and roundtrip directories."
            )
    finally:
        # Always clean up; ignore_errors covers the case where a failure
        # happened before anything was written to temp_dir.
        shutil.rmtree(temp_dir, ignore_errors=True)


if __name__ == "__main__":
    main()
79 | 


--------------------------------------------------------------------------------
/tools/convert_testcases/convert_testcases_to_substrait_test_format.py:
--------------------------------------------------------------------------------
  1 | import yaml
  2 | import os
  3 | from collections import defaultdict
  4 | from itertools import count
  5 | from tools.convert_testcases.convert_testcase_helper import (
  6 |     convert_to_substrait_test_value,
  7 | )
  8 | 
  9 | 
 10 | # Define a custom YAML loader that interprets all values as strings
 11 | def string_loader(loader, node):
 12 |     return str(loader.construct_scalar(node))
 13 | 
 14 | 
 15 | def list_of_decimal_constructor(loader: yaml.SafeLoader, node: yaml.nodes.MappingNode):
 16 |     return [string_loader(loader, item) for item in node.value]
 17 | 
 18 | 
 19 | def load_test_file(file_path):
 20 |     """Load a YAML file, interpreting all values as strings."""
 21 |     # Override default YAML constructors to load all types as strings
 22 |     for tag in ("str", "int", "float", "bool", "null", "decimal"):
 23 |         yaml.add_constructor(f"tag:yaml.org,2002:{tag}", string_loader)
 24 | 
 25 |     yaml.add_constructor("!decimal", string_loader)
 26 |     yaml.add_constructor("!isostring", string_loader)
 27 |     yaml.add_constructor("!decimallist", list_of_decimal_constructor)
 28 | 
 29 |     with open(file_path, "r") as file:
 30 |         return yaml.load(file, Loader=yaml.FullLoader)
 31 | 
 32 | 
 33 | def format_return_value(case):
 34 |     result = case.get("result", {})
 35 |     special = result.get("special")
 36 | 
 37 |     if special:
 38 |         special = special.lower()
 39 | 
 40 |         # Handle special cases for ERROR and UNDEFINED
 41 |         if special in {"error", "undefined"}:
 42 |             return f"<!{special.upper()}>"
 43 | 
 44 |         if special == "nan":
 45 |             return "nan::fp64"
 46 | 
 47 |     # Return formatted result with format_value
 48 |     return convert_to_substrait_test_value(result.get("value"), result.get("type"))
 49 | 
 50 | 
 51 | def format_test_case_group(case, description_map):
 52 |     """Extract group name and description for test case."""
 53 |     group = case.get("group", "basic")
 54 |     group_name = group if isinstance(group, str) else group.get("id", "basic")
 55 |     description = group.get("description", "") if isinstance(group, dict) else ""
 56 | 
 57 |     if group_name not in description_map:
 58 |         description_map[group_name] = description
 59 | 
 60 |     return f"{group_name}: {description_map.get(group_name, '')}"
 61 | 
 62 | 
 63 | def generate_define_table(case, table_id):
 64 |     """Generates the table definition only if there are arguments with 'is_not_a_func_arg'."""
 65 |     args = case.get("args", [])
 66 | 
 67 |     # If args is empty, return an empty string, as no table is needed
 68 |     if not args:
 69 |         return ""
 70 | 
 71 |     # Gather column types and names based on args
 72 |     formatted_columns = ", ".join(str(arg["type"]) for arg in args) if args else ""
 73 | 
 74 |     # Transpose the arguments' values to construct rows
 75 |     values = [
 76 |         [
 77 |             convert_to_substrait_test_value(value, arg["type"], 1)
 78 |             for value in arg.get("value", [])
 79 |         ]
 80 |         for arg in args
 81 |     ]
 82 |     rows = zip(*values)  # zip will combine each nth element of each argument
 83 | 
 84 |     # Format rows as strings for the table definition
 85 |     formatted_rows = ", ".join(f"({', '.join(map(str, row))})" for row in rows)
 86 | 
 87 |     # Define table format with column types
 88 |     table_definition = (
 89 |         f"DEFINE t{table_id}({formatted_columns}) = ({formatted_rows}) \n"
 90 |     )
 91 | 
 92 |     return table_definition
 93 | 
 94 | 
 95 | def format_test_case(case, function, description_map, table_id_counter, is_aggregate):
 96 |     """Format a single test case."""
 97 |     description = format_test_case_group(case, description_map)
 98 |     options = case.get("options")
 99 |     options = (
100 |         f" [{', '.join(f'{k}:{convert_to_substrait_test_value(v, None)}' for k, v in options.items())}]"
101 |         if options
102 |         else ""
103 |     )
104 |     results = format_return_value(case)
105 | 
106 |     args = [arg for arg in case.get("args", []) if not arg.get("is_not_a_func_arg")]
107 |     if is_aggregate and len(args) != 1:
108 |         table_id = next(table_id_counter)
109 |         args = ", ".join(f"t{table_id}.col{idx}" for idx in range(len(args)))
110 |         table_definition = generate_define_table(case, table_id)
111 |         return description, f"{table_definition}{function}({args}){options} = {results}"
112 | 
113 |     args = ", ".join(
114 |         convert_to_substrait_test_value(arg.get("value"), str(arg["type"]))
115 |         for arg in case.get("args", [])
116 |     )
117 |     return description, f"{function}({args}){options} = {results}"
118 | 
119 | 
def convert_test_file_to_new_format(input_data, prefix, is_aggregate):
    """Turn loaded YAML test data into the lines of a substrait test file.

    Args:
        input_data: Mapping with ``function``, ``base_uri`` and ``cases``.
        prefix: Leading part of ``base_uri`` to drop; the first ``len(prefix)``
            characters are removed without checking that they match.
        is_aggregate: Selects the aggregate or scalar file header.

    Returns:
        A list of newline-terminated lines: two header lines followed by, for
        each group in first-seen order, a ``# <group heading>`` comment and the
        group's formatted tests.
    """
    function = input_data["function"]
    include_path = input_data["base_uri"][len(prefix) :]
    seen_descriptions = {}
    table_ids = count(0)

    # Group formatted tests under their heading, preserving first-seen order.
    tests_by_heading = {}
    for case in input_data["cases"]:
        heading, formatted = format_test_case(
            case, function, seen_descriptions, table_ids, is_aggregate
        )
        tests_by_heading.setdefault(heading, []).append(formatted)

    if is_aggregate:
        version_line = "### SUBSTRAIT_AGGREGATE_TEST: v1.0"
    else:
        version_line = "### SUBSTRAIT_SCALAR_TEST: v1.0"

    output_lines = [
        f"{version_line}\n",
        f"### SUBSTRAIT_INCLUDE: '{include_path}'\n",
    ]
    for heading, tests in tests_by_heading.items():
        output_lines.append(f"\n# {heading}\n")
        for test in tests:
            output_lines.append(f"{test}\n")

    return output_lines
144 | 
145 | 
def output_test_data(output_file, lines):
    """Write formatted lines to a file, creating parent directories as needed.

    Args:
        output_file: Destination path. It may be a bare file name, in which
            case the file is written to the current working directory.
        lines: Iterable of strings written as-is (no newlines are added).
    """
    parent = os.path.dirname(output_file)
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually has one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(output_file, "w") as file:
        file.writelines(lines)

    print(f"Converted '{output_file}' successfully.")
153 | 
154 | 
def convert_directory(input_dir, output_dir, prefix):
    """Convert every ``.yaml`` file under ``input_dir`` to a ``.test`` file.

    The directory layout below ``input_dir`` is mirrored under ``output_dir``;
    the output path is the mirrored path with every ``.yaml`` substring
    replaced by ``.test``. A file is treated as an aggregate test file when
    the string ``aggregate`` occurs anywhere in its input path.

    Args:
        input_dir: Root directory walked for YAML test files.
        output_dir: Root directory that receives the converted files.
        prefix: URI prefix forwarded to ``convert_test_file_to_new_format``.
    """
    for root, _, files in os.walk(input_dir):
        yaml_names = [name for name in files if name.endswith(".yaml")]
        for filename in yaml_names:
            input_file = os.path.join(root, filename)
            relative = os.path.relpath(input_file, input_dir)
            output_file = os.path.join(output_dir, relative).replace(
                ".yaml", ".test"
            )
            is_aggregate = "aggregate" in input_file

            yaml_data = load_test_file(input_file)
            output_lines = convert_test_file_to_new_format(
                yaml_data, prefix, is_aggregate
            )
            output_test_data(output_file, output_lines)
170 | 
171 | 
if __name__ == "__main__":
    # Script entry point: convert the YAML cases in ../../cases into substrait
    # test files under ../../substrait/tests/cases, dropping the extension
    # repository prefix from each file's base_uri.
    convert_directory(
        "../../cases",
        "../../substrait/tests/cases",
        "https://github.com/substrait-io/substrait/blob/main/extensions/substrait",
    )
179 | 


--------------------------------------------------------------------------------
/tools/convert_testcases/convert_testcases_to_yaml_format.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | from ruamel.yaml import YAML
  4 | from tests.coverage.nodes import (
  5 |     TestFile,
  6 |     AggregateArgument,
  7 | )
  8 | from tests.coverage.case_file_parser import load_all_testcases
  9 | from tools.convert_testcases.convert_testcase_helper import (
 10 |     convert_to_yaml_value,
 11 |     convert_to_long_type,
 12 |     SQUOTE_PLACEHOLDER,
 13 |     DQUOTE_PLACEHOLDER,
 14 |     iso_duration_to_timedelta,
 15 | )
 16 | 
 17 | yaml = YAML()
 18 | yaml.indent(mapping=2, sequence=4, offset=2)  # Adjust indentations as needed
 19 | yaml.width = 4096  # Extend line width to prevent line breaks
 20 | 
 21 | 
 22 | def convert_result(test_case):
 23 |     """Convert the result section based on specific conditions."""
 24 |     if test_case.is_return_type_error():
 25 |         return {"special": str(test_case.result.error)}
 26 |     elif str(test_case.result.value) == "nan":
 27 |         return {"special": "nan"}
 28 |     elif test_case.func_name == "add_intervals" and test_case.result.type == "iday":
 29 |         return {
 30 |             "value": convert_to_yaml_value(
 31 |                 (
 32 |                     iso_duration_to_timedelta(test_case.result.value)
 33 |                     if test_case.result.value is not None
 34 |                     else None
 35 |                 ),
 36 |                 "str",
 37 |             ),
 38 |             "type": "string",
 39 |         }
 40 |     else:
 41 |         return {
 42 |             "value": convert_to_yaml_value(
 43 |                 test_case.result.value, test_case.result.type
 44 |             ),
 45 |             "type": convert_to_long_type(test_case.result.type),
 46 |         }
 47 | 
 48 | 
 49 | def convert_table_definition(test_case):
 50 |     column_types = None
 51 | 
 52 |     if all(isinstance(arg, AggregateArgument) for arg in test_case.args):
 53 |         # Extract the column_type from each AggregateArgument
 54 |         column_types = [arg.column_type for arg in test_case.args]
 55 |     elif test_case.args is not None:
 56 |         column_types = [
 57 |             convert_to_long_type(
 58 |                 arg.scalar_value.type
 59 |                 if isinstance(arg, AggregateArgument)
 60 |                 else arg.type
 61 |             )
 62 |             for arg in test_case.args
 63 |         ]
 64 | 
 65 |     columns = list(map(list, zip(*test_case.rows)))
 66 |     if not columns:
 67 |         # Handle the case where columns is empty, but column_types is not
 68 |         return [
 69 |             {"value": [], "type": col_type, "is_not_a_func_arg": "true"}
 70 |             for col_type in column_types
 71 |         ]
 72 |     else:
 73 |         # Handle the case where columns is not empty
 74 |         return [
 75 |             {
 76 |                 "value": convert_to_yaml_value(column, col_type),
 77 |                 "type": col_type,
 78 |                 "is_not_a_func_arg": "true",
 79 |             }
 80 |             for column, col_type in zip(columns, column_types)
 81 |         ]
 82 | 
 83 | 
 84 | def convert_group(test_case, groups):
 85 |     id = str(test_case.group.name.split(": ")[0])
 86 |     desc = test_case.group.name.split(": ")[1] if ": " in test_case.group.name else ""
 87 |     group = id if id in groups else {"id": id, "description": desc}
 88 |     groups[id] = desc
 89 |     return group
 90 | 
 91 | 
 92 | def convert_test_case_to_old_format(test_case, groups):
 93 |     # Match group headers with descriptions
 94 |     print(f"converting test '{test_case}'")
 95 |     case = {}
 96 |     case["group"] = convert_group(test_case, groups)
 97 | 
 98 |     if test_case.rows is not None:
 99 |         case["args"] = convert_table_definition(test_case)
100 |     else:
101 |         if isinstance(test_case.args[0], AggregateArgument):
102 |             case["args"] = [
103 |                 {
104 |                     "value": convert_to_yaml_value(
105 |                         arg.scalar_value.value, arg.scalar_value.type
106 |                     ),
107 |                     "type": convert_to_long_type(arg.scalar_value.type),
108 |                 }
109 |                 for arg in test_case.args
110 |             ]
111 |         else:
112 |             case["args"] = [
113 |                 {
114 |                     "value": convert_to_yaml_value(arg.value, arg.type),
115 |                     "type": convert_to_long_type(arg.type),
116 |                 }
117 |                 for arg in test_case.args
118 |             ]
119 | 
120 |     if len(test_case.options) > 0:
121 |         case["options"] = {
122 |             key: convert_to_yaml_value(value, None)
123 |             for key, value in test_case.options.items()
124 |         }
125 | 
126 |     case["result"] = convert_result(test_case)
127 |     return case
128 | 
129 | 
def convert_test_file_to_yaml(testFile: TestFile):
    """Build the YAML document for a parsed substrait test file.

    Args:
        testFile: Parsed test file providing ``testcases`` and ``include``.

    Returns:
        A dict with ``base_uri`` (the substrait extensions URL followed by
        ``testFile.include``), ``function`` and ``cases``. ``function`` is the
        ``func_name`` of the last test case, or ``None`` when there are none.
    """
    seen_groups = {}
    converted_cases = []
    function = None

    for test_case in testFile.testcases:
        # Overwritten on every iteration, so the last case's name is kept.
        function = test_case.func_name
        converted_cases.append(convert_test_case_to_old_format(test_case, seen_groups))

    base_uri = f"https://github.com/substrait-io/substrait/blob/main/extensions/substrait{testFile.include}"

    # Construct the full YAML structure
    return {
        "base_uri": base_uri,
        "function": function,
        "cases": converted_cases,
    }
146 | 
147 | 
def output_test_data(output_file, input_path, yaml_data):
    """Dump ``yaml_data`` to ``output_file`` and post-process its quoting.

    Args:
        output_file: Path of the YAML file to write.
        input_path: Path of the source file; only used in the log message.
        yaml_data: Data handed to the module-level ``yaml`` dumper.
    """
    with open(output_file, "w") as stream:
        yaml.dump(yaml_data, stream)

    # Rewrite the file in place to normalize quote characters.
    fix_quotes(output_file)

    print(f"Converted '{input_path}' to '{output_file}'.")
155 | 
156 | 
def fix_quotes(file_path):
    """Rewrite a file in place, normalizing its quote characters.

    Every single and double quote character is removed first; afterwards each
    ``SQUOTE_PLACEHOLDER`` is replaced with ``'`` and each
    ``DQUOTE_PLACEHOLDER`` with ``"``.

    Args:
        file_path: Path of the file to rewrite.
    """
    with open(file_path, "r") as file:
        content = file.read()

    # Order matters: strip real quotes before the placeholders are expanded.
    replacements = (
        ("'", ""),
        ('"', ""),
        (SQUOTE_PLACEHOLDER, "'"),
        (DQUOTE_PLACEHOLDER, '"'),
    )
    for old, new in replacements:
        content = content.replace(old, new)

    with open(file_path, "w") as file:
        file.write(content)
171 | 
172 | 
def convert_directory(input_dir, output_dir):
    """Convert every test file loaded from ``input_dir`` into a YAML file.

    Each file's path relative to ``input_dir`` is mirrored under
    ``output_dir``, with every ``.test`` substring of the resulting path
    replaced by ``.yaml``. Missing output directories are created.

    Args:
        input_dir: Directory passed to ``load_all_testcases``.
        output_dir: Root directory that receives the YAML files.
    """
    for test_file in load_all_testcases(input_dir):
        source_path = test_file.path
        mirrored = os.path.join(output_dir, os.path.relpath(source_path, input_dir))
        output_file = mirrored.replace(".test", ".yaml")
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        output_test_data(
            output_file, test_file.path, convert_test_file_to_yaml(test_file)
        )
182 | 
183 | 
def main():
    """Convert the substrait test cases in ../../substrait/tests/cases to YAML in ../../cases."""
    convert_directory("../../substrait/tests/cases", "../../cases")


if __name__ == "__main__":
    main()
192 | 


--------------------------------------------------------------------------------
/tools/schema/casefile.yaml:
--------------------------------------------------------------------------------
 1 | $id: https://thebft.info/schemas/casefile.json
 2 | $schema: https://json-schema.org/draft/2020-12/schema
 3 | type: object
 4 | properties:
 5 |   function:
 6 |     type: string
 7 |   cases:
 8 |     type: array
 9 |     items:
10 |       type: object
11 |       properties:
12 |         group:
13 |           oneOf:
14 |             - type: object
15 |               properties:
16 |                 id:
17 |                   type: string
18 |                 description:
19 |                   type: string
20 |               required:
21 |                 - id
22 |                 - description
23 |               additionalProperties: false
24 |             - type: string
25 |         args:
26 |           type: array
27 |           items:
28 |             type: object
29 |             properties:
30 |               value:
31 |                 oneOf:
32 |                   - type: string
33 |                   - type: number
34 |                   - type: boolean
35 |                   - type: "null"
36 |                   - type: array
37 |               type:
38 |                 type: string
39 |             required:
40 |               - value
41 |               - type
42 |             additionalProperties: false
43 |         options:
44 |           type: object
45 |           additionalProperties:
46 |             type: string
47 |         result:
48 |           oneOf:
49 |             - type: object
50 |               properties:
51 |                 value:
52 |                   oneOf:
53 |                     - type: string
54 |                     - type: number
55 |                     - type: boolean
56 |                     - type: "null"
57 |                 type:
58 |                   type: string
59 |               required:
60 |                 - value
61 |                 - type
62 |               additionalProperties: false
63 |             - type: object
64 |               properties:
65 |                 special:
66 |                   enum:
67 |                     - error
68 |                     - undefined
69 |               required:
70 |                 - special
71 |               additionalProperties: false
72 |       additionalProperties: false
73 |       required:
74 |         - group
75 |         - result
76 | additionalProperties: false
77 | required:
78 |   - function
79 |   - cases
80 | 


--------------------------------------------------------------------------------
/tools/yaml_to_json.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pathlib import Path
 3 | 
 4 | import yaml
 5 | 
 6 | try:
 7 |     from yaml import CSafeLoader as SafeLoader
 8 | except ImportError:
 9 |     from yaml import SafeLoader
10 | 
# Repository root, derived from this script's location (tools/..).
BASE_DIR = Path(__file__).parent.parent
# Destination root for the generated JSON files.
JSON_DIR = BASE_DIR / "function_json"
# Source root holding one folder of YAML case files per function category.
CASES_DIR = BASE_DIR / "cases"
FUNCTION_FOLDERS = Path(CASES_DIR).glob("*")


# Mirror every YAML file below cases/<folder>/ as JSON below
# function_json/<folder>/, keeping any sub-directory structure.
for function_folder in FUNCTION_FOLDERS:
    # Only directories hold case files; skipping other entries avoids creating
    # empty output folders named after stray files.
    if not function_folder.is_dir():
        continue
    json_path = JSON_DIR / function_folder.name
    json_path.mkdir(parents=True, exist_ok=True)
    for function_yaml in function_folder.rglob("*.yaml"):
        # rglob recurses, so open the path it yielded rather than rebuilding
        # it from the bare file name (which breaks for nested files).
        relative = function_yaml.relative_to(function_folder)
        json_file = (json_path / relative).with_suffix(".json")
        json_file.parent.mkdir(parents=True, exist_ok=True)
        with open(function_yaml) as f:
            dataMap = yaml.load(f, SafeLoader)
        with open(json_file, "w") as outfile:
            outfile.write("{}\n".format(json.dumps(dataMap, indent=4)))
29 | 


--------------------------------------------------------------------------------